https://touch-sp.hatenablog.com/entry/2025/09/07/212751

結果

結果はGoogle Bloggerに載せています。 support-touchsp.blogspot.com

RTX 4090を使って25分ほど掛かっています。

GPU 0 - Used memory: 23.90/23.99 GB
time: 1492.29 sec

Pythonスクリプト

import torch
from diffusers import WanPipeline, AutoencoderKLWan
from diffusers.utils import export_to_video
from diffusers.quantizers import PipelineQuantizationConfig

from utils import time_monitor, gpu_monitor

@time_monitor
@gpu_monitor(interval=0.5)
def main():
    """Generate a text-to-video clip with Wan2.2 T2V A14B and save it to disk.

    Loads the pipeline with bitsandbytes 4-bit (NF4) quantization, swaps in a
    separately loaded float32 VAE, enables model CPU offload, renders 81
    frames at 480x832 and writes them to ``t2v_out.mp4`` at 16 fps.
    """
    repo_id = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"

    # 4-bit NF4 weights, bfloat16 compute.
    bnb_kwargs = {
        "load_in_4bit": True,
        "bnb_4bit_quant_type": "nf4",
        "bnb_4bit_compute_dtype": torch.bfloat16,
    }
    # NOTE(review): "vae" is listed here, yet a pre-loaded float32 VAE is
    # passed to from_pretrained below -- confirm whether this entry has any
    # effect on the VAE that is actually used.
    quantized_components = ["text_encoder", "transformer", "transformer_2", "vae"]
    pipeline_quant_config = PipelineQuantizationConfig(
        quant_backend="bitsandbytes_4bit",
        quant_kwargs=bnb_kwargs,
        components_to_quantize=quantized_components,
    )

    # The VAE is loaded on its own in float32 and handed to the pipeline.
    fp32_vae = AutoencoderKLWan.from_pretrained(
        repo_id,
        subfolder="vae",
        torch_dtype=torch.float32,
    )

    pipe = WanPipeline.from_pretrained(
        repo_id,
        quantization_config=pipeline_quant_config,
        vae=fp32_vae,
        torch_dtype=torch.bfloat16,
    )
    pipe.enable_model_cpu_offload()

    positive_text = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
    negative_text = "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走"

    # Sampling settings; the generator is seeded with 0 right before the call.
    sampling_args = {
        "prompt": positive_text,
        "negative_prompt": negative_text,
        "height": 480,
        "width": 832,
        "num_frames": 81,
        "guidance_scale": 4.0,
        "guidance_scale_2": 3.0,
        "num_inference_steps": 40,
        "generator": torch.manual_seed(0),
    }
    result = pipe(**sampling_args)

    # The first entry of .frames is the clip that gets exported.
    video_frames = result.frames[0]
    export_to_video(video_frames, "t2v_out.mp4", fps=16)

# Run the generation only when this file is executed as a script.
if __name__=="__main__":
    main()

VRAM使用量と実行時間は、上のスクリプトで`utils`からインポートしている`time_monitor`と`gpu_monitor`（こちらのスクリプト）で計測しました。

pyproject.toml

# Project metadata and runtime dependencies for the Wan2.2 script.
[project]
name = "wan2"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "accelerate>=1.10.1",
    "bitsandbytes>=0.47.0",
    "diffusers>=0.35.1",
    "ftfy>=6.3.1",
    "imageio>=2.37.0",
    "imageio-ffmpeg>=0.6.0",
    "pynvml>=13.0.1",
    # Exact pin on the CUDA 12.6 build; resolved from the "torch-cuda" index below.
    "torch==2.8.0+cu126",
    "transformers>=4.56.1",
]

# Extra package index that hosts the cu126 PyTorch wheels.
# NOTE(review): per the uv docs, `explicit = true` restricts this index to
# packages that name it in [tool.uv.sources] -- confirm against the uv version in use.
[[tool.uv.index]]
name = "torch-cuda"
url = "https://download.pytorch.org/whl/cu126"
explicit = true

# Route only torch to the index declared above.
[tool.uv.sources]
torch = [{ index = "torch-cuda" }]

ランキング参加中

プログラミング