https://touch-sp.hatenablog.com/entry/2025/04/11/000041

はじめに

Version 0.9.1の記事はこちらです。
touch-sp.hatenablog.com

今回はVersion 0.9.5を使います。

PC環境

Windows 11
RTX 4090 (VRAM 24GB)
CUDA 12.4
Python 3.12

Python環境構築

pip install torch==2.6.0+cu124 --index-url https://download.pytorch.org/whl/cu124
pip install diffusers[torch]
pip install transformers sentencepiece imageio imageio-ffmpeg

diffusers==0.33.1
imageio==2.37.0
imageio-ffmpeg==0.6.0
sentencepiece==0.2.0
torch==2.6.0+cu124
transformers==4.51.1

Pythonスクリプト

VRAM消費量と動画生成時間を測定するためのスクリプトを書きました。

import torch
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXConditionPipeline
from diffusers.utils import export_to_video
from typing import Tuple, TypedDict
from itertools import product
import gc
import time

def reset_memory():
    """Release unused memory and clear the CUDA memory counters.

    Runs, in order: Python garbage collection, emptying of the CUDA cache,
    and a reset of both the accumulated and the peak CUDA memory statistics.
    Returns nothing.
    """
    cleanup_steps = (
        gc.collect,
        torch.cuda.empty_cache,
        torch.cuda.reset_accumulated_memory_stats,
        torch.cuda.reset_peak_memory_stats,
    )
    # Order matters: collect dead Python objects first so the cache can be
    # emptied before the statistics are reset.
    for step in cleanup_steps:
        step()

class ResultDict(TypedDict):
    """Result of a single benchmark run, as built by ``main``."""

    # Peak reserved CUDA memory in GB (rounded to two decimals by main).
    # The key must be spelled "memory": that is the key main writes and the
    # reporting code reads.
    memory: float
    # Elapsed wall-clock seconds for the run (rounded to two decimals by main).
    time_required: float
    # Newline-joined labels of the options that were enabled for the run.
    combination: str

def main(
    i: int,
    combination: Tuple[bool, bool, bool, bool, bool]
) -> "ResultDict | None":
    """Generate one video with the given option combination and measure it.

    Args:
        i: Index of the run; the video is saved as ``output{i}.mp4``.
        combination: Five on/off flags, in this order:
            ``pipe.to("cuda")``, ``pipe.enable_model_cpu_offload()``,
            ``pipe.enable_sequential_cpu_offload()``,
            ``pipe.vae.enable_slicing()``, ``pipe.vae.enable_tiling()``.

    Returns:
        A dict with the keys ``"memory"`` (peak reserved CUDA memory in GB),
        ``"time_required"`` (seconds elapsed from just before generation
        until just after the video file was exported) and ``"combination"``
        (newline-joined labels of the enabled options).
        ``None`` if none of the first three flags is set, or if the run
        raised an exception (the labels and the exception are printed).
    """
    # A run needs at least one of the first three options (to("cuda") or one
    # of the CPU-offload modes); skip before loading the model otherwise.
    if not any(combination[:3]):
        return None

    repo = "Lightricks/LTX-Video-0.9.5"
    pipe = LTXConditionPipeline.from_pretrained(repo, torch_dtype=torch.bfloat16)

    # Defined before the try block so the except branch can always print it.
    combination_list = []

    try:
        # (label, action) pairs in the same order as the flags in
        # `combination`. Lambdas defer every attribute access until the
        # option is actually applied.
        options = (
            ('to("cuda")', lambda: pipe.to("cuda")),
            ("enable_model_cpu_offload()",
             lambda: pipe.enable_model_cpu_offload()),
            ("enable_sequential_cpu_offload()",
             lambda: pipe.enable_sequential_cpu_offload()),
            ("vae.enable_slicing()", lambda: pipe.vae.enable_slicing()),
            ("vae.enable_tiling()", lambda: pipe.vae.enable_tiling()),
        )
        for enabled, (label, apply_option) in zip(combination, options):
            if enabled:
                apply_option()
                combination_list.append(label)

        # Define prompts
        prompt = "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"
        negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"

        # NOTE: VAE slicing/tiling are deliberately NOT enabled here.
        # Turning them on unconditionally would make the slicing/tiling
        # flags in `combination` meaningless for the benchmark.

        # Generate the video
        start_time = time.time()
        generator = torch.Generator(device="cuda").manual_seed(0)
        video = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=768,
            height=512,
            num_frames=161,
            num_inference_steps=50,
            generator=generator,
        ).frames[0]

        # Export the video (the measured time includes this step)
        export_to_video(video, f"output{i}.mp4", fps=24)
        end_time = time.time()

        result: ResultDict = {
            "memory": round(torch.cuda.max_memory_reserved() / 1024**3, 2),
            "time_required": round(end_time - start_time, 2),
            "combination": "\n".join(combination_list)
        }

    except Exception as e:
        # Best-effort benchmark: a failing combination is reported and
        # skipped so the remaining combinations can still run.
        print("\n".join(combination_list))
        print(e)
        return None

    print("succeee!!")
    print("\n".join(combination_list))
    print(f"saved video as output{i}.mp4")
    return result

if __name__ == "__main__":

    # Run the benchmark for every on/off combination of the five options
    # and keep only the runs that produced a result.
    result_list = []
    for i, combination in enumerate(product([True, False], repeat=5)):
        reset_memory()
        outcome = main(i, combination)
        if outcome is None:
            continue
        result_list.append(outcome)

    # Report 1: fastest run first.
    print("Sorted by time taken")
    for entry in sorted(result_list, key=lambda r: r["time_required"]):
        print(entry["combination"])
        print(f"time: {entry['time_required']} sec")
        print(f"memory: {entry['memory']} GB")
        print()

    # Report 2: smallest memory value first.
    print("Sorted by memory used")
    for entry in sorted(result_list, key=lambda r: r["memory"]):
        print(entry["combination"])
        print(f"memory: {entry['memory']} GB")
        print(f"time: {entry['time_required']} sec")
        print()

作成動画

作成した動画は以下のGoogle Bloggerに載せています。
support-touchsp.blogspot.com

ベンチマーク結果

作成時間でソート

to("cada")
vae.enable_tiling()
time: 50.55 sec
memory: 23.92 GB

enable_model_cpu_offload()
time: 56.83 sec
memory: 10.08 GB

enable_model_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 56.89 sec
memory: 10.08 GB

enable_model_cpu_offload()
vae.enable_slicing()
time: 56.98 sec
memory: 10.08 GB

enable_model_cpu_offload()
vae.enable_tiling()
time: 57.36 sec
memory: 10.08 GB

to("cada")
enable_model_cpu_offload()
vae.enable_slicing()
time: 59.96 sec
memory: 15.18 GB

to("cada")
enable_model_cpu_offload()
vae.enable_tiling()
time: 60.12 sec
memory: 15.18 GB

to("cada")
enable_model_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 60.15 sec
memory: 15.18 GB

to("cada")
enable_model_cpu_offload()
time: 60.71 sec
memory: 15.18 GB

to("cada")
time: 62.63 sec
memory: 23.92 GB

to("cada")
vae.enable_slicing()
vae.enable_tiling()
time: 68.05 sec
memory: 23.92 GB

to("cada")
vae.enable_slicing()
time: 69.61 sec
memory: 23.92 GB

to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 120.13 sec
memory: 15.16 GB

enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 120.7 sec
memory: 5.61 GB

enable_sequential_cpu_offload()
time: 120.89 sec
memory: 5.61 GB

enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_tiling()
time: 121.0 sec
memory: 5.61 GB

enable_model_cpu_offload()
enable_sequential_cpu_offload()
time: 121.09 sec
memory: 5.61 GB

enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
time: 121.47 sec
memory: 5.61 GB

enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 121.7 sec
memory: 5.61 GB

enable_sequential_cpu_offload()
vae.enable_slicing()
time: 121.73 sec
memory: 5.61 GB

enable_sequential_cpu_offload()
vae.enable_tiling()
time: 121.79 sec
memory: 5.61 GB

to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
time: 127.69 sec
memory: 15.18 GB

to("cada")
enable_sequential_cpu_offload()
time: 139.15 sec
memory: 15.18 GB

to("cada")
enable_sequential_cpu_offload()
vae.enable_slicing()
time: 140.03 sec
memory: 15.18 GB

to("cada")
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 140.83 sec
memory: 15.18 GB

to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_tiling()
time: 141.25 sec
memory: 15.18 GB

to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
time: 141.78 sec
memory: 15.18 GB

to("cada")
enable_sequential_cpu_offload()
vae.enable_tiling()
time: 141.79 sec
memory: 15.18 GB

VRAM使用量でソート

enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 5.61 GB
time: 120.7 sec

enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
memory: 5.61 GB
time: 121.47 sec

enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_tiling()
memory: 5.61 GB
time: 121.0 sec

enable_model_cpu_offload()
enable_sequential_cpu_offload()
memory: 5.61 GB
time: 121.09 sec

enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 5.61 GB
time: 121.7 sec

enable_sequential_cpu_offload()
vae.enable_slicing()
memory: 5.61 GB
time: 121.73 sec

enable_sequential_cpu_offload()
vae.enable_tiling()
memory: 5.61 GB
time: 121.79 sec

enable_sequential_cpu_offload()
memory: 5.61 GB
time: 120.89 sec

enable_model_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 10.08 GB
time: 56.89 sec

enable_model_cpu_offload()
vae.enable_slicing()
memory: 10.08 GB
time: 56.98 sec

enable_model_cpu_offload()
vae.enable_tiling()
memory: 10.08 GB
time: 57.36 sec

enable_model_cpu_offload()
memory: 10.08 GB
time: 56.83 sec

to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 15.16 GB
time: 120.13 sec

to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
memory: 15.18 GB
time: 127.69 sec

to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_tiling()
memory: 15.18 GB
time: 141.25 sec

to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
memory: 15.18 GB
time: 141.78 sec

to("cada")
enable_model_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 15.18 GB
time: 60.15 sec

to("cada")
enable_model_cpu_offload()
vae.enable_slicing()
memory: 15.18 GB
time: 59.96 sec

to("cada")
enable_model_cpu_offload()
vae.enable_tiling()
memory: 15.18 GB
time: 60.12 sec

to("cada")
enable_model_cpu_offload()
memory: 15.18 GB
time: 60.71 sec

to("cada")
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 15.18 GB
time: 140.83 sec

to("cada")
enable_sequential_cpu_offload()
vae.enable_slicing()
memory: 15.18 GB
time: 140.03 sec

to("cada")
enable_sequential_cpu_offload()
vae.enable_tiling()
memory: 15.18 GB
time: 141.79 sec

to("cada")
enable_sequential_cpu_offload()
memory: 15.18 GB
time: 139.15 sec

to("cada")
vae.enable_slicing()
vae.enable_tiling()
memory: 23.92 GB
time: 68.05 sec

to("cada")
vae.enable_slicing()
memory: 23.92 GB
time: 69.61 sec

to("cada")
vae.enable_tiling()
memory: 23.92 GB
time: 50.55 sec

to("cada")
memory: 23.92 GB
time: 62.63 sec

ランキング参加中

プログラミング