はじめに
Version 0.9.1の記事はこちらです。touch-sp.hatenablog.com
今回はVersion 0.9.5を使います。
PC環境
Windows 11 RTX 4090 (VRAM 24GB) CUDA 12.4 Python 3.12
Python環境構築
pip install torch==2.6.0+cu124 --index-url https://download.pytorch.org/whl/cu124
pip install diffusers[torch]
pip install transformers sentencepiece imageio imageio-ffmpeg
diffusers==0.33.1
imageio==2.37.0
imageio-ffmpeg==0.6.0
sentencepiece==0.2.0
torch==2.6.0+cu124
transformers==4.51.1
Pythonスクリプト
VRAM消費量を動画生成時間を測定するためのスクリプトを書きました。import torch from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXConditionPipeline from diffusers.utils import export_to_video from typing import Tuple, TypedDict from itertools import product import gc import time def reset_memory(): gc.collect() torch.cuda.empty_cache() torch.cuda.reset_accumulated_memory_stats() torch.cuda.reset_peak_memory_stats() class ResultDict(TypedDict): memeory: float time_required: float combination: str def main( i: int, combination: Tuple[bool, bool, bool, bool, bool] ) -> ResultDict: if sum(combination[:3]) == 0: return None repo = "Lightricks/LTX-Video-0.9.5" pipe = LTXConditionPipeline.from_pretrained(repo, torch_dtype=torch.bfloat16) try: combination_list = [] if combination[0]: pipe.to("cuda") combination_list.append("to(\"cada\")") if combination[1]: pipe.enable_model_cpu_offload() combination_list.append("enable_model_cpu_offload()") if combination[2]: pipe.enable_sequential_cpu_offload() combination_list.append("enable_sequential_cpu_offload()") if combination[3]: pipe.vae.enable_slicing() combination_list.append("vae.enable_slicing()") if combination[4]: pipe.vae.enable_tiling() combination_list.append("vae.enable_tiling()") # Define prompts prompt = "A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. 
The scene appears to be real-life footage" negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted" pipe.vae.enable_tiling() pipe.vae.enable_slicing() # Generate the video start_time = time.time() generator = torch.Generator(device="cuda").manual_seed(0) video = pipe( prompt=prompt, negative_prompt=negative_prompt, width=768, height=512, num_frames=161, num_inference_steps=50, generator=generator, ).frames[0] # Export the video export_to_video(video, f"output{i}.mp4", fps=24) end_time = time.time() result: ResultDict = { "memory": round(torch.cuda.max_memory_reserved() / 1024**3, 2), "time_required": round(end_time - start_time, 2), "combination": "\n".join(combination_list) } except Exception as e: print("\n".join(combination_list)) print(e) return None print("succeee!!") print("\n".join(combination_list)) print(f"saved video as output{i}.mp4") return result if __name__=="__main__": combinations = list(product([True, False], repeat=5)) result_list = [] for i, combination in enumerate(combinations): reset_memory() result = main(i, combination) if result is not None: result_list.append(result) print("Sorted by time taken") time_sorted_list = sorted(result_list, key=lambda x: x["time_required"]) for time_sorted in time_sorted_list: print(time_sorted["combination"]) print(f"time: {time_sorted["time_required"]} sec") print(f"memory: {time_sorted["memory"]} GB") print() print("Sorted by memory used") memory_sorted_list = sorted(result_list, key=lambda x: x["memory"]) for memory_sorted in memory_sorted_list: print(memory_sorted["combination"]) print(f"memory: {memory_sorted["memory"]} GB") print(f"time: {memory_sorted["time_required"]} sec") print()
作成動画
作成した動画は以下のGoogle Bloggerに載せています。support-touchsp.blogspot.com
ベンチマーク結果
作成時間でソート
to("cada")
vae.enable_tiling()
time: 50.55 sec
memory: 23.92 GB
enable_model_cpu_offload()
time: 56.83 sec
memory: 10.08 GB
enable_model_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 56.89 sec
memory: 10.08 GB
enable_model_cpu_offload()
vae.enable_slicing()
time: 56.98 sec
memory: 10.08 GB
enable_model_cpu_offload()
vae.enable_tiling()
time: 57.36 sec
memory: 10.08 GB
to("cada")
enable_model_cpu_offload()
vae.enable_slicing()
time: 59.96 sec
memory: 15.18 GB
to("cada")
enable_model_cpu_offload()
vae.enable_tiling()
time: 60.12 sec
memory: 15.18 GB
to("cada")
enable_model_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 60.15 sec
memory: 15.18 GB
to("cada")
enable_model_cpu_offload()
time: 60.71 sec
memory: 15.18 GB
to("cada")
time: 62.63 sec
memory: 23.92 GB
to("cada")
vae.enable_slicing()
vae.enable_tiling()
time: 68.05 sec
memory: 23.92 GB
to("cada")
vae.enable_slicing()
time: 69.61 sec
memory: 23.92 GB
to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 120.13 sec
memory: 15.16 GB
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 120.7 sec
memory: 5.61 GB
enable_sequential_cpu_offload()
time: 120.89 sec
memory: 5.61 GB
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_tiling()
time: 121.0 sec
memory: 5.61 GB
enable_model_cpu_offload()
enable_sequential_cpu_offload()
time: 121.09 sec
memory: 5.61 GB
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
time: 121.47 sec
memory: 5.61 GB
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 121.7 sec
memory: 5.61 GB
enable_sequential_cpu_offload()
vae.enable_slicing()
time: 121.73 sec
memory: 5.61 GB
enable_sequential_cpu_offload()
vae.enable_tiling()
time: 121.79 sec
memory: 5.61 GB
to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
time: 127.69 sec
memory: 15.18 GB
to("cada")
enable_sequential_cpu_offload()
time: 139.15 sec
memory: 15.18 GB
to("cada")
enable_sequential_cpu_offload()
vae.enable_slicing()
time: 140.03 sec
memory: 15.18 GB
to("cada")
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
time: 140.83 sec
memory: 15.18 GB
to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_tiling()
time: 141.25 sec
memory: 15.18 GB
to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
time: 141.78 sec
memory: 15.18 GB
to("cada")
enable_sequential_cpu_offload()
vae.enable_tiling()
time: 141.79 sec
memory: 15.18 GB
VRAM使用量でソート
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 5.61 GB
time: 120.7 sec
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
memory: 5.61 GB
time: 121.47 sec
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_tiling()
memory: 5.61 GB
time: 121.0 sec
enable_model_cpu_offload()
enable_sequential_cpu_offload()
memory: 5.61 GB
time: 121.09 sec
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 5.61 GB
time: 121.7 sec
enable_sequential_cpu_offload()
vae.enable_slicing()
memory: 5.61 GB
time: 121.73 sec
enable_sequential_cpu_offload()
vae.enable_tiling()
memory: 5.61 GB
time: 121.79 sec
enable_sequential_cpu_offload()
memory: 5.61 GB
time: 120.89 sec
enable_model_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 10.08 GB
time: 56.89 sec
enable_model_cpu_offload()
vae.enable_slicing()
memory: 10.08 GB
time: 56.98 sec
enable_model_cpu_offload()
vae.enable_tiling()
memory: 10.08 GB
time: 57.36 sec
enable_model_cpu_offload()
memory: 10.08 GB
time: 56.83 sec
to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 15.16 GB
time: 120.13 sec
to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_slicing()
memory: 15.18 GB
time: 127.69 sec
to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
vae.enable_tiling()
memory: 15.18 GB
time: 141.25 sec
to("cada")
enable_model_cpu_offload()
enable_sequential_cpu_offload()
memory: 15.18 GB
time: 141.78 sec
to("cada")
enable_model_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 15.18 GB
time: 60.15 sec
to("cada")
enable_model_cpu_offload()
vae.enable_slicing()
memory: 15.18 GB
time: 59.96 sec
to("cada")
enable_model_cpu_offload()
vae.enable_tiling()
memory: 15.18 GB
time: 60.12 sec
to("cada")
enable_model_cpu_offload()
memory: 15.18 GB
time: 60.71 sec
to("cada")
enable_sequential_cpu_offload()
vae.enable_slicing()
vae.enable_tiling()
memory: 15.18 GB
time: 140.83 sec
to("cada")
enable_sequential_cpu_offload()
vae.enable_slicing()
memory: 15.18 GB
time: 140.03 sec
to("cada")
enable_sequential_cpu_offload()
vae.enable_tiling()
memory: 15.18 GB
time: 141.79 sec
to("cada")
enable_sequential_cpu_offload()
memory: 15.18 GB
time: 139.15 sec
to("cada")
vae.enable_slicing()
vae.enable_tiling()
memory: 23.92 GB
time: 68.05 sec
to("cada")
vae.enable_slicing()
memory: 23.92 GB
time: 69.61 sec
to("cada")
vae.enable_tiling()
memory: 23.92 GB
time: 50.55 sec
to("cada")
memory: 23.92 GB
time: 62.63 sec