From 963a11687cac89eed18c229f84cfa785d4c1a179 Mon Sep 17 00:00:00 2001 From: Chen Jingye Date: Fri, 22 Nov 2024 10:34:37 +0800 Subject: [PATCH 01/11] update mochi --- README.md | 2 ++ scripts/inference_mochi.py | 36 +++++++++++++++++++++++++++++++ shscripts/inference_mochi.sh | 14 ++++++++++++ tools/video_comparison/compare.sh | 21 +++++++++++++++++- 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 scripts/inference_mochi.py create mode 100644 shscripts/inference_mochi.sh diff --git a/README.md b/README.md index 1ab61da..257092c 100644 --- a/README.md +++ b/README.md @@ -295,6 +295,7 @@ VideoTuna/ |T2V-Models|HxWxL|Checkpoints| |:---------|:---------|:--------| +|Mochi|848x480, 3s|[Hugging Face](https://huggingface.co/genmo/mochi-1-preview) |CogVideoX-2B|720x480, 6s|[Hugging Face](https://huggingface.co/THUDM/CogVideoX-2b) |CogVideoX-5B|720x480, 6s|[Hugging Face](https://huggingface.co/THUDM/CogVideoX-5b) |Open-Sora 1.0|512×512x16|[Hugging Face](https://huggingface.co/hpcai-tech/Open-Sora/blob/main/OpenSora-v1-HQ-16x512x512.pth) @@ -364,6 +365,7 @@ After downloading, the model checkpoints should be placed as [Checkpoint Structu Task|Model|Command|Length (#frames)|Resolution|Inference Time (s)|GPU Memory (GiB)| |:---------|:---------|:---------|:---------|:---------|:---------|:---------| +|T2V|Mochi|`bash shscripts/inference_mochi.sh`|31|480x848|22.0|22| |I2V|CogVideoX-5b-I2V|`bash shscripts/inference_cogVideo_i2v_diffusers.sh`|49|576x1024|310.4|4.78| |T2V|CogVideoX-2b|`bash shscripts/inference_cogVideo_t2v_diffusers.sh`|49|576x1024|107.6|2.32| |T2V|Open Sora V1.0|`bash shscripts/inference_opensora_v10_16x256x256.sh`|16|256x256|11.2|23.99| diff --git a/scripts/inference_mochi.py b/scripts/inference_mochi.py new file mode 100644 index 0000000..8535e4e --- /dev/null +++ b/scripts/inference_mochi.py @@ -0,0 +1,36 @@ +import torch +from diffusers import MochiPipeline +from diffusers.utils import export_to_video +import argparse + +# create arg parser +parser = argparse.ArgumentParser() +parser.add_argument("--ckpt_path", type=str, default="genmo/mochi-1-preview") +parser.add_argument("--prompt_file", type=str, default="inputs/t2v/prompts.txt") +parser.add_argument("--savedir", type=str, default="results/t2v/") +parser.add_argument("--height", type=int, default=480) +parser.add_argument("--width", type=int, default=848) +parser.add_argument("--bs", type=int, default=1) +parser.add_argument("--fps", type=int, default=28) +parser.add_argument("--seed", type=int, default=123) + +args = parser.parse_args() + +pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview") +# Enable memory savings +pipe.enable_model_cpu_offload() +pipe.enable_vae_tiling() + +# there are many prompts in the prompt_file, we need to read them all +with open(args.prompt_file, 'r') as file: + prompts = file.readlines() + +# set seed +torch.manual_seed(args.seed) + +for index, prompt in enumerate(prompts): + + with torch.autocast("cuda", torch.bfloat16, cache_enabled=False): + frames = pipe(prompt, num_frames=84).frames[0] + + export_to_video(frames, f"{args.savedir}/mochi_{index}.mp4", fps=30) diff --git a/shscripts/inference_mochi.sh b/shscripts/inference_mochi.sh new file mode 100644 index 0000000..bc95843 --- /dev/null +++ b/shscripts/inference_mochi.sh @@ -0,0 +1,14 @@ +ckpt='genmo/mochi-1-preview' +prompt_file="inputs/t2v/prompts.txt" +savedir="results/t2v/mochi2" +height=480 +width=848 + +python3 scripts/inference_mochi.py \ + --ckpt_path $ckpt \ + --prompt_file $prompt_file \ + --savedir $savedir \ + --bs 1 --height $height --width $width \ + --fps 28 \ + --seed 124 + diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh index 2873c5a..4de8a7d 100644 --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -4,7 +4,7 @@ input_dir='inputs/t2v' save_dir='results/compare1/' seed=42 unified_visualization_height=320 -inference_methods="videocrafter2;dynamicrafter;cogvideo—t2v;cogvideo—i2v;opensora" +inference_methods="videocrafter2;dynamicrafter;cogvideo—t2v;cogvideo—i2v;opensora;mochi" #### check input #### # Check if the directory exists @@ -116,5 +116,24 @@ if [[ $inference_methods == *"opensora"* ]]; then --frames 16 fi +################################ mochi ################################ +if [[ $inference_methods == *"mochi"* ]]; then + ckpt='genmo/mochi-1-preview' + prompt_file="${input_dir}/prompts.txt" + savedir="${save_dir}/t2v/mochi-${width}x${height}-28fps" + height=480 + width=848 + + python3 scripts/inference_mochi.py \ + --ckpt_path $ckpt \ + --prompt_file $prompt_file \ + --savedir $savedir \ + --bs 1 --height $height --width $width \ + --fps 28 \ + --seed 124 +fi + + + #### combine video python3 tools/video_comparison/combine.py --save_dir=$save_dir --input_dir=$input_dir --unified_height=$unified_visualization_height From 55a9cefd77ffb80565ac72da17acc8c9a70d04a1 Mon Sep 17 00:00:00 2001 From: Chen Jingye Date: Fri, 22 Nov 2024 10:35:59 +0800 Subject: [PATCH 02/11] update mochi --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 257092c..ca1af32 100644 --- a/README.md +++ b/README.md @@ -419,6 +419,7 @@ We support video alignment post-training to align human perference for video dif ## Acknowledgement We thank the following repos for sharing their awesome models and codes! +* [Mochi](https://www.genmo.ai/blog): A new SOTA in open-source video generation models * [VideoCrafter2](https://github.com/AILab-CVC/VideoCrafter): Overcoming Data Limitations for High-Quality Video Diffusion Models * [VideoCrafter1](https://github.com/AILab-CVC/VideoCrafter): Open Diffusion Models for High-Quality Video Generation * [DynamiCrafter](https://github.com/Doubiiu/DynamiCrafter): Animating Open-domain Images with Video Diffusion Priors From 2b8af2be2c6552bab162c44236f861b9d64860a2 Mon Sep 17 00:00:00 2001 From: yzxing87 Date: Wed, 27 Nov 2024 11:24:43 +0800 Subject: [PATCH 03/11] fix: create results folder --- scripts/inference_mochi.py | 3 +++ shscripts/inference_mochi.sh | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/inference_mochi.py b/scripts/inference_mochi.py index 8535e4e..c9ce7d9 100644 --- a/scripts/inference_mochi.py +++ b/scripts/inference_mochi.py @@ -2,6 +2,7 @@ from diffusers import MochiPipeline from diffusers.utils import export_to_video import argparse +import os # create arg parser parser = argparse.ArgumentParser() @@ -16,6 +17,8 @@ args = parser.parse_args() +os.makedirs(args.savedir, exist_ok=True) + pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview") # Enable memory savings pipe.enable_model_cpu_offload() diff --git a/shscripts/inference_mochi.sh b/shscripts/inference_mochi.sh index bc95843..9bb29f5 100644 --- a/shscripts/inference_mochi.sh +++ b/shscripts/inference_mochi.sh @@ -1,4 +1,4 @@ -ckpt='genmo/mochi-1-preview' +ckpt='checkpoints/mochi-1-preview' prompt_file="inputs/t2v/prompts.txt" savedir="results/t2v/mochi2" height=480 From 4756486fddeead4a5dacf77b72a8880f1c82b6e7 Mon Sep 17 00:00:00 2001 From: JingyeChen <576194329@qq.com> Date: Thu, 28 Nov 2024 08:40:58 +0000 Subject: [PATCH 04/11] Jingye modify the parameter passing of all-in-one inference --- tools/video_comparison/check_input.py | 3 +++ tools/video_comparison/combine.py | 3 ++- tools/video_comparison/compare.sh | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) mode change 100644 => 100755 tools/video_comparison/compare.sh diff --git a/tools/video_comparison/check_input.py b/tools/video_comparison/check_input.py index acf4f55..3c7d522 100644 --- a/tools/video_comparison/check_input.py +++ b/tools/video_comparison/check_input.py @@ -3,6 +3,7 @@ parser = argparse.ArgumentParser(description='Check the input directory') parser.add_argument('--input_dir', type=str, help='The input should be a directory', required=True) +parser.add_argument('--seed', type=int, help='The seed for the random number generator', default=42) args = parser.parse_args() # check if there are images in the input directory, jpg/png... @@ -26,6 +27,7 @@ for index, line in enumerate(lines): prompt = line.strip() print(f'creating image {index} using prompt: {prompt}') + out = pipe( prompt=prompt, guidance_scale=0., @@ -33,6 +35,7 @@ width=1024, num_inference_steps=4, max_sequence_length=256, + generator=torch.Generator("cuda").manual_seed(args.seed) ).images[0] index_str = str(index).zfill(5) out.save(f"{args.input_dir}/prompt_{index_str}.png") diff --git a/tools/video_comparison/combine.py b/tools/video_comparison/combine.py index e47e960..48c71e2 100644 --- a/tools/video_comparison/combine.py +++ b/tools/video_comparison/combine.py @@ -8,6 +8,7 @@ parser = argparse.ArgumentParser(description='Check the input directory') parser.add_argument('--input_dir', type=str, help='The input should be a directory', required=True) parser.add_argument('--save_dir', type=str, help='The directory of saving results', required=True) +parser.add_argument('--unified_height', type=int, help='The height of the unified video', default=320) args = parser.parse_args() methods = glob.glob(f'{args.save_dir}/*/*') @@ -42,7 +43,7 @@ def add_text_to_frame(frame, text='hi', position=(0,0)): # txt_clip = TextClip('hello world', color='orange', size=(100, 100)) # txt_clip = txt_clip.set_position('center').set_duration(max_duration) - # clips = [clip.resize(height=1080) for clip in clips] + clips = [clip.resize(height=args.unified_height) for clip in clips] # video_heights = [clip.size[1] for clip in clips] # print(methods) # print(len(clips)) diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh old mode 100644 new mode 100755 index 4de8a7d..53f3bcf --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -130,7 +130,7 @@ if [[ $inference_methods == *"mochi"* ]]; then --savedir $savedir \ --bs 1 --height $height --width $width \ --fps 28 \ - --seed 124 + --seed ${seed} fi From 82a5032e83e97f51aa0a5fb6c1e26db70444fb0b Mon Sep 17 00:00:00 2001 From: JingyeChen <576194329@qq.com> Date: Thu, 28 Nov 2024 08:40:58 +0000 Subject: [PATCH 05/11] fix: modify parameter passing in all-in-one inference --- tools/video_comparison/check_input.py | 3 +++ tools/video_comparison/combine.py | 3 ++- tools/video_comparison/compare.sh | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) mode change 100644 => 100755 tools/video_comparison/compare.sh diff --git a/tools/video_comparison/check_input.py b/tools/video_comparison/check_input.py index acf4f55..3c7d522 100644 --- a/tools/video_comparison/check_input.py +++ b/tools/video_comparison/check_input.py @@ -3,6 +3,7 @@ parser = argparse.ArgumentParser(description='Check the input directory') parser.add_argument('--input_dir', type=str, help='The input should be a directory', required=True) +parser.add_argument('--seed', type=int, help='The seed for the random number generator', default=42) args = parser.parse_args() # check if there are images in the input directory, jpg/png... @@ -26,6 +27,7 @@ for index, line in enumerate(lines): prompt = line.strip() print(f'creating image {index} using prompt: {prompt}') + out = pipe( prompt=prompt, guidance_scale=0., @@ -33,6 +35,7 @@ width=1024, num_inference_steps=4, max_sequence_length=256, + generator=torch.Generator("cuda").manual_seed(args.seed) ).images[0] index_str = str(index).zfill(5) out.save(f"{args.input_dir}/prompt_{index_str}.png") diff --git a/tools/video_comparison/combine.py b/tools/video_comparison/combine.py index e47e960..48c71e2 100644 --- a/tools/video_comparison/combine.py +++ b/tools/video_comparison/combine.py @@ -8,6 +8,7 @@ parser = argparse.ArgumentParser(description='Check the input directory') parser.add_argument('--input_dir', type=str, help='The input should be a directory', required=True) parser.add_argument('--save_dir', type=str, help='The directory of saving results', required=True) +parser.add_argument('--unified_height', type=int, help='The height of the unified video', default=320) args = parser.parse_args() methods = glob.glob(f'{args.save_dir}/*/*') @@ -42,7 +43,7 @@ def add_text_to_frame(frame, text='hi', position=(0,0)): # txt_clip = TextClip('hello world', color='orange', size=(100, 100)) # txt_clip = txt_clip.set_position('center').set_duration(max_duration) - # clips = [clip.resize(height=1080) for clip in clips] + clips = [clip.resize(height=args.unified_height) for clip in clips] # video_heights = [clip.size[1] for clip in clips] # print(methods) # print(len(clips)) diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh old mode 100644 new mode 100755 index 4de8a7d..53f3bcf --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -130,7 +130,7 @@ if [[ $inference_methods == *"mochi"* ]]; then --savedir $savedir \ --bs 1 --height $height --width $width \ --fps 28 \ - --seed 124 + --seed ${seed} fi From 04796fbb8bd3e19a69b04f90e5c38233ee75ebb4 Mon Sep 17 00:00:00 2001 From: JingyeChen <576194329@qq.com> Date: Thu, 28 Nov 2024 09:04:08 +0000 Subject: [PATCH 06/11] fix: modify parameter passing in all-in-one inference --- tools/video_comparison/compare.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh index 53f3bcf..0d9f310 100755 --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -26,7 +26,7 @@ python tools/video_comparison/check_input.py --input_dir=$input_dir --seed=$seed ################################ videocrafter2 ################################ ckpt='checkpoints/videocrafter/t2v_v2_512/model.ckpt' -config='configs/train/000_videocrafter2ft/config.yaml' +config='configs/001_videocrafter2/vc2_t2v_320x512.yaml' prompt_file="${input_dir}/prompts.txt" height=320 width=512 @@ -46,7 +46,7 @@ fi ################################ dynamicrafter ################################ ckpt=checkpoints/dynamicrafter/i2v_576x1024/model.ckpt -config=configs/train/002_dynamicrafterft_1024/config.yaml +config=configs/002_dynamicrafter/dc_i2v_1024.yaml prompt_dir="${input_dir}" height=576 width=1024 @@ -93,7 +93,7 @@ fi ################################ opensora ################################ ckpt="checkpoints/open-sora/t2v_v10/OpenSora-v1-HQ-16x256x256.pth" -config='configs/train/001_opensorav10/config_opensorav10.yaml' +config='configs/003_opensora/opensorav10_256x256.yaml' height=256 width=256 fps=8 From 6a45433617b467f24ab1da1da8cc2c07289ac57c Mon Sep 17 00:00:00 2001 From: yzxing87 Date: Wed, 27 Nov 2024 11:24:43 +0800 Subject: [PATCH 07/11] fix: create results folder fix: modify parameter passing in all-in-one inference --- scripts/inference_mochi.py | 3 +++ shscripts/inference_mochi.sh | 2 +- tools/video_comparison/check_input.py | 3 +++ tools/video_comparison/combine.py | 3 ++- tools/video_comparison/compare.sh | 2 +- 5 files changed, 10 insertions(+), 3 deletions(-) mode change 100644 => 100755 tools/video_comparison/compare.sh diff --git a/scripts/inference_mochi.py b/scripts/inference_mochi.py index 8535e4e..c9ce7d9 100644 --- a/scripts/inference_mochi.py +++ b/scripts/inference_mochi.py @@ -2,6 +2,7 @@ from diffusers import MochiPipeline from diffusers.utils import export_to_video import argparse +import os # create arg parser parser = argparse.ArgumentParser() @@ -16,6 +17,8 @@ args = parser.parse_args() +os.makedirs(args.savedir, exist_ok=True) + pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview") # Enable memory savings pipe.enable_model_cpu_offload() diff --git a/shscripts/inference_mochi.sh b/shscripts/inference_mochi.sh index bc95843..9bb29f5 100644 --- a/shscripts/inference_mochi.sh +++ b/shscripts/inference_mochi.sh @@ -1,4 +1,4 @@ -ckpt='genmo/mochi-1-preview' +ckpt='checkpoints/mochi-1-preview' prompt_file="inputs/t2v/prompts.txt" savedir="results/t2v/mochi2" height=480 diff --git a/tools/video_comparison/check_input.py b/tools/video_comparison/check_input.py index acf4f55..3c7d522 100644 --- a/tools/video_comparison/check_input.py +++ b/tools/video_comparison/check_input.py @@ -3,6 +3,7 @@ parser = argparse.ArgumentParser(description='Check the input directory') parser.add_argument('--input_dir', type=str, help='The input should be a directory', required=True) +parser.add_argument('--seed', type=int, help='The seed for the random number generator', default=42) args = parser.parse_args() # check if there are images in the input directory, jpg/png... @@ -26,6 +27,7 @@ for index, line in enumerate(lines): prompt = line.strip() print(f'creating image {index} using prompt: {prompt}') + out = pipe( prompt=prompt, guidance_scale=0., @@ -33,6 +35,7 @@ width=1024, num_inference_steps=4, max_sequence_length=256, + generator=torch.Generator("cuda").manual_seed(args.seed) ).images[0] index_str = str(index).zfill(5) out.save(f"{args.input_dir}/prompt_{index_str}.png") diff --git a/tools/video_comparison/combine.py b/tools/video_comparison/combine.py index e47e960..48c71e2 100644 --- a/tools/video_comparison/combine.py +++ b/tools/video_comparison/combine.py @@ -8,6 +8,7 @@ parser = argparse.ArgumentParser(description='Check the input directory') parser.add_argument('--input_dir', type=str, help='The input should be a directory', required=True) parser.add_argument('--save_dir', type=str, help='The directory of saving results', required=True) +parser.add_argument('--unified_height', type=int, help='The height of the unified video', default=320) args = parser.parse_args() methods = glob.glob(f'{args.save_dir}/*/*') @@ -42,7 +43,7 @@ def add_text_to_frame(frame, text='hi', position=(0,0)): # txt_clip = TextClip('hello world', color='orange', size=(100, 100)) # txt_clip = txt_clip.set_position('center').set_duration(max_duration) - # clips = [clip.resize(height=1080) for clip in clips] + clips = [clip.resize(height=args.unified_height) for clip in clips] # video_heights = [clip.size[1] for clip in clips] # print(methods) # print(len(clips)) diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh old mode 100644 new mode 100755 index 4de8a7d..53f3bcf --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -130,7 +130,7 @@ if [[ $inference_methods == *"mochi"* ]]; then --savedir $savedir \ --bs 1 --height $height --width $width \ --fps 28 \ - --seed 124 + --seed ${seed} fi From 4b49928acbb25e69685c0f296e626042fa70c7ca Mon Sep 17 00:00:00 2001 From: JingyeChen <576194329@qq.com> Date: Thu, 28 Nov 2024 09:04:08 +0000 Subject: [PATCH 08/11] fix: modify parameter passing in all-in-one inference Jingye modify the parameter passing of all-in-one inference --- tools/video_comparison/compare.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh index 53f3bcf..0d9f310 100755 --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -26,7 +26,7 @@ python tools/video_comparison/check_input.py --input_dir=$input_dir --seed=$seed ################################ videocrafter2 ################################ ckpt='checkpoints/videocrafter/t2v_v2_512/model.ckpt' -config='configs/train/000_videocrafter2ft/config.yaml' +config='configs/001_videocrafter2/vc2_t2v_320x512.yaml' prompt_file="${input_dir}/prompts.txt" height=320 width=512 @@ -46,7 +46,7 @@ fi ################################ dynamicrafter ################################ ckpt=checkpoints/dynamicrafter/i2v_576x1024/model.ckpt -config=configs/train/002_dynamicrafterft_1024/config.yaml +config=configs/002_dynamicrafter/dc_i2v_1024.yaml prompt_dir="${input_dir}" height=576 width=1024 @@ -93,7 +93,7 @@ fi ################################ opensora ################################ ckpt="checkpoints/open-sora/t2v_v10/OpenSora-v1-HQ-16x256x256.pth" -config='configs/train/001_opensorav10/config_opensorav10.yaml' +config='configs/003_opensora/opensorav10_256x256.yaml' height=256 width=256 fps=8 From 078c44b04cf13f4ab259bdb7eda341db9e1ef0e4 Mon Sep 17 00:00:00 2001 From: yzxing87 Date: Thu, 28 Nov 2024 16:32:14 +0800 Subject: [PATCH 09/11] fix: mochi inference and video compare sccript --- README.md | 2 +- scripts/inference_mochi.py | 2 +- tools/video_comparison/compare.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5b79eca..91ffc2e 100644 --- a/README.md +++ b/README.md @@ -362,7 +362,7 @@ After downloading, the model checkpoints should be placed as [Checkpoint Structu Task|Model|Command|Length (#frames)|Resolution|Inference Time (s)|GPU Memory (GiB)| |:---------|:---------|:---------|:---------|:---------|:---------|:---------| -|T2V|Mochi|`bash shscripts/inference_mochi.sh`|31|480x848|22.0|22| +|T2V|Mochi|`bash shscripts/inference_mochi.sh`|84|480x848|109.0|26| |I2V|CogVideoX-5b-I2V|`bash shscripts/inference_cogVideo_i2v_diffusers.sh`|49|576x1024|310.4|4.78| |T2V|CogVideoX-2b|`bash shscripts/inference_cogVideo_t2v_diffusers.sh`|49|576x1024|107.6|2.32| |T2V|Open Sora V1.0|`bash shscripts/inference_opensora_v10_16x256x256.sh`|16|256x256|11.2|23.99| diff --git a/scripts/inference_mochi.py b/scripts/inference_mochi.py index c9ce7d9..5a59833 100644 --- a/scripts/inference_mochi.py +++ b/scripts/inference_mochi.py @@ -19,7 +19,7 @@ os.makedirs(args.savedir, exist_ok=True) -pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview") +pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", variant="bf16", torch_dtype=torch.bfloat16) # Enable memory savings pipe.enable_model_cpu_offload() pipe.enable_vae_tiling() diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh index 0d9f310..8bf24a3 100755 --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -4,7 +4,7 @@ input_dir='inputs/t2v' save_dir='results/compare1/' seed=42 unified_visualization_height=320 -inference_methods="videocrafter2;dynamicrafter;cogvideo—t2v;cogvideo—i2v;opensora;mochi" +inference_methods="videocrafter2;dynamicrafter;opensora;mochi" #### check input #### # Check if the directory exists From d4273c4af5be176e9423181f19a0c8f24ad41c4a Mon Sep 17 00:00:00 2001 From: JingyeChen <576194329@qq.com> Date: Thu, 28 Nov 2024 12:06:21 +0000 Subject: [PATCH 10/11] fix: modify logic in combine and compare script --- requirements.txt | 2 +- tools/video_comparison/combine.py | 8 ++------ tools/video_comparison/compare.sh | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3d61286..e8c7560 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,4 +38,4 @@ https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn git+https://github.com/huggingface/diffusers open_clip_torch==2.12.0 lmdeploy -moviepy +moviepy==1.0.2 diff --git a/tools/video_comparison/combine.py b/tools/video_comparison/combine.py index 48c71e2..8ab2adc 100644 --- a/tools/video_comparison/combine.py +++ b/tools/video_comparison/combine.py @@ -1,7 +1,7 @@ import os import argparse import glob -from moviepy.editor import VideoFileClip, clips_array, vfx, TextClip +from moviepy.editor import VideoFileClip, clips_array from PIL import Image, ImageDraw, ImageFont import numpy as np @@ -41,12 +41,8 @@ def add_text_to_frame(frame, text='hi', position=(0,0)): max_duration = max([clip.duration for clip in clips]) clips = [clip.set_end(max_duration).set_fps(max_fps) for clip in clips] - # txt_clip = TextClip('hello world', color='orange', size=(100, 100)) - # txt_clip = txt_clip.set_position('center').set_duration(max_duration) clips = [clip.resize(height=args.unified_height) for clip in clips] - # video_heights = [clip.size[1] for clip in clips] - # print(methods) - # print(len(clips)) + clips_with_name = [] for index, clip in enumerate(clips): method = methods[index].split('/')[-1] diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh index 0d9f310..fd766a2 100755 --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -120,9 +120,9 @@ fi if [[ $inference_methods == *"mochi"* ]]; then ckpt='genmo/mochi-1-preview' prompt_file="${input_dir}/prompts.txt" - savedir="${save_dir}/t2v/mochi-${width}x${height}-28fps" height=480 width=848 + savedir="${save_dir}/t2v/mochi-${width}x${height}-28fps" python3 scripts/inference_mochi.py \ --ckpt_path $ckpt \ From 5841d268d1eaec936705af3cdba9573e17aba83b Mon Sep 17 00:00:00 2001 From: yzxing87 Date: Sat, 30 Nov 2024 09:54:00 +0800 Subject: [PATCH 11/11] fix: add cogvideo infer back --- tools/video_comparison/compare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/video_comparison/compare.sh b/tools/video_comparison/compare.sh index b41c093..fd766a2 100755 --- a/tools/video_comparison/compare.sh +++ b/tools/video_comparison/compare.sh @@ -4,7 +4,7 @@ input_dir='inputs/t2v' save_dir='results/compare1/' seed=42 unified_visualization_height=320 -inference_methods="videocrafter2;dynamicrafter;opensora;mochi" +inference_methods="videocrafter2;dynamicrafter;cogvideo—t2v;cogvideo—i2v;opensora;mochi" #### check input #### # Check if the directory exists