Merge pull request #5 from VideoVerses/feat/v2v
add: v2v-modelscope
YingqingHe authored Nov 22, 2024
2 parents 319cbe6 + c42b53f commit 7d19e72
Showing 7 changed files with 99 additions and 12 deletions.
6 changes: 2 additions & 4 deletions README.md
@@ -323,10 +323,6 @@ Please check [docs/CHECKPOINTS.md](docs/CHECKPOINTS.md) to download all the mode
``` shell
conda create --name videotuna python=3.10 -y
conda activate videotuna
pip install -U poetry pip
poetry config virtualenvs.create false
poetry install
pip install optimum-quanto==0.2.1
pip install -r requirements.txt
git clone https://github.com/JingyeChen/SwissArmyTransformer
pip install -e SwissArmyTransformer/
@@ -335,6 +331,8 @@ git clone https://github.com/tgxs002/HPSv2.git
cd ./HPSv2
pip install -e .
cd ..
conda config --add channels conda-forge
conda install ffmpeg
```
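
A quick post-install sanity check (a minimal sketch; it assumes only that `torch` is among the packages pulled in by `requirements.txt`):

```python
import shutil

import torch

# ffmpeg comes from the conda-forge install above and must be on PATH
assert shutil.which("ffmpeg") is not None, "ffmpeg not found on PATH"
print(f"torch {torch.__version__}, CUDA available: {torch.cuda.is_available()}")
```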

### 2. Prepare checkpoints
24 changes: 16 additions & 8 deletions docs/CHECKPOINTS.md
@@ -12,9 +12,10 @@ mkdir checkpoints
# ---- CogVideo (diffusers) ----
mkdir -p checkpoints/cogvideo; cd checkpoints/cogvideo
git clone https://huggingface.co/THUDM/CogVideoX-2b
git clone https://huggingface.co/THUDM/CogVideoX-5b
git clone https://huggingface.co/THUDM/CogVideoX-5b-I2V
git clone https://huggingface.co/THUDM/CogVideoX-2b # These are checkpoints for CogVideoX T2V-2B
git clone https://huggingface.co/THUDM/CogVideoX-5b # These are checkpoints for CogVideoX T2V-5B
git clone https://huggingface.co/THUDM/CogVideoX-5b-I2V # These are checkpoints for CogVideoX I2V-5B
git clone https://huggingface.co/THUDM/CogVideoX1.5-5B-SAT # These are checkpoints for CogVideoX 1.5-5B (both T2V and I2V)
# ---- Open-Sora ----
@@ -53,19 +54,26 @@ mkdir checkpoints/dynamicrafter/i2v_576x1024
wget https://huggingface.co/Doubiiu/DynamiCrafter_1024/resolve/main/model.ckpt -P checkpoints/dynamicrafter/i2v_576x1024 # dynamicrafter-i2v-1024
# ---- Videocrafter ----
mkdir checkpoints/videocrafter/
mkdir checkpoints/videocrafter/i2v_v1_512
mkdir -p checkpoints/videocrafter/i2v_v1_512
wget https://huggingface.co/VideoCrafter/Image2Video-512/resolve/main/model.ckpt -P checkpoints/videocrafter/i2v_v1_512 # videocrafter1-i2v-512
# ---- Stable Diffusion checkpoint for VC2 Training ----
mkdir checkpoints/stablediffusion/
mkdir checkpoints/stablediffusion/v2-1_512-ema
mkdir -p checkpoints/stablediffusion/v2-1_512-ema
wget https://huggingface.co/stabilityai/stable-diffusion-2-1-base/resolve/main/v2-1_512-ema-pruned.ckpt -P checkpoints/stablediffusion/v2-1_512-ema
wget https://huggingface.co/stabilityai/stable-diffusion-2-1-base/blob/main/v2-1_512-ema-pruned.ckpt -P checkpoints/stablediffusion/v2-1_512-ema
# ---------------------------- V2V ----------------------------
# ---- ModelScope Video-to-Video ----
cd checkpoints
# git-lfs is required to fetch the large checkpoint files; set it up first:
git lfs install
# then clone the Video-to-Video checkpoints
git clone https://www.modelscope.cn/iic/Video-to-Video.git
```
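
If `git lfs` is not available, the same ModelScope checkpoint can also be fetched with the SDK's `snapshot_download` (a hedged sketch; the download layout may differ from a plain clone, so point `--ckpt_path` at wherever the files actually land):

```python
from modelscope import snapshot_download

# fetches iic/Video-to-Video, the same model as the git clone above
local_dir = snapshot_download("iic/Video-to-Video", cache_dir="checkpoints")
print(f"checkpoints downloaded to: {local_dir}")
```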


### Checkpoint Organization Structure
After downloading, the model checkpoints should be placed as follows:

Binary file added inputs/v2v/001/00.mp4
1 change: 1 addition & 0 deletions inputs/v2v/001/prompts.txt
@@ -0,0 +1 @@
a cartoon dog is running in the forest.
35 changes: 35 additions & 0 deletions scripts/inference_utils.py
@@ -108,6 +108,41 @@ def load_inputs_i2v(input_dir, video_size=(256,256), video_frames=16):

    return filename_list, image_list, prompt_list

def load_inputs_v2v(input_dir, video_size=None, video_frames=None):
    """
    Load prompt list and input videos for v2v from an input directory.
    """
    # load the prompt file (if multiple exist, the first one sorted by name is used)
    prompt_files = get_target_filelist(input_dir, ext='txt')
    if len(prompt_files) == 0:
        raise ValueError(f"Error: found NO prompt file in {input_dir}")
    if len(prompt_files) > 1:
        print(f"Warning: multiple prompt files exist. The one {os.path.split(prompt_files[0])[1]} is used.")
    prompt_file = prompt_files[0]
    prompt_list = load_prompts(prompt_file)

    ## load videos
    video_filepaths = get_target_filelist(input_dir, ext='[m][p][4]')  # glob character classes matching *.mp4
    video_filenames = [os.path.split(video_filepath)[-1] for video_filepath in video_filepaths]

    return prompt_list, video_filepaths, video_filenames

def open_video_to_tensor(filepath, video_width=None, video_height=None):
    # only pass width/height to the reader when a resize is actually requested
    if video_width is None and video_height is None:
        vidreader = VideoReader(filepath, ctx=cpu(0))
    else:
        vidreader = VideoReader(filepath, ctx=cpu(0), width=video_width, height=video_height)
    frame_indices = list(range(len(vidreader)))
    frames = vidreader.get_batch(frame_indices)
    # (T, H, W, C) -> (C, T, H, W), rescaled from [0, 255] to [-1, 1]
    frame_tensor = torch.tensor(frames.asnumpy()).permute(3, 0, 1, 2).float()
    frame_tensor = (frame_tensor / 255. - 0.5) * 2
    return frame_tensor.unsqueeze(0)

def load_video_batch(filepath_list, frame_stride, video_size=(256,256), video_frames=16):
    '''
    Notice about some special cases:
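Taken together, the two new helpers can be exercised as follows (a usage sketch; it assumes `decord` provides the `VideoReader`/`cpu` used above and that an `.mp4` plus a `prompts.txt` sit in the input directory, as in `inputs/v2v/001`):

```python
from scripts.inference_utils import load_inputs_v2v, open_video_to_tensor

prompt_list, video_filepaths, video_filenames = load_inputs_v2v("inputs/v2v/001")
for prompt, path, name in zip(prompt_list, video_filepaths, video_filenames):
    video = open_video_to_tensor(path)  # shape (1, C, T, H, W), values in [-1, 1]
    print(name, prompt, tuple(video.shape))
```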
39 changes: 39 additions & 0 deletions scripts/inference_v2v_ms.py
@@ -0,0 +1,39 @@
import os, sys
import argparse
sys.path.insert(0, os.getcwd())

from modelscope.models import Model
from modelscope.pipelines import pipeline
from modelscope.outputs import OutputKeys

from scripts.inference_utils import load_inputs_v2v

def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt_path", type=str, default="checkpoints/Video-to-Video", help="Checkpoint path of the model")
    parser.add_argument("--input_dir", type=str, default=None, help="An input directory containing videos and prompts for video-to-video enhancement")
    parser.add_argument("--output_dir", type=str, default=None, help="Results saving directory")
    return parser

# prepare arguments, model, pipeline.
args = get_parser().parse_args()
model = Model.from_pretrained(args.ckpt_path)
pipe = pipeline(task="video-to-video", model=model, model_revision='v1.1.0', device='cuda:0')
print(f"Successfully loaded model from {args.ckpt_path}")

os.makedirs(args.output_dir, exist_ok=True)

# load input prompts, video paths, video filenames
prompt_list, video_filepaths, video_filenames = load_inputs_v2v(input_dir=args.input_dir)

# video-to-video enhancement
for i, (prompt, videofilepath, videofilename) in enumerate(zip(prompt_list, video_filepaths, video_filenames)):
    print(f"[{i:03d}] input path: {videofilepath}")
    print(f"[{i:03d}] input name: {videofilename}")
    print(f"[{i:03d}] prompt: {prompt}")
    p_input = {
        'video_path': videofilepath,
        'text': prompt
    }
    output_video_path = pipe(p_input, output_video=os.path.join(args.output_dir, videofilename))[OutputKeys.OUTPUT_VIDEO]
    print(f"Successfully processed {videofilename} and saved to {output_video_path}")
6 changes: 6 additions & 0 deletions shscripts/inference_v2v_ms.sh
@@ -0,0 +1,6 @@
input_dir="inputs/v2v/001"
current_time=$(date +%Y%m%d%H%M%S)
output_dir="results/v2v/$current_time-v2v-modelscope-001"

python3 scripts/inference_v2v_ms.py \
    --input_dir "$input_dir" --output_dir "$output_dir"
