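"""Evaluate a trained MotioNet model.

Runs either the quantitative Human3.6M benchmark ('h36m') or BVH export for
the bundled demo sequences ('demo') / an arbitrary folder of OpenPose JSON
keypoints, saving results under the output folder.
"""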
import os
import json
import argparse

import torch
import numpy as np

import model.model as models
from data.data_loaders import h36m_loader
from utils import util, h36m_utils, visualization
from utils import Animation
from utils import BVH
from model import metric

def main(config, args, output_folder):
    resume = args.resume
    name_list = ['Hips', 'RightUpLeg', 'RightLeg', 'RightFoot', 'LeftUpLeg', 'LeftLeg', 'LeftFoot', 'Spine', 'Spine1', 'Neck', 'Head', 'LeftArm', 'LeftForeArm', 'LeftHand', 'RightArm', 'RightForeArm', 'RightHand']
    # Build the network class named in the config and restore the checkpoint weights.
    model = getattr(models, config.arch.type)(config)
    checkpoint = torch.load(resume)
    state_dict = checkpoint['state_dict']
    model.load_state_dict(state_dict)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()
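    # 'h36m' runs the quantitative benchmark on the Human3.6M test set; any
    # other input is treated as a folder of 2D pose detections (the 'demo'
    # shortcut uses the example folders shipped with the repo).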
    if args.input == 'h36m':
        test_data_loader = h36m_loader(config, is_training=False)
        test_parameters = [torch.from_numpy(np.array(item)).float().to(device) for item in test_data_loader.dataset.get_parameters()]
        error_list = {}
        errors = []
        # Randomly pick four test videos whose predictions will also be exported as BVH.
        sampling_export = np.random.choice(test_data_loader.n_samples - 1, size=4, replace=False)
        for video_idx, datas in enumerate(test_data_loader):
            video_name = datas[-1][0]
            datas = [item.float().to(device) for item in datas[:-1]]
            poses_2d_pixel, poses_2d, poses_3d, bones, contacts, alphas, proj_factors = datas
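            # forward_fk predicts bone lengths, joint rotations, foot contacts
            # and projection factors, then applies forward kinematics to
            # obtain 3D joint positions.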
            with torch.no_grad():
                pre_bones, pre_rotations, pre_rotations_full, pre_pose_3d, pre_c, pre_proj = model.forward_fk(poses_2d, test_parameters)
            error = metric.mean_points_error(poses_3d, pre_pose_3d) * torch.mean(alphas[0]).data.cpu().numpy()
            errors.append(error)
            action_name = video_name.split('_')[1].split(' ')[0]
            if action_name in error_list:
                error_list[action_name].append(error)
            else:
                error_list[action_name] = [error]
            if video_idx in sampling_export:
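                # Recover the root translation by back-projecting the 2D root
                # trajectory through the camera intrinsics and scaling it by
                # the predicted per-frame projection factor.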
                if config.arch.translation:
                    R, T, f, c, k, p, res_w, res_h = test_data_loader.dataset.cameras[(int(video_name.split('_')[0].replace('S', '')), int(video_name.split('_')[-1]))]
                    pose_2d_film = (poses_2d_pixel[0, :, :2].cpu().numpy() - c[:, 0]) / f[:, 0]
                    translations = np.ones(shape=(pose_2d_film.shape[0], 3))
                    translations[:, :2] = pose_2d_film
                    translation = (translations * np.repeat(pre_proj[0].cpu().numpy(), 3, axis=-1).reshape((-1, 3))) * 8
                else:
                    translation = np.zeros((poses_2d.shape[1], 3))
                rotations = pre_rotations_full[0]
                length = (pre_bones * test_parameters[3].unsqueeze(0) + test_parameters[2].repeat(bones.shape[0], 1, 1))[0].cpu().numpy()
                BVH.save('%s/%s.bvh' % (output_folder, video_name), Animation.load_from_network(translation, rotations, length, third_dimension=1), names=name_list)
        # Write per-action mean errors (millimetres) to errors.txt and stdout.
        error_file = '%s/errors.txt' % output_folder
        with open(error_file, 'w') as f:
            f.write('=====Action===== ==mm==\n')
            for key in error_list:
                mean_error = np.mean(np.array(error_list[key]))
                print('%16s %.2f' % (key, mean_error))
                f.write('%16s %.2f \n' % (key, mean_error))
            print('%16s %.2f' % ('Average', np.mean(np.array(errors))))
            f.write('%16s %.2f \n' % ('Average', np.mean(np.array(errors))))
    else:
        parameters = [torch.from_numpy(np.array(item)).float().to(device) for item in h36m_loader(config, is_training=True).dataset.get_parameters()]

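        # In-the-wild pipeline: read OpenPose JSON keypoints, map them to the
        # Human3.6M joint layout, normalize, run the network, and export the
        # result as a BVH file named after the input folder.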
        def export(pose_folder):
            video_name = pose_folder.split('/')[-1]
            files = util.make_dataset([pose_folder], phase='json', data_split=1, sort=True, sort_index=1)
            IMAGE_WIDTH = 1080  # Change these to match the resolution of your test data.
            IMAGE_HEIGHT = 1080
            pose_batch = []
            confidence_batch = []
            for pose_file_name in files:
                with open(pose_file_name, 'r') as f:
                    h36m_locations, h36m_confidence = h36m_utils.convert_openpose(json.load(f))
                pose_batch.append(h36m_locations)
                confidence_batch.append(h36m_confidence)
            poses_2d = np.concatenate(pose_batch, axis=0)
            # Normalize pixel coordinates by the image size. The trailing factor 1 is an
            # adjustable variable: if the person fills most of the frame, try a larger value such as 2.
            poses_2d[:, np.arange(0, poses_2d.shape[-1], 2)] /= (IMAGE_WIDTH * 1)
            poses_2d[:, np.arange(1, poses_2d.shape[-1], 2)] /= (IMAGE_HEIGHT * 1)
            confidences = np.concatenate(confidence_batch, axis=0)
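            # Make every joint root-relative by subtracting the root (hip) position per frame.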
            poses_2d_root = (poses_2d - np.tile(poses_2d[:, :2], [1, int(poses_2d.shape[-1] / 2)]))
            if args.smooth:
                poses_2d_root, confidences = util.interp_pose(poses_2d_root, confidences, k=2)
            if config.arch.confidence:
                # Interleave each (x, y) joint pair with its detection confidence.
                poses_2d_root_c = np.zeros((poses_2d_root.shape[0], int(poses_2d_root.shape[-1] / 2 * 3)))
                for joint_index in range(int(poses_2d_root.shape[-1] / 2)):
                    poses_2d_root_c[:, 3 * joint_index] = poses_2d_root[:, 2 * joint_index].copy()
                    poses_2d_root_c[:, 3 * joint_index + 1] = poses_2d_root[:, 2 * joint_index + 1].copy()
                    poses_2d_root_c[:, 3 * joint_index + 2] = np.array(confidences)[:, joint_index].copy()
                poses_2d_root = poses_2d_root_c
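            # Standardize with the statistics returned by the Human3.6M training
            # loader (parameters[0] and parameters[1] appear to be the per-feature
            # mean and std), zeroing out any non-finite entries.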
            poses_2d_root = np.divide((poses_2d_root - parameters[0].cpu().numpy()), parameters[1].cpu().numpy())
            poses_2d_root = np.where(np.isfinite(poses_2d_root), poses_2d_root, 0)
            poses_2d_root = torch.from_numpy(np.array(poses_2d_root)).unsqueeze(0).float().to(device)
            with torch.no_grad():
                pre_bones, pre_rotations, pre_rotations_full, pre_pose_3d, pre_c, pre_proj = model.forward_fk(poses_2d_root, parameters)
            if config.arch.translation:
                # Without camera intrinsics, approximate the principal point by the frame centre.
                pose_2d_film = (poses_2d[:, :2] - 0.5)
                translations = np.ones(shape=(pose_2d_film.shape[0], 3))
                translations[:, :2] = pose_2d_film
                translation = (translations * np.repeat(pre_proj[0].cpu().numpy(), 3, axis=-1).reshape((-1, 3)))
                translation[:] -= translation[[0]]
                # These scaling factors may need tuning for different cases.
                translation[:, :2] = translation[:, :2] * 3
                translation[:, 2] = translation[:, 2] * 1.5
            else:
                translation = np.zeros((poses_2d.shape[0], 3))
            rotations = pre_rotations_full[0]
            length = (pre_bones * parameters[3].unsqueeze(0) + parameters[2].repeat(pre_bones.shape[0], 1, 1))[0].cpu().numpy()
            BVH.save('%s/%s.bvh' % (output_folder, video_name), Animation.load_from_network(translation, rotations, length, third_dimension=1), names=name_list)
            print('The BVH file of %s has been saved!' % video_name)

        if args.input == 'demo':
            # The pretrained in-the-wild model requires at least 101 input frames;
            # for shorter videos, train another version with --stage_number 1.
            for folder_name in [0, 2, 3, 4, 5, 6]:
                export('./data/example/%s' % folder_name)
        else:
            export(args.input)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='### MotioNet evaluation')
    parser.add_argument('-r', '--resume', default='./checkpoints/h36m_gt.pth', type=str,
                        help='path to checkpoint (default: ./checkpoints/h36m_gt.pth)')
    parser.add_argument('-d', '--device', default='0', type=str,
                        help='indices of GPUs to enable (default: 0)')
    parser.add_argument('-i', '--input', default='h36m', type=str,
                        help='h36m, demo, or a folder of 2D pose JSON files')
    parser.add_argument('-o', '--output', default='./output', type=str,
                        help='output folder')
    parser.add_argument('-s', '--smooth', default=False, action='store_true',
                        help='enable smoothing of the input 2D poses')
    parser.add_argument('--interface', default='openpose', type=str,
                        help='2D detection interface')
    args = parser.parse_args()
    if args.device:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.device
    if args.resume:
        config = torch.load(args.resume)['config']
    output_folder = util.mkdir_dir('%s/%s' % (args.output, config.trainer.checkpoint_dir.split('/')[-1]))
    main(config, args, output_folder)
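
# Example usage (the checkpoint path is the repo default; adjust as needed):
#   python evaluate.py --resume ./checkpoints/h36m_gt.pth --input h36m --output ./output
#   python evaluate.py --resume ./checkpoints/h36m_gt.pth --input ./path/to/openpose_json_folder --smooth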