-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathgallary_video.py
263 lines (215 loc) · 8.97 KB
/
gallary_video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
'''
script to produce gallary video in the E3DGE demo video.
To use it in your code, simply modify
gt_root: the image ground truth.
video_trajectory_root: texture video inversion output path.
geo_video_trajectory_root: geometry video inversion output path.
video_output_path: the final gallary output path.
'''
import imageio
import traceback
from PIL import Image
from pathlib import Path
import cv2
import skvideo.io
import numpy as np
from ipdb import set_trace as st
import random
#read videos
tex_videos = []
geo_videos = []
# random.shuffle(tex_videos)
# random.shuffle(geo_videos) # shall be paired
# properties
vid_size = (1024, 1024)
speed = 5
video_length = 18 # seconds #
input_video_frames = 250 // speed
fps = 60
res_h, res_w = 1080, 1920
candidate_rows = 13 # vertical videos concatenated.
right_res = 90 # 12 videos, minimum size
mini_ratio = right_res / res_h
total_frames = fps * video_length #
gt_root = Path('datasets/test_img')
toonify=False # whether this is toonified video. Different save format, default=False
# toonify = True
if toonify:
# ! Toonify settings
video_trajectory_root = Path(
'logs/toonify/no_local_branch_N48/Toonify400_1024x1024/val/videos/')
geo_video_trajectory_root = video_trajectory_root
video_output_path = 'logs/final_organized/demo-video/gallary_toonify.mp4'
toonify_ids = [322, 360, 445, 540, 591, 597, 327, 438, 524,
532] # * for debug
random_ids = list(range(300))
random.shuffle(random_ids)
video_ids_for_iter = toonify_ids + toonify_ids + random_ids
video_ids = iter(video_ids_for_iter)
else:
# ! celeba hq settings
video_trajectory_root = Path(
'logs/final_organized/demo-video/output_video_V9_fixbug/ffhq1024x1024/val/videos/'
)
geo_video_trajectory_root = Path(
'logs/final_organized/demo-video/output_video/ffhq1024x1024/val/videos/'
)
video_output_path = 'logs/final_organized/demo-video/gallary_real.mp4'
csv_file_path = Path('logs/final_organized/demo-video/200.csv') # 600 ids
with open(csv_file_path, 'r') as f:
content = f.readlines()[:]
video_ids = [content[i].split(',')[0] for i in range(len(content))]
random.shuffle(video_ids)
pre_defined_ids = '529 838 647 485 726 800'.split(
) + '220 463 568 706 366 184'.split()
pre_defined_ids = pre_defined_ids + pre_defined_ids + pre_defined_ids
# random.shuffle(pre_defined_ids)
video_ids_for_iter = pre_defined_ids + video_ids
video_ids_for_iter.remove('560')
video_ids_for_iter.remove('1')
video_ids = iter(video_ids_for_iter)
def create_writer(video_output_path, crf=18):
writer = skvideo.io.FFmpegWriter(
video_output_path,
outputdict={
'-pix_fmt': 'yuv420p',
'-crf': str(crf) # 11
})
return writer
def mix_tex_geo_vid(tex_vid, geo_vid):
_, h, w, _ = tex_vid.shape
_, geo_h, geo_w, _ = geo_vid.shape
# vid_w = tex_vid.shape[-2]
# st()
if geo_h != h:
# resize geo videos
geo_vid = np.stack(
[cv2.resize(geo_vid[i], (w, h)) for i in range(geo_vid.shape[0])])
tex_vid[:, :, -w // 2:, :] = geo_vid[:, :, w // 2:, :]
return tex_vid # ! debug
def read_video(path, size, speed=1):
video = np.stack([
cv2.resize(frame[:, -frame.shape[0]:, :], size)
for i, frame in enumerate(imageio.get_reader(path, 'ffmpeg'))
if i % speed == 0
], 0)
return video
def sample_videos(concat_h=True):
def sample_single_vid(return_stem=False):
# img_stem = video_ids[random.randint(0, len(video_ids) - 1)]
# st()
global video_ids, video_ids_for_iter
try:
img_stem = next(video_ids)
except StopIteration as e:
video_ids = iter(video_ids_for_iter)
img_stem = next(video_ids)
try:
tex_vid = read_video(
str(video_trajectory_root / f'{img_stem}.jpg' /
'sample_video__elipsoid.mp4'), vid_size, speed)
geo_vid = read_video(
str(geo_video_trajectory_root / f'{img_stem}.jpg' /
'sample_depth_video__elipsoid.mp4'), vid_size, speed)
video = mix_tex_geo_vid(tex_vid, geo_vid)
if return_stem:
return video, img_stem
else:
return video
except Exception as e:
traceback.print_exc()
print(img_stem)
return sample_single_vid(return_stem)
# return None
# return sample_single_vid()
def sample_quater_video():
# return 2*2 video grid to show gt image
def concat_img_video():
video, img_stem = sample_single_vid(True)
print('stem: {}'.format(img_stem))
gt_img = np.array(Image.open(gt_root / f'{img_stem}.jpg'))
gt_img = cv2.resize(gt_img, (video.shape[2], video.shape[1]))
video = np.stack(
[np.concatenate([gt_img, frame], 1) for frame in video], 0)
return video
return np.concatenate([concat_img_video() for _ in range(2)], 1)
# sample_video_fn = sample_single_vid
sample_video_fn = sample_quater_video
# videos = [sample_single_vid() for _ in range(candidate_rows)]
videos = [sample_video_fn() for _ in range(candidate_rows)]
if concat_h:
videos = np.concatenate(videos, 1) # F H*15 W C
return videos
def get_current_res(anchor_u_l):
"""
anchor_w: w (u) coordinate of the left most largest frame, left u.
"""
if anchor_u_l <= 0:
# return (res_h, res_h + anchor_u_l) # clip
# return (res_h, res_h + anchor_u_l) # clip
return (res_h, res_h) # clip
else:
current_res = res_h * (mini_ratio + (res_w - anchor_u_l) / res_w *
(1 - mini_ratio)) # right_res -> res_h
current_res = int(current_res)
return (current_res, current_res)
# current_res = int(min(current_res, res_h)) # smaller than res_h
def composite_videos():
# loop_num = int(500 / 5 * candidate_rows)
frames = []
videos_stack = [sample_videos() for _ in range(5)] # init stack
while (len(frames) < total_frames):
# anchor_w = res_h # right frame position of the left most biggest frame
for anchor_w in range(0, res_h, 8): # 4 * 250 \approx 1080
anchor_w *= -1 # left most staring position
frame = np.zeros((res_h, res_w, 3)).astype(np.uint8)
current_anchor_w = anchor_w
# fill in a frame
for video_idx in range(5):
current_res_h_single, current_res_w = get_current_res(
current_anchor_w) # resolution of single frame
current_res_h = current_res_h_single * candidate_rows # * shall be int
video_frame = videos_stack[video_idx][
len(frames) % input_video_frames] # loop frames
video_frame = cv2.resize(
video_frame,
(current_res_w, current_res_h)) # (w, h) order
# * clip w (left most)
if current_anchor_w <= 0: # left most clip w
video_frame = video_frame[:, -(res_h + current_anchor_w):,
...]
else:
# * clip w, right most
video_frame = video_frame[:, 0:min(
res_w - current_anchor_w, current_res_h_single
)] # min of available right margin and h_single
# * clip h, split from the middle. total size = res_h
lower_clip_idx = int(current_res_h / 2 - res_h / 2)
upper_clip_idx = lower_clip_idx + res_h
video_frame = video_frame[lower_clip_idx:upper_clip_idx, ...]
try:
# frame[lower_clip_idx:upper_clip_idx, max(0, current_anchor_w):min(res_w, current_anchor_w+current_res_h_single)] = video_frame
frame[:,
max(0, current_anchor_w
):min(res_w, current_anchor_w +
current_res_h_single)] = video_frame
except Exception as exception:
traceback.print_exc()
st()
current_anchor_w += current_res_h_single
if (current_anchor_w >= res_w):
break
# print('current_anchor_w: {}'.format(current_anchor_w))
frames.append(frame)
print('frames: {}'.format(len(frames)))
if (len(frames) > total_frames):
break
# frame_idx = (frame_idx+1) / input_video_frames
videos_stack.pop(0) # remove head video
videos_stack.append(sample_videos())
writer = create_writer(video_output_path, 18) # 1024 output
for frame in frames:
writer.writeFrame(frame)
writer.close()
print('output video to :{}'.format(video_output_path))
composite_videos()