-
Notifications
You must be signed in to change notification settings - Fork 1
/
pdf_gen.py
215 lines (163 loc) · 8.29 KB
/
pdf_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import os
import numpy as np
from PIL import Image, ImageFont
from text_gen import get_wrapped_text, create_text_image
def get_parameter_from_path(path, param_prefix):
# Split the path by '/' to get a list of individual components
path_components = path.split('/')
# Find the component that starts with the parameter name followed by '_'
for component in path_components:
if component.startswith(param_prefix):
# Split the component by '_' and return the second element (the value)
return component.replace(param_prefix, '')
# If the parameter was not found, return None
return None
def load_files(paths, row_param, col_param):
# Get first two letters f row and col params
row_param_prefix = row_param[:2] + '_'
col_param_prefix = col_param[:2] + '_'
# Create an empty dictionary to store the sections and subsections
rows = {}
# Iterate through each path in the list
for path in paths:
# Extract the values for row, and column parameters
row = get_parameter_from_path(path, row_param_prefix)
column = get_parameter_from_path(path, col_param_prefix)
# If the section doesn't already exist in the dictionary, add it
if row not in rows:
rows[row] = {}
# Add the path to the appropriate row and column in the subsection
rows[row][column] = path
return rows
def get_all_images_in_subtree(root_dir):
images = []
for root, dirs, files in os.walk(root_dir):
for file in files:
if file.endswith('.png') or file.endswith('.jpg'):
images.append(os.path.join(root, file))
return images
def extract_keys_from_nested_dict(d, n):
if n == 0:
return d.keys()
else:
keys = []
for key, value in d.items():
if isinstance(value, dict):
keys.extend(extract_keys_from_nested_dict(value, n - 1))
return keys
def horizontal_concat_PIL_images(images):
return Image.fromarray(horizontal_concat_images([np.array(image) for image in images]))
def horizontal_concat_images(images):
return np.concatenate(images, axis=1)
def vertical_concat_PIL_images(images):
return Image.fromarray(vertical_concat_images([np.array(image) for image in images]))
def vertical_concat_images(images):
return np.concatenate(images, axis=0)
def create_hidden_param_row(hidden_params_string, width, height):
height = get_col_header_height(hidden_params_string, width, height,
font=ImageFont.truetype("/content/compare_diffusion/Monaco.ttf", 50))
return create_text_image(hidden_params_string, width, height, x_justify=0.01, y_justify=0.01, wrap=True)
def create_cols_title(title, width, height):
return create_text_image(title, width, height, x_justify=0.6, y_justify=0.5,
font=ImageFont.truetype("/content/compare_diffusion/Monaco.ttf", 50))
def create_rows_title(title, width, height):
return create_text_image(title, height, width, x_justify=0.3, y_justify=0.5,
font=ImageFont.truetype("/content/compare_diffusion/Monaco.ttf", 50)).rotate(90,
expand=True)
def create_cols_axis(col_headers, width, height, font, indent_width):
images = [create_text_image(col_header, width, height, font=font,
wrap=True, x_justify=0, y_justify=0) for col_header in col_headers]
images.insert(0, create_blank_image(indent_width, height))
return horizontal_concat_PIL_images(images)
def create_row_from_paths(paths, row_header, width, height, font):
# image_width, height, fontsize, wrap
images = get_PIL_images_from_paths(paths)
text_image = create_text_image(row_header, width, height,
font=font, wrap=True, y_justify=0.01)
images.insert(0, text_image)
pil_images = horizontal_concat_PIL_images(images)
return pil_images
def create_blank_image(width, height):
return Image.new('RGB', (width, height), color=(255, 255, 255))
def get_PIL_images_from_paths(image_paths):
return [Image.open(path).convert('RGB') for path in image_paths]
def filter_paths_by_names(paths, names):
return [path for path in paths if any(name in path for name in names)]
def get_row_header_width(longest_row_header, image_height, image_width, font):
longest_row_header = longest_row_header.replace('-', ' ').replace('_', ' ')
longest_word = max(longest_row_header.split(), key=len)
longest_word_width = font.getsize(longest_word)[0]
scale = 0.1
found_width = False
width = int(image_width * scale)
while not found_width:
wrapped_text = get_wrapped_text(longest_row_header, font, width)
num_lines = wrapped_text.count('\n') + 1
height = num_lines * font.getsize(longest_row_header)[1]
if height < image_height and longest_word_width < width:
found_width = True
else:
scale += 0.1
width = int(image_width * scale)
return width
def get_col_header_height(longest_col_header, image_width, image_height, font):
print('Image height: ', image_height)
num_lines = get_wrapped_text(longest_col_header, font, image_width).count('\n') + 1
height = num_lines * font.getsize(longest_col_header)[1]
return height
def generate_pdf(col_param, row_param, width, height, hidden_params, rows_per_page=10,
generated_images_path='output'):
font = ImageFont.truetype("/content/compare_diffusion/Monaco.ttf", 43)
image_paths = get_all_images_in_subtree(generated_images_path)
print('Image paths: ', image_paths)
files = load_files(image_paths, row_param, col_param)
print('Files: ', files)
print('Preparing headers...')
col_headers, row_headers = get_headers(files)
col_header_height = get_col_header_height(str(max(col_headers, key=len)), width, height, font)
row_header_width = get_row_header_width(str(max(row_headers, key=len)), height, width, font)
column_header_row = create_cols_axis(col_headers, width, col_header_height, font, row_header_width)
cols_title = create_cols_title(col_param, column_header_row.width, int(height / 3))
page_list = []
page_rows = [cols_title, column_header_row]
num_header_rows = len(page_rows)
print('Preparing images...')
for row_header in row_headers:
row = files[row_header]
row_paths = []
for col_header in col_headers:
row_paths.append(row[col_header])
image_row = create_row_from_paths(row_paths, row_header, row_header_width, height, font)
page_rows.append(image_row)
if len(page_rows) % rows_per_page == 0:
page = Image.fromarray(vertical_concat_images(page_rows))
page_list.append(page)
page_rows = [cols_title, column_header_row]
if len(page_rows) > num_header_rows:
page = Image.fromarray(vertical_concat_images(page_rows))
page_list.append(page)
final_pages = []
for idx, page in enumerate(page_list):
page_width, page_height = page.size
rows_title = create_rows_title(row_param, int(width / 3), page_height)
page = Image.fromarray(horizontal_concat_images([rows_title, page]))
page_width = page.width
hidden_param_string = get_hidden_params_string(hidden_params)
hidden_param_row = create_hidden_param_row(hidden_param_string, page_width, height)
page = vertical_concat_images([hidden_param_row, page])
final_pages.append(Image.fromarray(page))
print('Saving pdf...')
if len(final_pages) > 1:
final_pages[0].save('output.pdf', save_all=True, append_images=final_pages[1:], optimize=True)
else:
final_pages[0].save('output.pdf', optimize=True)
def get_headers(files):
row_headers = sorted(list(set(extract_keys_from_nested_dict(files, 0))))
col_headers = sorted(list(set(extract_keys_from_nested_dict(files, 1))))
return col_headers, row_headers
def get_hidden_params_string(hidden_params):
hidden_param_string = 'Hidden Params:: \n'
for hidden_param in hidden_params.keys():
hidden_param_string += hidden_param + ': ' + str(hidden_params[hidden_param]) + ', '
hidden_param_string = hidden_param_string[:-2]
return hidden_param_string