# default.yaml
# Dimensions of the final video
width: 1280
height: 768
# Frame rate of the final video
frames_per_second: 18
# When set, the runner only simulates all frames and outputs the evaluated functions, so they can be inspected ahead of actually running the scenario or used for debugging.
# simulate_output: G:\shit\latent\out\vanishing.csv
output_path: generated/vanishing
# A directory for intermediate results and misc. other models that are required
persistence_dir: persistence/
# Torch device to use
torch_device: cuda
scenes:
- # The prompt to generate images from
prompt: a beautiful abstract portrait painting of a person made of (fog) disappearing, black and white
# How long this prompt should last at the frame rate specified above
duration: 10s
# Interpolation between the previous and this scene. Does not add to the duration of the scene.
interpolation: 0s
# Which mechanism to use to generate frames. Must be defined with parameters and type below
mechanism: api
# Optionally, some parameters of a mechanism may be modified at the scene level, for example the number of sampling steps for a diffusion model.
mechanism_parameters:
# You can also reference notes in MIDI inputs by their name. See midi_notes.py in this repo for a full list of note names.
strength_schedule: min(1.0, 0.63-(note_playing(notes, "C2")*0.23)+(is_turbo_step*0.14)) #(sin(6*t)**20)*0.2
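# For reference (assuming midi_notes.py uses the common mapping in which "C2" is MIDI note 36, as the
# numeric form used in later scenes suggests), the schedule above could equivalently be written as:
# strength_schedule: min(1.0, 0.63-(note_playing(notes, 36)*0.23)+(is_turbo_step*0.14))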
# no anim initially
animation_parameters:
translation_x: 0
translation_y: 0
translation_z: 0
# depth model parameters
near_plane: 200
far_plane: 10000
fov: 40
sampling_mode: bicubic
padding_mode: border
- # The prompt to generate images from
prompt: a beautiful pencil painting of a lush (forest) with (intricate branches) on a foggy morning in the mountains, with a reflective pond in the background, fog in the background vaguely resembling a face, very intricate, geometric shapes, (black and white), large field of view, (3d), vast landscape, by M.C. Escher, [[vertical lines]]
# How long this prompt should last at the frame rate specified above
duration: 6s
# Interpolation between the previous and this scene. Does not add to the duration of the scene.
interpolation: 3s
# Which mechanism to use to generate frames. Must be defined with parameters and type below
mechanism: api
# Optionally, some parameters of a mechanism may be modified at the scene level, for example the number of sampling steps for a diffusion model.
mechanism_parameters:
# Overrides the mechanism parameters below
# note_playing is a function that returns 0 or 1 depending on whether the given note is currently playing in the MIDI input.
# Used to induce "motion" by modulating the denoising strength here
strength_schedule: (min(1.0, 0.64-(min(1, note_playing(notes, 36))*0.24)+(is_turbo_step*0.1))) #(sin(6*t)**20)*0.2
scale: 9
# more abrupt interpolation at the end so we don't have a high denoising strength for too long
# https://www.wolframalpha.com/input?i=x**0.9+from+0+to+1
interpolation_function: (x**0.9)
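# The interpolation function shapes how the (presumably 0..1) interpolation progress x is turned into a
# blend weight between the previous and the current scene. Other scenes in this file use, for example:
# interpolation_function: x        # linear blend
# interpolation_function: x**1.8   # strongly non-linear blend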
turbo_steps: 2
steps: 40
animation_parameters:
translation_x: 0
translation_y: 0
translation_z: 1+note_playing(notes, 36)*1
# TODO: these shouldn't be necessary
# depth model parameters
near_plane: 200
far_plane: 10000
fov: 40
sampling_mode: bicubic
padding_mode: border
- # The prompt to generate images from
prompt: a beautiful scenic painting of a vast forest with a river on a sunny day with the blue sky above, intricate, [[[by M.C. Escher]]], (wide field of view)
# How long this prompt should last at the frame rate specified above
duration: 12s
# Interpolation between the previous and this scene. Does not add to the duration of the scene.
interpolation: 1500ms
# Which mechanism to use to generate frames. Must be defined with parameters and type below
mechanism: api
# Optionally, some parameters of a mechanism may be modified at the scene level, for example the number of sampling steps for a diffusion model.
mechanism_parameters:
# Overrides the mechanism parameters below
strength_schedule: (min(1.0, 0.65-(note_playing(notes, 36)*0.26)+(is_turbo_step*0.16))) #(sin(6*t)**20)*0.2
# more abrupt interpolation at the end
interpolation_function: x**1.1
animation_parameters:
translation_x: (note_playing(notes, 36)*6)+sin(0.4*t)
translation_y: 0
translation_z: 2.4+note_playing(notes, 36)*2
# depth model parameters
near_plane: 200
far_plane: 10000
fov: 40
sampling_mode: bicubic
padding_mode: border
- # Here we use a template to fill in the attention weights in the prompt and shift attention during the scene. This modulates the prompt throughout the scene to give more variety.
prompt: a beautiful abstract portrait painting of a [person:{{round(1-scene_progress, 1)}}|cloud:{{round(scene_progress*2, 1)}}] made of (fog:{{round(scene_progress*2, 1)}}), [black and white], ((visionary art))
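# Worked example (plain arithmetic on the template above): halfway through the scene, at scene_progress = 0.5,
# the prompt renders roughly as:
# a beautiful abstract portrait painting of a [person:0.5|cloud:1.0] made of (fog:1.0), [black and white], ((visionary art))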
# How long this prompt should last at the frame rate specified above
duration: 7s
# Interpolation between the previous and this scene. Does not add to the duration of the scene.
interpolation: 1500ms
# Which mechanism to use to generate frames. Must be defined with parameters and type below
mechanism: api
# Optionally, some parameters of a mechanism may be modified at the scene level, for example the number of sampling steps for a diffusion model.
mechanism_parameters:
# Overrides the mechanism parameters below
strength_schedule: (min(1.0, 0.57-(max(1, note_playing(notes, 36)+note_playing(notes, 59))*0.26)+(is_turbo_step*0.12))) #(sin(6*t)**20)*0.2
# more abrupt interpolation at the end
interpolation_function: x**1.8
# Interpolate towards this frame, or start the scene with it
init_frame: G:\shit\latent\in\person_made_fog.png
animation_parameters:
translation_x: 0
translation_y: 0
translation_z: 1
# depth model parameters
near_plane: 200
far_plane: 10000
fov: 40
sampling_mode: bicubic
padding_mode: border
- # The prompt to generate images from
prompt: a beautiful scenic painting of a vast valley with a forest and a river on a sunny day with the blue sky above, intricate, [[[by M.C. Escher]]], (wide field of view)
# How long this prompt should last at the frame rate specified above
duration: 12s
# Interpolation between the previous and this scene. Does not add to the duration of the scene.
interpolation: 2000ms
# Which mechanism to use to generate frames. Must be defined with parameters and type below
mechanism: api
# Optionally, some parameters of a mechanism may be modified at the scene level, for example the number of sampling steps for a diffusion model.
mechanism_parameters:
# Overrides the mechanism parameters below
strength_schedule: (min(1.0, 0.62-(note_playing(notes, 36)*0.26)+(is_turbo_step*0.16))) #(sin(6*t)**20)*0.2
# linear interpolation
interpolation_function: x
animation_parameters:
translation_x: (note_playing(notes, 36)*6)+sin(0.4*t)
translation_y: 0
translation_z: 2.4+note_playing(notes, 36)*2
# depth model parameters
near_plane: 200
far_plane: 10000
fov: 40
sampling_mode: bicubic
padding_mode: border
- # The prompt to generate images from.
# This example shows prompt modulation based on a MIDI input: when a C#2 note plays in the clip, additional tokens are added to the prompt.
# It also shows splitting a complex, templated prompt across multiple lines using YAML multi-line string syntax.
prompt: |
a beautiful
{% if note_playing(notes_currently_on, "C#2") %}
(abstract polygon geometric:1.4)
{% endif %}
painting
of a vast valley with a forest and a river on a sunny day with the blue sky above, intricate, [[[by M.C. Escher]]], (wide field of view)
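# Roughly, and ignoring how the lines are joined: while a C#2 note is on, the prompt above expands to
# "a beautiful (abstract polygon geometric:1.4) painting of a vast valley ...";
# otherwise the conditional block is dropped and it reads "a beautiful painting of a vast valley ...".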
# How long this prompt should last at the frame rate specified above
duration: 12s
# Interpolation between the previous and this scene. Does not add to the duration of the scene.
interpolation: 2000ms
# Which mechanism to use to generate frames. Must be defined with parameters and type below
mechanism: api
# Optionally, some parameters of a mechanism may be modified at the scene level, for example the number of sampling steps for a diffusion model.
mechanism_parameters:
# Overrides the mechanism parameters below
strength_schedule: (min(1.0, 0.62-(note_playing(notes, 36)*0.26)+(is_turbo_step*0.16))) #(sin(6*t)**20)*0.2
# linear interpolation
interpolation_function: x
animation_parameters:
translation_x: (note_playing(notes, 36)*6)+sin(0.4*t)
translation_y: 0
translation_z: 2.4+note_playing(notes, 36)*2
# depth model parameters
near_plane: 200
far_plane: 10000
fov: 40
sampling_mode: bicubic
padding_mode: border
mechanisms:
- # Name of this parameter set within the config. Can be referenced in scenes
name: api
# Name of the mechanism to use
type: api
# Arbitrary parameters the text2video mechanism can take. These are always implementation-specific.
mechanism_parameters:
host: http://localhost:7860
seed: 2812101937
# How many turbo steps (frames with a lower number of sampling steps) to run in between "full-sized" frames
turbo_steps: 4
# How many sampling steps to run for a turbo frame
turbo_sampling_steps: 30
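# Taken together with steps: 35 below, this presumably means every 5th frame is a "full" frame sampled
# with 35 steps, while the 4 frames in between are turbo frames sampled with 30 steps; the is_turbo_step
# variable used in the strength_schedule expressions is then 1 on those in-between frames and 0 otherwise.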
# Noise schedule (how much noise to add to in-between frames); can be a function or a static value
noise_schedule: 0.02+note_playing(notes, 36)*0.05
steps: 35
# Euler a, LMS, ...
sampler: Euler a
# CFG scale, can be a function
scale: 9.5
# Reduce contrast of individual frames before passing them to img2img again.
# This helps keep the color spectrum more dynamic, as Stable Diffusion (especially at higher CFG scale values) tends to oversaturate the images over time
contrast_schedule: 0.982
# This is the inverse of the denoising strength: 1 = keep the original frame, 0 = denoising strength 1.0 (keep as little of the original as possible)
strength_schedule: (min(1.0, 0.76-(note_playing(notes, 36)*0.5)+(is_turbo_step*0.12))) #(sin(6*t)**20)*0.2
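# Worked example of the default schedule above: with no note playing, a full frame gets a value of 0.76
# (little change between frames); while note 36 plays, it drops to 0.26 on a full frame and 0.38 on a
# turbo step, i.e. a much higher effective denoising strength and therefore more motion.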
# Type of animation to apply between frames, if any
animation: 3D
# Parameters for the animation
animation_parameters:
translation_x: (note_playing(notes, 36)*6)+sin(0.25*t)
translation_y: 0
translation_z: 1.4+note_playing(notes, 36)*6
# depth model parameters
near_plane: 200
far_plane: 10000
fov: 40
sampling_mode: bicubic
padding_mode: border
# Other multi-modal context
additional_context:
custom_functions:
- # The name under which the results of the function can be used in other functions
variable_name: camera_movements
function: |
bell_curve(2.5, 8.5, t, 4) +
0
# Store this many previous values of this function in the function's context. Can be used via the `prev` value in the function.
prev_values: 0
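# The variable defined above can then be referenced from any other function; a (hypothetical) example
# for one of the animation parameters would be:
# translation_z: 1 + camera_movements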
input_mechanisms:
# MIDI input mechanism: reads a given MIDI file's note_on events
# NOTE: currently only single-track MIDI files are properly supported
- type: midi
mechanism_parameters:
file: G:\shit\latent\out\vanishing_drums.mid
# Offset the MIDI as it's read. Useful for animating a certain portion of a MIDI clip
offset: 50.5
# Override/add an initial set_tempo event at the start of the MIDI file if necessary (in BPM)
tempo: 60
# Optional prefix for the variable names added by this mechanism. Allows using multiple MIDI inputs
prefix: ""
# Optional track name.
# If the MIDI file has named tracks, select the track with the given name from the file.
# If not given, all tracks are used when the file has multiple tracks
track: ""
# Spectral input mechanism: analyzes an audio file using bandpass filters and
# passes a variable in the 0..1 range into the functions, indicating the amplitude of the signal within each filter's band.
# Normalized to cover the full range.
- type: "spectral"
mechanism_parameters:
file: E:\Downloads\05 - Vanishing.flac
offset: 50 # Start analyzing the file at 50s, for generating animations at an offset
filters:
- variable_name: beat # The name of the variable we will use in the animation function
f_center: 54 # Central frequency the filter will respond to (54Hz)
f_width: 40 # The filter will pass signals from 34Hz to 74Hz
order: 7 # The filter slope will be 7*6=42dB per octave
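# The variable defined by the filter above can then be used in any function; a (hypothetical) example:
# translation_z: 1 + beat*2   # zoom harder while there is energy in the 34-74Hz band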
# Beat detection mechanism using librosa. A rudimentary input mechanism that simply takes an audio file and runs beat detection on it.
# The functions get an additional "is_beat" variable that is either 0 or 1 for inducing motion.
# This mechanism can be inaccurate; it's advised to run a simulation (see above) first, or at least to sanity-check the tempo detected by librosa.
# - type: "beats-librosa"
# mechanism_parameters:
# file: E:\Downloads\05 - Vanishing.flac
# offset: 50 # Start analyzing the file at 50s, for generating animations at an offset
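# If enabled, the resulting is_beat variable could (hypothetically) be used like the MIDI and spectral
# variables above, e.g.:
# translation_x: is_beat*4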