-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecur-context.h
167 lines (130 loc) · 4.88 KB
/
recur-context.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
/* Copyright 2013 Douglas Bagnall <[email protected]> LGPL */
#ifndef HAVE_RECUR_CONTEXT_H
#define HAVE_RECUR_CONTEXT_H
/* Build-time switches.
 * Both PGM dump flags are off (0) here. How they are consumed is not visible
 * in this header; presumably they gate debug image dumps via pgm_dump.h
 * (TODO confirm). */
#define PERIODIC_PGM_DUMP 0
#define TEMPORAL_PGM_DUMP 0
/* Default log file name.
 * NOTE(review): presumably what callers pass as the log_file argument of
 * recur_setup_nets() -- confirm at the call site. */
#define NET_LOG_FILE "video.log"
/* Alternative setting kept for reference: a NULL log file name (presumably
 * turns logging off -- confirm before enabling). */
//#define NET_LOG_FILE NULL
/* Training switch, enabled (1) here; consumers are outside this header. */
#define RECUR_TRAIN 1
#include <gst/gst.h>
#include "recur-common.h"
#include "recur-nn.h"
#include "mfcc.h"
#include <gst/fft/gstfftf32.h>
#include <gst/video/video.h>
#include <gst/audio/audio.h>
#include "pgm_dump.h"
/* Audio format requested by this element: channel count, sample rate
 * (presumably Hz) and the format name string. */
#define RECUR_AUDIO_CHANNELS 1
#define RECUR_AUDIO_RATE 16000
#define RECUR_AUDIO_FORMAT "S16LE"
/* In-memory sample type; it has to stay in step with RECUR_AUDIO_FORMAT.
 * s16 is declared outside this header (presumably a signed 16-bit integer
 * from recur-common.h -- confirm). */
typedef s16 audio_sample;
/* Bits per sample, derived from the sample type instead of being hard-coded.
 * Because of sizeof, the expression has type size_t. */
#define RECUR_AUDIO_BITS (8 * sizeof(audio_sample))
/* Audio feature extraction parameters. The code that consumes them is
 * outside this header (presumably the binner declared in mfcc.h). */
#define RECUR_N_FFT_BINS 40
#define RECUR_MFCC_MIN_FREQ 20
/* Just under half of RECUR_AUDIO_RATE. Note that the 0.499 factor makes this
 * a double expression, not an integer constant. */
#define RECUR_MFCC_MAX_FREQ (RECUR_AUDIO_RATE * 0.499)
#define RECUR_MFCC_KNEE_FREQ 700
#define RECUR_MFCC_FOCUS_FREQ 0
/* Number of coefficients kept; this is the length of
 * RecurContext.current_audio. */
#define RECUR_N_MFCCS 15
/* Length of RecurContext.seed: 3 values for every cell of the input grid
 * after the grid is widened by 2 in each dimension, i.e. (3 + 2) * (4 + 2) * 3
 * = 90 with the current settings. (The factor 3 is presumably one value per
 * Y/Cb/Cr plane -- confirm.)
 * RECUR_INPUT_WIDTH/HEIGHT are defined further down; that is fine because a
 * macro body is only expanded where the macro is used. */
#define RECUR_N_VIDEO_FEATURES ((RECUR_INPUT_HEIGHT + 2) * (RECUR_INPUT_WIDTH + 2) * 3)
/* Network shape/training depth constants; consumed outside this header. */
#define RECUR_N_HIDDEN 199
#define RECUR_BPTT_DEPTH 20
/* NOTE(review): -1 presumably asks for a non-fixed RNG seed -- confirm in the
 * recur-nn code that receives it. */
#define RECUR_RNG_SEED -1
/* Saved-net file name with the hidden size embedded in it. QUOTE is not
 * defined in this header (presumably a stringifying macro from
 * recur-common.h, which would give "recur-199.net" -- confirm). */
#define NET_FILENAME "recur-" QUOTE(RECUR_N_HIDDEN) ".net"
/* Persistence switches, both enabled (1) here; consumers are outside this
 * header. */
#define PERIODIC_SAVE_NET 1
#define TRY_RELOAD 1
/* Dimensions of the planes stored in each RecurFrame: Y, Cb and Cr are each
 * RECUR_WORKING_WIDTH * RECUR_WORKING_HEIGHT bytes. */
#define RECUR_WORKING_WIDTH 96
#define RECUR_WORKING_HEIGHT 72
/* Input grid dimensions; also used by RECUR_N_VIDEO_FEATURES. */
#define RECUR_INPUT_WIDTH 4
#define RECUR_INPUT_HEIGHT 3
/* Linear scale factor between the input grid and the output grid. */
#define RECUR_RESOLUTION_GAIN 2
/* Output grid: 8 x 6 with the current settings. */
#define RECUR_OUTPUT_WIDTH (RECUR_INPUT_WIDTH * RECUR_RESOLUTION_GAIN)
#define RECUR_OUTPUT_HEIGHT (RECUR_INPUT_HEIGHT * RECUR_RESOLUTION_GAIN)
/* 3 values per output cell: 6 * 8 * 3 = 144 with the current settings. */
#define RECUR_OUTPUT_SIZE (RECUR_OUTPUT_HEIGHT * RECUR_OUTPUT_WIDTH * 3)
/* Training hyper-parameters. 1e-5 and 0.95 are double constants.
 * NOTE(review): RecurContext.learn_rate is a float and is presumably
 * initialised from LEARN_RATE -- confirm in recur_context_init(). */
#define LEARN_RATE 1e-5
#define PRESYNAPTIC_NOISE 0
#define MOMENTUM 0.95
/* Ring-buffer index arithmetic for a queue of RECUR_FQ_LENGTH slots
 * (presumably RecurContext.frame_queue, indexed by fq_head / fq_tail --
 * confirm against the users of these macros).
 *
 * Indices wrap by masking with (RECUR_FQ_LENGTH - 1). That is only correct
 * when RECUR_FQ_LENGTH is a power of two, so the check below turns a bad edit
 * of the length into a build error instead of a silently wrong index. */
#define RECUR_FQ_LENGTH 16
#if (RECUR_FQ_LENGTH) <= 0 || ((RECUR_FQ_LENGTH) & ((RECUR_FQ_LENGTH) - 1)) != 0
#error "RECUR_FQ_LENGTH must be a positive power of two (indices wrap by masking)"
#endif
/* Index of the slot after f; RECUR_FQ_LENGTH - 1 wraps round to 0. */
#define RECUR_FQ_NEXT(f) (((f) + 1) & (RECUR_FQ_LENGTH - 1))
/* Index of the slot before f; 0 wraps round to RECUR_FQ_LENGTH - 1 (the mask
 * discards the borrow for unsigned and two's-complement signed indices). */
#define RECUR_FQ_PREVIOUS(f) (((f) - 1) & (RECUR_FQ_LENGTH - 1))
/* Step the lvalue f forward by one slot, in place. f is evaluated twice, so
 * it must not have side effects (no RECUR_FQ_ADVANCE(i++)). */
#define RECUR_FQ_ADVANCE(f) do { (f) = RECUR_FQ_NEXT(f); } while (0)
/* Length of RecurContext.trainers. */
#define RECUR_N_TRAINERS 12
/* All constructor constants are derived from RECUR_CONSTRUCTOR_DEPTH (and
 * from the resolution gain / output grid size defined earlier). The values
 * quoted below are for the current settings: gain 2, depth 5, 8 x 6 output. */
/* Area scale factor per level: 2 * 2 = 4. */
#define RECUR_AREA_GAIN (RECUR_RESOLUTION_GAIN * RECUR_RESOLUTION_GAIN)
#define RECUR_CONSTRUCTOR_DEPTH 5
/* Leaves at the deepest level: 1 << 8 = 256. */
#define RECUR_CONSTRUCTOR_N_LEAVES (1 << (2 * RECUR_CONSTRUCTOR_DEPTH - 2))
/* Linear scale factor from the output grid to the final image: 1 << 4 = 16. */
#define RECUR_CONSTRUCTOR_DIMENSION_GAIN (1 << (RECUR_CONSTRUCTOR_DEPTH - 1))
/* Total nets over all levels; this is the length of
 * RecurContext.constructors. The integer division truncates on purpose:
 * 256 * 4 / 3 = 341 (see the table below). */
#define RECUR_N_CONSTRUCTORS (RECUR_CONSTRUCTOR_N_LEAVES * RECUR_AREA_GAIN / \
(RECUR_AREA_GAIN -1))
/* Final image size: 128 x 96. */
#define RECUR_CONSTRUCTOR_WIDTH (RECUR_OUTPUT_WIDTH * RECUR_CONSTRUCTOR_DIMENSION_GAIN)
#define RECUR_CONSTRUCTOR_HEIGHT (RECUR_OUTPUT_HEIGHT * RECUR_CONSTRUCTOR_DIMENSION_GAIN)
/* Cells in one plane of the final image: 256 * 8 * 6 = 12288 = 128 * 96. */
#define RECUR_CONFAB_PLANE_SIZE (RECUR_CONSTRUCTOR_N_LEAVES \
* RECUR_OUTPUT_WIDTH * RECUR_OUTPUT_HEIGHT)
/* Recursive depth vs complexity and results.

   depth   leaves   total nets   resolution
     1        1          1         8 x 6
     2        4          5        16 x 12
     3       16         21        32 x 24
     4       64         85        64 x 48
     5      256        341       128 x 96
     6     1024       1365       256 x 192
     7     4096

   leaves = 1 << (2 * depth - 2)
   total is (4 ** n - 1) / 3
          = (leaf_n * 4 / 3)      [integer division]
   next generation approaches 3 * total.
   (binary 1/3 is 0.0101010101..)
   previous size is floor(size / 4)
*/
/* One training slot: a net together with an integer position and scale.
 * RecurContext holds RECUR_N_TRAINERS of these. What x, y and scale are
 * measured in is decided by code outside this header. */
typedef struct _RecurTrainer RecurTrainer;
struct _RecurTrainer {
  RecurNN *net; /* net used by this slot; ownership is not visible here */
  int x;        /* horizontal position */
  int y;        /* vertical position */
  int scale;    /* scale associated with the position */
};
/* Forward typedefs so the struct definitions and prototypes below can use
 * the short names. */
typedef struct _RecurContext RecurContext;
typedef struct _RecurFrame RecurFrame;
/* One video frame at working resolution, plus a timestamp and a flag. */
struct _RecurFrame {
/* Three planes of identical size, RECUR_WORKING_WIDTH * RECUR_WORKING_HEIGHT
 * bytes each (so chroma is stored at the same resolution as luma). Each
 * plane is given 16-byte alignment within the struct by the GCC/Clang
 * attribute, presumably for SIMD access.
 * NOTE(review): RecurContext.frame_queue is a pointer, so the alignment only
 * holds if the allocation itself is 16-byte aligned -- confirm. */
u8 Y[RECUR_WORKING_WIDTH * RECUR_WORKING_HEIGHT] __attribute__((aligned (16)));
u8 Cb[RECUR_WORKING_WIDTH * RECUR_WORKING_HEIGHT] __attribute__((aligned (16)));
u8 Cr[RECUR_WORKING_WIDTH * RECUR_WORKING_HEIGHT] __attribute__((aligned (16)));
/* Timestamp for the frame as a GstClockTime (presumably the middle of the
 * frame's display interval, going by the name -- confirm). */
GstClockTime centre_time;
/* Integer flag; its exact meaning is set by code outside this header. */
int pending;
};
/* All state shared by the entry points declared below. Lifetime is bracketed
 * by recur_context_init() / recur_context_finalize(). Field notes state only
 * what the declarations show; usage lives outside this header. */
struct _RecurContext {
/* Video format description (embedded by value); see
 * recur_context_set_video_properties(). */
GstVideoInfo video_info;
/* Queue of audio data, embedded by value (not a pointer). */
GQueue audio_queue;
RecurAudioBinner *audio_binner;
/* A single net, plus a separately held array of net pointers; the length of
 * training_nets is not visible here. */
RecurNN *net;
RecurNN **training_nets;
/* Frame storage; presumably RECUR_FQ_LENGTH entries indexed through the
 * RECUR_FQ_* macros with fq_head / fq_tail -- confirm at the allocation. */
RecurFrame *frame_queue;
int current_frame;
int video_lag;
/* Byte buffer; its size and layout are decided at allocation time, outside
 * this header. */
u8 *planes;
/* One net pointer per constructor, RECUR_N_CONSTRUCTORS in total. */
RecurNN *constructors[RECUR_N_CONSTRUCTORS];
int fq_tail;
int fq_head;
/* Fixed set of RECUR_N_TRAINERS training slots, stored inline. */
RecurTrainer trainers[RECUR_N_TRAINERS];
float learn_rate;
/* RECUR_N_VIDEO_FEATURES floats, stored inline. */
float seed[RECUR_N_VIDEO_FEATURES];
/* RECUR_N_MFCCS floats, stored inline. */
float current_audio [RECUR_N_MFCCS];
float audio_volume;
/* Integer debug setting; its meaning is defined by the code that reads it. */
int osdebug;
};
/* Public entry points. The implementations are not part of this header, so
 * the notes below describe only what the signatures show; anything marked
 * "presumably" must be confirmed against the implementation. */

/* Video input: one variant takes a GstBuffer, the other an already mapped
 * GstVideoFrame. Presumably they store the picture in context->frame_queue. */
void recur_queue_video_buffer(RecurContext *context, GstBuffer *buffer);
void recur_fill_video_frame(RecurContext *context, GstVideoFrame *frame);
/* Audio input/output on GstBuffers; presumably "queue" consumes incoming
 * audio and "fill" writes into the given buffer. */
void recur_queue_audio_segment(RecurContext *context, GstBuffer *buffer);
void recur_fill_audio_segment(RecurContext *context, GstBuffer *buffer);
/* Lifetime management for a caller-provided RecurContext. */
void recur_context_init(RecurContext *context);
void recur_context_finalize(RecurContext *context);
/* Supplies the video format description; presumably recorded in
 * context->video_info. */
void recur_context_set_video_properties(RecurContext *context, GstVideoInfo *info);
/* Training on a (source, target) frame pair. Returns a pointer to floats;
 * the length and ownership of that array are not visible here. */
float * recur_train_rnn(RecurContext *context, RecurFrame *src_frame,
RecurFrame *target_frame);
/* Net set-up; log_file is a file name string (NET_LOG_FILE is the default
 * defined above; whether NULL is accepted must be checked in the
 * implementation). */
void recur_setup_nets(RecurContext *context, const char *log_file);
/* Training variant with no return value, same (source, target) arguments. */
void recur_train_nets(RecurContext *context, RecurFrame *src_frame,
RecurFrame *target_frame);
/* Image generation into three caller-provided byte planes. The required
 * plane sizes are not stated by the signature. */
void recur_confabulate(RecurContext *context, u8 *Y, u8 *Cb, u8 *Cr);
/* As above, with an additional float array argument (presumably
 * RECUR_N_VIDEO_FEATURES long, like context->seed). */
void rnn_recursive_construct(RecurContext *context, u8 *Y, u8 *Cb, u8 *Cr,
float *seed);
/* Presumably saves the net(s) when the save conditions are met (see
 * PERIODIC_SAVE_NET and NET_FILENAME). */
void possibly_save_state(RecurContext *context);
#endif