From c2395ffa8d32e0a0447c4ba534292015b282850a Mon Sep 17 00:00:00 2001 From: Michael Chinen Date: Thu, 29 Sep 2022 23:20:45 -0700 Subject: [PATCH] V2 (#89) V2 (1.2.0) --- .bazelrc | 96 +- .github/actions/lyra-builder/action.yml | 2 +- .../{setup-ndk => setup-lyra-deps}/action.yml | 6 +- .github/workflows/ci.yml | 32 +- BUILD | 1021 ++---- README.md | 262 +- WORKSPACE | 100 +- android_configure.bzl | 40 + android_example/AndroidManifest.xml | 1 + android_example/BUILD | 31 +- .../java/com/example/android/lyra/BUILD | 6 + .../example/android/lyra/MainActivity.java | 84 +- ...ecode_lib.cc => jni_lyra_benchmark_lib.cc} | 28 +- android_example/res/layout/activity_main.xml | 45 +- android_example/res/values/strings.xml | 2 + benchmark_decode_lib.cc | 187 -- benchmark_decode_lib.h | 52 - buffer_merger.cc | 123 - buffer_merger.h | 75 - buffer_merger_test.cc | 329 -- ...interface.h => buffered_filter_interface.h | 29 +- buffered_resampler.cc | 150 + buffered_resampler.h | 81 + buffered_resampler_test.cc | 246 ++ causal_convolutional_conditioning.h | 480 --- causal_convolutional_conditioning_test.cc | 430 --- comfort_noise_generator.cc | 137 +- comfort_noise_generator.h | 29 +- comfort_noise_generator_test.cc | 119 +- conv1d_layer_wrapper.h | 177 - conv1d_layer_wrapper_test.cc | 319 -- decoder_main.cc | 35 +- decoder_main_lib.cc | 143 +- decoder_main_lib.h | 29 +- decoder_main_lib_test.cc | 139 +- dilated_convolutional_layer_wrapper.h | 268 -- dilated_convolutional_layer_wrapper_test.cc | 362 --- dsp_util.cc | 80 - dsp_util.h | 151 - dsp_util_test.cc | 183 -- dsp_utils.cc | 44 + dsp_utils.h | 125 + dsp_utils_test.cc | 130 + encoder_main.cc | 16 +- encoder_main_lib.cc | 31 +- encoder_main_lib.h | 8 +- encoder_main_lib_test.cc | 12 +- exported_layers_test.h | 110 - ...enoiser.h => feature_estimator_interface.h | 21 +- feature_extractor_interface.h | 4 +- filter_banks.cc | 139 - filter_banks.h | 72 - filter_banks_test.cc | 185 -- fixed_packet_loss_model.cc | 58 + 
fixed_packet_loss_model.h | 48 + fixed_packet_loss_model_test.cc | 57 + generative_model_interface.h | 112 +- gilbert_model.cc | 2 +- gilbert_model.h | 6 +- layer_wrapper.h | 226 -- layer_wrapper_interface.h | 114 - layer_wrapper_test_common.h | 161 - layer_wrappers_lib.h | 27 - log_mel_spectrogram_extractor_impl.cc | 23 +- log_mel_spectrogram_extractor_impl.h | 14 +- ...el_spectrogram_extractor_impl_benchmark.cc | 33 +- log_mel_spectrogram_extractor_impl_test.cc | 37 +- benchmark_decode.cc => lyra_benchmark.cc | 28 +- lyra_benchmark_lib.cc | 300 ++ ...ock_filter_banks.h => lyra_benchmark_lib.h | 28 +- lyra_components.cc | 54 +- lyra_components.h | 26 +- lyra_config.cc | 153 +- lyra_config.h | 135 +- lyra_config_test.cc | 138 +- lyra_decoder.cc | 533 +-- lyra_decoder.h | 157 +- lyra_decoder_interface.h | 16 +- lyra_decoder_test.cc | 1350 ++++---- lyra_encoder.cc | 216 +- lyra_encoder.h | 32 +- lyra_encoder_interface.h | 6 +- lyra_encoder_test.cc | 436 +-- lyra_gan_model.cc | 70 + lyra_gan_model.h | 55 + lyra_gan_model_test.cc | 79 + lyra_integration_test.cc | 179 +- lyra_types.h | 284 -- lyra_wavegru.h | 385 --- lyra_wavegru_test.cc | 129 - model_coeffs/lyra_config.binarypb | 1 + model_coeffs/lyragan.tflite | Bin 0 -> 2759268 bytes model_coeffs/quantizer.tflite | Bin 0 -> 518396 bytes model_coeffs/soundstream_encoder.tflite | Bin 0 -> 2748716 bytes {wavegru => model_coeffs}/test_playback.wav | Bin naive_spectrogram_predictor.cc | 37 - naive_spectrogram_predictor.h | 50 - naive_spectrogram_predictor_test.cc | 81 - no_op_preprocessor_test.cc | 3 + noise_estimator.cc | 213 +- noise_estimator.h | 75 +- noise_estimator_interface.h | 10 +- noise_estimator_test.cc | 211 +- packet.h | 95 +- packet_interface.h | 4 +- packet_loss_handler.cc | 93 - packet_loss_handler.h | 74 - packet_loss_handler_interface.h | 45 - packet_loss_handler_test.cc | 289 -- ...edictor.h => packet_loss_model_interface.h | 21 +- packet_test.cc | 130 +- project_and_sample.h | 304 -- 
project_and_sample_test.cc | 255 -- quadrature_mirror_filter.cc | 126 - quadrature_mirror_filter.h | 90 - quadrature_mirror_filter_test.cc | 200 -- resampler.cc | 46 +- resampler.h | 19 +- resampler_interface.h | 6 + resampler_test.cc | 33 +- residual_vector_quantizer.cc | 170 + residual_vector_quantizer.h | 68 + residual_vector_quantizer_test.cc | 118 + soundstream_encoder.cc | 71 + soundstream_encoder.h | 57 + soundstream_encoder_test.cc | 61 + sparse_matmul/BUILD | 21 - sparse_matmul/compute/BUILD | 88 - sparse_matmul/compute/ar_inputs.h | 37 - sparse_matmul/compute/gru_gates.h | 214 -- sparse_matmul/compute/gru_gates_arm.h | 288 -- sparse_matmul/compute/gru_gates_avx_fixed.h | 348 -- sparse_matmul/compute/gru_gates_generic.h | 97 - sparse_matmul/compute/gru_gates_test.cc | 164 - sparse_matmul/compute/kernels_arm.h | 2886 ----------------- sparse_matmul/compute/kernels_avx.h | 601 ---- sparse_matmul/compute/kernels_generic.h | 273 -- sparse_matmul/compute/matmul.h | 199 -- sparse_matmul/compute/matmul_fixed_avx2.cc | 232 -- sparse_matmul/compute/matmul_fixed_avx2.h | 49 - sparse_matmul/compute/matmul_generic.cc | 122 - sparse_matmul/compute/matmul_generic.h | 41 - sparse_matmul/compute/thread_bounds.cc | 106 - sparse_matmul/compute/thread_bounds.h | 74 - sparse_matmul/layers/BUILD | 146 - sparse_matmul/layers/csr_blocksparse_matrix.h | 835 ----- sparse_matmul/layers/csrblocksparse_test.cc | 977 ------ sparse_matmul/layers/errno_mapping.cc | 195 -- sparse_matmul/layers/errno_mapping.h | 29 - sparse_matmul/layers/masked_sparse_matrix.h | 206 -- sparse_matmul/layers/read_array_ifstream.h | 66 - sparse_matmul/layers/sparse_linear_layer.h | 365 --- .../layers/sparse_linear_layer_test.cc | 187 -- sparse_matmul/layers/status_macros.h | 34 - .../768_512_95_4x4_QRhat_weights.raw.gz | Bin 20852 -> 0 bytes .../768_512_95_4x4_What_weights.raw.gz | Bin 5133 -> 0 bytes .../768_512_95_4x4_coarselogit_bias.raw.gz | Bin 1062 -> 0 bytes .../768_512_95_4x4_coarselogit_mask.raw.gz 
| Bin 2382 -> 0 bytes .../768_512_95_4x4_coarselogit_weights.raw.gz | Bin 55829 -> 0 bytes .../768_512_95_4x4_coarseproj_bias.raw.gz | Bin 2003 -> 0 bytes .../768_512_95_4x4_coarseproj_mask.raw.gz | Bin 4684 -> 0 bytes .../768_512_95_4x4_coarseproj_weights.raw.gz | Bin 113777 -> 0 bytes .../768_512_95_4x4_finelogit_bias.raw.gz | Bin 1055 -> 0 bytes .../768_512_95_4x4_finelogit_mask.raw.gz | Bin 2322 -> 0 bytes .../768_512_95_4x4_finelogit_weights.raw.gz | Bin 51615 -> 0 bytes .../768_512_95_4x4_fineproj_bias.raw.gz | Bin 2001 -> 0 bytes .../768_512_95_4x4_fineproj_mask.raw.gz | Bin 4459 -> 0 bytes .../768_512_95_4x4_fineproj_weights.raw.gz | Bin 111636 -> 0 bytes .../768_512_95_4x4_wavernn_gru_bias.raw.gz | Bin 10706 -> 0 bytes .../768_512_95_4x4_wavernn_gru_mask.raw.gz | Bin 50978 -> 0 bytes .../768_512_95_4x4_wavernn_gru_weights.raw.gz | Bin 1361746 -> 0 bytes .../layers/testdata/lyra_conv1d_bias.raw.gz | Bin 1980 -> 0 bytes .../layers/testdata/lyra_conv1d_mask.raw.gz | Bin 953 -> 0 bytes .../testdata/lyra_conv1d_weights.raw.gz | Bin 858640 -> 0 bytes sparse_matmul/layers/utils.cc | 129 - sparse_matmul/layers/utils.h | 338 -- sparse_matmul/layers/utils_test.cc | 185 -- sparse_matmul/numerics/BUILD | 160 - .../numerics/fast_transcendentals.cc | 81 - sparse_matmul/numerics/fast_transcendentals.h | 1177 ------- .../numerics/fasttranscendentals_test.cc | 665 ---- sparse_matmul/numerics/fixed_types.h | 139 - sparse_matmul/numerics/fixed_types_test.cc | 43 - sparse_matmul/numerics/float16_types.h | 149 - sparse_matmul/numerics/test_utils.h | 75 - sparse_matmul/numerics/type_utils.h | 89 - sparse_matmul/os/BUILD | 26 - sparse_matmul/os/coop_threads.cc | 63 - sparse_matmul/os/coop_threads.h | 179 - sparse_matmul/os/coop_threads_test.cc | 134 - sparse_matmul/sparse_matmul.h | 34 - sparse_matmul/vector/BUILD | 63 - sparse_matmul/vector/aligned_malloc.cc | 46 - sparse_matmul/vector/aligned_malloc.h | 32 - sparse_matmul/vector/cache_aligned_vector.h | 1117 ------- 
.../vector/cachealignedvector_benchmark.cc | 60 - .../vector/cachealignedvector_test.cc | 405 --- sparse_matmul/zlib_wrapper/BUILD | 20 - sparse_matmul/zlib_wrapper/gzipheader.cc | 190 -- sparse_matmul/zlib_wrapper/gzipheader.h | 107 - sparse_matmul/zlib_wrapper/zlibwrapper.cc | 841 ----- sparse_matmul/zlib_wrapper/zlibwrapper.h | 320 -- spectrogram_predictor_interface.h | 42 - testdata/16khz_increasing.wav | Bin 32044 -> 0 bytes testdata/16khz_sample_000001.wav | Bin 241992 -> 0 bytes testdata/16khz_stereo_white_noise.wav | Bin 64044 -> 0 bytes testdata/32khz_sample_000002.wav | Bin 324140 -> 0 bytes testdata/48khz_increasing.wav | Bin 96044 -> 0 bytes testdata/48khz_playback.wav | Bin 559916 -> 0 bytes testdata/48khz_sample_000003.wav | Bin 638252 -> 0 bytes testdata/8khz_sample_000000.wav | Bin 65708 -> 0 bytes testdata/BUILD | 117 +- testdata/codec.gz | Bin 149 -> 0 bytes testdata/decoding_stream_dump.textproto | 19 - testdata/encoding_stream_dump.textproto | 7 - ...d_frame => incomplete_encoded_packet.lyra} | 0 .../lyra_conditioning_stack_0_bias.raw.gz | Bin 90 -> 0 bytes ...onditioning_stack_0_fixed16_weights.raw.gz | Bin 224 -> 0 bytes .../lyra_conditioning_stack_0_mask.raw.gz | Bin 107 -> 0 bytes .../lyra_conditioning_stack_0_weights.raw.gz | Bin 347 -> 0 bytes .../lyra_conditioning_stack_1_bias.raw.gz | Bin 90 -> 0 bytes ...onditioning_stack_1_fixed16_weights.raw.gz | Bin 244 -> 0 bytes .../lyra_conditioning_stack_1_mask.raw.gz | Bin 114 -> 0 bytes .../lyra_conditioning_stack_1_weights.raw.gz | Bin 390 -> 0 bytes .../lyra_conditioning_stack_2_bias.raw.gz | Bin 90 -> 0 bytes ...onditioning_stack_2_fixed16_weights.raw.gz | Bin 222 -> 0 bytes .../lyra_conditioning_stack_2_mask.raw.gz | Bin 113 -> 0 bytes .../lyra_conditioning_stack_2_weights.raw.gz | Bin 346 -> 0 bytes testdata/lyra_config.textproto | 1 - testdata/lyra_conv1d_bias.raw.gz | Bin 74 -> 0 bytes testdata/lyra_conv1d_fixed16_weights.raw.gz | Bin 152 -> 0 bytes testdata/lyra_conv1d_mask.raw.gz | Bin 
75 -> 0 bytes testdata/lyra_conv1d_weights.raw.gz | Bin 223 -> 0 bytes testdata/lyra_conv_cond_bias.raw.gz | Bin 47 -> 0 bytes .../lyra_conv_cond_fixed16_weights.raw.gz | Bin 101 -> 0 bytes testdata/lyra_conv_cond_mask.raw.gz | Bin 65 -> 0 bytes testdata/lyra_conv_cond_weights.raw.gz | Bin 126 -> 0 bytes testdata/lyra_conv_to_gates_bias.raw.gz | Bin 99 -> 0 bytes .../lyra_conv_to_gates_fixed16_weights.raw.gz | Bin 130 -> 0 bytes testdata/lyra_conv_to_gates_mask.raw.gz | Bin 78 -> 0 bytes testdata/lyra_conv_to_gates_weights.raw.gz | Bin 178 -> 0 bytes testdata/lyra_gru_layer_bias.raw.gz | Bin 95 -> 0 bytes .../lyra_gru_layer_fixed16_weights.raw.gz | Bin 154 -> 0 bytes testdata/lyra_gru_layer_mask.raw.gz | Bin 51 -> 0 bytes testdata/lyra_gru_layer_weights.raw.gz | Bin 242 -> 0 bytes testdata/lyra_means_bias.raw.gz | Bin 73 -> 0 bytes testdata/lyra_means_fixed16_weights.raw.gz | Bin 118 -> 0 bytes testdata/lyra_means_mask.raw.gz | Bin 47 -> 0 bytes testdata/lyra_means_weights.raw.gz | Bin 174 -> 0 bytes testdata/lyra_mix_bias.raw.gz | Bin 73 -> 0 bytes testdata/lyra_mix_fixed16_weights.raw.gz | Bin 116 -> 0 bytes testdata/lyra_mix_mask.raw.gz | Bin 45 -> 0 bytes testdata/lyra_mix_weights.raw.gz | Bin 172 -> 0 bytes testdata/lyra_proj_bias.raw.gz | Bin 55 -> 0 bytes testdata/lyra_proj_fixed16_weights.raw.gz | Bin 69 -> 0 bytes testdata/lyra_proj_mask.raw.gz | Bin 50 -> 0 bytes testdata/lyra_proj_weights.raw.gz | Bin 73 -> 0 bytes testdata/lyra_scales_bias.raw.gz | Bin 74 -> 0 bytes testdata/lyra_scales_fixed16_weights.raw.gz | Bin 119 -> 0 bytes testdata/lyra_scales_mask.raw.gz | Bin 48 -> 0 bytes testdata/lyra_scales_weights.raw.gz | Bin 175 -> 0 bytes testdata/lyra_transpose_0_bias.raw.gz | Bin 82 -> 0 bytes .../lyra_transpose_0_fixed16_weights.raw.gz | Bin 235 -> 0 bytes testdata/lyra_transpose_0_mask.raw.gz | Bin 101 -> 0 bytes testdata/lyra_transpose_0_weights.raw.gz | Bin 376 -> 0 bytes testdata/lyra_transpose_1_bias.raw.gz | Bin 83 -> 0 bytes 
.../lyra_transpose_1_fixed16_weights.raw.gz | Bin 224 -> 0 bytes testdata/lyra_transpose_1_mask.raw.gz | Bin 106 -> 0 bytes testdata/lyra_transpose_1_weights.raw.gz | Bin 349 -> 0 bytes testdata/lyra_transpose_2_bias.raw.gz | Bin 83 -> 0 bytes .../lyra_transpose_2_fixed16_weights.raw.gz | Bin 249 -> 0 bytes testdata/lyra_transpose_2_mask.raw.gz | Bin 104 -> 0 bytes testdata/lyra_transpose_2_weights.raw.gz | Bin 406 -> 0 bytes ..._encoded_frames => no_encoded_packet.lyra} | 0 ...me_16khz => one_encoded_packet_16khz.lyra} | Bin testdata/sample1_16kHz.wav | Bin 0 -> 110398 bytes testdata/sample1_32kHz.wav | Bin 0 -> 220752 bytes testdata/sample1_48kHz.wav | Bin 0 -> 331106 bytes testdata/sample1_8kHz.wav | Bin 0 -> 55222 bytes testdata/sample2_16kHz.wav | Bin 0 -> 92204 bytes testdata/sample2_32kHz.wav | Bin 0 -> 184364 bytes testdata/sample2_48kHz.wav | Bin 0 -> 276524 bytes testdata/sample2_8kHz.wav | Bin 0 -> 46124 bytes testdata/test_conv1d_bias.raw.gz | Bin 44 -> 0 bytes testdata/test_conv1d_fixed16_weights.raw.gz | Bin 59 -> 0 bytes testdata/test_conv1d_mask.raw.gz | Bin 48 -> 0 bytes testdata/test_conv1d_weights.raw.gz | Bin 54 -> 0 bytes testdata/test_dilated_bias.raw.gz | Bin 45 -> 0 bytes testdata/test_dilated_fixed16_weights.raw.gz | Bin 60 -> 0 bytes testdata/test_dilated_mask.raw.gz | Bin 49 -> 0 bytes testdata/test_dilated_weights.raw.gz | Bin 55 -> 0 bytes testdata/test_transpose_bias.raw.gz | Bin 47 -> 0 bytes .../test_transpose_fixed16_weights.raw.gz | Bin 67 -> 0 bytes testdata/test_transpose_mask.raw.gz | Bin 51 -> 0 bytes testdata/test_transpose_weights.raw.gz | Bin 62 -> 0 bytes testdata/transpose_2.gz | Bin 1240 -> 0 bytes ...hz.lyra => two_encoded_packets_16khz.lyra} | Bin testing/BUILD | 53 +- testing/mock_feature_extractor.h | 2 +- testing/mock_generative_model.h | 52 +- testing/mock_lyra_decoder.h | 9 +- testing/mock_lyra_encoder.h | 4 +- testing/mock_noise_estimator.h | 9 +- testing/mock_packet_loss_handler.h | 45 - testing/mock_resampler.h 
| 31 + testing/mock_vector_quantizer.h | 7 +- tflite_model_wrapper.cc | 121 + tflite_model_wrapper.h | 71 + tflite_model_wrapper_test.cc | 48 + transpose_convolutional_layer_wrapper.h | 120 - transpose_convolutional_layer_wrapper_test.cc | 293 -- vector_quantizer_impl.cc | 323 -- vector_quantizer_impl.h | 94 - vector_quantizer_impl_test.cc | 248 -- vector_quantizer_interface.h | 9 +- wav_util.cc => wav_utils.cc | 9 +- wav_util.h => wav_utils.h | 6 +- wav_util_test.cc => wav_utils_test.cc | 14 +- wavegru/lyra_16khz_ar_to_gates_bias.raw.gz | Bin 47 -> 0 bytes ...a_16khz_ar_to_gates_fixed16_weights.raw.gz | Bin 9191 -> 0 bytes wavegru/lyra_16khz_ar_to_gates_mask.raw.gz | Bin 87 -> 0 bytes wavegru/lyra_16khz_ar_to_gates_weights.raw.gz | Bin 41474 -> 0 bytes ...yra_16khz_conditioning_stack_0_bias.raw.gz | Bin 1955 -> 0 bytes ...onditioning_stack_0_fixed16_weights.raw.gz | Bin 103180 -> 0 bytes ...yra_16khz_conditioning_stack_0_mask.raw.gz | Bin 19674 -> 0 bytes ..._16khz_conditioning_stack_0_weights.raw.gz | Bin 188729 -> 0 bytes ...yra_16khz_conditioning_stack_1_bias.raw.gz | Bin 1962 -> 0 bytes ...onditioning_stack_1_fixed16_weights.raw.gz | Bin 99556 -> 0 bytes ...yra_16khz_conditioning_stack_1_mask.raw.gz | Bin 15746 -> 0 bytes ..._16khz_conditioning_stack_1_weights.raw.gz | Bin 185482 -> 0 bytes ...yra_16khz_conditioning_stack_2_bias.raw.gz | Bin 1923 -> 0 bytes ...onditioning_stack_2_fixed16_weights.raw.gz | Bin 99007 -> 0 bytes ...yra_16khz_conditioning_stack_2_mask.raw.gz | Bin 15492 -> 0 bytes ..._16khz_conditioning_stack_2_weights.raw.gz | Bin 186099 -> 0 bytes wavegru/lyra_16khz_conv1d_bias.raw.gz | Bin 1967 -> 0 bytes .../lyra_16khz_conv1d_fixed16_weights.raw.gz | Bin 45210 -> 0 bytes wavegru/lyra_16khz_conv1d_mask.raw.gz | Bin 5221 -> 0 bytes wavegru/lyra_16khz_conv1d_weights.raw.gz | Bin 83662 -> 0 bytes wavegru/lyra_16khz_conv_cond_bias.raw.gz | Bin 38 -> 0 bytes ...yra_16khz_conv_cond_fixed16_weights.raw.gz | Bin 105341 -> 0 bytes 
wavegru/lyra_16khz_conv_cond_mask.raw.gz | Bin 18448 -> 0 bytes wavegru/lyra_16khz_conv_cond_weights.raw.gz | Bin 188015 -> 0 bytes wavegru/lyra_16khz_conv_to_gates_bias.raw.gz | Bin 11455 -> 0 bytes ...16khz_conv_to_gates_fixed16_weights.raw.gz | Bin 520329 -> 0 bytes wavegru/lyra_16khz_conv_to_gates_mask.raw.gz | Bin 80877 -> 0 bytes .../lyra_16khz_conv_to_gates_weights.raw.gz | Bin 1070438 -> 0 bytes wavegru/lyra_16khz_gru_layer_bias.raw.gz | Bin 10950 -> 0 bytes ...yra_16khz_gru_layer_fixed16_weights.raw.gz | Bin 361317 -> 0 bytes wavegru/lyra_16khz_gru_layer_mask.raw.gz | Bin 24642 -> 0 bytes wavegru/lyra_16khz_gru_layer_weights.raw.gz | Bin 783037 -> 0 bytes wavegru/lyra_16khz_means_bias.raw.gz | Bin 151 -> 0 bytes .../lyra_16khz_means_fixed16_weights.raw.gz | Bin 21929 -> 0 bytes wavegru/lyra_16khz_means_mask.raw.gz | Bin 101 -> 0 bytes wavegru/lyra_16khz_means_weights.raw.gz | Bin 62795 -> 0 bytes wavegru/lyra_16khz_mix_bias.raw.gz | Bin 151 -> 0 bytes wavegru/lyra_16khz_mix_fixed16_weights.raw.gz | Bin 27856 -> 0 bytes wavegru/lyra_16khz_mix_mask.raw.gz | Bin 101 -> 0 bytes wavegru/lyra_16khz_mix_weights.raw.gz | Bin 61730 -> 0 bytes wavegru/lyra_16khz_proj_bias.raw.gz | Bin 1972 -> 0 bytes .../lyra_16khz_proj_fixed16_weights.raw.gz | Bin 91355 -> 0 bytes wavegru/lyra_16khz_proj_mask.raw.gz | Bin 13345 -> 0 bytes wavegru/lyra_16khz_proj_weights.raw.gz | Bin 182125 -> 0 bytes wavegru/lyra_16khz_quant_code_vectors.gz | Bin 21144 -> 0 bytes .../lyra_16khz_quant_codebook_dimensions.gz | Bin 84 -> 0 bytes wavegru/lyra_16khz_quant_mean_vectors.gz | Bin 626 -> 0 bytes wavegru/lyra_16khz_quant_transmat.gz | Bin 95170 -> 0 bytes wavegru/lyra_16khz_scales_bias.raw.gz | Bin 151 -> 0 bytes .../lyra_16khz_scales_fixed16_weights.raw.gz | Bin 26987 -> 0 bytes wavegru/lyra_16khz_scales_mask.raw.gz | Bin 101 -> 0 bytes wavegru/lyra_16khz_scales_weights.raw.gz | Bin 61718 -> 0 bytes wavegru/lyra_16khz_transpose_0_bias.raw.gz | Bin 1995 -> 0 bytes 
...a_16khz_transpose_0_fixed16_weights.raw.gz | Bin 100623 -> 0 bytes wavegru/lyra_16khz_transpose_0_mask.raw.gz | Bin 18775 -> 0 bytes wavegru/lyra_16khz_transpose_0_weights.raw.gz | Bin 188749 -> 0 bytes wavegru/lyra_16khz_transpose_1_bias.raw.gz | Bin 1991 -> 0 bytes ...a_16khz_transpose_1_fixed16_weights.raw.gz | Bin 100815 -> 0 bytes wavegru/lyra_16khz_transpose_1_mask.raw.gz | Bin 18642 -> 0 bytes wavegru/lyra_16khz_transpose_1_weights.raw.gz | Bin 189274 -> 0 bytes wavegru/lyra_16khz_transpose_2_bias.raw.gz | Bin 1988 -> 0 bytes ...a_16khz_transpose_2_fixed16_weights.raw.gz | Bin 94360 -> 0 bytes wavegru/lyra_16khz_transpose_2_mask.raw.gz | Bin 18408 -> 0 bytes wavegru/lyra_16khz_transpose_2_weights.raw.gz | Bin 188283 -> 0 bytes wavegru/lyra_config.textproto | 1 - wavegru_model_impl.cc | 178 - wavegru_model_impl.h | 88 - wavegru_model_impl_test.cc | 84 - ...er_interface.h => zero_feature_estimator.h | 30 +- 387 files changed, 5880 insertions(+), 27771 deletions(-) rename .github/actions/{setup-ndk => setup-lyra-deps}/action.yml (52%) create mode 100644 android_configure.bzl create mode 100644 android_example/java/com/example/android/lyra/BUILD rename android_example/{jni_benchmark_decode_lib.cc => jni_lyra_benchmark_lib.cc} (70%) delete mode 100644 benchmark_decode_lib.cc delete mode 100644 benchmark_decode_lib.h delete mode 100644 buffer_merger.cc delete mode 100644 buffer_merger.h delete mode 100644 buffer_merger_test.cc rename filter_banks_interface.h => buffered_filter_interface.h (53%) create mode 100644 buffered_resampler.cc create mode 100644 buffered_resampler.h create mode 100644 buffered_resampler_test.cc delete mode 100644 causal_convolutional_conditioning.h delete mode 100644 causal_convolutional_conditioning_test.cc delete mode 100644 conv1d_layer_wrapper.h delete mode 100644 conv1d_layer_wrapper_test.cc delete mode 100644 dilated_convolutional_layer_wrapper.h delete mode 100644 dilated_convolutional_layer_wrapper_test.cc delete mode 100644 
dsp_util.cc delete mode 100644 dsp_util.h delete mode 100644 dsp_util_test.cc create mode 100644 dsp_utils.cc create mode 100644 dsp_utils.h create mode 100644 dsp_utils_test.cc delete mode 100644 exported_layers_test.h rename testing/mock_denoiser.h => feature_estimator_interface.h (57%) delete mode 100644 filter_banks.cc delete mode 100644 filter_banks.h delete mode 100644 filter_banks_test.cc create mode 100644 fixed_packet_loss_model.cc create mode 100644 fixed_packet_loss_model.h create mode 100644 fixed_packet_loss_model_test.cc delete mode 100644 layer_wrapper.h delete mode 100644 layer_wrapper_interface.h delete mode 100644 layer_wrapper_test_common.h delete mode 100644 layer_wrappers_lib.h rename benchmark_decode.cc => lyra_benchmark.cc (56%) create mode 100644 lyra_benchmark_lib.cc rename testing/mock_filter_banks.h => lyra_benchmark_lib.h (57%) create mode 100644 lyra_gan_model.cc create mode 100644 lyra_gan_model.h create mode 100644 lyra_gan_model_test.cc delete mode 100644 lyra_types.h delete mode 100644 lyra_wavegru.h delete mode 100644 lyra_wavegru_test.cc create mode 100644 model_coeffs/lyra_config.binarypb create mode 100644 model_coeffs/lyragan.tflite create mode 100644 model_coeffs/quantizer.tflite create mode 100644 model_coeffs/soundstream_encoder.tflite rename {wavegru => model_coeffs}/test_playback.wav (100%) delete mode 100644 naive_spectrogram_predictor.cc delete mode 100644 naive_spectrogram_predictor.h delete mode 100644 naive_spectrogram_predictor_test.cc delete mode 100644 packet_loss_handler.cc delete mode 100644 packet_loss_handler.h delete mode 100644 packet_loss_handler_interface.h delete mode 100644 packet_loss_handler_test.cc rename testing/mock_spectrogram_predictor.h => packet_loss_model_interface.h (57%) delete mode 100644 project_and_sample.h delete mode 100644 project_and_sample_test.cc delete mode 100644 quadrature_mirror_filter.cc delete mode 100644 quadrature_mirror_filter.h delete mode 100644 
quadrature_mirror_filter_test.cc create mode 100644 residual_vector_quantizer.cc create mode 100644 residual_vector_quantizer.h create mode 100644 residual_vector_quantizer_test.cc create mode 100644 soundstream_encoder.cc create mode 100644 soundstream_encoder.h create mode 100644 soundstream_encoder_test.cc delete mode 100644 sparse_matmul/BUILD delete mode 100644 sparse_matmul/compute/BUILD delete mode 100644 sparse_matmul/compute/ar_inputs.h delete mode 100644 sparse_matmul/compute/gru_gates.h delete mode 100644 sparse_matmul/compute/gru_gates_arm.h delete mode 100644 sparse_matmul/compute/gru_gates_avx_fixed.h delete mode 100644 sparse_matmul/compute/gru_gates_generic.h delete mode 100644 sparse_matmul/compute/gru_gates_test.cc delete mode 100644 sparse_matmul/compute/kernels_arm.h delete mode 100644 sparse_matmul/compute/kernels_avx.h delete mode 100644 sparse_matmul/compute/kernels_generic.h delete mode 100644 sparse_matmul/compute/matmul.h delete mode 100644 sparse_matmul/compute/matmul_fixed_avx2.cc delete mode 100644 sparse_matmul/compute/matmul_fixed_avx2.h delete mode 100644 sparse_matmul/compute/matmul_generic.cc delete mode 100644 sparse_matmul/compute/matmul_generic.h delete mode 100644 sparse_matmul/compute/thread_bounds.cc delete mode 100644 sparse_matmul/compute/thread_bounds.h delete mode 100644 sparse_matmul/layers/BUILD delete mode 100644 sparse_matmul/layers/csr_blocksparse_matrix.h delete mode 100644 sparse_matmul/layers/csrblocksparse_test.cc delete mode 100644 sparse_matmul/layers/errno_mapping.cc delete mode 100644 sparse_matmul/layers/errno_mapping.h delete mode 100644 sparse_matmul/layers/masked_sparse_matrix.h delete mode 100644 sparse_matmul/layers/read_array_ifstream.h delete mode 100644 sparse_matmul/layers/sparse_linear_layer.h delete mode 100644 sparse_matmul/layers/sparse_linear_layer_test.cc delete mode 100644 sparse_matmul/layers/status_macros.h delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_QRhat_weights.raw.gz 
delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_What_weights.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_coarselogit_bias.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_coarselogit_mask.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_coarselogit_weights.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_coarseproj_bias.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_coarseproj_mask.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_coarseproj_weights.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_finelogit_bias.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_finelogit_mask.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_finelogit_weights.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_fineproj_bias.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_fineproj_mask.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_fineproj_weights.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_wavernn_gru_bias.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_wavernn_gru_mask.raw.gz delete mode 100644 sparse_matmul/layers/testdata/768_512_95_4x4_wavernn_gru_weights.raw.gz delete mode 100644 sparse_matmul/layers/testdata/lyra_conv1d_bias.raw.gz delete mode 100644 sparse_matmul/layers/testdata/lyra_conv1d_mask.raw.gz delete mode 100644 sparse_matmul/layers/testdata/lyra_conv1d_weights.raw.gz delete mode 100644 sparse_matmul/layers/utils.cc delete mode 100644 sparse_matmul/layers/utils.h delete mode 100644 sparse_matmul/layers/utils_test.cc delete mode 100644 sparse_matmul/numerics/BUILD delete mode 100644 sparse_matmul/numerics/fast_transcendentals.cc delete mode 100644 sparse_matmul/numerics/fast_transcendentals.h delete mode 100644 
sparse_matmul/numerics/fasttranscendentals_test.cc delete mode 100644 sparse_matmul/numerics/fixed_types.h delete mode 100644 sparse_matmul/numerics/fixed_types_test.cc delete mode 100644 sparse_matmul/numerics/float16_types.h delete mode 100644 sparse_matmul/numerics/test_utils.h delete mode 100644 sparse_matmul/numerics/type_utils.h delete mode 100644 sparse_matmul/os/BUILD delete mode 100644 sparse_matmul/os/coop_threads.cc delete mode 100644 sparse_matmul/os/coop_threads.h delete mode 100644 sparse_matmul/os/coop_threads_test.cc delete mode 100644 sparse_matmul/sparse_matmul.h delete mode 100644 sparse_matmul/vector/BUILD delete mode 100644 sparse_matmul/vector/aligned_malloc.cc delete mode 100644 sparse_matmul/vector/aligned_malloc.h delete mode 100644 sparse_matmul/vector/cache_aligned_vector.h delete mode 100644 sparse_matmul/vector/cachealignedvector_benchmark.cc delete mode 100644 sparse_matmul/vector/cachealignedvector_test.cc delete mode 100644 sparse_matmul/zlib_wrapper/BUILD delete mode 100644 sparse_matmul/zlib_wrapper/gzipheader.cc delete mode 100644 sparse_matmul/zlib_wrapper/gzipheader.h delete mode 100644 sparse_matmul/zlib_wrapper/zlibwrapper.cc delete mode 100644 sparse_matmul/zlib_wrapper/zlibwrapper.h delete mode 100644 spectrogram_predictor_interface.h delete mode 100644 testdata/16khz_increasing.wav delete mode 100644 testdata/16khz_sample_000001.wav delete mode 100644 testdata/16khz_stereo_white_noise.wav delete mode 100644 testdata/32khz_sample_000002.wav delete mode 100644 testdata/48khz_increasing.wav delete mode 100644 testdata/48khz_playback.wav delete mode 100644 testdata/48khz_sample_000003.wav delete mode 100644 testdata/8khz_sample_000000.wav delete mode 100644 testdata/codec.gz delete mode 100644 testdata/decoding_stream_dump.textproto delete mode 100644 testdata/encoding_stream_dump.textproto rename testdata/{incomplete_encoded_frame => incomplete_encoded_packet.lyra} (100%) delete mode 100644 
testdata/lyra_conditioning_stack_0_bias.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_0_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_0_mask.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_0_weights.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_1_bias.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_1_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_1_mask.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_1_weights.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_2_bias.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_2_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_2_mask.raw.gz delete mode 100644 testdata/lyra_conditioning_stack_2_weights.raw.gz delete mode 100644 testdata/lyra_config.textproto delete mode 100644 testdata/lyra_conv1d_bias.raw.gz delete mode 100644 testdata/lyra_conv1d_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_conv1d_mask.raw.gz delete mode 100644 testdata/lyra_conv1d_weights.raw.gz delete mode 100644 testdata/lyra_conv_cond_bias.raw.gz delete mode 100644 testdata/lyra_conv_cond_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_conv_cond_mask.raw.gz delete mode 100644 testdata/lyra_conv_cond_weights.raw.gz delete mode 100644 testdata/lyra_conv_to_gates_bias.raw.gz delete mode 100644 testdata/lyra_conv_to_gates_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_conv_to_gates_mask.raw.gz delete mode 100644 testdata/lyra_conv_to_gates_weights.raw.gz delete mode 100644 testdata/lyra_gru_layer_bias.raw.gz delete mode 100644 testdata/lyra_gru_layer_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_gru_layer_mask.raw.gz delete mode 100644 testdata/lyra_gru_layer_weights.raw.gz delete mode 100644 testdata/lyra_means_bias.raw.gz delete mode 100644 testdata/lyra_means_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_means_mask.raw.gz delete mode 100644 
testdata/lyra_means_weights.raw.gz delete mode 100644 testdata/lyra_mix_bias.raw.gz delete mode 100644 testdata/lyra_mix_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_mix_mask.raw.gz delete mode 100644 testdata/lyra_mix_weights.raw.gz delete mode 100644 testdata/lyra_proj_bias.raw.gz delete mode 100644 testdata/lyra_proj_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_proj_mask.raw.gz delete mode 100644 testdata/lyra_proj_weights.raw.gz delete mode 100644 testdata/lyra_scales_bias.raw.gz delete mode 100644 testdata/lyra_scales_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_scales_mask.raw.gz delete mode 100644 testdata/lyra_scales_weights.raw.gz delete mode 100644 testdata/lyra_transpose_0_bias.raw.gz delete mode 100644 testdata/lyra_transpose_0_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_transpose_0_mask.raw.gz delete mode 100644 testdata/lyra_transpose_0_weights.raw.gz delete mode 100644 testdata/lyra_transpose_1_bias.raw.gz delete mode 100644 testdata/lyra_transpose_1_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_transpose_1_mask.raw.gz delete mode 100644 testdata/lyra_transpose_1_weights.raw.gz delete mode 100644 testdata/lyra_transpose_2_bias.raw.gz delete mode 100644 testdata/lyra_transpose_2_fixed16_weights.raw.gz delete mode 100644 testdata/lyra_transpose_2_mask.raw.gz delete mode 100644 testdata/lyra_transpose_2_weights.raw.gz rename testdata/{no_encoded_frames => no_encoded_packet.lyra} (100%) rename testdata/{one_encoded_frame_16khz => one_encoded_packet_16khz.lyra} (100%) create mode 100644 testdata/sample1_16kHz.wav create mode 100644 testdata/sample1_32kHz.wav create mode 100644 testdata/sample1_48kHz.wav create mode 100644 testdata/sample1_8kHz.wav create mode 100644 testdata/sample2_16kHz.wav create mode 100644 testdata/sample2_32kHz.wav create mode 100644 testdata/sample2_48kHz.wav create mode 100644 testdata/sample2_8kHz.wav delete mode 100644 testdata/test_conv1d_bias.raw.gz delete mode 100644 
testdata/test_conv1d_fixed16_weights.raw.gz delete mode 100644 testdata/test_conv1d_mask.raw.gz delete mode 100644 testdata/test_conv1d_weights.raw.gz delete mode 100644 testdata/test_dilated_bias.raw.gz delete mode 100644 testdata/test_dilated_fixed16_weights.raw.gz delete mode 100644 testdata/test_dilated_mask.raw.gz delete mode 100644 testdata/test_dilated_weights.raw.gz delete mode 100644 testdata/test_transpose_bias.raw.gz delete mode 100644 testdata/test_transpose_fixed16_weights.raw.gz delete mode 100644 testdata/test_transpose_mask.raw.gz delete mode 100644 testdata/test_transpose_weights.raw.gz delete mode 100644 testdata/transpose_2.gz rename testdata/{two_encoded_frames_16khz.lyra => two_encoded_packets_16khz.lyra} (100%) delete mode 100644 testing/mock_packet_loss_handler.h create mode 100644 tflite_model_wrapper.cc create mode 100644 tflite_model_wrapper.h create mode 100644 tflite_model_wrapper_test.cc delete mode 100644 transpose_convolutional_layer_wrapper.h delete mode 100644 transpose_convolutional_layer_wrapper_test.cc delete mode 100644 vector_quantizer_impl.cc delete mode 100644 vector_quantizer_impl.h delete mode 100644 vector_quantizer_impl_test.cc rename wav_util.cc => wav_utils.cc (93%) rename wav_util.h => wav_utils.h (94%) rename wav_util_test.cc => wav_utils_test.cc (90%) delete mode 100644 wavegru/lyra_16khz_ar_to_gates_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_ar_to_gates_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_ar_to_gates_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_ar_to_gates_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_0_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_0_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_0_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_0_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_1_bias.raw.gz delete mode 100644 
wavegru/lyra_16khz_conditioning_stack_1_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_1_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_1_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_2_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_2_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_2_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_conditioning_stack_2_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conv1d_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_conv1d_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conv1d_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_conv1d_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conv_cond_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_conv_cond_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conv_cond_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_conv_cond_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conv_to_gates_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_conv_to_gates_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_conv_to_gates_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_conv_to_gates_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_gru_layer_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_gru_layer_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_gru_layer_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_gru_layer_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_means_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_means_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_means_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_means_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_mix_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_mix_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_mix_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_mix_weights.raw.gz 
delete mode 100644 wavegru/lyra_16khz_proj_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_proj_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_proj_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_proj_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_quant_code_vectors.gz delete mode 100644 wavegru/lyra_16khz_quant_codebook_dimensions.gz delete mode 100644 wavegru/lyra_16khz_quant_mean_vectors.gz delete mode 100644 wavegru/lyra_16khz_quant_transmat.gz delete mode 100644 wavegru/lyra_16khz_scales_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_scales_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_scales_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_scales_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_0_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_0_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_0_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_0_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_1_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_1_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_1_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_1_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_2_bias.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_2_fixed16_weights.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_2_mask.raw.gz delete mode 100644 wavegru/lyra_16khz_transpose_2_weights.raw.gz delete mode 100644 wavegru/lyra_config.textproto delete mode 100644 wavegru_model_impl.cc delete mode 100644 wavegru_model_impl.h delete mode 100644 wavegru_model_impl_test.cc rename denoiser_interface.h => zero_feature_estimator.h (52%) diff --git a/.bazelrc b/.bazelrc index 014e1911..8d8baec7 100644 --- a/.bazelrc +++ b/.bazelrc @@ -15,7 +15,6 @@ build --cxxopt=-std=gnu++17 build --linkopt=-lm -build --cxxopt=-Wno-sign-compare # Use the default C++ toolchain to build the tools used 
during the # build. build --host_crosstool_top=@bazel_tools//tools/cpp:toolchain @@ -49,3 +48,98 @@ build:android_arm64 --copt=-Os # use rules_jvm_external. After that, this might be removeable, and we can use # androidx and more recent deps instead of deprecated ones. build:android_arm64 --strict_java_deps=OFF + + +# Start Tensorflow +# The below is to allow tensorflow to build. +# Inspired by TensorFlow serving's .bazelrc to build from the source. +# It also may be useful to refer to TensorFlow .bazelrc for more details: +# https://github.com/tensorflow/tensorflow/blob/master/.bazelrc + +# Optimizations used for TF Serving release builds. +build:release --copt=-mavx +build:release --copt=-msse4.2 + +# Options used to build with CUDA. +build:cuda --repo_env TF_NEED_CUDA=1 +build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain +build:cuda --@local_config_cuda//:enable_cuda +build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true +build:cuda --action_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80" + +# Options used to build with TPU support. +build:tpu --distinct_host_configuration=false +build:tpu --define=with_tpu_support=true --define=framework_shared_object=false + +# Please note that MKL on MacOS or windows is still not supported. +# If you would like to use a local MKL instead of downloading, please set the +# environment variable "TF_MKL_ROOT" every time before build. +build:mkl --define=build_with_mkl=true --define=enable_mkl=true --define=build_with_openmp=true +build:mkl --define=tensorflow_mkldnn_contraction_kernel=0 + +# This config option is used to enable MKL-DNN open source library only, +# without depending on MKL binary version. 
+build:mkl_open_source_only --define=build_with_mkl_dnn_only=true +build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true +build:mkl_open_source_only --define=tensorflow_mkldnn_contraction_kernel=0 + +# Processor native optimizations (depends on build host capabilities). +build:nativeopt --copt=-march=native +build:nativeopt --host_copt=-march=native +build:nativeopt --copt=-O3 + +build --keep_going +build --verbose_failures=true +build --spawn_strategy=standalone +build --genrule_strategy=standalone + +build --define=grpc_no_ares=true + +# Sets the default Apple platform to macOS. +build --apple_platform_type=macos + +build --experimental_repo_remote_exec + +# Enable platform specific config (e.g. by default use --config=windows when on windows, and --config=linux when on linux) +build --enable_platform_specific_config + +# End Tensorflow + +## Windows config +startup --windows_enable_symlinks +build:windows --enable_runfiles + +# These settings below allow for compilation using MSVC +build:windows --copt=/D_USE_MATH_DEFINES +build:windows --host_copt=/D_USE_MATH_DEFINES +build:windows --cxxopt=-D_HAS_DEPRECATED_RESULT_OF=1 + +build:windows --cxxopt=/Zc:__cplusplus +# c++20 needed in MSVC for designated initializers (llvm libc++ +# and gnu stc++ provides them in c++17). 
+build:windows --cxxopt=/std:c++20 +build:windows --linkopt=-ldl +build:windows --host_cxxopt=/std:c++20 + +# Make sure to include as little of windows.h as possible +build:windows --copt=-DWIN32_LEAN_AND_MEAN +build:windows --host_copt=-DWIN32_LEAN_AND_MEAN +build:windows --copt=-DNOGDI +build:windows --host_copt=-DNOGDI + +# MSVC (Windows): Standards-conformant preprocessor mode +# See https://docs.microsoft.com/en-us/cpp/preprocessor/preprocessor-experimental-overview +build:windows --copt=/Zc:preprocessor +build:windows --host_copt=/Zc:preprocessor + +# Misc build options we need for windows according to tensorflow +build:windows --linkopt=/DEBUG +build:windows --host_linkopt=/DEBUG +build:windows --linkopt=/OPT:REF +build:windows --host_linkopt=/OPT:REF +build:windows --linkopt=/OPT:ICF +build:windows --host_linkopt=/OPT:ICF +# This is a workaround for this magic preprocessor constant/macro not existing +# in MSVC +build:windows --host_copt=-D__PRETTY_FUNCTION__=__FUNCSIG__ +build:windows --copt=-D__PRETTY_FUNCTION__=__FUNCSIG__ diff --git a/.github/actions/lyra-builder/action.yml b/.github/actions/lyra-builder/action.yml index a1256ea5..7d9ca6f4 100644 --- a/.github/actions/lyra-builder/action.yml +++ b/.github/actions/lyra-builder/action.yml @@ -28,7 +28,7 @@ runs: - shell: bash run: | mkdir action-product - cp -r wavegru action-product/ + cp -r model_coeffs action-product/ cp bazel-bin/encoder_main action-product/lyra-encoder cp bazel-bin/decoder_main action-product/lyra-decoder - uses: actions/upload-artifact@v2 diff --git a/.github/actions/setup-ndk/action.yml b/.github/actions/setup-lyra-deps/action.yml similarity index 52% rename from .github/actions/setup-ndk/action.yml rename to .github/actions/setup-lyra-deps/action.yml index 8d6667e1..2d3e79f6 100644 --- a/.github/actions/setup-ndk/action.yml +++ b/.github/actions/setup-lyra-deps/action.yml @@ -1,6 +1,6 @@ -name: setup-ndk +name: setup-lyra-deps -description: Setup NDK for lyra build +description: 
Setup NDK and python for lyra build runs: using: composite @@ -8,3 +8,5 @@ runs: - shell: bash run: | $ANDROID_HOME/cmdline-tools/latest/bin/sdkmanager --install "ndk;21.4.7075529" + python -m pip install --upgrade pip + pip install numpy diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a7df2ae..4b9abadd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,8 +8,12 @@ jobs: steps: - name: Checkout repo uses: actions/checkout@v2 - - name: Setup NDK - uses: ./.github/actions/setup-ndk + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Setup Lyra dependencies + uses: ./.github/actions/setup-lyra-deps - name: Build Android App shell: bash run: | @@ -31,8 +35,12 @@ jobs: steps: - name: Checkout repo uses: actions/checkout@v2 - - name: Setup NDK - uses: ./.github/actions/setup-ndk + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Setup Lyra dependencies + uses: ./.github/actions/setup-lyra-deps - name: Build and upload uses: ./.github/actions/lyra-builder with: @@ -45,8 +53,12 @@ jobs: steps: - name: Checkout repo uses: actions/checkout@v2 - - name: Setup NDK - uses: ./.github/actions/setup-ndk + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Setup Lyra dependencies + uses: ./.github/actions/setup-lyra-deps - name: Build and upload uses: ./.github/actions/lyra-builder with: @@ -58,8 +70,12 @@ jobs: steps: - name: Checkout repo uses: actions/checkout@v2 - - name: Setup NDK - uses: ./.github/actions/setup-ndk + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Setup Lyra dependencies + uses: ./.github/actions/setup-lyra-deps - name: Build and upload uses: ./.github/actions/lyra-builder with: diff --git a/BUILD b/BUILD index 977c36ae..5dc8c30e 100644 --- a/BUILD +++ b/BUILD @@ -9,7 +9,7 @@ licenses(["notice"]) # To run all cc_tests in 
this directory: # bazel test //:all -# [internal] Command to run dsp_util_android_test. +# [internal] Command to run dsp_utils_android_test. # [internal] Command to run lyra_integration_android_test. @@ -23,12 +23,18 @@ exports_files( "encoder_main.cc", "encoder_main_lib.cc", "encoder_main_lib.h", + "lyra_benchmark.cc", + "lyra_benchmark_lib.cc", + "lyra_benchmark_lib.h", "lyra_components.h", "lyra_config.h", "lyra_decoder.cc", "lyra_decoder.h", "lyra_encoder.cc", "lyra_encoder.h", + "model_coeffs/lyragan.tflite", + "model_coeffs/quantizer.tflite", + "model_coeffs/soundstream_encoder.tflite", ], ) @@ -44,99 +50,20 @@ cc_library( ) cc_library( - name = "layer_wrapper_interface", - hdrs = ["layer_wrapper_interface.h"], - deps = [ - "//sparse_matmul", - ], -) - -cc_library( - name = "layer_wrapper", - hdrs = ["layer_wrapper.h"], - deps = [ - ":dsp_util", - ":layer_wrapper_interface", - "//sparse_matmul", - "@com_google_glog//:glog", - ], -) - -cc_library( - name = "conv1d_layer_wrapper", - hdrs = ["conv1d_layer_wrapper.h"], - deps = [ - ":layer_wrapper", - "//sparse_matmul", - "@com_google_absl//absl/memory", - "@com_google_glog//:glog", - ], -) - -cc_library( - name = "dilated_convolutional_layer_wrapper", - hdrs = ["dilated_convolutional_layer_wrapper.h"], - deps = [ - ":layer_wrapper", - "//sparse_matmul", - "@com_google_absl//absl/memory", - "@com_google_glog//:glog", - ], -) - -cc_library( - name = "transpose_convolutional_layer_wrapper", - hdrs = ["transpose_convolutional_layer_wrapper.h"], - deps = [ - ":layer_wrapper", - "//sparse_matmul", - "@com_google_absl//absl/memory", - "@com_google_glog//:glog", - ], -) - -cc_library( - name = "layer_wrappers_lib", - hdrs = ["layer_wrappers_lib.h"], - deps = [ - ":conv1d_layer_wrapper", - ":dilated_convolutional_layer_wrapper", - ":layer_wrapper", - ":transpose_convolutional_layer_wrapper", - ], -) - -cc_library( - name = "causal_convolutional_conditioning", - hdrs = ["causal_convolutional_conditioning.h"], - deps = [ - 
":dsp_util", - ":layer_wrappers_lib", - ":lyra_types", - "//sparse_matmul", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings:str_format", - "@com_google_absl//absl/types:span", - "@com_google_glog//:glog", - ], -) - -cc_library( - name = "benchmark_decode_lib", - srcs = ["benchmark_decode_lib.cc"], - hdrs = ["benchmark_decode_lib.h"], + name = "lyra_benchmark_lib", + srcs = ["lyra_benchmark_lib.cc"], + hdrs = ["lyra_benchmark_lib.h"], deps = [ ":architecture_utils", - ":dsp_util", + ":dsp_utils", + ":feature_extractor_interface", ":generative_model_interface", - ":log_mel_spectrogram_extractor_impl", + ":lyra_components", ":lyra_config", - ":wavegru_model_impl", "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/status", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@com_google_audio_dsp//audio/dsp:signal_vector_util", "@com_google_glog//:glog", @@ -145,32 +72,42 @@ cc_library( ) cc_library( - name = "generative_model_interface", + name = "feature_estimator_interface", hdrs = [ - "generative_model_interface.h", + "feature_estimator_interface.h", ], deps = [ - "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", ], ) cc_library( - name = "resampler_interface", + name = "zero_feature_estimator", hdrs = [ - "resampler_interface.h", + "zero_feature_estimator.h", ], deps = [ + ":feature_estimator_interface", "@com_google_absl//absl/types:span", ], ) cc_library( - name = "denoiser_interface", + name = "generative_model_interface", hdrs = [ - "denoiser_interface.h", + "generative_model_interface.h", + ], + deps = [ + "@com_google_glog//:glog", + ], +) + +cc_library( + name = "resampler_interface", + hdrs = [ + "resampler_interface.h", ], deps = [ - "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/types:span", ], ) @@ -181,7 +118,6 @@ cc_library( 
"feature_extractor_interface.h", ], deps = [ - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", ], ) @@ -192,7 +128,6 @@ cc_library( "lyra_decoder_interface.h", ], deps = [ - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", ], ) @@ -203,7 +138,6 @@ cc_library( "lyra_encoder_interface.h", ], deps = [ - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", ], ) @@ -214,88 +148,29 @@ cc_library( "vector_quantizer_interface.h", ], deps = [ - "@com_google_absl//absl/types:optional", - ], -) - -cc_library( - name = "filter_banks_interface", - hdrs = [ - "filter_banks_interface.h", - ], -) - -cc_library( - name = "wavegru_model_impl", - srcs = [ - "wavegru_model_impl.cc", - ], - hdrs = [ - "wavegru_model_impl.h", - ], - copts = [ - "-O3", - ], - data = glob(["wavegru/**"]), - deps = [ - ":buffer_merger", - ":causal_convolutional_conditioning", - ":generative_model_interface", - ":lyra_types", - ":lyra_wavegru", - "//sparse_matmul", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:optional", - "@com_google_glog//:glog", - "@gulrak_filesystem//:filesystem", ], ) cc_library( - name = "wavegru_model_impl_fixed16", + name = "lyra_gan_model", srcs = [ - "wavegru_model_impl.cc", + "lyra_gan_model.cc", ], hdrs = [ - "wavegru_model_impl.h", - ], - copts = [ - "-O3", - "-DUSE_FIXED16", + "lyra_gan_model.h", ], - data = glob(["wavegru/**"]), + data = ["model_coeffs/lyragan.tflite"], deps = [ - ":buffer_merger", - ":causal_convolutional_conditioning", + ":dsp_utils", ":generative_model_interface", - ":lyra_types", - ":lyra_wavegru", - "//sparse_matmul", + ":tflite_model_wrapper", "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", "@com_google_glog//:glog", 
"@gulrak_filesystem//:filesystem", ], ) -cc_library( - name = "naive_spectrogram_predictor", - srcs = [ - "naive_spectrogram_predictor.cc", - ], - hdrs = [ - "naive_spectrogram_predictor.h", - ], - deps = [ - ":log_mel_spectrogram_extractor_impl", - ":spectrogram_predictor_interface", - ], -) - cc_library( name = "lyra_decoder", srcs = [ @@ -306,76 +181,25 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ + ":buffered_filter_interface", + ":buffered_resampler", ":comfort_noise_generator", + ":feature_estimator_interface", ":generative_model_interface", ":lyra_components", ":lyra_config", ":lyra_decoder_interface", - ":packet_interface", - ":packet_loss_handler", - ":packet_loss_handler_interface", - ":resampler", - ":resampler_interface", - ":vector_quantizer_interface", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", - "@com_google_glog//:glog", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_library( - name = "lyra_decoder_fixed16", - testonly = 1, - srcs = [ - "lyra_decoder.cc", - ], - hdrs = [ - "lyra_decoder.h", - ], - copts = ["-DUSE_FIXED16"], - visibility = ["//visibility:public"], - deps = [ - ":comfort_noise_generator", - ":generative_model_interface", - ":lyra_components_fixed16", - ":lyra_config", - ":lyra_decoder_interface", - ":packet_interface", - ":packet_loss_handler", - ":packet_loss_handler_interface", - ":resampler", - ":resampler_interface", + ":noise_estimator", + ":noise_estimator_interface", ":vector_quantizer_interface", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@com_google_glog//:glog", "@gulrak_filesystem//:filesystem", ], ) -cc_library( - name = "packet_loss_handler", - srcs = ["packet_loss_handler.cc"], - hdrs = ["packet_loss_handler.h"], - 
deps = [ - ":naive_spectrogram_predictor", - ":noise_estimator", - ":noise_estimator_interface", - ":packet_loss_handler_interface", - ":spectrogram_predictor_interface", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:optional", - "@com_google_glog//:glog", - ], -) - cc_library( name = "decoder_main_lib", srcs = [ @@ -385,10 +209,16 @@ cc_library( "decoder_main_lib.h", ], deps = [ + ":fixed_packet_loss_model", ":gilbert_model", ":lyra_config", ":lyra_decoder", - ":wav_util", + ":packet_loss_model_interface", + ":wav_utils", + "@com_google_absl//absl/flags:marshalling", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:bit_gen_ref", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/time", "@com_google_absl//absl/types:span", @@ -406,13 +236,11 @@ cc_library( "comfort_noise_generator.h", ], deps = [ - ":dsp_util", + ":dsp_utils", ":generative_model_interface", ":log_mel_spectrogram_extractor_impl", "@com_google_absl//absl/memory", "@com_google_absl//absl/random", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@com_google_audio_dsp//audio/dsp:number_util", "@com_google_audio_dsp//audio/dsp/mfcc", @@ -421,40 +249,6 @@ cc_library( ], ) -cc_library( - name = "lyra_encoder_fixed16", - srcs = [ - "lyra_encoder.cc", - ], - hdrs = [ - "lyra_encoder.h", - ], - visibility = ["//visibility:public"], - deps = [ - ":denoiser_interface", - ":dsp_util", - ":feature_extractor_interface", - ":lyra_components_fixed16", - ":lyra_config", - ":lyra_encoder_interface", - ":noise_estimator", - ":noise_estimator_interface", - ":packet", - ":packet_interface", - ":resampler", - ":resampler_interface", - ":vector_quantizer_interface", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", - "@com_google_audio_dsp//audio/linear_filters:biquad_filter", - 
"@com_google_audio_dsp//audio/linear_filters:biquad_filter_coefficients", - "@com_google_glog//:glog", - "@gulrak_filesystem//:filesystem", - ], -) - cc_library( name = "lyra_encoder", srcs = [ @@ -465,8 +259,6 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":denoiser_interface", - ":dsp_util", ":feature_extractor_interface", ":lyra_components", ":lyra_config", @@ -480,10 +272,7 @@ cc_library( ":vector_quantizer_interface", "@com_google_absl//absl/memory", "@com_google_absl//absl/status", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", - "@com_google_audio_dsp//audio/linear_filters:biquad_filter", - "@com_google_audio_dsp//audio/linear_filters:biquad_filter_coefficients", "@com_google_glog//:glog", "@gulrak_filesystem//:filesystem", ], @@ -501,7 +290,7 @@ cc_library( ":lyra_config", ":lyra_encoder", ":no_op_preprocessor", - ":wav_util", + ":wav_utils", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -524,7 +313,7 @@ cc_library( ":log_mel_spectrogram_extractor_impl", ":noise_estimator_interface", "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", "@com_google_audio_dsp//audio/dsp:signal_vector_util", "@com_google_glog//:glog", ], @@ -536,38 +325,39 @@ cc_library( "noise_estimator_interface.h", ], deps = [ - "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", ], ) cc_library( - name = "gilbert_model", + name = "packet_loss_model_interface", + hdrs = ["packet_loss_model_interface.h"], + deps = [], +) + +cc_library( + name = "fixed_packet_loss_model", srcs = [ - "gilbert_model.cc", + "fixed_packet_loss_model.cc", ], hdrs = [ - "gilbert_model.h", - ], - deps = [ - "@com_google_absl//absl/memory", - "@com_google_glog//:glog", + "fixed_packet_loss_model.h", ], + deps = [":packet_loss_model_interface"], ) cc_library( - name = "packet_loss_handler_interface", + name = 
"gilbert_model", + srcs = [ + "gilbert_model.cc", + ], hdrs = [ - "packet_loss_handler_interface.h", + "gilbert_model.h", ], deps = [ - "@com_google_absl//absl/types:optional", - ], -) - -cc_library( - name = "spectrogram_predictor_interface", - hdrs = [ - "spectrogram_predictor_interface.h", + ":packet_loss_model_interface", + "@com_google_absl//absl/memory", + "@com_google_glog//:glog", ], ) @@ -582,7 +372,6 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_glog//:glog", - "@com_google_protobuf//:protobuf", "@gulrak_filesystem//:filesystem", ], ) @@ -606,95 +395,75 @@ cc_library( "lyra_components.h", ], deps = [ - ":denoiser_interface", + ":feature_estimator_interface", ":feature_extractor_interface", ":generative_model_interface", - ":log_mel_spectrogram_extractor_impl", + ":lyra_gan_model", ":packet", ":packet_interface", - ":vector_quantizer_impl", + ":residual_vector_quantizer", + ":soundstream_encoder", ":vector_quantizer_interface", - ":wavegru_model_impl", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/status:statusor", - "@eigen_archive//:eigen", + ":zero_feature_estimator", "@gulrak_filesystem//:filesystem", ], ) cc_library( - name = "lyra_components_fixed16", + name = "log_mel_spectrogram_extractor_impl", srcs = [ - "lyra_components.cc", + "log_mel_spectrogram_extractor_impl.cc", ], hdrs = [ - "lyra_components.h", + "log_mel_spectrogram_extractor_impl.h", ], deps = [ - ":denoiser_interface", ":feature_extractor_interface", - ":generative_model_interface", - ":log_mel_spectrogram_extractor_impl", - ":packet", - ":packet_interface", - ":vector_quantizer_impl", - ":vector_quantizer_interface", - ":wavegru_model_impl_fixed16", "@com_google_absl//absl/memory", - "@com_google_absl//absl/status:statusor", - "@eigen_archive//:eigen", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_library( - name = "lyra_types", - hdrs = ["lyra_types.h"], - copts = ["-O3"], - deps = [ - ":layer_wrapper", 
- "//sparse_matmul", + "@com_google_absl//absl/types:span", + "@com_google_audio_dsp//audio/dsp:number_util", + "@com_google_audio_dsp//audio/dsp/mfcc", + "@com_google_audio_dsp//audio/dsp/spectrogram", + "@com_google_glog//:glog", ], ) cc_library( - name = "log_mel_spectrogram_extractor_impl", + name = "soundstream_encoder", srcs = [ - "log_mel_spectrogram_extractor_impl.cc", + "soundstream_encoder.cc", ], hdrs = [ - "log_mel_spectrogram_extractor_impl.h", + "soundstream_encoder.h", ], + data = ["model_coeffs/soundstream_encoder.tflite"], deps = [ + ":dsp_utils", ":feature_extractor_interface", + ":tflite_model_wrapper", "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", - "@com_google_audio_dsp//audio/dsp:number_util", - "@com_google_audio_dsp//audio/dsp/mfcc", - "@com_google_audio_dsp//audio/dsp/spectrogram", "@com_google_glog//:glog", + "@gulrak_filesystem//:filesystem", ], ) cc_library( - name = "vector_quantizer_impl", + name = "residual_vector_quantizer", srcs = [ - "vector_quantizer_impl.cc", + "residual_vector_quantizer.cc", ], hdrs = [ - "vector_quantizer_impl.h", + "residual_vector_quantizer.h", + ], + data = [ + "model_coeffs/quantizer.tflite", ], - data = glob(["wavegru/**"]), deps = [ + ":tflite_model_wrapper", ":vector_quantizer_interface", - "//sparse_matmul", "@com_google_absl//absl/memory", - "@com_google_absl//absl/status", - "@com_google_absl//absl/types:optional", - "@com_google_audio_dsp//audio/dsp:signal_vector_util", "@com_google_glog//:glog", - "@eigen_archive//:eigen", "@gulrak_filesystem//:filesystem", ], ) @@ -705,7 +474,6 @@ cc_library( "packet_interface.h", ], deps = [ - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", ], ) @@ -715,81 +483,40 @@ cc_library( hdrs = ["packet.h"], deps = [ ":packet_interface", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", - "@com_google_glog//:glog", - ], -) - -cc_library( - name 
= "lyra_wavegru", - hdrs = ["lyra_wavegru.h"], - deps = [ - ":causal_convolutional_conditioning", - ":dsp_util", - ":layer_wrappers_lib", - ":lyra_types", - ":project_and_sample", - "//sparse_matmul", "@com_google_absl//absl/memory", - "@com_google_absl//absl/time", "@com_google_absl//absl/types:span", "@com_google_glog//:glog", - "@gulrak_filesystem//:filesystem", ], ) cc_library( - name = "project_and_sample", - hdrs = [ - "project_and_sample.h", - ], - copts = ["-O3"], - deps = [ - ":lyra_types", - "//sparse_matmul", - "@com_google_absl//absl/status", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/time", - "@com_google_glog//:glog", - ], + name = "buffered_filter_interface", + hdrs = ["buffered_filter_interface.h"], ) cc_library( - name = "filter_banks", - srcs = ["filter_banks.cc"], - hdrs = ["filter_banks.h"], + name = "buffered_resampler", + srcs = ["buffered_resampler.cc"], + hdrs = ["buffered_resampler.h"], deps = [ - ":filter_banks_interface", - ":quadrature_mirror_filter", + ":buffered_filter_interface", + ":resampler", + ":resampler_interface", "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:span", "@com_google_glog//:glog", ], ) -cc_library( - name = "quadrature_mirror_filter", - srcs = ["quadrature_mirror_filter.cc"], - hdrs = ["quadrature_mirror_filter.h"], +cc_test( + name = "buffered_resampler_test", + srcs = ["buffered_resampler_test.cc"], deps = [ - ":dsp_util", + ":buffered_resampler", + ":lyra_config", + ":resampler_interface", + "//testing:mock_resampler", "@com_google_absl//absl/types:span", - "@com_google_audio_dsp//audio/linear_filters:biquad_filter", - "@com_google_audio_dsp//audio/linear_filters:biquad_filter_coefficients", - "@com_google_glog//:glog", - ], -) - -cc_library( - name = "buffer_merger", - srcs = ["buffer_merger.cc"], - hdrs = ["buffer_merger.h"], - deps = [ - ":filter_banks", - ":filter_banks_interface", - "@com_google_absl//absl/memory", - "@com_google_glog//:glog", + 
"@com_google_googletest//:gtest_main", ], ) @@ -820,6 +547,7 @@ cc_test( srcs = ["no_op_preprocessor_test.cc"], deps = [ ":no_op_preprocessor", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", ], ) @@ -829,6 +557,7 @@ cc_binary( srcs = [ "encoder_main.cc", ], + data = [":tflite_testdata"], linkopts = select({ ":android_config": ["-landroid"], "//conditions:default": [], @@ -850,6 +579,7 @@ cc_binary( srcs = [ "decoder_main.cc", ], + data = [":tflite_testdata"], linkopts = select({ ":android_config": ["-landroid"], "//conditions:default": [], @@ -867,108 +597,20 @@ cc_binary( ) cc_binary( - name = "benchmark_decode", + name = "lyra_benchmark", srcs = [ - "benchmark_decode.cc", + "lyra_benchmark.cc", ], linkopts = select({ ":android_config": ["-landroid"], "//conditions:default": [], }), deps = [ - ":benchmark_decode_lib", + ":lyra_benchmark_lib", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/flags:parse", "@com_google_absl//absl/flags:usage", - ], -) - -cc_test( - name = "lyra_wavegru_test", - size = "small", - timeout = "short", - srcs = ["lyra_wavegru_test.cc"], - data = glob(["wavegru/**"]), - deps = [ - ":exported_layers_test", - ":lyra_config", - ":lyra_wavegru", - "//sparse_matmul", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_test( - name = "lyra_wavegru_test_fixed16", - size = "small", - timeout = "short", - srcs = ["lyra_wavegru_test.cc"], - copts = [ - "-DUSE_FIXED16", - ], - data = glob(["wavegru/**"]), - deps = [ - ":lyra_config", - ":lyra_wavegru", - "//sparse_matmul", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_test( - name = "lyra_wavegru_test_bfloat16", - size = "small", - timeout = "short", - srcs = ["lyra_wavegru_test.cc"], - copts = [ - "-DUSE_BFLOAT16", - ], - data = glob(["wavegru/**"]), - deps = [ - ":lyra_config", 
- ":lyra_wavegru", - "//sparse_matmul", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_test( - name = "project_and_sample_test", - size = "small", - timeout = "short", - srcs = ["project_and_sample_test.cc"], - data = glob(["wavegru/**"]) + [ - "//testdata:lyra_means_bias.raw.gz", - "//testdata:lyra_means_fixed16_weights.raw.gz", - "//testdata:lyra_means_mask.raw.gz", - "//testdata:lyra_means_weights.raw.gz", - "//testdata:lyra_mix_bias.raw.gz", - "//testdata:lyra_mix_fixed16_weights.raw.gz", - "//testdata:lyra_mix_mask.raw.gz", - "//testdata:lyra_mix_weights.raw.gz", - "//testdata:lyra_proj_bias.raw.gz", - "//testdata:lyra_proj_fixed16_weights.raw.gz", - "//testdata:lyra_proj_mask.raw.gz", - "//testdata:lyra_proj_weights.raw.gz", - "//testdata:lyra_scales_bias.raw.gz", - "//testdata:lyra_scales_fixed16_weights.raw.gz", - "//testdata:lyra_scales_mask.raw.gz", - "//testdata:lyra_scales_weights.raw.gz", - ], - deps = [ - ":exported_layers_test", - ":lyra_types", - ":project_and_sample", - "//sparse_matmul", - "@com_google_absl//absl/strings:str_format", - "@com_google_googletest//:gtest_main", - "@gulrak_filesystem//:filesystem", + "@com_google_absl//absl/strings", ], ) @@ -976,26 +618,24 @@ cc_test( name = "lyra_decoder_test", size = "large", srcs = ["lyra_decoder_test.cc"], + data = [":tflite_testdata"], shard_count = 8, deps = [ + ":buffered_filter_interface", + ":buffered_resampler", + ":dsp_utils", + ":feature_estimator_interface", ":generative_model_interface", - ":log_mel_spectrogram_extractor_impl", + ":lyra_components", ":lyra_config", ":lyra_decoder", - ":packet", ":packet_interface", - ":packet_loss_handler_interface", ":resampler", - ":resampler_interface", ":vector_quantizer_interface", "//testing:mock_generative_model", - "//testing:mock_packet_loss_handler", - "//testing:mock_resampler", + "//testing:mock_noise_estimator", "//testing:mock_vector_quantizer", - 
"@com_google_absl//absl/memory", - "@com_google_absl//absl/random", "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", "@gulrak_filesystem//:filesystem", @@ -1008,119 +648,46 @@ cc_test( srcs = ["comfort_noise_generator_test.cc"], deps = [ ":comfort_noise_generator", - "@com_google_absl//absl/types:optional", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "packet_loss_handler_test", - size = "small", - srcs = ["packet_loss_handler_test.cc"], - deps = [ - ":lyra_config", - ":noise_estimator_interface", - ":packet_loss_handler", - ":spectrogram_predictor_interface", - "//testing:mock_noise_estimator", - "//testing:mock_spectrogram_predictor", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "naive_spectrogram_predictor_test", - size = "small", - srcs = ["naive_spectrogram_predictor_test.cc"], - deps = [ + ":dsp_utils", ":log_mel_spectrogram_extractor_impl", - ":lyra_config", - ":naive_spectrogram_predictor", "@com_google_googletest//:gtest_main", ], ) cc_test( - name = "wavegru_model_impl_test", - size = "small", - timeout = "short", - srcs = ["wavegru_model_impl_test.cc"], + name = "lyra_gan_model_test", + srcs = ["lyra_gan_model_test.cc"], deps = [ ":lyra_config", - ":wavegru_model_impl", + ":lyra_gan_model", "@com_google_googletest//:gtest_main", "@gulrak_filesystem//:filesystem", ], ) -cc_library( - name = "exported_layers_test", - testonly = 1, - hdrs = [ - "exported_layers_test.h", - ], - deps = [ - ":layer_wrappers_lib", - ":lyra_types", - "//sparse_matmul", - "@com_google_absl//absl/random", - "@com_google_googletest//:gtest", - "@gulrak_filesystem//:filesystem", - ], -) - cc_test( name = "lyra_integration_test", size = "small", timeout = "long", srcs = ["lyra_integration_test.cc"], data = [ - "//testdata:16khz_sample_000001.wav", - "//testdata:32khz_sample_000002.wav", - "//testdata:48khz_sample_000003.wav", - 
"//testdata:8khz_sample_000000.wav", + ":tflite_testdata", + "//testdata:sample1_16kHz.wav", + "//testdata:sample1_32kHz.wav", + "//testdata:sample1_48kHz.wav", + "//testdata:sample1_8kHz.wav", ], shard_count = 4, deps = [ - ":dsp_util", + ":dsp_utils", ":log_mel_spectrogram_extractor_impl", ":lyra_config", ":lyra_decoder", ":lyra_encoder", - ":wav_util", - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:span", - "@com_google_glog//:glog", - "@com_google_googletest//:gtest_main", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_test( - name = "lyra_integration_test_fixed16", - size = "small", - timeout = "long", - srcs = ["lyra_integration_test.cc"], - copts = ["-DUSE_FIXED16"], - data = [ - "//testdata:16khz_sample_000001.wav", - "//testdata:32khz_sample_000002.wav", - "//testdata:48khz_sample_000003.wav", - "//testdata:8khz_sample_000000.wav", - ], - shard_count = 4, - deps = [ - ":dsp_util", - ":log_mel_spectrogram_extractor_impl", - ":lyra_config", - ":lyra_decoder_fixed16", - ":lyra_encoder_fixed16", - ":wav_util", + ":wav_utils", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", - "@com_google_glog//:glog", "@com_google_googletest//:gtest_main", "@gulrak_filesystem//:filesystem", ], @@ -1131,15 +698,15 @@ cc_test( size = "small", srcs = ["encoder_main_lib_test.cc"], data = [ - "//testdata:16khz_sample_000001.wav", - "//testdata:32khz_sample_000002.wav", - "//testdata:48khz_sample_000003.wav", - "//testdata:8khz_sample_000000.wav", + ":tflite_testdata", + "//testdata:sample1_16kHz.wav", + "//testdata:sample1_32kHz.wav", + "//testdata:sample1_48kHz.wav", + "//testdata:sample1_8kHz.wav", ], deps = [ ":encoder_main_lib", "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", "@gulrak_filesystem//:filesystem", @@ -1151,16 +718,17 @@ cc_test( size = 
"large", srcs = ["decoder_main_lib_test.cc"], data = [ - "//testdata:incomplete_encoded_frame", - "//testdata:no_encoded_frames", - "//testdata:one_encoded_frame_16khz", - "//testdata:two_encoded_frames_16khz.lyra", + ":tflite_testdata", + "//testdata:incomplete_encoded_packet.lyra", + "//testdata:no_encoded_packet.lyra", + "//testdata:one_encoded_packet_16khz.lyra", + "//testdata:two_encoded_packets_16khz.lyra", ], shard_count = 4, deps = [ ":decoder_main_lib", ":lyra_config", - ":wav_util", + ":wav_utils", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -1174,8 +742,21 @@ cc_test( size = "small", srcs = ["noise_estimator_test.cc"], deps = [ + ":comfort_noise_generator", + ":dsp_utils", + ":log_mel_spectrogram_extractor_impl", + ":lyra_config", ":noise_estimator", - "@com_google_absl//absl/types:optional", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "fixed_packet_loss_model_test", + size = "small", + srcs = ["fixed_packet_loss_model_test.cc"], + deps = [ + ":fixed_packet_loss_model", "@com_google_googletest//:gtest_main", ], ) @@ -1214,28 +795,35 @@ cc_binary( ], ) +cc_test( + name = "soundstream_encoder_test", + srcs = ["soundstream_encoder_test.cc"], + deps = [ + ":lyra_config", + ":soundstream_encoder", + "@com_google_googletest//:gtest_main", + "@gulrak_filesystem//:filesystem", + ], +) + cc_test( name = "lyra_encoder_test", size = "small", srcs = ["lyra_encoder_test.cc"], + data = [":tflite_testdata"], shard_count = 8, deps = [ - ":denoiser_interface", ":feature_extractor_interface", ":lyra_config", ":lyra_encoder", ":noise_estimator_interface", ":packet", - ":packet_interface", ":resampler_interface", ":vector_quantizer_interface", - "//testing:mock_denoiser", "//testing:mock_feature_extractor", "//testing:mock_noise_estimator", "//testing:mock_resampler", "//testing:mock_vector_quantizer", - "@com_google_absl//absl/memory", - 
"@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", "@gulrak_filesystem//:filesystem", @@ -1243,160 +831,15 @@ cc_test( ) cc_test( - name = "vector_quantizer_impl_test", + name = "residual_vector_quantizer_test", size = "small", srcs = [ - "vector_quantizer_impl_test.cc", + "residual_vector_quantizer_test.cc", ], deps = [ + ":log_mel_spectrogram_extractor_impl", ":lyra_config", - ":vector_quantizer_impl", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - "@eigen_archive//:eigen", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_test( - name = "causal_convolutional_conditioning_test", - size = "small", - srcs = ["causal_convolutional_conditioning_test.cc"], - data = glob(["wavegru/**"]) + [ - "//testdata:codec.gz", - "//testdata:lyra_conditioning_stack_0_bias.raw.gz", - "//testdata:lyra_conditioning_stack_0_fixed16_weights.raw.gz", - "//testdata:lyra_conditioning_stack_0_mask.raw.gz", - "//testdata:lyra_conditioning_stack_0_weights.raw.gz", - "//testdata:lyra_conditioning_stack_1_bias.raw.gz", - "//testdata:lyra_conditioning_stack_1_fixed16_weights.raw.gz", - "//testdata:lyra_conditioning_stack_1_mask.raw.gz", - "//testdata:lyra_conditioning_stack_1_weights.raw.gz", - "//testdata:lyra_conditioning_stack_2_bias.raw.gz", - "//testdata:lyra_conditioning_stack_2_fixed16_weights.raw.gz", - "//testdata:lyra_conditioning_stack_2_mask.raw.gz", - "//testdata:lyra_conditioning_stack_2_weights.raw.gz", - "//testdata:lyra_conv1d_bias.raw.gz", - "//testdata:lyra_conv1d_fixed16_weights.raw.gz", - "//testdata:lyra_conv1d_mask.raw.gz", - "//testdata:lyra_conv1d_weights.raw.gz", - "//testdata:lyra_conv_cond_bias.raw.gz", - "//testdata:lyra_conv_cond_fixed16_weights.raw.gz", - "//testdata:lyra_conv_cond_mask.raw.gz", - "//testdata:lyra_conv_cond_weights.raw.gz", - "//testdata:lyra_conv_to_gates_bias.raw.gz", - 
"//testdata:lyra_conv_to_gates_fixed16_weights.raw.gz", - "//testdata:lyra_conv_to_gates_mask.raw.gz", - "//testdata:lyra_conv_to_gates_weights.raw.gz", - "//testdata:lyra_transpose_0_bias.raw.gz", - "//testdata:lyra_transpose_0_fixed16_weights.raw.gz", - "//testdata:lyra_transpose_0_mask.raw.gz", - "//testdata:lyra_transpose_0_weights.raw.gz", - "//testdata:lyra_transpose_1_bias.raw.gz", - "//testdata:lyra_transpose_1_fixed16_weights.raw.gz", - "//testdata:lyra_transpose_1_mask.raw.gz", - "//testdata:lyra_transpose_1_weights.raw.gz", - "//testdata:lyra_transpose_2_bias.raw.gz", - "//testdata:lyra_transpose_2_fixed16_weights.raw.gz", - "//testdata:lyra_transpose_2_mask.raw.gz", - "//testdata:lyra_transpose_2_weights.raw.gz", - "//testdata:transpose_2.gz", - ], - deps = [ - ":causal_convolutional_conditioning", - ":exported_layers_test", - ":lyra_config", - ":lyra_types", - "//sparse_matmul", - "@com_google_absl//absl/types:span", - "@com_google_googletest//:gtest_main", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_library( - name = "layer_wrapper_test_common", - testonly = 1, - hdrs = [ - "layer_wrapper_test_common.h", - ], - visibility = ["//visibility:public"], - deps = [ - ":layer_wrappers_lib", - "//sparse_matmul", - "@com_google_absl//absl/memory", - "@com_google_googletest//:gtest", - ], -) - -cc_test( - name = "conv1d_layer_wrapper_test", - size = "small", - srcs = ["conv1d_layer_wrapper_test.cc"], - data = [ - "//testdata:lyra_conv1d_bias.raw.gz", - "//testdata:lyra_conv1d_fixed16_weights.raw.gz", - "//testdata:lyra_conv1d_mask.raw.gz", - "//testdata:lyra_conv1d_weights.raw.gz", - "//testdata:test_conv1d_bias.raw.gz", - "//testdata:test_conv1d_fixed16_weights.raw.gz", - "//testdata:test_conv1d_mask.raw.gz", - "//testdata:test_conv1d_weights.raw.gz", - ], - deps = [ - ":conv1d_layer_wrapper", - ":layer_wrapper", - ":layer_wrapper_test_common", - "//sparse_matmul", - "@com_google_googletest//:gtest_main", - "@gulrak_filesystem//:filesystem", - ], -) - 
-cc_test( - name = "dilated_convolutional_layer_wrapper_test", - size = "small", - srcs = ["dilated_convolutional_layer_wrapper_test.cc"], - data = [ - "//testdata:lyra_conditioning_stack_2_bias.raw.gz", - "//testdata:lyra_conditioning_stack_2_fixed16_weights.raw.gz", - "//testdata:lyra_conditioning_stack_2_mask.raw.gz", - "//testdata:lyra_conditioning_stack_2_weights.raw.gz", - "//testdata:test_dilated_bias.raw.gz", - "//testdata:test_dilated_fixed16_weights.raw.gz", - "//testdata:test_dilated_mask.raw.gz", - "//testdata:test_dilated_weights.raw.gz", - ], - deps = [ - ":dilated_convolutional_layer_wrapper", - ":layer_wrapper", - ":layer_wrapper_test_common", - "//sparse_matmul", - "@com_google_googletest//:gtest_main", - "@gulrak_filesystem//:filesystem", - ], -) - -cc_test( - name = "transpose_convolutional_layer_wrapper_test", - size = "small", - srcs = ["transpose_convolutional_layer_wrapper_test.cc"], - data = [ - "//testdata:lyra_transpose_2_bias.raw.gz", - "//testdata:lyra_transpose_2_fixed16_weights.raw.gz", - "//testdata:lyra_transpose_2_mask.raw.gz", - "//testdata:lyra_transpose_2_weights.raw.gz", - "//testdata:test_transpose_bias.raw.gz", - "//testdata:test_transpose_fixed16_weights.raw.gz", - "//testdata:test_transpose_mask.raw.gz", - "//testdata:test_transpose_weights.raw.gz", - ], - deps = [ - ":layer_wrapper", - ":layer_wrapper_test_common", - ":transpose_convolutional_layer_wrapper", - "//sparse_matmul", + ":residual_vector_quantizer", "@com_google_googletest//:gtest_main", "@gulrak_filesystem//:filesystem", ], @@ -1408,7 +851,6 @@ cc_test( srcs = ["packet_test.cc"], deps = [ ":packet", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", ], @@ -1421,7 +863,7 @@ cc_library( ], hdrs = ["resampler.h"], deps = [ - ":dsp_util", + ":dsp_utils", ":resampler_interface", "@com_google_absl//absl/memory", "@com_google_absl//absl/types:span", @@ -1444,14 +886,12 @@ cc_test( ) cc_library( - name 
= "dsp_util", + name = "dsp_utils", srcs = [ - "dsp_util.cc", + "dsp_utils.cc", ], - hdrs = ["dsp_util.h"], + hdrs = ["dsp_utils.h"], deps = [ - "//sparse_matmul", - "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@com_google_audio_dsp//audio/dsp:signal_vector_util", "@com_google_glog//:glog", @@ -1459,11 +899,11 @@ cc_library( ) cc_library( - name = "wav_util", + name = "wav_utils", srcs = [ - "wav_util.cc", + "wav_utils.cc", ], - hdrs = ["wav_util.h"], + hdrs = ["wav_utils.h"], deps = [ "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", @@ -1473,86 +913,91 @@ cc_library( ], ) -cc_test( - name = "wav_util_test", - size = "small", - srcs = ["wav_util_test.cc"], - data = [ - "//testdata:16khz_sample_000001.wav", - "//testdata:lyra_config.textproto", +cc_library( + name = "tflite_model_wrapper", + srcs = [ + "tflite_model_wrapper.cc", + ], + hdrs = [ + "tflite_model_wrapper.h", ], deps = [ - ":wav_util", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/status:statusor", - "@com_google_googletest//:gtest_main", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:span", + "@com_google_glog//:glog", "@gulrak_filesystem//:filesystem", + "@org_tensorflow//tensorflow/lite:framework", + "@org_tensorflow//tensorflow/lite/delegates/xnnpack:xnnpack_delegate", + "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", ], ) cc_test( - name = "dsp_util_test", + name = "wav_utils_test", size = "small", - srcs = ["dsp_util_test.cc"], - deps = [ - ":dsp_util", - "@com_google_absl//absl/types:span", - "@com_google_googletest//:gtest_main", + srcs = ["wav_utils_test.cc"], + data = [ + "//testdata:invalid.wav", + "//testdata:sample1_16kHz.wav", ], -) - -cc_test( - name = "filter_banks_test", - srcs = ["filter_banks_test.cc"], deps = [ - ":filter_banks", + ":wav_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", 
"@com_google_googletest//:gtest_main", + "@gulrak_filesystem//:filesystem", ], ) cc_test( - name = "quadrature_mirror_filter_test", - srcs = ["quadrature_mirror_filter_test.cc"], + name = "dsp_utils_test", + size = "small", + srcs = ["dsp_utils_test.cc"], deps = [ - ":quadrature_mirror_filter", + ":dsp_utils", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", ], ) cc_test( - name = "buffer_merger_test", - srcs = ["buffer_merger_test.cc"], + name = "tflite_model_wrapper_test", + srcs = ["tflite_model_wrapper_test.cc"], + data = ["model_coeffs/lyragan.tflite"], deps = [ - ":buffer_merger", - ":filter_banks_interface", - ":lyra_config", - "//testing:mock_filter_banks", - "@com_google_absl//absl/memory", + ":tflite_model_wrapper", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", + "@gulrak_filesystem//:filesystem", + "@org_tensorflow//tensorflow/lite:framework", ], ) cc_test( name = "lyra_config_test", srcs = ["lyra_config_test.cc"], + data = [":tflite_testdata"], deps = [ ":lyra_config", + ":lyra_config_cc_proto", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/status", "@com_google_googletest//:gtest_main", + "@gulrak_filesystem//:filesystem", ], ) filegroup( - name = "wavegru_testdata", + name = "tflite_testdata", data = glob([ - "wavegru/*.gz", - "wavegru/*.textproto", + "model_coeffs/*", ]), ) filegroup( name = "android_example_assets", srcs = glob([ - "wavegru/*.gz", - "wavegru/*.textproto", - ]), + "model_coeffs/*.tflite", + ]) + ["model_coeffs/lyra_config.binarypb"], ) diff --git a/README.md b/README.md index 63c4cd2c..b2bb9d1f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## What is Lyra? -[Lyra](https://ai.googleblog.com/2021/02/lyra-new-very-low-bitrate-codec-for.html) +[Lyra](https://ai.googleblog.com/2021/08/soundstream-end-to-end-neural-audio.html) is a high-quality, low-bitrate speech codec that makes voice communication available even on the slowest networks. 
To do this it applies traditional codec techniques while leveraging advances in machine learning (ML) with models @@ -12,27 +12,23 @@ and transmitting voice signals. ### Overview The basic architecture of the Lyra codec is quite simple. Features are extracted -from speech every 40ms and are then compressed for transmission at a bitrate of -3kbps. The features themselves are log mel spectrograms, a list of numbers -representing the speech energy in different frequency bands, which have -traditionally been used for their perceptual relevance because they are modeled -after human auditory response. On the other end, a generative model uses those -features to recreate the speech signal. +from speech every 20ms and are then compressed for transmission at a desired +bitrate between 3.2kbps and 9.2kbps. On the other end, a generative model uses +those features to recreate the speech signal. Lyra harnesses the power of new natural-sounding generative models to maintain the low bitrate of parametric codecs while achieving high quality, on par with state-of-the-art waveform codecs used in most streaming and communication platforms today. -Computational complexity is reduced by using a cheaper recurrent generative -model, a WaveRNN variation, that works at a lower rate, but generates in -parallel multiple signals in different frequency ranges that it later combines -into a single output signal at the desired sample rate. This trick, plus 64-bit -ARM optimizations, enables Lyra to not only run on cloud servers, but also -on-device on mid-range phones, such as Pixel phones, in real time (with a -processing latency of 100ms). This generative model is then trained on thousands -of hours of speech data with speakers in over 70 languages and optimized to -accurately recreate the input audio. 
+Computational complexity is reduced by using a cheaper convolutional generative +model called SoundStream, which enables Lyra to not only run on cloud servers, +but also on-device on low-end phones in real time (with a processing latency of +20ms). This whole system is then trained end-to-end on thousands of hours of +speech data with speakers in over 90 languages and optimized to accurately +recreate the input audio. + +Lyra is supported on Android, Linux, Mac and Windows. ## Prerequisites @@ -41,15 +37,16 @@ There are a few things you'll need to do to set up your computer to build Lyra. ### Common setup Lyra is built using Google's build system, Bazel. Install it following these -[instructions](https://docs.bazel.build/versions/master/install.html). -Bazel verson 5.0.0 is required, and some Linux distributions may make an older -version available in their application repositories, so make sure you are -using the required version or newer. The latest version can be downloaded via +[instructions](https://docs.bazel.build/versions/master/install.html). Bazel +version 5.0.0 is required, and some Linux distributions may make an older version +available in their application repositories, so make sure you are using the +required version or newer. The latest version can be downloaded via [Github](https://github.com/bazelbuild/bazel/releases). -Lyra can be built from linux using bazel for an arm android target, or a linux -target. The android target is optimized for realtime performance. The linux -target is typically used for development and debugging. +You will also need python3 and numpy installed. + +Lyra can be built from Linux using Bazel for an ARM Android target, or a Linux +target, as well as Mac and Windows for native targets. ### Android requirements @@ -58,36 +55,37 @@ toolchain. If you develop with Android Studio already, you might not need to do these steps if ANDROID_HOME and ANDROID_NDK_HOME are defined and pointing at the right version of the NDK. -1.
Download the sdk manager from https://developer.android.com/studio -2. Unzip and cd to the directory -3. Check the available packages to install in case they don't match the following steps. +1. Download command line tools from https://developer.android.com/studio +2. Unzip and cd to the directory +3. Check the available packages to install in case they don't match the + following steps. -``` shell -bin/sdkmanager --sdk_root=$HOME/android/sdk --list -``` + ```shell + bin/sdkmanager --sdk_root=$HOME/android/sdk --list + ``` -Some systems will already have the java runtime set up. But if you see an error -here like `ERROR: JAVA_HOME is not set and no 'java' command could be found -on your PATH.`, this means you need to install the java runtime with `sudo apt -install default-jdk` first. You will also need to add `export -JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64` (type `ls /usr/lib/jvm` to see -which path was installed) to your $HOME/.bashrc and reload it with `source -$HOME/.bashrc`. + Some systems will already have the java runtime set up. But if you see an + error here like `ERROR: JAVA_HOME is not set and no 'java' command could be + found on your PATH.`, this means you need to install the java runtime with + `sudo apt install default-jdk` first. You will also need to add `export + JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64` (type `ls /usr/lib/jvm` to see + which path was installed) to your $HOME/.bashrc and reload it with `source + $HOME/.bashrc`. -4. Install the r21 ndk, android sdk 30, and build tools: +4. Install the r21 ndk, android sdk 30, and build tools: -``` shell -bin/sdkmanager --sdk_root=$HOME/android/sdk --install "platforms;android-30" "build-tools;30.0.3" "ndk;21.4.7075529" -``` + ```shell + bin/sdkmanager --sdk_root=$HOME/android/sdk --install "platforms;android-30" "build-tools;30.0.3" "ndk;21.4.7075529" + ``` -5. Add the following to .bashrc (or export the variables) +5. 
Add the following to .bashrc (or export the variables) -``` shell -export ANDROID_NDK_HOME=$HOME/android/sdk/ndk/21.4.7075529 -export ANDROID_HOME=$HOME/android/sdk -``` + ```shell + export ANDROID_NDK_HOME=$HOME/android/sdk/ndk/21.4.7075529 + export ANDROID_HOME=$HOME/android/sdk + ``` -6. Reload .bashrc (with `source $HOME/.bashrc`) +6. Reload .bashrc (with `source $HOME/.bashrc`) ## Building @@ -96,7 +94,7 @@ platform. ### Building for Linux -You can build the cc_binaries with the default config. `encoder_main` is an +You can build the cc_binaries with the default config. `encoder_main` is an example of a file encoder. ```shell @@ -104,12 +102,12 @@ bazel build -c opt :encoder_main ``` You can run `encoder_main` to encode a test .wav file with some speech in it, -specified by `--input_path`. The `--model_path` flag contains the model data -necessary to encode, and `--output_path` specifies where to write the encoded -(compressed) representation. +specified by `--input_path`. The `--output_dir` specifies where to write the +encoded (compressed) representation, and the desired bitrate can be specified +using the `--bitrate` flag. ```shell -bazel-bin/encoder_main --model_path=wavegru --output_dir=$HOME/temp --input_path=testdata/16khz_sample_000001.wav +bazel-bin/encoder_main --input_path=testdata/sample1_16kHz.wav --output_dir=$HOME/temp --bitrate=3200 ``` Similarly, you can build decoder_main and use it on the output of encoder_main @@ -117,28 +115,29 @@ to decode the encoded data back into speech. ```shell bazel build -c opt :decoder_main -bazel-bin/decoder_main --model_path=wavegru --output_dir=$HOME/temp/ --encoded_path=$HOME/temp/16khz_sample_000001.lyra +bazel-bin/decoder_main --encoded_path=$HOME/temp/sample1_16kHz.lyra --output_dir=$HOME/temp/ --bitrate=3200 ``` Note: the default Bazel toolchain is automatically configured and likely uses -gcc/libstdc++ on Linux. 
This should be satisfactory for most users, but will -differ from the NDK toolchain, which uses clang/libc++. To use a custom clang +gcc/libstdc++ on Linux. This should be satisfactory for most users, but will +differ from the NDK toolchain, which uses clang/libc++. To use a custom clang toolchain on Linux, see toolchain/README.md and .bazelrc. ### Building for Android #### Android App + There is an example APK target called `lyra_android_example` that you can build after you have set up the NDK. -This example is an app with a minimal GUI that has buttons for two options. -One option is to record from the microphone and encode/decode with Lyra so you -can test what Lyra would sound like for your voice. The other option runs a +This example is an app with a minimal GUI that has buttons for two options. One +option is to record from the microphone and encode/decode with Lyra so you can +test what Lyra would sound like for your voice. The other option runs a benchmark that encodes and decodes in the background and prints the timings to logcat. ```shell -bazel build android_example:lyra_android_example --config=android_arm64 --copt=-DBENCHMARK +bazel build -c opt android_example:lyra_android_example --config=android_arm64 --copt=-DBENCHMARK adb install bazel-bin/android_example/lyra_android_example.apk ``` @@ -147,32 +146,24 @@ After this you should see an app called "Lyra Example App". You can open it, and you will see a simple TextView that says the benchmark is running, and when it finishes. -Press "Record from microphone", say a few words (be sure to have your microphone -near your mouth), and then press "Encode and decode to speaker". You should hear -your voice being played back after being coded with Lyra. +Press "Record from microphone", say a few words, and then press "Encode and +decode to speaker". You should hear your voice being played back after being +coded with Lyra. 
If you press 'Benchmark', you should see something like the following in logcat -on a Pixel 4 when running the benchmark: +on a Pixel 6 Pro when running the benchmark: ```shell -I Starting benchmarkDecode() -I I20210401 11:04:06.898649 6870 lyra_wavegru.h:75] lyra_wavegru running fast multiplication kernels for aarch64. -I I20210401 11:04:06.900411 6870 layer_wrapper.h:162] |lyra_16khz_ar_to_gates_| layer: Shape: [3072, 4]. Sparsity: 0 -I I20210401 11:04:07.031975 6870 layer_wrapper.h:162] |lyra_16khz_gru_layer_| layer: Shape: [3072, 1024]. Sparsity: 0.9375 -... -I I20210401 11:04:26.700160 6870 benchmark_decode_lib.cc:167] Using float arithmetic. -I I20210401 11:04:26.700352 6870 benchmark_decode_lib.cc:85] conditioning_only stats for generating 2000 frames of audio, max: 506 us, min: 368 us, mean: 391 us, stdev: 10.3923. -I I20210401 11:04:26.725538 6870 benchmark_decode_lib.cc:85] model_only stats for generating 2000 frames of audio, max: 12690 us, min: 9087 us, mean: 9237 us, stdev: 262.416. -I I20210401 11:04:26.729460 6870 benchmark_decode_lib.cc:85] combined_model_and_conditioning stats for generating 2000 frames of audio, max: 13173 us, min: 9463 us, mean: 9629 us, stdev: 270.788. -I Finished benchmarkDecode() +lyra_benchmark: feature_extractor: max: 0.685 ms min: 0.206 ms mean: 0.219 ms stdev: 0.000 ms +lyra_benchmark: quantizer_quantize: max: 0.250 ms min: 0.076 ms mean: 0.082 ms stdev: 0.000 ms +lyra_benchmark: quantizer_decode: max: 0.152 ms min: 0.027 ms mean: 0.030 ms stdev: 0.001 ms +lyra_benchmark: model_decode: max: 0.560 ms min: 0.223 ms mean: 0.237 ms stdev: 0.000 ms +lyra_benchmark: total: max: 1.560 ms min: 0.541 ms mean: 0.569 ms stdev: 0.005 ms ``` -This shows that decoding a 25Hz frame (each frame is .04 seconds) takes 9629 -microseconds on average (.0096 seconds). So decoding is performed at around -4.15 (.04/.0096) times faster than realtime. 
- -For even faster decoding, you can use a fixed point representation by building -with `--copt=-DUSE_FIXED16`, although there may be some loss of quality. +This shows that decoding a 50Hz frame (each frame is 20 milliseconds) takes +0.569 milliseconds on average. So decoding is performed at around 35 (20/0.569) +times faster than realtime. To build your own android app, you can either use the cc_library target outputs to create a .so that you can use in your own build system. Or you can use it @@ -203,17 +194,39 @@ a binary through the shell. # Push the binary and the data it needs, including the model and .wav files: adb push bazel-bin/encoder_main /data/local/tmp/ adb push bazel-bin/decoder_main /data/local/tmp/ -adb push wavegru/ /data/local/tmp/ +adb push model_coeffs/ /data/local/tmp/ adb push testdata/ /data/local/tmp/ adb shell cd /data/local/tmp -./encoder_main --model_path=/data/local/tmp/wavegru --output_dir=/data/local/tmp --input_path=testdata/16khz_sample_000001.wav -./decoder_main --model_path=/data/local/tmp/wavegru --output_dir=/data/local/tmp --encoded_path=16khz_sample_000001.lyra +./encoder_main --model_path=/data/local/tmp/model_coeffs --output_dir=/data/local/tmp --input_path=testdata/sample1_16kHz.wav +./decoder_main --model_path=/data/local/tmp/model_coeffs --output_dir=/data/local/tmp --encoded_path=sample1_16kHz.lyra ``` The encoder_main/decoder_main as above should also work. +### Building for Mac + +You will need to install the XCode command line tools in addition to the +prerequisites common to all platforms. XCode setup is a required step for using +Bazel on Mac. See this [guide](https://bazel.build/install/os-x) for how to +install XCode command line tools. Lyra has been built successfully using XCode +13.3. + +You can follow the instructions in the [Building for Linux](#building-for-linux) +section once this is completed. 
+ +### Building for Windows + +You will need to install Build Tools for Visual Studio 2019 in addition to the +prerequisites common to all platforms. Visual Studio setup is a required step +for building C++ for Bazel on Windows. See this +[guide](https://bazel.build/install/windows) for how to install MSVC. You may +also need to install python 3 support, which is also described in the guide. + +You can follow the instructions in the [Building for Linux](#building-for-linux) +section once this is completed. + ## API For integrating Lyra into any project only two APIs are relevant: @@ -232,9 +245,11 @@ class LyraEncoder : public LyraEncoderInterface { int sample_rate_hz, int num_channels, int bitrate, bool enable_dtx, const ghc::filesystem::path& model_path); - absl::optional> Encode( + std::optional> Encode( const absl::Span audio) override; + bool set_bitrate(int bitrate) override; + int sample_rate_hz() const override; int num_channels() const override; @@ -252,11 +267,14 @@ DTX should be enabled and where the model weights are stored. It also checks that these weights exist and are compatible with the current Lyra version. Given a `LyraEncoder`, any audio stream can be compressed using the `Encode` -method. The provided span of int16-formatted samples is assumed to contain 40ms +method. The provided span of int16-formatted samples is assumed to contain 20ms of data at the sample rate chosen at `Create` time. As long as this condition is met the `Encode` method returns the encoded packet as a vector of bytes that is ready to be stored or transmitted over the network. +The bitrate can be dynamically modified using the `set_bitrate` setter. It +returns true if the desired bitrate is supported and correctly set. + The rest of the `LyraEncoder` methods are just getters for the different predetermined parameters. 
@@ -267,22 +285,17 @@ using the following interface: class LyraDecoder : public LyraDecoderInterface { public: static std::unique_ptr<LyraDecoder> Create( - int sample_rate_hz, int num_channels, int bitrate, + int sample_rate_hz, int num_channels, const ghc::filesystem::path& model_path); bool SetEncodedPacket(absl::Span<const uint8_t> encoded) override; - absl::optional<std::vector<int16_t>> DecodeSamples(int num_samples) override; - - absl::optional<std::vector<int16_t>> DecodePacketLoss( - int num_samples) override; + std::optional<std::vector<int16_t>> DecodeSamples(int num_samples) override; int sample_rate_hz() const override; int num_channels() const override; - int bitrate() const override; - int frame_rate() const override; bool is_comfort_noise() const override; @@ -290,27 +303,20 @@ class LyraDecoder : public LyraDecoderInterface { ``` Once again, the static `Create` method instantiates a `LyraDecoder` with the -desired sample rate in Hertz, number of channels and bitrate, as long as those -parameters are supported. Else it returns a `nullptr`. These parameters don't -need to be the same as the ones in `LyraEncoder`. And once again, the `Create` -method also needs to know where the model weights are stored. It also checks -that these weights exist and are compatible with the current Lyra version. +desired sample rate in Hertz and number of channels, as long as those parameters +are supported. Else it returns a `nullptr`. These parameters don't need to be +the same as the ones in `LyraEncoder`. And once again, the `Create` method also +needs to know where the model weights are stored. It also checks that these +weights exist and are compatible with the current Lyra version. Given a `LyraDecoder`, any packet can be decoded by first feeding it into `SetEncodedPacket`, which returns true if the provided span of bytes is a valid Lyra-encoded packet.
-Then the int16-formatted samples can be obtained by calling `DecodeSamples`, as -long as the total number of samples obtained this way between any two calls to -`SetEncodedPacket` is less than 40ms of data at the sample rate chose at -`Create` time. - -If there isn't a packet available, but samples still need to be generated, -`DecodePacketLoss` can be used, which doesn't have a restriction on the number -of samples. - -In those cases, the decoder might switch to a comfort noise generation mode, -which can be checked using `is_confort_noise`. +Then the int16-formatted samples can be obtained by calling `DecodeSamples`. If +there isn't a packet available, but samples still need to be generated, the +decoder might switch to a comfort noise generation mode, which can be checked +using `is_comfort_noise`. The rest of the `LyraDecoder` methods are just getters for the different predetermined parameters. @@ -319,16 +325,6 @@ For an example on how to use `LyraEncoder` and `LyraDecoder` to encode and decode a stream of audio, please refer to the [integration test](lyra_integration_test.cc). -## Sparse Matrix Multiplication Library -Lyra uses a library in the `sparse_matmul` directory that enables fast execution -of sparse Matrix-Vector multiplication ops on mobile and desktop CPU platforms -(ARM and AVX2) to allow for real-time operation on phones. This library was -created by DeepMind for their implementation of WaveRNN with sparsity [[4]](#4), -which gave a huge improvement in complexity over WaveNet. - -A generic kernel is also provided, which enables debugging on non-optimized -platforms. Contributions for other platforms are welcome. - ## License Use of this source code is governed by a Apache v2.0 license that can be found @@ -336,18 +332,22 @@ in the LICENSE file. ## Papers -1. Kleijn, W. B., Lim, F. S., Luebs, A., Skoglund, J., Stimberg, F., Wang, Q., & - Walters, T. C. (2018, April). [Wavenet based low rate speech coding](https://arxiv.org/pdf/1712.01120). 
- In 2018 IEEE international conference on acoustics, speech and signal - processing (ICASSP) (pp. 676-680). IEEE. -2. Denton, T., Luebs, A., Lim, F. S., Storus, A., Yeh, H., Kleijn, W. B., & - Skoglund, J. (2021). [Handling Background Noise in Neural Speech Generation](https://arxiv.org/pdf/2102.11906). - arXiv preprint arXiv:2102.11906. -3. Kleijn, W. B., Storus, A., Chinen, M., Denton, T., Lim, F. S., Luebs, A., ... - & Yeh, H. (2021). [Generative Speech Coding with Predictive Variance - Regularization](https://arxiv.org/pdf/2102.09660). arXiv preprint - arXiv:2102.09660. -4. Kalchbrenner, N., Elsen, E., Simonyan, K., Noury, S., - Casagrande, N., Lockhart, E., ... & Kavukcuoglu, K. (2018, July). - [Efficient neural audio synthesis](https://arxiv.org/abs/1802.08435). - In International Conference on Machine Learning (pp. 2410-2419). PMLR. +1. Kleijn, W. B., Lim, F. S., Luebs, A., Skoglund, J., Stimberg, F., Wang, Q., + & Walters, T. C. (2018, April). + [Wavenet based low rate speech coding](https://arxiv.org/pdf/1712.01120). In + 2018 IEEE international conference on acoustics, speech and signal + processing (ICASSP) (pp. 676-680). IEEE. +2. Denton, T., Luebs, A., Chinen, M., Lim, F. S., Storus, A., Yeh, H., Kleijn, + W. B., & Skoglund, J. (2020, November). + [Handling Background Noise in Neural Speech Generation](https://arxiv.org/pdf/2102.11906). + In 2020 54th Asilomar Conference on Signals, Systems, and Computers (pp. + 667-671). IEEE. +3. Kleijn, W. B., Storus, A., Chinen, M., Denton, T., Lim, F. S., Luebs, A., + Skoglund, J., & Yeh, H. (2021, June). + [Generative speech coding with predictive variance regularization](https://arxiv.org/pdf/2102.09660). + In ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and + Signal Processing (ICASSP) (pp. 6478-6482). IEEE. +4. Zeghidour, N., Luebs, A., Omran, A., Skoglund, J., & Tagliasacchi, M. + (2021). + [SoundStream: An end-to-end neural audio codec](https://arxiv.org/pdf/2107.03312). 
+ IEEE/ACM Transactions on Audio, Speech, and Language Processing. diff --git a/WORKSPACE b/WORKSPACE index e3b9f49d..3ff17da8 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -36,7 +36,7 @@ protobuf_deps() git_repository( name = "com_google_absl", remote = "https://github.com/abseil/abseil-cpp.git", - branch = "lts_2020_09_23", + tag = "20211102.0", ) # Filesystem @@ -60,25 +60,18 @@ new_git_repository( # Audio DSP git_repository( name = "com_google_audio_dsp", - remote = "https://github.com/google/multichannel-audio-tools.git", + # TODO(b/231448719) use main google repo after merging PR for TF eigen compatibility. + remote = "https://github.com/mchinen/multichannel-audio-tools.git", # There are no tags for this repo, we are synced to bleeding edge. - branch = "master", + commit = "14a45c5a7c965e5ef01fe537bd816ce10a247813", repo_mapping = { - "@com_github_glog_glog" : "@com_google_glog" + "@com_github_glog_glog" : "@com_google_glog", + "@eigen3": "@eigen_archive" } ) # Transitive dependencies of Audio DSP. -http_archive( - name = "eigen_archive", - build_file = "eigen.BUILD", - sha256 = "f3d69ac773ecaf3602cb940040390d4e71a501bb145ca9e01ce5464cf6d4eb68", - strip_prefix = "eigen-eigen-049af2f56331", - urls = [ - "http://mirror.tensorflow.org/bitbucket.org/eigen/eigen/get/049af2f56331.tar.gz", - "https://bitbucket.org/eigen/eigen/get/049af2f56331.tar.gz", - ], -) +# Note: eigen is used by Audio DSP, but provided through tensorflow workspace functions. 
http_archive( name = "fft2d", @@ -115,16 +108,11 @@ http_archive( load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace") bazel_skylib_workspace() -android_sdk_repository( - name = "androidsdk", - api_level = 30, - build_tools_version = "30.0.3" -) +load("//:android_configure.bzl", "android_configure") +android_configure(name = "local_config_android") -android_ndk_repository( - name = "androidndk", - api_level = 30 -) +load("@local_config_android//:android_configure.bzl", "android_workspace") +android_workspace() http_archive( name = "rules_android", @@ -134,14 +122,68 @@ http_archive( ) # Google Maven Repository -GMAVEN_TAG = "20180625-1" +# See https://github.com/android/android-test/blob/master/WORKSPACE for examples +# of importing android deps. +# The specific versions can be found in: +# https://github.com/android/android-test/blob/master/build_extensions/axt_versions.bzl +# and +# https://developer.android.com/jetpack/androidx/releases/ +# This allows us to use "@maven//some_android_package" deps for imports. 
+ +RULES_JVM_EXTERNAL_TAG = "4.0" +RULES_JVM_EXTERNAL_SHA = "31701ad93dbfe544d597dbe62c9a1fdd76d81d8a9150c2bf1ecf928ecdf97169" http_archive( - name = "gmaven_rules", - strip_prefix = "gmaven_rules-%s" % GMAVEN_TAG, - url = "https://github.com/bazelbuild/gmaven_rules/archive/%s.tar.gz" % GMAVEN_TAG, + name = "rules_jvm_external", + strip_prefix = "rules_jvm_external-%s" % RULES_JVM_EXTERNAL_TAG, + sha256 = RULES_JVM_EXTERNAL_SHA, + url = "https://github.com/bazelbuild/rules_jvm_external/archive/%s.zip" % RULES_JVM_EXTERNAL_TAG, +) + +load("@rules_jvm_external//:defs.bzl", "maven_install") + +maven_install( + artifacts = [ + "androidx.annotation:annotation:1.2.0", + "androidx.appcompat:appcompat:1.3.1", + "androidx.core:core:1.6.0", + "androidx.constraintlayout:constraintlayout:2.1.1" + ], + repositories = [ + "https://maven.google.com", + "https://repo1.maven.org/maven2", + ], +) + + +# Begin Tensorflow WORKSPACE subset required for TFLite + +git_repository( + name = "org_tensorflow", + remote = "https://github.com/tensorflow/tensorflow.git", + # Below is reproducible and equivalent to `tag = "v2.9.0"` + commit = "8a20d54a3c1bfa38c03ea99a2ad3c1b0a45dfa95", + shallow_since = "1652465115 -0700" ) -load("@gmaven_rules//:gmaven.bzl", "gmaven_rules") +# Check bazel version requirement, which is stricter than TensorFlow's. +load( + "@org_tensorflow//tensorflow:version_check.bzl", + "check_bazel_version_at_least", +) + +check_bazel_version_at_least("3.7.2") + +# TF WORKSPACE Loading functions +# This section uses a subset of the tensorflow WORKSPACE loading by reusing its contents. +# There are four workspace() functions that create repos for the dependencies. +# TF's loading is very complicated, and we only need a subset for TFLite. +# If we use the full TF loading sequence, we also run into conflicts and errors on some platforms.
+ +load("@org_tensorflow//tensorflow:workspace3.bzl", "workspace") +workspace() + +load("@org_tensorflow//tensorflow:workspace2.bzl", workspace2 = "workspace") +workspace2() -gmaven_rules() +# End Tensorflow WORKSPACE subset required for TFLite diff --git a/android_configure.bzl b/android_configure.bzl new file mode 100644 index 00000000..ee1bb360 --- /dev/null +++ b/android_configure.bzl @@ -0,0 +1,40 @@ +"""Repository rule for Android SDK and NDK autoconfiguration. +This rule is a no-op unless the required android environment variables are set. +""" + +# Based on https://github.com/envoyproxy/envoy-mobile/pull/2039 +# Workaround for https://github.com/bazelbuild/bazel/issues/14260 + +def _android_autoconf_impl(repository_ctx): + sdk_rule = "" + if repository_ctx.os.environ.get("ANDROID_HOME"): + sdk_rule = """ + native.android_sdk_repository( + name="androidsdk", + api_level=30, + build_tools_version="30.0.3", + ) +""" + + ndk_rule = "" + if repository_ctx.os.environ.get("ANDROID_NDK_HOME"): + ndk_rule = """ + native.android_ndk_repository( + name="androidndk", + api_level=30, + ) +""" + + if ndk_rule == "" and sdk_rule == "": + sdk_rule = "pass" + + repository_ctx.file("BUILD.bazel", "") + repository_ctx.file("android_configure.bzl", """ +def android_workspace(): + {} + {} +""".format(sdk_rule, ndk_rule)) + +android_configure = repository_rule( + implementation = _android_autoconf_impl, +) diff --git a/android_example/AndroidManifest.xml b/android_example/AndroidManifest.xml index 906d740c..a67dd47a 100644 --- a/android_example/AndroidManifest.xml +++ b/android_example/AndroidManifest.xml @@ -27,6 +27,7 @@ android:label="@string/app_name" android:theme="@style/AppTheme" android:taskAffinity=""> + adapter = + new ArrayAdapter<>(this, android.R.layout.simple_spinner_item, bpsArray); + adapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item); + spinner.setAdapter(adapter); + // The weights are stored inside of the APK as assets for this demo, 
but // the Lyra library requires them to live in files. // This helper function copies the assets to files. @@ -117,16 +130,18 @@ record = null; TAG, "Finished recording from microphone. Recorded " + micDataShortsWritten + " samples."); } - private synchronized void encodeAndDecodeMicDataToSpeaker() { + private synchronized void encodeAndDecodeMicDataToSpeaker(int bitrate) { // There must be at least enough data recorded to output something useful. - if (micDataShortsWritten < PLAYBACK_SKIP_SAMPLES) { + if (micDataShortsWritten == 0) { return; } // Whatever micData holds, encode and decode with Lyra. - short[] decodedAudio = encodeAndDecodeSamples(micData, micDataShortsWritten, weightsDirectory); + short[] decodedAudio = encodeAndDecodeSamples(micData, micDataShortsWritten, bitrate, + weightsDirectory); if (decodedAudio == null) { Log.e(TAG, "Failed to encode and decode microphone data."); + return; } // Create a new AudioTrack in static mode so we can write once and @@ -149,10 +164,10 @@ private synchronized void encodeAndDecodeMicDataToSpeaker() { int shortsWritten = player.write( decodedAudio, - PLAYBACK_SKIP_SAMPLES, - decodedAudio.length - PLAYBACK_SKIP_SAMPLES, + 0, + decodedAudio.length, AudioTrack.WRITE_BLOCKING); - Log.e( + Log.i( TAG, "Wrote " + shortsWritten @@ -168,6 +183,33 @@ private void stopRecording() { // Notify we stopped recording. Button button = (Button) findViewById(R.id.button_record); button.post(() -> button.setText("Record from microphone")); + Button decodeButton = (Button) findViewById(R.id.button_decode); + decodeButton.setEnabled(true); + } + + /** Called when user taps the 'Encode/Decode To Speaker' button. 
*/ + public void onDecodeButtonClicked(View view) { + Log.i(TAG, "Starting decoding."); + + Button decodeButton = (Button) view; + decodeButton.setEnabled(false); + Button recordButton = (Button) findViewById(R.id.button_record); + recordButton.setEnabled(false); + + Spinner bpsSpinner = (Spinner) findViewById(R.id.bps_spinner); + int bps = Integer.parseInt(bpsSpinner.getSelectedItem().toString()); + MainActivity mainActivity = this; + Thread thread = + new Thread( + () -> { + encodeAndDecodeMicDataToSpeaker(bps); + mainActivity.runOnUiThread( + () -> { + decodeButton.setEnabled(true); + recordButton.setEnabled(true); + }); + }); + thread.start(); } /** Called when user taps the 'record microphone' button. */ @@ -175,7 +217,9 @@ public void onMicButtonClicked(View view) { if (!isRecording) { isRecording = true; // Begin recording, and set the button to be a stop button. - ((Button) view).setText("Stop and decode to speaker"); + ((Button) view).setText("Stop recording"); + Button decodeButton = (Button) findViewById(R.id.button_decode); + decodeButton.setEnabled(false); record = new AudioRecord.Builder() .setAudioSource(MediaRecorder.AudioSource.VOICE_COMMUNICATION) @@ -191,7 +235,6 @@ record = new Thread(this::recordAudioStream).start(); } else { stopRecording(); - encodeAndDecodeMicDataToSpeaker(); } } @@ -206,11 +249,11 @@ public void runBenchmark(View view) { new Thread( () -> { - Log.i(TAG, "Starting benchmarkDecode()"); + Log.i(TAG, "Starting lyraBenchmark()"); // Example of a call to a C++ lyra method on a background // thread. - benchmarkDecode(2000, weightsDirectory); - Log.i(TAG, "Finished benchmarkDecode()"); + lyraBenchmark(2000, weightsDirectory); + Log.i(TAG, "Finished lyraBenchmark()"); tv.post(() -> tv.setText("Finished benchmarking. 
See logcat for results.")); button.post(() -> button.setEnabled(true)); hasStartedDecode = false; @@ -222,14 +265,11 @@ public void runBenchmark(View view) { private void copyWeightsAssetsToDirectory(String targetDirectory) { try { AssetManager assetManager = getAssets(); - String[] files = assetManager.list(""); + String[] files = {"lyra_config.binarypb", "lyragan.tflite", + "quantizer.tflite", "soundstream_encoder.tflite"}; byte[] buffer = new byte[1024]; int amountRead; for (String file : files) { - // Lyra weights start with a 'lyra_' prefix. - if (!file.startsWith("lyra_")) { - continue; - } InputStream inputStream = assetManager.open(file); File outputFile = new File(targetDirectory, file); @@ -251,8 +291,8 @@ private void copyWeightsAssetsToDirectory(String targetDirectory) { * A method that is implemented by the 'lyra_android_example' C++ library, which is packaged with * this application. */ - public native String benchmarkDecode(int numCondVectors, String modelBasePath); + public native String lyraBenchmark(int numCondVectors, String modelBasePath); public native short[] encodeAndDecodeSamples( - short[] samples, int sampleLength, String modelBasePath); + short[] samples, int sampleLength, int bitrate, String modelBasePath); } diff --git a/android_example/jni_benchmark_decode_lib.cc b/android_example/jni_lyra_benchmark_lib.cc similarity index 70% rename from android_example/jni_benchmark_decode_lib.cc rename to android_example/jni_lyra_benchmark_lib.cc index 66d82e95..d3473cb8 100644 --- a/android_example/jni_benchmark_decode_lib.cc +++ b/android_example/jni_lyra_benchmark_lib.cc @@ -17,15 +17,16 @@ #include #include -#include "benchmark_decode_lib.h" +#include "absl/random/random.h" #include "decoder_main_lib.h" #include "encoder_main_lib.h" +#include "lyra_benchmark_lib.h" #include "lyra_config.h" extern "C" JNIEXPORT jshortArray JNICALL Java_com_example_android_lyra_MainActivity_encodeAndDecodeSamples( JNIEnv* env, jobject this_obj, jshortArray 
samples, jint sample_length, - jstring model_base_path) { + jint bitrate, jstring model_base_path) { std::vector samples_vector(sample_length); std::vector features; std::vector decoded_audio; @@ -36,14 +37,16 @@ Java_com_example_android_lyra_MainActivity_encodeAndDecodeSamples( const char* cpp_model_base_path = env->GetStringUTFChars(model_base_path, 0); std::unique_ptr decoder = chromemedia::codec::LyraDecoder::Create( - 16000, chromemedia::codec::kNumChannels, chromemedia::codec::kBitrate, - cpp_model_base_path); + 16000, chromemedia::codec::kNumChannels, cpp_model_base_path); + absl::BitGen gen; if (chromemedia::codec::EncodeWav( - samples_vector, chromemedia::codec::kNumChannels, 16000, false, false, - cpp_model_base_path, &features) && - chromemedia::codec::DecodeFeatures(features, 0.0, 1.0, decoder.get(), - &decoded_audio)) { + samples_vector, chromemedia::codec::kNumChannels, 16000, bitrate, + false, false, cpp_model_base_path, &features) && + chromemedia::codec::DecodeFeatures( + features, chromemedia::codec::BitrateToPacketSize(bitrate), + /*randomize_num_samples_requested=*/false, gen, decoder.get(), + nullptr, &decoded_audio)) { java_decoded_audio = env->NewShortArray(decoded_audio.size()); env->SetShortArrayRegion(java_decoded_audio, 0, decoded_audio.size(), &decoded_audio[0]); @@ -55,12 +58,15 @@ Java_com_example_android_lyra_MainActivity_encodeAndDecodeSamples( } extern "C" JNIEXPORT int JNICALL -Java_com_example_android_lyra_MainActivity_benchmarkDecode( +Java_com_example_android_lyra_MainActivity_lyraBenchmark( JNIEnv* env, jobject this_obj, jint num_cond_vectors, jstring model_base_path) { const char* cpp_model_base_path = env->GetStringUTFChars(model_base_path, 0); - int ret = chromemedia::codec::benchmark_decode(num_cond_vectors, - cpp_model_base_path); + int ret = + chromemedia::codec::lyra_benchmark(num_cond_vectors, cpp_model_base_path, + /*benchmark_feature_extraction=*/true, + /*benchmark_quantizer=*/true, + 
/*benchmark_generative_model=*/true); env->ReleaseStringUTFChars(model_base_path, cpp_model_base_path); return ret; } diff --git a/android_example/res/layout/activity_main.xml b/android_example/res/layout/activity_main.xml index 003d9d91..e4583ab7 100644 --- a/android_example/res/layout/activity_main.xml +++ b/android_example/res/layout/activity_main.xml @@ -15,12 +15,12 @@ limitations under the License. --> - + tools:context="com.example.android.lyra.MainActivity"> +