Skip to content

Commit

Permalink
add audio sift (bug on global combine)
Browse files Browse the repository at this point in the history
  • Loading branch information
pmhalvor committed Sep 19, 2024
1 parent 4c49de9 commit 26c5d0f
Show file tree
Hide file tree
Showing 4 changed files with 287 additions and 107 deletions.
19 changes: 14 additions & 5 deletions src/pipeline/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,30 @@
from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions
from stages.search import GeometrySearch
from stages.audio import RetrieveAudio, WriteAudio
from stages.sift import Butterworth

from config import load_pipeline_config
config = load_pipeline_config()

def run():
# Initialize pipeline options
pipeline_options = PipelineOptions()
pipeline_options.view_as(SetupOptions).save_main_session = True
args = {
"start": config.input.start,
"end": config.input.end
}

with beam.Pipeline(options=pipeline_options) as p:
input_data = p | "Create Input" >> beam.Create([{'start': '2016-12-21T00:30:0', 'end':"2016-12-21T00:40:0"}])
search_results = input_data | "Run Geometry Search" >> beam.ParDo(GeometrySearch())
audio_results = search_results | "Retrieve Audio" >> beam.ParDo(RetrieveAudio())
# filtered_audio = audio_results | "Filter Frequency" >> FilterFrequency()
input_data = p | "Create Input" >> beam.Create([args])
search_results = input_data | "Run Geometry Search" >> beam.ParDo(GeometrySearch())

audio_results = search_results | "Retrieve Audio" >> beam.ParDo(RetrieveAudio())
audio_files = audio_results | "Store Audio (temp)" >> beam.ParDo(WriteAudio())

sifted_audio = audio_results | "Sift Audio" >> Butterworth(args)

# For debugging, you can write the output to a text file
audio_files = audio_results | "Store Audio (temp)" >> beam.ParDo(WriteAudio())
# audio_files | "Write Audio Output" >> beam.io.WriteToText('audio_files.txt')
# search_results | "Write Search Output" >> beam.io.WriteToText('search_results.txt')

Expand Down
25 changes: 17 additions & 8 deletions src/pipeline/config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pipeline:
general:
verbose: true
debug: true

input:
start: "2016-12-21T00:30:00"
Expand Down Expand Up @@ -28,16 +29,24 @@ pipeline:
source_sample_rate: 16000
margin: 30 # seconds (TODO: set to 900)
offset: 13 # hours (TODO: set to 0)
output_path_template: "data/audio/{year}/{month:02}/{filename}"
skip_existing: false

detection_filter:
highcut: 1500
lowcut: 50
order: 10
frequency_threshold: 0.015
output_path_template: "data/audio/raw/{year}/{month:02}/{filename}"
skip_existing: false # if true, skip downstream processing of existing audio files (false during development)

sift:
output_path_template: "data/audio/{sift_type}/{year}/{month:02}/{day:02}/{filename}"
max_duration: 600 # seconds
plot: true
plot_path_template: "data/plots/{sift_type}/{year}/{month:02}/{day:02}/{plot_name}.png"
window_size: 512

# Specific sift-mechanism parameters
butterworth:
highcut: 1500
lowcut: 50
order: 10
output: "sos" # "sos" or "ba"
peak_threshold: 0.015

model:
url: https://tfhub.dev/google/humpback_whale/1
model_sample_rate: 10000
Expand Down
94 changes: 0 additions & 94 deletions src/pipeline/stages/detection.py

This file was deleted.

Loading

0 comments on commit 26c5d0f

Please sign in to comment.