diff --git a/README.md b/README.md index dd9001e..61b8860 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Moreoever, in this repository we provide guidance on setting up a pipeline for r Below are the intructions on installing and using the snowmobile detector with both Docker and without. The output of the script is a subfolder `SNOWMOBILE_RESULTS` containing the `.csv` file of the analyzed file. Note that the folder `SNOWMOBILE_RESULTS` will be located in the same folder as the input file. -### Use with Docker (recommanded) +### Use with Docker Create the Docker image image: @@ -51,10 +51,8 @@ Note that you can change `./example/example_audio.mp3` to the path of your own f ### Use without Docker -Install `poetry` (the package manager we use) if it is not already installed. - ```bash -pip install poetry +pip install -r requirements.txt ``` Copy the github repository and get the model from [Zenodo](https://zenodo.org/record/7969521). diff --git a/cloud_analysis/main.py b/cloud_analysis/main.py index a7287a4..f8c9606 100644 --- a/cloud_analysis/main.py +++ b/cloud_analysis/main.py @@ -15,14 +15,14 @@ import torch from torch.utils.data import DataLoader -from predict import predict -from predict import initModel from utils.utils import AudioList +from utils.audio_signal import AudioSignal from google.cloud import storage from google.oauth2 import service_account import datetime +from pydub import AudioSegment import io import logging @@ -63,16 +63,13 @@ def send_email(subject, body): return repr(e) -from pydub import AudioSegment -import io - def convert_mp3_to_wav(mp3_file_object): # Load MP3 file from file object audio = AudioSegment.from_file(mp3_file_object, format="mp3") # Convert to WAV wav_file_object = io.BytesIO() - audio.export(wav_file_object, format="wav") + audio.export(wav_file_object, format="mp3") wav_file_object.seek(0) # Move file pointer to the start return wav_file_object @@ -109,9 +106,9 @@ def fetch_audio_data(bucket_name, blob_name): audio_file_object.seek(0) # Move file pointer to the start # Convert MP3 to WAV - wav_file_object = convert_mp3_to_wav(audio_file_object) - - return wav_file_object + #wav_file_object = convert_mp3_to_wav(audio_file_object) + + return audio_file_object def generate_signed_url(bucket_name, blob_name, expiration_time=86400): """ @@ -146,6 +143,55 @@ def generate_signed_url(bucket_name, blob_name, expiration_time=86400): return url +def initModel(model_path, device): + model = torch.load(model_path, map_location=torch.device(device)) + model.eval() + return model + + +def compute_hr(array): + + signal = AudioSignal(samples=array, fs=44100) + + signal.apply_butterworth_filter(order=18, Wn=np.asarray([1, 600]) / (signal.fs / 2)) + signal_hr = signal.harmonic_ratio( + win_length=int(1 * signal.fs), + hop_length=int(0.1 * signal.fs), + window="hamming", + ) + hr = np.mean(signal_hr) + + return hr + +def predict(testLoader, model, device): + + proba_list = [] + hr_list = [] + + for array in testLoader: + + # Compute confidence for the DL model + if device == "cpu": + tensor = torch.tensor(array) + else: + tensor = array + + tensor = tensor.to(device) + output = model(tensor) + output = np.exp(output.cpu().detach().numpy()) + proba_list.append(output[0]) + + # Compute HR if label=snowmobile + label = np.argmax(output[0], axis=0) + + if label == 1: + hr = compute_hr(np.array(array)) + hr_list.append(hr) + else: + hr_list.append(0) + + return proba_list, hr_list + def analyseAudioFile( audio_file_object, min_hr, min_conf, batch_size=1, num_workers=2, ): @@ -176,14 +222,15 @@ def analyseAudioFile( idx_end = idx_begin + 3 conf = np.array(item_audioclip) label = np.argmax(conf, axis=0) - confidence = conf[1] # conf.max() + confidence = conf.max() hr = np.array(item_hr) + # Append the conf and hr of each segment - conf_arr.append(confidence) + conf_arr.append(conf[1]) hr_arr.append(hr) # If the label is not "soundscape" then write the row: - if label != 0 and hr > min_hr and confidence > min_conf: + if hr > min_hr and confidence > min_conf: item_properties = [idx_begin, idx_end, confidence, hr] results.append(item_properties) @@ -208,15 +255,12 @@ def on_process_audio(audio_id: str, audio_rec: dict, bucket_name: str, blob_name # After processing we record each detection in the database. # Each detection should have a start and end time which is used to create an audio clip later. - count = 0 + count = len(results) detections = [] for r in results: start, end, confidence, harmonic_ratio = r - if harmonic_ratio > hr and confidence > conf: - count += 1 - # create the detections dataset detections.append({ u"start": start, @@ -245,7 +289,7 @@ def process_audio_endpoint(): max_conf, max_hr = maxes email_response = "Not sent" - if results > 1: + if results > 0: # Create a signed URL download_url = generate_signed_url(bucket_name, blob_name) @@ -258,10 +302,9 @@ def process_audio_endpoint(): email_body += f"Download the audio file here: {download_url}" email_response = send_email("Snowmobile Detection Alert", email_body) - return jsonify({"message": f"file {blob_name} processed. Max CONF = {max_conf}, MAX HR = {max_hr}, DET COUNT = {results}, E-MAIL = {email_response}"}) + return jsonify({"message": f"file {blob_name} processed. CONF SNOWMOBILE = {max_conf}, HR = {max_hr}, DET COUNT = {results}, E-MAIL = {email_response}"}) if __name__ == "__main__": app.debug = True app.run(host='0.0.0.0', port=8080) - diff --git a/post_analysis/run.py b/post_analysis/run.py new file mode 100644 index 0000000..64c1a54 --- /dev/null +++ b/post_analysis/run.py @@ -0,0 +1,78 @@ +import os +import glob +import numpy as np +import csv + +import torch +from torch.utils.data import DataLoader + +from src.utils.utils import AudioList +from src.utils.audio_signal import AudioSignal + +def initModel(model_path, device): + model = torch.load(model_path, map_location=torch.device(device)) + model.eval() + return model + +def compute_hr(array): + + signal = AudioSignal(samples=array, fs=44100) + + signal.apply_butterworth_filter(order=18, Wn=np.asarray([1, 600]) / (signal.fs / 2)) + signal_hr = signal.harmonic_ratio( + win_length=int(1 * signal.fs), + hop_length=int(0.1 * signal.fs), + window="hamming", + ) + hr = np.mean(signal_hr) + + return hr + +def predict(testLoader, model, device, threshold=0.99): + + proba_list = [] + hr_list = [] + + for array in testLoader: + + # Compute confidence for the DL model + if device == "cpu": + tensor = torch.tensor(array) + else: + tensor = array + + tensor = tensor.to(device) + output = model(tensor) + output = np.exp(output.cpu().detach().numpy()) + proba_list.append(output[0]) + + # Compute HR if confidence is more than a threshold + max_value = output[0].max() + if max_value >= threshold: + hr = compute_hr(np.array(array)) + hr_list.append(hr) + else: + hr_list.append(0) + + return proba_list, hr_list + +if __name__ == "__main__": + + mpath = "audioclip/assets/snowmobile_model.pth" + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + m = torch.load(mpath, map_location=device) + m.eval() + + file_paths = "/home/benjamin.cretois/data/proj_snowmobile/bugg_RPiID-10000000/**/**/*.mp3" + files_to_analyze = glob.glob(file_paths) + # take only a subset + files_to_analyze = files_to_analyze[ : 10] + + for file_path in files_to_analyze: + list_preds = AudioList().get_processed_list(file_path) + predLoader = DataLoader( + list_preds, batch_size=1, num_workers=10, pin_memory=False + ) + + pred_audioclip_array, pred_hr_array = predict(predLoader, m, device) + print(pred_audioclip_array) \ No newline at end of file diff --git a/src/predict.py b/src/predict.py index 171a2ed..9053e33 100644 --- a/src/predict.py +++ b/src/predict.py @@ -3,6 +3,7 @@ import os import numpy as np import csv +import sys import torch from torch.utils.data import DataLoader @@ -19,7 +20,7 @@ with open("./CONFIG.yaml") as f: cfg = yaml.load(f, Loader=FullLoader) -logging.basicConfig(filename='logs/logfile.log', level=logging.INFO, +logging.basicConfig(filename='./logs/logfile.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') def initModel(model_path, device): @@ -41,7 +42,7 @@ def compute_hr(array): return hr -def predict(testLoader, model, device, threshold=0.99): +def predict(testLoader, model, device): proba_list = [] hr_list = [] @@ -59,9 +60,10 @@ def predict(testLoader, model, device, threshold=0.99): output = np.exp(output.cpu().detach().numpy()) proba_list.append(output[0]) - # Compute HR if confidence is more than a threshold - max_value = output[0].max() - if max_value >= threshold: + # Compute HR if label=snowmobile + label = np.argmax(output[0], axis=0) + + if label == 1: hr = compute_hr(np.array(array)) hr_list.append(hr) else: @@ -69,27 +71,7 @@ def predict(testLoader, model, device, threshold=0.99): return proba_list, hr_list - -def get_outname(input, out_path): - - # Get a name for the output // if there are multiple "." in the list - # only remove the extension - filename = input.split("/")[-1].split(".") - if len(filename) > 2: - filename = ".".join(filename[0:-1]) - else: - filename = input.split("/")[-1].split(".")[0] - - # Make folder if it doesn't exist - outpath = os.sep.join([out_path, os.path.dirname(input)]) - if not os.path.exists(outpath): - os.makedirs(outpath, exist_ok=True) - - file_path = os.path.join(outpath, filename + ".csv") - - return file_path - -def write_results(prob_audioclip_array, hr_array, outname, min_hr, min_conf): +def write_results(prob_audioclip_array, hr_array, outname): # Store the array result in a CSV friendly format rows_for_csv = [] @@ -104,10 +86,9 @@ def write_results(prob_audioclip_array, hr_array, outname, min_hr, min_conf): max_value = conf.max() hr = np.array(item_hr) - # If the label is not "soundscape" then write the row: - if label != 0 and hr > min_hr and max_value > min_conf: - item_properties = [idx_begin, idx_end, label, max_value, hr] - rows_for_csv.append(item_properties) + # Write everything in the row + item_properties = [idx_begin, idx_end, label, max_value, hr] + rows_for_csv.append(item_properties) # Update the start time of the detection idx_begin = idx_end @@ -129,7 +110,7 @@ def write_results(prob_audioclip_array, hr_array, outname, min_hr, min_conf): def analyzeFile( - file_path, model, device, num_workers, min_hr, min_conf, batch_size=1 + file_path, model, device, num_workers, batch_size=1 ): # Start time start_time = datetime.datetime.now() @@ -142,87 +123,46 @@ def analyzeFile( os.makedirs(result_folder) # Check if the output already exists - outname = file_path.split("/")[-1].split(".")[0] + "_ANALYZED.csv" + outname = file_path.split("/")[-1] + "_ANALYZED.csv" outpath = os.path.join(result_folder, outname) - if os.path.exists(outname): - print("File {} already exists".format(outname)) - else: - # Run the predictions - list_preds = AudioList().get_processed_list(file_path) - predLoader = DataLoader( - list_preds, batch_size=batch_size, num_workers=num_workers, pin_memory=False - ) - - pred_audioclip_array, pred_hr_array = predict(predLoader, model, device) - - write_results(pred_audioclip_array, pred_hr_array, outpath, min_hr, min_conf) - - # Give the tim it took to analyze file - delta_time = (datetime.datetime.now() - start_time).total_seconds() - message = "Finished {} in {:.2f} seconds".format(file_path, delta_time) - print(message, flush=True) - logging.info(message) + # Run the predictions + list_preds = AudioList().get_processed_list(file_path) + predLoader = DataLoader( + list_preds, batch_size=batch_size, num_workers=num_workers, pin_memory=False + ) + pred_audioclip_array, pred_hr_array = predict(predLoader, model, device) -if __name__ == "__main__": + write_results(pred_audioclip_array, pred_hr_array, outpath) - # Get the config for doing the predictions - # FOR TESTING THE PIPELINE WITH ONE FILE - parser = argparse.ArgumentParser() + # Give the time it took to analyze file + delta_time = (datetime.datetime.now() - start_time).total_seconds() + message = "Finished {} in {:.2f} seconds".format(file_path, delta_time) + print(message, flush=True) + logging.info(message) - parser.add_argument( - "--input", - help="Path to the file to analyze", - required=True, - type=str, - ) +def main(filename, cfg): - parser.add_argument( - "--num_workers", - help="Number of workers for reading in audiofiles", - default=1, - required=False, - type=int, - ) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = initModel(model_path=cfg["path_snowmobile_model"], device=device) - parser.add_argument( - "--min_hr", - help="Minimum value for harmonic ratio to take detection in", - default=0.1, - required=False, - type=int, - ) + analyzeFile(filename, + model, + device, + num_workers=cfg["NUM_WORKERS"], + batch_size=cfg["BATCH_SIZE"]) - parser.add_argument( - "--min_conf", - help="Minimum value for model confidence to take detection in", - default=0.99, - required=False, - type=int, - ) - - cli_args = parser.parse_args() - # Initiate model - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - model_path = cfg["path_snowmobile_model"] +if __name__ == "__main__": - model = initModel(model_path=model_path, device=device) + filename = sys.argv[1] # Analyze file - print("Analysing {}".format(cli_args.input)) + print("Analysing {}".format(filename)) try: - analyzeFile( - cli_args.input, - model, - device=device, - batch_size=1, - num_workers=cli_args.num_workers, - min_hr=cli_args.min_hr, - min_conf=cli_args.min_conf - ) + main(filename, cfg) except Exception as e: - print(f"File {cli_args.input} failed to be analyzed") - logging.error(f"File {cli_args.input} failed to be analyzed: {str(e)}") + print(f"File {filename} failed to be analyzed") + logging.error(f"File {filename} failed to be analyzed: {str(e)}") logging.error(traceback.format_exc())