From 7b696dd51fb9e5e6b7360083ba818d6627adc4dc Mon Sep 17 00:00:00 2001 From: johentsch Date: Thu, 23 Feb 2023 17:54:37 +0100 Subject: [PATCH 01/13] adds CLI skeleton --- src/ms3/cli.py | 2 +- src/ms3/dezrann.py | 53 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/ms3/cli.py b/src/ms3/cli.py index 8d265987..943d6a30 100644 --- a/src/ms3/cli.py +++ b/src/ms3/cli.py @@ -458,7 +458,7 @@ def get_arg_parser(): # reusable argument sets parse_args = argparse.ArgumentParser(add_help=False) parse_args.add_argument('-d', '--dir', metavar='DIR', default=os.getcwd(), type=check_dir, - help='Folder(s) that will be scanned for input files. Defaults to current working directory if no individual files are passed via -f.') + help='Folder(s) that will be scanned for input files. Defaults to current working directory.') parse_args.add_argument('-o', '--out', metavar='OUT_DIR', type=check_and_create, help='Output directory.') parse_args.add_argument('-n', '--nonrecursive', action='store_true', diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index b34bb648..90cf4d7a 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -253,12 +253,63 @@ def generate_all_dez(output_dir=OUTPUT_DIR): for i_piece, piece in enumerate(MOZART_SONATAS): generate_dez(MEASURE_PATHS[i_piece], HARMONY_PATHS[i_piece]) +def main(input_dir: str, + measures_dir: str, + output_dir: str, + harmony_layer: int, + keys_layer:int, + phrases_layer: int, + cadences_layer: int, + raw_layer: int): + pass + +def process_arguments(args) -> dict: + pass + + +def run(): + parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, + description='''\ + ----------------------------- + | DCML => Dezrann converter | + ----------------------------- + + This script converts DCML harmony annotations into the .dez JSON format used by the dezrann.net app. It is + standalone and does not require ms3 to be installed. Its only requirement is pandas. + + Apart from that, the script requires that you have previously extracted both harmonies and measures from the + annotated scores or that you are converting a DCML corpus (https://github.com/DCMLab/dcml_corpora), + where both facets are provided by default. In order to (re-) extract the labels, use the command: + + ms3 extract -X -M + + Or, if you want to convert other harmony or chord labels from your MuseScore files, use -L for labels. + ms3 extract -h will show you all options. + ''') + parser.add_argument(metavar='DIR', default=os.getcwd(), + help='Folder that will be scanned for TSV files to convert. Defaults to current working directory.') + parser.add_argument('-m', '--measures', metavar='DIR', + help='Folder(s) that will be scanned for TSV files to convert. Defaults to current working directory.') + parser.add_argument('-o', '--out', metavar='OUT_DIR', + help='Output directory for .dez files. Defaults to the input directory.') + parser.add_argument('-H', '--harmonies', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('-K', '--keys', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('-P', '--phrases', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('-C', '--cadences', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('--raw', choices=[0, 1, 2, 3, 4, 5, 6]) + args = parser.parse_args() + kwargs = process_arguments(args) + main(**kwargs) if __name__ == "__main__": + run() + + + #measures = ms3.load_tsv('src/ms3/K283-2_measures.tsv') #harmonies = ms3.load_tsv('src/ms3/K283-2_harmonies.tsv') #transformed = transform_df(labels=harmonies, measures=measures) #print(transformed) - dez = generate_dez('src/ms3/K283-2_measures.tsv', 'src/ms3/K283-2_harmonies.tsv') + dez = generate_dez('K283-2_measures.tsv', 'K283-2_harmonies.tsv') #generate_all_dez() \ No newline at end of file From 638cd80632b288ca42738f066c7538857186f98a Mon Sep 17 00:00:00 2001 From: johentsch Date: Thu, 23 Feb 2023 18:01:35 +0100 Subject: [PATCH 02/13] adds import and corrects positional argument --- src/ms3/dezrann.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index 90cf4d7a..83833f62 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -98,7 +98,7 @@ } ''' """ - +import argparse import json import os from typing import Dict, List, TypedDict, Union, Tuple @@ -286,7 +286,7 @@ def run(): Or, if you want to convert other harmony or chord labels from your MuseScore files, use -L for labels. ms3 extract -h will show you all options. ''') - parser.add_argument(metavar='DIR', default=os.getcwd(), + parser.add_argument("dir", metavar='DIR', help='Folder that will be scanned for TSV files to convert. Defaults to current working directory.') parser.add_argument('-m', '--measures', metavar='DIR', help='Folder(s) that will be scanned for TSV files to convert. Defaults to current working directory.') @@ -311,5 +311,5 @@ def run(): #transformed = transform_df(labels=harmonies, measures=measures) #print(transformed) - dez = generate_dez('K283-2_measures.tsv', 'K283-2_harmonies.tsv') + #dez = generate_dez('K283-2_measures.tsv', 'K283-2_harmonies.tsv') #generate_all_dez() \ No newline at end of file From 332102051cc86f8c5bba512baec9115e08d5907b Mon Sep 17 00:00:00 2001 From: johentsch Date: Thu, 23 Feb 2023 18:17:12 +0100 Subject: [PATCH 03/13] streamlines CLI arguments and suggests defaults --- src/ms3/dezrann.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index 83833f62..b3a167e8 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -292,11 +292,29 @@ def run(): help='Folder(s) that will be scanned for TSV files to convert. Defaults to current working directory.') parser.add_argument('-o', '--out', metavar='OUT_DIR', help='Output directory for .dez files. Defaults to the input directory.') - parser.add_argument('-H', '--harmonies', choices=[0, 1, 2, 3, 4, 5, 6]) - parser.add_argument('-K', '--keys', choices=[0, 1, 2, 3, 4, 5, 6]) - parser.add_argument('-P', '--phrases', choices=[0, 1, 2, 3, 4, 5, 6]) - parser.add_argument('-C', '--cadences', choices=[0, 1, 2, 3, 4, 5, 6]) - parser.add_argument('--raw', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('-C', + '--cadences', + action="store_true", + ) + parser.add_argument('-H', + '--harmonies', + metavar="{1-6}, default: 4", + default=4, + choices=[1, 2, 3, 4, 5, 6], + ) + parser.add_argument('-K', + '--keys', + metavar="{1-6}, default: 5", + default=5, + choices=[1, 2, 3, 4, 5, 6]) + parser.add_argument('-P', + '--phrases', + metavar="{1-6}, default: 6", + default=6, + choices=[1, 2, 3, 4, 5, 6]) + parser.add_argument('--raw', + metavar="{1-6}", + choices=[1, 2, 3, 4, 5, 6]) args = parser.parse_args() kwargs = process_arguments(args) main(**kwargs) From c0c7e709b5a7381d1168c42013ff61f5f8e000d8 Mon Sep 17 00:00:00 2001 From: Louis Couturier Date: Thu, 23 Feb 2023 18:39:19 +0100 Subject: [PATCH 04/13] work in progress: CLI + line layout --- src/ms3/dezrann.py | 79 ++++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index b3a167e8..6c95bf1f 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -175,6 +175,11 @@ def make_dezrann_label( ) def convert_dcml_list_to_dezrann_list(values_dict: List[DcmlLabel], + cadences: bool, + harmony_line: int, + keys_line: int, + phrases_line: int, + raw_line: int, origin: Union[str, Tuple[str]] = "DCML") -> DezrannDict: label_list = [] for e in values_dict: @@ -186,7 +191,14 @@ def convert_dcml_list_to_dezrann_list(values_dict: List[DcmlLabel], origin=origin ) ) - return DezrannDict(labels=label_list, meta={"layout": []}) + layout = [] + if raw_line > 0: + layout.append({"filter": {"type": "Harmony"}, "style": {"line": line}}) + #if harmony_line > 0: + # ... + #if keys_line > 0: + # ... + return DezrannDict(labels=label_list, meta={"layout": layout}) def generate_dez(path_measures: str, @@ -256,14 +268,38 @@ def generate_all_dez(output_dir=OUTPUT_DIR): def main(input_dir: str, measures_dir: str, output_dir: str, - harmony_layer: int, - keys_layer:int, - phrases_layer: int, - cadences_layer: int, - raw_layer: int): + cadences: bool, + harmony_line: Optional[str], # will transform and pass in "bot.1", None otherwise + keys_line: Optional[str], + phrases_line: Optional[str], + raw_line: Optional[str]): pass +LINE_VALUES = { + 1: "top.1", + 2: "top.2", + 3: "top.3", + 4: "bot.1", + 5: "bot.2", + 6: "bot.3" +} + +def transform_line_argument(line: Optional[Union[int, str]]) -> Optional[str]: + if line is None: + return + try: + line = int(line) + assert line in [1,2,3,4,5,6, -1, -2, -3] + except (TypeError, ValueError, AssertionError): + raise ValueError(f"{line} is not a valid argument, shoube within 1-6.") + if line < 0: + line = abs(line) + 3 + return LINE_VALUES[line] + + def process_arguments(args) -> dict: + kwargs = {} + line_args = ('harmonies', 'keys', 'phrases', 'raw') pass @@ -289,32 +325,15 @@ def run(): parser.add_argument("dir", metavar='DIR', help='Folder that will be scanned for TSV files to convert. Defaults to current working directory.') parser.add_argument('-m', '--measures', metavar='DIR', - help='Folder(s) that will be scanned for TSV files to convert. Defaults to current working directory.') + help="Folder in which to look for the corrsponding measure maps. By default, the script will try " + "to find a sibling to the source dir called 'measures'.") parser.add_argument('-o', '--out', metavar='OUT_DIR', help='Output directory for .dez files. Defaults to the input directory.') - parser.add_argument('-C', - '--cadences', - action="store_true", - ) - parser.add_argument('-H', - '--harmonies', - metavar="{1-6}, default: 4", - default=4, - choices=[1, 2, 3, 4, 5, 6], - ) - parser.add_argument('-K', - '--keys', - metavar="{1-6}, default: 5", - default=5, - choices=[1, 2, 3, 4, 5, 6]) - parser.add_argument('-P', - '--phrases', - metavar="{1-6}, default: 6", - default=6, - choices=[1, 2, 3, 4, 5, 6]) - parser.add_argument('--raw', - metavar="{1-6}", - choices=[1, 2, 3, 4, 5, 6]) + parser.add_argument('-H', '--harmonies', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('-K', '--keys', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('-P', '--phrases', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('-C', '--cadences', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('--raw', choices=[0, 1, 2, 3, 4, 5, 6]) args = parser.parse_args() kwargs = process_arguments(args) main(**kwargs) From 22b787c658c136cc70827bec0d25e8fb375c611e Mon Sep 17 00:00:00 2001 From: Louis Couturier Date: Thu, 23 Feb 2023 19:43:33 +0100 Subject: [PATCH 05/13] end-of-day commit after collab session LC & JH --- src/ms3/dezrann.py | 197 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 157 insertions(+), 40 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index 6c95bf1f..af4dfb51 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -101,7 +101,7 @@ import argparse import json import os -from typing import Dict, List, TypedDict, Union, Tuple +from typing import Dict, List, TypedDict, Union, Tuple, Optional from fractions import Fraction import pandas as pd @@ -115,10 +115,11 @@ def safe_frac(s: str) -> Union[Fraction, str]: return s class DezrannLabel(TypedDict): - type: str #= "Harmony" # Default value ? + """Represents one label in a .dez file.""" + type: str start: float duration: float - line: str #= "top.3" #Literal? + #line: str # Determined by the meta-layout tag: str layers: List[str] @@ -128,13 +129,18 @@ class DezrannDict(TypedDict): meta: Dict class DcmlLabel(TypedDict): + """Represents one label from a TSV annotation file""" quarterbeats: float duration: float label: str + harmony: str + key: str + phrase: str + cadence: str -def transform_df(labels: pd.DataFrame, - measures: pd.DataFrame, +def transform_df(labels: pd.DataFrame, + measures: pd.DataFrame, label_column: str = 'label') -> List[DcmlLabel]: """ @@ -166,20 +172,19 @@ def make_dezrann_label( else: layers = list(origin) return DezrannLabel( - type="Harmony", + type="Harmony", #TODO: adapt type to current label start=quarterbeats, duration=duration, - line="top.3", tag=label, layers=layers ) def convert_dcml_list_to_dezrann_list(values_dict: List[DcmlLabel], - cadences: bool, - harmony_line: int, - keys_line: int, - phrases_line: int, - raw_line: int, + cadences: bool = False, + harmony_line: Optional[str] = None, + keys_line: Optional[str] = None, + phrases_line: Optional[str] = None, + raw_line: Optional[str] = None, origin: Union[str, Tuple[str]] = "DCML") -> DezrannDict: label_list = [] for e in values_dict: @@ -192,18 +197,28 @@ def convert_dcml_list_to_dezrann_list(values_dict: List[DcmlLabel], ) ) layout = [] - if raw_line > 0: - layout.append({"filter": {"type": "Harmony"}, "style": {"line": line}}) - #if harmony_line > 0: - # ... - #if keys_line > 0: - # ... + if cadences: + layout.append({"filter": {"type": "Cadence"}, "style": {"line": "all"}}) + if harmony_line: + layout.append({"filter": {"type": "Harmony"}, "style": {"line": harmony_line}}) + if keys_line: + layout.append({"filter": {"type": "Localkey"}, "style": {"line": keys_line}}) + if phrases_line: + layout.append({"filter": {"type": "Phrase"}, "style": {"line": phrases_line}}) + if raw_line: + layout.append({"filter": {"type": "Harmony"}, "style": {"line": raw_line}}) + return DezrannDict(labels=label_list, meta={"layout": layout}) def generate_dez(path_measures: str, path_labels: str, output_path: str = "labels.dez", + cadences: bool = False, + harmonies: Optional[str] = None, + keys: Optional[str] = None, + phrases: Optional[str] = None, + raw: Optional[str] = None, origin: Union[str, Tuple[str]] = "DCML"): """ path_measures : :obj:`str` @@ -212,21 +227,33 @@ def generate_dez(path_measures: str, Path to a TSV file as output by format_data(). output_labels : :obj:`str` Path to a TSV file as output by format_data(). - origin : :obj:`list` - List of source(s) from which the labels originate. Defaults to ["DCML"]. + origin : :obj:`tuple` + Tuple of source(s) from which the labels originate. Defaults to "DCML". """ - harmonies = pd.read_csv( + harmonies_df = pd.read_csv( path_labels, sep='\t', usecols=['mc', 'mc_onset', 'duration_qb', 'label'], #'chord' converters={'mc_onset': safe_frac} ) - measures = pd.read_csv( - path_measures, sep='\t', - usecols=['mc', 'quarterbeats_all_endings'], - converters={'quarterbeats_all_endings': safe_frac} + try: + measures_df = pd.read_csv( + path_measures, sep='\t', + usecols=['mc', 'quarterbeats_all_endings'], + converters={'quarterbeats_all_endings': safe_frac} + ) + except ValueError as e: + raise ValueError(f"{path_measures} could not be loaded as a measure map because of the following error:\n'{e}'") + + dcml_labels = transform_df(labels=harmonies_df, measures=measures_df) + dezrann_content = convert_dcml_list_to_dezrann_list( + dcml_labels, + cadences=cadences, + harmony_line=harmonies, + keys_line=keys, + phrases_line=phrases, + raw_line=raw, + origin=origin ) - dcml_labels = transform_df(labels=harmonies, measures=measures) - dezrann_content = convert_dcml_list_to_dezrann_list(dcml_labels, origin=origin) # Manual post-processing #TODO: improve these cases # 1) Avoid NaN values in "duration" (happens in second endings) @@ -268,12 +295,45 @@ def generate_all_dez(output_dir=OUTPUT_DIR): def main(input_dir: str, measures_dir: str, output_dir: str, - cadences: bool, - harmony_line: Optional[str], # will transform and pass in "bot.1", None otherwise - keys_line: Optional[str], - phrases_line: Optional[str], - raw_line: Optional[str]): - pass + cadences: bool = False, + harmonies: Optional[str] = None, + keys: Optional[str] = None, + phrases: Optional[str] = None, + raw: Optional[str] = None): + if not cadences and all(arg is None for arg in (harmonies, keys, phrases, raw)): + print(f"Nothing to do because no features have been selected.") + return + input_files = [f for f in os.listdir(input_dir) if f.endswith('.tsv')] + # measures_files = glob.glob(f"{measures_dir}/*.tsv") + harmony_measure_matches = [] + for tsv_name in input_files: + measures_file_path = os.path.join(measures_dir, tsv_name) + if os.path.isfile(measures_file_path): + harmonies_file_path = os.path.join(input_dir, tsv_name) + harmony_measure_matches.append((harmonies_file_path, measures_file_path)) + else: + print(f"No measure map found for {tsv_name}. Skipping.") + continue + for input_file, measure_file in harmony_measure_matches: + if output_dir == input_dir: + output_file_path = measure_file.replace(".tsv", ".dez") + else: + dez_file = os.path.basename(measure_file).replace(".tsv", ".dez") + output_file_path = os.path.join(output_dir, dez_file) + try: + generate_dez( + path_labels=input_file, + path_measures=measure_file, + output_path=output_file_path, + cadences=cadences, + harmonies=harmonies, + keys=keys, + phrases=phrases, + raw=raw + ) + print(f"{output_file_path} successfully written.") + except Exception as e: + print(f"Converting {input_file} failed with '{e}'") LINE_VALUES = { 1: "top.1", @@ -295,12 +355,51 @@ def transform_line_argument(line: Optional[Union[int, str]]) -> Optional[str]: if line < 0: line = abs(line) + 3 return LINE_VALUES[line] + +def resolve_dir(d): + """ Resolves '~' to HOME directory and turns ``d`` into an absolute path. + """ + if d is None: + return None + d = str(d) + if '~' in d: + return os.path.expanduser(d) + return os.path.abspath(d) def process_arguments(args) -> dict: - kwargs = {} + input_dir = resolve_dir(args.dir) + assert os.path.isdir(input_dir), f"{args.dir} is not an existing directory." + if args.measures is None: + measures_dir = os.path.abspath(os.path.join(input_dir, '..', 'measures')) + if not os.path.isdir(measures_dir): + raise ValueError(f"No directory with measure maps was specified and the default path " + f"{measures_dir} does not exist.") + else: + measures_dir = resolve_dir(args.measures) + if not os.path.isdir(measures_dir): + raise ValueError(f"{measures_dir} is not an existing directory.") + if args.out is None: + output_dir = input_dir + else: + output_dir = resolve_dir(args.out) + if not os.path.isdir(output_dir): + raise ValueError(f"{output_dir} is not an existing directory.") + kwargs = dict( + input_dir=input_dir, + measures_dir=measures_dir, + output_dir=output_dir + ) line_args = ('harmonies', 'keys', 'phrases', 'raw') - pass + for arg in line_args: + arg_val = getattr(args, arg) + if arg_val is None: + continue + kwargs[arg] = transform_line_argument(arg_val) + if args.cadences: + kwargs['cadences'] = True + print(kwargs) + return kwargs def run(): @@ -329,11 +428,29 @@ def run(): "to find a sibling to the source dir called 'measures'.") parser.add_argument('-o', '--out', metavar='OUT_DIR', help='Output directory for .dez files. Defaults to the input directory.') - parser.add_argument('-H', '--harmonies', choices=[0, 1, 2, 3, 4, 5, 6]) - parser.add_argument('-K', '--keys', choices=[0, 1, 2, 3, 4, 5, 6]) - parser.add_argument('-P', '--phrases', choices=[0, 1, 2, 3, 4, 5, 6]) - parser.add_argument('-C', '--cadences', choices=[0, 1, 2, 3, 4, 5, 6]) - parser.add_argument('--raw', choices=[0, 1, 2, 3, 4, 5, 6]) + parser.add_argument('-C', + '--cadences', + action="store_true", + ) + parser.add_argument('-H', + '--harmonies', + metavar="{1-6}, default: 4", + default="4", + choices=["1", "2", "3", "4", "5", "6", "-1", "-2", "-3"], + ) + parser.add_argument('-K', + '--keys', + metavar="{1-6}, default: 5", + default="5", + choices=["1", "2", "3", "4", "5", "6", "-1", "-2", "-3"]) + parser.add_argument('-P', + '--phrases', + metavar="{1-6}, default: 6", + default="6", + choices=["1", "2", "3", "4", "5", "6", "-1", "-2", "-3"]) + parser.add_argument('--raw', + metavar="{1-6}", + choices=["1", "2", "3", "4", "5", "6", "-1", "-2", "-3"]) args = parser.parse_args() kwargs = process_arguments(args) main(**kwargs) From 8129db428454cc125a6612cc5fc718ce9c41aab4 Mon Sep 17 00:00:00 2001 From: johentsch Date: Fri, 24 Feb 2023 09:45:57 +0100 Subject: [PATCH 06/13] refines commandline interface with docstrings and better argument treatment; enables deactivating the default layers by passing 0 --- src/ms3/dezrann.py | 89 +++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 32 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index af4dfb51..edda7e97 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -308,6 +308,9 @@ def main(input_dir: str, harmony_measure_matches = [] for tsv_name in input_files: measures_file_path = os.path.join(measures_dir, tsv_name) + if not os.path.isfile(measures_file_path): + # could be a directory + continue if os.path.isfile(measures_file_path): harmonies_file_path = os.path.join(input_dir, tsv_name) harmony_measure_matches.append((harmonies_file_path, measures_file_path)) @@ -345,13 +348,16 @@ def main(input_dir: str, } def transform_line_argument(line: Optional[Union[int, str]]) -> Optional[str]: + """Takes a number bet""" if line is None: return try: line = int(line) - assert line in [1,2,3,4,5,6, -1, -2, -3] + assert line in [1,2,3,4,5,6, 0 -1, -2, -3] except (TypeError, ValueError, AssertionError): - raise ValueError(f"{line} is not a valid argument, shoube within 1-6.") + raise ValueError(f"{line} is not a valid argument, should be within [0, 6].") + if line == 0: + return None if line < 0: line = abs(line) + 3 return LINE_VALUES[line] @@ -367,7 +373,8 @@ def resolve_dir(d): return os.path.abspath(d) -def process_arguments(args) -> dict: +def process_arguments(args: argparse.Namespace) -> dict: + """Transforms the user's input arguments into keyword arguments for :func:`main` or raises a ValueError.""" input_dir = resolve_dir(args.dir) assert os.path.isdir(input_dir), f"{args.dir} is not an existing directory." if args.measures is None: @@ -395,7 +402,13 @@ def process_arguments(args) -> dict: arg_val = getattr(args, arg) if arg_val is None: continue - kwargs[arg] = transform_line_argument(arg_val) + line_arg = transform_line_argument(arg_val) + if line_arg is None: + continue + kwargs[arg] = line_arg + if len(set(kwargs.values())) < len(kwargs.values()): + selected_args = {arg: f"'{getattr(args, arg)}' => {kwargs[arg]}" for arg in line_args if arg in kwargs} + raise ValueError(f"You selected the same annotation layer more than once: {selected_args}.") if args.cadences: kwargs['cadences'] = True print(kwargs) @@ -405,25 +418,26 @@ def process_arguments(args) -> dict: def run(): parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description='''\ - ----------------------------- - | DCML => Dezrann converter | - ----------------------------- - - This script converts DCML harmony annotations into the .dez JSON format used by the dezrann.net app. It is - standalone and does not require ms3 to be installed. Its only requirement is pandas. - - Apart from that, the script requires that you have previously extracted both harmonies and measures from the - annotated scores or that you are converting a DCML corpus (https://github.com/DCMLab/dcml_corpora), - where both facets are provided by default. In order to (re-) extract the labels, use the command: - - ms3 extract -X -M - - Or, if you want to convert other harmony or chord labels from your MuseScore files, use -L for labels. - ms3 extract -h will show you all options. - ''') - parser.add_argument("dir", metavar='DIR', - help='Folder that will be scanned for TSV files to convert. Defaults to current working directory.') - parser.add_argument('-m', '--measures', metavar='DIR', +----------------------------- +| DCML => Dezrann converter | +----------------------------- + +This script converts DCML harmony annotations into the .dez JSON format used by the dezrann.net app. It is +standalone and does not require ms3 to be installed. Its only requirement is pandas. + +Apart from that, the script requires that you have previously extracted both harmonies and measures from the +annotated scores or that you are converting a DCML corpus (https://github.com/DCMLab/dcml_corpora), +where both facets are provided by default. In order to (re-) extract the labels, use the command: + + ms3 extract -X -M + +Or, if you want to convert other harmony or chord labels from your MuseScore files, use -L for labels. +ms3 extract -h will show you all options. +''') + parser.add_argument("dir", metavar='IN_DIR', + help='Folder that will be scanned for TSV files to convert. Defaults to current working directory. ' + 'Sub-directories are not taken into account.') + parser.add_argument('-m', '--measures', metavar='MEASURES_DIR', help="Folder in which to look for the corrsponding measure maps. By default, the script will try " "to find a sibling to the source dir called 'measures'.") parser.add_argument('-o', '--out', metavar='OUT_DIR', @@ -431,26 +445,37 @@ def run(): parser.add_argument('-C', '--cadences', action="store_true", + help="Pass this flag if you want to add time-point cadence labels to the .dez files." ) - parser.add_argument('-H', - '--harmonies', - metavar="{1-6}, default: 4", + possible_line_arguments = ("0", "1", "2", "3", "4", "5", "6", "-1", "-2", "-3") + parser.add_argument('-H', + '--harmonies', + metavar="{0-6}, default: 4", default="4", - choices=["1", "2", "3", "4", "5", "6", "-1", "-2", "-3"], + choices=possible_line_arguments, + help="By default, harmony annotations will be set on the first line under the system (layer " + "4 out of 6). Pick another layer or pass 0 to not add harmonies." ) parser.add_argument('-K', '--keys', - metavar="{1-6}, default: 5", + metavar="{0-6}, default: 5", default="5", - choices=["1", "2", "3", "4", "5", "6", "-1", "-2", "-3"]) + choices=possible_line_arguments, + help="By default, local key segments will be set on the second line under the system (layer " + "5 out of 6). Pick another layer or pass 0 to not add key segments. Note, however, " + "that harmonies are underdetermined without their local key.") parser.add_argument('-P', '--phrases', - metavar="{1-6}, default: 6", + metavar="{0-6}, default: 6", default="6", - choices=["1", "2", "3", "4", "5", "6", "-1", "-2", "-3"]) + choices=possible_line_arguments, + help="By default, phrase annotations will be set on the third line under the system (layer " + "6 out of 6). Pick another layer or pass 0 to not add phrases.") parser.add_argument('--raw', metavar="{1-6}", - choices=["1", "2", "3", "4", "5", "6", "-1", "-2", "-3"]) + choices=possible_line_arguments, + help="Pass this argument to add a layer with the 'raw' labels, i.e. including local key, " + "cadence and phrase annotations.") args = parser.parse_args() kwargs = process_arguments(args) main(**kwargs) From f4310fed1ce63fb722fdc0bf01678201315802fd Mon Sep 17 00:00:00 2001 From: johentsch Date: Fri, 24 Feb 2023 10:23:19 +0100 Subject: [PATCH 07/13] this version converts raw labels, independent of the given line arguments --- src/ms3/dezrann.py | 66 ++++++++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index edda7e97..a86f80ff 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -140,7 +140,7 @@ class DcmlLabel(TypedDict): def transform_df(labels: pd.DataFrame, - measures: pd.DataFrame, + measures: Optional[pd.DataFrame], label_column: str = 'label') -> List[DcmlLabel]: """ @@ -148,10 +148,18 @@ def transform_df(labels: pd.DataFrame, ---------- labels: Dataframe as found in the 'harmonies' folder of a DCML corpus. Needs to have columns with - the correct dtypes {'mc': int, 'mc_onset': fractions.Fraction} and no missing values. + the correct dtypes {'mc': int, + 'mc_onset': fractions.Fraction, + 'duration_qb': float, + 'quarterbeats': fraction.Fraction, + 'label': str, + 'chord': str, + 'cadence': str, + 'phraseend': str} + and no missing values. measures: - Dataframe as found in the 'measures' folder of a DCML corpus. Requires the columns - {'mc': int, 'quarterbeats_all_endings': fractions.Fraction} + (optional) Dataframe as found in the 'measures' folder of a DCML corpus for computing quarterbeats for pieces with + voltas. Requires the columns {'mc': int, 'quarterbeats_all_endings': fractions.Fraction} (ms3 >= 1.0.0). label_column: str, optional The column that is to be used as label string. Defaults to 'label'. @@ -159,10 +167,16 @@ def transform_df(labels: pd.DataFrame, ------- List of dictionaries where each represents one row of the input labels. """ - offset_dict = measures.set_index("mc")["quarterbeats_all_endings"] - quarterbeats = labels['mc'].map(offset_dict) - quarterbeats = quarterbeats.astype('float') + (labels.mc_onset * 4.0) - transformed_df = pd.concat([quarterbeats.rename('quarterbeats'), labels.duration_qb.rename('duration'), labels[label_column].rename('label')], axis=1) + + if measures is None or "quarterbeats_all_endings" not in measures.columns: + assert "quarterbeats" in labels.columns, f"Labels are lacking 'quarterbeats': {labels.columns}" + quarterbeats = labels["quarterbeats"] + else: + offset_dict = measures.set_index("mc")["quarterbeats_all_endings"] + quarterbeats = labels['mc'].map(offset_dict) + quarterbeats = quarterbeats.astype('float') + (labels.mc_onset * 4.0) + quarterbeats.rename('quarterbeats', inplace=True) + transformed_df = pd.concat([quarterbeats, labels.duration_qb.rename('duration'), labels[label_column].rename('label')], axis=1) return transformed_df.to_dict(orient='records') def make_dezrann_label( @@ -232,7 +246,7 @@ def generate_dez(path_measures: str, """ harmonies_df = pd.read_csv( path_labels, sep='\t', - usecols=['mc', 'mc_onset', 'duration_qb', 'label'], #'chord' + usecols=['mc', 'mc_onset', 'duration_qb', 'quarterbeats', 'label', 'chord', 'cadence', 'phraseend'], converters={'mc_onset': safe_frac} ) try: @@ -241,10 +255,13 @@ def generate_dez(path_measures: str, usecols=['mc', 'quarterbeats_all_endings'], converters={'quarterbeats_all_endings': safe_frac} ) - except ValueError as e: - raise ValueError(f"{path_measures} could not be loaded as a measure map because of the following error:\n'{e}'") - - dcml_labels = transform_df(labels=harmonies_df, measures=measures_df) + except (ValueError, AssertionError) as e: + measures_df = None + # raise ValueError(f"{path_measures} could not be loaded as a measure map because of the following error:\n'{e}'") + try: + dcml_labels = transform_df(labels=harmonies_df, measures=measures_df) + except Exception as e: + raise ValueError(f"Converting {path_labels} failed with the exception '{e}'.") dezrann_content = convert_dcml_list_to_dezrann_list( dcml_labels, cadences=cadences, @@ -317,9 +334,12 @@ def main(input_dir: str, else: print(f"No measure map found for {tsv_name}. Skipping.") continue + if len(harmony_measure_matches) == 0: + print(f"No matching measure maps found for any of these files: {input_files}") + return for input_file, measure_file in harmony_measure_matches: if output_dir == input_dir: - output_file_path = measure_file.replace(".tsv", ".dez") + output_file_path = input_file.replace(".tsv", ".dez") else: dez_file = os.path.basename(measure_file).replace(".tsv", ".dez") output_file_path = os.path.join(output_dir, dez_file) @@ -398,6 +418,7 @@ def process_arguments(args: argparse.Namespace) -> dict: output_dir=output_dir ) line_args = ('harmonies', 'keys', 'phrases', 'raw') + transformed_line_args = {} for arg in line_args: arg_val = getattr(args, arg) if arg_val is None: @@ -405,10 +426,11 @@ def process_arguments(args: argparse.Namespace) -> dict: line_arg = transform_line_argument(arg_val) if line_arg is None: continue - kwargs[arg] = line_arg - if len(set(kwargs.values())) < len(kwargs.values()): - selected_args = {arg: f"'{getattr(args, arg)}' => {kwargs[arg]}" for arg in line_args if arg in kwargs} + transformed_line_args[arg] = line_arg + if len(set(transformed_line_args.values())) < len(transformed_line_args.values()): + selected_args = {arg: f"'{getattr(args, arg)}' => {arg_val}" for arg, arg_val in transformed_line_args.items()} raise ValueError(f"You selected the same annotation layer more than once: {selected_args}.") + kwargs.update(transformed_line_args) if args.cadences: kwargs['cadences'] = True print(kwargs) @@ -484,11 +506,11 @@ def run(): run() + # import ms3 + # measures = ms3.load_tsv('K283-2_measures.tsv') + # harmonies = ms3.load_tsv('K283-2_harmonies.tsv') + # transformed = transform_df(labels=harmonies, measures=measures) + # print(transformed) - #measures = ms3.load_tsv('src/ms3/K283-2_measures.tsv') - #harmonies = ms3.load_tsv('src/ms3/K283-2_harmonies.tsv') - #transformed = transform_df(labels=harmonies, measures=measures) - #print(transformed) - #dez = generate_dez('K283-2_measures.tsv', 'K283-2_harmonies.tsv') #generate_all_dez() \ No newline at end of file From 16b7cc365ffb14754af9e4ca13c7382cddeab7f5 Mon Sep 17 00:00:00 2001 From: johentsch Date: Fri, 24 Feb 2023 14:50:45 +0100 Subject: [PATCH 08/13] adds to transform_df() the algorithm that copies the preceding label onto beat 1 of each alternative ending (volta); also prepares the function for creating labels pertaining to the various layers --- src/ms3/dezrann.py | 100 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 19 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index a86f80ff..c50ded71 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -133,15 +133,39 @@ class DcmlLabel(TypedDict): quarterbeats: float duration: float label: str - harmony: str - key: str - phrase: str - cadence: str + + +def get_volta_groups(mc2volta: pd.Series) -> List[List[int]]: + """Takes a Series where the index has measure counts and values are NA for 'normal' measures and 1, 2... for + measures belonging to a first, second... ending. Returns for each group a list of MCs each of which pertains + to the first measure of an alternative ending. For example, two alternative two-bar endings in MC [15, 16][17, 18] + would figure as [15, 17] in the result list. + """ + volta_groups = [] + filled_volta_col = mc2volta.fillna(-1).astype(int) + volta_segmentation = (filled_volta_col != filled_volta_col.shift()).fillna(True).cumsum() + current_groups_first_mcs = [] + for i, segment in filled_volta_col.groupby(volta_segmentation): + volta_number = segment.iloc[0] + if volta_number == -1: + # current group ends, if there is one + if i == 1: + continue + elif len(current_groups_first_mcs) == 0: + raise RuntimeError(f"Mistake in the algorithm when processing column {filled_volta_col.volta}") + else: + volta_groups.append(current_groups_first_mcs) + current_groups_first_mcs = [] + else: + first_mc = segment.index[0] + current_groups_first_mcs.append(first_mc) + return volta_groups def transform_df(labels: pd.DataFrame, - measures: Optional[pd.DataFrame], - label_column: str = 'label') -> List[DcmlLabel]: + measures: pd.DataFrame, + label_column: str = 'label', + ) -> List[DcmlLabel]: """ Parameters @@ -154,31 +178,68 @@ def transform_df(labels: pd.DataFrame, 'quarterbeats': fraction.Fraction, 'label': str, 'chord': str, + 'localkey': str, 'cadence': str, 'phraseend': str} and no missing values. measures: (optional) Dataframe as found in the 'measures' folder of a DCML corpus for computing quarterbeats for pieces with voltas. Requires the columns {'mc': int, 'quarterbeats_all_endings': fractions.Fraction} (ms3 >= 1.0.0). - label_column: str, optional + label_column: {'label', 'chord', 'cadence', 'phraseend'} The column that is to be used as label string. Defaults to 'label'. Returns ------- List of dictionaries where each represents one row of the input labels. """ - - if measures is None or "quarterbeats_all_endings" not in measures.columns: - assert "quarterbeats" in labels.columns, f"Labels are lacking 'quarterbeats': {labels.columns}" + score_has_voltas = "quarterbeats_all_endings" in measures.columns + if not score_has_voltas: + assert "quarterbeats" in labels.columns, f"Labels are lacking 'quarterbeats' column: {labels.columns}" quarterbeats = labels["quarterbeats"] + last_mc = measures.iloc[-1] + end_of_score = last_mc.quarterbeats + last_mc.act_dur * 4.0 else: - offset_dict = measures.set_index("mc")["quarterbeats_all_endings"] + # the column 'quarterbeats_all_endings' is present, meaning the piece has first and second endings and the + # quarterbeats, which normally leave out first endings, need to be recomputed + last_mc = measures.iloc[-1] + end_of_score = last_mc.quarterbeats_all_endings + last_mc.act_dur * 4.0 + M = measures.set_index("mc") + offset_dict = M["quarterbeats_all_endings"] quarterbeats = labels['mc'].map(offset_dict) - quarterbeats = quarterbeats.astype('float') + (labels.mc_onset * 4.0) + quarterbeats = quarterbeats + (labels.mc_onset * 4.0) quarterbeats.rename('quarterbeats', inplace=True) - transformed_df = pd.concat([quarterbeats, labels.duration_qb.rename('duration'), labels[label_column].rename('label')], axis=1) + # also, the first beat of each volta needs to have a label for computing correct durations + volta_groups = get_volta_groups(M.volta) + label_and_qb = pd.concat([labels[label_column].rename('label'), quarterbeats.astype(float)], axis=1) + n_before = len(labels.index) + if label_column == 'phraseend': + label_and_qb = label_and_qb[label_and_qb.label == '{'] + if label_column == 'localkey': + label_and_qb = label_and_qb[label_and_qb.label != label_and_qb.label.shift().fillna(True)] + else: # {'chord', 'cadence', 'label'} + label_and_qb = label_and_qb[label_and_qb.label.notna()] + n_after = len(label_and_qb.index) + print(f"Creating labels for {n_after} {label_column} labels out of {n_before} rows.") + if label_column == 'cadence': + duration = pd.Series(0.0, dtype=float, index=label_and_qb.index, name='duration') + else: + if score_has_voltas: + for group in volta_groups: + volta_beginnings_quarterbeats = [M.loc[mc, 'quarterbeats_all_endings'] for mc in group] + labels_before_group = label_and_qb.loc[label_and_qb.quarterbeats < volta_beginnings_quarterbeats[0], 'label'] + for volta_beginning_qb in volta_beginnings_quarterbeats: + if volta_beginning_qb in label_and_qb.quarterbeats.values: + continue + repeated_label = pd.DataFrame([[labels_before_group.iloc[-1], float(volta_beginning_qb)]], + columns=['label', 'quarterbeats']) + label_and_qb = pd.concat([label_and_qb, repeated_label], ignore_index=True) + label_and_qb = label_and_qb.sort_values('quarterbeats') + qb_column = label_and_qb.quarterbeats + duration = qb_column.shift(-1).fillna(end_of_score) - qb_column + duration = duration.rename('duration').astype(float) + transformed_df = pd.concat([label_and_qb, duration], axis=1) return transformed_df.to_dict(orient='records') - + def make_dezrann_label( quarterbeats: float, duration: float, label: str, origin: Union[str, Tuple[str]]) -> DezrannLabel: if isinstance(origin, str): @@ -246,18 +307,19 @@ def generate_dez(path_measures: str, """ harmonies_df = pd.read_csv( path_labels, sep='\t', - usecols=['mc', 'mc_onset', 'duration_qb', 'quarterbeats', 'label', 'chord', 'cadence', 'phraseend'], - converters={'mc_onset': safe_frac} + converters={'mc': int, + 'mc_onset': safe_frac, + 'quarterbeats': safe_frac, + } ) try: measures_df = pd.read_csv( path_measures, sep='\t', - usecols=['mc', 'quarterbeats_all_endings'], + dtype={'mc': int, 'volta': 'Int64'}, converters={'quarterbeats_all_endings': safe_frac} ) except (ValueError, AssertionError) as e: - measures_df = None - # raise ValueError(f"{path_measures} could not be loaded as a measure map because of the following error:\n'{e}'") + raise ValueError(f"{path_measures} could not be loaded as a measure map because of the following error:\n'{e}'") try: dcml_labels = transform_df(labels=harmonies_df, measures=measures_df) except Exception as e: From a965a1eed8ee222e3f535983a513d133bc6e1595 Mon Sep 17 00:00:00 2001 From: johentsch Date: Fri, 24 Feb 2023 15:11:42 +0100 Subject: [PATCH 09/13] integrates calls to transform_df with the current logic within generate_dez() --- src/ms3/dezrann.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index c50ded71..bc9acb4e 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -193,16 +193,16 @@ def transform_df(labels: pd.DataFrame, List of dictionaries where each represents one row of the input labels. """ score_has_voltas = "quarterbeats_all_endings" in measures.columns + last_mc_row = measures.iloc[-1] + end_of_score = float(last_mc_row.act_dur) * 4.0 if not score_has_voltas: assert "quarterbeats" in labels.columns, f"Labels are lacking 'quarterbeats' column: {labels.columns}" quarterbeats = labels["quarterbeats"] - last_mc = measures.iloc[-1] - end_of_score = last_mc.quarterbeats + last_mc.act_dur * 4.0 + end_of_score += float(last_mc_row.quarterbeats) else: # the column 'quarterbeats_all_endings' is present, meaning the piece has first and second endings and the # quarterbeats, which normally leave out first endings, need to be recomputed - last_mc = measures.iloc[-1] - end_of_score = last_mc.quarterbeats_all_endings + last_mc.act_dur * 4.0 + end_of_score += float(last_mc_row.quarterbeats_all_endings) M = measures.set_index("mc") offset_dict = M["quarterbeats_all_endings"] quarterbeats = labels['mc'].map(offset_dict) @@ -316,14 +316,24 @@ def generate_dez(path_measures: str, measures_df = pd.read_csv( path_measures, sep='\t', dtype={'mc': int, 'volta': 'Int64'}, - converters={'quarterbeats_all_endings': safe_frac} + converters={'quarterbeats_all_endings': safe_frac, + 'act_dur': safe_frac} ) except (ValueError, AssertionError) as e: raise ValueError(f"{path_measures} could not be loaded as a measure map because of the following error:\n'{e}'") - try: - dcml_labels = transform_df(labels=harmonies_df, measures=measures_df) - except Exception as e: - raise ValueError(f"Converting {path_labels} failed with the exception '{e}'.") + converted_labels = {} + if cadences: + converted_labels['cadences'] = transform_df(labels=harmonies_df, measures=measures_df, label_column='cadence') + for arg, label_column in ((harmonies, "chord"), + (keys, "localkey"), + (phrases, "phraseend"), + (raw, "label")): + if arg is not None: + converted_labels[arg] = transform_df(labels=harmonies_df, measures=measures_df, label_column=label_column) + # from pprint import pprint + # for line, converted in converted_labels.items(): + # print(line) + # pprint(converted) dezrann_content = convert_dcml_list_to_dezrann_list( dcml_labels, cadences=cadences, @@ -572,7 +582,6 @@ def run(): # measures = ms3.load_tsv('K283-2_measures.tsv') # harmonies = ms3.load_tsv('K283-2_harmonies.tsv') # transformed = transform_df(labels=harmonies, measures=measures) - # print(transformed) - #dez = generate_dez('K283-2_measures.tsv', 'K283-2_harmonies.tsv') + #dez = generate_dez('K283-2_measures.tsv', 'K283-2_harmonies.tsv', cadences=True, harmonies="bot.4", keys="bot.5", phrases="bot.6", raw="top.3") #generate_all_dez() \ No newline at end of file From cb659727609792febf1326af2cb758335b45f756 Mon Sep 17 00:00:00 2001 From: Louis Couturier Date: Fri, 24 Feb 2023 16:33:38 +0100 Subject: [PATCH 10/13] link DCML labels to final Dezrann labels and layout --- src/ms3/dezrann.py | 95 +++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 40 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index bc9acb4e..1f257471 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -116,7 +116,7 @@ def safe_frac(s: str) -> Union[Fraction, str]: class DezrannLabel(TypedDict): """Represents one label in a .dez file.""" - type: str + label_type: str start: float duration: float #line: str # Determined by the meta-layout @@ -161,7 +161,6 @@ def get_volta_groups(mc2volta: pd.Series) -> List[List[int]]: current_groups_first_mcs.append(first_mc) return volta_groups - def transform_df(labels: pd.DataFrame, measures: pd.DataFrame, label_column: str = 'label', @@ -241,13 +240,17 @@ def transform_df(labels: pd.DataFrame, return transformed_df.to_dict(orient='records') def make_dezrann_label( - quarterbeats: float, duration: float, label: str, origin: Union[str, Tuple[str]]) -> DezrannLabel: + label_type: str, + quarterbeats: float, + duration: float, + label: str, + origin: Union[str, Tuple[str]]) -> DezrannLabel: if isinstance(origin, str): layers = [origin] else: layers = list(origin) return DezrannLabel( - type="Harmony", #TODO: adapt type to current label + label_type=label_type, start=quarterbeats, duration=duration, tag=label, @@ -255,37 +258,46 @@ def make_dezrann_label( ) def convert_dcml_list_to_dezrann_list(values_dict: List[DcmlLabel], - cadences: bool = False, - harmony_line: Optional[str] = None, - keys_line: Optional[str] = None, - phrases_line: Optional[str] = None, - raw_line: Optional[str] = None, + label_type: str, origin: Union[str, Tuple[str]] = "DCML") -> DezrannDict: - label_list = [] + dezrann_label_list = [] for e in values_dict: - label_list.append( + dezrann_label_list.append( make_dezrann_label( + label_type=label_type, quarterbeats=e["quarterbeats"], duration=e["duration"], label=e["label"], origin=origin ) ) + + return dezrann_label_list + #return DezrannDict(labels=label_list, meta={"layout": layout}) + +def make_layout( + cadences: bool = False, + harmonies: Optional[str] = None, + keys: Optional[str] = None, + phrases: Optional[str] = None, + raw: Optional[str] = None): + """ + Compile the line positions for target labels into Dezrann layout parameter. + """ layout = [] if cadences: layout.append({"filter": {"type": "Cadence"}, "style": {"line": "all"}}) - if harmony_line: - layout.append({"filter": {"type": "Harmony"}, "style": {"line": harmony_line}}) - if keys_line: - layout.append({"filter": {"type": "Localkey"}, "style": {"line": keys_line}}) - if phrases_line: - layout.append({"filter": {"type": "Phrase"}, "style": {"line": phrases_line}}) - if raw_line: - layout.append({"filter": {"type": "Harmony"}, "style": {"line": raw_line}}) - - return DezrannDict(labels=label_list, meta={"layout": layout}) + if harmonies: + layout.append({"filter": {"type": "Harmony"}, "style": {"line": harmonies}}) + if keys: + layout.append({"filter": {"type": "Local Key"}, "style": {"line": keys}}) + if phrases: + layout.append({"filter": {"type": "Phrase"}, "style": {"line": phrases}}) + if raw: + layout.append({"filter": {"type": "Harmony"}, "style": {"line": raw}}) + + return layout - def generate_dez(path_measures: str, path_labels: str, output_path: str = "labels.dez", @@ -321,28 +333,31 @@ def generate_dez(path_measures: str, ) except (ValueError, AssertionError) as e: raise ValueError(f"{path_measures} could not be loaded as a measure map because of the following error:\n'{e}'") - converted_labels = {} + + dezrann_labels = [] if cadences: - converted_labels['cadences'] = transform_df(labels=harmonies_df, measures=measures_df, label_column='cadence') - for arg, label_column in ((harmonies, "chord"), - (keys, "localkey"), - (phrases, "phraseend"), - (raw, "label")): + dcml_labels = transform_df(labels=harmonies_df, measures=measures_df, label_column='cadence') + dezrann_labels += convert_dcml_list_to_dezrann_list(dcml_labels, label_type="Cadence", origin=origin) + for arg, label_column, label_type in ((harmonies, "chord", "Harmony"), #Third argument + (keys, "localkey", "Local Key"), + (phrases, "phraseend", "Phrase"), + (raw, "label", "Harmony")): if arg is not None: - converted_labels[arg] = transform_df(labels=harmonies_df, measures=measures_df, label_column=label_column) - # from pprint import pprint - # for line, converted in converted_labels.items(): - # print(line) - # pprint(converted) - dezrann_content = convert_dcml_list_to_dezrann_list( - dcml_labels, + dcml_labels = transform_df(labels=harmonies_df, measures=measures_df, label_column=label_column) + dezrann_labels += convert_dcml_list_to_dezrann_list( + dcml_labels, + label_type=label_type, + origin=origin + ) + + layout = make_layout( cadences=cadences, - harmony_line=harmonies, - keys_line=keys, - phrases_line=phrases, - raw_line=raw, - origin=origin + harmonies=harmonies, + keys=keys, + phrases=phrases, + raw=raw ) + dezrann_content = DezrannDict(labels=dezrann_labels, meta={"layout": layout}) # Manual post-processing #TODO: improve these cases # 1) Avoid NaN values in "duration" (happens in second endings) From 544efa13e4ef73630d4f379f4df8275910eff728 Mon Sep 17 00:00:00 2001 From: Louis Couturier Date: Fri, 24 Feb 2023 16:42:15 +0100 Subject: [PATCH 11/13] fix label_type naming typo --- src/ms3/dezrann.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index 1f257471..3168fab8 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -116,7 +116,7 @@ def safe_frac(s: str) -> Union[Fraction, str]: class DezrannLabel(TypedDict): """Represents one label in a .dez file.""" - label_type: str + type: str start: float duration: float #line: str # Determined by the meta-layout @@ -250,7 +250,7 @@ def make_dezrann_label( else: layers = list(origin) return DezrannLabel( - label_type=label_type, + type=label_type, start=quarterbeats, duration=duration, tag=label, @@ -273,7 +273,6 @@ def convert_dcml_list_to_dezrann_list(values_dict: List[DcmlLabel], ) return dezrann_label_list - #return DezrannDict(labels=label_list, meta={"layout": layout}) def make_layout( cadences: bool = False, @@ -411,7 +410,7 @@ def main(input_dir: str, # measures_files = glob.glob(f"{measures_dir}/*.tsv") harmony_measure_matches = [] for tsv_name in input_files: - measures_file_path = os.path.join(measures_dir, tsv_name) + measures_file_path = os.path.join(measures_dir, tsv_name).replace("harmonies", "measures") if not os.path.isfile(measures_file_path): # could be a directory continue From 7658f4821d7b1a32e16f6ad3eb546f211c73bc59 Mon Sep 17 00:00:00 2001 From: Louis Couturier Date: Fri, 24 Feb 2023 16:44:20 +0100 Subject: [PATCH 12/13] remove fixed manual post-processing steps (NaN durations and handling start=0.) --- src/ms3/dezrann.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index 3168fab8..27f9ca99 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -357,17 +357,6 @@ def generate_dez(path_measures: str, raw=raw ) dezrann_content = DezrannDict(labels=dezrann_labels, meta={"layout": layout}) - - # Manual post-processing #TODO: improve these cases - # 1) Avoid NaN values in "duration" (happens in second endings) - # optional : in the transform_df : transformed_df = transformed_df.replace('NaN', 0) ? - for label in dezrann_content['labels']: - if pd.isnull(label['duration']): - print(f"WARNING: NaN duration detected in label {label}.") - label['duration'] = 0 - # 2) Remove "start" value in the first label ? - if dezrann_content['labels'][0]['start'] == 0.: - del dezrann_content['labels'][0]['start'] with open(output_path, 'w', encoding='utf-8') as f: json.dump(dezrann_content, f, indent=2) @@ -410,7 +399,7 @@ def main(input_dir: str, # measures_files = glob.glob(f"{measures_dir}/*.tsv") harmony_measure_matches = [] for tsv_name in input_files: - measures_file_path = os.path.join(measures_dir, tsv_name).replace("harmonies", "measures") + measures_file_path = os.path.join(measures_dir, tsv_name) if not os.path.isfile(measures_file_path): # could be a directory continue From a925564887e2b49b95bc57f2f7ad7f966e233610 Mon Sep 17 00:00:00 2001 From: Louis Couturier Date: Fri, 24 Feb 2023 18:26:08 +0100 Subject: [PATCH 13/13] add safe conversion of quarterbeats --- src/ms3/dezrann.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/ms3/dezrann.py b/src/ms3/dezrann.py index 27f9ca99..ff5aee61 100644 --- a/src/ms3/dezrann.py +++ b/src/ms3/dezrann.py @@ -328,6 +328,7 @@ def generate_dez(path_measures: str, path_measures, sep='\t', dtype={'mc': int, 'volta': 'Int64'}, converters={'quarterbeats_all_endings': safe_frac, + 'quarterbeats': safe_frac, 'act_dur': safe_frac} ) except (ValueError, AssertionError) as e: @@ -420,14 +421,14 @@ def main(input_dir: str, output_file_path = os.path.join(output_dir, dez_file) try: generate_dez( - path_labels=input_file, - path_measures=measure_file, - output_path=output_file_path, - cadences=cadences, - harmonies=harmonies, - keys=keys, - phrases=phrases, - raw=raw + path_labels=input_file, + path_measures=measure_file, + output_path=output_file_path, + cadences=cadences, + harmonies=harmonies, + keys=keys, + phrases=phrases, + raw=raw ) print(f"{output_file_path} successfully written.") except Exception as e: