johentsch · johentsch · Feb 22, 2023 · Feb 23, 2023 · Feb 23, 2023 · Feb 23, 2023
diff --git a/codemeta.json b/codemeta.json
@@ -63,4 +63,4 @@
             }
         }
     ]
-}
+}
diff --git a/src/ms3/annotations.py b/src/ms3/annotations.py
@@ -75,14 +75,17 @@ def __init__(
         cols : :obj:`dict`
             If your columns don't have standard names, pass a {NAME -> ACTUAL_NAME} dictionary.
             Required columns: label, mc, mc_onset, staff, voice
-            Additional columns: harmony_layer, regex_match, absolute_root, rootCase, absolute_base, leftParen, rightParen, offset_x, offset_y, nashville, decoded, color_name,
+            Additional columns: harmony_layer, regex_match, absolute_root, rootCase, absolute_base, leftParen,
+            rightParen, offset_x, offset_y, nashville, decoded, color_name,
             color_html, color_r, color_g, color_b, color_a, placement, minDistance, style, z
         index_col
         sep
         mscx_obj
         infer_types : :obj:`dict`, optional
-            If you want to check all labels against one or several regular expressions, pass them as a {label_type -> regEx} dictionary.
-            The column regex_match will display the label_type of the last matched regEx. If you pass None, the default behaviour
+            If you want to check all labels against one or several regular expressions, pass them as a
+            {label_type -> regEx} dictionary.
+            The column regex_match will display the label_type of the last matched regEx.
+            If you pass None, the default behaviour
             is detecting labels of the DCML harmony annotation standard's current version.
         read_only
         logger_cfg : :obj:`dict`, optional
@@ -133,20 +136,23 @@ def __init__(
     def add_initial_dots(self):
         if self.read_only:
             self.logger.warning(
-                f"Cannot change labels attached to a score. Detach them first."
+                "Cannot change labels attached to a score. Detach them first."
             )
             return
         label_col = self.cols["label"]
         notes = {"a", "b", "c", "d", "e", "f", "g", "h"}
-        add_dots = lambda s: "." + s if s[0].lower() in notes else s
+
+        def add_dots(s):
+            return "." + s if s[0].lower() in notes else s
+
         self.df[label_col] = self.df[label_col].map(add_dots)
 
     def prepare_for_attaching(
         self, staff=None, voice=None, harmony_layer=1, check_for_clashes=True
     ):
         if self.mscx_obj is None:
             self.logger.warning(
-                f"Annotations object not aware to which MSCX object it is attached."
+                "Annotations object not aware to which MSCX object it is attached."
             )
             return pd.DataFrame()
         df = self.df.copy()
@@ -213,25 +219,25 @@ def prepare_for_attaching(
             mn_col = self.cols["mn"] if "mn" in self.cols else "mn"
             if mn_col not in cols:
                 self.logger.error(
-                    f"Annotations need to have at least one column named 'mn' or 'mc'."
+                    "Annotations need to have at least one column named 'mn' or 'mc'."
                 )
                 error = True
             else:
                 inferred_positions = self.infer_mc_from_mn()
                 if inferred_positions.isna().any().any():
                     self.logger.error(
-                        f"Measure counts and corresponding mc_onsets could not be successfully inferred."
+                        "Measure counts and corresponding mc_onsets could not be successfully inferred."
                     )
                     error = True
                 else:
                     if "mn_onset" not in self.cols:
                         self.logger.info(
-                            f"Measure counts successfully inferred. Since there is no 'mn_onset' column, all "
-                            f"mc_onsets have been set to 0."
+                            "Measure counts successfully inferred. Since there is no 'mn_onset' column, all "
+                            "mc_onsets have been set to 0."
                         )
                     else:
                         self.logger.info(
-                            f"Measure counts and corresponding mc_onsets successfully inferred."
+                            "Measure counts and corresponding mc_onsets successfully inferred."
                         )
                     df.insert(df.columns.get_loc("mn"), "mc", inferred_positions["mc"])
                     df.loc[:, "mc_onset"] = inferred_positions["mc_onset"]
@@ -260,7 +266,7 @@ def prepare_for_attaching(
                     error = True
         elif check_for_clashes:
             self.logger.error(
-                f"Check for clashes could not be performed because there are columns missing."
+                "Check for clashes could not be performed because there are columns missing."
             )
 
         if error:
@@ -420,7 +426,7 @@ def tuple_or_na(row):
             elif has_rgb:
                 res.color = rgb2format(res, color_format)
             else:
-                logger.warning(
+                self.logger.warning(
                     f"Color format '{color_format}' could not be computed from columns {present_cols}."
                 )
             res.drop(columns=present_cols, inplace=True)
@@ -441,7 +447,8 @@ def expand_dcml(
         all_in_c=False,
         **kwargs,
     ):
-        """Expands all labels where the regex_match has been inferred as 'dcml' and stores the DataFrame in self._expanded.
+        """Expands all labels where the regex_match has been inferred as 'dcml' and stores the DataFrame in
+        self._expanded.
 
         Parameters
         ----------
@@ -485,13 +492,17 @@ def expand_dcml(
         df = self.get_labels(**kwargs)
         select_dcml = (df.regex_match == "dcml").fillna(False)
         if not select_dcml.any():
-            self.logger.info(f"Score does not contain any DCML harmonic annotations.")
+            self.logger.info("Score does not contain any DCML harmonic annotations.")
             return
         if not drop_others:
             warn_about_others = False
         if warn_about_others and (~select_dcml).any():
+            show_labels = decode_harmonies(
+                df[~select_dcml], keep_layer=True, logger=self.logger
+            )[["mc", "mn", "label", "harmony_layer"]].to_string()
             self.logger.warning(
-                f"Score contains {(~select_dcml).sum()} labels that don't (and {select_dcml.sum()} that do) match the DCML standard:\n{decode_harmonies(df[~select_dcml], keep_layer=True, logger=self.logger)[['mc', 'mn', 'label', 'harmony_layer']].to_string()}",
+                f"Score contains {(~select_dcml).sum()} labels that don't (and {select_dcml.sum()} that do) match the "
+                f"DCML standard:\n{show_labels}",
                 extra={"message_id": (15,)},
             )
         df = df[select_dcml]
@@ -529,7 +540,10 @@ def expand_dcml(
                             "To retain the old behavior, use either.*"
                         ),
                     )
-                    df.loc[select_dcml, exp.columns] = exp
+                    exp_shared_cols = exp.columns.isin(df.columns.values)
+                    df_shared_cols = df.columns.isin(exp.columns.values)
+                    df.loc[select_dcml, df_shared_cols] = exp.loc[:, exp_shared_cols]
+                    df = pd.concat([df, exp.loc[:, ~exp_shared_cols]], axis=1)
                     df.loc[:, key_cols] = df[key_cols].ffill()
                 self._expanded = df
             drop_cols = [
@@ -549,7 +563,7 @@ def expand_dcml(
     def infer_mc_from_mn(self, mscx_obj=None):
         if mscx_obj is None and self.mscx_obj is None:
             self.logger.error(
-                f"Either pass an MSCX object or load this Annotations object to a score using load_annotations()."
+                "Either pass an MSCX object or load this Annotations object to a score using load_annotations()."
             )
             return False
 
@@ -594,7 +608,8 @@ def infer_types(self, regex_dict=None):
                 column_position = self.df.columns.get_loc("harmony_layer") + 1
                 self.df.insert(column_position, "regex_match", regex_col)
             for name, regex in regex_dict.items():
-                # TODO: Check if in the loop, previously matched regex names are being overwritten by those matched after
+                # TODO: Check whether regex names matched earlier in this loop are being overwritten by
+                #  those matched later
                 try:
                     mtch = decoded[sel].str.match(regex)
                 except AttributeError:
@@ -607,7 +622,7 @@ def infer_types(self, regex_dict=None):
     def remove_initial_dots(self):
         if self.read_only:
             self.logger.warning(
-                f"Cannot change labels attached to a score. Detach them first."
+                "Cannot change labels attached to a score. Detach them first."
             )
             return
         label_col = self.cols["label"]
@@ -654,6 +669,7 @@ def _treat_harmony_layer_param(self, harmony_layer, warnings=True):
                 plural = len(not_found) > 1
                 plural_s = "s" if plural else ""
                 self.logger.warning(
-                    f"No labels found with {'these' if plural else 'this'} label{plural_s} harmony_layer{plural_s}: {', '.join(not_found)}"
+                    f"No labels found with {'these' if plural else 'this'} label{plural_s} harmony_layer{plural_s}: "
+                    f"{', '.join(not_found)}"
                 )
         return [all_types[t] for t in lt if t in all_types]
diff --git a/src/ms3/cli.py b/src/ms3/cli.py
@@ -43,6 +43,7 @@ def gather_extract_params(args) -> List[str]:
         for name, arg in zip(
             (
                 "measures",
+                "measure_maps",
                 "notes",
                 "rests",
                 "labels",
@@ -53,6 +54,7 @@ def gather_extract_params(args) -> List[str]:
             ),
             (
                 args.measures,
+                args.measure_maps,
                 args.notes,
                 args.rests,
                 args.labels,
@@ -213,6 +215,7 @@ def extract_cmd(args, parse_obj: Optional[Parse] = None):
         notes_folder=args.notes,
         labels_folder=args.labels,
         measures_folder=args.measures,
+        measure_maps_folder=args.measure_maps,
         rests_folder=args.rests,
         events_folder=args.events,
         chords_folder=args.chords,
@@ -685,6 +688,15 @@ def get_arg_parser():
         const="../measures",
         help="Folder where to store TSV files with measure information needed for tasks such as unfolding repetitions.",
     )
+    extract_args.add_argument(
+        "-MM",
+        "--measure_maps",
+        metavar="folder",
+        nargs="?",
+        const="../measures",
+        help="Folder where to store <name>.mm.json files. They are a variant of the 'normal' --measures with renamed "
+        "columns, satisfying the MeasureMap specification.",
+    )
     extract_args.add_argument(
         "-N",
         "--notes",

diff --git a/src/ms3/corpus.py b/src/ms3/corpus.py
@@ -19,7 +19,10 @@
 import numpy as np
 import pandas as pd
 import pathos.multiprocessing as mp
-from ms3.utils.frictionless_helpers import store_dataframe_resource
+from ms3.utils.frictionless_helpers import (
+    store_as_json_or_yaml,
+    store_dataframe_resource,
+)
 from ms3.utils.functions import compute_path_from_file
 
 from ._typing import (
@@ -43,6 +46,7 @@
 )
 from .piece import Piece
 from .score import Score, compare_two_score_objects
+from .transformations import measures2measure_map
 from .utils import (
     File,
     ask_user_to_choose,
@@ -3046,6 +3050,7 @@ def store_extracted_facets(
         view_name: Optional[str] = None,
         root_dir: Optional[str] = None,
         measures_folder: Optional[str] = None,
+        measure_maps_folder: Optional[str] = None,
         notes_folder: Optional[str] = None,
         rests_folder: Optional[str] = None,
         notes_and_rests_folder: Optional[str] = None,
@@ -3097,11 +3102,19 @@ def store_extracted_facets(
         folder_params = {
             t: lcls[p] for t, p in zip(df_types, folder_vars) if lcls[p] is not None
         }
-        output_metadata = metadata_suffix is not None
-        if len(folder_params) == 0 and not output_metadata:
+        do_store_metadata = metadata_suffix is not None
+        do_store_measure_maps = measure_maps_folder is not None
+        if (
+            len(folder_params) == 0
+            and not do_store_metadata
+            and not do_store_measure_maps
+        ):
             self.logger.warning("Pass at least one parameter to store files.")
             return []
         facets = list(folder_params.keys())
+        do_store_measures_tsv = "measures" in facets
+        if do_store_measure_maps and not do_store_measures_tsv:
+            facets.append("measures")
         df_params = {p: True for p in folder_params.keys()}
         n_scores = len(self._get_parsed_score_files(view_name=view_name, flat=True))
         paths = []
@@ -3124,11 +3137,28 @@ def store_extracted_facets(
                     for facet, df in facet2dataframe.items():
                         if df is None:
                             continue
+                        piece_name = file.piece
+                        if facet == "measures" and do_store_measure_maps:
+                            directory = compute_path_from_file(
+                                file, root_dir=root_dir, folder=measure_maps_folder
+                            )
+                            file_path = os.path.join(directory, f"{piece}.mm.json")
+                            if simulate:
+                                self.logger.info(
+                                    f"Would have stored the MeasureMap from {file.rel_path} as {file_path}."
+                                )
+                            else:
+                                measure_map = measures2measure_map(df)
+                                measure_map_json = measure_map.to_dict(orient="records")
+                                store_as_json_or_yaml(
+                                    measure_map_json, file_path, logger=self.logger
+                                )
+                            if not do_store_measures_tsv:
+                                continue
                         folder = folder_params[facet]
                         directory = compute_path_from_file(
                             file, root_dir=root_dir, folder=folder
                         )
-                        piece_name = file.piece
                         if unfold:
                             piece_name += "_unfolded"
                         facet_param = "harmonies" if facet == "expanded" else facet
@@ -3153,7 +3183,7 @@ def store_extracted_facets(
                                 logger=self.logger,
                             )
                         paths.append(descriptor_or_resource_path)
-        if output_metadata:
+        if do_store_metadata:
             if not markdown:
                 metadata_paths = self.update_metadata_tsv_from_parsed_scores(
                     root_dir=root_dir, suffix=metadata_suffix, markdown_file=None
-Original file line number
+Diff line change
@@ Expand Up / @@ -63,4 +63,4 @@ @@
                 }
             }
         ]
-    }
+    }