diff --git a/src/rocrate_tabular/tabulator.py b/src/rocrate_tabular/tabulator.py index ef7c42f..b3f0123 100644 --- a/src/rocrate_tabular/tabulator.py +++ b/src/rocrate_tabular/tabulator.py @@ -236,7 +236,7 @@ def add_expanded_property(self, entity_data, ignore_props, name, target): ) return props - # Both of the following mutate entity_data + # Both of the following mutate entity_data. I'm not very happy about it def set_property(self, entity_data, name, value, target_id): """Add a property to entity_data, and add the target_id if defined""" @@ -281,19 +281,16 @@ def find_csv(self): FROM property WHERE property_label = '@type' AND value = 'File' AND LOWER(source_id) LIKE '%.csv' """) - for entity_id in [row["source_id"] for row in files]: - entity_id = entity_id.replace("#", "") - self.add_csv(self.crate_dir / entity_id, "csv_files") - - def add_csv(self, csv_path, table_name): - with open(csv_path, newline="") as f: - reader = csv.DictReader( - f - ) # Use DictReader to read each row as a dictionary + for entity in files: + entity_id = entity["source_id"] + print("Adding csv for {entity_id}") + csvtext = self.crate.get(entity_id).fetch() + reader = csv.DictReader(csvtext.splitlines()) rows = list(reader) + print(rows) if rows: # Insert rows into the table (the table will be created if it doesn't exist) - self.db[table_name].insert_all(rows, pk="id", alter=True, ignore=True) + self.db["csv_files"].insert_all(rows, pk="id", alter=True, ignore=True) # `pk="id"` assumes there's an 'id' column; if no primary key, you can remove it. @@ -354,7 +351,7 @@ def cli(): """) if args.csv: - tb.find_csv_contents() + tb.find_csv() tb.export_csv() diff --git a/tests/crates/textfiles/ro-crate-metadata.json b/tests/crates/textfiles/ro-crate-metadata.json index eac24c8..209ebee 100644 --- a/tests/crates/textfiles/ro-crate-metadata.json +++ b/tests/crates/textfiles/ro-crate-metadata.json @@ -25,7 +25,10 @@ { "@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/" }, - "hasPart": { "@id": "#doc001" } + "hasPart": [ + { "@id": "doc001" }, + { "@id": "doc002" } + ] }, { "@id": "doc001", @@ -47,6 +50,19 @@ "name": "text file", "contentEncoding": "text/plain" }, + { + "@id": "doc002", + "@type": "Dataset", + "name": "A csv file for testing", + "hasPart": [ + { "@id": "doc002/utterances.csv" } + ] + }, + { "@id": "doc002/utterances.csv", + "@type": "File", + "name": "CSV file", + "contentEncoding": "text/csv" + }, { "@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/", "@type": "CreativeWork",