Skip to content

Commit

Permalink
CSV loading is working when I test it on the command line
Browse files: browse the repository at this point in the history
  • Loading branch information
spikelynch committed Dec 5, 2024
1 parent dd743a9 commit f23c8c3
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 13 deletions.
21 changes: 9 additions & 12 deletions src/rocrate_tabular/tabulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def add_expanded_property(self, entity_data, ignore_props, name, target):
)
return props

# Both of the following mutate entity_data
# Both of the following mutate entity_data. I'm not very happy about it

def set_property(self, entity_data, name, value, target_id):
"""Add a property to entity_data, and add the target_id if defined"""
Expand Down Expand Up @@ -281,19 +281,16 @@ def find_csv(self):
FROM property
WHERE property_label = '@type' AND value = 'File' AND LOWER(source_id) LIKE '%.csv'
""")
for entity_id in [row["source_id"] for row in files]:
entity_id = entity_id.replace("#", "")
self.add_csv(self.crate_dir / entity_id, "csv_files")

def add_csv(self, csv_path, table_name):
with open(csv_path, newline="") as f:
reader = csv.DictReader(
f
) # Use DictReader to read each row as a dictionary
for entity in files:
entity_id = entity["source_id"]
print("Adding csv for {entity_id}")
csvtext = self.crate.get(entity_id).fetch()
reader = csv.DictReader(csvtext.splitlines())
rows = list(reader)
print(rows)
if rows:
# Insert rows into the table (the table will be created if it doesn't exist)
self.db[table_name].insert_all(rows, pk="id", alter=True, ignore=True)
self.db["csv_files"].insert_all(rows, pk="id", alter=True, ignore=True)
# `pk="id"` assumes there's an 'id' column; if no primary key, you can remove it.


Expand Down Expand Up @@ -354,7 +351,7 @@ def cli():
""")

if args.csv:
tb.find_csv_contents()
tb.find_csv()

tb.export_csv()

Expand Down
18 changes: 17 additions & 1 deletion tests/crates/textfiles/ro-crate-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
{
"@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/"
},
"hasPart": { "@id": "#doc001" }
"hasPart": [
{ "@id": "doc001" },
{ "@id": "doc002" }
]
},
{
"@id": "doc001",
Expand All @@ -47,6 +50,19 @@
"name": "text file",
"contentEncoding": "text/plain"
},
{
"@id": "doc002",
"@type": "Dataset",
"name": "A csv file for testing",
"hasPart": [
{ "@id": "doc002/utterances.csv" }
]
},
{ "@id": "doc002/utterances.csv",
"@type": "File",
"name": "CSV file",
"contentEncoding": "text/csv"
},
{
"@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/",
"@type": "CreativeWork",
Expand Down

0 comments on commit f23c8c3

Please sign in to comment.