Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CSV loading is working when I test it on the command line #28

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 9 additions & 12 deletions src/rocrate_tabular/tabulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def add_expanded_property(self, entity_data, ignore_props, name, target):
)
return props

# Both of the following mutate entity_data
# Both of the following mutate entity_data. I'm not very happy about it

def set_property(self, entity_data, name, value, target_id):
"""Add a property to entity_data, and add the target_id if defined"""
Expand Down Expand Up @@ -281,19 +281,16 @@ def find_csv(self):
FROM property
WHERE property_label = '@type' AND value = 'File' AND LOWER(source_id) LIKE '%.csv'
""")
for entity_id in [row["source_id"] for row in files]:
entity_id = entity_id.replace("#", "")
self.add_csv(self.crate_dir / entity_id, "csv_files")

def add_csv(self, csv_path, table_name):
with open(csv_path, newline="") as f:
reader = csv.DictReader(
f
) # Use DictReader to read each row as a dictionary
for entity in files:
entity_id = entity["source_id"]
print("Adding csv for {entity_id}")
csvtext = self.crate.get(entity_id).fetch()
reader = csv.DictReader(csvtext.splitlines())
rows = list(reader)
print(rows)
if rows:
# Insert rows into the table (the table will be created if it doesn't exist)
self.db[table_name].insert_all(rows, pk="id", alter=True, ignore=True)
self.db["csv_files"].insert_all(rows, pk="id", alter=True, ignore=True)
# `pk="id"` assumes the CSV has an 'id' column; if it has no such column, remove the `pk` argument.


Expand Down Expand Up @@ -354,7 +351,7 @@ def cli():
""")

if args.csv:
tb.find_csv_contents()
tb.find_csv()

tb.export_csv()

Expand Down
18 changes: 17 additions & 1 deletion tests/crates/textfiles/ro-crate-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
{
"@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/"
},
"hasPart": { "@id": "#doc001" }
"hasPart": [
{ "@id": "doc001" },
{ "@id": "doc002" }
]
},
{
"@id": "doc001",
Expand All @@ -47,6 +50,19 @@
"name": "text file",
"contentEncoding": "text/plain"
},
{
"@id": "doc002",
"@type": "Dataset",
"name": "A csv file for testing",
"hasPart": [
{ "@id": "doc002/utterances.csv" }
]
},
{ "@id": "doc002/utterances.csv",
"@type": "File",
"name": "CSV file",
"contentEncoding": "text/csv"
},
{
"@id": "https://creativecommons.org/licenses/by-nc-sa/3.0/au/",
"@type": "CreativeWork",
Expand Down
Loading