Skip to content

Commit

Permalink
Set pmid column to string
Browse files Browse the repository at this point in the history
  • Loading branch information
lwrubel committed Jul 18, 2024
1 parent a27a5a5 commit f4f66e2
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions rialto_airflow/harvest/merge_pubs.py
Original file line number Diff line number Diff line change
@@ -47,7 +47,7 @@ def dimensions_pubs_df(dimensions_pubs):
"""
# Polars is inferring volume is an integer, but it should be a string e.g. "97-B"
df = pl.scan_csv(
- dimensions_pubs, schema_overrides={"volume": pl.String, "year": pl.String}
+ dimensions_pubs, schema_overrides={"volume": pl.String, "pmid": pl.String, "year": pl.String}
)
df = df.select(
pl.col("doi").map_elements(normalize_doi, return_dtype=pl.String),
@@ -88,7 +88,7 @@ def sulpub_df(sul_pub):
"""
Create a sulpub LazyFrame and rename columns
"""
- df = pl.scan_csv(sul_pub, schema_overrides={"year": pl.String})
+ df = pl.scan_csv(sul_pub, schema_overrides={"year": pl.String, "pmid": pl.String})
df = df.drop_nulls("doi")
df = df.with_columns(
pl.col("doi").map_elements(normalize_doi, return_dtype=pl.String)
Expand Down

0 comments on commit f4f66e2

Please sign in to comment.