Skip to content

Commit

Permalink
fix: add and update fields in parquet export (#10)
Browse files: browse the repository at this point in the history
- add `owner` field
- transform `generic_name` into a lang field
  • Loading branch information
raphael0202 authored Nov 18, 2024
1 parent dc230ce commit 5b99a3f
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions openfoodfacts_exports/exports/parquet.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -131,7 +131,7 @@
pa.field("emb_codes", pa.string(), nullable=True),
pa.field("entry_dates_tags", pa.list_(pa.string()), nullable=True),
pa.field("food_groups_tags", pa.list_(pa.string()), nullable=True),
pa.field("generic_name", pa.string(), nullable=True),
pa.field("generic_name", LANGUAGE_FIELD_DATATYPE, nullable=True),
pa.field("images", IMAGES_DATATYPE, nullable=True),
pa.field("informers_tags", pa.list_(pa.string()), nullable=True),
pa.field("ingredients_analysis_tags", pa.list_(pa.string()), nullable=True),
Expand Down Expand Up @@ -180,6 +180,7 @@
pa.field("obsolete", pa.bool_()),
pa.field("origins_tags", pa.list_(pa.string()), nullable=True),
pa.field("origins", pa.string(), nullable=True),
pa.field("owner", pa.string(), nullable=True),
pa.field("packagings_complete", pa.bool_(), nullable=True),
pa.field("packaging_recycling_tags", pa.list_(pa.string()), nullable=True),
pa.field("packaging_shapes_tags", pa.list_(pa.string()), nullable=True),
Expand Down Expand Up @@ -213,6 +214,14 @@
)


# Names of the product fields treated as language fields.
# `parse_language_fields` loops over this list, so a field added here is
# picked up by that conversion without further changes to the loop.
LANGUAGE_FIELDS = [
    "ingredients_text",
    "product_name",
    "packaging_text",
    "generic_name",
]


class ImageSize(BaseModel):
h: int | None = None
w: int | None = None
Expand Down Expand Up @@ -344,7 +353,7 @@ class Product(BaseModel):
emb_codes: str | None = None
entry_dates_tags: list[str] | None = None
food_groups_tags: list[str] | None = None
generic_name: str | None = None
generic_name: list[LanguageField] | None = None
images: list[Image] | None = None
informers_tags: list[str] | None = None
ingredients_analysis_tags: list[str] | None = None
Expand Down Expand Up @@ -390,6 +399,7 @@ class Product(BaseModel):
obsolete: bool = False
origins_tags: list[str] | None = None
origins: str | None = None
owner: str | None = None
packagings_complete: bool | None = None
packaging_recycling_tags: list[str] | None = None
packaging_shapes_tags: list[str] | None = None
Expand Down Expand Up @@ -484,7 +494,7 @@ def parse_language_fields(cls, data: dict) -> dict:
The main language is stored with a `lang` value of "main", while other
languages are stored with their language code (2-letter code).
"""
for field_name in ("ingredients_text", "product_name", "packaging_text"):
for field_name in LANGUAGE_FIELDS:
main_language_value = data.pop(field_name, None)
data[field_name] = []

Expand Down

0 comments on commit 5b99a3f

Please sign in to comment.