Skip to content

Commit

Permalink
fix: ElementMetadata serializes when the filename is a Path object
Browse files Browse the repository at this point in the history
  • Loading branch information
MthwRobinson authored Feb 16, 2023
1 parent 3c1b089 commit f5ff140
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.4.10

* Fixes `ElementMetadata` so that it's JSON serializable when the filename is a `Path` object.

## 0.4.9

* Added ingest modules and s3 connector, sample ingest script
Expand Down
15 changes: 14 additions & 1 deletion test_unstructured/staging/test_base_staging.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import csv
import json
import os
import pathlib
import pytest

import pandas as pd

import unstructured.staging.base as base

from unstructured.documents.elements import Title, NarrativeText, ListItem
from unstructured.documents.elements import ElementMetadata, Title, NarrativeText, ListItem


@pytest.fixture
Expand Down Expand Up @@ -64,3 +66,14 @@ def test_convert_to_dataframe():
)
assert df.type.equals(expected_df.type) is True
assert df.text.equals(expected_df.text) is True


def test_convert_to_isd_serializes_with_posix_paths():
    """ISD output must stay JSON-serializable when the filename is a ``PosixPath``."""
    shared_metadata = ElementMetadata(filename=pathlib.PosixPath("../../fake-file.txt"))
    isd = base.convert_to_isd(
        [
            Title(text="Title 1", metadata=shared_metadata),
            NarrativeText(text="Narrative 1", metadata=shared_metadata),
        ],
    )
    # NOTE(robinson) - json.dumps should run without raising an exception
    json.dumps(isd)
2 changes: 1 addition & 1 deletion unstructured/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.9" # pragma: no cover
__version__ = "0.4.10" # pragma: no cover
5 changes: 5 additions & 0 deletions unstructured/documents/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass
import hashlib
from typing import Callable, List, Optional, Union
import pathlib


class NoID(ABC):
Expand All @@ -16,6 +17,10 @@ class ElementMetadata:
page_number: Optional[int] = None
url: Optional[str] = None

def __post_init__(self):
    """Normalize a path-object ``filename`` to a plain string.

    ``json.dumps`` cannot encode ``pathlib`` objects, so the filename is
    converted to ``str`` up front. The check uses ``pathlib.PurePath``, the
    common base of every pathlib class, so pure paths (for example
    ``PurePosixPath``) are normalized as well as concrete ``Path`` objects.
    Any other value, including ``None`` and ``str``, is left untouched.
    """
    if isinstance(self.filename, pathlib.PurePath):
        self.filename = str(self.filename)

def to_dict(self):
    """Return the instance's attributes as a dictionary.

    Attributes whose value is ``None`` are left out of the result.
    """
    populated = {}
    for name, entry in vars(self).items():
        if entry is None:
            continue
        populated[name] = entry
    return populated

Expand Down

0 comments on commit f5ff140

Please sign in to comment.