Skip to content

Commit

Permalink
Add helper for getting content from metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisJar committed Nov 15, 2024
1 parent 442e34e commit bdc9997
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions client/src/nv_ingest_client/util/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,3 +401,15 @@ def filter_function_kwargs(func, **kwargs):
args_dict = {k: kwargs.pop(k) for k in dict(kwargs) if k in func_args}

return args_dict


def get_content(results: List[List[dict]]) -> dict:
    """Collect text and structured content from ingestion results.

    Every element of every result is inspected. Elements whose
    ``"document_type"`` is ``"text"`` or ``"structured"`` are reduced to a
    ``{"page_number": ..., "content": ...}`` record; elements of any other
    document type are ignored.

    Parameters
    ----------
    results
        An iterable of results, each itself an iterable of element
        dictionaries. Each element must have a ``"document_type"`` key, and
        text/structured elements must have a ``"metadata"`` dictionary
        containing ``"content_metadata"["page_number"]`` plus ``"content"``
        (text) or ``"table_content"`` (structured).

    Returns
    -------
    dict
        ``{"text_content": [...], "structured_content": [...]}``, with records
        in the order the elements were encountered.

    Raises
    ------
    KeyError
        If a text or structured element lacks one of the keys read above.
    """
    # Flatten once. The outer loop over `results` must come first so that
    # `result` is bound before the inner loop iterates over it.
    elements = [elem for result in results for elem in result]

    text_content = [
        {
            "page_number": elem["metadata"]["content_metadata"]["page_number"],
            "content": elem["metadata"]["content"],
        }
        for elem in elements
        if elem["document_type"] == "text"
    ]

    # NOTE(review): table content is read from metadata["table_content"] as in
    # the original code; confirm this matches the actual metadata schema.
    structured_content = [
        {
            "page_number": elem["metadata"]["content_metadata"]["page_number"],
            "content": elem["metadata"]["table_content"],
        }
        for elem in elements
        if elem["document_type"] == "structured"
    ]

    return {"text_content": text_content, "structured_content": structured_content}

0 comments on commit bdc9997

Please sign in to comment.