diff --git a/src/library/README.md b/src/library/README.md new file mode 100644 index 0000000..2de14b9 --- /dev/null +++ b/src/library/README.md @@ -0,0 +1,82 @@ +# MEDFORD JSON TO RDF Converter json2rdf +Introduction +------------------ +MEDFORD JSON to RDF Converter is a Python script that converts MEDFORD JSON data to RDF/XML format using the rdflib library. It provides a simple way to transform MEDFORD JSON-based metadata into RDF triples, making it easier to work with Linked Data and semantic web applications. + +Installing +------------------ +To use json2rdf, you need to have the following Python libraries installed: + + rdflib: The library to work with RDF data. + +You can install this library using pip: + + pip install rdflib + +How to Use +------------------ +To use the MEDFORD JSON to RDF Converter, follow the steps below: + +1. Clone the repository to your local machine. +2. Prepare your JSON data: Create or obtain the JSON data that you want to convert to RDF. +3. Run the script: Execute the "converter.py" script from the command line, providing the appropriate parameters. +4. Command-line Arguments + + --input: Path to the input JSON file (default is stdin). + + --output: Path to the output RDF/XML file (default is stdout). + +5. Example + + a. Convert JSON data from "input.json" to RDF/XML and save it to "output.rdf": + + python3 converter.py --input input.json --output output.rdf + + b. Convert JSON data from stdin to RDF/XML and print the output to the terminal: + + python3 converter.py < input.json + +Design Principles +------------------ +The json2rdf library follows the following design principles: +1. Validity: Ensure that the RDF/XML adheres to the RDF specifications and is a valid XML document. Use proper namespace declarations and well-formed XML syntax. +2. Use namespaces: Utilize namespaces to uniquely identify resources and properties. This helps avoid conflicts and ensures clarity in the RDF data. +3. 
Clear subject-predicate-object structure: Represent triples (subject-predicate-object) clearly in the RDF/XML. Use appropriate XML elements and attributes to express this structure. +4. Reusability: Reuse existing vocabularies (e.g., Dublin Core, BIBO, VCARD) when appropriate instead of reinventing terms. This enhances interoperability and consistency. +5. Avoid deep nesting: Keep the RDF/XML structure simple and avoid excessive nesting of elements. +6. Avoid using the “about” or “id” attributes in RDF descriptions due to the local scope of RDF generation and the lack of global identifiers for “about”. Using a local ID is not much better; it can hinder interoperability with other RDF datasets. It is preferable to adhere to the standard RDF triple structure, which ensures consistency and clarity in RDF data modeling, omitting fields that can cause confusion due to the lack of a global space of “about” fields. + + This is controversial. If we were to conform to the true “about” standard, we would have to maintain a stateful database that remembers every label we have ever used in an “about”, and avoid using anything twice. The fact that this is stateful means that we would need to generate the RDF on a website rather than in an application, where the website would track the global IDs. + + This is much more “expensive” than writing an app. The basic question is whether the expense is worth the trouble. In our case, it doesn’t seem to be. + + Meanwhile, external opinions are also split on whether an ID should be tracked even though it is a local key that is not universal. Some say it’s useful, others say it’s a waste of space. We have erred on the side of not including IDs, which are not considered useful by a subset of the RDF community. + +Theory of Operation +------------------ +The json2rdf library works by traversing the MEDFORD JSON data recursively and mapping JSON properties to appropriate RDF terms using predefined namespaces. 
It identifies major tokens, such as "code," "data," "journal," etc., and creates RDF triples accordingly. It also handles minor tokens, such as "ref," "primary," and "copy," to provide additional information about the triples. + +The library follows a two-step process: + +Conversion: The MEDFORD JSON data is traversed, and RDF triples are generated based on the mapping of JSON properties to RDF terms. + +Serialization: The generated RDF triples are serialized into RDF/XML format using the rdflib library. + +RDF/XML Validation +------------------ +To validate the generated output, you can use the online RDF/XML validator available at https://www.w3.org/RDF/Validator/. This tool allows you to check the correctness of your RDF/XML data and its conformance to the RDF specifications provided by the World Wide Web Consortium (W3C). + +References +------------------ +MEDFORD specs: https://github.com/TuftsBCB/MEDFORD-Spec/blob/master/main.pdf + +rdflib library documentation: https://rdflib.readthedocs.io/en/stable/ + +RDF/XML specification: https://www.w3.org/TR/rdf-syntax-grammar/ + +DublinCore RDF/XML examples: https://www.dublincore.org/specifications/dublin-core/dcmes-xml/ + +Qualified DC: https://www.dublincore.org/specifications/dublin-core/dcq-rdf-xml/ + +Feel free to contribute to the project by submitting issues or pull requests on the repository page. If you have any questions or need further assistance, please don't hesitate to reach out. Happy converting! 
"""Command-line front end: convert a MEDFORD JSON document to RDF/XML."""
import json
import sys
import argparse


def main(argv=None):
    """Read MEDFORD JSON, convert it to RDF/XML and write the result.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None`` (the default) argparse falls back to
            ``sys.argv[1:]``, so existing ``main()`` callers are unaffected.

    Exits with status 1 when the input cannot be read or decoded, or when
    the output file cannot be written. Diagnostics go to stderr so they are
    never mixed into RDF/XML that is being written to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Convert MEDFORD JSON data to RDF/XML format"
    )
    parser.add_argument("--input", help="Path to the input JSON file (default stdin)")
    parser.add_argument(
        "--output", help="Path to the output RDF/XML file (default stdout)"
    )
    args = parser.parse_args(argv)

    try:
        if args.input:
            # JSON interchange text is UTF-8; do not depend on the locale.
            with open(args.input, "r", encoding="utf-8") as json_file:
                json_data = json.load(json_file)
        else:
            # Read JSON data from stdin if --input is not provided
            json_data = json.load(sys.stdin)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print("Error decoding JSON:", e, file=sys.stderr)
        sys.exit(1)
    except FileNotFoundError:
        print("JSON file not found", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Permission problems, a directory given as --input, etc.
        print("Error reading JSON file:", e, file=sys.stderr)
        sys.exit(1)

    # Imported here rather than at module level so that argument and input
    # errors are reported even when the conversion package (and rdflib,
    # which it needs) cannot be imported.
    from json2rdf import json2rdf

    # Convert json to RDF/XML
    converter = json2rdf.jsonToRdf(json_data)
    converter.json_to_graph()
    rdf_xml_data_bytes = converter.graph_to_rdfxml()

    if args.output:
        try:
            # Write the RDF/XML data to the specified output file
            with open(args.output, "wb") as rdf_file:
                rdf_file.write(rdf_xml_data_bytes)
        except OSError as e:
            print("Error writing RDF data:", e, file=sys.stderr)
            sys.exit(1)
        print("RDF data written successfully.")
    else:
        # Write the RDF/XML data to stdout if --output is not provided;
        # print() supplies the final newline, so strip the serializer's own.
        print(rdf_xml_data_bytes.decode("utf-8").rstrip("\n"))


if __name__ == "__main__":
    main()
from rdflib import Graph, Literal, Namespace, RDF, URIRef, DC
import xml.etree.ElementTree as ET
from datetime import datetime
import sys


sys.path.append("..")
# MFTERMS = Namespace("https://mf.cs.tufts.edu/mf/terms/")
# MF=Namespace("https://mf.cs.tufts.edu/mf/elements/")
MFTERMS = Namespace("https://www.eecs.tufts.edu/~wlou01/mf/terms/")
MF = Namespace("https://www.eecs.tufts.edu/~wlou01/mf/elements/")
BIBO = Namespace("http://purl.org/ontology/bibo/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
IMS = Namespace("http://www.imsglobal.org/xsd/imsmd_v1p2/")
VCARD = Namespace("https://www.w3.org/2006/vcard/ns#")


class jsonToRdf:
    """Turn parsed MEDFORD JSON into an rdflib graph and then into RDF/XML.

    Usage: construct with the parsed JSON, call ``json_to_graph()`` to fill
    ``self.G``, then ``graph_to_rdfxml()`` to obtain the serialized bytes.

    Namespace terms are always built with ``NS["name"]`` rather than
    ``getattr(NS, "name")``: rdflib's ``Namespace`` subclasses ``str``, so
    attribute access on names such as "format", "index" or "count" would
    return a bound string method instead of a URI.
    """

    # Major tokens searched for (as substrings) in a subject URI. When several
    # match, the last one in this order wins.
    _MAJOR_TOKENS = (
        "code",
        "data",
        "expedition",
        "file",
        "freeform",
        "funding",
        "journal",
        "medford",
        "method",
        "paper",
        "project",
        "software",
        "species",
    )

    # Major token -> (predicate, object) of the typing triple for a subject.
    _MAJOR_TOKEN_TRIPLES = {
        "code": (RDF.type, MF["code"]),
        "data": (DCTERMS["type"], Literal("dataset")),
        "expedition": (RDF.type, MF["expedition"]),
        "file": (DCTERMS["type"], Literal("format")),
        "freeform": (RDF.type, MF["freeform"]),
        "funding": (RDF.type, MF["funding"]),
        "journal": (DCTERMS["type"], Literal("text")),
        "medford": (RDF.type, MF["medford"]),
        "method": (RDF.type, MF["method"]),
        "paper": (DCTERMS["type"], Literal("text")),
        "project": (RDF.type, MF["project"]),
        "species": (RDF.type, MF["species"]),
        "software": (DCTERMS["type"], Literal("software")),
    }

    # Minor token -> (predicate, object) of the flag triple for a subject.
    _MINOR_TOKEN_TRIPLES = {
        "ref": (MFTERMS["isRef"], Literal("true")),
        "primary": (MFTERMS["isPrimary"], Literal("true")),
        "copy": (MFTERMS["isCopy"], Literal("true")),
        "freeform": (RDF.type, MF["freeform"]),
    }

    # MEDFORD tags (lower-cased) whose predicate does not depend on the parent.
    _PREDICATES = {
        "association": DCTERMS["publisher"],
        "cruiseid": MFTERMS["cruiseID"],
        "divenumber": MFTERMS["diveNumber"],
        "doi": DCTERMS["identifier"],
        "email": VCARD["email"],
        "id": DCTERMS["identifier"],
        "issue": BIBO["issue"],
        "link": DCTERMS["source"],
        "note": DCTERMS["description"],
        "orcid": DCTERMS["identifier"],
        "mooringid": MFTERMS["mooringID"],
        "pages": BIBO["pageStart"],
        "pmid": BIBO["pmid"],
        "version": DCTERMS["hasVersion"],
        "volume": BIBO["volume"],
        "role": VCARD["role"],
        "shipname": MFTERMS["shipName"],
        "size": DCTERMS["extent"],
        "uri": DCTERMS["source"],
    }

    _RDF_ABOUT = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about"

    # initialize graph and read json data
    def __init__(self, json_data):
        """Store the parsed MEDFORD JSON and create an empty graph."""
        self.json_data = json_data
        self.G = Graph()

    @staticmethod
    def _invalid_format():
        """Report malformed MEDFORD JSON on stderr and exit with status 1."""
        print("Invalid MEDFORD JSON format.", file=sys.stderr)
        sys.exit(1)

    # mapping MEDFORD tags
    def checkSubject(self, prop, parent):
        """Return the predicate URI for MEDFORD tag ``prop`` under ``parent``.

        Args:
            prop: Tag name; matched case-insensitively.
            parent: URI of the enclosing major token; only consulted for the
                "desc" and "type" tags.

        Returns:
            A URIRef. Tags without a dedicated mapping fall back to the
            lower-cased tag name in the MFTERMS namespace.
        """
        term = prop.lower()

        if term == "desc":
            if parent == MF["date"]:
                return DCTERMS["date"]
            if parent == MF["contributor"]:
                return VCARD["fn"]
            return DCTERMS["title"]

        if term == "type":
            if "code" in parent:
                return DCTERMS["subject"]
            if "data" in parent or "software" in parent:
                return DCTERMS["format"]
            return DCTERMS["type"]

        try:
            return self._PREDICATES[term]
        except KeyError:
            return MFTERMS[term]

    # add properties for mf major tokens
    def add_majorToken_to_graph(self, mfword, subject_url):
        """Add the typing triple for major token ``mfword`` to the graph.

        Returns:
            The ``(predicate, object)`` pair of the triple that was added, so
            the caller can remove exactly that triple again, or ``("", "")``
            when ``mfword`` is not a known major token and nothing was added.
        """
        if mfword not in self._MAJOR_TOKEN_TRIPLES:
            return "", ""
        predicate, obj = self._MAJOR_TOKEN_TRIPLES[mfword]
        self.G.add((subject_url, predicate, obj))
        return predicate, obj

    # add properties for mf sub tokens
    def add_minorToken_to_graph(self, second, subject_url):
        """Add the flag triple for minor token ``second``; unknown tokens are ignored."""
        triple = self._MINOR_TOKEN_TRIPLES.get(second)
        if triple is not None:
            self.G.add((subject_url, triple[0], triple[1]))

    # Function to convert nested JSON data to RDF triples in graph
    def json_to_graph(self):
        """Walk ``self.json_data`` and add the corresponding triples to ``self.G``."""
        if not isinstance(self.json_data, dict):
            self._invalid_format()

        for key, value in self.json_data.items():
            self.helper(value, MF[key.lower()], "none")

    def _collapse_values(self, prop_value):
        """Reduce one tag's ``[[id, value], ...]`` list to a single value.

        Several plain values are joined with ", "; otherwise the first value
        (which may be a nested dict) is returned unchanged.
        """
        if not prop_value:
            self._invalid_format()

        if len(prop_value) > 1 and not isinstance(prop_value[0][1], dict):
            for entry in prop_value:
                if not isinstance(entry[0], int):
                    self._invalid_format()
            return ", ".join(str(entry[1]) for entry in prop_value)

        if not isinstance(prop_value[0][0], int):
            self._invalid_format()
        return prop_value[0][1]

    def _add_value(self, subject_url, sub_subject, prop_val):
        """Add the triple(s) for a plain (non-dict) property value."""
        # NOTE(review): these are substring tests on the whole predicate URI,
        # so MFTERMS fallbacks such as ".../updated" or ".../resource" also
        # take the date / source paths. Kept as in the original; confirm
        # whether that is intended.
        if "date" in sub_subject:
            try:
                dcdate = datetime.strptime(prop_val, "%Y-%m-%d").date()
            except (ValueError, TypeError):
                try:
                    imsdatetime = datetime.strptime(prop_val, "%Y-%m-%d %H:%M:%S")
                except (ValueError, TypeError):
                    # Not a recognised format: keep the raw text.
                    self.G.add((subject_url, DCTERMS["date"], Literal(prop_val)))
                else:
                    self.G.add((subject_url, IMS["datetime"], Literal(imsdatetime)))
            else:
                self.G.add((subject_url, DCTERMS["date"], Literal(dcdate)))
        elif "pageStart" in sub_subject:
            # "1-10" -> pageStart 1, pageEnd 10; a single page has no pageEnd.
            start, dash, end = str(prop_val).partition("-")
            self.G.add((subject_url, BIBO["pageStart"], Literal(start)))
            if dash:
                self.G.add((subject_url, BIBO["pageEnd"], Literal(end)))
        elif "source" in sub_subject:
            # dcterms:source carries a URI, not a literal
            self.G.add((subject_url, DCTERMS["source"], URIRef(prop_val)))
        else:
            self.G.add((subject_url, sub_subject, Literal(prop_val)))

    # helper function for json_to_graph
    def helper(self, value, psubject, second):
        """Add triples for every ``[id, {tag: ...}]`` entry in ``value``.

        Args:
            value: List of ``[int id, dict]`` pairs for one token.
            psubject: URI of the major token the entries belong to; each
                entry's subject is ``psubject + "/" + id``.
            second: Minor token ("ref", "primary", "copy", "freeform") that
                applies to these entries, or any other string for none.

        Exits the process with status 1 on malformed input.
        """
        mfword = ""
        for word in self._MAJOR_TOKENS:
            if word in psubject:
                mfword = word

        for item in value:
            item_id = item[0]
            if not isinstance(item_id, int):
                self._invalid_format()
            item_data = item[1]
            if not isinstance(item_data, dict):
                self._invalid_format()

            subject_url = psubject + "/" + str(item_id)

            predicate, obj = self.add_majorToken_to_graph(mfword, subject_url)
            self.add_minorToken_to_graph(second, subject_url)

            for prop, prop_value in item_data.items():
                prop_val = self._collapse_values(prop_value)

                # if the data is value only
                if not isinstance(prop_val, dict):
                    self._add_value(
                        subject_url, self.checkSubject(prop, psubject), prop_val
                    )
                    continue

                # A nested dict means this level is only a container for a
                # minor token: drop the typing triple added above, it is
                # re-added one level down on the real subject.
                if mfword in self._MAJOR_TOKEN_TRIPLES:
                    self.G.remove((subject_url, predicate, obj))

                # The minor token is the property being processed. The parent
                # is kept in a local so a freeform switch does not leak into
                # later properties or items of this loop.
                minor = prop.lower()
                child_subject = psubject
                if mfword == "freeform":
                    child_subject = MF[minor]
                    minor = "freeform"
                self.helper(prop_value, child_subject, minor)

    # parse graph to rdf/xml bytes data
    def graph_to_rdfxml(self):
        """Serialize ``self.G`` to RDF/XML bytes with ``rdf:about`` stripped.

        ``rdf:Description`` elements whose subject lives under the
        "contributor", "date" or "keyword" element are re-tagged as
        ``dc:contributor``, ``dc:date`` and ``dc:subject`` respectively.

        Returns:
            The modified XML document as bytes (``ET.tostring`` output).
        """
        # Serialize the graph to RDF/XML format and parse it for editing
        root = ET.fromstring(self.G.serialize(format="xml"))

        # Register desired namespaces and prefixes
        for prefix, namespace in (
            ("mf", MF),
            ("mfterms", MFTERMS),
            ("bibo", BIBO),
            ("dcterms", DCTERMS),
            ("ims", IMS),
            ("vcard", VCARD),
        ):
            ET.register_namespace(prefix, str(namespace))

        # The "dc:" tags below are written literally, so declare the prefix by
        # hand. DC is an rdflib namespace object; pass its string form.
        root.set("xmlns:dc", str(DC))

        # change the major token namespace
        for elem in root.iter():
            if not elem.tag.endswith("Description"):
                continue

            # Remove the 'about' attribute, keeping its value to pick the tag
            about_value = elem.attrib.pop(self._RDF_ABOUT, None)
            if about_value is None:
                continue

            # ".../elements/<term>/<id>" -> "<term>"
            parts = about_value.rsplit("/", 2)
            if len(parts) < 3:
                continue
            term = parts[1]

            if term in ("contributor", "date"):
                elem.tag = "dc:" + term
            elif term == "keyword":
                elem.tag = "dc:" + "subject"
            # by default the elem.tag is rdf:Description

        # Convert the modified XML tree back to bytes
        return ET.tostring(root)
+ +Example: + + + true + HiRise + Assembly of genome scaffolds + \ No newline at end of file diff --git a/src/library/tests/test.py b/src/library/tests/test.py new file mode 100644 index 0000000..db69a25 --- /dev/null +++ b/src/library/tests/test.py @@ -0,0 +1,457 @@ +from json2rdf import json2rdf +import difflib +import unittest +import sys + +sys.path.append("..") # Add the parent directory to the Python path + + +class TestJsonToRdf(unittest.TestCase): + + # find the difference between the two strings + def find_string_diff(self, string1, string2): + # Create a SequenceMatcher object + seq_matcher = difflib.SequenceMatcher(None, string1, string2) + + # Get the differences between the strings + diff = seq_matcher.get_opcodes() + + # Create a list to store the differences + differences = [] + + for opcode, i1, i2, j1, j2 in diff: + if opcode == "equal": + continue + elif opcode == "insert": + differences.append(f"Insert: '{string2[j1:j2]}' at position {i1}") + elif opcode == "delete": + differences.append(f"Delete: '{string1[i1:i2]}' from position {i1}") + elif opcode == "replace": + differences.append( + f"Replace: '{string1[i1:i2]}' with '{string2[j1:j2]}' at position {i1}" + ) + + return differences + + # The order of the XML namespaces (xmlns) listed in the RDF is arbitrary, + # and it may vary between different serializations. + # To compare the converted RDF/XML with the expected RDF/XML, + # we should disregard the order of the xmlns declarations. + # We can remove the xmlns prefixes from the RDF/XML to make the + # comparison more straightforward. 
+ + def remove_rdfxmlns(self, rdfxml): + + # Split the string into lines + lines = rdfxml.splitlines() + + # Remove the first and last lines + lines = lines[1:-1] + + # Join the remaining lines back into a single string + result_rdfxml = "\n".join(lines) + + return result_rdfxml + + # Convert json to RFD/XML and remove the XML namespaces + def convert(self, json_input): + test = json2rdf.jsonToRdf(json_input) + test.json_to_graph() + rdf_xml_data_bytes = test.graph_to_rdfxml() + rdfxml = rdf_xml_data_bytes.decode("utf-8") + rdfxml2 = self.remove_rdfxmlns(rdfxml) + return rdfxml2 + + ############################################################### + # Test for code + + def test_case1(self): + json_input = { + "Code": [ + [ + 200, + { + "Ref": [ + [ + 90, + { + "desc": [[90, "HiRise"]], + "Type": [[91, "Assembly of genome scaffolds"]], + }, + ] + ] + }, + ] + ] + } + expected_rdfxml = """ + + true + HiRise + Assembly of genome scaffolds + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for contributor + + def test_case2(self): + json_input = { + "Contributor": [ + [ + 1, + { + "desc": [[1, "Polina Shpilker"]], + "Association": [ + [ + 2, + "Department of Computer Science, Tufts University, 1XX College Ave, 02155, MA, USA", + ] + ], + "Role": [[3, "First Author"], [4, "Corresponding Author"]], + }, + ] + ] + } + expected_rdfxml = """ + Polina Shpilker + Department of Computer Science, Tufts University, 1XX College Ave, 02155, MA, USA + First Author, Corresponding Author + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for data + + def test_case3(self): + 
json_input = { + "Data": [ + [ + 4, + { + "Ref": [ + [ + 4, + { + "desc": [ + [4, "Reef Genomics Pocillopora damicornis"] + ], + "URI": [[5, "http://pdam.reefgenomics.org"]], + "Type": [[6, "Website"]], + }, + ] + ] + }, + ] + ] + } + expected_rdfxml = """ + dataset + true + Reef Genomics Pocillopora damicornis + + Website + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for date + + def test_case4(self): + json_input = { + "Date": [[12, {"desc": [[12, "2018-05-09"]], "Note": [[15, "Received"]]}]] + } + expected_rdfxml = """ + 2018-05-09 + Received + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for expedition + def test_case5(self): + json_input = { + "Expedition": [ + [ + 15, + { + "desc": [[15, "Coral cruise"]], + "ShipName": [[16, "Ship"]], + "CruiseID": [[17, "39849"]], + "MooringID": [[18, "A78324"]], + }, + ] + ] + } + expected_rdfxml = """ + + Coral cruise + Ship + 39849 + A78324 + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for freeform + def test_case6(self): + json_input = { + "Freeform": [ + [ + 19, + { + "Date": [ + [ + 19, + { + "desc": [[19, "05-22"]], + "Note": [[20, "Hello Workd"]], + }, + ] + ] + }, + ] + ] + } + expected_rdfxml = """ + + 05-22 + Hello Workd + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, 
"\n".join(diff) + + ############################################################### + # Test for Funding + + def test_case7(self): + json_input = { + "Funding": [ + [ + 55, + { + "desc": [[55, "National Science Foundation"]], + "ID": [[56, "OCE-1358699"]], + }, + ] + ] + } + expected_rdfxml = """ + + National Science Foundation + OCE-1358699 + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for Journal + def test_case8(self): + json_input = { + "Journal": [ + [ + 7, + { + "desc": [[7, "Nature Scientific Reports"]], + "Volume": [[8, "8"]], + "Issue": [[9, "1"]], + "Pages": [[10, "1-10"]], + }, + ] + ] + } + expected_rdfxml = """ + text + Nature Scientific Reports + 8 + 1 + 1 + 10 + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for Keyword + def test_case9(self): + json_input = {"Keyword": [[59, {"desc": [[59, "Coral"]]}]]} + expected_rdfxml = """ + Coral + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for MEDFORD + def test_case10(self): + json_input = { + "MEDFORD": [[1, {"desc": [[1, "description"]], "Version": [[2, "1.0"]]}]] + } + expected_rdfxml = """ + + description + 1.0 + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for Method + def 
test_case11(self): + json_input = { + "Method": [ + [ + 73, + { + "desc": [[73, "Qiagen DNAeasy Midi kit"]], + "Type": [[74, "DNA extraction"]], + }, + ] + ] + } + expected_rdfxml = """ + + Qiagen DNAeasy Midi kit + DNA extraction + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for Paper + def test_case12(self): + json_input = { + "Paper": [ + [ + 5, + { + "Primary": [ + [ + 5, + { + "desc": [ + [ + 5, + "MEDFORD: A human and machine readable markup language to facilitate FAIR coral metadata", + ] + ], + "Note": [ + [ + 6, + "A paper describing the implementation of the MEDFORD file format, which has a parser that can take a provided MEDFORD file and additional arbitrary files and put them into a bag. A MEDFORD file can also be translated into other formats, such as the BCODMO submission format.", + ] + ], + }, + ] + ] + }, + ] + ] + } + expected_rdfxml = """ + text + true + MEDFORD: A human and machine readable markup language to facilitate FAIR coral metadata + A paper describing the implementation of the MEDFORD file format, which has a parser that can take a provided MEDFORD file and additional arbitrary files and put them into a bag. A MEDFORD file can also be translated into other formats, such as the BCODMO submission format. 
+ """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for Project + + def test_case13(self): + json_input = {"Project": [[1, {"desc": [[1, "ABC project"]]}]]} + + expected_rdfxml = """ + + ABC project + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for Software + def test_case14(self): + json_input = { + "Software": [ + [ + 100, + { + "Primary": [ + [60, {"desc": [[10, "Testing"]], "Type": [[20, "Python"]]}] + ] + }, + ] + ] + } + expected_rdfxml = """ + software + true + Testing + Python + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + ############################################################### + # Test for Species + def test_case15(self): + json_input = { + "Species": [ + [ + 65, + { + "desc": [[65, "Pocillopora damicornis"]], + "Loc": [[66, "Sabago Isthmus, Panama"]], + "ReefCollection": [[67, "March 2005"]], + "Cultured": [ + [68, "University of Miami Coral Resource Facility"] + ], + "CultureCollection": [[69, "Sept. 2016"]], + "Note": [ + [ + 71, + "study used two healthy fragments and two bleached fragments", + ] + ], + }, + ] + ] + } + expected_rdfxml = """ + + Pocillopora damicornis + Sabago Isthmus, Panama + March 2005 + University of Miami Coral Resource Facility + Sept. 
2016 + study used two healthy fragments and two bleached fragments + """ + converted_rdfxml = self.convert(json_input) + diff = self.find_string_diff(expected_rdfxml, converted_rdfxml) + assert converted_rdfxml == expected_rdfxml, "\n".join(diff) + + +if __name__ == "__main__": + unittest.main()