From 57d762c7e10de7c07764834fa0fb8a5e54dd7a28 Mon Sep 17 00:00:00 2001 From: mdorf Date: Fri, 15 Nov 2024 18:10:42 -0800 Subject: [PATCH 1/4] resolved #218 - CSV Format Change --- Gemfile.lock | 24 ++++++-------- .../utils/ontology_csv_writer.rb | 4 +-- test/util/test_ontology_csv_writer.rb | 32 +++++++++++++++++-- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 0b42e018..6feb637d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -55,15 +55,10 @@ GEM launchy (>= 2.1, < 4.0) mail (~> 2.7) eventmachine (1.2.7) - faraday (2.12.0) - faraday-net_http (>= 2.0, < 3.4) - json - logger - faraday-net_http (3.3.0) - net-http - ffi (1.17.0-aarch64-linux-gnu) - ffi (1.17.0-arm64-darwin) - ffi (1.17.0-x86_64-linux-gnu) + faraday (1.2.0) + multipart-post (>= 1.2, < 3) + ruby2_keywords + ffi (1.17.0) hashie (5.0.0) htmlentities (4.3.4) http-accept (1.7.0) @@ -71,7 +66,7 @@ GEM domain_name (~> 0.5) i18n (0.9.5) concurrent-ruby (~> 1.0) - json (2.8.1) + json (2.8.2) json_pure (2.8.1) language_server-protocol (3.17.0.3) launchy (3.0.1) @@ -99,8 +94,7 @@ GEM minitest (>= 2.12, < 5.0) powerbar multi_json (1.15.0) - net-http (0.5.0) - uri + multipart-post (2.4.1) net-http-persistent (2.9.4) net-imap (0.4.18) date @@ -126,10 +120,10 @@ GEM mail (>= 2.0) powerbar (2.0.1) hashie (>= 1.1.0) - pry (0.14.2) + pry (0.15.0) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (6.0.1) + public_suffix (5.1.1) racc (1.8.1) rack (2.2.10) rack-test (0.8.3) @@ -167,6 +161,7 @@ GEM rubocop-ast (1.36.1) parser (>= 3.3.1.0) ruby-progressbar (1.13.0) + ruby2_keywords (0.0.5) rubyzip (1.3.0) simplecov (0.22.0) docile (~> 1.1) @@ -186,7 +181,6 @@ GEM timeout (0.4.2) tzinfo (0.3.62) unicode-display_width (2.6.0) - uri (1.0.2) uuid (2.3.9) macaddr (~> 1.0) diff --git a/lib/ontologies_linked_data/utils/ontology_csv_writer.rb b/lib/ontologies_linked_data/utils/ontology_csv_writer.rb index 7fbb0a6d..a1abec76 100644 --- a/lib/ontologies_linked_data/utils/ontology_csv_writer.rb +++ b/lib/ontologies_linked_data/utils/ontology_csv_writer.rb @@ -39,7 +39,7 @@ def write_class(ont_class) row[CLASS_ID] = ont_class.id # Preferred label - row[PREF_LABEL] = ont_class.prefLabel + row[PREF_LABEL] = Array(ont_class.prefLabel).first # Synonyms synonyms = ont_class.synonym @@ -50,7 +50,7 @@ def write_class(ont_class) row[DEFINITIONS] = definitions.join('|') unless definitions.empty? # Obsolete - row[OBSOLETE] = ont_class.obsolete + row[OBSOLETE] = Array(ont_class.obsolete).first.to_s.upcase # CUI cuis = ont_class.cui diff --git a/test/util/test_ontology_csv_writer.rb b/test/util/test_ontology_csv_writer.rb index 355b4dd3..652f02f0 100644 --- a/test/util/test_ontology_csv_writer.rb +++ b/test/util/test_ontology_csv_writer.rb @@ -49,8 +49,16 @@ def self.before_suite end def get_csv_string - gz = Zlib::GzipReader.open(@@csv_path) - return gz.read + get_csv_string_from_path(@@csv_path) + end + + def enclosed_in_square_brackets_with_quotes?(string) + /\A\[\s*(["']).*\1\s*\]\z/ === string + end + + def get_csv_string_from_path(csv_path) + gz = Zlib::GzipReader.open(csv_path) + gz.read end def test_csv_writer_valid @@ -309,4 +317,24 @@ def test_csv_writer_content_props_other assert class_exists, %Q end + + def test_for_non_array_values + acronym = 'CHEBITEST' + sub_id = 1 + submission_parse(acronym, "CHEBI Ontology TEST", + "./test/data/ontology_files/chebi_test.obo", sub_id, + process_rdf: true, index_search: true, extract_metadata: false) + sub = LinkedData::Models::OntologySubmission.where(ontology: [acronym: acronym], submissionId: sub_id) + .include(:version, :submissionId, :ontology).first + sub.ontology.bring(:acronym) + classes = CSV.parse(get_csv_string_from_path(sub.csv_path), headers:true) + assert_equal 20, classes.count + + classes.each do |row| + row.each do |_, val| + assert_equal false, enclosed_in_square_brackets_with_quotes?(val), "Expected a String, but received an Array: #{val}" + end + end + end + end \ No newline at end of file From a486f06c1276c48984e52e4ef0b17e4b673a9469 Mon Sep 17 00:00:00 2001 From: mdorf Date: Fri, 15 Nov 2024 18:14:51 -0800 Subject: [PATCH 2/4] resolved #218 - CSV Format Change --- test/data/ontology_files/chebi_test.obo | 228 ++++++++++++++++++++++++ 1 file changed, 228 insertions(+) create mode 100644 test/data/ontology_files/chebi_test.obo diff --git a/test/data/ontology_files/chebi_test.obo b/test/data/ontology_files/chebi_test.obo new file mode 100644 index 00000000..acb4e3c2 --- /dev/null +++ b/test/data/ontology_files/chebi_test.obo @@ -0,0 +1,228 @@ +format-version: 1.2 +data-version: 237 +date: 30:10:2024 06:16 +saved-by: chebi +subsetdef: 1_STAR "Preliminary entries" +subsetdef: 2_STAR "Annotated by 3rd party" +subsetdef: 3_STAR "Manually annotated by ChEBI Team" +synonymtypedef: BRAND_NAME "BRAND NAME" +synonymtypedef: INN "INN" +synonymtypedef: IUPAC_NAME "IUPAC NAME" +default-namespace: chebi_ontology +remark: Author: ChEBI curation team +remark: ChEBI Release version 237 +remark: ChEBI subsumes and replaces the Chemical Ontology first +remark: developed by Michael Ashburner & Pankaj Jaiswal. +remark: For any queries contact chebi-help@ebi.ac.uk +ontology: chebi + +[Term] +id: CHEBI:137366 +name: CHEBI:4042 +is_obsolete: true + +[Term] +id: CHEBI:137377 +name: CHEBI:81850 +is_obsolete: true + +[Term] +id: CHEBI:143109 +name: waterssdfsdfss +is_obsolete: true + +[Term] +id: CHEBI:177198 +name: CHEBI:50860 +is_obsolete: true + +[Term] +id: CHEBI:189822 +name: testing532 +is_obsolete: true + +[Term] +id: CHEBI:24431 +name: chemical entity +def: "A chemical entity is a physical entity of interest in chemistry including molecular entities, parts thereof, and chemical substances." [] +subset: 3_STAR +synonym: "chemical entity" EXACT [UniProt] + +[Term] +id: CHEBI:27189 +name: unclassifieds +is_obsolete: true + +[Term] +id: CHEBI:30430 +name: indium atom +def: "A metallic element first identified and named from the brilliant indigo (Latin indicum) blue line in its flame spectrum." [] +subset: 3_STAR +synonym: "49In" RELATED [IUPAC] +synonym: "In" RELATED [IUPAC] +synonym: "indio" RELATED [ChEBI] +synonym: "Indium" RELATED [ChEBI] +synonym: "indium" EXACT IUPAC_NAME [IUPAC] +synonym: "indium" RELATED [ChEBI] +xref: CAS:7440-74-6 {source="ChemIDplus"} +xref: CAS:7440-74-6 {source="NIST Chemistry WebBook"} +xref: Gmelin:16297 {source="Gmelin"} +xref: WebElements:In +is_a: CHEBI:33317 ! boron group element atom +property_value: http://purl.obolibrary.org/obo/chebi/charge "0" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/formula "In" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/inchi "InChI=1S/In" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/inchikey "APFVFJFRJDLVQX-UHFFFAOYSA-N" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/mass "114.81800" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/monoisotopicmass "114.90388" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/smiles "[In]" xsd:string + +[Term] +id: CHEBI:33250 +name: atom +alt_id: CHEBI:22671 +alt_id: CHEBI:23907 +def: "A chemical entity constituting the smallest component of an element having the chemical properties of the element." [] +subset: 3_STAR +synonym: "atom" EXACT IUPAC_NAME [IUPAC] +synonym: "atome" RELATED [IUPAC] +synonym: "atomo" RELATED [IUPAC] +synonym: "atoms" RELATED [ChEBI] +synonym: "atomus" RELATED [ChEBI] +synonym: "element" RELATED [ChEBI] +synonym: "elements" RELATED [ChEBI] +is_a: CHEBI:24431 ! chemical entity + +[Term] +id: CHEBI:33317 +name: boron group element atom +subset: 3_STAR +synonym: "boron group element" RELATED [ChEBI] +synonym: "boron group elements" RELATED [ChEBI] +synonym: "Element der Borgruppe" RELATED [ChEBI] +synonym: "group 13 elements" EXACT IUPAC_NAME [IUPAC] +synonym: "group III elements" RELATED [ChEBI] +is_a: CHEBI:33560 ! p-block element atom + +[Term] +id: CHEBI:33318 +name: main group element atom +def: "An atom belonging to one of the main groups (found in the s- and p- blocks) of the periodic table." [] +subset: 3_STAR +synonym: "Hauptgruppenelement" RELATED [ChEBI] +synonym: "Hauptgruppenelemente" RELATED [ChEBI] +synonym: "main group element" RELATED [ChEBI] +synonym: "main group elements" EXACT IUPAC_NAME [IUPAC] +is_a: CHEBI:33250 ! atom + +[Term] +id: CHEBI:33560 +name: p-block element atom +def: "Any main group element atom belonging to the p-block of the periodic table." [] +subset: 3_STAR +synonym: "p-block element" RELATED [ChEBI] +synonym: "p-block elements" RELATED [ChEBI] +is_a: CHEBI:33318 ! main group element atom + +[Term] +id: CHEBI:49631 +name: gallium atom +alt_id: CHEBI:33326 +alt_id: CHEBI:49630 +def: "A metallic element predicted as eka-aluminium by Mendeleev in 1870 and discovered by Paul-Emile Lecoq de Boisbaudran in 1875. Named in honour of France (Latin Gallia) and perhaps also from the Latin gallus cock, a translation of Lecoq." [] +subset: 3_STAR +synonym: "31Ga" RELATED [IUPAC] +synonym: "Ga" RELATED [IUPAC] +synonym: "galio" RELATED [ChEBI] +synonym: "gallium" EXACT IUPAC_NAME [IUPAC] +synonym: "gallium" RELATED [ChEBI] +xref: CAS:7440-55-3 {source="ChemIDplus"} +xref: CAS:7440-55-3 {source="NIST Chemistry WebBook"} +xref: WebElements:Ga +is_a: CHEBI:33317 ! boron group element atom +property_value: http://purl.obolibrary.org/obo/chebi/charge "0" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/formula "Ga" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/inchi "InChI=1S/Ga" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/inchikey "GYHNNYVSQQEPJS-UHFFFAOYSA-N" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/mass "69.72300" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/monoisotopicmass "68.92557" xsd:string +property_value: http://purl.obolibrary.org/obo/chebi/smiles "[Ga]" xsd:string + +[Term] +id: CHEBI:64352 +name: UDP-N-acetyl-D-glucosamine(2-) +is_obsolete: true + +[Term] +id: CHEBI:64360 +name: tocilizumab +is_obsolete: true + +[Term] +id: CHEBI:64867 +name: PHS C26 +is_obsolete: true + +[Typedef] +id: has_functional_parent +name: has functional parent +is_cyclic: false +is_transitive: false + +[Typedef] +id: has_major_microspecies_at_pH_7_3 +name: has major microspecies at pH 7.3 +is_cyclic: true +is_transitive: false + +[Typedef] +id: has_parent_hydride +name: has parent hydride +is_cyclic: false +is_transitive: false + +[Typedef] +id: has_part +name: has part +xref: BFO:0000051 +is_cyclic: false +is_transitive: true + +[Typedef] +id: has_role +name: has role +xref: RO:0000087 +is_cyclic: false +is_transitive: false + +[Typedef] +id: is_conjugate_acid_of +name: is conjugate acid of +is_cyclic: true +is_transitive: false +inverse_of: is_conjugate_base_of ! is conjugate base of + +[Typedef] +id: is_conjugate_base_of +name: is conjugate base of +is_cyclic: true +is_transitive: false + +[Typedef] +id: is_enantiomer_of +name: is enantiomer of +is_cyclic: true +is_transitive: false + +[Typedef] +id: is_substituent_group_from +name: is substituent group from +is_cyclic: false +is_transitive: false + +[Typedef] +id: is_tautomer_of +name: is tautomer of +is_cyclic: true +is_transitive: true + From d380a88e389b919aed5e2b6d2bd7010b36f5bb10 Mon Sep 17 00:00:00 2001 From: mdorf Date: Sun, 17 Nov 2024 16:10:51 -0800 Subject: [PATCH 3/4] fixed a unit test that failed as a result of the earlier fix --- test/util/test_ontology_csv_writer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/util/test_ontology_csv_writer.rb b/test/util/test_ontology_csv_writer.rb index 652f02f0..b7714883 100644 --- a/test/util/test_ontology_csv_writer.rb +++ b/test/util/test_ontology_csv_writer.rb @@ -260,7 +260,7 @@ def test_csv_writer_content_non_obsolete classes = CSV.parse(get_csv_string, headers:true) classes.select do |row| if row[LinkedData::Utils::OntologyCSVWriter::PREF_LABEL] == preferred_label - assert_equal 'false', row[LinkedData::Utils::OntologyCSVWriter::OBSOLETE] + assert_equal 'false', row[LinkedData::Utils::OntologyCSVWriter::OBSOLETE].to_s.downcase class_exists = true end end From 3aadf69fa004886b8e08830003788d09e9d550d5 Mon Sep 17 00:00:00 2001 From: Alex Skrenchuk Date: Mon, 18 Nov 2024 14:19:38 -0800 Subject: [PATCH 4/4] gemfile update --- Gemfile.lock | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 6feb637d..a08e9bad 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -55,10 +55,15 @@ GEM launchy (>= 2.1, < 4.0) mail (~> 2.7) eventmachine (1.2.7) - faraday (1.2.0) - multipart-post (>= 1.2, < 3) - ruby2_keywords - ffi (1.17.0) + faraday (2.12.1) + faraday-net_http (>= 2.0, < 3.5) + json + logger + faraday-net_http (3.4.0) + net-http (>= 0.5.0) + ffi (1.17.0-aarch64-linux-gnu) + ffi (1.17.0-arm64-darwin) + ffi (1.17.0-x86_64-linux-gnu) hashie (5.0.0) htmlentities (4.3.4) http-accept (1.7.0) @@ -94,7 +99,8 @@ GEM minitest (>= 2.12, < 5.0) powerbar multi_json (1.15.0) - multipart-post (2.4.1) + net-http (0.5.0) + uri net-http-persistent (2.9.4) net-imap (0.4.18) date @@ -123,7 +129,7 @@ GEM pry (0.15.0) coderay (~> 1.1) method_source (~> 1.0) - public_suffix (5.1.1) + public_suffix (6.0.1) racc (1.8.1) rack (2.2.10) rack-test (0.8.3) @@ -161,7 +167,6 @@ GEM rubocop-ast (1.36.1) parser (>= 3.3.1.0) ruby-progressbar (1.13.0) - ruby2_keywords (0.0.5) rubyzip (1.3.0) simplecov (0.22.0) docile (~> 1.1) @@ -181,6 +186,7 @@ GEM timeout (0.4.2) tzinfo (0.3.62) unicode-display_width (2.6.0) + uri (1.0.2) uuid (2.3.9) macaddr (~> 1.0)