From 6986cfa006652e25fd2ee267ab3f85d8cfa0f8ed Mon Sep 17 00:00:00 2001 From: "E. Lynette Rayle" Date: Mon, 11 Nov 2019 09:05:46 -0500 Subject: [PATCH 1/2] add language processing to ldpath service --- app/services/qa/linked_data/ldpath_service.rb | 102 ++++++++---- .../mapper/graph_ldpath_mapper_service.rb | 3 +- .../config/context_property_map_spec.rb | 6 +- .../linked_data/ldpath_service_spec.rb | 157 +++++++++++++++++- .../graph_ldpath_mapper_service_spec.rb | 28 ++-- 5 files changed, 241 insertions(+), 55 deletions(-) diff --git a/app/services/qa/linked_data/ldpath_service.rb b/app/services/qa/linked_data/ldpath_service.rb index 834a3447..7a7499eb 100644 --- a/app/services/qa/linked_data/ldpath_service.rb +++ b/app/services/qa/linked_data/ldpath_service.rb @@ -4,39 +4,85 @@ module Qa module LinkedData class LdpathService - VALUE_ON_ERROR = [].freeze + LANGUAGE_PATTERN = "*LANG*".freeze + PROPERTY_NAME = "property".freeze class_attribute :predefined_prefixes self.predefined_prefixes = Ldpath::Transform.default_prefixes.with_indifferent_access - # Create the ldpath program for a given ldpath. - # @param ldpath [String] ldpath to follow to get a value from a graph (documation: http://marmotta.apache.org/ldpath/language.html) - # @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" }) - # @return [Ldpath::Program] an executable program that will extract a value from a graph - def self.ldpath_program(ldpath:, prefixes: {}) - program_code = "" - prefixes.each { |key, url| program_code << "@prefix #{key} : <#{url}> \;\n" } - program_code << "property = #{ldpath} \;" - Ldpath::Program.parse program_code - rescue => e - Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}... cause: #{e.message}\n ldpath_program=\n#{program_code}") - raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + "... cause: #{e.message}" - end + class << self + # Create the ldpath program for a given ldpath. + # @param ldpath [String] ldpath to follow to get a value from a graph (documation: http://marmotta.apache.org/ldpath/language.html) + # @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" }) + # @param languages [Array] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker) + # @return [Ldpath::Program] an executable program that will extract a value from a graph + def ldpath_program(ldpath:, prefixes: {}, languages: []) + program_code = ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages) + Ldpath::Program.parse program_code + rescue => e + Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}... cause: #{e.message}\n ldpath_program=\n#{program_code}") + raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + "... cause: #{e.message}" + end + + # Create the program code for a given ldpath. + # @param ldpath [String] ldpath to follow to get a value from a graph (documation: http://marmotta.apache.org/ldpath/language.html) + # @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" }) + # @param languages [Array] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker) + # @return [String] the program code string used with Ldpath::Program.parse + def ldpath_program_code(ldpath:, prefixes: {}, languages: []) + program_code = "" + prefixes.each { |key, url| program_code << "@prefix #{key} : <#{url}> \;\n" } + property_explode(program_code, ldpath, languages) + end + + # Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values. + # @param program [Ldpath::Program] an executable program that will extract a value from a graph + # @param graph [RDF::Graph] the graph from which the values will be extracted + # @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri + # @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls. + # It will limit results to those found in the context graph. + ## @return [Array] the extracted values based on the ldpath + def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?) + raise ArgumentError, "You must specify a program when calling ldpath_evaluate" if program.blank? + output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context) + property_implode(output) + rescue ParseError => e + Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}") + raise ParseError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}" + end + + private + + # create program code with a property per language + untagged + def property_explode(program_code, ldpath, languages) + return program_code << "#{PROPERTY_NAME} = #{ldpath} \;\n" unless ldpath.index(LANGUAGE_PATTERN) + return program_code << "#{PROPERTY_NAME} = #{ldpath.gsub(LANGUAGE_PATTERN, '')} \;\n" unless languages.present? + languages.map { |language| program_code << "#{property_name_for(language)} = #{ldpath.gsub(LANGUAGE_PATTERN, "[@#{language}]")} \;\n" } + program_code << "#{PROPERTY_NAME} = #{ldpath.gsub(LANGUAGE_PATTERN, '[@none]')} \;\n" + end + + # flatten all properties and turn into RDF::Literals with language tagging if appropriate + def property_implode(output) + return nil if output.blank? + output.each do |property_name, values| + output[property_name] = remap_string_values(property_name, values) if values.first.is_a? String + end + output.values.flatten.uniq + end + + def property_name_for(language) + "#{language}_#{PROPERTY_NAME}" + end + + def language_from(property_name) + return nil if property_name.casecmp?(PROPERTY_NAME) + property_name.chomp("_#{PROPERTY_NAME}") + end - # Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values. - # @param program [Ldpath::Program] an executable program that will extract a value from a graph - # @param graph [RDF::Graph] the graph from which the values will be extracted - # @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri - # @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls. - # It will limit results to those found in the context graph. - ## @return [Array] the extracted values based on the ldpath - def self.ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?) - return VALUE_ON_ERROR if program.blank? - output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context) - output.present? ? output['property'].uniq : nil - rescue => e - Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}") - raise StandardError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}" + def remap_string_values(property_name, values) + language = language_from(property_name) + values.map { |v| RDF::Literal.new(v, language: language) } + end end end end diff --git a/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb b/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb index 70db6d6c..1fa36d2d 100644 --- a/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb +++ b/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb @@ -19,9 +19,10 @@ class GraphLdpathMapperService # @example ldpath map # { # uri: :subject_uri, - # id: 'locid:lccn :: xsd::string', + # id: 'locid:lccn', # label: 'skos:prefLabel :: xsd::string', # altlabel: 'skos:altLabel :: xsd::string', + # sameas: 'skos:sameAs :: xsd::anyURI', # sort: 'vivo:rank :: xsd::integer' # } # @param subject_uri [RDF::URI] the subject within the graph for which the values are being extracted diff --git a/spec/models/linked_data/config/context_property_map_spec.rb b/spec/models/linked_data/config/context_property_map_spec.rb index 51cf1690..c990dc38 100644 --- a/spec/models/linked_data/config/context_property_map_spec.rb +++ b/spec/models/linked_data/config/context_property_map_spec.rb @@ -277,9 +277,9 @@ let(:expanded_id) { '123' } before do - allow(Ldpath::Program).to receive(:parse).with('property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;').and_return(basic_program) - allow(Ldpath::Program).to receive(:parse).with('property = skos:prefLabel ::xsd:string ;').and_return(expanded_label_program) - allow(Ldpath::Program).to receive(:parse).with('property = loc:lccn ::xsd:string ;').and_return(expanded_id_program) + allow(Ldpath::Program).to receive(:parse).with("property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;\n").and_return(basic_program) + allow(Ldpath::Program).to receive(:parse).with("property = skos:prefLabel ::xsd:string ;\n").and_return(expanded_label_program) + allow(Ldpath::Program).to receive(:parse).with("property = loc:lccn ::xsd:string ;\n").and_return(expanded_id_program) allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => [expanded_uri]) allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_label]) allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_id]) diff --git a/spec/services/linked_data/ldpath_service_spec.rb b/spec/services/linked_data/ldpath_service_spec.rb index 17d8d5c3..877d321f 100644 --- a/spec/services/linked_data/ldpath_service_spec.rb +++ b/spec/services/linked_data/ldpath_service_spec.rb @@ -17,10 +17,13 @@ context 'when ldpath_program gets parse error' do let(:cause) { "undefined method `ascii_tree' for nil:NilClass" } let(:warning) { I18n.t('qa.linked_data.ldpath.parse_logger_error') } - let(:program_code) { "@prefix skos : ;\nproperty = skos:prefLabel ::xsd:string ;" } + let(:program_code) { "BAD_PROGRAM ;" } let(:log_message) { "WARNING: #{warning}... cause: #{cause}\n ldpath_program=\n#{program_code}" } - before { allow(Ldpath::Program).to receive(:parse).with(anything).and_raise(cause) } + before do + allow(described_class).to receive(:ldpath_program_code).with(anything).and_return(program_code) + allow(Ldpath::Program).to receive(:parse).with(anything).and_raise(cause) + end it 'logs error and returns PARSE ERROR as the value' do expect(Rails.logger).to receive(:warn).with(log_message) @@ -29,20 +32,149 @@ end end + describe '.ldpath_program_code' do + subject { described_class.ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages) } + + context 'for a ldpath without language pattern' do + let(:ldpath) { 'dcterms:identifier' } + let(:languages) { [:fr] } + let(:prefixes) { { "dcterms" => "http://purl.org/dc/terms/" } } + it 'generates the simple program code' do + expected_program = <<-PROGRAM +@prefix dcterms : \; +property = dcterms:identifier \; +PROGRAM + expect(subject).to eq expected_program + end + end + + context 'for a ldpath with language pattern' do + let(:ldpath) { 'madsrdf:authoritativeLabel*LANG* ::xsd:string' } + let(:prefixes) { { "madsrdf" => "http://www.loc.gov/mads/rdf/v1#" } } + context 'and no languages specified' do + let(:languages) { nil } + it 'generates the simple program code' do + expected_program = <<-PROGRAM +@prefix madsrdf : \; +property = madsrdf:authoritativeLabel ::xsd:string \; +PROGRAM + expect(subject).to eq expected_program + end + end + + context 'and one language specified' do + let(:languages) { [:en] } + it 'generates a program with the language' do + expected_program = <<-PROGRAM +@prefix madsrdf : \; +en_property = madsrdf:authoritativeLabel[@en] ::xsd:string \; +property = madsrdf:authoritativeLabel[@none] ::xsd:string \; +PROGRAM + expect(subject).to eq expected_program + end + end + + context 'and multiple languages specified' do + let(:languages) { [:fr, :de] } + it 'generates a program with languages' do + expected_program = <<-PROGRAM +@prefix madsrdf : \; +fr_property = madsrdf:authoritativeLabel[@fr] ::xsd:string \; +de_property = madsrdf:authoritativeLabel[@de] ::xsd:string \; +property = madsrdf:authoritativeLabel[@none] ::xsd:string \; +PROGRAM + expect(subject).to eq expected_program + end + end + end + end + describe '.ldpath_evaluate' do subject { described_class.ldpath_evaluate(program: program, graph: graph, subject_uri: subject_uri) } let(:program) { instance_double(Ldpath::Program) } let(:graph) { instance_double(RDF::Graph) } let(:subject_uri) { instance_double(RDF::URI) } - let(:values) { ['Expanded Label'] } before do - allow(Ldpath::Program).to receive(:parse).with('property = skos:prefLabel ::xsd:string ;').and_return(program) - allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => values) + allow(Ldpath::Program).to receive(:parse).with(anything).and_return(program) end - it 'returns the extracted label' do - expect(subject).to match_array values + + context 'when program does not contain languages' do + context 'and value is a string' do + let(:values) { ['value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('property' => values) + end + it 'returns the string values as is' do + expected_values = values.map { |v| RDF::Literal.new(v) } + expect(subject).to match_array expected_values + end + end + + context 'and value is a URI' do + let(:values) { [RDF::URI.new('http://example.com/1'), RDF::URI.new('http://example.com/2')] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('property' => values) + end + it 'returns the URIs' do + expected_values = values + expect(subject).to match_array expected_values + end + end + + context 'and value is numeric' do + let(:values) { [23, 14, 55] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('property' => values) + end + it 'returns the URIs' do + expected_values = values + expect(subject).to match_array expected_values + end + end + end + + context 'when program has languages' do + context 'and one language specified' do + let(:en_values) { ['en_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('en_property' => en_values, 'property' => untagged_values) + end + it 'generates a program with the language' do + expected_values = + en_values.map { |v| RDF::Literal.new(v, language: :en) } + + untagged_values.map { |v| RDF::Literal.new(v) } + expect(subject).to match_array expected_values + end + end + + context 'and multiple languages specified' do + let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] } + let(:de_values) { ['de_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true) + .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values) + end + it 'returns the extracted label' do + expected_values = + (fr_values.uniq.map { |v| RDF::Literal.new(v, language: :fr) } + + de_values.map { |v| RDF::Literal.new(v, language: :de) } + + untagged_values.map { |v| RDF::Literal.new(v) }).uniq + expect(subject).to match_array expected_values + end + end end context 'when ldpath_evaluate gets parse error' do @@ -50,11 +182,18 @@ let(:warning) { I18n.t('qa.linked_data.ldpath.evaluate_logger_error') } let(:log_message) { "WARNING: #{warning} (cause: #{cause}" } - before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(cause) } + before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(ParseError, cause) } it 'logs error and returns PARSE ERROR as the value' do expect(Rails.logger).to receive(:warn).with(log_message) - expect { subject.values(graph, subject_uri) }.to raise_error StandardError, I18n.t('qa.linked_data.ldpath.evaluate_error') + "... cause: #{cause}" + expect { subject }.to raise_error ParseError, I18n.t('qa.linked_data.ldpath.evaluate_error') + "... cause: #{cause}" + end + end + + context 'when program is empty' do + let(:program) { nil } + it 'returns empty array' do + expect { subject }.to raise_error ArgumentError, "You must specify a program when calling ldpath_evaluate" end end end diff --git a/spec/services/linked_data/mapper/graph_ldpath_mapper_service_spec.rb b/spec/services/linked_data/mapper/graph_ldpath_mapper_service_spec.rb index ee1f6057..6b6c8686 100644 --- a/spec/services/linked_data/mapper/graph_ldpath_mapper_service_spec.rb +++ b/spec/services/linked_data/mapper/graph_ldpath_mapper_service_spec.rb @@ -38,11 +38,11 @@ expect(subject.keys).to match_array [:uri, :id, :label, :altlabel, :sameas, :sort] validate_entry(subject, :uri, [subject_uri.to_s], RDF::URI) - validate_entry(subject, :id, ['530369'], String) - validate_entry(subject, :label, ['Cornell University'], String) - validate_entry(subject, :altlabel, ['Ithaca (N.Y.). Cornell University'], String) + validate_entry(subject, :id, ['530369'], RDF::Literal) + validate_entry(subject, :label, ['Cornell University'], RDF::Literal) + validate_entry(subject, :altlabel, ['Ithaca (N.Y.). Cornell University'], RDF::Literal) validate_entry(subject, :sameas, ['http://id.loc.gov/authorities/names/n79021621'], RDF::URI) - validate_entry(subject, :sort, ['1'], String) + validate_entry(subject, :sort, ['1'], RDF::Literal) end end @@ -55,11 +55,11 @@ expect(subject.keys).to match_array [:uri, :id, :label, :altlabel, :sameas, :sort] validate_entry(subject, :uri, [subject_uri.to_s], RDF::URI) - validate_entry(subject, :id, ['510103'], String) - validate_entry(subject, :label, ['Cornell University. Libraries'], String) - validate_entry(subject, :altlabel, ['Cornell University. Central Libraries', 'Cornell University. John M. Olin Library', 'Cornell University. White Library'], String) + validate_entry(subject, :id, ['510103'], RDF::Literal) + validate_entry(subject, :label, ['Cornell University. Libraries'], RDF::Literal) + validate_entry(subject, :altlabel, ['Cornell University. Central Libraries', 'Cornell University. John M. Olin Library', 'Cornell University. White Library'], RDF::Literal) validate_entry(subject, :sameas, ['http://id.loc.gov/authorities/names/n50000040', 'https://viaf.org/viaf/147713418'], RDF::URI) - validate_entry(subject, :sort, ['2'], String) + validate_entry(subject, :sort, ['2'], RDF::Literal) end end @@ -72,11 +72,11 @@ expect(subject.keys).to match_array [:uri, :id, :label, :altlabel, :sameas, :sort] validate_entry(subject, :uri, [subject_uri.to_s], RDF::URI) - validate_entry(subject, :id, ['5140'], String) - validate_entry(subject, :label, ['Cornell, Joseph'], String) + validate_entry(subject, :id, ['5140'], RDF::Literal) + validate_entry(subject, :label, ['Cornell, Joseph'], RDF::Literal) validate_entry(subject, :altlabel, [], NilClass) validate_entry(subject, :sameas, [], NilClass) - validate_entry(subject, :sort, ['3'], String) + validate_entry(subject, :sort, ['3'], RDF::Literal) end end @@ -98,11 +98,11 @@ expect(subject.keys).to match_array [:uri, :id, :label, :altlabel, :sameas, :sort, :context] validate_entry(subject, :uri, [subject_uri.to_s], RDF::URI) - validate_entry(subject, :id, ['5140'], String) - validate_entry(subject, :label, ['Cornell, Joseph'], String) + validate_entry(subject, :id, ['5140'], RDF::Literal) + validate_entry(subject, :label, ['Cornell, Joseph'], RDF::Literal) validate_entry(subject, :altlabel, [], NilClass) validate_entry(subject, :sameas, [], NilClass) - validate_entry(subject, :sort, ['3'], String) + validate_entry(subject, :sort, ['3'], RDF::Literal) expect(subject[:context]).to be_kind_of Hash expect(subject[:context]).to include(context) From fce0947a0f18230ead75d8f92e9b113c1a63c622 Mon Sep 17 00:00:00 2001 From: "E. Lynette Rayle" Date: Wed, 20 Nov 2019 19:26:17 -0500 Subject: [PATCH 2/2] use maintain_literals in ldpath to support language sorting NOTE: This commit references a branch in ldpath. It needs to be removed when PR #18 is merged into the ldpath gem. --- .circleci/config.yml | 2 +- Gemfile | 2 + app/services/qa/linked_data/ldpath_service.rb | 6 +- .../mapper/graph_ldpath_mapper_service.rb | 2 +- qa.gemspec | 2 +- .../linked_data/lod_lang_defaults.json | 17 +- .../linked_data/lod_lang_multi_defaults.json | 17 +- .../linked_data/lod_lang_no_defaults.json | 15 +- .../linked_data/lod_lang_param.json | 18 +- .../linked_data/lod_min_config.json | 9 +- .../config/context_property_map_spec.rb | 6 +- .../linked_data/ldpath_service_spec.rb | 206 +++++++++++++----- 12 files changed, 204 insertions(+), 98 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index dce96104..73cb0b40 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,7 +27,7 @@ jobs: project: qa - samvera/engine_cart_generate: - cache_key: v7-internal-test-app-{{ checksum "qa.gemspec" }}-{{ checksum "spec/test_app_templates/lib/generators/test_app_generator.rb" }}-{{ checksum "lib/generators/qa/install/install_generator.rb" }}-<< parameters.rails_version >>-<< parameters.ruby_version >> + cache_key: v10-internal-test-app-{{ checksum "qa.gemspec" }}-{{ checksum "spec/test_app_templates/lib/generators/test_app_generator.rb" }}-{{ checksum "lib/generators/qa/install/install_generator.rb" }}-<< parameters.rails_version >>-<< parameters.ruby_version >> - samvera/bundle_for_gem: ruby_version: << parameters.ruby_version >> diff --git a/Gemfile b/Gemfile index 4855aecd..9343a95e 100644 --- a/Gemfile +++ b/Gemfile @@ -7,6 +7,8 @@ group :development, :test do gem 'simplecov', require: false end +gem 'ldpath', github: 'samvera-labs/ldpath', branch: 'maintain_literals' + # BEGIN ENGINE_CART BLOCK # engine_cart: 0.10.0 # engine_cart stanza: 0.10.0 diff --git a/app/services/qa/linked_data/ldpath_service.rb b/app/services/qa/linked_data/ldpath_service.rb index 7a7499eb..67bf1296 100644 --- a/app/services/qa/linked_data/ldpath_service.rb +++ b/app/services/qa/linked_data/ldpath_service.rb @@ -42,10 +42,10 @@ def ldpath_program_code(ldpath:, prefixes: {}, languages: []) # @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls. # It will limit results to those found in the context graph. ## @return [Array] the extracted values based on the ldpath - def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?) + def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?, maintain_literals: false) raise ArgumentError, "You must specify a program when calling ldpath_evaluate" if program.blank? - output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context) - property_implode(output) + output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context, maintain_literals: maintain_literals) + maintain_literals ? property_implode(output) : output.values.flatten.uniq rescue ParseError => e Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}") raise ParseError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}" diff --git a/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb b/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb index 1fa36d2d..32acc4c4 100644 --- a/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb +++ b/app/services/qa/linked_data/mapper/graph_ldpath_mapper_service.rb @@ -38,7 +38,7 @@ def self.map_values(graph:, ldpath_map:, subject_uri:, prefixes: {}) ldpath_map.each do |key, ldpath| next value_map[key] = [subject_uri] if ldpath == :subject_uri ldpath_program = ldpath_service.ldpath_program(ldpath: ldpath, prefixes: prefixes) - values = ldpath_service.ldpath_evaluate(program: ldpath_program, graph: graph, subject_uri: subject_uri) + values = ldpath_service.ldpath_evaluate(program: ldpath_program, graph: graph, subject_uri: subject_uri, maintain_literals: true) value_map[key] = values end value_map = yield value_map if block_given? diff --git a/qa.gemspec b/qa.gemspec index 65af7b34..b5029991 100644 --- a/qa.gemspec +++ b/qa.gemspec @@ -19,7 +19,7 @@ Gem::Specification.new do |s| s.add_dependency 'activerecord-import' s.add_dependency 'deprecation' s.add_dependency 'faraday' - s.add_dependency 'ldpath' + # s.add_dependency 'ldpath' s.add_dependency 'nokogiri', '~> 1.6' s.add_dependency 'rails', '~> 5.0' s.add_dependency 'rdf' diff --git a/spec/fixtures/authorities/linked_data/lod_lang_defaults.json b/spec/fixtures/authorities/linked_data/lod_lang_defaults.json index e8ce281b..22456b87 100644 --- a/spec/fixtures/authorities/linked_data/lod_lang_defaults.json +++ b/spec/fixtures/authorities/linked_data/lod_lang_defaults.json @@ -1,5 +1,8 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "dcterms": "http://purl.org/dc/terms/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -21,9 +24,9 @@ "term_id": "URI", "language": [ "fr" ], "results": { - "id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel" + "id_ldpath": "loc:lccn", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel" } }, "search": { @@ -46,10 +49,10 @@ }, "language": [ "fr" ], "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel", - "sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel", + "sort_ldpath": "skos:prefLabel" } } } diff --git a/spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json b/spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json index 91264cb8..d7660b5d 100644 --- a/spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json +++ b/spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json @@ -1,5 +1,8 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "dcterms": "http://purl.org/dc/terms/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -21,9 +24,9 @@ "term_id": "URI", "language": [ "en", "fr" ], "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel" } }, "search": { @@ -46,10 +49,10 @@ }, "language": [ "en", "fr" ], "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel", - "sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel", + "sort_ldpath": "skos:prefLabel" } } } diff --git a/spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json b/spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json index 3f729c40..517abd56 100644 --- a/spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json +++ b/spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json @@ -1,5 +1,8 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "dcterms": "http://purl.org/dc/terms/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -20,8 +23,8 @@ }, "term_id": "URI", "results": { - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel" + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel" } }, "search": { @@ -43,10 +46,10 @@ "query": "query" }, "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel", - "sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel", + "sort_ldpath": "skos:prefLabel" } } } diff --git a/spec/fixtures/authorities/linked_data/lod_lang_param.json b/spec/fixtures/authorities/linked_data/lod_lang_param.json index 7049e4b3..8b0a9ea7 100644 --- a/spec/fixtures/authorities/linked_data/lod_lang_param.json +++ b/spec/fixtures/authorities/linked_data/lod_lang_param.json @@ -1,5 +1,9 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "dcterms": "http://purl.org/dc/terms/", + "loc": "http://id.loc.gov/vocabulary/identifiers/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -27,9 +31,9 @@ }, "term_id": "URI", "results": { - "id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel" + "id_ldpath": "loc:lccn", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel" } }, "search": { @@ -58,10 +62,10 @@ "query": "query" }, "results": { - "id_predicate": "http://purl.org/dc/terms/identifier", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel", - "altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel", - "sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "dcterms:identifier", + "label_ldpath": "skos:prefLabel", + "altlabel_ldpath": "skos:altLabel", + "sort_ldpath": "skos:prefLabel" } } } diff --git a/spec/fixtures/authorities/linked_data/lod_min_config.json b/spec/fixtures/authorities/linked_data/lod_min_config.json index 257bb371..78624a13 100644 --- a/spec/fixtures/authorities/linked_data/lod_min_config.json +++ b/spec/fixtures/authorities/linked_data/lod_min_config.json @@ -1,5 +1,8 @@ { "QA_CONFIG_VERSION": "2.0", + "prefixes": { + "loc": "http://id.loc.gov/vocabulary/identifiers/" + }, "term": { "url": { "@context": "http://www.w3.org/ns/hydra/context.jsonld", @@ -20,8 +23,8 @@ }, "term_id": "URI", "results": { - "id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn", - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "id_ldpath": "loc:lccn", + "label_ldpath": "skos:prefLabel" } }, "search": { @@ -43,7 +46,7 @@ "query": "query" }, "results": { - "label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel" + "label_ldpath": "skos:prefLabel" } } } diff --git a/spec/models/linked_data/config/context_property_map_spec.rb b/spec/models/linked_data/config/context_property_map_spec.rb index c990dc38..5380d897 100644 --- a/spec/models/linked_data/config/context_property_map_spec.rb +++ b/spec/models/linked_data/config/context_property_map_spec.rb @@ -280,9 +280,9 @@ allow(Ldpath::Program).to receive(:parse).with("property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;\n").and_return(basic_program) allow(Ldpath::Program).to receive(:parse).with("property = skos:prefLabel ::xsd:string ;\n").and_return(expanded_label_program) allow(Ldpath::Program).to receive(:parse).with("property = loc:lccn ::xsd:string ;\n").and_return(expanded_id_program) - allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => [expanded_uri]) - allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_label]) - allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_id]) + allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_uri]) + allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_label]) + allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_id]) end it 'returns the uri, id, label for the expanded uri value' do expanded_values = subject.expanded_values(graph, subject_uri).first diff --git a/spec/services/linked_data/ldpath_service_spec.rb b/spec/services/linked_data/ldpath_service_spec.rb index 877d321f..268da57f 100644 --- a/spec/services/linked_data/ldpath_service_spec.rb +++ b/spec/services/linked_data/ldpath_service_spec.rb @@ -90,7 +90,7 @@ end describe '.ldpath_evaluate' do - subject { described_class.ldpath_evaluate(program: program, graph: graph, subject_uri: subject_uri) } + subject { described_class.ldpath_evaluate(program: program, graph: graph, subject_uri: subject_uri, maintain_literals: maintain_literals) } let(:program) { instance_double(Ldpath::Program) } let(:graph) { instance_double(RDF::Graph) } @@ -100,79 +100,161 @@ allow(Ldpath::Program).to receive(:parse).with(anything).and_return(program) end - context 'when program does not contain languages' do - context 'and value is a string' do - let(:values) { ['value'] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('property' => values) - end - it 'returns the string values as is' do - expected_values = values.map { |v| RDF::Literal.new(v) } - expect(subject).to match_array expected_values + context 'when program does not request languages' do + context 'and not maintaining literals' do + let(:maintain_literals) { false } + + context 'and value is a string' do + let(:values) { ['value', 'value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the string values as is' do + expected_values = ['value'] + expect(subject).to match_array expected_values + end end - end - context 'and value is a URI' do - let(:values) { [RDF::URI.new('http://example.com/1'), RDF::URI.new('http://example.com/2')] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('property' => values) + context 'and value is a URI' do + let(:values) { ['http://example.com/1', 'http://example.com/2'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the URIs' do + expected_values = values + expect(subject).to match_array expected_values + end end - it 'returns the URIs' do - expected_values = values - expect(subject).to match_array expected_values + + context 'and value is numeric' do + let(:values) { [23, 14, 55] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the numeric values' do + expected_values = values + expect(subject).to match_array expected_values + end end end - context 'and value is numeric' do - let(:values) { [23, 14, 55] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('property' => values) + context 'and maintaining literals' do + let(:maintain_literals) { true } + + context 'and value is a string' do + let(:values) { [RDF::Literal.new('value'), RDF::Literal.new('value')] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the string values as is' do + expected_values = [RDF::Literal.new('value')] + expect(subject).to match_array expected_values + end end - it 'returns the URIs' do - expected_values = values - expect(subject).to match_array expected_values + + context 'and value is a URI' do + let(:values) { [RDF::URI.new('http://example.com/1'), RDF::URI.new('http://example.com/2')] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the URIs' do + expect(subject).to match_array values + end + end + + context 'and value is numeric' do + let(:values) { [RDF::Literal.new(23), RDF::Literal.new(14), RDF::Literal.new(55)] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('property' => values) + end + it 'returns the numeric values' do + expect(subject).to match_array values + end end end end context 'when program has languages' do - context 'and one language specified' do - let(:en_values) { ['en_value'] } - let(:untagged_values) { ['untagged_value'] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('en_property' => en_values, 'property' => untagged_values) + context 'and not maintaining literals' do + let(:maintain_literals) { false } + + context 'and one language specified' do + let(:en_values) { ['en_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('en_property' => en_values, 'property' => untagged_values) + end + it 'generates a program with the language' do + expected_values = en_values + untagged_values + expect(subject).to match_array expected_values + end end - it 'generates a program with the language' do - expected_values = - en_values.map { |v| RDF::Literal.new(v, language: :en) } + - untagged_values.map { |v| RDF::Literal.new(v) } - expect(subject).to match_array expected_values + + context 'and multiple languages specified' do + let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] } + let(:de_values) { ['de_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values) + end + it 'returns the extracted label' do + expected_values = fr_values.uniq + de_values + untagged_values + expect(subject).to match_array expected_values + end end end - context 'and multiple languages specified' do - let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] } - let(:de_values) { ['de_value'] } - let(:untagged_values) { ['untagged_value'] } - before do - allow(program).to receive(:evaluate) - .with(subject_uri, context: graph, limit_to_context: true) - .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values) + context 'and maintaining literals' do + let(:maintain_literals) { true } + + context 'and one language specified' do + let(:en_values) { ['en_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('en_property' => en_values, 'property' => untagged_values) + end + it 'generates a program with the language' do + expected_values = + en_values.map { |v| RDF::Literal.new(v, language: :en) } + + untagged_values.map { |v| RDF::Literal.new(v) } + expect(subject).to match_array expected_values + end end - it 'returns the extracted label' do - expected_values = - (fr_values.uniq.map { |v| RDF::Literal.new(v, language: :fr) } + - de_values.map { |v| RDF::Literal.new(v, language: :de) } + - untagged_values.map { |v| RDF::Literal.new(v) }).uniq - expect(subject).to match_array expected_values + + context 'and multiple languages specified' do + let(:fr_values) { ['fr_value1', 'fr_value2', 'fr_value1'] } + let(:de_values) { ['de_value'] } + let(:untagged_values) { ['untagged_value'] } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: maintain_literals) + .and_return('fr_property' => fr_values, 'de_property' => de_values, 'property' => untagged_values) + end + it 'returns the extracted label' do + expected_values = + (fr_values.uniq.map { |v| RDF::Literal.new(v, language: :fr) } + + de_values.map { |v| RDF::Literal.new(v, language: :de) } + + untagged_values.map { |v| RDF::Literal.new(v) }).uniq + expect(subject).to match_array expected_values + end end end end @@ -181,8 +263,13 @@ let(:cause) { "unknown cause" } let(:warning) { I18n.t('qa.linked_data.ldpath.evaluate_logger_error') } let(:log_message) { "WARNING: #{warning} (cause: #{cause}" } + let(:maintain_literals) { false } - before { allow(program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_raise(ParseError, cause) } + before do + allow(program).to receive(:evaluate) + .with(subject_uri, context: graph, limit_to_context: true, maintain_literals: false) + .and_raise(ParseError, cause) + end it 'logs error and returns PARSE ERROR as the value' do expect(Rails.logger).to receive(:warn).with(log_message) @@ -192,7 +279,8 @@ context 'when program is empty' do let(:program) { nil } - it 'returns empty array' do + let(:maintain_literals) { false } + it 'raise ArgumentError' do expect { subject }.to raise_error ArgumentError, "You must specify a program when calling ldpath_evaluate" end end