Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] add language processing to ldpath service #284

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
project: qa

- samvera/engine_cart_generate:
cache_key: v7-internal-test-app-{{ checksum "qa.gemspec" }}-{{ checksum "spec/test_app_templates/lib/generators/test_app_generator.rb" }}-{{ checksum "lib/generators/qa/install/install_generator.rb" }}-<< parameters.rails_version >>-<< parameters.ruby_version >>
cache_key: v10-internal-test-app-{{ checksum "qa.gemspec" }}-{{ checksum "spec/test_app_templates/lib/generators/test_app_generator.rb" }}-{{ checksum "lib/generators/qa/install/install_generator.rb" }}-<< parameters.rails_version >>-<< parameters.ruby_version >>

- samvera/bundle_for_gem:
ruby_version: << parameters.ruby_version >>
Expand Down
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ group :development, :test do
gem 'simplecov', require: false
end

gem 'ldpath', github: 'samvera-labs/ldpath', branch: 'maintain_literals'

# BEGIN ENGINE_CART BLOCK
# engine_cart: 0.10.0
# engine_cart stanza: 0.10.0
Expand Down
102 changes: 74 additions & 28 deletions app/services/qa/linked_data/ldpath_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,85 @@
module Qa
module LinkedData
class LdpathService
# Value returned by the older class-level ldpath_evaluate when no program is supplied.
VALUE_ON_ERROR = [].freeze
# Marker inside an ldpath that property_explode replaces with a language filter (or removes).
LANGUAGE_PATTERN = "*LANG*".freeze
# Name of the untagged property in generated program code; per-language properties are named "<language>_property".
PROPERTY_NAME = "property".freeze

# Class-level attribute holding Ldpath::Transform.default_prefixes wrapped for indifferent key access.
class_attribute :predefined_prefixes
self.predefined_prefixes = Ldpath::Transform.default_prefixes.with_indifferent_access

# Create the ldpath program for a given ldpath.
# @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
# @return [Ldpath::Program] an executable program that will extract a value from a graph
def self.ldpath_program(ldpath:, prefixes: {})
# Program text is assembled as one "@prefix" line per entry followed by a single "property" rule.
program_code = ""
prefixes.each { |key, url| program_code << "@prefix #{key} : <#{url}> \;\n" }
program_code << "property = #{ldpath} \;"
Ldpath::Program.parse program_code
rescue => e
# Any failure is logged together with the program text, then re-raised as a StandardError
# whose message carries the original cause.
Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}... cause: #{e.message}\n ldpath_program=\n#{program_code}")
raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + "... cause: #{e.message}"
end
class << self
# Create the ldpath program for a given ldpath.
# @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
# @param languages [Array<Symbol>] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker)
# @return [Ldpath::Program] an executable program that will extract a value from a graph
def ldpath_program(ldpath:, prefixes: {}, languages: [])
  # Build the program text first so it can be included in the log entry if parsing fails.
  program_code = ldpath_program_code(ldpath: ldpath, prefixes: prefixes, languages: languages)
  Ldpath::Program.parse(program_code)
rescue => error
  # Log the failing program, then surface a StandardError that carries the original cause.
  cause = "... cause: #{error.message}"
  Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.parse_logger_error')}#{cause}\n ldpath_program=\n#{program_code}")
  raise StandardError, I18n.t("qa.linked_data.ldpath.parse_error") + cause
end

# Create the program code for a given ldpath.
# @param ldpath [String] ldpath to follow to get a value from a graph (documentation: http://marmotta.apache.org/ldpath/language.html)
# @param prefixes [Hash] shortcut names for URI prefixes with key = part of predicate that is the same for all terms (e.g. { "madsrdf": "http://www.loc.gov/mads/rdf/v1#" })
# @param languages [Array<Symbol>] limit results to these languages and anything not tagged (applies to ldpaths with *LANG* marker)
# @return [String] the program code string used with Ldpath::Program.parse
def ldpath_program_code(ldpath:, prefixes: {}, languages: [])
  # Emit one "@prefix" declaration per entry into a fresh mutable string ...
  prefix_declarations = prefixes.each_with_object("".dup) do |(key, url), code|
    code << "@prefix #{key} : <#{url}> \;\n"
  end
  # ... then let property_explode append the property definition(s) and return the result.
  property_explode(prefix_declarations, ldpath, languages)
end

# Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values.
# @param program [Ldpath::Program] an executable program that will extract a value from a graph
# @param graph [RDF::Graph] the graph from which the values will be extracted
# @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri
# @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls.
# It will limit results to those found in the context graph.
# @return [Array<RDF::Literal>] the extracted values based on the ldpath
# @param maintain_literals [Boolean] passed through to the program's evaluate call; when true the
#   result is post-processed by property_implode instead of being returned as the flattened raw values
# @raise [ArgumentError] if program is blank
def ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?, maintain_literals: false)
  raise ArgumentError, "You must specify a program when calling ldpath_evaluate" if program.blank?
  output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context, maintain_literals: maintain_literals)
  # With maintain_literals the per-property results are merged by property_implode; otherwise the
  # values of every property are flattened into a single de-duplicated list.
  maintain_literals ? property_implode(output) : output.values.flatten.uniq
rescue ParseError => e
  # NOTE(review): only ParseError is handled here, so other errors (including the ArgumentError
  # above) propagate unchanged. Confirm which ParseError constant this resolves to at runtime.
  Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message})")
  raise ParseError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}"
end

private

# create program code with a property per language + untagged
def property_explode(program_code, ldpath, languages)
  # Without the language marker the ldpath is used verbatim as the single property.
  has_marker = ldpath.include?(LANGUAGE_PATTERN)
  return program_code << "#{PROPERTY_NAME} = #{ldpath} \;\n" unless has_marker

  if languages.present?
    # One property per requested language, each filtered by that language tag ...
    languages.each do |lang|
      tagged_path = ldpath.gsub(LANGUAGE_PATTERN, "[@#{lang}]")
      program_code << "#{property_name_for(lang)} = #{tagged_path} \;\n"
    end
    # ... plus the plain property, filtered with [@none].
    untagged_filter = '[@none]'
  else
    # No languages requested: simply strip the marker.
    untagged_filter = ''
  end
  program_code << "#{PROPERTY_NAME} = #{ldpath.gsub(LANGUAGE_PATTERN, untagged_filter)} \;\n"
end

# flatten all properties and turn into RDF::Literals with language tagging if appropriate
def property_implode(output)
  return nil if output.blank?
  # Build the merged list without writing back into `output`, so the hash handed in by the
  # caller is neither modified nor mutated while it is being iterated.
  merged = output.map do |property_name, values|
    # Only properties whose first value is a String are remapped via remap_string_values;
    # all other values are passed through untouched.
    values.first.is_a?(String) ? remap_string_values(property_name, values) : values
  end
  merged.flatten.uniq
end

def property_name_for(language)
  # Per-language property names are the language followed by "_" and the base property name.
  language_prefix = language.to_s
  "#{language_prefix}_#{PROPERTY_NAME}"
end

def language_from(property_name)
  # The bare property name (compared case-insensitively) carries no language.
  untagged = property_name.casecmp?(PROPERTY_NAME)
  # Otherwise the language is whatever precedes the "_<PROPERTY_NAME>" suffix.
  untagged ? nil : property_name.chomp("_#{PROPERTY_NAME}")
end

# Evaluate an ldpath for a specific subject uri in the context of a graph and return the extracted values.
# @param program [Ldpath::Program] an executable program that will extract a value from a graph
# @param graph [RDF::Graph] the graph from which the values will be extracted
# @param subject_uri [RDF::URI] retrieved values will be limited to those with the subject uri
# @param limit_to_context [Boolean] if true, the evaluation process will not make any outside network calls.
# It will limit results to those found in the context graph.
## @return [Array<String>] the extracted values based on the ldpath
def self.ldpath_evaluate(program:, graph:, subject_uri:, limit_to_context: Qa.config.limit_ldpath_to_context?)
return VALUE_ON_ERROR if program.blank?
output = program.evaluate(subject_uri, context: graph, limit_to_context: limit_to_context)
output.present? ? output['property'].uniq : nil
rescue => e
Rails.logger.warn("WARNING: #{I18n.t('qa.linked_data.ldpath.evaluate_logger_error')} (cause: #{e.message}")
raise StandardError, I18n.t("qa.linked_data.ldpath.evaluate_error") + "... cause: #{e.message}"
def remap_string_values(property_name, values)
  # The language (possibly nil) is derived once from the property name and applied to every value.
  lang_tag = language_from(property_name)
  values.each_with_object([]) do |text, literals|
    literals << RDF::Literal.new(text, language: lang_tag)
  end
end
end
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ class GraphLdpathMapperService
# @example ldpath map
# {
# uri: :subject_uri,
# id: 'locid:lccn :: xsd::string',
# id: 'locid:lccn',
# label: 'skos:prefLabel :: xsd::string',
# altlabel: 'skos:altLabel :: xsd::string',
# sameas: 'skos:sameAs :: xsd::anyURI',
# sort: 'vivo:rank :: xsd::integer'
# }
# @param subject_uri [RDF::URI] the subject within the graph for which the values are being extracted
Expand All @@ -37,7 +38,7 @@ def self.map_values(graph:, ldpath_map:, subject_uri:, prefixes: {})
ldpath_map.each do |key, ldpath|
next value_map[key] = [subject_uri] if ldpath == :subject_uri
ldpath_program = ldpath_service.ldpath_program(ldpath: ldpath, prefixes: prefixes)
values = ldpath_service.ldpath_evaluate(program: ldpath_program, graph: graph, subject_uri: subject_uri)
values = ldpath_service.ldpath_evaluate(program: ldpath_program, graph: graph, subject_uri: subject_uri, maintain_literals: true)
value_map[key] = values
end
value_map = yield value_map if block_given?
Expand Down
2 changes: 1 addition & 1 deletion qa.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Gem::Specification.new do |s|
s.add_dependency 'activerecord-import'
s.add_dependency 'deprecation'
s.add_dependency 'faraday'
s.add_dependency 'ldpath'
# s.add_dependency 'ldpath'
s.add_dependency 'nokogiri', '~> 1.6'
s.add_dependency 'rails', '~> 5.0'
s.add_dependency 'rdf'
Expand Down
17 changes: 10 additions & 7 deletions spec/fixtures/authorities/linked_data/lod_lang_defaults.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{
"QA_CONFIG_VERSION": "2.0",
"prefixes": {
"dcterms": "http://purl.org/dc/terms/"
},
"term": {
"url": {
"@context": "http://www.w3.org/ns/hydra/context.jsonld",
Expand All @@ -21,9 +24,9 @@
"term_id": "URI",
"language": [ "fr" ],
"results": {
"id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn",
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel"
"id_ldpath": "loc:lccn",
"label_ldpath": "skos:prefLabel",
"altlabel_ldpath": "skos:altLabel"
}
},
"search": {
Expand All @@ -46,10 +49,10 @@
},
"language": [ "fr" ],
"results": {
"id_predicate": "http://purl.org/dc/terms/identifier",
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel",
"sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel"
"id_ldpath": "dcterms:identifier",
"label_ldpath": "skos:prefLabel",
"altlabel_ldpath": "skos:altLabel",
"sort_ldpath": "skos:prefLabel"
}
}
}
17 changes: 10 additions & 7 deletions spec/fixtures/authorities/linked_data/lod_lang_multi_defaults.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{
"QA_CONFIG_VERSION": "2.0",
"prefixes": {
"dcterms": "http://purl.org/dc/terms/"
},
"term": {
"url": {
"@context": "http://www.w3.org/ns/hydra/context.jsonld",
Expand All @@ -21,9 +24,9 @@
"term_id": "URI",
"language": [ "en", "fr" ],
"results": {
"id_predicate": "http://purl.org/dc/terms/identifier",
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel"
"id_ldpath": "dcterms:identifier",
"label_ldpath": "skos:prefLabel",
"altlabel_ldpath": "skos:altLabel"
}
},
"search": {
Expand All @@ -46,10 +49,10 @@
},
"language": [ "en", "fr" ],
"results": {
"id_predicate": "http://purl.org/dc/terms/identifier",
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel",
"sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel"
"id_ldpath": "dcterms:identifier",
"label_ldpath": "skos:prefLabel",
"altlabel_ldpath": "skos:altLabel",
"sort_ldpath": "skos:prefLabel"
}
}
}
15 changes: 9 additions & 6 deletions spec/fixtures/authorities/linked_data/lod_lang_no_defaults.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{
"QA_CONFIG_VERSION": "2.0",
"prefixes": {
"dcterms": "http://purl.org/dc/terms/"
},
"term": {
"url": {
"@context": "http://www.w3.org/ns/hydra/context.jsonld",
Expand All @@ -20,8 +23,8 @@
},
"term_id": "URI",
"results": {
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel"
"label_ldpath": "skos:prefLabel",
"altlabel_ldpath": "skos:altLabel"
}
},
"search": {
Expand All @@ -43,10 +46,10 @@
"query": "query"
},
"results": {
"id_predicate": "http://purl.org/dc/terms/identifier",
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel",
"sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel"
"id_ldpath": "dcterms:identifier",
"label_ldpath": "skos:prefLabel",
"altlabel_ldpath": "skos:altLabel",
"sort_ldpath": "skos:prefLabel"
}
}
}
18 changes: 11 additions & 7 deletions spec/fixtures/authorities/linked_data/lod_lang_param.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"QA_CONFIG_VERSION": "2.0",
"prefixes": {
"dcterms": "http://purl.org/dc/terms/",
"loc": "http://id.loc.gov/vocabulary/identifiers/"
},
"term": {
"url": {
"@context": "http://www.w3.org/ns/hydra/context.jsonld",
Expand Down Expand Up @@ -27,9 +31,9 @@
},
"term_id": "URI",
"results": {
"id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn",
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel"
"id_ldpath": "loc:lccn",
"label_ldpath": "skos:prefLabel",
"altlabel_ldpath": "skos:altLabel"
}
},
"search": {
Expand Down Expand Up @@ -58,10 +62,10 @@
"query": "query"
},
"results": {
"id_predicate": "http://purl.org/dc/terms/identifier",
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel",
"altlabel_predicate": "http://www.w3.org/2004/02/skos/core#altLabel",
"sort_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel"
"id_ldpath": "dcterms:identifier",
"label_ldpath": "skos:prefLabel",
"altlabel_ldpath": "skos:altLabel",
"sort_ldpath": "skos:prefLabel"
}
}
}
9 changes: 6 additions & 3 deletions spec/fixtures/authorities/linked_data/lod_min_config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{
"QA_CONFIG_VERSION": "2.0",
"prefixes": {
"loc": "http://id.loc.gov/vocabulary/identifiers/"
},
"term": {
"url": {
"@context": "http://www.w3.org/ns/hydra/context.jsonld",
Expand All @@ -20,8 +23,8 @@
},
"term_id": "URI",
"results": {
"id_predicate": "http://id.loc.gov/vocabulary/identifiers/lccn",
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel"
"id_ldpath": "loc:lccn",
"label_ldpath": "skos:prefLabel"
}
},
"search": {
Expand All @@ -43,7 +46,7 @@
"query": "query"
},
"results": {
"label_predicate": "http://www.w3.org/2004/02/skos/core#prefLabel"
"label_ldpath": "skos:prefLabel"
}
}
}
12 changes: 6 additions & 6 deletions spec/models/linked_data/config/context_property_map_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -277,12 +277,12 @@
let(:expanded_id) { '123' }

before do
allow(Ldpath::Program).to receive(:parse).with('property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;').and_return(basic_program)
allow(Ldpath::Program).to receive(:parse).with('property = skos:prefLabel ::xsd:string ;').and_return(expanded_label_program)
allow(Ldpath::Program).to receive(:parse).with('property = loc:lccn ::xsd:string ;').and_return(expanded_id_program)
allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true).and_return('property' => [expanded_uri])
allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_label])
allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true).and_return('property' => [expanded_id])
allow(Ldpath::Program).to receive(:parse).with("property = madsrdf:identifiesRWO/madsrdf:birthDate/schema:label ;\n").and_return(basic_program)
allow(Ldpath::Program).to receive(:parse).with("property = skos:prefLabel ::xsd:string ;\n").and_return(expanded_label_program)
allow(Ldpath::Program).to receive(:parse).with("property = loc:lccn ::xsd:string ;\n").and_return(expanded_id_program)
allow(basic_program).to receive(:evaluate).with(subject_uri, context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_uri])
allow(expanded_label_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_label])
allow(expanded_id_program).to receive(:evaluate).with(RDF::URI.new(subject_uri), context: graph, limit_to_context: true, maintain_literals: false).and_return('property' => [expanded_id])
end
it 'returns the uri, id, label for the expanded uri value' do
expanded_values = subject.expanded_values(graph, subject_uri).first
Expand Down
Loading