Skip to content

Commit

Permalink
Merge branch 'CW-2555-schema-updates' into 'dev'
Browse files Browse the repository at this point in the history
CW-2555 Schema updates

Closes CW-2555

See merge request epi2melabs/workflows/wf-artic!146
  • Loading branch information
vlshesketh committed Aug 9, 2023
2 parents cab982c + c490464 commit b27c17e
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 15 deletions.
158 changes: 146 additions & 12 deletions lib/NfcoreSchema.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ class NfcoreSchema {
println ''
log.error 'ERROR: Validation of pipeline parameters failed!'
JSONObject exceptionJSON = e.toJSON()
printExceptions(exceptionJSON, params_json, log, enums)
println ''
HashSet<String> observed_exceptions = []
printExceptions(exceptionJSON, params_json, log, enums, raw_schema, observed_exceptions)
has_error = true
}

Expand All @@ -196,7 +196,7 @@ class NfcoreSchema {
warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}"
}
log.warn warn_msg
log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}"
log.info "${colors.dim}- Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}"
println ''
}

Expand Down Expand Up @@ -361,16 +361,150 @@ class NfcoreSchema {
return output
}

// Traverse the raw_schema to resolve a $ref, returning a JSONObject or JSONArray.
private static def getSchemaLocation(raw_schema, schema_components) {
// a quick search did not reveal how to resolve a $ref inside a
// JSONObject so we'll go with traversing it ourselves!
def head = raw_schema
for (component in schema_components) {
if (component == '#') {
continue
}
else if (component.isNumber()) {
// assume numeric components are indexing an array
head = head[component as int]
}
else {
// otherwise grab by key
head = head[component]
}
}
return head
}

// Return a map of dependencies for a path in a schema, that relate to a key that is known to be missing.
// Each key in the map is a parameter that has been switched on and has a requirement on the missing key.
// The value of each key in the map is a list of all parameters that are required by the corresponding key.
private static HashMap<String, String> getSchemaDependencies(raw_schema, schema_components, missing_key) {
def head = getSchemaLocation(raw_schema, schema_components[1..-1] + 'dependencies')
HashMap<String, String> requirements = []
for (key in head.keySet()) {
if (head[key].contains(missing_key)) {
requirements[key] = head[key]
}
}
return requirements
}

// Return a list of all required parameters that are adjacent to a given path in a schema
private static ArrayList getSchemaRequirements(raw_schema, schema_components) {
def head = getSchemaLocation(raw_schema, schema_components[1..-2])
ArrayList requirements = []
for (entry in head) {
// this could be an allOf with more allOf/oneOf/anyOf inside
// we'll ignore anything that isn't a required key
if (entry.has("required")) {
requirements += entry["required"]
}
}
return requirements
}

//
// Loop over nested exceptions and print the causingException
//
private static void printExceptions(ex_json, params_json, log, enums) {
private static void printExceptions(ex_json, params_json, log, enums, raw_schema, observed_exceptions) {
def causingExceptions = ex_json['causingExceptions']
if (causingExceptions.length() == 0) {
def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/
// Missing required param
if (m.matches()) {
log.error "* Missing required parameter: --${m[0][1]}"
// Missing required param...
if (ex_json["keyword"] == "required") {
def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/
// ...but what sort of required param
ArrayList schema_components = ex_json['schemaLocation'].split('/')
if (schema_components[-1].isNumber()) {
// schemaLocation ends in array position, indicating use of
// xOf in the schema. this is some sort of complex definition
String schema_error_location = schema_components[1..-2].join('.')
String composition_type = schema_components[-2]
if (schema_error_location in observed_exceptions){
return
}
// try to check the schema to get the adjacent requirements
ArrayList requirement_group = []
try {
requirement_group = getSchemaRequirements(raw_schema, schema_components)
}
catch (groovy.lang.MissingPropertyException e) {}
if (requirement_group.size() == 0) {
// fall back to old error if we cannot read the schema
log.error "* Missing required parameter: --${m[0][1]}"
return
}
else if (composition_type == "allOf") {
// similarly, use the old error for all members of an allOf
for (choice in requirement_group) {
log.error "* Missing required parameter: --${choice}"
}
return
}
// construct an appropriate message for the other types of composition
String stderr_msg_prefix = "* Bad parameter configuration. "
if (composition_type == "oneOf") {
stderr_msg_prefix += "You must select only one option of:"
}
else if (composition_type == "anyOf") {
stderr_msg_prefix += "You must select at least one of:"
}
ArrayList stderr_msg = [stderr_msg_prefix]
// glue choices to error
for (choice in requirement_group) {
stderr_msg += " --${choice}"
}
log.error stderr_msg.join('\n')
// suppress similar warnings
observed_exceptions.add(schema_error_location)
}
else {
// boring simple required
log.error "* Missing required parameter: --${m[0][1]}"
}
}
// Missing dependent param
else if (ex_json['keyword'] == "dependencies") {
def m = ex_json['message'] =~ /property \[([^\]]+)\] is required/
HashMap<String, String> dependent_group = []
ArrayList schema_components = ex_json['schemaLocation'].split('/')
// try to check the schema to get the dependencies
try {
dependent_group = getSchemaDependencies(raw_schema, schema_components, m[0][1])
}
catch (groovy.lang.MissingPropertyException e) {}
if (dependent_group.size() == 0) {
// fall back to old error if we cannot read the schema
log.error "* Missing required parameter: --${m[0][1]}"
return
}
// we don't actually get told which parameter triggered the dependency
// only the name of the parameter that is actually missing.
// dependent_group will contain a list of triggering keys and their
// deps so we can print them all out for the user.
for (trigger_key in dependent_group.keySet()) {
String schema_error_location = "${ex_json.schemaLocation}/dependencies/${trigger_key}"
if (schema_error_location in observed_exceptions){
return
}
String stderr_msg_prefix = "* Missing parameter(s) that are required by --${trigger_key}:"
ArrayList stderr_msg = [stderr_msg_prefix]
// list dependencies of trigger_key that are not in the params_json
for (dep_key in dependent_group[trigger_key]) {
if (!params_json.has(dep_key)) {
stderr_msg += " --${dep_key}"
}
}
log.error stderr_msg.join('\n')
// suppress similar warnings
observed_exceptions.add(schema_error_location)
}
}
// Other base-level error
else if (ex_json['pointerToViolation'] == '#') {
Expand All @@ -381,19 +515,19 @@ class NfcoreSchema {
def param = ex_json['pointerToViolation'] - ~/^#\//
def param_val = params_json[param].toString()
if (ex_json['message'] =~ /.*not a valid enum value.*/) {
String stderr_msg = "* --${param}: ${param_val} is not a valid choice, pick one of:\n"
ArrayList stderr_msg = ["* --${param}: ${param_val} is not a valid choice, pick one of:"]
for (choice in enums[param]) {
stderr_msg += " - ${choice}\n"
stderr_msg += " - ${choice}"
}
log.error stderr_msg // avoid silly new lines between choices
log.error stderr_msg.join('\n') // avoid silly new lines between choices by printing one string
}
else {
log.error "* --${param}: ${ex_json['message']} (${param_val})"
}
}
}
for (ex in causingExceptions) {
printExceptions(ex, params_json, log, enums)
printExceptions(ex, params_json, log, enums, raw_schema, observed_exceptions)
}
}

Expand Down
13 changes: 10 additions & 3 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"properties": {
"fastq": {
"type": "string",
"title": "FASTQ",
"format": "path",
"description": "FASTQ files to use in the analysis.",
"help_text": "This accepts one of three cases: (i) the path to a single FASTQ file; (ii) the path to a top-level directory containing FASTQ files; (iii) the path to a directory containing one level of sub-directories which in turn contain FASTQ files. In the first and second case, a sample name can be supplied with `--sample`. In the last case, the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`."
Expand All @@ -28,6 +29,7 @@
},
"basecaller_cfg": {
"type": "string",
"title": "Basecaller configuration",
"description": "Name of the model that was used to basecall signal data, used to select an appropriate Medaka model.",
"help_text": "The basecaller configuration is used to automatically select the appropriate Medaka model. The automatic selection can be overridden with the 'medaka_variant_model' and 'medaka_consensus_model' parameters. The model list only shows models that are compatible with this workflow.",
"default": "dna_r9.4.1_450bps_hac",
Expand Down Expand Up @@ -200,28 +202,31 @@
},
"min_len": {
"type": "number",
"title": "Minimum read length",
"min": 50,
"max": 10000,
"description": "Minimum read length (default: set by scheme)."
},
"max_len": {
"type": "number",
"title": "Maximum read length",
"min": 50,
"max": 10000,
"description": "Maximum read length (default: set by scheme)."
},
"max_softclip_length": {
"type": "integer",
"description": "Remove reads with alignments showing large softclipping"
"title": "Maximum soft clip length",
"description": "Remove reads with alignments showing large soft clipping"
},
"update_data": {
"type": "boolean",
"default": false,
"description": "Update pangolin & Nextclade data at runtime."
"description": "Update Pangolin and Nextclade data at runtime."
},
"pangolin_options": {
"type": "string",
"description": "Pass options to pangolin, use like this \"--analysis-mode fast --min-length 26000\"."
"description": "Pass options to Pangolin, for example \"--analysis-mode fast --min-length 26000\"."
},
"nextclade_version": {
"type": "string",
Expand Down Expand Up @@ -252,10 +257,12 @@
"properties": {
"lab_id": {
"type": "string",
"title": "Laboratory identifier",
"description": "Laboratory identifier, used in reporting."
},
"testkit": {
"type": "string",
"title": "Test kit identifier",
"description": "Test kit identifier, used in reporting."
},
"disable_ping": {
Expand Down

0 comments on commit b27c17e

Please sign in to comment.