-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Unify getting limit values and checking them #237
base: master
Are you sure you want to change the base?
Changes from 1 commit
c18833a
178f02d
6221f39
3d3e2de
c899d12
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
// Functions to apply limits to nodes | ||
|
||
'use strict'; | ||
|
||
var async = require('async'); | ||
var nodeError = require('../limits/error'); | ||
var estimates = require('../limits/estimates'); | ||
|
||
// Obtain limit values given the limit definitions, which has the form: | ||
// { maximumNumberOfRows: { // name of the limit argument as used by checker functions | ||
// default: 1000000, // limit default value | ||
// message: 'too many rows' // error to be shown when limit is exceeded | ||
// name: 'maximumNumberOfRows' // (optional) name of the limit configuration parameter | ||
// ... | ||
// The result if of the form: | ||
// { maximumNumberOfRows: { // name of the limit argument as used by checker functions | ||
// value: 1000000, // value of the limit | ||
// message: 'too many rows' // error to be shown when limit is exceeded | ||
// ... | ||
function limitValues(node, context, limits, callback) { | ||
var values = {}; | ||
async.forEachOf(limits, function(limitParams, limit, done) { | ||
var limitName = limit || limitParams.name; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should be |
||
var limitValue = context.getLimit(node, limitName, limitParams.default, limitParams.message); | ||
values[limit] = limitValue; | ||
return done(null); | ||
}, function(err) { | ||
callback(err, values); | ||
}); | ||
} | ||
|
||
// Execute single limit checker. | ||
// limits defines the limits to use and is of the form accepted by `limitValues` | ||
function applyCheck(node, context, checker, limits, params, callback) { | ||
if (!checker) { | ||
return callback(null); | ||
} | ||
async.waterfall( | ||
[ | ||
function check$limitValues(done) { | ||
// Get the limit values from the limits definitions | ||
limitValues(node, context, limits, done); | ||
}, | ||
function check$apply(limits, done) { | ||
// Call the checker function passing the limit values | ||
checker(node, context, limits, params, done); | ||
} | ||
], | ||
callback | ||
); | ||
} | ||
|
||
// Execute multiple limit checkers, given an array of checker definitions; each checker definition is of the form: | ||
// { | ||
// checker: function(node, context, limitValues, params, callback) // checker function | ||
// limits: { lim1: { ... } } // limit definitions as accepted by `limitValues` | ||
// params: { param1: value1, ... } // parameters passed to the checker | ||
// } | ||
function check(node, context, checkerDefinitions, callback) { | ||
async.eachSeries(checkerDefinitions, function(checkerDefinition, done) { | ||
var params = checkerDefinition.params || {}; | ||
var limits = checkerDefinition.limits || {}; | ||
applyCheck(node, context, checkerDefinition.checker, limits, params, done); | ||
}, callback); | ||
} | ||
|
||
function limitNumberOfRows(node, context, limits, _params, callback) { | ||
var limit = limits.maximumNumberOfRows; | ||
if (!Number.isFinite(limit.value)) { | ||
return callback(null); | ||
} | ||
estimates.estimatedNumberOfRows(node, context, function(err, outputRows) { | ||
if (err) { | ||
// if estimation is not available don't abort the analysis | ||
context.logError(err); | ||
err = null; | ||
} else { | ||
if (outputRows > limit.value) { | ||
err = nodeError(node, [limit.message]); | ||
} | ||
} | ||
return callback(err); | ||
}); | ||
} | ||
|
||
function limitInputRows(node, context, limits, params, callback) { | ||
var limit = limits.maximumNumberOfRows; | ||
var input = params.input; | ||
if (!Number.isFinite(limit.value)) { | ||
return callback(null); | ||
} | ||
estimates.estimatedNumberOfRows(node[input], context, function(err, numRows) { | ||
if (err) { | ||
context.logError(err); | ||
err = null; | ||
} else { | ||
if (numRows > limit.value) { | ||
err = nodeError(node, [limit.message]); | ||
} | ||
} | ||
return callback(err); | ||
}); | ||
} | ||
|
||
function limitInputRowsAndAvgGroupedRows(node, context, limits, params, callback) { | ||
var limit = limits.rowsLimit; | ||
var groupedLimit = limits.groupedRowsLimit; | ||
var input = params.input; | ||
var column = params.column; | ||
var limitPresent = Number.isFinite(limit.value); | ||
var groupedLimitPresent = Number.isFinite(groupedLimit.value); | ||
if (!limitPresent && !groupedLimitPresent) { | ||
return callback(null); | ||
} | ||
|
||
function check(inputRows, numCategories) { | ||
var numberOfRowsPerCategory = inputRows/(numCategories || 1); | ||
var messages = []; | ||
if (limitPresent && inputRows > limit.value) { | ||
messages.push(limit.message); | ||
} | ||
if (groupedLimitPresent && numberOfRowsPerCategory > groupedLimit.value) { | ||
messages.push(groupedLimit.message); | ||
} | ||
return callback(nodeError(node, messages)); | ||
} | ||
|
||
estimates.estimatedNumberOfRows(node[input], context, function(err, inputRows) { | ||
if (err) { | ||
context.logError(err); | ||
// something went wrong... lack of stats? anyway, we'll let the analysis pass | ||
return callback(null); | ||
} else { | ||
var numCategories = 1; | ||
if (column && groupedLimitPresent) { | ||
estimates.numberOfDistinctValues( | ||
node[input], context, column, | ||
function(err, num) { | ||
if (err) { | ||
context.logError(err); | ||
} else { | ||
numCategories = num; | ||
} | ||
check(inputRows, numCategories); | ||
} | ||
); | ||
} | ||
else { | ||
check(inputRows, numCategories); | ||
} | ||
} | ||
}); | ||
} | ||
|
||
module.exports = { | ||
check: check, | ||
// basic limit checker functions | ||
limitNumberOfRows: limitNumberOfRows, | ||
limitInputRows: limitInputRows, | ||
limitInputRowsAndAvgGroupedRows: limitInputRowsAndAvgGroupedRows | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,8 @@ LimitsContext.prototype.runSQL = function(sql, callback) { | |
// * limitName is the limit parameter name (as used in globalLimits) | ||
// * defaultValue is the value used if the limit is not defined in globalLimits | ||
// * errorMessage used when the limit is exceeded | ||
LimitsContext.prototype.getLimit = function (nodeType, limitName, defaultValue, errorMessage) { | ||
LimitsContext.prototype.getLimit = function (node, limitName, defaultValue, errorMessage) { | ||
var nodeType = node.getType(); | ||
var limit = null; | ||
var limits = this.limits.analyses; | ||
if (limits) { | ||
|
@@ -41,7 +42,15 @@ LimitsContext.prototype.getLimit = function (nodeType, limitName, defaultValue, | |
} | ||
limit = limits[limitName]; | ||
} | ||
return { value: limit || defaultValue, message: errorMessage }; | ||
if (!limit) { | ||
// apply default value; a sync. function (with a node parameter) can be used | ||
if (typeof defaultValue === 'function') { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a remnant of a previsous iteration. I think I'll remove it, since it's not needed now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If possible, I would remove it now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll do |
||
limit = defaultValue(node); | ||
} else { | ||
limit = defaultValue; | ||
} | ||
} | ||
return { value: limit, message: errorMessage }; | ||
}; | ||
|
||
module.exports = LimitsContext; |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,7 @@ dot.templateSettings.strip = false; | |
var id = require('../util/id'); | ||
var QueryBuilder = require('../filter/query-builder'); | ||
var Validator = require('./validator'); | ||
var limits = require('./limits'); | ||
var Checks = require('../limits/checks'); | ||
|
||
var TYPE = require('./type'); | ||
module.exports.TYPE = TYPE; | ||
|
@@ -56,6 +56,13 @@ var NODE_RESERVED_KEYWORDS = { | |
setStatusFromInputNodes: 1 | ||
}; | ||
|
||
var DEFAULT_LIMITS = { | ||
maximumNumberOfRows: { | ||
default: Infinity, | ||
message: 'too many result rows' | ||
} | ||
}; | ||
|
||
function Node() { | ||
// args will look like ['owner', {param1: '', param2: ''}, {param1: '', param3: ''}, ...] | ||
var args = (arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments)); | ||
|
@@ -471,9 +478,8 @@ function validate(validator, params, expectedParamName) { | |
} | ||
|
||
Node.prototype.checkLimits = function(context, callback) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As this method must be overridden, can we document what is the signature for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure |
||
// the default is to limit number of output rows if a maximumNumberOfRows is | ||
// present in the configuration (not by default) | ||
var defaultLimit = null; | ||
var limit = context.getLimit(this.getType(), 'maximumNumberOfRows', defaultLimit, 'too many result rows'); | ||
limits.limitNumberOfRows(this, context, limit, callback); | ||
Checks.check(this, context, [{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to have that default limit? Or should it be implicit? I mean: could we just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Having this here allow us to limit the number of result rows for any analysis type by setting a limit value in the configuration. If there's no configuration value, since where have |
||
checker: Checks.limitNumberOfRows, | ||
limits: DEFAULT_LIMITS | ||
}], callback); | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
'use strict'; | ||
|
||
var Node = require('../node'); | ||
var limits = require('../limits'); | ||
var Checks = require('../../limits/checks'); | ||
var debug = require('../../util/debug')('analysis:buffer'); | ||
|
||
var TYPE = 'buffer'; | ||
|
@@ -12,6 +12,22 @@ var PARAMS = { | |
dissolved: Node.PARAM.NULLABLE(Node.PARAM.BOOLEAN()) | ||
}; | ||
|
||
var LIMITS = { | ||
maximumNumberOfRows: { | ||
default: 1e6, | ||
name: 'maximumNumberOfRows', | ||
message: 'too many source rows' | ||
} | ||
}; | ||
|
||
var LIMITS_DISSOLVED = { | ||
maximumNumberOfRows: { | ||
default: 1e5, | ||
name: 'maximumNumberOfRowsDissolved', | ||
message: 'too many source rows' | ||
} | ||
}; | ||
|
||
var Buffer = Node.create(TYPE, PARAMS, { cache: true, version: 2 }); | ||
|
||
module.exports = Buffer; | ||
|
@@ -122,17 +138,11 @@ Buffer.prototype.isolinesDissolvedQuery = function() { | |
}; | ||
|
||
Buffer.prototype.checkLimits = function(context, callback) { | ||
var defaultLimit; | ||
var limitName; | ||
var limit; | ||
var limits = this.dissolved ? LIMITS_DISSOLVED : LIMITS; | ||
|
||
if (this.dissolved) { | ||
limitName = 'maximumNumberOfRowsDissolved'; | ||
defaultLimit = 100000; | ||
} else { | ||
limitName = 'maximumNumberOfRows'; | ||
defaultLimit = 1000000; | ||
} | ||
limit = context.getLimit(TYPE, limitName, defaultLimit, 'too many source rows'); | ||
limits.limitInputRows(this, 'source', context, limit, callback); | ||
Checks.check(this, context, [{ | ||
checker: Checks.limitInputRows, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't it a bit weird that the Checks.check function receives a reference to a function of the same module? I have the feeling this is still very _DSL_ish approach. Why not something like:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where limits has already configuration limits, that could be injected from outside, to override the default limit provided here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason for the duality ( Then, for each check passed to I see your point with the more object-oriented approach you propose, but I think we would miss the ability to easily have multiple checks. (which may not be important really, I'm not sure... so far we're not needing it). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What I like about |
||
params: { input: 'source' }, | ||
limits: limits | ||
}], callback); | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we use async here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need really, at some point in writing here I was expecting that getting limit values could eventually need some async operation, but now I see no reason for this. I'll remove it. My only doubt is whether to maintain this
limitValues
function as async (with callback) or not...