Skip to content

Commit

Permalink
fix(zql): Fix LIKE and ILIKE (#60)
Browse files Browse the repository at this point in the history
https://www.postgresql.org/docs/current/functions-matching.html#FUNCTIONS-LIKE

Our implementation of LIKE and ILIKE was incorrect. Now it should match
the spec.
  • Loading branch information
arv authored Apr 6, 2024
1 parent 66c9a71 commit edf42b0
Show file tree
Hide file tree
Showing 3 changed files with 224 additions and 37 deletions.
151 changes: 149 additions & 2 deletions src/zql/ast-to-ivm/pipeline-builder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {compareUTF8} from 'compare-utf8';
import {describe, expect, test} from 'vitest';
import {z} from 'zod';
import {Entity} from '../../generate.js';
import {AST, Condition} from '../ast/ast.js';
import {AST, Condition, SimpleCondition} from '../ast/ast.js';
import {makeTestContext} from '../context/context.js';
import {DifferenceStream} from '../ivm/graph/difference-stream.js';
import {Materialite} from '../ivm/materialite.js';
Expand All @@ -13,7 +13,7 @@ import {
WhereCondition,
astForTesting as ast,
} from '../query/entity-query.js';
import {buildPipeline} from './pipeline-builder.js';
import {buildPipeline, getOperator} from './pipeline-builder.js';

const e1 = z.object({
id: z.string(),
Expand Down Expand Up @@ -410,5 +410,152 @@ describe('OR', () => {
}
});

describe('getOperator', () => {
  // Each row is evaluated as `left <op> right`: `right` becomes the literal
  // value of the condition passed to getOperator and `left` is the argument
  // given to the predicate it returns.
  const cases = [
    {op: '=', left: 1, right: 1, expected: true},
    {op: '!=', left: 1, right: 1, expected: false},
    {op: '=', left: 'a', right: 'a', expected: true},
    {op: '!=', left: 'a', right: 'a', expected: false},
    {op: '=', left: true, right: true, expected: true},
    {op: '!=', left: true, right: true, expected: false},

    {op: '=', left: 1, right: 2, expected: false},
    {op: '!=', left: 1, right: 2, expected: true},
    {op: '=', left: 'a', right: 'b', expected: false},
    {op: '!=', left: 'a', right: 'b', expected: true},
    {op: '=', left: true, right: false, expected: false},
    {op: '!=', left: true, right: false, expected: true},

    {op: '>', left: 1, right: 1, expected: false},
    {op: '>=', left: 1, right: 1, expected: true},
    {op: '<', left: 1, right: 1, expected: false},
    {op: '<=', left: 1, right: 1, expected: true},
    {op: '>', left: 'a', right: 'a', expected: false},
    {op: '>=', left: 'a', right: 'a', expected: true},
    {op: '<', left: 'a', right: 'a', expected: false},
    {op: '<=', left: 'a', right: 'a', expected: true},

    {op: '>', left: 1, right: 2, expected: false},
    {op: '>=', left: 1, right: 2, expected: false},
    {op: '<', left: 1, right: 2, expected: true},
    {op: '<=', left: 1, right: 2, expected: true},
    {op: '>', left: 'a', right: 'b', expected: false},
    {op: '>=', left: 'a', right: 'b', expected: false},
    {op: '<', left: 'a', right: 'b', expected: true},
    {op: '<=', left: 'a', right: 'b', expected: true},

    {op: 'IN', left: 1, right: [1, 2, 3], expected: true},
    {op: 'IN', left: 1, right: [2, 3], expected: false},
    {op: 'IN', left: 'a', right: ['a', 'b', 'c'], expected: true},
    {op: 'IN', left: 'a', right: ['b', 'c'], expected: false},
    {op: 'IN', left: true, right: [true, false], expected: true},
    {op: 'IN', left: true, right: [false], expected: false},

    {op: 'LIKE', left: 'abc', right: 'abc', expected: true},
    {op: 'LIKE', left: 'abc', right: 'ABC', expected: false},
    {op: 'LIKE', left: 'abc', right: 'ab', expected: false},
    {op: 'LIKE', left: 'abc', right: 'ab%', expected: true},
    {op: 'LIKE', left: 'abc', right: '%bc', expected: true},
    {op: 'LIKE', left: 'abbc', right: 'a%c', expected: true},
    {op: 'LIKE', left: 'abc', right: 'a_c', expected: true},
    {op: 'LIKE', left: 'abc', right: 'a__', expected: true},
    {op: 'LIKE', left: 'abc', right: '_bc', expected: true},
    {op: 'LIKE', left: 'abc', right: '___', expected: true},
    {op: 'LIKE', left: 'abc', right: '%', expected: true},
    {op: 'LIKE', left: 'abc', right: '_', expected: false},
    {op: 'LIKE', left: 'abc', right: 'a', expected: false},
    {op: 'LIKE', left: 'abc', right: 'b', expected: false},
    {op: 'LIKE', left: 'abc', right: 'c', expected: false},
    {op: 'LIKE', left: 'abc', right: 'd', expected: false},

    {op: 'ILIKE', left: 'abc', right: 'abc', expected: true},
    {op: 'ILIKE', left: 'abc', right: 'ABC', expected: true},
    {op: 'ILIKE', left: 'Abc', right: 'ab', expected: false},
    {op: 'ILIKE', left: 'Abc', right: 'ab%', expected: true},
    {op: 'ILIKE', left: 'Abc', right: '%bc', expected: true},
    {op: 'ILIKE', left: 'Abbc', right: 'a%c', expected: true},
    {op: 'ILIKE', left: 'Abc', right: 'a_c', expected: true},
    {op: 'ILIKE', left: 'Abc', right: 'a__', expected: true},
    {op: 'ILIKE', left: 'Abc', right: '_bc', expected: true},
    {op: 'ILIKE', left: 'Abc', right: '___', expected: true},
    {op: 'ILIKE', left: 'Abc', right: '%', expected: true},
    {op: 'ILIKE', left: 'Abc', right: '_', expected: false},
    {op: 'ILIKE', left: 'Abc', right: 'a', expected: false},
    {op: 'ILIKE', left: 'Abc', right: 'b', expected: false},
    {op: 'ILIKE', left: 'Abc', right: 'c', expected: false},
    {op: 'ILIKE', left: 'Abc', right: 'd', expected: false},

    // and some tricky likes
    {op: 'LIKE', left: 'abc', right: 'a%b%c', expected: true},
    {op: 'LIKE', left: 'abc', right: 'a%b', expected: false},
    // RegExp syntax in the pattern must be taken literally.
    {op: 'LIKE', left: 'abc', right: '.*', expected: false},
    {op: 'LIKE', left: 'abc', right: '...', expected: false},
    // `_` must match every character that is special in RegExp or LIKE syntax.
    ...Array.from('/\\[](){}^$+?*.|%_', c => ({
      op: 'LIKE',
      left: c,
      right: '_',
      expected: true,
    })),
    {op: 'LIKE', left: 'a%b', right: 'a\\%b', expected: true},
    {op: 'LIKE', left: 'a_b', right: 'a\\_b', expected: true},

    {op: 'LIKE', left: 'a\\bc', right: 'a\\\\bc', expected: true},
    {op: 'LIKE', left: 'a\\Bc', right: 'a\\\\Bc', expected: true},
    {op: 'LIKE', left: 'ab', right: 'a\\b', expected: true},
    {op: 'LIKE', left: 'a"b', right: 'a"b', expected: true},
    {op: 'LIKE', left: "a'b", right: "a'b", expected: true},

    {op: 'LIKE', left: 'a{', right: 'a{', expected: true},
    {op: 'LIKE', left: 'a{', right: 'a\\{', expected: true},
    {op: 'LIKE', left: 'a\n', right: 'a\n', expected: true},
    {op: 'LIKE', left: 'an', right: 'a\\n', expected: true},
    {op: 'LIKE', left: 'a ', right: 'a\\s', expected: false},
  ] as const;

  // Builds the condition object handed to getOperator. The cast mirrors what
  // the table needs: `op` may be a composed string such as 'NOT LIKE'.
  const makeCondition = (op: string, value: unknown) =>
    ({
      op,
      field: 'field',
      value: {type: 'literal', value},
    }) as SimpleCondition;

  for (const c of cases) {
    test(`${c.left} ${c.op} ${c.right} === ${c.expected}`, () => {
      expect(getOperator(makeCondition(c.op, c.right))(c.left)).toBe(
        c.expected,
      );
    });

    // Every operator that has a NOT form must yield the opposite result.
    if (['LIKE', 'ILIKE', 'IN'].includes(c.op)) {
      test(`${c.left} NOT ${c.op} ${c.right} === ${!c.expected}`, () => {
        expect(getOperator(makeCondition('NOT ' + c.op, c.right))(c.left)).toBe(
          !c.expected,
        );
      });
    }

    // if op is LIKE and expected is true then test ILIKE as well
    if (c.op === 'LIKE' && c.expected) {
      test(`${c.left} ILIKE ${c.right}`, () => {
        expect(getOperator(makeCondition('ILIKE', c.right))(c.left)).toBe(
          c.expected,
        );
      });
    }
  }

  // Kept inside a test so a failure is reported as a test failure instead of
  // aborting collection of the whole suite.
  test('LIKE pattern ending with the escape character throws', () => {
    expect(() => getOperator(makeCondition('LIKE', '\\'))).toThrow(
      'LIKE pattern must not end with escape character',
    );
  });
});

// order-by and limit are properties of the materialize view
// and not a part of the pipeline.
106 changes: 73 additions & 33 deletions src/zql/ast-to-ivm/pipeline-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {
Condition,
Ordering,
SimpleCondition,
SimpleOperator,
} from '../ast/ast.js';
import {must} from '../error/asserts.js';
import {DifferenceStream, concat} from '../ivm/graph/difference-stream.js';
Expand Down Expand Up @@ -160,13 +159,9 @@ function applySimpleCondition<T extends Entity>(
stream: DifferenceStream<T>,
condition: SimpleCondition,
) {
const operator = getOperator(condition.op);
return stream.filter(x =>
operator(
(x as Record<string, unknown>)[condition.field],
condition.value.value,
),
);
const operator = getOperator(condition);
const {field} = condition;
return stream.filter(x => operator((x as Record<string, unknown>)[field]));
}

function applyGroupBy<T extends Entity>(
Expand Down Expand Up @@ -297,53 +292,98 @@ function makeKeyFunction(columns: string[]) {
// Builds the predicate used to evaluate a simple condition.
//
// NOTE(review): this listing interleaves two revisions of most lines. The
// earlier form takes only the operator and returns a two-argument comparator
// `(l, r)`; the later form takes the whole condition, reads the right-hand
// side from `condition.value.value` once, and returns a one-argument predicate
// over the left-hand value. Confirm which lines are live against the commit.
//
// Throws an Error for any operator not listed in the switch.
//
// We're well-typed in the query builder so once we're down here
// we can assume that the operator is valid.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function getOperator(op: SimpleOperator): (l: any, r: any) => boolean {
export function getOperator(condition: SimpleCondition): (lhs: any) => boolean {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const rhs = condition.value.value as any;
const {op} = condition;
switch (op) {
case '=':
return (l, r) => l === r;
return lhs => lhs === rhs;
case '!=':
return (l, r) => l !== r;
return lhs => lhs !== rhs;
case '<':
return (l, r) => l < r;
return lhs => lhs < rhs;
case '>':
return (l, r) => l > r;
return lhs => lhs > rhs;
case '>=':
return (l, r) => l >= r;
return lhs => lhs >= rhs;
case '<=':
return (l, r) => l <= r;
return lhs => lhs <= rhs;
// IN / NOT IN call `rhs.includes(lhs)`. `rhs` is typed `any`, so nothing here
// checks that it has an `includes` method — presumably an array of literals;
// confirm against the AST types.
case 'IN':
return opIn;
return lhs => rhs.includes(lhs);
case 'NOT IN':
return not(opIn);
return lhs => !rhs.includes(lhs);
// Second argument to getLikeOp: '' for LIKE, 'i' for ILIKE. The NOT variants
// wrap the same predicate in `not`.
case 'LIKE':
return opLike;
return getLikeOp(rhs, '');
case 'NOT LIKE':
return not(opLike);
return not(getLikeOp(rhs, ''));
case 'ILIKE':
return opIlike;
return getLikeOp(rhs, 'i');
case 'NOT ILIKE':
return not(opIlike);
return not(getLikeOp(rhs, 'i'));
default:
throw new Error(`Operator ${op} not supported`);
}
}

interface Includes<T> {
includes(v: T): boolean;
/**
 * Wraps a one-argument predicate so that the returned function yields the
 * logical negation of whatever `f` returns for the same argument.
 */
function not<T>(f: (lhs: T) => boolean): (lhs: T) => boolean {
  const negated = (value: T): boolean => !f(value);
  return negated;
}

function opIn<T>(l: T, r: Includes<T>) {
return r.includes(l);
}
function getLikeOp(pattern: string, flags: 'i' | ''): (lhs: string) => boolean {
// Returns a predicate that tests a left-hand string against `pattern`.
// `flags` is '' for a case-sensitive match and 'i' for a case-insensitive one.
//
// If the pattern contains none of '%', '_' or '\' it is a simple string
// comparison (both sides lower-cased when flags is 'i').
// If it does contain one of them it is a regex comparison; the RegExp is built
// once here and reused by the returned predicate.
// '%' is a wildcard for any number of characters
// '_' is a wildcard for a single character
// Postgres SQL allows escaping using `\`.

// NOTE(review): the next two lines look like the removed `opLike` helper kept
// by the diff view rather than part of this function — confirm against the
// commit.
function opLike<T>(l: Includes<T>, r: T) {
return l.includes(r);
if (!/_|%|\\/.test(pattern)) {
if (flags === 'i') {
const rhsLower = pattern.toLowerCase();
return (lhs: string) => lhs.toLowerCase() === rhsLower;
}
return (lhs: string) => lhs === pattern;
}
const re = patternToRegExp(pattern, flags);
return (lhs: string) => re.test(lhs);
}

function opIlike(l: string, r: string) {
return l.toLowerCase().includes(r.toLowerCase());
}
// Characters that carry meaning in RegExp source and therefore need a
// backslash in front of them when they are meant literally.
const specialCharsRe = /[$()*+.?[\]\\^{|}]/;

/**
 * Translates a SQL LIKE pattern into an anchored RegExp.
 *
 *   %   => [\s\S]*  (any run of characters, including none)
 *   _   => [\s\S]   (exactly one UTF-16 code unit)
 *   \x  => x taken literally, so \% and \_ match a real '%' / '_'
 *   anything else matches itself (RegExp syntax characters are escaped)
 *
 * `[\s\S]` is used rather than `.` because `.` does not match line terminators
 * unless the `s` flag is set, while LIKE wildcards match any character.
 *
 * @param source The LIKE pattern.
 * @param flags '' for a case-sensitive match, 'i' for a case-insensitive one.
 * @returns A RegExp that has to match the entire input (`^…$`).
 * @throws Error if `source` ends with a dangling escape character.
 */
function patternToRegExp(source: string, flags: '' | 'i' = ''): RegExp {
  let pattern = '^';
  for (let i = 0; i < source.length; i++) {
    let c = source.charAt(i);
    if (c === '%') {
      pattern += '[\\s\\S]*';
      continue;
    }
    if (c === '_') {
      pattern += '[\\s\\S]';
      continue;
    }
    if (c === '\\') {
      if (i === source.length - 1) {
        throw new Error('LIKE pattern must not end with escape character');
      }
      // Consume the escape and treat the following character as a literal.
      i++;
      c = source.charAt(i);
    }
    if (specialCharsRe.test(c)) {
      pattern += '\\';
    }
    pattern += c;
  }
  return new RegExp(pattern + '$', flags);
}
4 changes: 2 additions & 2 deletions src/zql/integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,11 @@ test('each where operator', async () => {
// expect(await stmt.exec()).toEqual([{id: 'a'}, {id: 'b'}]);
// stmt.destroy();

stmt = q.select('id').where('assignee', 'LIKE', 'al').prepare();
stmt = q.select('id').where('assignee', 'LIKE', 'al%').prepare();
expect(await stmt.exec()).toEqual([{id: 'c'}]);
stmt.destroy();

stmt = q.select('id').where('assignee', 'ILIKE', 'AL').prepare();
stmt = q.select('id').where('assignee', 'ILIKE', 'AL%').prepare();
expect(await stmt.exec()).toEqual([{id: 'c'}]);
stmt.destroy();

Expand Down

0 comments on commit edf42b0

Please sign in to comment.