From 63b935ce64fcaa2bf47da0afbdbd627fde8b97be Mon Sep 17 00:00:00 2001 From: Engin Al Date: Thu, 9 Feb 2023 17:04:04 +0300 Subject: [PATCH 01/15] Remove unnecessary concat function, and use UTF8 compatible functions and COLLATE --- .../src/adapter/ClickHouseQuery.js | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.js b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.js index f6d94af5ab301..261daaa5bf5c3 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.js @@ -16,7 +16,7 @@ class ClickHouseFilter extends BaseFilter { likeIgnoreCase(column, not, param, type) { const p = (!type || type === 'contains' || type === 'ends') ? '%' : ''; const s = (!type || type === 'contains' || type === 'starts') ? '%' : ''; - return `lower(${column}) ${not ? 'NOT' : ''} LIKE CONCAT('${p}', lower(${this.allocateParam(param)}), '${s}')`; + return `lowerUTF8(${column}) ${not ? 'NOT' : ''} LIKE lowerUTF8(${p}${this.allocateParam(param)}${s})`; } castParameter() { @@ -149,6 +149,28 @@ export class ClickHouseQuery extends BaseQuery { return `${fieldAlias} ${direction}`; } + orderBy() { + // + // ClickHouse orders string by bytes, so we need to use COLLATE 'en' to order by string + // + if (R.isEmpty(this.order)) { + return ''; + } + + const orderByString = R.pipe( + R.map(this.orderHashToString), + R.reject(R.isNil), + R.join(' COLLATE \'en\''), + R.join(', ') + )(this.order); + + if (!orderByString) { + return ''; + } + + return ` ORDER BY ${orderByString}`; + } + groupByClause() { if (this.ungrouped) { return ''; From 49111c22cf5d0b560f35bfb9c21797cc88ec7e80 Mon Sep 17 00:00:00 2001 From: Engin Al Date: Fri, 7 Jun 2024 18:21:39 +0300 Subject: [PATCH 02/15] Add integration testing for ordering with collation --- .../clickhouse/ClickHouseDbRunner.js | 2 ++ .../clickhouse-dataschema-compiler.test.ts | 35 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.js b/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.js index 443e58c0e2c57..85819c3eb8006 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.js +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.js @@ -44,6 +44,8 @@ export class ClickHouseDbRunner { (4, 400, '2017-01-06 16:00:00', '2017-01-24 16:00:00', 2, null, 120.120, 10.60), (5, 500, '2017-01-06 16:00:00', '2017-01-24 16:00:00', 2, null, 120.120, 58.10), (6, 500, '2016-09-06 16:00:00', '2016-09-06 16:00:00', 2, null, 120.120, 58.10) + (7, 300, '2017-01-07 16:00:00', '2017-01-25 16:00:00', 2, 'Google', 120.120, 59.60), + `, { queryOptions: { session_id: clickHouse.sessionId, join_use_nulls: '1' } }), await clickHouse.querying(` INSERT INTO diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts index 955e9e11ea2d2..44f29fc223f28 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts @@ -375,6 +375,41 @@ describe('ClickHouse DataSchemaCompiler', () => { return true; }); } + it('collation in order by', async () => { + const { compiler, cubeEvaluator, joinGraph } = testPrepareCompiler(` + cube('visitors', { + sql: \` + select * from visitors + \`, + + dimensions: { + source: { + type: 'string', + sql: 'source' + } + } + }) + `); + await compiler.compile(); + + const query = new ClickHouseQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: ['visitors.source'], + order: [{ + id: 'visitors.source', + desc: false + }], + timezone: 'America/Los_Angeles' + }); + logSqlAndParams(query); + + const sqlAndParams = query.buildSqlAndParams(); + const res = await dbRunner.testQuery(sqlAndParams); + const sql = sqlAndParams[0]; + expect(sql).toMatch('ORDER BY source COLLATE \'en\''); + + expect(res).toEqual([{ visitors__source: 'google' }, { visitors__source: 'Google' }, { visitors__source: 'some' }]); + }); it('export import', () => { const { compiler, cubeEvaluator, joinGraph } = prepareCompiler({ From dd2fd32e38312eefae4ab321e3b66ae9c6affaaf Mon Sep 17 00:00:00 2001 From: Engin Al Date: Fri, 24 Jan 2025 17:09:33 +0300 Subject: [PATCH 03/15] Added back CONCAT() to prevent sql injection --- packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index 8877acba26c34..0ca7a462c8fc5 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -20,7 +20,7 @@ class ClickHouseFilter extends BaseFilter { public likeIgnoreCase(column, not, param, type) { const p = (!type || type === 'contains' || type === 'ends') ? '%' : ''; const s = (!type || type === 'contains' || type === 'starts') ? '%' : ''; - return `lowerUTF8(${column}) ${not ? 'NOT' : ''} LIKE lowerUTF8(${p}${this.allocateParam(param)}${s})`; + return `lowerUTF8(${column}) ${not ? 'NOT' : ''} LIKE CONCAT('${p}', lowerUTF8(${this.allocateParam(param)}), '${s}')`; } public castParameter() { From a5b279ee5f10a979825993cc191afb0714796606 Mon Sep 17 00:00:00 2001 From: Engin Al Date: Fri, 24 Jan 2025 21:10:38 +0300 Subject: [PATCH 04/15] Change intentation from 4 spaces to 2 spaces --- .../src/adapter/ClickHouseQuery.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index 0ca7a462c8fc5..ded8b00b6057a 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -175,21 +175,21 @@ export class ClickHouseQuery extends BaseQuery { // ClickHouse orders string by bytes, so we need to use COLLATE 'en' to order by string // if (R.isEmpty(this.order)) { - return ""; + return ''; } const orderByString = R.pipe( - R.map((order) => this.orderHashToString(order) + " COLLATE 'en'"), - R.reject(R.isNil), - R.join(", ") + R.map((order) => `${this.orderHashToString(order)} COLLATE 'en'`), + R.reject(R.isNil), + R.join(', ') )(this.order); if (!orderByString) { - return ""; + return ''; } return ` ORDER BY ${orderByString}`; -} + } public groupByClause() { if (this.ungrouped) { From f7a4759e331cda4852169d69f2574e43dc3ca73b Mon Sep 17 00:00:00 2001 From: Engin Al Date: Fri, 24 Jan 2025 23:31:33 +0300 Subject: [PATCH 05/15] Only apply coalition if field's type is string --- .../src/adapter/ClickHouseQuery.ts | 32 +++++++++++++++---- .../clickhouse-dataschema-compiler.test.ts | 16 +++++++--- .../clickhouse-graph-builder.test.ts | 5 +-- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index ded8b00b6057a..01bcc84bdb547 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -125,7 +125,7 @@ export class ClickHouseQuery extends BaseQuery { .join(' AND '); } - public getFieldAlias(id) { + public getField(id) { const equalIgnoreCase = (a, b) => ( typeof a === 'string' && typeof b === 'string' && a.toUpperCase() === b.toUpperCase() ); @@ -136,16 +136,30 @@ export class ClickHouseQuery extends BaseQuery { d => equalIgnoreCase(d.dimension, id), ); + if (!field) { + field = this.measures.find( + d => equalIgnoreCase(d.measure, id) || equalIgnoreCase(d.expressionName, id), + ); + } + + return field; + } + + public getFieldAlias(id) { + const field = this.getField(id); + if (field) { return field.aliasName(); } - field = this.measures.find( - d => equalIgnoreCase(d.measure, id) || equalIgnoreCase(d.expressionName, id), - ); + return null; + } + + public getFieldType(id) { + const field = this.getField(id); if (field) { - return field.aliasName(); + return field.definition().type; } return null; @@ -179,7 +193,13 @@ export class ClickHouseQuery extends BaseQuery { } const orderByString = R.pipe( - R.map((order) => `${this.orderHashToString(order)} COLLATE 'en'`), + R.map((order) => { + let orderString = this.orderHashToString(order); + if (this.getFieldType(order) === 'string') { + orderString = `${orderString} COLLATE 'en'`; + } + return orderString; + }), R.reject(R.isNil), R.join(', ') )(this.order); diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts index 345e7972970e3..3a5fb0e03acd6 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts @@ -168,7 +168,8 @@ describe('ClickHouse DataSchemaCompiler', () => { { visitors__created_at_day: '2017-01-02T00:00:00.000', visitors__visitor_count: '1' }, { visitors__created_at_day: '2017-01-04T00:00:00.000', visitors__visitor_count: '1' }, { visitors__created_at_day: '2017-01-05T00:00:00.000', visitors__visitor_count: '1' }, - { visitors__created_at_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' } + { visitors__created_at_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' }, + { visitors__created_at_day: '2017-01-07T00:00:00.000', visitors__visitor_count: '1' } ] ); }); @@ -229,7 +230,7 @@ describe('ClickHouse DataSchemaCompiler', () => { expect(res).toEqual( [ { visitors__status: 'Approved', visitors__visitor_count: '2' }, - { visitors__status: 'Canceled', visitors__visitor_count: '4' } + { visitors__status: 'Canceled', visitors__visitor_count: '5' } ] ); }); @@ -299,6 +300,7 @@ describe('ClickHouse DataSchemaCompiler', () => { expect(res).toEqual( [ { visitors__enabled_source: 'google', visitors__visitor_count: '1' }, + { visitors__enabled_source: 'Google', visitors__visitor_count: '1' }, { visitors__enabled_source: 'some', visitors__visitor_count: '2' }, { visitors__enabled_source: null, visitors__visitor_count: '3' }, ] @@ -338,7 +340,8 @@ describe('ClickHouse DataSchemaCompiler', () => { { visitors__created_at: '2016-09-06T16:00:00.000' }, { visitors__created_at: '2017-01-04T16:00:00.000' }, { visitors__created_at: '2017-01-05T16:00:00.000' }, - { visitors__created_at: '2017-01-06T16:00:00.000' } + { visitors__created_at: '2017-01-06T16:00:00.000' }, + { visitors__created_at: '2017-01-07T16:00:00.000' } ], [{ visitors__created_at: '2017-01-06T16:00:00.000' }], [ @@ -347,7 +350,10 @@ describe('ClickHouse DataSchemaCompiler', () => { { visitors__created_at: '2017-01-04T16:00:00.000' }, { visitors__created_at: '2017-01-05T16:00:00.000' } ], - [{ visitors__created_at: '2017-01-06T16:00:00.000' }] + [ + { visitors__created_at: '2017-01-06T16:00:00.000' }, + { visitors__created_at: '2017-01-07T16:00:00.000' } + ] ]; ['in_date_range', 'not_in_date_range', 'on_the_date', 'before_date', 'after_date'].map((operator, index) => { const filterValues = index < 2 ? ['2017-01-01', '2017-01-03'] : ['2017-01-06', '2017-01-06']; @@ -408,7 +414,7 @@ describe('ClickHouse DataSchemaCompiler', () => { const sqlAndParams = query.buildSqlAndParams(); const res = await dbRunner.testQuery(sqlAndParams); const sql = sqlAndParams[0]; - expect(sql).toMatch('ORDER BY source COLLATE \'en\''); + expect(sql).toMatch('ORDER BY `visitors__source` COLLATE \'en\''); expect(res).toEqual([{ visitors__source: 'google' }, { visitors__source: 'Google' }, { visitors__source: 'some' }]); }); diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts index 599d764f0cdbf..a18ad1b27b5da 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts @@ -380,9 +380,9 @@ describe('ClickHouse JoinGraph', () => { order: [] }, [{ visitors__visitor_revenue: '300', - visitors__visitor_count: '5', + visitors__visitor_count: '6', visitor_checkins__visitor_checkins_count: '6', - visitors__per_visitor_revenue: '60' + visitors__per_visitor_revenue: '50' }])); // FAILS - need to finish query to override ::timestamptz @@ -1222,6 +1222,7 @@ describe('ClickHouse JoinGraph', () => { { visitors__location: '120.12,40.6' }, { visitors__location: '120.12,58.1' }, { visitors__location: '120.12,58.6' }, + { visitors__location: '120.12,59.6' }, { visitors__location: '120.12,70.6' } ])); From 600c1fa3e4591c3f30f5bc48796956f63cd0d6be Mon Sep 17 00:00:00 2001 From: Engin Al Date: Mon, 27 Jan 2025 16:35:02 +0300 Subject: [PATCH 06/15] Replaced Google with Gork to make ordering clear, added toValidUTF8 to make sure strings are turned into utf8 for like case --- .../cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts | 4 ++-- .../test/integration/clickhouse/ClickHouseDbRunner.ts | 2 +- .../clickhouse/clickhouse-dataschema-compiler.test.ts | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index 01bcc84bdb547..f17654484b024 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -20,7 +20,7 @@ class ClickHouseFilter extends BaseFilter { public likeIgnoreCase(column, not, param, type) { const p = (!type || type === 'contains' || type === 'ends') ? '%' : ''; const s = (!type || type === 'contains' || type === 'starts') ? '%' : ''; - return `lowerUTF8(${column}) ${not ? 'NOT' : ''} LIKE CONCAT('${p}', lowerUTF8(${this.allocateParam(param)}), '${s}')`; + return `lowerUTF8(toValidUTF8(${column})) ${not ? 'NOT' : ''} LIKE CONCAT('${p}', lowerUTF8(toValidUTF8(${this.allocateParam(param)})), '${s}')`; } public castParameter() { @@ -184,7 +184,7 @@ export class ClickHouseQuery extends BaseQuery { return `${fieldAlias} ${direction}`; } - public orderBy() { + public override orderBy() { // // ClickHouse orders string by bytes, so we need to use COLLATE 'en' to order by string // diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.ts index 00c5e1e755e4e..a7e2049c968c7 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.ts @@ -66,7 +66,7 @@ export class ClickHouseDbRunner extends BaseDbRunner { (4, 400, '2017-01-06 16:00:00', '2017-01-24 16:00:00', 2, null, 120.120, 10.60), (5, 500, '2017-01-06 16:00:00', '2017-01-24 16:00:00', 2, null, 120.120, 58.10), (6, 500, '2016-09-06 16:00:00', '2016-09-06 16:00:00', 2, null, 120.120, 58.10), - (7, 300, '2017-01-07 16:00:00', '2017-01-25 16:00:00', 2, 'Google', 120.120, 59.60) + (7, 300, '2017-01-07 16:00:00', '2017-01-25 16:00:00', 2, 'Gork', 120.120, 59.60) ` }); await clickHouse.command({ query: ` diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts index 3a5fb0e03acd6..104e997130194 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts @@ -300,7 +300,7 @@ describe('ClickHouse DataSchemaCompiler', () => { expect(res).toEqual( [ { visitors__enabled_source: 'google', visitors__visitor_count: '1' }, - { visitors__enabled_source: 'Google', visitors__visitor_count: '1' }, + { visitors__enabled_source: 'Gork', visitors__visitor_count: '1' }, { visitors__enabled_source: 'some', visitors__visitor_count: '2' }, { visitors__enabled_source: null, visitors__visitor_count: '3' }, ] @@ -416,7 +416,7 @@ describe('ClickHouse DataSchemaCompiler', () => { const sql = sqlAndParams[0]; expect(sql).toMatch('ORDER BY `visitors__source` COLLATE \'en\''); - expect(res).toEqual([{ visitors__source: 'google' }, { visitors__source: 'Google' }, { visitors__source: 'some' }]); + expect(res).toEqual([{ visitors__source: 'google' }, { visitors__source: 'Gork' }, { visitors__source: 'some' }]); }); it('export import', () => { From eadfa1ae314e0900d4e72668163375087ff3b6ef Mon Sep 17 00:00:00 2001 From: Engin Al Date: Mon, 27 Jan 2025 17:10:21 +0300 Subject: [PATCH 07/15] Added CUBEJS_DB_CLICKHOUSE_SORT_COLLATION env variable for collation selection in Clickhouse sorting --- packages/cubejs-backend-shared/src/env.ts | 13 +++++++++ .../test/db_env_multi.test.ts | 29 +++++++++++++++++++ .../test/db_env_single.test.ts | 17 +++++++++++ .../src/adapter/ClickHouseQuery.ts | 6 ++-- 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index 33d1854ff7c73..2752dac7a925f 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -1132,6 +1132,19 @@ const variables: Record any> = { ] ), + /** + * ClickHouse sort collation. + */ + clickhouseSortCollation: ({ + dataSource + }: { + dataSource: string, + }) => ( + process.env[ + keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource) + ] + ), + /** **************************************************************** * ElasticSearch Driver * ***************************************************************** */ diff --git a/packages/cubejs-backend-shared/test/db_env_multi.test.ts b/packages/cubejs-backend-shared/test/db_env_multi.test.ts index b37ce923c20c4..333b25c2ef667 100644 --- a/packages/cubejs-backend-shared/test/db_env_multi.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_multi.test.ts @@ -1539,6 +1539,35 @@ describe('Multiple datasources', () => { ); }); + test('getEnv("clickhouseSortCollation")', () => { + process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default1'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION = 'postgres1'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION = 'wrong1'; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('postgres1'); + expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default2'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION = 'postgres2'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION = 'wrong2'; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('postgres2'); + expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + delete process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION; + delete process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION; + delete process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toBeUndefined(); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toBeUndefined(); + expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + }); + test('getEnv("elasticApiId")', () => { process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1'; process.env.CUBEJS_DS_POSTGRES_DB_ELASTIC_APIKEY_ID = 'postgres1'; diff --git a/packages/cubejs-backend-shared/test/db_env_single.test.ts b/packages/cubejs-backend-shared/test/db_env_single.test.ts index 765e94b7c175d..ce92377a7be4d 100644 --- a/packages/cubejs-backend-shared/test/db_env_single.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_single.test.ts @@ -975,6 +975,23 @@ describe('Single datasources', () => { expect(getEnv('clickhouseReadOnly', { dataSource: 'wrong' })).toBeUndefined(); }); + test('getEnv("clickhouseSortCollation")', () => { + process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default1'; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('default1'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('default1'); + + process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default2'; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('default2'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('default2'); + + delete process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toBeUndefined(); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toBeUndefined(); + expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toBeUndefined(); + }); + test('getEnv("elasticApiId")', () => { process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1'; expect(getEnv('elasticApiId', { dataSource: 'default' })).toEqual('default1'); diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index f17654484b024..5c5484baec9e5 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -1,6 +1,6 @@ import R from 'ramda'; -import { parseSqlInterval } from '@cubejs-backend/shared'; +import { getEnv, parseSqlInterval } from '@cubejs-backend/shared'; import { BaseQuery } from './BaseQuery'; import { BaseFilter } from './BaseFilter'; import { UserError } from '../compiler/UserError'; @@ -192,11 +192,13 @@ export class ClickHouseQuery extends BaseQuery { return ''; } + const collation = getEnv('clickhouseSortCollation') || 'en'; + const orderByString = R.pipe( R.map((order) => { let orderString = this.orderHashToString(order); if (this.getFieldType(order) === 'string') { - orderString = `${orderString} COLLATE 'en'`; + orderString = `${orderString} COLLATE '${collation}'`; } return orderString; }), From 8807178790afe8cb2f046d9478a45e54b508c2d1 Mon Sep 17 00:00:00 2001 From: Engin Al Date: Tue, 28 Jan 2025 16:56:52 +0300 Subject: [PATCH 08/15] Only use collation for clickhouse if env variable is set Co-authored-by: Konstantin Burkalev --- .../cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index 5c5484baec9e5..017dde1ad2c93 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -192,12 +192,12 @@ export class ClickHouseQuery extends BaseQuery { return ''; } - const collation = getEnv('clickhouseSortCollation') || 'en'; + const collation = getEnv('clickhouseSortCollation'); const orderByString = R.pipe( R.map((order) => { let orderString = this.orderHashToString(order); - if (this.getFieldType(order) === 'string') { + if (this.getFieldType(order) === 'string') && collation !== '' { orderString = `${orderString} COLLATE '${collation}'`; } return orderString; From be6875bce6f1b7c2be0e8aea0cc83c07076b7c22 Mon Sep 17 00:00:00 2001 From: Engin Al Date: Thu, 6 Feb 2025 04:24:10 +0300 Subject: [PATCH 09/15] Add CUBEJS_DB_CLICKHOUSE_USE_COLLATION env var to disable collation, add data_type to AliasedColumn and TemplateColumn to be able to use the data type in the templates --- packages/cubejs-backend-shared/src/env.ts | 35 +++++++++++++++- .../test/db_env_multi.test.ts | 42 +++++++++++++++++++ .../test/db_env_single.test.ts | 28 +++++++++++++ .../src/adapter/ClickHouseQuery.ts | 28 ++++++++++++- .../cubesql/src/compile/engine/df/wrapper.rs | 38 +++++++++++++++-- rust/cubesql/cubesql/src/transport/service.rs | 6 ++- 6 files changed, 169 insertions(+), 8 deletions(-) diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index 3f4857459433e..ffc75621d2eb0 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -1147,10 +1147,43 @@ const variables: Record any> = { dataSource: string, }) => ( process.env[ - keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource) + keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource) || 'en' ] ), + /** + * Clickhouse use collation flag. + */ + + clickhouseUseCollation: ({ dataSource }: { dataSource: string }) => { + const val = process.env[ + keyByDataSource( + 'CUBEJS_DB_CLICKHOUSE_USE_COLLATION', + dataSource, + ) + ]; + + if (val) { + if (val.toLocaleLowerCase() === 'true') { + return true; + } else if (val.toLowerCase() === 'false') { + return false; + } else { + throw new TypeError( + `The ${ + keyByDataSource( + 'CUBEJS_DB_CLICKHOUSE_USE_COLLATION', + dataSource, + ) + } must be either 'true' or 'false'.` + ); + } + } else { + // Default to true + return true; + } + }, + /** **************************************************************** * ElasticSearch Driver * ***************************************************************** */ diff --git a/packages/cubejs-backend-shared/test/db_env_multi.test.ts b/packages/cubejs-backend-shared/test/db_env_multi.test.ts index 333b25c2ef667..f7f5b12221c68 100644 --- a/packages/cubejs-backend-shared/test/db_env_multi.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_multi.test.ts @@ -1568,6 +1568,48 @@ describe('Multiple datasources', () => { ); }); + test('getEnv("clickhouseUseCollation")', () => { + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'true'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'true'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'true'; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'false'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'false'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'false'; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(false); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(false); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'wrong'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'wrong'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'wrong'; + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'default' })).toThrow( + 'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toThrow( + 'The CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + delete process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION; + delete process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION; + delete process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + }); + test('getEnv("elasticApiId")', () => { process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1'; process.env.CUBEJS_DS_POSTGRES_DB_ELASTIC_APIKEY_ID = 'postgres1'; diff --git a/packages/cubejs-backend-shared/test/db_env_single.test.ts b/packages/cubejs-backend-shared/test/db_env_single.test.ts index ce92377a7be4d..c612ea77f7e19 100644 --- a/packages/cubejs-backend-shared/test/db_env_single.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_single.test.ts @@ -992,6 +992,34 @@ describe('Single datasources', () => { expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toBeUndefined(); }); + test('getEnv("clickhouseUseCollation")', () => { + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'true'; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(true); + + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'false'; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(false); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(false); + expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(false); + + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'wrong'; + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'default' })).toThrow( + 'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toThrow( + 'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + + delete process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(true); + }); + test('getEnv("elasticApiId")', () => { process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1'; expect(getEnv('elasticApiId', { dataSource: 'default' })).toEqual('default1'); diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index 017dde1ad2c93..e97a12b5fa099 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -184,6 +184,14 @@ export class ClickHouseQuery extends BaseQuery { return `${fieldAlias} ${direction}`; } + public getCollation() { + const useCollation = getEnv('clickhouseUseCollation', { dataSource: this.dataSource }); + if (useCollation) { + return getEnv('clickhouseSortCollation', { dataSource: this.dataSource }); + } + return null; + } + public override orderBy() { // // ClickHouse orders string by bytes, so we need to use COLLATE 'en' to order by string @@ -192,12 +200,12 @@ export class ClickHouseQuery extends BaseQuery { return ''; } - const collation = getEnv('clickhouseSortCollation'); + const collation = this.getCollation(); const orderByString = R.pipe( R.map((order) => { let orderString = this.orderHashToString(order); - if (this.getFieldType(order) === 'string') && collation !== '' { + if (collation && this.getFieldType(order) === 'string') { orderString = `${orderString} COLLATE '${collation}'`; } return orderString; @@ -326,6 +334,22 @@ export class ClickHouseQuery extends BaseQuery { // ClickHouse intervals have a distinct type for each granularity delete templates.types.interval; delete templates.types.binary; + + const collation = this.getCollation(); + + if (collation) { + templates.expressions.sort = `${templates.expressions.sort}{% if data_type and data_type == 'string' %} COLLATE '${collation}'{% endif %}`; + templates.expressions.order_by = `${templates.expressions.order_by}{% if data_type and data_type == 'string' %} COLLATE '${collation}'{% endif %}`; + + const oldOrderBy = '{% if order_by %}\nORDER BY {{ order_by | map(attribute=\'expr\') | join(\', \') }}{% endif %}'; + + const newOrderBy = + '{% if order_by %}\nORDER BY {% for item in order_by %}{{ item.expr }}' + + `{%- if item.data_type and item.data_type == 'string' %} COLLATE '${collation}'{% endif %}` + + '{%- if not loop.last %}, {% endif %}{% endfor %}{% endif %}'; + + templates.statements.select = templates.statements.select.replace(oldOrderBy, newOrderBy); + } return templates; } } diff --git a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs index 28ff55e6be59c..5423883f5b8ea 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs @@ -23,8 +23,8 @@ use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryJoinSubquery}; use datafusion::{ error::{DataFusionError, Result}, logical_plan::{ - plan::Extension, replace_col, Column, DFSchema, DFSchemaRef, Expr, GroupingSet, JoinType, - LogicalPlan, UserDefinedLogicalNode, + plan::Extension, replace_col, Column, DFSchema, DFSchemaRef, Expr, ExprSchemable, + GroupingSet, JoinType, LogicalPlan, UserDefinedLogicalNode, }, physical_plan::{aggregates::AggregateFunction, functions::BuiltinScalarFunction}, scalar::ScalarValue, @@ -797,6 +797,12 @@ impl CubeScanWrapperNode { // When generating column expression that points to literal member it would render literal and generate alias // Here it should just generate the literal // 2. It would not allow to provide aliases for expressions, instead it usually generates them + + let data_type = expr + .get_type(&node.schema) + .and_then(|dt| Self::generate_sql_type(generator.clone(), dt)) + .unwrap_or_else(|_| "".to_string()); + let (expr, sql) = Self::generate_sql_for_expr( plan.clone(), new_sql, @@ -806,7 +812,11 @@ impl CubeScanWrapperNode { Arc::new(HashMap::new()), ) .await?; - columns.push(AliasedColumn { expr, alias }); + columns.push(AliasedColumn { + expr, + alias, + data_type, + }); new_sql = sql; } @@ -1221,6 +1231,13 @@ impl CubeScanWrapperNode { let join_condition = join_condition[0].expr.clone(); sql = new_sql; + let data_type = condition + .get_type(&schema) + .and_then(|dt| { + Self::generate_sql_type(generator.clone(), dt) + }) + .unwrap_or_else(|_| "".to_string()); + let join_sql_expression = { // TODO this is NOT a proper way to generate member expr here // TODO Do we even want a full-blown member expression here? or arguments + expr will be enough? @@ -1228,6 +1245,7 @@ impl CubeScanWrapperNode { &AliasedColumn { expr: join_condition, alias: "__join__alias__unused".to_string(), + data_type, }, &ungrouped_scan_node.used_cubes, )?; @@ -1518,6 +1536,13 @@ impl CubeScanWrapperNode { } else { original_expr.clone() }; + //let data_type = expr.get_type(&schema.clone())?; + //let data_type = Self::generate_sql_type(generator.clone(), data_type.clone())?; + let data_type = expr + .get_type(&schema) + .and_then(|dt| Self::generate_sql_type(generator.clone(), dt)) + .unwrap_or_else(|_| "".to_string()); + let (expr_sql, new_sql_query) = Self::generate_sql_for_expr( plan.clone(), sql, @@ -1535,6 +1560,7 @@ impl CubeScanWrapperNode { aliased_columns.push(AliasedColumn { expr: expr_sql, alias, + data_type, }); } Ok((aliased_columns, sql)) @@ -2040,6 +2066,10 @@ impl CubeScanWrapperNode { asc, nulls_first, } => { + let data_type = expr + .get_type(plan.schema()) + .and_then(|dt| Self::generate_sql_type(sql_generator.clone(), dt)) + .unwrap_or_else(|_| "".to_string()); let (expr, sql_query) = Self::generate_sql_for_expr( plan.clone(), sql_query, @@ -2051,7 +2081,7 @@ impl CubeScanWrapperNode { .await?; let resulting_sql = sql_generator .get_sql_templates() - .sort_expr(expr, asc, nulls_first) + .sort_expr(expr, asc, nulls_first, data_type) .map_err(|e| { DataFusionError::Internal(format!( "Can't generate SQL for sort expr: {}", diff --git a/rust/cubesql/cubesql/src/transport/service.rs b/rust/cubesql/cubesql/src/transport/service.rs index 85d9d270910f9..7805a5dd9a82c 100644 --- a/rust/cubesql/cubesql/src/transport/service.rs +++ b/rust/cubesql/cubesql/src/transport/service.rs @@ -349,6 +349,7 @@ pub struct SqlTemplates { pub struct AliasedColumn { pub expr: String, pub alias: String, + pub data_type: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -357,6 +358,7 @@ pub struct TemplateColumn { pub alias: String, pub aliased: String, pub index: usize, + pub data_type: String, } impl SqlTemplates { @@ -498,6 +500,7 @@ impl SqlTemplates { alias: c.alias.to_string(), aliased: self.alias_expr(&c.expr, &c.alias)?, index: i + 1, + data_type: c.data_type, }) }) .collect::, _>>() @@ -708,10 +711,11 @@ impl SqlTemplates { expr: String, asc: bool, nulls_first: bool, + data_type: String, ) -> Result { self.render_template( "expressions/sort", - context! { expr => expr, asc => asc, nulls_first => nulls_first }, + context! { expr => expr, asc => asc, nulls_first => nulls_first, data_type => data_type }, ) } From 4fc9ed497bb0025b01f088522b78810e2946b0d3 Mon Sep 17 00:00:00 2001 From: Engin Al Date: Thu, 6 Feb 2025 04:43:08 +0300 Subject: [PATCH 10/15] Fix default clickhouseSortCollection value --- packages/cubejs-backend-shared/src/env.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index ffc75621d2eb0..2acc148b9bcd4 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -1147,8 +1147,8 @@ const variables: Record any> = { dataSource: string, }) => ( process.env[ - keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource) || 'en' - ] + keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource) + ] || 'en' ), /** From b44e57e5beda0326f8538a765f7b599e0cae74ac Mon Sep 17 00:00:00 2001 From: Engin Al Date: Thu, 6 Feb 2025 04:56:51 +0300 Subject: [PATCH 11/15] Use ILIKE instead of LIKE with lowerUTF8 and toValidUTF8 --- packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index e97a12b5fa099..338af8d274859 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -20,7 +20,7 @@ class ClickHouseFilter extends BaseFilter { public likeIgnoreCase(column, not, param, type) { const p = (!type || type === 'contains' || type === 'ends') ? '%' : ''; const s = (!type || type === 'contains' || type === 'starts') ? '%' : ''; - return `lowerUTF8(toValidUTF8(${column})) ${not ? 'NOT' : ''} LIKE CONCAT('${p}', lowerUTF8(toValidUTF8(${this.allocateParam(param)})), '${s}')`; + return `${column} ${not ? 'NOT' : ''} ILIKE CONCAT('${p}', ${this.allocateParam(param)}, '${s}')`; } public castParameter() { From ce3ec561848b4f3a73f1ae672eb8c0a5ccb32ca5 Mon Sep 17 00:00:00 2001 From: Engin Al Date: Thu, 6 Feb 2025 18:42:20 +0300 Subject: [PATCH 12/15] Fix clickhouseUseCollation tests to check for default value --- packages/cubejs-backend-shared/test/db_env_multi.test.ts | 4 ++-- packages/cubejs-backend-shared/test/db_env_single.test.ts | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/cubejs-backend-shared/test/db_env_multi.test.ts b/packages/cubejs-backend-shared/test/db_env_multi.test.ts index f7f5b12221c68..cb2081a46ce6d 100644 --- a/packages/cubejs-backend-shared/test/db_env_multi.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_multi.test.ts @@ -1561,8 +1561,8 @@ describe('Multiple datasources', () => { delete process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION; delete process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION; delete process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION; - expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toBeUndefined(); - expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toBeUndefined(); + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('en'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('en'); expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow( 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' ); diff --git a/packages/cubejs-backend-shared/test/db_env_single.test.ts b/packages/cubejs-backend-shared/test/db_env_single.test.ts index c612ea77f7e19..b7d70e264598a 100644 --- a/packages/cubejs-backend-shared/test/db_env_single.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_single.test.ts @@ -987,9 +987,9 @@ describe('Single datasources', () => { expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('default2'); delete process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION; - expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toBeUndefined(); - expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toBeUndefined(); - expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toBeUndefined(); + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('en'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('en'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('en'); }); test('getEnv("clickhouseUseCollation")', () => { From b649df26d8e933661320e2e29d7f63eb94bc50a5 Mon Sep 17 00:00:00 2001 From: Engin Al Date: Thu, 6 Feb 2025 19:39:46 +0300 Subject: [PATCH 13/15] Fix clickhouse integration tests for the missing values --- .../clickhouse-dataschema-compiler.test.ts | 2 +- .../clickhouse-graph-builder.test.ts | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts index 104e997130194..fdb9bd04a6be0 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts @@ -414,7 +414,7 @@ describe('ClickHouse DataSchemaCompiler', () => { const sqlAndParams = query.buildSqlAndParams(); const res = await dbRunner.testQuery(sqlAndParams); const sql = sqlAndParams[0]; - expect(sql).toMatch('ORDER BY `visitors__source` COLLATE \'en\''); + expect(sql).toMatch('ORDER BY `visitors__source` ASC COLLATE \'en\''); expect(res).toEqual([{ visitors__source: 'google' }, { visitors__source: 'Gork' }, { visitors__source: 'some' }]); }); diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts index a18ad1b27b5da..b93fda8368915 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts @@ -340,6 +340,13 @@ describe('ClickHouse JoinGraph', () => { visitors__visitor_count: '2', visitor_checkins__visitor_checkins_count: '0', visitors__per_visitor_revenue: null + }, + { + visitors__created_at_day: '2017-01-07T00:00:00.000', + visitors__visitor_revenue: null, + visitors__visitor_count: '1', + visitor_checkins__visitor_checkins_count: '0', + visitors__per_visitor_revenue: null } ] ); @@ -574,7 +581,8 @@ describe('ClickHouse JoinGraph', () => { { visitors__created_at_sql_utils_day: '2017-01-02T00:00:00.000', visitors__visitor_count: '1' }, { visitors__created_at_sql_utils_day: '2017-01-04T00:00:00.000', visitors__visitor_count: '1' }, { visitors__created_at_sql_utils_day: '2017-01-05T00:00:00.000', visitors__visitor_count: '1' }, - { visitors__created_at_sql_utils_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' } + { visitors__created_at_sql_utils_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' }, + { visitors__created_at_sql_utils_day: '2017-01-07T00:00:00.000', visitors__visitor_count: '1' } ])); it('running total total', () => runQueryTest({ @@ -591,7 +599,7 @@ describe('ClickHouse JoinGraph', () => { timezone: 'America/Los_Angeles' }, [ { - visitors__revenue_running: '1500' + visitors__revenue_running: '1800' } ])); @@ -885,6 +893,10 @@ describe('ClickHouse JoinGraph', () => { debugLog(JSON.stringify(res)); expect(res).toEqual( [{ + visitors__checkins: '0', + visitors__created_at_day: '2017-01-07T00:00:00.000', + visitors__visitor_count: '1' + }, { visitors__checkins: '0', visitors__created_at_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' @@ -1283,7 +1295,7 @@ describe('ClickHouse JoinGraph', () => { }, { visitors__created_at_year: '2017-01-01T00:00:00.000', - visitors__visitor_count: '5' + visitors__visitor_count: '6' } ])); }); From 79ab001143fe7c1d5aa770729014bf3d476c82ab Mon Sep 17 00:00:00 2001 From: Engin Al Date: Thu, 6 Feb 2025 20:30:26 +0000 Subject: [PATCH 14/15] Assert collation in the 'collation in order by' clickhouse integration test --- packages/cubejs-backend-shared/src/env.ts | 17 +++++++++-------- .../clickhouse-dataschema-compiler.test.ts | 3 +++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index 2acc148b9bcd4..c815203787b53 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -1141,15 +1141,16 @@ const variables: Record any> = { /** * ClickHouse sort collation. */ - clickhouseSortCollation: ({ - dataSource - }: { - dataSource: string, - }) => ( - process.env[ + clickhouseSortCollation: ({ dataSource }: {dataSource: string }) => { + const val = process.env[ keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource) - ] || 'en' - ), + ]; + if (!val) { + // Default to 'en' collation + return 'en'; + } + return val; + }, /** * Clickhouse use collation flag. diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts index fdb9bd04a6be0..bf8d3e86e373d 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts @@ -411,6 +411,9 @@ describe('ClickHouse DataSchemaCompiler', () => { }); logSqlAndParams(query); + const collation = query.getCollation(); + expect(collation).toEqual('en'); + const sqlAndParams = query.buildSqlAndParams(); const res = await dbRunner.testQuery(sqlAndParams); const sql = sqlAndParams[0]; From beac44994cdb124ae15020bf8f5e0d20f03d8320 Mon Sep 17 00:00:00 2001 From: Engin Al Date: Tue, 11 Feb 2025 21:57:36 +0300 Subject: [PATCH 15/15] Fix getFieldType by using hash.id instead --- .../cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts | 8 ++++++-- .../clickhouse/clickhouse-dataschema-compiler.test.ts | 5 +---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index 338af8d274859..fbfd40eb59a59 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -155,8 +155,12 @@ export class ClickHouseQuery extends BaseQuery { return null; } - public getFieldType(id) { - const field = this.getField(id); + public getFieldType(hash) { + if (!hash || !hash.id) { + return null; + } + + const field = this.getField(hash.id); if (field) { return field.definition().type; diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts index bf8d3e86e373d..50e5baa562b06 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts @@ -411,15 +411,12 @@ describe('ClickHouse DataSchemaCompiler', () => { }); logSqlAndParams(query); - const collation = query.getCollation(); - expect(collation).toEqual('en'); - const sqlAndParams = query.buildSqlAndParams(); const res = await dbRunner.testQuery(sqlAndParams); const sql = sqlAndParams[0]; expect(sql).toMatch('ORDER BY `visitors__source` ASC COLLATE \'en\''); - expect(res).toEqual([{ visitors__source: 'google' }, { visitors__source: 'Gork' }, { visitors__source: 'some' }]); + expect(res).toEqual([{ visitors__source: 'google' }, { visitors__source: 'Gork' }, { visitors__source: 'some' }, { visitors__source: null }]); }); it('export import', () => {