diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index f6df8bd4bf933..85a177ca54fee 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -1169,6 +1169,53 @@ const variables: Record any> = { ] ), + /** + * ClickHouse sort collation. + */ + clickhouseSortCollation: ({ dataSource }: {dataSource: string }) => { + const val = process.env[ + keyByDataSource('CUBEJS_DB_CLICKHOUSE_SORT_COLLATION', dataSource) + ]; + if (!val) { + // Default to 'en' collation + return 'en'; + } + return val; + }, + + /** + * Clickhouse use collation flag. + */ + + clickhouseUseCollation: ({ dataSource }: { dataSource: string }) => { + const val = process.env[ + keyByDataSource( + 'CUBEJS_DB_CLICKHOUSE_USE_COLLATION', + dataSource, + ) + ]; + + if (val) { + if (val.toLocaleLowerCase() === 'true') { + return true; + } else if (val.toLowerCase() === 'false') { + return false; + } else { + throw new TypeError( + `The ${ + keyByDataSource( + 'CUBEJS_DB_CLICKHOUSE_USE_COLLATION', + dataSource, + ) + } must be either 'true' or 'false'.` + ); + } + } else { + // Default to true + return true; + } + }, + /** **************************************************************** * ElasticSearch Driver * ***************************************************************** */ diff --git a/packages/cubejs-backend-shared/test/db_env_multi.test.ts b/packages/cubejs-backend-shared/test/db_env_multi.test.ts index b37ce923c20c4..cb2081a46ce6d 100644 --- a/packages/cubejs-backend-shared/test/db_env_multi.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_multi.test.ts @@ -1539,6 +1539,77 @@ describe('Multiple datasources', () => { ); }); + test('getEnv("clickhouseSortCollation")', () => { + process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default1'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION = 'postgres1'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION = 'wrong1'; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('postgres1'); + expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default2'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION = 'postgres2'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION = 'wrong2'; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('postgres2'); + expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + delete process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION; + delete process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_SORT_COLLATION; + delete process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_SORT_COLLATION; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('en'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('en'); + expect(() => getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + }); + + test('getEnv("clickhouseUseCollation")', () => { + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'true'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'true'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'true'; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'false'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'false'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'false'; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(false); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(false); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'wrong'; + process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION = 'wrong'; + process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION = 'wrong'; + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'default' })).toThrow( + 'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toThrow( + 'The CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + + delete process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION; + delete process.env.CUBEJS_DS_POSTGRES_DB_CLICKHOUSE_USE_COLLATION; + delete process.env.CUBEJS_DS_WRONG_DB_CLICKHOUSE_USE_COLLATION; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The wrong data source is missing in the declared CUBEJS_DATASOURCES.' + ); + }); + test('getEnv("elasticApiId")', () => { process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1'; process.env.CUBEJS_DS_POSTGRES_DB_ELASTIC_APIKEY_ID = 'postgres1'; diff --git a/packages/cubejs-backend-shared/test/db_env_single.test.ts b/packages/cubejs-backend-shared/test/db_env_single.test.ts index 765e94b7c175d..b7d70e264598a 100644 --- a/packages/cubejs-backend-shared/test/db_env_single.test.ts +++ b/packages/cubejs-backend-shared/test/db_env_single.test.ts @@ -975,6 +975,51 @@ describe('Single datasources', () => { expect(getEnv('clickhouseReadOnly', { dataSource: 'wrong' })).toBeUndefined(); }); + test('getEnv("clickhouseSortCollation")', () => { + process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default1'; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default1'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('default1'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('default1'); + + process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION = 'default2'; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('default2'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('default2'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('default2'); + + delete process.env.CUBEJS_DB_CLICKHOUSE_SORT_COLLATION; + expect(getEnv('clickhouseSortCollation', { dataSource: 'default' })).toEqual('en'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'postgres' })).toEqual('en'); + expect(getEnv('clickhouseSortCollation', { dataSource: 'wrong' })).toEqual('en'); + }); + + test('getEnv("clickhouseUseCollation")', () => { + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'true'; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(true); + + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'false'; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(false); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(false); + expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(false); + + process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION = 'wrong'; + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'default' })).toThrow( + 'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toThrow( + 'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + expect(() => getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toThrow( + 'The CUBEJS_DB_CLICKHOUSE_USE_COLLATION must be either \'true\' or \'false\'.' + ); + + delete process.env.CUBEJS_DB_CLICKHOUSE_USE_COLLATION; + expect(getEnv('clickhouseUseCollation', { dataSource: 'default' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'postgres' })).toEqual(true); + expect(getEnv('clickhouseUseCollation', { dataSource: 'wrong' })).toEqual(true); + }); + test('getEnv("elasticApiId")', () => { process.env.CUBEJS_DB_ELASTIC_APIKEY_ID = 'default1'; expect(getEnv('elasticApiId', { dataSource: 'default' })).toEqual('default1'); diff --git a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts index d643f1cdfcd9f..fbfd40eb59a59 100644 --- a/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts +++ b/packages/cubejs-schema-compiler/src/adapter/ClickHouseQuery.ts @@ -1,4 +1,6 @@ -import { parseSqlInterval } from '@cubejs-backend/shared'; +import R from 'ramda'; + +import { getEnv, parseSqlInterval } from '@cubejs-backend/shared'; import { BaseQuery } from './BaseQuery'; import { BaseFilter } from './BaseFilter'; import { UserError } from '../compiler/UserError'; @@ -18,7 +20,7 @@ class ClickHouseFilter extends BaseFilter { public likeIgnoreCase(column, not, param, type) { const p = (!type || type === 'contains' || type === 'ends') ? '%' : ''; const s = (!type || type === 'contains' || type === 'starts') ? '%' : ''; - return `lower(${column}) ${not ? 'NOT' : ''} LIKE CONCAT('${p}', lower(${this.allocateParam(param)}), '${s}')`; + return `${column} ${not ? 'NOT' : ''} ILIKE CONCAT('${p}', ${this.allocateParam(param)}, '${s}')`; } public castParameter() { @@ -123,7 +125,7 @@ export class ClickHouseQuery extends BaseQuery { .join(' AND '); } - public getFieldAlias(id) { + public getField(id) { const equalIgnoreCase = (a, b) => ( typeof a === 'string' && typeof b === 'string' && a.toUpperCase() === b.toUpperCase() ); @@ -134,16 +136,34 @@ export class ClickHouseQuery extends BaseQuery { d => equalIgnoreCase(d.dimension, id), ); + if (!field) { + field = this.measures.find( + d => equalIgnoreCase(d.measure, id) || equalIgnoreCase(d.expressionName, id), + ); + } + + return field; + } + + public getFieldAlias(id) { + const field = this.getField(id); + if (field) { return field.aliasName(); } - field = this.measures.find( - d => equalIgnoreCase(d.measure, id) || equalIgnoreCase(d.expressionName, id), - ); + return null; + } + + public getFieldType(hash) { + if (!hash || !hash.id) { + return null; + } + + const field = this.getField(hash.id); if (field) { - return field.aliasName(); + return field.definition().type; } return null; @@ -168,6 +188,43 @@ export class ClickHouseQuery extends BaseQuery { return `${fieldAlias} ${direction}`; } + public getCollation() { + const useCollation = getEnv('clickhouseUseCollation', { dataSource: this.dataSource }); + if (useCollation) { + return getEnv('clickhouseSortCollation', { dataSource: this.dataSource }); + } + return null; + } + + public override orderBy() { + // + // ClickHouse orders string by bytes, so we need to use COLLATE 'en' to order by string + // + if (R.isEmpty(this.order)) { + return ''; + } + + const collation = this.getCollation(); + + const orderByString = R.pipe( + R.map((order) => { + let orderString = this.orderHashToString(order); + if (collation && this.getFieldType(order) === 'string') { + orderString = `${orderString} COLLATE '${collation}'`; + } + return orderString; + }), + R.reject(R.isNil), + R.join(', ') + )(this.order); + + if (!orderByString) { + return ''; + } + + return ` ORDER BY ${orderByString}`; + } + public groupByClause() { if (this.ungrouped) { return ''; @@ -281,6 +338,22 @@ export class ClickHouseQuery extends BaseQuery { // ClickHouse intervals have a distinct type for each granularity delete templates.types.interval; delete templates.types.binary; + + const collation = this.getCollation(); + + if (collation) { + templates.expressions.sort = `${templates.expressions.sort}{% if data_type and data_type == 'string' %} COLLATE '${collation}'{% endif %}`; + templates.expressions.order_by = `${templates.expressions.order_by}{% if data_type and data_type == 'string' %} COLLATE '${collation}'{% endif %}`; + + const oldOrderBy = '{% if order_by %}\nORDER BY {{ order_by | map(attribute=\'expr\') | join(\', \') }}{% endif %}'; + + const newOrderBy = + '{% if order_by %}\nORDER BY {% for item in order_by %}{{ item.expr }}' + + `{%- if item.data_type and item.data_type == 'string' %} COLLATE '${collation}'{% endif %}` + + '{%- if not loop.last %}, {% endif %}{% endfor %}{% endif %}'; + + templates.statements.select = templates.statements.select.replace(oldOrderBy, newOrderBy); + } return templates; } } diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.ts index 296a224446a57..a7e2049c968c7 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/ClickHouseDbRunner.ts @@ -65,7 +65,8 @@ export class ClickHouseDbRunner extends BaseDbRunner { (3, 300, '2017-01-05 16:00:00', '2017-01-19 16:00:00', 2, 'google', 120.120, 70.60), (4, 400, '2017-01-06 16:00:00', '2017-01-24 16:00:00', 2, null, 120.120, 10.60), (5, 500, '2017-01-06 16:00:00', '2017-01-24 16:00:00', 2, null, 120.120, 58.10), - (6, 500, '2016-09-06 16:00:00', '2016-09-06 16:00:00', 2, null, 120.120, 58.10) + (6, 500, '2016-09-06 16:00:00', '2016-09-06 16:00:00', 2, null, 120.120, 58.10), + (7, 300, '2017-01-07 16:00:00', '2017-01-25 16:00:00', 2, 'Gork', 120.120, 59.60) ` }); await clickHouse.command({ query: ` diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts index 1d2cc00e649df..50e5baa562b06 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-dataschema-compiler.test.ts @@ -168,7 +168,8 @@ describe('ClickHouse DataSchemaCompiler', () => { { visitors__created_at_day: '2017-01-02T00:00:00.000', visitors__visitor_count: '1' }, { visitors__created_at_day: '2017-01-04T00:00:00.000', visitors__visitor_count: '1' }, { visitors__created_at_day: '2017-01-05T00:00:00.000', visitors__visitor_count: '1' }, - { visitors__created_at_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' } + { visitors__created_at_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' }, + { visitors__created_at_day: '2017-01-07T00:00:00.000', visitors__visitor_count: '1' } ] ); }); @@ -229,7 +230,7 @@ describe('ClickHouse DataSchemaCompiler', () => { expect(res).toEqual( [ { visitors__status: 'Approved', visitors__visitor_count: '2' }, - { visitors__status: 'Canceled', visitors__visitor_count: '4' } + { visitors__status: 'Canceled', visitors__visitor_count: '5' } ] ); }); @@ -299,6 +300,7 @@ describe('ClickHouse DataSchemaCompiler', () => { expect(res).toEqual( [ { visitors__enabled_source: 'google', visitors__visitor_count: '1' }, + { visitors__enabled_source: 'Gork', visitors__visitor_count: '1' }, { visitors__enabled_source: 'some', visitors__visitor_count: '2' }, { visitors__enabled_source: null, visitors__visitor_count: '3' }, ] @@ -338,7 +340,8 @@ describe('ClickHouse DataSchemaCompiler', () => { { visitors__created_at: '2016-09-06T16:00:00.000' }, { visitors__created_at: '2017-01-04T16:00:00.000' }, { visitors__created_at: '2017-01-05T16:00:00.000' }, - { visitors__created_at: '2017-01-06T16:00:00.000' } + { visitors__created_at: '2017-01-06T16:00:00.000' }, + { visitors__created_at: '2017-01-07T16:00:00.000' } ], [{ visitors__created_at: '2017-01-06T16:00:00.000' }], [ @@ -347,7 +350,10 @@ describe('ClickHouse DataSchemaCompiler', () => { { visitors__created_at: '2017-01-04T16:00:00.000' }, { visitors__created_at: '2017-01-05T16:00:00.000' } ], - [{ visitors__created_at: '2017-01-06T16:00:00.000' }] + [ + { visitors__created_at: '2017-01-06T16:00:00.000' }, + { visitors__created_at: '2017-01-07T16:00:00.000' } + ] ]; ['in_date_range', 'not_in_date_range', 'on_the_date', 'before_date', 'after_date'].map((operator, index) => { const filterValues = index < 2 ? ['2017-01-01', '2017-01-03'] : ['2017-01-06', '2017-01-06']; @@ -377,6 +383,41 @@ describe('ClickHouse DataSchemaCompiler', () => { return true; }); } + it('collation in order by', async () => { + const { compiler, cubeEvaluator, joinGraph } = testPrepareCompiler(` + cube('visitors', { + sql: \` + select * from visitors + \`, + + dimensions: { + source: { + type: 'string', + sql: 'source' + } + } + }) + `); + await compiler.compile(); + + const query = new ClickHouseQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: ['visitors.source'], + order: [{ + id: 'visitors.source', + desc: false + }], + timezone: 'America/Los_Angeles' + }); + logSqlAndParams(query); + + const sqlAndParams = query.buildSqlAndParams(); + const res = await dbRunner.testQuery(sqlAndParams); + const sql = sqlAndParams[0]; + expect(sql).toMatch('ORDER BY `visitors__source` ASC COLLATE \'en\''); + + expect(res).toEqual([{ visitors__source: 'google' }, { visitors__source: 'Gork' }, { visitors__source: 'some' }, { visitors__source: null }]); + }); it('export import', () => { const { compiler, cubeEvaluator, joinGraph } = prepareCompiler({ diff --git a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts index 599d764f0cdbf..b93fda8368915 100644 --- a/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/clickhouse/clickhouse-graph-builder.test.ts @@ -340,6 +340,13 @@ describe('ClickHouse JoinGraph', () => { visitors__visitor_count: '2', visitor_checkins__visitor_checkins_count: '0', visitors__per_visitor_revenue: null + }, + { + visitors__created_at_day: '2017-01-07T00:00:00.000', + visitors__visitor_revenue: null, + visitors__visitor_count: '1', + visitor_checkins__visitor_checkins_count: '0', + visitors__per_visitor_revenue: null } ] ); @@ -380,9 +387,9 @@ describe('ClickHouse JoinGraph', () => { order: [] }, [{ visitors__visitor_revenue: '300', - visitors__visitor_count: '5', + visitors__visitor_count: '6', visitor_checkins__visitor_checkins_count: '6', - visitors__per_visitor_revenue: '60' + visitors__per_visitor_revenue: '50' }])); // FAILS - need to finish query to override ::timestamptz @@ -574,7 +581,8 @@ describe('ClickHouse JoinGraph', () => { { visitors__created_at_sql_utils_day: '2017-01-02T00:00:00.000', visitors__visitor_count: '1' }, { visitors__created_at_sql_utils_day: '2017-01-04T00:00:00.000', visitors__visitor_count: '1' }, { visitors__created_at_sql_utils_day: '2017-01-05T00:00:00.000', visitors__visitor_count: '1' }, - { visitors__created_at_sql_utils_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' } + { visitors__created_at_sql_utils_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' }, + { visitors__created_at_sql_utils_day: '2017-01-07T00:00:00.000', visitors__visitor_count: '1' } ])); it('running total total', () => runQueryTest({ @@ -591,7 +599,7 @@ describe('ClickHouse JoinGraph', () => { timezone: 'America/Los_Angeles' }, [ { - visitors__revenue_running: '1500' + visitors__revenue_running: '1800' } ])); @@ -885,6 +893,10 @@ describe('ClickHouse JoinGraph', () => { debugLog(JSON.stringify(res)); expect(res).toEqual( [{ + visitors__checkins: '0', + visitors__created_at_day: '2017-01-07T00:00:00.000', + visitors__visitor_count: '1' + }, { visitors__checkins: '0', visitors__created_at_day: '2017-01-06T00:00:00.000', visitors__visitor_count: '2' @@ -1222,6 +1234,7 @@ describe('ClickHouse JoinGraph', () => { { visitors__location: '120.12,40.6' }, { visitors__location: '120.12,58.1' }, { visitors__location: '120.12,58.6' }, + { visitors__location: '120.12,59.6' }, { visitors__location: '120.12,70.6' } ])); @@ -1282,7 +1295,7 @@ describe('ClickHouse JoinGraph', () => { }, { visitors__created_at_year: '2017-01-01T00:00:00.000', - visitors__visitor_count: '5' + visitors__visitor_count: '6' } ])); }); diff --git a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs index 28ff55e6be59c..5423883f5b8ea 100644 --- a/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs @@ -23,8 +23,8 @@ use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryJoinSubquery}; use datafusion::{ error::{DataFusionError, Result}, logical_plan::{ - plan::Extension, replace_col, Column, DFSchema, DFSchemaRef, Expr, GroupingSet, JoinType, - LogicalPlan, UserDefinedLogicalNode, + plan::Extension, replace_col, Column, DFSchema, DFSchemaRef, Expr, ExprSchemable, + GroupingSet, JoinType, LogicalPlan, UserDefinedLogicalNode, }, physical_plan::{aggregates::AggregateFunction, functions::BuiltinScalarFunction}, scalar::ScalarValue, @@ -797,6 +797,12 @@ impl CubeScanWrapperNode { // When generating column expression that points to literal member it would render literal and generate alias // Here it should just generate the literal // 2. It would not allow to provide aliases for expressions, instead it usually generates them + + let data_type = expr + .get_type(&node.schema) + .and_then(|dt| Self::generate_sql_type(generator.clone(), dt)) + .unwrap_or_else(|_| "".to_string()); + let (expr, sql) = Self::generate_sql_for_expr( plan.clone(), new_sql, @@ -806,7 +812,11 @@ impl CubeScanWrapperNode { Arc::new(HashMap::new()), ) .await?; - columns.push(AliasedColumn { expr, alias }); + columns.push(AliasedColumn { + expr, + alias, + data_type, + }); new_sql = sql; } @@ -1221,6 +1231,13 @@ impl CubeScanWrapperNode { let join_condition = join_condition[0].expr.clone(); sql = new_sql; + let data_type = condition + .get_type(&schema) + .and_then(|dt| { + Self::generate_sql_type(generator.clone(), dt) + }) + .unwrap_or_else(|_| "".to_string()); + let join_sql_expression = { // TODO this is NOT a proper way to generate member expr here // TODO Do we even want a full-blown member expression here? or arguments + expr will be enough? @@ -1228,6 +1245,7 @@ impl CubeScanWrapperNode { &AliasedColumn { expr: join_condition, alias: "__join__alias__unused".to_string(), + data_type, }, &ungrouped_scan_node.used_cubes, )?; @@ -1518,6 +1536,13 @@ impl CubeScanWrapperNode { } else { original_expr.clone() }; + //let data_type = expr.get_type(&schema.clone())?; + //let data_type = Self::generate_sql_type(generator.clone(), data_type.clone())?; + let data_type = expr + .get_type(&schema) + .and_then(|dt| Self::generate_sql_type(generator.clone(), dt)) + .unwrap_or_else(|_| "".to_string()); + let (expr_sql, new_sql_query) = Self::generate_sql_for_expr( plan.clone(), sql, @@ -1535,6 +1560,7 @@ impl CubeScanWrapperNode { aliased_columns.push(AliasedColumn { expr: expr_sql, alias, + data_type, }); } Ok((aliased_columns, sql)) @@ -2040,6 +2066,10 @@ impl CubeScanWrapperNode { asc, nulls_first, } => { + let data_type = expr + .get_type(plan.schema()) + .and_then(|dt| Self::generate_sql_type(sql_generator.clone(), dt)) + .unwrap_or_else(|_| "".to_string()); let (expr, sql_query) = Self::generate_sql_for_expr( plan.clone(), sql_query, @@ -2051,7 +2081,7 @@ impl CubeScanWrapperNode { .await?; let resulting_sql = sql_generator .get_sql_templates() - .sort_expr(expr, asc, nulls_first) + .sort_expr(expr, asc, nulls_first, data_type) .map_err(|e| { DataFusionError::Internal(format!( "Can't generate SQL for sort expr: {}", diff --git a/rust/cubesql/cubesql/src/transport/service.rs b/rust/cubesql/cubesql/src/transport/service.rs index 85d9d270910f9..7805a5dd9a82c 100644 --- a/rust/cubesql/cubesql/src/transport/service.rs +++ b/rust/cubesql/cubesql/src/transport/service.rs @@ -349,6 +349,7 @@ pub struct SqlTemplates { pub struct AliasedColumn { pub expr: String, pub alias: String, + pub data_type: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -357,6 +358,7 @@ pub struct TemplateColumn { pub alias: String, pub aliased: String, pub index: usize, + pub data_type: String, } impl SqlTemplates { @@ -498,6 +500,7 @@ impl SqlTemplates { alias: c.alias.to_string(), aliased: self.alias_expr(&c.expr, &c.alias)?, index: i + 1, + data_type: c.data_type, }) }) .collect::, _>>() @@ -708,10 +711,11 @@ impl SqlTemplates { expr: String, asc: bool, nulls_first: bool, + data_type: String, ) -> Result { self.render_template( "expressions/sort", - context! { expr => expr, asc => asc, nulls_first => nulls_first }, + context! { expr => expr, asc => asc, nulls_first => nulls_first, data_type => data_type }, ) }