Skip to content

Commit

Permalink
Merge pull request #3 from troshnev/utf8_tests
Browse files Browse the repository at this point in the history
[fix] fixing ParameterNameSegment and adding new tests
  • Loading branch information
troshnev authored Sep 25, 2024
2 parents 0d4ba62 + 145b3e8 commit d7e51a4
Show file tree
Hide file tree
Showing 5 changed files with 823 additions and 25 deletions.
7 changes: 4 additions & 3 deletions src/sqlfluff/dialects/dialect_vertica.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,10 +519,11 @@
)
),
ParameterNameSegment=RegexParser(
#need to cover cases where non-ascii word is parameter like ALTER TABLE some_table TO utf8_identifier_eg_Verkäufer;
# need to cover cases where non-ascii word is parameter
# like ```ALTER TABLE some_table TO utf8_identifier_eg_Verkäufer;```
r"[\p{L}_][\p{L}\p{N}$_]*",
sqlfluff.core.parser.CodeSegment,
type="parameter"
CodeSegment,
type="parameter",
),
)

Expand Down
45 changes: 45 additions & 0 deletions test/fixtures/dialects/vertica/select.sql
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,48 @@ SELECT DISTINCT customer_name
FROM customer_dimension
WHERE customer_region = 'East'
AND customer_name ILIKE 'Amer%';

/* https://docs.vertica.com/24.3.x/en/sql-reference/language-elements/identifiers/
* Unquoted SQL identifiers must begin with one of the following:
* * Non-Unicode letters: A–Z or a-z
* -- (Vertica actually also accepts non-ASCII UTF-8 Unicode
* characters here, although this is not well documented)
* * Underscore (_)
* Subsequent characters in an identifier can be any combination of
* the following:
* * Non-Unicode letters: A–Z or a-z
* * Underscore (_)
* * Digits (0–9)
* * Unicode letters (letters with diacriticals or not in the Latin
* alphabet), unsupported for model names
* * Dollar sign ($), unsupported for model names
*
* Vertica accepts **non-ASCII UTF-8 Unicode characters** for table
* names, column names, and other identifiers,
* extending the cases where upper/lower case distinctions are
* ignored (case-folded) to all alphabets,
* including Latin, Cyrillic, and Greek.
*/
-- unquoted identifiers
-- Baseline: plain ASCII letters in both the schema and the table name.
SELECT * FROM public.sales;

-- Table name varies while the schema stays plain: trailing digit,
-- trailing underscore, embedded and trailing dollar signs, leading
-- underscore, and leading underscore followed by digits.
SELECT * FROM public.sales1;
SELECT * FROM public.sales_;
SELECT * FROM public.s$ales$;
SELECT * FROM public._sales;
SELECT * FROM public._1234sales;

-- Same variations applied to the schema name while the table stays plain.
SELECT * FROM public1.sales;
SELECT * FROM public_.sales;
SELECT * FROM p$ublic$.sales;
SELECT * FROM _public.sales;
SELECT * FROM _1234public.sales;

-- Variations applied to the schema and the table name at the same time,
-- combining digits, underscores, and dollar signs.
SELECT * FROM public1.sales1;
SELECT * FROM public1_.sales1_;
SELECT * FROM p$ublic1_$.s$ales1_$;


-- quoted identifiers
-- Double-quoted names, including ones that begin with a digit, which the
-- unquoted rules described above (letter or underscore first) do not cover.
SELECT * FROM "12public"."12344gr";
SELECT * FROM "_1234public"."_1234sales";
290 changes: 289 additions & 1 deletion test/fixtures/dialects/vertica/select.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: 296aff357834f41b502939251b9c5574cbfe4c2e96ad50e6d2ca5f83dc0d21b0
_hash: 81dff6951792526333c0948e979cdfd183bbd58721afe5f4c607b3f816f1ada0
file:
- statement:
select_statement:
Expand Down Expand Up @@ -724,3 +724,291 @@ file:
- keyword: ILIKE
- quoted_literal: "'Amer%'"
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public
- dot: .
- naked_identifier: sales
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public
- dot: .
- naked_identifier: sales1
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public
- dot: .
- naked_identifier: sales_
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public
- dot: .
- naked_identifier: s$ales$
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public
- dot: .
- naked_identifier: _sales
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public
- dot: .
- naked_identifier: _1234sales
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public1
- dot: .
- naked_identifier: sales
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public_
- dot: .
- naked_identifier: sales
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: p$ublic$
- dot: .
- naked_identifier: sales
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: _public
- dot: .
- naked_identifier: sales
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: _1234public
- dot: .
- naked_identifier: sales
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public1
- dot: .
- naked_identifier: sales1
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: public1_
- dot: .
- naked_identifier: sales1_
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- naked_identifier: p$ublic1_$
- dot: .
- naked_identifier: s$ales1_$
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- quoted_identifier: '"12public"'
- dot: .
- quoted_identifier: '"12344gr"'
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
- quoted_identifier: '"_1234public"'
- dot: .
- quoted_identifier: '"_1234sales"'
- statement_terminator: ;
Loading

0 comments on commit d7e51a4

Please sign in to comment.