Skip to content

Commit

Permalink
parser: Allow double quoted identifiers
Browse files Browse the repository at this point in the history
Quoted identifiers can contain arbitrary characters. Double quotes are
escaped as two double quotes. A quoted identifier is always an identifier,
never a keyword. Quoted identifiers are case sensitive, whereas unquoted
ones are always folded to lower case. See
https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS

For backward compatibility and convenience, as an extension to the SQL
standard, double quoted strings are also allowed, like in SQLite. The
SQLite developers think this is a very bad idea, thus support for
double quoted strings may be deprecated in the future, and eventually
removed. See https://www.sqlite.org/quirks.html#double_quoted_string_literals_are_accepted
  • Loading branch information
dnicolodi committed Feb 1, 2025
1 parent a84ba5d commit 8ce1b52
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 4 deletions.
5 changes: 4 additions & 1 deletion beanquery/parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@ def string(self, value):
def boolean(self, value):
    """Return True when *value* is exactly the string 'TRUE', else False."""
    is_true = (value == 'TRUE')
    return is_true

def identifier(self, value):
def unquoted_identifier(self, value):
    """Fold the text of an unquoted identifier to lower case."""
    folded = value.lower()
    return folded

def quoted_identifier(self, value):
    """Collapse each doubled double quote in *value* into a single one.

    Letter case is left untouched.
    """
    # Splitting on the two-character escape and re-joining with a single
    # quote performs the same left-to-right, non-overlapping substitution.
    pieces = value.split('""')
    return '"'.join(pieces)

def asterisk(self, value):
    """Build an ``ast.Asterisk`` node; *value* is not used."""
    node = ast.Asterisk()
    return node

Expand Down
12 changes: 11 additions & 1 deletion beanquery/parser/bql.ebnf
Original file line number Diff line number Diff line change
Expand Up @@ -321,11 +321,21 @@ list
= '(' &( literal ',') @:','.{ (literal | ()) }+ ')'
;
@name
# An identifier is written either bare (unquoted) or enclosed in double quotes.
identifier
    =
    | unquoted_identifier
    | quoted_identifier
    ;
# Bare identifier: an ASCII letter or underscore followed by any number of
# ASCII letters, digits, or underscores.
@name
unquoted_identifier
    = /[a-zA-Z_][a-zA-Z0-9_]*/
    ;
# Double-quoted identifier: one or more characters between double quotes,
# where a double quote inside the identifier is written as two consecutive
# double quotes. The capture group spans the text between the delimiters.
quoted_identifier
    = /\"((?:[^\"]|\"\")+)\"/
    ;
# The literal '*' token.
asterisk
    = '*'
    ;
Expand Down
23 changes: 21 additions & 2 deletions beanquery/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -996,7 +996,8 @@ def block1():
)
self._error(
'expecting one of: '
'<identifier> [a-zA-Z_][a-zA-Z0-9_]*'
'<identifier> <quoted_identifier>'
'<unquoted_identifier>'
)

@tatsumasu('Column')
Expand Down Expand Up @@ -1074,10 +1075,28 @@ def block1():
self._token(')')

@tatsumasu()
@isname
def _identifier_(self):
    # Alternatives are listed in order: the unquoted form, then the quoted form.
    with self._choice():
        with self._option():
            self._unquoted_identifier_()
        with self._option():
            self._quoted_identifier_()
        # NOTE(review): presumably reached only when neither option matched;
        # confirm against the parser runtime.
        # NOTE(review): adjacent string literals are concatenated without a
        # separator, so the message reads
        # "<quoted_identifier><unquoted_identifier> ..." with no space between
        # the two rule names. This looks like generator output; confirm before
        # editing by hand.
        self._error(
            'expecting one of: '
            '<quoted_identifier>'
            '<unquoted_identifier> [a-zA-'
            'Z_][a-zA-Z0-9_]* \\"((?:[^\\"]|\\"\\")+)\\"'
        )

@tatsumasu()
@isname
def _unquoted_identifier_(self):
    # Bare identifier: an ASCII letter or underscore, then any number of
    # ASCII letters, digits, or underscores.
    self._pattern('[a-zA-Z_][a-zA-Z0-9_]*')

@tatsumasu()
def _quoted_identifier_(self):
    # Double-quoted identifier: one or more characters between double quotes;
    # a doubled double quote is accepted inside. The capture group covers the
    # text between the delimiters.
    self._pattern('\\"((?:[^\\"]|\\"\\")+)\\"')

@tatsumasu()
def _asterisk_(self):
    # Match the literal '*' token.
    self._token('*')
Expand Down
23 changes: 23 additions & 0 deletions beanquery/parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,3 +642,26 @@ def test_text(self):
def test_synthetic(self):
    # A node constructed directly in the test should report no source text.
    operands = [ast.Constant(False), ast.Constant(True)]
    conjunction = ast.And(operands)
    self.assertIsNone(conjunction.text)


class TestIdentifier(unittest.TestCase):
    # Exercises the 'identifier' grammar rule in isolation.

    @staticmethod
    def parse(string):
        # Start at the 'identifier' rule so the input is a bare identifier
        # rather than a complete query.
        return parser.BQLParser().parse(
            string,
            start='identifier',
            semantics=parser.BQLSemantics(),
        )

    def test_unquoted_identifier(self):
        self.assertEqual(self.parse('foo'), 'foo')
        # normalization to lower case
        self.assertEqual(self.parse('Foo'), 'foo')

    def test_quoted_identifier(self):
        self.assertEqual(self.parse('"foo"'), 'foo')
        self.assertEqual(self.parse('"foo bar"'), 'foo bar')
        self.assertEqual(self.parse('"1 + 2"'), '1 + 2')
        # no normalization to lower case
        self.assertEqual(self.parse('"Foo"'), 'Foo')
        # keywords allowed
        self.assertEqual(self.parse('"select"'), 'select')
        # quoted quotes
        self.assertEqual(self.parse('"foo""bar"'), 'foo"bar')

0 comments on commit 8ce1b52

Please sign in to comment.