Better handling of parentheses inside string literals

When scanning for the matching parenthesis, string literals
were not taken into account, which would inevitably lead to
errors if one wanted to do a selection on a string containing
a parenthesis.

The scanner now uses a small state machine to skip the
parentheses that are located inside string literals.
This commit is contained in:
Salvo 'LtWorf' Tomaselli 2016-04-09 11:37:28 +02:00
parent 6bc219c635
commit 1b049e13f0
10 changed files with 49 additions and 3 deletions

View File

@ -2,8 +2,8 @@
- Improve error reporting
- Release is now signed with PGP
- Doesn't crash on network errors
- Fixed optimization introduced in 2.2
that did not hold in all cases.
- Fixed optimization introduced in 2.2 that did not hold in all cases
- Better handling of parenthesis inside string literals
2.3
- Very small release. The windows setup now installs the C++ library

View File

@ -291,7 +291,21 @@ def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')')
close parenthesis to the 1st open parenthesis found
starting from start (0 by default)'''
par_count = 0 # Count of parenthesis
string = False
escape = False
for i in range(start, len(expression)):
if expression[i] == '\'' and not escape:
string = not string
if expression[i] == '\\' and not escape:
escape = True
else:
escape = False
if string:
continue
if expression[i] == openpar:
par_count += 1
elif expression[i] == closepar:
@ -299,6 +313,30 @@ def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')')
if par_count == 0:
return i # Closing parenthesis of the parameter
def _find_token(haystack, needle):
'''
Like the string function find, but
ignores tokens that are within a string
literal.
'''
r = -1
string = False
escape = False
for i in range(len(haystack)):
if haystack[i] == '\'' and not escape:
string = not string
if haystack[i] == '\\' and not escape:
escape = True
else:
escape = False
if string:
continue
if haystack[i:].startswith(needle):
return i
return r
def tokenize(expression):
'''This function converts a relational expression into a list where
@ -331,7 +369,7 @@ def tokenize(expression):
par = expression.find(
'(', _find_matching_parenthesis(expression))
else: # Expression without parenthesis, so adding what's between start and parenthesis as whole
par = expression.find('(')
par = _find_token(expression, '(')
items.append(expression[:par].strip())
# Inserting parameter of the operator

1
test/par1.query Normal file
View File

@ -0,0 +1 @@
σ name=='(' (people)

1
test/par1.result Normal file
View File

@ -0,0 +1 @@
id,name,chief,age

1
test/par2.query Normal file
View File

@ -0,0 +1 @@
σ (name=='(') (people)

1
test/par2.result Normal file
View File

@ -0,0 +1 @@
id,name,chief,age

1
test/par3.query Normal file
View File

@ -0,0 +1 @@
σ (name==')') (people)

1
test/par3.result Normal file
View File

@ -0,0 +1 @@
id,name,chief,age

1
test/par4.query Normal file
View File

@ -0,0 +1 @@
σ name==')' (people)

1
test/par4.result Normal file
View File

@ -0,0 +1 @@
id,name,chief,age