- Improved tokenizer

- GUI shows Unicode errors (I really can't wait for Debian to have PyQt in Python 3)



git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@339 014f5005-505e-4b48-8d0a-63407b615a7c
This commit is contained in:
LtWorf 2011-11-01 18:47:03 +00:00
parent f6a6947218
commit 48bde3ea84
5 changed files with 30 additions and 23 deletions

View File

@ -1,3 +1,6 @@
1.2
- Better tokenizer, gives more indicative errors
1.1 1.1
- Incorrect relational operations now raise an exception instead of returning None - Incorrect relational operations now raise an exception instead of returning None
- Forces relations to have correct names for attributes - Forces relations to have correct names for attributes

View File

@ -40,7 +40,7 @@
# #
# Language definition here: # Language definition here:
# https://galileo.dmi.unict.it/wiki/relational/doku.php?id=language # https://galileo.dmi.unict.it/wiki/relational/doku.php?id=language
import re
RELATION=0 RELATION=0
UNARY=1 UNARY=1
@ -65,6 +65,9 @@ u_operators=(PROJECTION,SELECTION,RENAME) # List of unary operators
op_functions={PRODUCT:'product',DIFFERENCE:'difference',UNION:'union',INTERSECTION:'intersection',DIVISION:'division',JOIN:'join',JOIN_LEFT:'outer_left',JOIN_RIGHT:'outer_right',JOIN_FULL:'outer',PROJECTION:'projection',SELECTION:'selection',RENAME:'rename'} # Associates operator with python method op_functions={PRODUCT:'product',DIFFERENCE:'difference',UNION:'union',INTERSECTION:'intersection',DIVISION:'division',JOIN:'join',JOIN_LEFT:'outer_left',JOIN_RIGHT:'outer_right',JOIN_FULL:'outer',PROJECTION:'projection',SELECTION:'selection',RENAME:'rename'} # Associates operator with python method
class ParseException (Exception):
pass
class node (object): class node (object):
'''This class is a node of a relational expression. Leaves are relations and internal nodes are operations. '''This class is a node of a relational expression. Leaves are relations and internal nodes are operations.
@ -276,15 +279,18 @@ def tokenize(expression):
''' '''
while len(expression)>0: while len(expression)>0:
if expression.startswith('('): #Parenthesis state if expression.startswith('('): #Parenthesis state
state=2 state=2
end=_find_matching_parenthesis(expression) end=_find_matching_parenthesis(expression)
if end==None:
raise ParseException("Missing matching ')' in '%s'" %expression)
#Appends the tokenization of the content of the parenthesis #Appends the tokenization of the content of the parenthesis
items.append(tokenize(expression[1:end])) items.append(tokenize(expression[1:end]))
#Removes the entire parenthesis and content from the expression #Removes the entire parenthesis and content from the expression
expression=expression[end+1:].strip() expression=expression[end+1:].strip()
elif expression.startswith(u"σ") or expression.startswith(u"π") or expression.startswith(u"ρ"): #Unary 2 bytes elif expression.startswith((u"σ",u"π",u"ρ")): #Unary 2 bytes
items.append(expression[0:1]) #Adding operator in the top of the list items.append(expression[0:1]) #Adding operator in the top of the list
expression=expression[1:].strip() #Removing operator from the expression expression=expression[1:].strip() #Removing operator from the expression
@ -295,24 +301,19 @@ def tokenize(expression):
items.append(expression[:par].strip()) #Inserting parameter of the operator items.append(expression[:par].strip()) #Inserting parameter of the operator
expression=expression[par:].strip() #Removing parameter from the expression expression=expression[par:].strip() #Removing parameter from the expression
elif expression.startswith("*") or expression.startswith("-"): # Binary 1 byte elif expression.startswith((u"÷",u"ᑎ",u"ᑌ",u"*",u"-")):
items.append(expression[0]) items.append(expression[0])
expression=expression[1:].strip() #1 char from the expression expression=expression[1:].strip() #1 char from the expression
state=4 state=4
elif expression.startswith(u"ᑌ") or expression.startswith(u"ᑎ"): #Binary short 3 bytes
items.append(expression[0:1]) #Adding operator in the top of the list
expression=expression[1:].strip() #Removing operator from the expression
state=4
elif expression.startswith(u"÷"): #Binary short 2 bytes
items.append(expression[0:1]) #Adding operator in the top of the list
expression=expression[1:].strip() #Removing operator from the expression
state=4
elif expression.startswith(u"ᐅ"): #Binary long elif expression.startswith(u"ᐅ"): #Binary long
i=expression.find(u"ᐊ") i=expression.find(u"ᐊ")
if i==-1:
raise ParseException(u"Expected ᐊ in %s" % (expression,))
items.append(expression[:i+1]) items.append(expression[:i+1])
expression=expression[i+1:].strip() expression=expression[i+1:].strip()
state=4 state=4
elif re.match(r'[_0-9A-Za-z]',expression[0])==None: #At this point we only have relation names, so we raise errors for anything else
raise ParseException("Unexpected '%c' in '%s'" % (expression[0],expression))
else: #Relation (hopefully) else: #Relation (hopefully)
if state==1: #Previous was a relation, appending to the last token if state==1: #Previous was a relation, appending to the last token
i=items.pop() i=items.pop()
@ -352,7 +353,7 @@ def parse(expr):
You can use parenthesis to change priority: a ᐅᐊ (q d). You can use parenthesis to change priority: a ᐅᐊ (q d).
IMPORTANT: The encoding used by this module is UTF-8 (all strings must be UTF-8) IMPORTANT: all strings must be unicode
EXAMPLES EXAMPLES
σage > 25 and rank == weight(A) σage > 25 and rank == weight(A)
@ -365,10 +366,11 @@ def parse(expr):
return tree(expr).toPython() return tree(expr).toPython()
if __name__=="__main__": if __name__=="__main__":
#while True: while True:
# e=raw_input("Expression: ") e=unicode(raw_input("Expression: "),'utf-8')
# print parse(e) print parse(e)
b=u"σ age>1 and skill=='C' (peopleᐅᐊskills)"
print b[0]
parse(b)
#b=u"σ age>1 and skill=='C' (peopleᐅᐊskills)"
#print b[0]
#parse(b)
pass

View File

@ -85,7 +85,7 @@ if __name__ == "__main__":
pyqt=True pyqt=True
try: try:
import sip import sip #needed on windows
from PyQt4 import QtGui from PyQt4 import QtGui
except: except:
print >> sys.stderr, "PyQt seems to be missing, trying to use Pyside" print >> sys.stderr, "PyQt seems to be missing, trying to use Pyside"

View File

@ -103,8 +103,8 @@ class relForm(QtGui.QMainWindow):
self.selectedRelation=result self.selectedRelation=result
self.showRelation(self.selectedRelation) #Show the result in the table self.showRelation(self.selectedRelation) #Show the result in the table
except Exception, e: except Exception, e:
print e print e.__unicode__()
QtGui.QMessageBox.information(None,QtGui.QApplication.translate("Form", "Error"),"%s\n%s" % (QtGui.QApplication.translate("Form", "Check your query!"),e.__str__()) ) QtGui.QMessageBox.information(None,QtGui.QApplication.translate("Form", "Error"),u"%s\n%s" % (QtGui.QApplication.translate("Form", "Check your query!"),e.__unicode__()) )
return return
#Adds to history #Adds to history

View File

@ -8,6 +8,7 @@ It might be necessary to have the: Microsoft Visual C++ 2008 Redistributable Pac
Create an exe file Create an exe file
- Remove the directory relational_pyside (uses symlinks that won't work on windows)
- Move the file windows/input.py to ../ - Move the file windows/input.py to ../
- Check that the version number is correct - Check that the version number is correct
- Execute "python input.py py2exe" - Execute "python input.py py2exe"
@ -23,3 +24,4 @@ A directory named "Output" will be created, which will contain the installer.
Notes: Notes:
- To create the setup, don't move the "dist" directory or its content. - To create the setup, don't move the "dist" directory or its content.
- Do not delete or move the directory windows/font dejavu - Do not delete or move the directory windows/font dejavu
- If the shell is open, it will not work. The Windows shell does not support Unicode and will raise exceptions when trying to print expressions to it