diff --git a/CHANGELOG b/CHANGELOG
index 9fcf323..50bcec5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,7 @@
- Can use both PySide and PyQt
- Removed buttons for adding and deleting tuples
- Can edit relations within the GUI
+- Migrating to unicode strings (instead of byte strings)
1.0
- Adds history in the GUI
diff --git a/relational/optimizations.py b/relational/optimizations.py
index 442c15e..b4513ac 100644
--- a/relational/optimizations.py
+++ b/relational/optimizations.py
@@ -36,6 +36,21 @@ from tokenize import generate_tokens
sel_op=('//=','**=','and','not','in','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=','(',')',',','[',']')
+PRODUCT=parser.PRODUCT
+DIFFERENCE=parser.DIFFERENCE
+UNION=parser.UNION
+INTERSECTION=parser.INTERSECTION
+DIVISION=parser.DIVISION
+JOIN=parser.JOIN
+JOIN_LEFT=parser.JOIN_LEFT
+JOIN_RIGHT=parser.JOIN_RIGHT
+JOIN_FULL=parser.JOIN_FULL
+PROJECTION=parser.PROJECTION
+SELECTION=parser.SELECTION
+RENAME=parser.RENAME
+ARROW=parser.ARROW
+
+
def replace_node(replace,replacement):
'''This function replaces "replace" node with the node "with",
the father of the node will now point to the with node'''
@@ -86,7 +101,7 @@ def duplicated_select(n):
in and
'''
changes=0
- if n.name=='σ' and n.child.name=='σ':
+ if n.name==SELECTION and n.child.name==SELECTION:
if n.prop != n.child.prop: #Nested but different, joining them
n.prop = n.prop + " and " + n.child.prop
@@ -114,30 +129,30 @@ def futile_union_intersection_subtraction(n):
changes=0
#Union and intersection of the same thing
- if n.name in ('ᑌ','ᑎ') and n.left==n.right:
+ if n.name in (UNION,INTERSECTION) and n.left==n.right:
changes=1
replace_node(n,n.left)
#selection and union of the same thing
- elif (n.name == 'ᑌ'):
- if n.left.name=='σ' and n.left.child==n.right:
+ elif (n.name == UNION):
+ if n.left.name==SELECTION and n.left.child==n.right:
changes=1
replace_node(n,n.right)
- elif n.right.name=='σ' and n.right.child==n.left:
+ elif n.right.name==SELECTION and n.right.child==n.left:
changes=1
replace_node(n,n.left)
#selection and intersection of the same thing
- elif n.name == 'ᑎ':
- if n.left.name=='σ' and n.left.child==n.right:
+ elif n.name == INTERSECTION:
+ if n.left.name==SELECTION and n.left.child==n.right:
changes=1
replace_node(n,n.left)
- elif n.right.name=='σ' and n.right.child==n.left:
+ elif n.right.name==SELECTION and n.right.child==n.left:
changes=1
replace_node(n,n.right)
#Subtraction and selection of the same thing
- elif (n.name == '-' and (n.right.name=='σ' and n.right.child==n.left)): #Subtraction of two equal things, but one has a selection
+ elif (n.name == DIFFERENCE and (n.right.name==SELECTION and n.right.child==n.left)): #Subtraction of two equal things, but one has a selection
n.name=n.right.name
n.kind=n.right.kind
n.child=n.right.child
@@ -145,10 +160,10 @@ def futile_union_intersection_subtraction(n):
n.left=n.right=None
#Subtraction of the same thing or with selection on the left child
- elif (n.name=='-' and ((n.left==n.right) or (n.left.name=='σ' and n.left.child==n.right)) ):#Empty relation
+ elif (n.name==DIFFERENCE and ((n.left==n.right) or (n.left.name==SELECTION and n.left.child==n.right)) ):#Empty relation
changes=1
n.kind=parser.UNARY
- n.name='σ'
+ n.name=SELECTION
n.prop='False'
n.child=n.left.get_left_leaf()
#n.left=n.right=None
@@ -161,8 +176,8 @@ def down_to_unions_subtractions_intersections(n):
σ i==2 (c) ᑌ σ i==2(d).
'''
changes=0
- _o=('ᑌ','-','ᑎ')
- if n.name=='σ' and n.child.name in _o:
+ _o=(UNION,DIFFERENCE,INTERSECTION)
+ if n.name==SELECTION and n.child.name in _o:
left=parser.node()
left.prop=n.prop
@@ -190,7 +205,7 @@ def duplicated_projection(n):
them with π i (R)'''
changes=0
- if n.name=='π' and n.child.name=='π':
+ if n.name==PROJECTION and n.child.name==PROJECTION:
n.child=n.child.child
changes+=1
@@ -201,13 +216,13 @@ def selection_inside_projection(n):
converts them into π k(σ j (R))'''
changes=0
- if n.name=='σ' and n.child.name=='π':
+ if n.name==SELECTION and n.child.name==PROJECTION:
changes=1
temp=n.prop
n.prop=n.child.prop
n.child.prop=temp
- n.name='π'
- n.child.name='σ'
+ n.name=PROJECTION
+ n.child.name=SELECTION
return changes+recoursive_scan(selection_inside_projection,n)
@@ -219,15 +234,15 @@ def swap_union_renames(n):
Does the same with subtraction and intersection'''
changes=0
- if n.name in ('-','ᑌ','ᑎ') and n.left.name==n.right.name and n.left.name=='ρ':
+ if n.name in (DIFFERENCE,UNION,INTERSECTION) and n.left.name==n.right.name and n.left.name==RENAME:
l_vars={}
for i in n.left.prop.split(','):
- q=i.split('➡')
+ q=i.split(ARROW)
l_vars[q[0].strip()]=q[1].strip()
r_vars={}
for i in n.right.prop.split(','):
- q=i.split('➡')
+ q=i.split(ARROW)
r_vars[q[0].strip()]=q[1].strip()
if r_vars==l_vars:
@@ -240,7 +255,7 @@ def swap_union_renames(n):
q.left=n.left.child
q.right=n.right.child
- n.name='ρ'
+ n.name=RENAME
n.kind=parser.UNARY
n.child=q
n.prop=n.left.prop
@@ -252,14 +267,14 @@ def futile_renames(n):
'''This function purges renames like id->id'''
changes=0
- if n.name=='ρ':
+ if n.name==RENAME:
#Located two nested renames.
changes=1
#Creating a dictionary with the attributes
_vars={}
for i in n.prop.split(','):
- q=i.split('➡')
+ q=i.split(ARROW)
_vars[q[0].strip()]=q[1].strip()
#Scans dictionary to locate things like "a->b,b->c" and replace them with "a->c"
for key in list(_vars.keys()):
@@ -290,7 +305,7 @@ def subsequent_renames(n):
futile_renames(n)
changes=0
- if n.name=='ρ' and n.child.name==n.name:
+ if n.name==RENAME and n.child.name==n.name:
#Located two nested renames.
changes=1
#Joining the attribute into one
@@ -300,7 +315,7 @@ def subsequent_renames(n):
#Creating a dictionary with the attributes
_vars={}
for i in n.prop.split(','):
- q=i.split('➡')
+ q=i.split(ARROW)
_vars[q[0].strip()]=q[1].strip()
#Scans dictionary to locate things like "a->b,b->c" and replace them with "a->c"
for key in list(_vars.keys()):
@@ -338,7 +353,7 @@ def tokenize_select(expression):
selection. The expression can contain parenthesis.
It will use a subclass of str with the attribute level, which
will specify the nesting level of the token into parenthesis.'''
- g=generate_tokens(StringIO(expression).readline)
+ g=generate_tokens(StringIO(str(expression)).readline)
l=list(token[1] for token in g)
l.remove('')
@@ -375,13 +390,13 @@ def swap_rename_projection(n):
'''
changes=0
- if n.name=='π' and n.child.name=='ρ':
+ if n.name==PROJECTION and n.child.name==RENAME:
changes=1
#π index,name(ρ id➡index(R))
_vars={}
for i in n.child.prop.split(','):
- q=i.split('➡')
+ q=i.split(ARROW)
_vars[q[1].strip()]=q[0].strip()
_pr=n.prop.split(',')
@@ -401,7 +416,7 @@ def swap_rename_projection(n):
n.prop+='%s➡%s,' % (_vars[i],i)
n.prop=n.prop[:-1]
- n.child.name='π'
+ n.child.name=PROJECTION
n.child.prop=''
for i in _pr:
n.child.prop+=i+','
@@ -416,12 +431,12 @@ def swap_rename_select(n):
selection, so the operation is still valid.'''
changes=0
- if n.name=='σ' and n.child.name=='ρ':
+ if n.name==SELECTION and n.child.name==RENAME:
changes=1
#Dictionary containing attributes of rename
_vars={}
for i in n.child.prop.split(','):
- q=i.split('➡')
+ q=i.split(ARROW)
_vars[q[1].strip()]=q[0].strip()
#tokenizes expression in select
@@ -437,8 +452,8 @@ def swap_rename_select(n):
_tokens[i]=_vars[_tokens[i].split('.')[0]]+'.'+splitted[1]
#Swapping operators
- n.name='ρ'
- n.child.name='σ'
+ n.name=RENAME
+ n.child.name=SELECTION
n.prop=n.child.prop
n.child.prop=''
@@ -452,16 +467,16 @@ def select_union_intersect_subtract(n):
and replaces them with σ (i OR q) (a)
Removing a O(n²) operation like the union'''
changes=0
- if n.name in ('ᑌ', 'ᑎ', '-') and n.left.name=='σ' and n.right.name=='σ' and n.left.child==n.right.child:
+ if n.name in (UNION, INTERSECTION, DIFFERENCE) and n.left.name==SELECTION and n.right.name==SELECTION and n.left.child==n.right.child:
cahnges=1
- d={'ᑌ':'or', 'ᑎ':'and', '-':'and not'}
+ d={UNION:'or', INTERSECTION:'and', DIFFERENCE:'and not'}
op=d[n.name]
newnode=parser.node()
newnode.prop='((%s) %s (%s))' % (n.left.prop,op,n.right.prop)
- newnode.name='σ'
+ newnode.name=SELECTION
newnode.child=n.left.child
newnode.kind=parser.UNARY
replace_node(n,newnode)
@@ -474,12 +489,11 @@ def selection_and_product(n,rels):
i contains attributes belonging to Q and l contains attributes belonging to both'''
changes=0
- if n.name=='σ' and n.child.name in ('*','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'):
+ if n.name==SELECTION and n.child.name in (PRODUCT,JOIN,JOIN_LEFT,JOIN_RIGHT,JOIN_FULL):
l_attr=n.child.left.result_format(rels)
r_attr=n.child.right.result_format(rels)
tokens=tokenize_select(n.prop)
-
groups=[]
temp=[]
@@ -521,7 +535,7 @@ def selection_and_product(n,rels):
if len(left)>0:
changes=1
l_node=parser.node()
- l_node.name='σ'
+ l_node.name=SELECTION
l_node.kind=parser.UNARY
l_node.child=n.child.left
l_node.prop=''
@@ -539,7 +553,7 @@ def selection_and_product(n,rels):
if len(right)>0:
changes=1
r_node=parser.node()
- r_node.name='σ'
+ r_node.name=SELECTION
r_node.prop=''
r_node.kind=parser.UNARY
r_node.child=n.child.right
diff --git a/relational/optimizer.py b/relational/optimizer.py
index 7d5a7bf..a7dde2d 100644
--- a/relational/optimizer.py
+++ b/relational/optimizer.py
@@ -33,6 +33,8 @@ import parser
RELATION=parser.RELATION
UNARY=parser.UNARY
BINARY=parser.BINARY
+
+
b_operators=parser.b_operators
u_operators=parser.u_operators
op_functions=parser.op_functions
@@ -53,12 +55,12 @@ def optimize_all(expression,rels,specific=True,general=True,debug=None):
steps.
Return value: this will return an optimized version of the expression'''
- if isinstance(expression,str):
+ if isinstance(expression,unicode):
n=tree(expression) #Gets the tree
elif isinstance(expression,node):
n=expression
else:
- raise (TypeError("expression must be a string or a node"))
+ raise (TypeError("expression must be a unicode string or a node"))
if isinstance(debug,list):
dbg=True
@@ -118,7 +120,7 @@ if __name__=="__main__":
rels["D1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/dates.csv")
rels["S1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/skillo.csv")
print rels'''
- n=tree("π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))")
+ n=tree(u"π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))")
#n=tree("σ id==3 and indice==2 and name==5 or name<2(P1 * S1)")
print n
print n.toPython()
diff --git a/relational/parser.py b/relational/parser.py
index 854e0ea..ecd5eea 100644
--- a/relational/parser.py
+++ b/relational/parser.py
@@ -45,10 +45,25 @@
RELATION=0
UNARY=1
BINARY=2
-b_operators=('*','-','ᑌ','ᑎ','÷','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ') # List of binary operators
-u_operators=('π','σ','ρ') # List of unary operators
-op_functions={'*':'product','-':'difference','ᑌ':'union','ᑎ':'intersection','÷':'division','ᐅᐊ':'join','ᐅLEFTᐊ':'outer_left','ᐅRIGHTᐊ':'outer_right','ᐅFULLᐊ':'outer','π':'projection','σ':'selection','ρ':'rename'} # Associates operator with python method
+PRODUCT=u'*'
+DIFFERENCE=u'-'
+UNION=u'ᑌ'
+INTERSECTION=u'ᑎ'
+DIVISION=u'÷'
+JOIN=u'ᐅᐊ'
+JOIN_LEFT=u'ᐅLEFTᐊ'
+JOIN_RIGHT=u'ᐅRIGHTᐊ'
+JOIN_FULL=u'ᐅFULLᐊ'
+PROJECTION=u'π'
+SELECTION=u'σ'
+RENAME=u'ρ'
+ARROW=u'➡'
+
+b_operators=(u'*',u'-',u'ᑌ',u'ᑎ',u'÷',u'ᐅᐊ',u'ᐅLEFTᐊ',u'ᐅRIGHTᐊ',u'ᐅFULLᐊ') # List of binary operators
+u_operators=(u'π',u'σ',u'ρ') # List of unary operators
+
+op_functions={u'*':'product',u'-':'difference',u'ᑌ':'union',u'ᑎ':'intersection',u'÷':'division',u'ᐅᐊ':'join',u'ᐅLEFTᐊ':'outer_left',u'ᐅRIGHTᐊ':'outer_right',u'ᐅFULLᐊ':'outer',u'π':'projection',u'σ':'selection',u'ρ':'rename'} # Associates operator with python method
class node (object):
'''This class is a node of a relational expression. Leaves are relations and internal nodes are operations.
@@ -77,11 +92,11 @@ class node (object):
expression=expression[0]
#The list contains only 1 string. Means it is the name of a relation
- if len(expression)==1 and isinstance(expression[0],str):
+ if len(expression)==1 and isinstance(expression[0],unicode):
self.kind=RELATION
self.name=expression[0]
return
-
+
'''Expression from right to left, searching for binary operators
this means that binary operators have lesser priority than
unary operators.
@@ -93,7 +108,7 @@ class node (object):
within sub-lists, they won't be found here, ensuring that they will
have highest priority.'''
for i in range(len(expression)-1,-1,-1):
- if expression[i] in b_operators: #Binary operator
+ if expression[i] in b_operators: #Binary operator
self.kind=BINARY
self.name=expression[i]
self.left=node(expression[:i])
@@ -118,9 +133,9 @@ class node (object):
prop =self.prop
#Converting parameters
- if self.name=='π':#Projection
+ if self.name==u'π':#Projection
prop='\"%s\"' % prop.replace(' ','').replace(',','\",\"')
- elif self.name=="ρ": #Rename
+ elif self.name==u"ρ": #Rename
prop='{\"%s\"}' % prop.replace(',','\",\"').replace('➡','\":\"').replace(' ','')
else: #Selection
prop='\"%s\"' % prop
@@ -234,6 +249,9 @@ def tokenize(expression):
'''This function converts an expression into a list where
every token of the expression is an item of a list. Expressions into
parenthesis will be converted into sublists.'''
+ if not isinstance(expression,unicode):
+ raise Exception('expected unicode')
+
items=[] #List for the tokens
'''This is a state machine. Initial status is determined by the starting of the
@@ -266,9 +284,9 @@ def tokenize(expression):
#Removes the entire parentesis and content from the expression
expression=expression[end+1:].strip()
- elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes
- items.append(expression[0:2]) #Adding operator in the top of the list
- expression=expression[2:].strip() #Removing operator from the expression
+ elif expression.startswith(u"σ") or expression.startswith(u"π") or expression.startswith(u"ρ"): #Unary 2 bytes
+ items.append(expression[0:1]) #Adding operator in the top of the list
+ expression=expression[1:].strip() #Removing operator from the expression
if expression.startswith('('): #Expression with parenthesis, so adding what's between open and close without tokenization
par=expression.find('(',_find_matching_parenthesis(expression))
@@ -281,18 +299,18 @@ def tokenize(expression):
items.append(expression[0])
expression=expression[1:].strip() #1 char from the expression
state=4
- elif expression.startswith("ᑎ") or expression.startswith("ᑌ"): #Binary short 3 bytes
- items.append(expression[0:3]) #Adding operator in the top of the list
- expression=expression[3:].strip() #Removing operator from the expression
+ elif expression.startswith(u"ᑎ") or expression.startswith(u"ᑌ"): #Binary short 3 bytes
+ items.append(expression[0:1]) #Adding operator in the top of the list
+ expression=expression[1:].strip() #Removing operator from the expression
state=4
- elif expression.startswith("÷"): #Binary short 2 bytes
- items.append(expression[0:2]) #Adding operator in the top of the list
- expression=expression[2:].strip() #Removing operator from the expression
+ elif expression.startswith(u"÷"): #Binary short 2 bytes
+ items.append(expression[0:1]) #Adding operator in the top of the list
+ expression=expression[1:].strip() #Removing operator from the expression
state=4
- elif expression.startswith("ᐅ"): #Binary long
- i=expression.find("ᐊ")
- items.append(expression[:i+3])
- expression=expression[i+3:].strip()
+ elif expression.startswith(u"ᐅ"): #Binary long
+ i=expression.find(u"ᐊ")
+ items.append(expression[:i+1])
+ expression=expression[i+1:].strip()
state=4
else: #Relation (hopefully)
@@ -347,6 +365,10 @@ def parse(expr):
return tree(expr).toPython()
if __name__=="__main__":
- while True:
- e=raw_input("Expression: ")
- print parse(e)
\ No newline at end of file
+ #while True:
+ # e=raw_input("Expression: ")
+ # print parse(e)
+ b=u"σ age>1 and skill=='C' (peopleᐅᐊskills)"
+ print b[0]
+ parse(b)
+
\ No newline at end of file
diff --git a/relational_gui/about.py b/relational_gui/about.py
index 2df14da..53c93b6 100644
--- a/relational_gui/about.py
+++ b/relational_gui/about.py
@@ -136,10 +136,7 @@ class Ui_Dialog(object):
self.label_3.setText(QtGui.QApplication.translate("Dialog", "Version "+version, None, QtGui.QApplication.UnicodeUTF8))
self.label_3.setTextInteractionFlags(QtCore.Qt.LinksAccessibleByMouse|QtCore.Qt.TextSelectableByMouse)
self.groupBox_3.setTitle(QtGui.QApplication.translate("Dialog", "Author", None, QtGui.QApplication.UnicodeUTF8))
- if os.name=='nt': #Differentiates acknowledgements depending on the system
- self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli <tiposchi@tiscali.it>
Emilio Di Prima <emiliodiprima [at] msn [dot] com> (For the windows version)", None, QtGui.QApplication.UnicodeUTF8))
- else:
- self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli <tiposchi@tiscali.it>", None, QtGui.QApplication.UnicodeUTF8))
+ self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli <tiposchi@tiscali.it>
Emilio Di Prima <emiliodiprima[at]msn[dot]com> (For the windows version)", None, QtGui.QApplication.UnicodeUTF8))
self.label_2.setOpenExternalLinks (True)
self.label_2.setTextInteractionFlags(QtCore.Qt.LinksAccessibleByMouse|QtCore.Qt.TextSelectableByMouse)
self.groupBox_2.setTitle(QtGui.QApplication.translate("Dialog", "Links", None, QtGui.QApplication.UnicodeUTF8))
diff --git a/relational_gui/compatibility.py b/relational_gui/compatibility.py
index 0886143..7e19695 100644
--- a/relational_gui/compatibility.py
+++ b/relational_gui/compatibility.py
@@ -31,8 +31,8 @@ except:
def get_py_str(a):
'''Returns a python string out of a QString'''
if pyqt:
- return str(a.toUtf8())
- return str(a.encode("utf-8")) #Already a python string in PySide
+ return unicode(a.toUtf8(),'utf-8')
+ return unicode(a.encode("utf-8")) #Already a python string in PySide
def set_utf8_text(component,text):
if not pyqt:
diff --git a/relational_gui/guihandler.py b/relational_gui/guihandler.py
index bfd3bcb..5150b3b 100644
--- a/relational_gui/guihandler.py
+++ b/relational_gui/guihandler.py
@@ -81,13 +81,14 @@ class relForm(QtGui.QMainWindow):
'''Executes the query'''
query=compatibility.get_py_str(self.ui.txtQuery.text())
-
+ print query.__class__
res_rel=compatibility.get_py_str(self.ui.txtResult.text())#result relation's name
if not rtypes.is_valid_relation_name(res_rel):
QtGui.QMessageBox.information(self,QtGui.QApplication.translate("Form", "Error"),QtGui.QApplication.translate("Form", "Wrong name for destination relation."))
return
+ expr=parser.parse(query)#Converting expression to python code
try:
#Converting string to utf8 and then from qstring to normal string
expr=parser.parse(query)#Converting expression to python code
@@ -105,7 +106,7 @@ class relForm(QtGui.QMainWindow):
#Adds to history
item='%s = %s' % (compatibility.get_py_str(self.ui.txtResult.text()),compatibility.get_py_str(self.ui.txtQuery.text()))
- item=unicode(item.decode('utf-8'))
+ #item=item.decode('utf-8'))
compatibility.add_list_item(self.ui.lstHistory,item)
self.qcounter+=1