diff --git a/CHANGELOG b/CHANGELOG index 9fcf323..50bcec5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,7 @@ - Can use both PySide and PyQt - Removed buttons for adding and deleting tuples - Can edit relations within the GUI +- Migrating to unicode (instead of strings) 1.0 - Adds history in the GUI diff --git a/relational/optimizations.py b/relational/optimizations.py index 442c15e..b4513ac 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -36,6 +36,21 @@ from tokenize import generate_tokens sel_op=('//=','**=','and','not','in','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=','(',')',',','[',']') +PRODUCT=parser.PRODUCT +DIFFERENCE=parser.DIFFERENCE +UNION=parser.UNION +INTERSECTION=parser.INTERSECTION +DIVISION=parser.DIVISION +JOIN=parser.JOIN +JOIN_LEFT=parser.JOIN_LEFT +JOIN_RIGHT=parser.JOIN_RIGHT +JOIN_FULL=parser.JOIN_FULL +PROJECTION=parser.PROJECTION +SELECTION=parser.SELECTION +RENAME=parser.RENAME +ARROW=parser.ARROW + + def replace_node(replace,replacement): '''This function replaces "replace" node with the node "with", the father of the node will now point to the with node''' @@ -86,7 +101,7 @@ def duplicated_select(n): in and ''' changes=0 - if n.name=='σ' and n.child.name=='σ': + if n.name==SELECTION and n.child.name==SELECTION: if n.prop != n.child.prop: #Nested but different, joining them n.prop = n.prop + " and " + n.child.prop @@ -114,30 +129,30 @@ def futile_union_intersection_subtraction(n): changes=0 #Union and intersection of the same thing - if n.name in ('ᑌ','ᑎ') and n.left==n.right: + if n.name in (UNION,INTERSECTION) and n.left==n.right: changes=1 replace_node(n,n.left) #selection and union of the same thing - elif (n.name == 'ᑌ'): - if n.left.name=='σ' and n.left.child==n.right: + elif (n.name == UNION): + if n.left.name==SELECTION and n.left.child==n.right: changes=1 replace_node(n,n.right) - elif n.right.name=='σ' and n.right.child==n.left: + elif n.right.name==SELECTION and n.right.child==n.left: changes=1 replace_node(n,n.left) #selection and intersection of the same thing - elif n.name == 'ᑎ': - if n.left.name=='σ' and n.left.child==n.right: + elif n.name == INTERSECTION: + if n.left.name==SELECTION and n.left.child==n.right: changes=1 replace_node(n,n.left) - elif n.right.name=='σ' and n.right.child==n.left: + elif n.right.name==SELECTION and n.right.child==n.left: changes=1 replace_node(n,n.right) #Subtraction and selection of the same thing - elif (n.name == '-' and (n.right.name=='σ' and n.right.child==n.left)): #Subtraction of two equal things, but one has a selection + elif (n.name == DIFFERENCE and (n.right.name==SELECTION and n.right.child==n.left)): #Subtraction of two equal things, but one has a selection n.name=n.right.name n.kind=n.right.kind n.child=n.right.child @@ -145,10 +160,10 @@ def futile_union_intersection_subtraction(n): n.left=n.right=None #Subtraction of the same thing or with selection on the left child - elif (n.name=='-' and ((n.left==n.right) or (n.left.name=='σ' and n.left.child==n.right)) ):#Empty relation + elif (n.name==DIFFERENCE and ((n.left==n.right) or (n.left.name==SELECTION and n.left.child==n.right)) ):#Empty relation changes=1 n.kind=parser.UNARY - n.name='σ' + n.name=SELECTION n.prop='False' n.child=n.left.get_left_leaf() #n.left=n.right=None @@ -161,8 +176,8 @@ def down_to_unions_subtractions_intersections(n): σ i==2 (c) ᑌ σ i==2(d). ''' changes=0 - _o=('ᑌ','-','ᑎ') - if n.name=='σ' and n.child.name in _o: + _o=(UNION,DIFFERENCE,INTERSECTION) + if n.name==SELECTION and n.child.name in _o: left=parser.node() left.prop=n.prop @@ -190,7 +205,7 @@ def duplicated_projection(n): them with π i (R)''' changes=0 - if n.name=='π' and n.child.name=='π': + if n.name==PROJECTION and n.child.name==PROJECTION: n.child=n.child.child changes+=1 @@ -201,13 +216,13 @@ def selection_inside_projection(n): converts them into π k(σ j (R))''' changes=0 - if n.name=='σ' and n.child.name=='π': + if n.name==SELECTION and n.child.name==PROJECTION: changes=1 temp=n.prop n.prop=n.child.prop n.child.prop=temp - n.name='π' - n.child.name='σ' + n.name=PROJECTION + n.child.name=SELECTION return changes+recoursive_scan(selection_inside_projection,n) @@ -219,15 +234,15 @@ def swap_union_renames(n): Does the same with subtraction and intersection''' changes=0 - if n.name in ('-','ᑌ','ᑎ') and n.left.name==n.right.name and n.left.name=='ρ': + if n.name in (DIFFERENCE,UNION,INTERSECTION) and n.left.name==n.right.name and n.left.name==RENAME: l_vars={} for i in n.left.prop.split(','): - q=i.split('➡') + q=i.split(ARROW) l_vars[q[0].strip()]=q[1].strip() r_vars={} for i in n.right.prop.split(','): - q=i.split('➡') + q=i.split(ARROW) r_vars[q[0].strip()]=q[1].strip() if r_vars==l_vars: @@ -240,7 +255,7 @@ def swap_union_renames(n): q.left=n.left.child q.right=n.right.child - n.name='ρ' + n.name=RENAME n.kind=parser.UNARY n.child=q n.prop=n.left.prop @@ -252,14 +267,14 @@ def futile_renames(n): '''This function purges renames like id->id''' changes=0 - if n.name=='ρ': + if n.name==RENAME: #Located two nested renames. changes=1 #Creating a dictionary with the attributes _vars={} for i in n.prop.split(','): - q=i.split('➡') + q=i.split(ARROW) _vars[q[0].strip()]=q[1].strip() #Scans dictionary to locate things like "a->b,b->c" and replace them with "a->c" for key in list(_vars.keys()): @@ -290,7 +305,7 @@ def subsequent_renames(n): futile_renames(n) changes=0 - if n.name=='ρ' and n.child.name==n.name: + if n.name==RENAME and n.child.name==n.name: #Located two nested renames. changes=1 #Joining the attribute into one @@ -300,7 +315,7 @@ def subsequent_renames(n): #Creating a dictionary with the attributes _vars={} for i in n.prop.split(','): - q=i.split('➡') + q=i.split(ARROW) _vars[q[0].strip()]=q[1].strip() #Scans dictionary to locate things like "a->b,b->c" and replace them with "a->c" for key in list(_vars.keys()): @@ -338,7 +353,7 @@ def tokenize_select(expression): selection. The expression can contain parenthesis. It will use a subclass of str with the attribute level, which will specify the nesting level of the token into parenthesis.''' - g=generate_tokens(StringIO(expression).readline) + g=generate_tokens(StringIO(str(expression)).readline) l=list(token[1] for token in g) l.remove('') @@ -375,13 +390,13 @@ def swap_rename_projection(n): ''' changes=0 - if n.name=='π' and n.child.name=='ρ': + if n.name==PROJECTION and n.child.name==RENAME: changes=1 #π index,name(ρ id➡index(R)) _vars={} for i in n.child.prop.split(','): - q=i.split('➡') + q=i.split(ARROW) _vars[q[1].strip()]=q[0].strip() _pr=n.prop.split(',') @@ -401,7 +416,7 @@ def swap_rename_projection(n): n.prop+='%s➡%s,' % (_vars[i],i) n.prop=n.prop[:-1] - n.child.name='π' + n.child.name=PROJECTION n.child.prop='' for i in _pr: n.child.prop+=i+',' @@ -416,12 +431,12 @@ def swap_rename_select(n): selection, so the operation is still valid.''' changes=0 - if n.name=='σ' and n.child.name=='ρ': + if n.name==SELECTION and n.child.name==RENAME: changes=1 #Dictionary containing attributes of rename _vars={} for i in n.child.prop.split(','): - q=i.split('➡') + q=i.split(ARROW) _vars[q[1].strip()]=q[0].strip() #tokenizes expression in select @@ -437,8 +452,8 @@ def swap_rename_select(n): _tokens[i]=_vars[_tokens[i].split('.')[0]]+'.'+splitted[1] #Swapping operators - n.name='ρ' - n.child.name='σ' + n.name=RENAME + n.child.name=SELECTION n.prop=n.child.prop n.child.prop='' @@ -452,16 +467,16 @@ def select_union_intersect_subtract(n): and replaces them with σ (i OR q) (a) Removing a O(n²) operation like the union''' changes=0 - if n.name in ('ᑌ', 'ᑎ', '-') and n.left.name=='σ' and n.right.name=='σ' and n.left.child==n.right.child: + if n.name in (UNION, INTERSECTION, DIFFERENCE) and n.left.name==SELECTION and n.right.name==SELECTION and n.left.child==n.right.child: cahnges=1 - d={'ᑌ':'or', 'ᑎ':'and', '-':'and not'} + d={UNION:'or', INTERSECTION:'and', DIFFERENCE:'and not'} op=d[n.name] newnode=parser.node() newnode.prop='((%s) %s (%s))' % (n.left.prop,op,n.right.prop) - newnode.name='σ' + newnode.name=SELECTION newnode.child=n.left.child newnode.kind=parser.UNARY replace_node(n,newnode) @@ -474,12 +489,11 @@ def selection_and_product(n,rels): i contains attributes belonging to Q and l contains attributes belonging to both''' changes=0 - if n.name=='σ' and n.child.name in ('*','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'): + if n.name==SELECTION and n.child.name in (PRODUCT,JOIN,JOIN_LEFT,JOIN_RIGHT,JOIN_FULL): l_attr=n.child.left.result_format(rels) r_attr=n.child.right.result_format(rels) tokens=tokenize_select(n.prop) - groups=[] temp=[] @@ -521,7 +535,7 @@ def selection_and_product(n,rels): if len(left)>0: changes=1 l_node=parser.node() - l_node.name='σ' + l_node.name=SELECTION l_node.kind=parser.UNARY l_node.child=n.child.left l_node.prop='' @@ -539,7 +553,7 @@ def selection_and_product(n,rels): if len(right)>0: changes=1 r_node=parser.node() - r_node.name='σ' + r_node.name=SELECTION r_node.prop='' r_node.kind=parser.UNARY r_node.child=n.child.right diff --git a/relational/optimizer.py b/relational/optimizer.py index 7d5a7bf..a7dde2d 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -33,6 +33,8 @@ import parser RELATION=parser.RELATION UNARY=parser.UNARY BINARY=parser.BINARY + + b_operators=parser.b_operators u_operators=parser.u_operators op_functions=parser.op_functions @@ -53,12 +55,12 @@ def optimize_all(expression,rels,specific=True,general=True,debug=None): steps. Return value: this will return an optimized version of the expression''' - if isinstance(expression,str): + if isinstance(expression,unicode): n=tree(expression) #Gets the tree elif isinstance(expression,node): n=expression else: - raise (TypeError("expression must be a string or a node")) + raise (TypeError("expression must be a unicode string or a node")) if isinstance(debug,list): dbg=True @@ -118,7 +120,7 @@ if __name__=="__main__": rels["D1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/dates.csv") rels["S1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/skillo.csv") print rels''' - n=tree("π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))") + n=tree(u"π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))") #n=tree("σ id==3 and indice==2 and name==5 or name<2(P1 * S1)") print n print n.toPython() diff --git a/relational/parser.py b/relational/parser.py index 854e0ea..ecd5eea 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -45,10 +45,25 @@ RELATION=0 UNARY=1 BINARY=2 -b_operators=('*','-','ᑌ','ᑎ','÷','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ') # List of binary operators -u_operators=('π','σ','ρ') # List of unary operators -op_functions={'*':'product','-':'difference','ᑌ':'union','ᑎ':'intersection','÷':'division','ᐅᐊ':'join','ᐅLEFTᐊ':'outer_left','ᐅRIGHTᐊ':'outer_right','ᐅFULLᐊ':'outer','π':'projection','σ':'selection','ρ':'rename'} # Associates operator with python method +PRODUCT=u'*' +DIFFERENCE=u'-' +UNION=u'ᑌ' +INTERSECTION=u'ᑎ' +DIVISION=u'÷' +JOIN=u'ᐅᐊ' +JOIN_LEFT=u'ᐅLEFTᐊ' +JOIN_RIGHT=u'ᐅRIGHTᐊ' +JOIN_FULL=u'ᐅFULLᐊ' +PROJECTION=u'π' +SELECTION=u'σ' +RENAME=u'ρ' +ARROW=u'➡' + +b_operators=(u'*',u'-',u'ᑌ',u'ᑎ',u'÷',u'ᐅᐊ',u'ᐅLEFTᐊ',u'ᐅRIGHTᐊ',u'ᐅFULLᐊ') # List of binary operators +u_operators=(u'π',u'σ',u'ρ') # List of unary operators + +op_functions={u'*':'product',u'-':'difference',u'ᑌ':'union',u'ᑎ':'intersection',u'÷':'division',u'ᐅᐊ':'join',u'ᐅLEFTᐊ':'outer_left',u'ᐅRIGHTᐊ':'outer_right',u'ᐅFULLᐊ':'outer',u'π':'projection',u'σ':'selection',u'ρ':'rename'} # Associates operator with python method class node (object): '''This class is a node of a relational expression. Leaves are relations and internal nodes are operations. @@ -77,11 +92,11 @@ class node (object): expression=expression[0] #The list contains only 1 string. Means it is the name of a relation - if len(expression)==1 and isinstance(expression[0],str): + if len(expression)==1 and isinstance(expression[0],unicode): self.kind=RELATION self.name=expression[0] return - + '''Expression from right to left, searching for binary operators this means that binary operators have lesser priority than unary operators. @@ -93,7 +108,7 @@ class node (object): within sub-lists, they won't be found here, ensuring that they will have highest priority.''' for i in range(len(expression)-1,-1,-1): - if expression[i] in b_operators: #Binary operator + if expression[i] in b_operators: #Binary operator self.kind=BINARY self.name=expression[i] self.left=node(expression[:i]) @@ -118,9 +133,9 @@ class node (object): prop =self.prop #Converting parameters - if self.name=='π':#Projection + if self.name==u'π':#Projection prop='\"%s\"' % prop.replace(' ','').replace(',','\",\"') - elif self.name=="ρ": #Rename + elif self.name==u"ρ": #Rename prop='{\"%s\"}' % prop.replace(',','\",\"').replace('➡','\":\"').replace(' ','') else: #Selection prop='\"%s\"' % prop @@ -234,6 +249,9 @@ def tokenize(expression): '''This function converts an expression into a list where every token of the expression is an item of a list. Expressions into parenthesis will be converted into sublists.''' + if not isinstance(expression,unicode): + raise Exception('expected unicode') + items=[] #List for the tokens '''This is a state machine. Initial status is determined by the starting of the @@ -266,9 +284,9 @@ def tokenize(expression): #Removes the entire parentesis and content from the expression expression=expression[end+1:].strip() - elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes - items.append(expression[0:2]) #Adding operator in the top of the list - expression=expression[2:].strip() #Removing operator from the expression + elif expression.startswith(u"σ") or expression.startswith(u"π") or expression.startswith(u"ρ"): #Unary 2 bytes + items.append(expression[0:1]) #Adding operator in the top of the list + expression=expression[1:].strip() #Removing operator from the expression if expression.startswith('('): #Expression with parenthesis, so adding what's between open and close without tokenization par=expression.find('(',_find_matching_parenthesis(expression)) @@ -281,18 +299,18 @@ def tokenize(expression): items.append(expression[0]) expression=expression[1:].strip() #1 char from the expression state=4 - elif expression.startswith("ᑎ") or expression.startswith("ᑌ"): #Binary short 3 bytes - items.append(expression[0:3]) #Adding operator in the top of the list - expression=expression[3:].strip() #Removing operator from the expression + elif expression.startswith(u"ᑎ") or expression.startswith(u"ᑌ"): #Binary short 3 bytes + items.append(expression[0:1]) #Adding operator in the top of the list + expression=expression[1:].strip() #Removing operator from the expression state=4 - elif expression.startswith("÷"): #Binary short 2 bytes - items.append(expression[0:2]) #Adding operator in the top of the list - expression=expression[2:].strip() #Removing operator from the expression + elif expression.startswith(u"÷"): #Binary short 2 bytes + items.append(expression[0:1]) #Adding operator in the top of the list + expression=expression[1:].strip() #Removing operator from the expression state=4 - elif expression.startswith("ᐅ"): #Binary long - i=expression.find("ᐊ") - items.append(expression[:i+3]) - expression=expression[i+3:].strip() + elif expression.startswith(u"ᐅ"): #Binary long + i=expression.find(u"ᐊ") + items.append(expression[:i+1]) + expression=expression[i+1:].strip() state=4 else: #Relation (hopefully) @@ -347,6 +365,10 @@ def parse(expr): return tree(expr).toPython() if __name__=="__main__": - while True: - e=raw_input("Expression: ") - print parse(e) \ No newline at end of file + #while True: + # e=raw_input("Expression: ") + # print parse(e) + b=u"σ age>1 and skill=='C' (peopleᐅᐊskills)" + print b[0] + parse(b) + \ No newline at end of file diff --git a/relational_gui/about.py b/relational_gui/about.py index 2df14da..53c93b6 100644 --- a/relational_gui/about.py +++ b/relational_gui/about.py @@ -136,10 +136,7 @@ class Ui_Dialog(object): self.label_3.setText(QtGui.QApplication.translate("Dialog", "Version "+version, None, QtGui.QApplication.UnicodeUTF8)) self.label_3.setTextInteractionFlags(QtCore.Qt.LinksAccessibleByMouse|QtCore.Qt.TextSelectableByMouse) self.groupBox_3.setTitle(QtGui.QApplication.translate("Dialog", "Author", None, QtGui.QApplication.UnicodeUTF8)) - if os.name=='nt': #Differentiates acknowledgements depending on the system - self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli <tiposchi@tiscali.it>
Emilio Di Prima <emiliodiprima [at] msn [dot] com> (For the windows version)", None, QtGui.QApplication.UnicodeUTF8)) - else: - self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli <tiposchi@tiscali.it>", None, QtGui.QApplication.UnicodeUTF8)) + self.label_2.setText(QtGui.QApplication.translate("Dialog", "Salvo \"LtWorf\" Tomaselli <tiposchi@tiscali.it>
Emilio Di Prima <emiliodiprima[at]msn[dot]com> (For the windows version)", None, QtGui.QApplication.UnicodeUTF8)) self.label_2.setOpenExternalLinks (True) self.label_2.setTextInteractionFlags(QtCore.Qt.LinksAccessibleByMouse|QtCore.Qt.TextSelectableByMouse) self.groupBox_2.setTitle(QtGui.QApplication.translate("Dialog", "Links", None, QtGui.QApplication.UnicodeUTF8)) diff --git a/relational_gui/compatibility.py b/relational_gui/compatibility.py index 0886143..7e19695 100644 --- a/relational_gui/compatibility.py +++ b/relational_gui/compatibility.py @@ -31,8 +31,8 @@ except: def get_py_str(a): '''Returns a python string out of a QString''' if pyqt: - return str(a.toUtf8()) - return str(a.encode("utf-8")) #Already a python string in PySide + return unicode(a.toUtf8(),'utf-8') + return unicode(a.encode("utf-8")) #Already a python string in PySide def set_utf8_text(component,text): if not pyqt: diff --git a/relational_gui/guihandler.py b/relational_gui/guihandler.py index bfd3bcb..5150b3b 100644 --- a/relational_gui/guihandler.py +++ b/relational_gui/guihandler.py @@ -81,13 +81,14 @@ class relForm(QtGui.QMainWindow): '''Executes the query''' query=compatibility.get_py_str(self.ui.txtQuery.text()) - + print query.__class__ res_rel=compatibility.get_py_str(self.ui.txtResult.text())#result relation's name if not rtypes.is_valid_relation_name(res_rel): QtGui.QMessageBox.information(self,QtGui.QApplication.translate("Form", "Error"),QtGui.QApplication.translate("Form", "Wrong name for destination relation.")) return + expr=parser.parse(query)#Converting expression to python code try: #Converting string to utf8 and then from qstring to normal string expr=parser.parse(query)#Converting expression to python code @@ -105,7 +106,7 @@ class relForm(QtGui.QMainWindow): #Adds to history item='%s = %s' % (compatibility.get_py_str(self.ui.txtResult.text()),compatibility.get_py_str(self.ui.txtQuery.text())) - item=unicode(item.decode('utf-8')) + #item=item.decode('utf-8')) compatibility.add_list_item(self.ui.lstHistory,item) self.qcounter+=1