def _split_conjuncts(tokens):
    '''Splits a list of selection tokens into groups, cutting at every
    'and' token. The 'and' tokens themselves are discarded.

    NOTE(review): every 'and' is treated as a top-level conjunction.
    If tokenize_select can emit 'or' or parenthesis tokens, an
    expression such as "a and b or c" would be split incorrectly;
    confirm against tokenize_select before relying on this for
    such expressions.'''
    groups = []
    current = []
    for token in tokens:
        if token == 'and':
            groups.append(current)
            current = []
        else:
            current.append(token)
    if current:
        groups.append(current)
    return groups


def _join_conjuncts(groups):
    '''Rebuilds a selection expression from token groups: every token is
    followed by a space and the groups are separated by ' and '.'''
    return ' and '.join(
        ''.join(token + ' ' for token in group) for group in groups)


def _selection_over(child, groups):
    '''Returns a new σ node having child as its child and the conjunction
    of groups as its condition.'''
    selection = optimizer.node()
    selection.name = 'σ'
    selection.kind = optimizer.UNARY
    selection.child = child
    selection.prop = _join_conjuncts(groups)
    return selection


def selection_and_product(n, rels):
    '''This function locates things like σ k (R*Q) and converts them into
    σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
    i contains attributes belonging to Q and l contains attributes belonging
    to both.

    n is the root of the (sub)tree to optimize; it is modified in place.
    rels is the dictionary passed to result_format to obtain the attributes
    of the two operands of the product.

    Conditions that mention attributes of both operands, or of none of them,
    are left in the outer selection. If no condition is left there, the outer
    selection is removed and n becomes the product itself.

    Returns the number of changes performed on the tree. A selection that
    can't be moved at all is not counted as a change.'''

    #TODO document in the wiki
    changes = 0

    if n.name == 'σ' and n.child.name == '*':
        product = n.child

        # result_format returns None when it can't compute the fields
        # (e.g. rels is None); in that case no attribute is known and
        # every condition stays in the outer selection.
        l_attr = product.left.result_format(rels) or ()
        r_attr = product.right.result_format(rels) or ()

        left = []
        right = []
        both = []

        for group in _split_conjuncts(tokenize_select(n.prop)):
            in_left = any(token in l_attr for token in group)
            in_right = any(token in r_attr for token in group)

            if in_left and not in_right:
                left.append(group)
            elif in_right and not in_left:
                right.append(group)
            else:  # Fields in both, or unknown: keep it above the product
                both.append(group)

        # The tree really changes only if something is pushed down or if
        # the outer selection disappears. Reporting a change otherwise
        # would make the same node match again on every pass.
        if left or right or not both:
            changes = 1

        # Preparing left selection
        if left:
            product.left = _selection_over(product.left, left)

        # Preparing right selection
        if right:
            product.right = _selection_over(product.right, right)

        # Changing main selection
        if both:
            n.prop = _join_conjuncts(both)
        else:  # No need for general select: n takes the place of the product
            n.prop = ''
            n.name = product.name
            n.kind = product.kind
            n.left = product.left
            n.right = product.right

    # recursive scan
    if n.kind == optimizer.UNARY:
        changes += selection_and_product(n.child, rels)
    elif n.kind == optimizer.BINARY:
        changes += selection_and_product(n.right, rels)
        changes += selection_and_product(n.left, rels)
    return changes
Providing a dictionary with names and instances - of relations in the constructor, the node is able to return the list of fields that the result will have. - ''' + the string with the props of the operation.''' kind=None def __init__(self,expression=None): @@ -80,14 +76,12 @@ class node (object): '''This function returns a list containing the fields that the resulting relation will have. Since it needs to know real instances of relations, it requires a dictionary where keys are the names of the relations and the values are the relation objects.''' - print "Rels========",rels if rels==None: return if self.kind==RELATION: return rels[self.name].header.attributes elif self.kind==BINARY and self.name in ('-','ᑌ','ᑎ'): - print "OK" return self.left.result_format(rels) elif self.name=='π': l=[] @@ -246,19 +240,23 @@ if __name__=="__main__": #a= tokenize(u"π a,b (a*b)") #a=tokenize("(a-b*c)*(b-c)") - import relation + import relation,optimizations rels={} rels["P1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/people.csv") rels["P2"]= relation.relation("/home/salvo/dev/relational/trunk/samples/people.csv") rels["R1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/person_room.csv") rels["R2"]= relation.relation("/home/salvo/dev/relational/trunk/samples/person_room.csv") + rels["D1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/dates.csv") + rels["S1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/skillo.csv") print rels #n=tree("π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))") - n=tree("P1 ᐅᐊ R2") + n=tree("σ id==3 and id==indice and indice==2 and name==5(P1 * S1)") + print optimizations.selection_and_product(n,rels) + print n print n.result_format(rels) - + #a=general_optimize("σ age==3 and qq<=2 or nome!='ciccio d\\'urso'(ρ ciccio➡age,nome➡nom(R-Q))") #a=general_optimize("σ i==2 (σ b>5 (d))") #print a