Code refactoring to move the new parser out of optimizer.py and into parser.py; optimizer.py will still be compatible
git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@190 014f5005-505e-4b48-8d0a-63407b615a7c
This commit is contained in:
parent
6524ea2d9f
commit
3c4b91272b
@ -86,3 +86,4 @@
|
||||
- Implemented futile_union_intersection_subtraction general optimization
|
||||
- Implemented swap_rename_projection general optimization
|
||||
- Replaced old relational algebra to python compiler with new one based on the new tokenizer/parser (Rev 188)
|
||||
- Code refactoring to move the new parser out of optimizer.py and into parser.py; optimizer.py will still be compatible (Rev 190)
|
@ -22,240 +22,20 @@
|
||||
For now it is highly experimental, and it shouldn't be used in 3rd party applications.'''
|
||||
|
||||
import optimizations
|
||||
|
||||
RELATION=0
|
||||
UNARY=1
|
||||
BINARY=2
|
||||
b_operators=('*','-','ᑌ','ᑎ','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ')
|
||||
u_operators=('π','σ','ρ')
|
||||
|
||||
op_functions={'*':'product','-':'difference','ᑌ':'union','ᑎ':'intersection','ᐅᐊ':'join','ᐅLEFTᐊ':'outer_left','ᐅRIGHTᐊ':'outer_right','ᐅFULLᐊ':'outer','π':'projection','σ':'selection','ρ':'rename'}
|
||||
|
||||
class node (object):
|
||||
'''This class is a node of a relational expression. Leaves are relations and internal nodes are operations.
|
||||
|
||||
The kind property says if the node is a binary operator, unary operator or relation.
|
||||
Since relations are leaves, a relation node will have no attribute for children.
|
||||
|
||||
If the node is a binary operator, it will have left and right properties.
|
||||
|
||||
If the node is a unary operator, it will have a child, pointing to the child node and a prop containing
|
||||
the string with the props of the operation.'''
|
||||
kind=None
|
||||
|
||||
def __init__(self,expression=None):
|
||||
|
||||
if expression==None or len(expression)==0:
|
||||
return
|
||||
|
||||
'''Generates the tree from the tokenized expression'''
|
||||
|
||||
#If the list contains only a list, it will consider the lower level list.
|
||||
#This will allow things like ((((((a))))) to work
|
||||
while len(expression)==1 and isinstance(expression[0],list):
|
||||
expression=expression[0]
|
||||
|
||||
#The list contains only 1 string. Means it is the name of a relation
|
||||
if len(expression)==1 and isinstance(expression[0],str):
|
||||
self.kind=RELATION
|
||||
self.name=expression[0]
|
||||
return
|
||||
|
||||
'''Expression from right to left, searching for binary operators
|
||||
this means that binary operators have lesser priority than
|
||||
unary operators.
|
||||
It find the operator with lesser priority, uses it as root of this
|
||||
(sub)tree using everything on its left as left parameter (so building
|
||||
a left subtree with the part of the list located on left) and doing
|
||||
the same on right.
|
||||
Since it searches for strings, and expressions into parenthesis are
|
||||
within sub-lists, they won't be found here, ensuring that they will
|
||||
have highest priority.'''
|
||||
for i in range(len(expression)-1,-1,-1):
|
||||
if expression[i] in b_operators: #Binary operator
|
||||
self.kind=BINARY
|
||||
self.name=expression[i]
|
||||
self.left=node(expression[:i])
|
||||
self.right=node(expression[i+1:])
|
||||
return
|
||||
'''Searches for unary operators, parsing from right to left'''
|
||||
for i in range(len(expression)-1,-1,-1):
|
||||
if expression[i] in u_operators: #Unary operator
|
||||
self.kind=UNARY
|
||||
self.name=expression[i]
|
||||
self.prop=expression[1+i].strip()
|
||||
self.child=node(expression[2+i])
|
||||
|
||||
return
|
||||
pass
|
||||
def toPython(self):
|
||||
'''This method converts the expression into python code'''
|
||||
if self.name in b_operators:
|
||||
return '%s.%s(%s)' % (self.left.toPython(),op_functions[self.name],self.right.toPython())
|
||||
elif self.name in u_operators:
|
||||
prop =self.prop
|
||||
|
||||
#Converting parameters
|
||||
if self.name=='π':#Projection
|
||||
prop='\"%s\"' % prop.replace(' ','').replace(',','\",\"')
|
||||
elif self.name=="ρ": #Rename
|
||||
prop='{\"%s\"}' % prop.replace(',','\",\"').replace('➡','\":\"').replace(' ','')
|
||||
else: #Selection
|
||||
prop='\"%s\"' % prop
|
||||
|
||||
return '%s.%s(%s)' % (self.child.toPython(),op_functions[self.name],prop)
|
||||
else:
|
||||
return self.name
|
||||
pass
|
||||
def result_format(self,rels):
|
||||
'''This function returns a list containing the fields that the resulting relation will have.
|
||||
Since it needs to know real instances of relations, it requires a dictionary where keys are
|
||||
the names of the relations and the values are the relation objects.'''
|
||||
if rels==None:
|
||||
return
|
||||
|
||||
if self.kind==RELATION:
|
||||
return list(rels[self.name].header.attributes)
|
||||
elif self.kind==BINARY and self.name in ('-','ᑌ','ᑎ'):
|
||||
return self.left.result_format(rels)
|
||||
elif self.name=='π':
|
||||
l=[]
|
||||
for i in self.prop.split(','):
|
||||
l.append(i.strip())
|
||||
return l
|
||||
elif self.name=='*':
|
||||
return self.left.result_format(rels)+self.right.result_format(rels)
|
||||
elif self.name=='σ' :
|
||||
return self.child.result_format(rels)
|
||||
elif self.name=='ρ':
|
||||
_vars={}
|
||||
for i in self.prop.split(','):
|
||||
q=i.split('➡')
|
||||
_vars[q[0].strip()]=q[1].strip()
|
||||
|
||||
_fields=self.child.result_format(rels)
|
||||
for i in range(len(_fields)):
|
||||
if _fields[i] in _vars:
|
||||
_fields[i]=_vars[_fields[i]]
|
||||
return _fields
|
||||
elif self.name in ('ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'):
|
||||
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
|
||||
import parser
|
||||
|
||||
|
||||
pass
|
||||
|
||||
def __eq__(self,other):
|
||||
if not (isinstance(other,node) and self.name==other.name and self.kind==other.kind):
|
||||
return False
|
||||
|
||||
if self.kind==UNARY:
|
||||
if other.prop!=self.prop:
|
||||
return False
|
||||
return self.child==other.child
|
||||
if self.kind==BINARY:
|
||||
return self.left==other.left and self.right==other.right
|
||||
return True
|
||||
def __str__(self):
|
||||
if (self.kind==RELATION):
|
||||
return self.name
|
||||
elif (self.kind==UNARY):
|
||||
return self.name + " "+ self.prop+ " (" + self.child.__str__() +")"
|
||||
elif (self.kind==BINARY):
|
||||
if self.left.kind==RELATION:
|
||||
le=self.left.__str__()
|
||||
else:
|
||||
le="("+self.left.__str__()+")"
|
||||
if self.right.kind==RELATION:
|
||||
re=self.right.__str__()
|
||||
else:
|
||||
re="("+self.right.__str__()+")"
|
||||
|
||||
return (le+ self.name +re)
|
||||
|
||||
def tokenize(expression):
|
||||
'''This function converts an expression into a list where
|
||||
every token of the expression is an item of a list. Expressions into
|
||||
parenthesis will be converted into sublists.'''
|
||||
items=[] #List for the tokens
|
||||
|
||||
'''This is a state machine. Initial status is determined by the starting of the
|
||||
expression. There are the following statuses:
|
||||
|
||||
relation: this is the status if the expressions begins with something else than an
|
||||
operator or a parenthesis.
|
||||
binary operator: this is the status when parsing a binary operator, nothing much to say
|
||||
unary operator: this status is more complex, since it will be followed by a parameter AND a
|
||||
sub-expression.
|
||||
sub-expression: this status is entered when finding a '(' and will be exited when finding a ')'.
|
||||
means that the others open must be counted to determine which close is the right one.'''
|
||||
|
||||
expression=expression.strip() #Removes initial and endind spaces
|
||||
state=0
|
||||
'''
|
||||
0 initial and useless
|
||||
1 previous stuff was a relation
|
||||
2 previous stuff was a sub-expression
|
||||
3 previous stuff was a unary operator
|
||||
4 previous stuff was a binary operator
|
||||
'''
|
||||
|
||||
while len(expression)>0:
|
||||
if expression.startswith('('): #Parenthesis state
|
||||
state=2
|
||||
par_count=0 #Count of parenthesis
|
||||
end=0
|
||||
|
||||
for i in range(len(expression)):
|
||||
if expression[i]=='(':
|
||||
par_count+=1
|
||||
elif expression[i]==')':
|
||||
par_count-=1
|
||||
if par_count==0:
|
||||
end=i
|
||||
break
|
||||
#Appends the tokenization of the content of the parenthesis
|
||||
items.append(tokenize(expression[1:end]))
|
||||
#Removes the entire parentesis and content from the expression
|
||||
expression=expression[end+1:].strip()
|
||||
|
||||
elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes
|
||||
items.append(expression[0:2]) #Adding operator in the top of the list
|
||||
expression=expression[2:].strip() #Removing operator from the expression
|
||||
par=expression.find('(')
|
||||
|
||||
items.append(expression[:par]) #Inserting parameter of the operator
|
||||
expression=expression[par:].strip() #Removing parameter from the expression
|
||||
elif expression.startswith("*") or expression.startswith("-"): # Binary 1 byte
|
||||
items.append(expression[0])
|
||||
expression=expression[1:].strip() #1 char from the expression
|
||||
state=4
|
||||
elif expression.startswith("ᑎ") or expression.startswith("ᑌ"): #Binary short 3 bytes
|
||||
items.append(expression[0:3]) #Adding operator in the top of the list
|
||||
expression=expression[3:].strip() #Removing operator from the expression
|
||||
|
||||
state=4
|
||||
elif expression.startswith("ᐅ"): #Binary long
|
||||
i=expression.find("ᐊ")
|
||||
items.append(expression[:i+3])
|
||||
expression=expression[i+3:].strip()
|
||||
|
||||
state=4
|
||||
else: #Relation (hopefully)
|
||||
if state==1: #Previous was a relation, appending to the last token
|
||||
i=items.pop()
|
||||
items.append(i+expression[0])
|
||||
expression=expression[1:].strip() #1 char from the expression
|
||||
else:
|
||||
state=1
|
||||
items.append(expression[0])
|
||||
expression=expression[1:].strip() #1 char from the expression
|
||||
|
||||
return items
|
||||
|
||||
def tree(expression):
|
||||
'''This function parses a relational algebra expression into a tree and returns
|
||||
the root node using the Node class defined in this module.'''
|
||||
return node(tokenize(expression))
|
||||
# Names that used to be defined in this module and now live in parser.py.
# They are re-exported here so that code still reading them from this module
# keeps working.
# NOTE(review): parser.py appears to import this module as well, so the two
# modules import each other - confirm that the import order used by callers
# does not reach these lines before parser has finished loading.
RELATION, UNARY, BINARY = parser.RELATION, parser.UNARY, parser.BINARY
b_operators, u_operators = parser.b_operators, parser.u_operators
op_functions = parser.op_functions
node, tokenize, tree = parser.node, parser.tokenize, parser.tree
# End of the compatibility names
|
||||
|
||||
def optimize_all(expression,rels):
|
||||
'''This function performs all the available optimizations'''
|
||||
@ -281,7 +61,6 @@ def specific_optimize(expression,rels):
|
||||
total+=i(n,rels) #Performs the optimization
|
||||
return n.__str__()
|
||||
|
||||
|
||||
def general_optimize(expression):
|
||||
'''This function performs general optimizations. Means that it will not need to
|
||||
know the fields used by the relations'''
|
||||
@ -322,8 +101,6 @@ if __name__=="__main__":
|
||||
'''
|
||||
σ skill=='C' (π id,name,chief,age (σ chief==i and age>a (ρ id➡i,age➡a(π id,age(people))*people)) ᐅᐊ skills)
|
||||
(π id,name,chief,age (σ chief == i and age > a ((ρ age➡a,id➡i (π id,age (people)))*people)))ᐅᐊ(σ skill == 'C' (skills))
|
||||
|
||||
|
||||
'''
|
||||
|
||||
#print specific_optimize("σ name==skill and age>21 and id==indice and skill=='C'(P1ᐅᐊS1)",rels)
|
||||
|
@ -18,7 +18,242 @@
|
||||
#
|
||||
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
||||
|
||||
import optimizer
|
||||
|
||||
# Possible values of node.kind
RELATION = 0
UNARY = 1
BINARY = 2

# Symbols of the binary and of the unary operators
b_operators = ('*', '-', 'ᑌ', 'ᑎ', 'ᐅᐊ', 'ᐅLEFTᐊ', 'ᐅRIGHTᐊ', 'ᐅFULLᐊ')
u_operators = ('π', 'σ', 'ρ')

# Operator symbol -> name of the method written by node.toPython()
op_functions = {
    '*': 'product',
    '-': 'difference',
    'ᑌ': 'union',
    'ᑎ': 'intersection',
    'ᐅᐊ': 'join',
    'ᐅLEFTᐊ': 'outer_left',
    'ᐅRIGHTᐊ': 'outer_right',
    'ᐅFULLᐊ': 'outer',
    'π': 'projection',
    'σ': 'selection',
    'ρ': 'rename',
}


class node(object):
    '''This class is a node of a relational expression. Leaves are relations and internal nodes are operations.

    The kind property says if the node is a binary operator, unary operator or relation.
    Since relations are leaves, a relation node will have no attribute for children.

    If the node is a binary operator, it will have left and right properties.

    If the node is a unary operator, it will have a child, pointing to the child node and a prop containing
    the string with the props of the operation.

    A node built without an expression (or with an empty one) is left empty:
    kind stays None and no other attribute is set.'''
    kind = None

    def __init__(self, expression=None):
        '''Generates the tree from the tokenized expression.

        expression is a list of tokens in the form produced by tokenize():
        strings for relation names, operators and operator parameters,
        nested lists for parenthesised sub-expressions.'''
        if expression is None or len(expression) == 0:
            return

        # If the list contains only a list, it will consider the lower level list.
        # This will allow things like ((((((a))))) to work
        while len(expression) == 1 and isinstance(expression[0], list):
            expression = expression[0]

        # The list contains only 1 string. Means it is the name of a relation
        if len(expression) == 1 and isinstance(expression[0], str):
            self.kind = RELATION
            self.name = expression[0]
            return

        # Scans the expression from right to left, searching for binary
        # operators; this means that binary operators have lesser priority
        # than unary operators.
        # The operator found becomes the root of this (sub)tree: everything on
        # its left builds the left subtree and everything on its right builds
        # the right one.
        # Since it searches for strings, and expressions into parenthesis are
        # within sub-lists, they won't be found here, ensuring that they will
        # have highest priority.
        for i in range(len(expression) - 1, -1, -1):
            if expression[i] in b_operators:  # Binary operator
                self.kind = BINARY
                self.name = expression[i]
                self.left = node(expression[:i])
                self.right = node(expression[i + 1:])
                return

        # Searches for unary operators, parsing from right to left.
        # The token after the operator is its parameter and the one after
        # that is the sub-expression it is applied to.
        for i in range(len(expression) - 1, -1, -1):
            if expression[i] in u_operators:  # Unary operator
                self.kind = UNARY
                self.name = expression[i]
                self.prop = expression[1 + i].strip()
                self.child = node(expression[2 + i])
                return

    def toPython(self):
        '''This method converts the expression into python code.

        Returns a string in which every operator is a method call (the method
        names come from op_functions) and every relation is its bare name.'''
        if self.name in b_operators:
            return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
        elif self.name in u_operators:
            prop = self.prop

            # Converting parameters.
            # NOTE: prop is embedded verbatim between double quotes, so a
            # double quote inside it ends up unescaped in the generated code.
            if self.name == 'π':  # Projection: "a","b",...
                prop = '"%s"' % prop.replace(' ', '').replace(',', '","')
            elif self.name == 'ρ':  # Rename: {"old":"new",...}
                prop = '{"%s"}' % prop.replace(',', '","').replace('➡', '":"').replace(' ', '')
            else:  # Selection: the condition as a single string
                prop = '"%s"' % prop

            return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
        else:
            return self.name

    def result_format(self, rels):
        '''This function returns a list containing the fields that the resulting relation will have.

        Since it needs to know real instances of relations, it requires a dictionary where keys are
        the names of the relations and the values are the relation objects.

        Returns None when rels is None.'''
        if rels is None:
            return

        if self.kind == RELATION:
            return list(rels[self.name].header.attributes)
        elif self.kind == BINARY and self.name in ('-', 'ᑌ', 'ᑎ'):
            # The fields are taken from the left operand
            return self.left.result_format(rels)
        elif self.name == 'π':
            return [field.strip() for field in self.prop.split(',')]
        elif self.name == '*':
            return self.left.result_format(rels) + self.right.result_format(rels)
        elif self.name == 'σ':
            return self.child.result_format(rels)
        elif self.name == 'ρ':
            # Maps every old field name to its new name
            _vars = {}
            for i in self.prop.split(','):
                q = i.split('➡')
                _vars[q[0].strip()] = q[1].strip()

            # Fields that are not renamed are kept as they are
            return [_vars.get(field, field) for field in self.child.result_format(rels)]
        elif self.name in ('ᐅᐊ', 'ᐅLEFTᐊ', 'ᐅRIGHTᐊ', 'ᐅFULLᐊ'):
            # Union of the fields of both operands without duplicates.
            # The order of first appearance is kept so that the result does
            # not depend on the iteration order of a set.
            fields = []
            seen = set()
            for field in self.left.result_format(rels) + self.right.result_format(rels):
                if field not in seen:
                    seen.add(field)
                    fields.append(field)
            return fields

    def __eq__(self, other):
        '''Two nodes are equal when they have the same kind and name and,
        recursively, the same parameter and children.'''
        if not isinstance(other, node) or self.kind != other.kind:
            return False

        # Empty nodes have no name attribute at all
        if getattr(self, 'name', None) != getattr(other, 'name', None):
            return False

        if self.kind == UNARY:
            if other.prop != self.prop:
                return False
            return self.child == other.child
        if self.kind == BINARY:
            return self.left == other.left and self.right == other.right
        return True

    def __ne__(self, other):
        '''Opposite of __eq__.

        Python 2 does not derive != from __eq__, so it has to be defined
        explicitly to keep the two operators consistent.'''
        return not self.__eq__(other)

    def __str__(self):
        '''Returns the expression as a string. Operands of a binary operator
        that are not plain relations are put into parenthesis.'''
        if self.kind == RELATION:
            return self.name
        elif self.kind == UNARY:
            return self.name + " " + self.prop + " (" + self.child.__str__() + ")"
        elif self.kind == BINARY:
            if self.left.kind == RELATION:
                le = self.left.__str__()
            else:
                le = "(" + self.left.__str__() + ")"
            if self.right.kind == RELATION:
                re = self.right.__str__()
            else:
                re = "(" + self.right.__str__() + ")"

            return le + self.name + re

        # Empty node: __str__ must always return a string
        return ''
|
||||
|
||||
def _leading_symbol(expression, symbols):
    '''Returns the first item of symbols that expression starts with,
    or None if it starts with none of them.'''
    for symbol in symbols:
        if expression.startswith(symbol):
            return symbol
    return None


def tokenize(expression):
    '''This function converts an expression into a list where
    every token of the expression is an item of a list. Expressions into
    parenthesis will be converted into sublists.

    A unary operator produces two tokens: the operator itself and its
    parameter, which is the text between the operator and the next '('.

    The length of every operator is taken from the symbol itself instead of
    being a fixed amount of bytes, so the result is the same whether the
    expression is a byte string or a text string.'''
    unary_ops = ('σ', 'π', 'ρ')
    short_binary_ops = ('*', '-', 'ᑎ', 'ᑌ')
    join_open = 'ᐅ'   # Every join operator begins with this symbol...
    join_close = 'ᐊ'  # ...and ends with this one

    items = []  # List for the tokens

    # This is a state machine. The state records what the previous token was:
    #   0 initial and useless
    #   1 previous stuff was a relation
    #   2 previous stuff was a sub-expression
    #   3 previous stuff was a unary operator
    #   4 previous stuff was a binary operator
    # Only state 1 is ever tested: it tells that the current character
    # continues the relation name that is being read.
    state = 0

    expression = expression.strip()  # Removes initial and ending spaces

    while len(expression) > 0:
        if expression.startswith('('):  # Sub-expression
            state = 2
            par_count = 0  # Count of the open parenthesis
            end = 0

            # Searches for the parenthesis that closes the first one; the
            # others that get open must be counted to determine which close
            # is the right one. If there is none, end remains 0.
            for i in range(len(expression)):
                if expression[i] == '(':
                    par_count += 1
                elif expression[i] == ')':
                    par_count -= 1
                    if par_count == 0:
                        end = i
                        break

            # Appends the tokenization of the content of the parenthesis
            items.append(tokenize(expression[1:end]))
            # Removes the entire parenthesis and content from the expression
            expression = expression[end + 1:].strip()
            continue

        symbol = _leading_symbol(expression, unary_ops)
        if symbol is not None:  # Unary operator
            items.append(symbol)
            expression = expression[len(symbol):].strip()  # Removing operator from the expression

            # The parameter is everything up to the sub-expression
            par = expression.find('(')
            items.append(expression[:par])  # Inserting parameter of the operator
            expression = expression[par:].strip()  # Removing parameter from the expression
            state = 3
            continue

        symbol = _leading_symbol(expression, short_binary_ops)
        if symbol is not None:  # Binary operator made of a single symbol
            items.append(symbol)
            expression = expression[len(symbol):].strip()  # Removing operator from the expression
            state = 4
            continue

        if expression.startswith(join_open):  # Binary long (joins)
            i = expression.find(join_close)
            if i == -1:
                # No closing symbol: takes only the opening one, so that the
                # expression always gets shorter and the loop ends
                end = len(join_open)
            else:
                end = i + len(join_close)
            items.append(expression[:end])
            expression = expression[end:].strip()
            state = 4
            continue

        # Relation (hopefully), read one character per iteration
        if state == 1:  # Previous was a relation, appending to the last token
            items.append(items.pop() + expression[0])
        else:
            state = 1
            items.append(expression[0])
        expression = expression[1:].strip()  # 1 char from the expression

    return items
|
||||
|
||||
def tree(expression):
    '''Builds the tree of a relational algebra expression.

    The expression is split into tokens by tokenize() and the list of tokens
    is given to the node class defined in this module; the node obtained,
    which is the root of the tree, is returned.'''
    tokens = tokenize(expression)
    root = node(tokens)
    return root
|
||||
|
||||
|
||||
|
||||
def parse(expr):
|
||||
'''This function parses a relational algebra expression, converting it into python,
|
||||
@ -50,7 +285,7 @@ def parse(expr):
|
||||
ρid➡i,name➡n(π a,b(A))
|
||||
A ᐅᐊ B
|
||||
'''
|
||||
return optimizer.tree(expr).toPython()
|
||||
return tree(expr).toPython()
|
||||
|
||||
if __name__=="__main__":
|
||||
while True:
|
||||
|
Loading…
Reference in New Issue
Block a user