
- Improved tokenizer for select in optimizations, now can accept operators in identifiers git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@220 014f5005-505e-4b48-8d0a-63407b615a7c
548 lines
18 KiB
Python
548 lines
18 KiB
Python
# -*- coding: utf-8 -*-
|
||
# Relational
|
||
# Copyright (C) 2009 Salvo "LtWorf" Tomaselli
|
||
#
|
||
# Relational is free software: you can redistribute it and/or modify
|
||
# it under the terms of the GNU General Public License as published by
|
||
# the Free Software Foundation, either version 3 of the License, or
|
||
# (at your option) any later version.
|
||
#
|
||
# This program is distributed in the hope that it will be useful,
|
||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
# GNU General Public License for more details.
|
||
#
|
||
# You should have received a copy of the GNU General Public License
|
||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
#
|
||
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
||
|
||
'''This module contains functions to perform various optimizations on the expression trees.
|
||
The list general_optimizations contains pointers to general functions, so they can be called
|
||
within a cycle.
|
||
|
||
It is possible to add new general optimizations by adding the function in the list
|
||
general_optimizations present in this module. And the optimization will be executed with the
|
||
other ones when optimizing.
|
||
|
||
A function will have one parameter, which is the root node of the tree describing the expression.
|
||
The class used is defined in optimizer module.
|
||
A function will have to return the number of changes performed on the tree.
|
||
'''
|
||
|
||
import optimizer
|
||
import parser
|
||
sel_op=('//=','**=','and','not','in','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=','(',')',',','[',']')
|
||
|
||
def replace_node(replace,replacement):
|
||
'''This function replaces "replace" node with the node "with",
|
||
the father of the node will now point to the with node'''
|
||
replace.name=replacement.name
|
||
replace.kind=replacement.kind
|
||
|
||
if replace.kind==optimizer.UNARY:
|
||
replace.child=replacement.child
|
||
replace.prop=replacement.prop
|
||
elif replace.kind==optimizer.BINARY:
|
||
replace.right=replacement.right
|
||
replace.left=replacement.left
|
||
|
||
def recoursive_scan(function,node,rels=None):
|
||
'''Does a recoursive optimization on the tree'''
|
||
changes=0
|
||
#recoursive scan
|
||
if node.kind==optimizer.UNARY:
|
||
if rels!=None:
|
||
changes+=function(node.child,rels)
|
||
else:
|
||
changes+=function(node.child)
|
||
elif node.kind==optimizer.BINARY:
|
||
if rels!=None:
|
||
changes+=function(node.right,rels)
|
||
changes+=function(node.left,rels)
|
||
else:
|
||
changes+=function(node.right)
|
||
changes+=function(node.left)
|
||
return changes
|
||
|
||
|
||
def duplicated_select(n):
|
||
'''This function locates and deletes things like
|
||
σ a ( σ a(C)) and the ones like σ a ( σ b(C))'''
|
||
changes=0
|
||
if n.name=='σ' and n.child.name=='σ':
|
||
if n.prop != n.child.prop: #Nested but different, joining them
|
||
n.prop = n.prop + " and " + n.child.prop
|
||
n.child=n.child.child
|
||
changes=1
|
||
changes+=duplicated_select(n)
|
||
|
||
return changes+recoursive_scan(duplicated_select,n)
|
||
|
||
def futile_union_intersection_subtraction(n):
|
||
'''This function locates things like r ᑌ r, and replaces them with r.
|
||
R ᑌ R --> R
|
||
R ᑎ R --> R
|
||
R - R --> σ False (R)
|
||
σ k (R) - R --> σ False (R)
|
||
R - σ k (R) --> σ not k (R)
|
||
σ k (R) ᑌ R --> R
|
||
σ k (R) ᑎ R --> σ k (R)
|
||
'''
|
||
|
||
changes=0
|
||
|
||
if n.name in ('ᑌ','ᑎ') and n.left==n.right:
|
||
changes=1
|
||
replace_node(n,n.left)
|
||
elif (n.name == 'ᑌ' and ((n.left.name=='σ' and n.left.child==n.right) or (n.right.name=='σ' and n.right.child==n.left))): #Union of two equal things, but one has a selection
|
||
changes=1
|
||
if n.left=='σ':#Selection on left. replacing self with right.
|
||
replace_node(n,n.right)
|
||
else:#Selection on left. replacing self with right.
|
||
replace_node(n,n.left)
|
||
elif (n.name == 'ᑎ' and ((n.left.name=='σ' and n.left.child==n.right) or (n.right.name=='σ' and n.right.child==n.left))): #Intersection of two equal things, but one has a selection
|
||
changes=1
|
||
if n.left=='σ':#Swapping with the selection
|
||
replace_node(n,n.left)
|
||
else:
|
||
replace_node(n,n.right)
|
||
#TODO make work the following line...
|
||
elif (n.name == '-' and (n.right.name=='σ' and n.right.child==n.left)): #Subtraction of two equal things, but one has a selection
|
||
n.name=n.right.name
|
||
n.kind=n.right.kind
|
||
n.child=n.right.child
|
||
n.prop='(not (%s))' % n.right.prop
|
||
n.left=n.right=None
|
||
elif (n.name=='-' and ((n.left==n.right) or (n.left.name=='σ' and n.left.child==n.right)) ):#Empty relation
|
||
changes=1
|
||
n.kind=optimizer.UNARY
|
||
n.name='σ'
|
||
n.prop='False'
|
||
n.child=n.left.get_left_leaf()
|
||
#n.left=n.right=None
|
||
|
||
return changes+recoursive_scan(futile_union_intersection_subtraction,n)
|
||
|
||
def down_to_unions_subtractions_intersections(n):
|
||
'''This funcion locates things like σ i==2 (c ᑌ d), where the union
|
||
can be a subtraction and an intersection and replaces them with
|
||
σ i==2 (c) ᑌ σ i==2(d).
|
||
'''
|
||
changes=0
|
||
_o=('ᑌ','-','ᑎ')
|
||
if n.name=='σ' and n.child.name in _o:
|
||
|
||
left=optimizer.node()
|
||
left.prop=n.prop
|
||
left.name=n.name
|
||
left.child=n.child.left
|
||
left.kind=optimizer.UNARY
|
||
right=optimizer.node()
|
||
right.prop=n.prop
|
||
right.name=n.name
|
||
right.child=n.child.right
|
||
right.kind=optimizer.UNARY
|
||
|
||
n.name=n.child.name
|
||
n.left=left
|
||
n.right=right
|
||
n.child=None
|
||
n.prop=None
|
||
n.kind=optimizer.BINARY
|
||
changes+=1
|
||
|
||
return changes+recoursive_scan(down_to_unions_subtractions_intersections,n)
|
||
|
||
def duplicated_projection(n):
|
||
'''This function locates thing like π i ( π j (R)) and replaces
|
||
them with π i (R)'''
|
||
changes=0
|
||
|
||
if n.name=='π' and n.child.name=='π':
|
||
n.child=n.child.child
|
||
changes+=1
|
||
|
||
return changes+recoursive_scan(duplicated_projection,n)
|
||
|
||
def selection_inside_projection(n):
|
||
'''This function locates things like σ j (π k(R)) and
|
||
converts them into π k(σ j (R))'''
|
||
changes=0
|
||
|
||
if n.name=='σ' and n.child.name=='π':
|
||
changes=1
|
||
temp=n.prop
|
||
n.prop=n.child.prop
|
||
n.child.prop=temp
|
||
n.name='π'
|
||
n.child.name='σ'
|
||
|
||
return changes+recoursive_scan(selection_inside_projection,n)
|
||
|
||
def swap_union_renames(n):
|
||
'''This function locates things like
|
||
ρ a➡b(R) ᑌ ρ a➡b(Q)
|
||
and replaces them with
|
||
ρ a➡b(R ᑌ Q).
|
||
Does the same with subtraction and intersection'''
|
||
changes=0
|
||
|
||
if n.name in ('-','ᑌ','ᑎ') and n.left.name==n.right.name and n.left.name=='ρ':
|
||
l_vars={}
|
||
for i in n.left.prop.split(','):
|
||
q=i.split('➡')
|
||
l_vars[q[0].strip()]=q[1].strip()
|
||
|
||
r_vars={}
|
||
for i in n.right.prop.split(','):
|
||
q=i.split('➡')
|
||
r_vars[q[0].strip()]=q[1].strip()
|
||
|
||
if r_vars==l_vars:
|
||
changes=1
|
||
|
||
#Copying self, but child will be child of renames
|
||
q=optimizer.node()
|
||
q.name=n.name
|
||
q.kind=optimizer.BINARY
|
||
q.left=n.left.child
|
||
q.right=n.right.child
|
||
|
||
n.name='ρ'
|
||
n.kind=optimizer.UNARY
|
||
n.child=q
|
||
n.prop=n.left.prop
|
||
n.left=n.right=None
|
||
|
||
return changes+recoursive_scan(swap_union_renames,n)
|
||
|
||
def futile_renames(n):
|
||
'''This function purges renames like id->id'''
|
||
changes=0
|
||
|
||
if n.name=='ρ':
|
||
#Located two nested renames.
|
||
changes=1
|
||
|
||
#Creating a dictionary with the attributes
|
||
_vars={}
|
||
for i in n.prop.split(','):
|
||
q=i.split('➡')
|
||
_vars[q[0].strip()]=q[1].strip()
|
||
#Scans dictionary to locate things like "a->b,b->c" and replace them with "a->c"
|
||
for key in list(_vars.keys()):
|
||
try:
|
||
value=_vars[key]
|
||
except:
|
||
value=None
|
||
if key==value:
|
||
_vars.pop(value) #Removes the unused one
|
||
#Reset prop var
|
||
n.prop=""
|
||
|
||
#Generates new prop var
|
||
for i in _vars.items():
|
||
n.prop+="%s➡%s," % (i[0],i[1])
|
||
n.prop=n.prop[:-1] #Removing ending comma
|
||
|
||
if len(n.prop)==0: #Nothing to rename, removing the rename op
|
||
replace_node(n,n.child)
|
||
|
||
return changes+recoursive_scan(futile_renames,n)
|
||
|
||
def subsequent_renames(n):
|
||
'''This function removes redoundant subsequent renames joining them into one'''
|
||
|
||
'''Purges renames like id->id Since it's needed to be performed BEFORE this one
|
||
so it is not in the list with the other optimizations'''
|
||
futile_renames(n)
|
||
changes=0
|
||
|
||
if n.name=='ρ' and n.child.name==n.name:
|
||
#Located two nested renames.
|
||
changes=1
|
||
#Joining the attribute into one
|
||
n.prop+=','+n.child.prop
|
||
n.child=n.child.child
|
||
|
||
#Creating a dictionary with the attributes
|
||
_vars={}
|
||
for i in n.prop.split(','):
|
||
q=i.split('➡')
|
||
_vars[q[0].strip()]=q[1].strip()
|
||
#Scans dictionary to locate things like "a->b,b->c" and replace them with "a->c"
|
||
for key in list(_vars.keys()):
|
||
try:
|
||
value=_vars[key]
|
||
except:
|
||
value=None
|
||
if value in _vars.keys():
|
||
if _vars[value]!=key:
|
||
#Double rename on attribute
|
||
_vars[key] = _vars[_vars[key]] #Sets value
|
||
_vars.pop(value) #Removes the unused one
|
||
else: #Cycle rename a->b,b->a
|
||
_vars.pop(value) #Removes the unused one
|
||
_vars.pop(key) #Removes the unused one
|
||
|
||
#Reset prop var
|
||
n.prop=""
|
||
|
||
#Generates new prop var
|
||
for i in _vars.items():
|
||
n.prop+="%s➡%s," % (i[0],i[1])
|
||
n.prop=n.prop[:-1] #Removing ending comma
|
||
|
||
if len(n.prop)==0: #Nothing to rename, removing the rename op
|
||
replace_node(n,n.child)
|
||
|
||
return changes+recoursive_scan(subsequent_renames,n)
|
||
|
||
class level_string(str):
|
||
level=0
|
||
|
||
def tokenize_select(expression):
|
||
'''This function returns the list of tokens present in a
|
||
selection. The expression can contain parenthesis.
|
||
It will use a subclass of str with the attribute level, which
|
||
will specify the nesting level of the token into parenthesis.'''
|
||
sel_op=('//=','**=','and ','not ','in ','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or ','+','-','*','/','&','|','^','~','<','>','%','=','(',')',',','[',']')
|
||
l=0
|
||
while l!=len(expression):
|
||
l=len(expression)
|
||
if expression.startswith('(') and parser.find_matching_parenthesis(expression)+1==len(expression):
|
||
expression= expression[1:-1]
|
||
|
||
tokens=[]
|
||
temp=''
|
||
level=0
|
||
|
||
while len(expression)!=0:
|
||
expression=expression.strip()
|
||
|
||
if expression[0:1]=='(': #Expression into parenthesis
|
||
level+=1
|
||
elif expression[0:1]==')':
|
||
level-=1
|
||
|
||
for i in range(4,0,-1):#operators
|
||
if expression[0:i] in sel_op:
|
||
t=level_string(temp)
|
||
t.level=level
|
||
tokens.append(t)
|
||
temp=''
|
||
t=level_string(expression[0:i].strip())
|
||
t.level=level
|
||
tokens.append(t)
|
||
expression=expression[i:]
|
||
if expression[0:1]=="'":#String
|
||
end=expression.index("'",1)
|
||
while expression[end-1]=='\\':
|
||
end=expression.index("'",end+1)
|
||
#Add string to list
|
||
t=level_string(expression[0:end+1])
|
||
t.level=level
|
||
tokens.append(t)
|
||
expression=expression[end+1:]
|
||
else:
|
||
temp+=expression[0:1]
|
||
expression=expression[1:]
|
||
pass
|
||
if len(temp)!=0:
|
||
t=level_string(temp)
|
||
t.level=level
|
||
tokens.append(t)
|
||
while True:
|
||
try:
|
||
tokens.remove('')
|
||
except:
|
||
break
|
||
return tokens
|
||
|
||
def swap_rename_projection(n):
|
||
'''This function locates things like π k(ρ j(R))
|
||
and replaces them with ρ j(π k(R)).
|
||
This will let rename work on a hopefully smaller set
|
||
and more important, will hopefully allow further optimizations.
|
||
Will also eliminate fields in the rename that are cutted in the projection.
|
||
'''
|
||
changes=0
|
||
|
||
if n.name=='π' and n.child.name=='ρ':
|
||
changes=1
|
||
|
||
#π index,name(ρ id➡index(R))
|
||
_vars={}
|
||
for i in n.child.prop.split(','):
|
||
q=i.split('➡')
|
||
_vars[q[1].strip()]=q[0].strip()
|
||
|
||
_pr=n.prop.split(',')
|
||
for i in range(len(_pr)):
|
||
try:
|
||
_pr[i]=_vars[_pr[i].strip()]
|
||
except:
|
||
pass
|
||
|
||
_pr_reborn=n.prop.split(',')
|
||
for i in list(_vars.keys()):
|
||
if i not in _pr_reborn:
|
||
_vars.pop(i)
|
||
n.name=n.child.name
|
||
n.prop=''
|
||
for i in _vars.keys():
|
||
n.prop+='%s➡%s,' % (_vars[i],i)
|
||
n.prop=n.prop[:-1]
|
||
|
||
n.child.name='π'
|
||
n.child.prop=''
|
||
for i in _pr:
|
||
n.child.prop+=i+','
|
||
n.child.prop=n.child.prop[:-1]
|
||
|
||
|
||
return changes+recoursive_scan(swap_rename_projection,n)
|
||
|
||
def swap_rename_select(n):
|
||
'''This function locates things like σ k(ρ j(R)) and replaces
|
||
them with ρ j(σ k(R)). Renaming the attributes used in the
|
||
selection, so the operation is still valid.'''
|
||
changes=0
|
||
|
||
if n.name=='σ' and n.child.name=='ρ':
|
||
changes=1
|
||
#Dictionary containing attributes of rename
|
||
_vars={}
|
||
for i in n.child.prop.split(','):
|
||
q=i.split('➡')
|
||
_vars[q[1].strip()]=q[0].strip()
|
||
|
||
#tokenizes expression in select
|
||
_tokens=tokenize_select(n.prop)
|
||
|
||
#Renaming stuff
|
||
for i in range(len(_tokens)):
|
||
splitted=_tokens[i].split('.',1)
|
||
if splitted[0] in _vars:
|
||
if len(splitted)==1:
|
||
_tokens[i]=_vars[_tokens[i].split('.')[0]]
|
||
else:
|
||
_tokens[i]=_vars[_tokens[i].split('.')[0]]+'.'+splitted[1]
|
||
|
||
#Swapping operators
|
||
n.name='ρ'
|
||
n.child.name='σ'
|
||
|
||
n.prop=n.child.prop
|
||
n.child.prop=''
|
||
for i in _tokens:
|
||
n.child.prop+=i+ ' '
|
||
|
||
return changes+recoursive_scan(swap_rename_select,n)
|
||
|
||
def selection_and_product(n,rels):
|
||
'''This function locates things like σ k (R*Q) and converts them into
|
||
σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
|
||
i contains attributes belonging to Q and l contains attributes belonging to both'''
|
||
changes=0
|
||
|
||
if n.name=='σ' and n.child.name in ('*','ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'):
|
||
l_attr=n.child.left.result_format(rels)
|
||
r_attr=n.child.right.result_format(rels)
|
||
|
||
tokens=tokenize_select(n.prop)
|
||
|
||
groups=[]
|
||
temp=[]
|
||
|
||
for i in tokens:
|
||
if i=='and' and i.level==0:
|
||
groups.append(temp)
|
||
temp=[]
|
||
else:
|
||
temp.append(i)
|
||
if len(temp)!=0:
|
||
groups.append(temp)
|
||
temp=[]
|
||
|
||
left=[]
|
||
right=[]
|
||
both=[]
|
||
|
||
for i in groups:
|
||
l_fields=False #has fields in left?
|
||
r_fields=False #has fields in left?
|
||
|
||
for j in set(i).difference(sel_op):
|
||
j=j.split('.')[0]
|
||
if j in l_attr:#Field in left
|
||
l_fields=True
|
||
if j in r_attr:#Field in right
|
||
r_fields=True
|
||
|
||
if l_fields and r_fields:#Fields in both
|
||
both.append(i)
|
||
elif l_fields:
|
||
left.append(i)
|
||
elif r_fields:
|
||
right.append(i)
|
||
else:#Unknown.. adding in both
|
||
both.append(i)
|
||
|
||
#Preparing left selection
|
||
if len(left)>0:
|
||
changes=1
|
||
l_node=optimizer.node()
|
||
l_node.name='σ'
|
||
l_node.kind=optimizer.UNARY
|
||
l_node.child=n.child.left
|
||
l_node.prop=''
|
||
n.child.left=l_node
|
||
while len(left)>0:
|
||
c=left.pop(0)
|
||
for i in c:
|
||
l_node.prop+=i+ ' '
|
||
if len(left)>0:
|
||
l_node.prop+=' and '
|
||
if '(' in l_node.prop:
|
||
l_node.prop='(%s)' % l_node.prop
|
||
|
||
#Preparing right selection
|
||
if len(right)>0:
|
||
changes=1
|
||
r_node=optimizer.node()
|
||
r_node.name='σ'
|
||
r_node.prop=''
|
||
r_node.kind=optimizer.UNARY
|
||
r_node.child=n.child.right
|
||
n.child.right=r_node
|
||
while len(right)>0:
|
||
c=right.pop(0)
|
||
for i in c:
|
||
r_node.prop+=i+ ' '
|
||
if len(right)>0:
|
||
r_node.prop+=' and '
|
||
if '(' in r_node.prop:
|
||
r_node.prop='(%s)' % r_node.prop
|
||
#Changing main selection
|
||
n.prop=''
|
||
if len(both)!=0:
|
||
while len(both)>0:
|
||
c=both.pop(0)
|
||
for i in c:
|
||
n.prop+=i+ ' '
|
||
if len(both)>0:
|
||
n.prop+=' and '
|
||
if '(' in n.prop:
|
||
n.prop='(%s)' % n.prop
|
||
else:#No need for general select
|
||
replace_node(n,n.child)
|
||
|
||
return changes+recoursive_scan(selection_and_product,n,rels)
|
||
|
||
general_optimizations=[duplicated_select,down_to_unions_subtractions_intersections,duplicated_projection,selection_inside_projection,subsequent_renames,swap_rename_select,futile_union_intersection_subtraction,swap_union_renames,swap_rename_projection]
|
||
specific_optimizations=[selection_and_product]
|
||
|
||
if __name__=="__main__":
|
||
print tokenize_select("skill == 'C' and id % 2 == 0") |