# -*- coding: utf-8 -*-
# Relational
# Copyright (C) 2009 Salvo "LtWorf" Tomaselli
#
# Relation is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# author Salvo "LtWorf" Tomaselli
'''This module contains functions to perform various optimizations on the expression trees.
The list general_optimizations contains references to the general functions, so they can be
called in a loop.
It is possible to add a new general optimization by adding its function to the list
general_optimizations present in this module; it will then be executed together with the
other ones when optimizing.
Such a function has one parameter, which is the root node of the tree describing the expression.
The class used for the nodes is defined in the optimizer module.
The function has to return the number of changes performed on the tree.
'''
import optimizer
def duplicated_select(n):
    '''Collapses directly nested selections into a single one.

    σ a (σ a (C)) becomes σ a (C), while σ a (σ b (C)) becomes
    σ a and b (C). The tree is modified in place.

    n is the root node of the tree to scan.
    Returns the number of changes performed on the tree.
    '''
    changes = 0

    # Looping here (instead of recursing on n itself) collapses chains of
    # three or more selections without scanning the children twice.
    while n.name == 'σ' and n.child.name == 'σ':
        if n.prop != n.child.prop:  # Nested but different, joining them
            # NOTE(review): the two conditions are concatenated without
            # parenthesis, so an "or" inside either of them presumably
            # changes meaning once merged -- confirm against the evaluator.
            n.prop = n.prop + " and " + n.child.prop
        n.child = n.child.child
        changes += 1

    # recursive scan
    if n.kind == optimizer.UNARY:
        changes += duplicated_select(n.child)
    elif n.kind == optimizer.BINARY:
        changes += duplicated_select(n.right)
        changes += duplicated_select(n.left)
    return changes
def down_to_unions_subtractions_intersections(n):
    '''Pushes a selection below a union, subtraction or intersection.

    σ i==2 (c ᑌ d) is rewritten as σ i==2 (c) ᑌ σ i==2 (d); the same
    is done when the operator is '-' or 'ᑎ'. The node n itself becomes
    the binary operator and two new selection nodes are created under it.

    n is the root node of the tree to scan.
    Returns the number of changes performed on the tree.
    '''
    set_operators = ('ᑌ', '-', 'ᑎ')
    changes = 0

    if n.name == 'σ' and n.child.name in set_operators:
        operation = n.child

        # One copy of the selection for each operand, left one first
        branches = []
        for operand in (operation.left, operation.right):
            selection = optimizer.node()
            selection.prop = n.prop
            selection.name = n.name
            selection.child = operand
            selection.kind = optimizer.UNARY
            branches.append(selection)

        # n takes the place of the operator it used to contain
        n.name = operation.name
        n.left, n.right = branches
        n.child = None
        n.prop = None
        n.kind = optimizer.BINARY
        changes += 1

    # recursive scan
    if n.kind == optimizer.UNARY:
        changes += down_to_unions_subtractions_intersections(n.child)
    elif n.kind == optimizer.BINARY:
        changes += down_to_unions_subtractions_intersections(n.right)
        changes += down_to_unions_subtractions_intersections(n.left)
    return changes
def duplicated_projection(n):
    '''Removes the inner projection of two directly nested ones.

    π i (π j (R)) is replaced with π i (R). Only one inner projection
    is dropped per node on each call.

    n is the root node of the tree to scan.
    Returns the number of changes performed on the tree.
    '''
    nested = n.name == 'π' and n.child.name == 'π'
    if nested:
        # Skip over the inner projection
        n.child = n.child.child
    changes = 1 if nested else 0

    # recursive scan
    if n.kind == optimizer.UNARY:
        return changes + duplicated_projection(n.child)
    if n.kind == optimizer.BINARY:
        changes += duplicated_projection(n.right)
        changes += duplicated_projection(n.left)
    return changes
def selection_inside_projection(n):
    '''Moves a selection below the projection it is applied to.

    σ j (π k (R)) is converted into π k (σ j (R)). No node is created
    or removed: the two nodes exchange their name and their prop.

    n is the root node of the tree to scan.
    Returns the number of changes performed on the tree.
    '''
    changes = 0
    if n.name == 'σ' and n.child.name == 'π':
        inner = n.child
        # Exchange the properties, then the operators
        n.prop, inner.prop = inner.prop, n.prop
        n.name, inner.name = 'π', 'σ'
        changes = 1

    # recursive scan
    if n.kind == optimizer.UNARY:
        changes += selection_inside_projection(n.child)
    elif n.kind == optimizer.BINARY:
        changes += selection_inside_projection(n.right)
        changes += selection_inside_projection(n.left)
    return changes
def subsequent_renames(n):
    '''Merges two directly nested renames into a single one.

    ρ b➡c (ρ a➡b (R)) becomes ρ a➡c (R). The inner rename (the child)
    is the one applied first, so every name it produces is looked up in
    the outer rename to find the final name. Pairs of the outer rename
    that do not continue an inner pair are kept unchanged.

    n is the root node of the tree to scan.
    Returns the number of changes performed on the tree.
    '''
    def parse(prop):
        # Turns "a➡b,c➡d" into [('a', 'b'), ('c', 'd')]
        pairs = []
        for item in prop.split(','):
            q = item.split('➡')
            pairs.append((q[0].strip(), q[1].strip()))
        return pairs

    changes = 0

    if n.name == 'ρ' and n.child.name == n.name:
        # Located two nested renames.
        changes = 1
        outer_pairs = parse(n.prop)
        inner_pairs = parse(n.child.prop)
        outer = dict(outer_pairs)

        merged = []
        continued = set()  # outer names already folded into an inner pair
        for old, new in inner_pairs:
            if new in outer:
                # Double rename on attribute: a➡b then b➡c gives a➡c
                continued.add(new)
                new = outer[new]
            # A cycle (a➡b then b➡a) gives the identity pair a➡a; it is
            # kept so that the node never ends up with an empty prop.
            merged.append((old, new))
        for old, new in outer_pairs:
            if old not in continued:
                merged.append((old, new))

        n.prop = ','.join(['%s➡%s' % pair for pair in merged])
        n.child = n.child.child

    # recursive scan
    if n.kind == optimizer.UNARY:
        changes += subsequent_renames(n.child)
    elif n.kind == optimizer.BINARY:
        changes += subsequent_renames(n.right)
        changes += subsequent_renames(n.left)
    return changes
def tokenize_select(expression):
    '''Returns the list of tokens present in a selection.

    Tokens are symbolic operators, string literals (single or double
    quoted, with backslash escapes) and words. A word is any run of
    characters that contains no whitespace, quote or symbolic operator,
    so keyword operators such as "and", "or" and "not" come out as
    tokens of their own only when they are whole words: an attribute
    called "brand" or "order" is not split. Empty tokens are never
    returned.

    The expression can't contain parenthesis: they get no special
    treatment and stay attached to the word next to them.

    Raises ValueError if a string literal is not terminated.
    '''
    # Matched longest first, so that '**=' wins over '**' and '*'
    symbols = ('//=', '**=',
               '//', '**', '<<', '>>', '==', '!=', '>=', '<=',
               '+=', '-=', '*=', '/=', '%=',
               '+', '-', '*', '/', '&', '|', '^', '~', '<', '>', '%', '=')
    tokens = []
    word = ''
    pos = 0
    length = len(expression)

    while pos < length:
        char = expression[pos]

        if char.isspace():  # Whitespace ends the current word
            if word:
                tokens.append(word)
                word = ''
            pos += 1
            continue

        if char in ('"', "'"):  # String
            end = pos + 1
            while end < length and expression[end] != char:
                if expression[end] == '\\':
                    end += 1  # Skips the escaped character
                end += 1
            if end >= length:
                raise ValueError('Unterminated string in selection: %s' % expression)
            # Text glued to the opening quote (like a string prefix)
            # stays in front of the literal, in the same token
            tokens.append(word + expression[pos:end + 1])
            word = ''
            pos = end + 1
            continue

        for size in (3, 2, 1):
            candidate = expression[pos:pos + size]
            if candidate in symbols:
                break
        else:
            # Not an operator: the character belongs to a word
            word += char
            pos += 1
            continue

        if word:
            tokens.append(word)
            word = ''
        tokens.append(candidate)
        pos += len(candidate)

    if word:
        tokens.append(word)
    return tokens
def swap_rename_select(n):
    '''Moves a selection below the rename it is applied to.

    σ k (ρ j (R)) is replaced with ρ j (σ k (R)). The attributes used
    in the selection are changed back to the names they have before the
    rename, so the operation is still valid.

    n is the root node of the tree to scan.
    Returns the number of changes performed on the tree.
    '''
    #TODO document into the wiki
    changes = 0

    if n.name == 'σ' and n.child.name == 'ρ':
        changes = 1
        rename = n.child

        # Maps every new name given by the rename to the original one
        old_names = {}
        for pair in rename.prop.split(','):
            parts = pair.split('➡')
            old_names[parts[1].strip()] = parts[0].strip()

        # Rewrites the tokens of the selection; only the part before the
        # first dot is looked up, whatever follows is kept as it is
        rewritten = []
        for token in tokenize_select(n.prop):
            head, dot, tail = token.partition('.')
            if head in old_names:
                token = old_names[head] + dot + tail
            rewritten.append(token)

        # Swapping operators
        n.name, rename.name = 'ρ', 'σ'
        n.prop = rename.prop
        rename.prop = ''.join([token + ' ' for token in rewritten])

    # recursive scan
    if n.kind == optimizer.UNARY:
        changes += swap_rename_select(n.child)
    elif n.kind == optimizer.BINARY:
        changes += swap_rename_select(n.right)
        changes += swap_rename_select(n.left)
    return changes
def _join_conditions(groups):
    '''Rebuilds a selection expression from groups of tokens, joining the groups with "and".'''
    return ' and '.join([' '.join(group) for group in groups])


def _selection_over(child, groups):
    '''Returns a new σ node that applies the conditions in groups to child.'''
    selection = optimizer.node()
    selection.name = 'σ'
    selection.kind = optimizer.UNARY
    selection.child = child
    selection.prop = _join_conditions(groups)
    return selection


def selection_and_product(n, rels):
    '''Pushes the conditions of a selection into the operands of a product or join.

    σ k (R*Q) is converted into σ l (σ j (R) * σ i (Q)), where j contains
    only attributes belonging to R, i contains only attributes belonging
    to Q and l contains the conditions using attributes of both (or of
    none of them). When l is empty the outer selection is removed and n
    becomes the product/join itself.

    The selection is left untouched when it contains an "or": "and" binds
    tighter than "or", so the pieces between the "and" tokens would not
    be independent conditions.

    n is the root node of the tree to scan.
    rels is passed to result_format() of the operand nodes to obtain the
    attributes of each operand.
    Returns the number of changes performed on the tree.
    '''
    #TODO document in the wiki
    changes = 0

    # Operands (left, right) that a condition may be moved into.
    # NOTE(review): ᐅLEFTᐊ/ᐅRIGHTᐊ/ᐅFULLᐊ are presumably outer joins; a
    # condition filtering the operand that gets padded would no longer
    # discard the padded rows, so only the preserved operand is allowed
    # and the full join is not optimized at all -- confirm.
    pushable = {
        '*': (True, True),
        'ᐅᐊ': (True, True),
        'ᐅLEFTᐊ': (True, False),
        'ᐅRIGHTᐊ': (False, True),
    }

    if n.name == 'σ' and n.child.name in pushable:
        push_left, push_right = pushable[n.child.name]
        l_attr = n.child.left.result_format(rels)
        r_attr = n.child.right.result_format(rels)
        tokens = tokenize_select(n.prop)

        left = []
        right = []
        both = []

        if 'or' not in tokens:
            # Splits the tokens into the conditions separated by "and"
            groups = []
            temp = []
            for token in tokens:
                if token == 'and':
                    groups.append(temp)
                    temp = []
                else:
                    temp.append(token)
            if temp:
                groups.append(temp)

            for group in groups:
                names = [token.split('.')[0] for token in group]
                l_fields = any(name in l_attr for name in names)  # has fields in left?
                r_fields = any(name in r_attr for name in names)  # has fields in right?

                if l_fields and not r_fields and push_left:
                    left.append(group)
                elif r_fields and not l_fields and push_right:
                    right.append(group)
                else:  # Fields in both, unknown, or operand not allowed
                    both.append(group)

        # Preparing left selection
        if left:
            changes = 1
            n.child.left = _selection_over(n.child.left, left)

        # Preparing right selection
        if right:
            changes = 1
            n.child.right = _selection_over(n.child.right, right)

        # Changing main selection, only if something was moved out of it
        if changes:
            if both:
                n.prop = _join_conditions(both)
            else:  # No need for general select
                n.prop = ''
                n.name = n.child.name
                n.kind = n.child.kind
                n.left = n.child.left
                n.right = n.child.right

    # recursive scan
    if n.kind == optimizer.UNARY:
        changes += selection_and_product(n.child, rels)
    elif n.kind == optimizer.BINARY:
        changes += selection_and_product(n.right, rels)
        changes += selection_and_product(n.left, rels)
    return changes
# Optimizations taking only the root node of the expression tree.
general_optimizations = [
    duplicated_select,
    down_to_unions_subtractions_intersections,
    duplicated_projection,
    selection_inside_projection,
    subsequent_renames,
    swap_rename_select,
]

# Optimizations taking the root node and a second argument (rels).
specific_optimizations = [
    selection_and_product,
]