relational/relational/optimizations.py
Salvo 'LtWorf' Tomaselli 2ceaa3e1db
The final rename
2020-08-12 17:35:55 +02:00

568 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Relational
# Copyright (C) 2009-2020 Salvo "LtWorf" Tomaselli
#
# Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
#
# This module contains functions to perform various optimizations on the expression trees.
# The list general_optimizations contains pointers to general functions, so they can be called
# within a cycle.
#
# It is possible to add new general optimizations by adding the function in the list
# general_optimizations present in this module. And the optimization will be executed with the
# other ones when optimizing.
#
# A function will have one parameter, which is the root node of the tree describing the expression.
# The class used is defined in optimizer module.
# A function will have to return the number of changes performed on the tree.
from io import StringIO
from tokenize import generate_tokens
from typing import Tuple, Dict, List
from relational.relation import Relation
from relational import parser
from relational.parser import Binary, Unary, Node, PRODUCT, \
DIFFERENCE, UNION, INTERSECTION, DIVISION, JOIN, \
JOIN_LEFT, JOIN_RIGHT, JOIN_FULL, PROJECTION, \
SELECTION, RENAME, ARROW
# Operator and punctuation tokens that can appear in a selection condition.
# selection_and_product() removes these from the tokens of a condition, so
# that only the remaining tokens are compared with the attribute names.
sel_op = (
    '//=', '**=', 'and', 'not', 'in', '//', '**', '<<', '>>', '==', '!=', '>=', '<=', '+=', '-=',
    '*=', '/=', '%=', 'or', '+', '-', '*', '/', '&', '|', '^', '~', '<', '>', '%', '=', '(', ')', ',', '[', ']')
def find_duplicates(node, dups=None):
    '''
    Records a node in a dictionary, using its string form as key.

    node: the object to record.
    dups: the dictionary to record into. A new one is created
          when it is omitted.

    Nothing is returned, so a dictionary created here is not
    visible to the caller.
    '''
    registry = {} if dups is None else dups
    registry[str(node)] = node
def _binds_looser_than_and(prop: str) -> bool:
    '''Tells whether a selection condition must be enclosed in parenthesis
    before it is used as an operand of "and".

    That is the case when it contains, outside of any parenthesis, an
    operator that binds more loosely than "and". A condition that can't be
    tokenized is reported as needing parenthesis, since enclosing an
    expression never changes its meaning.
    '''
    from tokenize import TokenError  # Local import, only needed here

    try:
        tokens = tokenize_select(prop)
    except (TokenError, SyntaxError):
        return True
    return any(t.level == 0 and t in ('or', 'if', 'else', 'lambda') for t in tokens)


def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates and deletes things like
    σ a ( σ a(C)) and the ones like σ a ( σ b(C))
    replacing the 1st one with a single select and
    the 2nd one with a single select with both conditions
    in and.

    Conditions that contain an operator weaker than "and" are
    enclosed in parenthesis before being joined, so that
    σ a or b ( σ c(C)) becomes σ ((a or b) and c) (C) rather than
    σ a or b and c (C), which has a different meaning.

    Returns the resulting node and the number of selections that
    were merged.
    '''
    changes = 0
    while isinstance(n, Unary) and n.name == SELECTION and \
            isinstance(n.child, Unary) and n.child.name == SELECTION:
        changes += 1
        prop = n.prop

        if n.prop != n.child.prop:  # Nested but different, joining them
            outer = '(%s)' % n.prop if _binds_looser_than_and(n.prop) else n.prop
            inner = '(%s)' % n.child.prop if _binds_looser_than_and(n.child.prop) else n.child.prop
            prop = outer + ' and ' + inner

            # A condition containing parenthesis is enclosed as a whole
            if outer.startswith('(') or inner.startswith('('):
                prop = '(%s)' % prop

        n = Unary(
            SELECTION,
            prop,
            n.child.child,
        )
    return n, changes
def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like r ∪ r, and replaces them with r.
    R ∪ R  --> R
    R ∩ R --> R
    R - R --> σ False (R)
    σ k (R) - R --> σ False (R)
    R - σ k (R) --> σ not k (R)
    σ k (R) ∪ R --> R
    σ k (R) ∩ R --> σ k (R)

    The same replacement of R ∪ R is done for the joins of an
    expression with itself.

    The empty results are built as σ False over R itself and not over
    a leaf of R, so they keep the same attributes that the subtraction
    would have had.

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if not isinstance(n, Binary):
        return n, 0

    def selects_from(node: parser.Node, source: parser.Node) -> bool:
        # True when node is a selection applied directly to source
        return node.name == SELECTION and isinstance(node, Unary) and node.child == source

    # Union, intersection and joins of the same thing
    if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right:
        return n.left, 1

    if n.name == UNION:
        # selection and union of the same thing, the selection is useless
        if selects_from(n.left, n.right):
            return n.right, 1
        if selects_from(n.right, n.left):
            return n.left, 1

    elif n.name == INTERSECTION:
        # selection and intersection of the same thing, only the selection remains
        if selects_from(n.left, n.right):
            return n.left, 1
        if selects_from(n.right, n.left):
            return n.right, 1

    elif n.name == DIFFERENCE:
        # Subtraction and selection of the same thing
        if selects_from(n.right, n.left):
            return Unary(
                SELECTION,
                '(not (%s))' % n.right.prop,
                n.right.child), 1

        # Subtraction of the same thing or with selection on the left child
        if n.left == n.right or selects_from(n.left, n.right):
            return Unary(
                SELECTION,
                'False',
                n.right,
            ), 1

    return n, 0
def down_to_unions_subtractions_intersections(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like σ i==2 (c ∪ d), where the union
    can also be a subtraction or an intersection, and replaces them with
    σ i==2 (c) ∪ σ i==2(d).

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if not isinstance(n, Unary) or n.name != SELECTION or \
            n.child.name not in (UNION, DIFFERENCE, INTERSECTION):
        return n, 0

    inner = n.child
    assert isinstance(inner, Binary)

    # The same selection is applied to both operands
    on_left = Unary(SELECTION, n.prop, inner.left)
    on_right = Unary(SELECTION, n.prop, inner.right)
    return Binary(inner.name, on_left, on_right), 1
def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like π i ( π j (R)) and replaces
    them with π i (R).

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if not isinstance(n, Unary) or n.name != PROJECTION:
        return n, 0

    inner = n.child
    if not isinstance(inner, Unary) or inner.name != PROJECTION:
        return n, 0

    # Only the external projection is kept
    return Unary(PROJECTION, n.prop, inner.child), 1
def selection_inside_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like σ j (π k(R)) and
    converts them into π k(σ j (R)).

    Returns the resulting node and the number of changes (0 or 1).
    The swap is counted as a change, as this module requires from every
    optimization function.
    '''
    if isinstance(n, Unary) and n.name == SELECTION and \
            isinstance(n.child, Unary) and n.child.name == PROJECTION:
        child = Unary(
            SELECTION,
            n.prop,
            n.child.child
        )

        return Unary(PROJECTION, n.child.prop, child), 1
    return n, 0
def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like
    ρ a➡b(R) ∪ ρ a➡b(Q)
    and replaces them with
    ρ a➡b(R ∪ Q).
    Does the same with subtraction and intersection.

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if n.name not in (DIFFERENCE, UNION, INTERSECTION) or not isinstance(n, Binary):
        return n, 0

    left, right = n.left, n.right
    both_renames = left.name == RENAME and isinstance(left, Unary) and \
        right.name == RENAME and isinstance(right, Unary)

    # The rename can be moved outside only when it is the same on both sides
    if both_renames and left.get_rename_prop() == right.get_rename_prop():
        merged = Binary(n.name, left.child, right.child)
        return Unary(RENAME, left.prop, merged), 1
    return n, 0
def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function purges renames like
    ρ id->id,a->q (A)
    into
    ρ a->q (A)
    or removes the operation entirely if they all get removed.

    Returns the resulting node and the number of changes. Removing the
    whole rename counts as 1 change. Purging only some pairs modifies the
    node in place and is reported as 0 changes.
    '''
    if isinstance(n, Unary) and n.name == RENAME:
        renames = n.get_rename_prop()

        # A new dictionary is built, because deleting from a dictionary
        # while iterating over it raises RuntimeError
        useful = {k: v for k, v in renames.items() if k != v}

        if not useful:  # Nothing to rename, removing the rename
            return n.child, 1
        elif len(useful) != len(renames):
            # Changing the node in place, no need to return to cause a recursive step
            n.set_rename_prop(useful)

    return n, 0
def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function removes redundant subsequent renames joining them into one
    ρ .. ρ .. (A)
    into
    ρ ... (A)

    Returns the resulting node and the number of changes (0 or 1).
    '''
    nested = isinstance(n, Unary) and \
        n.name == RENAME and \
        isinstance(n.child, Unary) and \
        n.child.name == RENAME
    if not nested:
        return n, 0

    # Located two nested renames, a single one is made with the
    # properties of the external one followed by the internal one
    merged = Unary(RENAME, n.prop + ',' + n.child.prop, n.child.child)

    # Creating a dictionary with the attributes
    mapping = merged.get_rename_prop()

    # Scans a copy of the dictionary to locate things like "a->b,b->c"
    # and replace them with "a->c"
    for src, dst in tuple(mapping.items()):
        if dst not in mapping:
            continue

        if mapping[dst] != src:
            # Double rename on attribute
            mapping[src] = mapping[mapping[src]]  # Sets value
            del mapping[dst]  # Removes the unused one
        else:  # Cycle rename a->b,b->a
            del mapping[dst]  # Removes the unused one
            del mapping[src]  # Removes the unused one

    if len(mapping) == 0:  # Nothing to rename, removing the rename op
        return merged.child, 1

    merged.set_rename_prop(mapping)
    return merged, 1
class LevelString(str):
    '''A string with the additional attribute level, used by
    tokenize_select to store the nesting level into parenthesis
    of a token.'''
    level = 0


def tokenize_select(expression: str) -> List[LevelString]:
    '''This function returns the list of tokens present in a
    selection. The expression can contain parenthesis.
    It will use a subclass of str with the attribute level, which
    will specify the nesting level of the token into parenthesis.

    An open parenthesis has the level of the tokens outside of it,
    a closed one has the level of the tokens it encloses.
    Empty tokens are not returned.'''
    l = [token[1] for token in generate_tokens(StringIO(str(expression)).readline)]

    # Changes the 'a','.','method' token group into a single 'a.method' token
    try:
        while True:
            dot = l.index('.')
            l[dot] = '%s.%s' % (l[dot - 1], l[dot + 1])
            l.pop(dot + 1)
            l.pop(dot - 1)
    except (ValueError, IndexError):
        # ValueError: there are no more '.' tokens, the normal way out.
        # IndexError: a '.' with nothing after it, which is left as it is.
        # Anything else is not expected and is no longer hidden.
        pass

    r = []
    level = 0
    for i in l:
        if not i:
            continue
        value = LevelString(i)
        value.level = level

        if value == '(':
            level += 1
        elif value == ')':
            level -= 1

        r.append(value)
    return r
def swap_rename_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like
    π k(ρ j(R))
    and replaces them with
    ρ j(π k(R)).
    This will let rename work on a hopefully smaller set
    and more important, will hopefully allow further optimizations.

    Will also eliminate fields in the rename that are cut in the projection.
    If no field is left to rename, only the projection is returned.

    The projected fields keep the order they had in the original
    projection.

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if isinstance(n, Unary) and \
            n.name == PROJECTION and \
            isinstance(n.child, Unary) and \
            n.child.name == RENAME:
        # π index,name(ρ id➡index(R))
        renames = n.child.get_rename_prop()

        # Maps every name after the rename to the name before it
        originals = {v: k for k, v in renames.items()}

        # Use pre-rename names in the projection. Every field is mapped
        # independently from the others, so that renames which swap
        # two fields are handled too. dict.fromkeys drops repeated names
        # keeping the order.
        projections = list(dict.fromkeys(
            originals.get(i, i) for i in n.get_projection_prop()
        ))

        # Eliminate fields
        renames = {k: v for k, v in renames.items() if k in projections}

        child = Unary(PROJECTION, '', n.child.child)
        child.set_projection_prop(projections)

        if not renames:  # Every renamed field was cut by the projection
            return child, 1

        n = Unary(RENAME, '', child)
        n.set_rename_prop(renames)
        return n, 1

    return n, 0
def swap_rename_select(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like
    σ k(ρ j(R))
    and replaces them with
    ρ j(σ k(R)).
    Renaming the attributes used in the
    selection, so the operation is still valid.

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if not (isinstance(n, Unary) and
            n.name == SELECTION and
            isinstance(n.child, Unary) and
            n.child.name == RENAME):
        return n, 0

    # Inverse mapping of the rename: from the new names to the old ones
    originals = {new: old for old, new in n.child.get_rename_prop().items()}

    # Tokens of the expression in the select
    tokens = tokenize_select(n.prop)

    # Puts the old names back. A token like 'a.method' is renamed
    # according to the part before the first dot
    for pos, token in enumerate(tokens):
        head, *tail = token.split('.', 1)
        if head not in originals:
            continue
        renamed = LevelString(originals[head])
        if tail:
            renamed = LevelString(renamed + '.' + tail[0])
        tokens[pos] = renamed

    inner = Unary(SELECTION, ' '.join(tokens), n.child.child)
    return Unary(RENAME, n.child.prop, inner), 1
def _enclose_select_operand(prop: str, looser: Tuple[str, ...]) -> str:
    '''Returns a selection condition, enclosed in parenthesis when that
    is needed to use it as an operand.

    It is enclosed when it already starts with a parenthesis or when it
    contains, outside of any parenthesis, one of the tokens in looser.
    A condition that can't be tokenized is enclosed too, since enclosing
    an expression never changes its meaning.
    '''
    from tokenize import TokenError  # Local import, only needed here

    if not prop.startswith('('):
        try:
            tokens = tokenize_select(prop)
        except (TokenError, SyntaxError):
            return '(%s)' % prop
        if not any(t.level == 0 and t in looser for t in tokens):
            return prop
    return '(%s)' % prop


def select_union_intersect_subtract(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like
    σ i(a) ∪ σ q(a)
    and replaces them with
    σ (i OR q) (a)
    Removing a O(n²) operation like the union.

    Intersection is replaced using "and", subtraction using "and not".
    The conditions are enclosed in parenthesis when joining them as
    they are would change their meaning, for example
    σ a or b (R) ∩ σ c (R) becomes σ ((a or b) and c) (R).

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if isinstance(n, Binary) and \
            n.name in {UNION, INTERSECTION, DIFFERENCE} and \
            isinstance(n.left, Unary) and \
            n.left.name == SELECTION and \
            isinstance(n.right, Unary) and \
            n.right.name == SELECTION and \
            n.left.child == n.right.child:

        # Tokens that outside of parenthesis require to enclose an operand
        ternary = ('if', 'else', 'lambda')
        if n.name == UNION:
            op = 'or'
            # "and" would not change the meaning, but selection_and_product
            # splits conditions on every "and" found outside of parenthesis
            l_looser = r_looser = ('and',) + ternary
        elif n.name == INTERSECTION:
            op = 'and'
            l_looser = r_looser = ('or',) + ternary
        else:
            op = 'and not'
            l_looser = ('or',) + ternary
            # "not" binds more tightly than both "and" and "or"
            r_looser = ('and', 'or') + ternary

        left = _enclose_select_operand(n.left.prop, l_looser)
        right = _enclose_select_operand(n.right.prop, r_looser)

        prop = '%s %s %s' % (left, op, right)
        # A condition containing parenthesis is enclosed as a whole
        if left.startswith('(') or right.startswith('('):
            prop = '(%s)' % prop
        return Unary(SELECTION, prop, n.left.child), 1
    return n, 0
def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]:
    '''
    A * B ∪ A * C = A * (B ∪ C)
    Same thing with inner join.

    The common operand can be on either side of each of the two
    products, so B * A ∪ C * A is handled as well.

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if isinstance(n, Binary) and \
            n.name == UNION and \
            isinstance(n.left, Binary) and \
            n.left.name in {PRODUCT, JOIN} and \
            isinstance(n.right, Binary) and \
            n.left.name == n.right.name:

        # Tries both operands of the left product as the common one,
        # the other is united with what remains of the right product
        candidates = (
            (n.left.left, n.left.right),
            (n.left.right, n.left.left),
        )
        for common, other in candidates:
            if common == n.right.right:
                rest = n.right.left
            elif common == n.right.left:
                rest = n.right.right
            else:
                continue
            newchild = Binary(UNION, other, rest)
            return Binary(n.left.name, common, newchild), 1
    return n, 0
def projection_and_union(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
    '''
    Turns
    π a,b,c(A) ∪ π a,b,c(B)
    into
    π a,b,c(A ∪ B)
    if A and B are union compatible.

    Nothing is done when the two projections do not select the same
    attributes.

    rels is passed to result_format to obtain the attributes of A and B.

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if n.name == UNION and \
            isinstance(n, Binary) and \
            n.left.name == PROJECTION and \
            isinstance(n.left, Unary) and \
            n.right.name == PROJECTION and \
            isinstance(n.right, Unary) and \
            set(n.left.get_projection_prop()) == set(n.right.get_projection_prop()) and \
            set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)):
        child = Binary(UNION, n.left.child, n.right.child)
        return Unary(PROJECTION, n.right.prop, child), 1
    return n, 0
def _join_condition_groups(groups: List[List[LevelString]]) -> str:
    '''Joins groups of tokens into a single condition, putting "and"
    between the groups. The condition is enclosed in parenthesis if it
    contains any.'''
    prop = ' and '.join(' '.join(i) for i in groups)
    if '(' in prop:
        prop = '(%s)' % prop
    return prop


def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
    '''This function locates things like σ k (R*Q) and converts them into
    σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
    i contains attributes belonging to Q and l contains attributes belonging to both.

    The same is done with the inner join.

    The condition is split on the "and" that are outside of parenthesis.
    It is not split at all when an operator weaker than "and" is present
    outside of parenthesis, because the parts of "a or b and c" are not
    in and with each other.

    rels is passed to result_format to obtain the attributes of R and Q.

    Returns the resulting node and the number of changes.
    '''
    if not (isinstance(n, Unary) and n.name == SELECTION and
            isinstance(n.child, Binary) and
            n.child.name in (PRODUCT, JOIN)):
        return n, 0

    l_attr = n.child.left.result_format(rels)
    r_attr = n.child.right.result_format(rels)

    tokens = tokenize_select(n.prop)
    groups: List[List[LevelString]] = []

    if any(k.level == 0 and k in ('or', 'if', 'else', 'lambda') for k in tokens):
        # Not a plain conjunction, keeping it as a single condition
        groups.append(tokens)
    else:
        temp: List[LevelString] = []
        for k in tokens:
            if k == 'and' and k.level == 0:
                groups.append(temp)
                temp = []
            else:
                temp.append(k)
        if temp:
            groups.append(temp)

    left = []
    right = []
    both = []

    for i in groups:
        l_fields = False  # has fields in left?
        r_fields = False  # has fields in right?

        for j in set(i).difference(sel_op):
            t = j.split('.')[0]
            if t in l_attr:  # Field in left
                l_fields = True
            if t in r_attr:  # Field in right
                r_fields = True

        if l_fields and not r_fields:
            left.append(i)
        elif r_fields and not l_fields:
            right.append(i)
        else:  # Unknown.. adding in both
            both.append(i)

    # Preparing left selection
    if left:
        l_node: Node = Unary(SELECTION, _join_condition_groups(left), n.child.left)
    else:
        l_node = n.child.left

    # Preparing right selection
    if right:
        r_node: Node = Unary(SELECTION, _join_condition_groups(right), n.child.right)
    else:
        r_node = n.child.right

    b_node = Binary(n.child.name, l_node, r_node)

    # Changing main selection
    if both:
        r = Unary(SELECTION, _join_condition_groups(both), b_node)
        return r, len(left) + len(right)
    else:  # No need for general select
        return b_node, 1
def useless_projection(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
    '''
    Removes projections that are over all the fields.

    rels is passed to result_format to obtain the fields of the
    projected expression.

    Returns the resulting node and the number of changes (0 or 1).
    '''
    if isinstance(n, Unary) and n.name == PROJECTION:
        available = set(n.child.result_format(rels))
        requested = {field.strip() for field in n.prop.split(',')}
        if available == requested:
            return n.child, 1
    return n, 0
# General optimizations: functions that only need the expression.
# Each of them takes a node and returns the resulting node together with
# the number of changes performed.
general_optimizations = [
    duplicated_select,
    down_to_unions_subtractions_intersections,
    duplicated_projection,
    selection_inside_projection,
    subsequent_renames,
    futile_renames,
    swap_rename_select,
    futile_union_intersection_subtraction,
    swap_union_renames,
    swap_rename_projection,
    select_union_intersect_subtract,
    union_and_product,
]
# Specific optimizations: functions that take, besides the node, the
# dictionary of the relations, which they pass to result_format to know
# the attributes of the expressions. They return the resulting node together
# with the number of changes performed.
specific_optimizations = [
    selection_and_product,
    projection_and_union,
    useless_projection,
]