Merge pull request #20 from ltworf/refactor_parser

Refactor parser
This commit is contained in:
Salvo 'LtWorf' Tomaselli 2020-06-09 23:49:11 +02:00 committed by GitHub
commit 5d3823d0ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 436 additions and 492 deletions

View File

@ -1,4 +1,7 @@
2.6 3.0
- Refactored parser to use better typing
- Refactored and fixed some optimizations
- Added more test cases
- Improved survey sending - Improved survey sending
- Prevent relation/field names from being reserved keywords - Prevent relation/field names from being reserved keywords
- Fixed issue in cli where loading an invalid file would lead to a crash - Fixed issue in cli where loading an invalid file would lead to a crash

View File

@ -1,5 +1,5 @@
# Relational # Relational
# Copyright (C) 2009-2018 Salvo "LtWorf" Tomaselli # Copyright (C) 2009-2020 Salvo "LtWorf" Tomaselli
# #
# Relational is free software: you can redistribute it and/or modify # Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
@ -30,8 +30,9 @@
from io import StringIO from io import StringIO
from tokenize import generate_tokens from tokenize import generate_tokens
from typing import Tuple, Dict
from relational.relation import Relation
from relational import parser from relational import parser
sel_op = ( sel_op = (
@ -98,36 +99,7 @@ def replace_node(replace, replacement):
replace.left = replacement.left replace.left = replacement.left
def recoursive_scan(function, node, rels=None): def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]:
'''Does a recoursive optimization on the tree.
This function will recoursively execute the function given
as "function" parameter starting from node to all the tree.
if rels is provided it will be passed as argument to the function.
Otherwise the function will be called just on the node.
Result value: function is supposed to return the amount of changes
it has performed on the tree.
The various result will be added up and this final value will be the
returned value.'''
changes = 0
# recoursive scan
if node.kind == parser.UNARY:
if rels != None:
changes += function(node.child, rels)
else:
changes += function(node.child)
elif node.kind == parser.BINARY:
if rels != None:
changes += function(node.right, rels)
changes += function(node.left, rels)
else:
changes += function(node.right)
changes += function(node.left)
return changes
def duplicated_select(n: parser.Node) -> int:
'''This function locates and deletes things like '''This function locates and deletes things like
σ a ( σ a(C)) and the ones like σ a ( σ b(C)) σ a ( σ a(C)) and the ones like σ a ( σ b(C))
replacing the 1st one with a single select and replacing the 1st one with a single select and
@ -135,243 +107,189 @@ def duplicated_select(n: parser.Node) -> int:
in and in and
''' '''
changes = 0 changes = 0
if n.name == SELECTION and n.child.name == SELECTION: while n.name == SELECTION and n.child.name == SELECTION:
changes += 1
prop = n.prop
if n.prop != n.child.prop: # Nested but different, joining them if n.prop != n.child.prop: # Nested but different, joining them
n.prop = n.prop + " and " + n.child.prop prop = n.prop + " and " + n.child.prop
# This adds parenthesis if they are needed # This adds parenthesis if they are needed
if n.child.prop.startswith('(') or n.prop.startswith('('): if n.child.prop.startswith('(') or n.prop.startswith('('):
n.prop = '(%s)' % n.prop prop = '(%s)' % prop
n = parser.Unary(
n.child = n.child.child SELECTION,
changes = 1 prop,
changes += duplicated_select(n) n.child.child,
)
return changes + recoursive_scan(duplicated_select, n) return n, changes
def futile_union_intersection_subtraction(n: parser.Node) -> int: def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, int]:
'''This function locates things like r r, and replaces them with r. '''This function locates things like r r, and replaces them with r.
R R --> R R R --> R
R R --> R R R --> R
R - R --> σ False (R) R - R --> σ False (R)
σ k (R) - R --> σ False (R) σ k (R) - R --> σ False (R)
R - σ k (R) --> σ not k (R) R - σ k (R) --> σ not k (R)
σ k (R) R --> R σ k (R) R --> R
σ k (R) R --> σ k (R) σ k (R) R --> σ k (R)
''' '''
changes = 0 changes = 0
# Union and intersection of the same thing # Union and intersection of the same thing
if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right: if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right:
changes = 1 return n.left, 1
replace_node(n, n.left)
# selection and union of the same thing # selection and union of the same thing
elif (n.name == UNION): elif (n.name == UNION):
if n.left.name == SELECTION and n.left.child == n.right: if n.left.name == SELECTION and n.left.child == n.right:
changes = 1 return n.right, 1
replace_node(n, n.right)
elif n.right.name == SELECTION and n.right.child == n.left: elif n.right.name == SELECTION and n.right.child == n.left:
changes = 1 return n.left, 1
replace_node(n, n.left)
# selection and intersection of the same thing # selection and intersection of the same thing
elif n.name == INTERSECTION: elif n.name == INTERSECTION:
if n.left.name == SELECTION and n.left.child == n.right: if n.left.name == SELECTION and n.left.child == n.right:
changes = 1 return n.left, 1
replace_node(n, n.left)
elif n.right.name == SELECTION and n.right.child == n.left: elif n.right.name == SELECTION and n.right.child == n.left:
changes = 1 return n.right, 1
replace_node(n, n.right)
# Subtraction and selection of the same thing # Subtraction and selection of the same thing
elif n.name == DIFFERENCE and \ elif n.name == DIFFERENCE and \
n.right.name == SELECTION and \ n.right.name == SELECTION and \
n.right.child == n.left: n.right.child == n.left:
n.name = n.right.name return parser.Unary(
n.kind = n.right.kind SELECTION,
n.child = n.right.child '(not (%s))' % n.right.prop,
n.prop = '(not (%s))' % n.right.prop n.right.child), 1
n.left = n.right = None
# Subtraction of the same thing or with selection on the left child # Subtraction of the same thing or with selection on the left child
elif n.name == DIFFERENCE and (n.left == n.right or (n.left.name == SELECTION and n.left.child == n.right)): elif n.name == DIFFERENCE and (n.left == n.right or (n.left.name == SELECTION and n.left.child == n.right)):
changes = 1 return parser.Unary(
n.kind = parser.UNARY SELECTION,
n.name = SELECTION 'False',
n.prop = 'False' n.get_left_leaf()
n.child = n.left.get_left_leaf() ), 1
# n.left=n.right=None return n, 0
return changes + recoursive_scan(futile_union_intersection_subtraction, n)
def down_to_unions_subtractions_intersections(n: parser.Node) -> int: def down_to_unions_subtractions_intersections(n: parser.Node) -> Tuple[parser.Node, int]:
'''This funcion locates things like σ i==2 (c d), where the union '''This funcion locates things like σ i==2 (c d), where the union
can be a subtraction and an intersection and replaces them with can be a subtraction and an intersection and replaces them with
σ i==2 (c) σ i==2(d). σ i==2 (c) σ i==2(d).
''' '''
changes = 0 changes = 0
_o = (UNION, DIFFERENCE, INTERSECTION) _o = (UNION, DIFFERENCE, INTERSECTION)
if n.name == SELECTION and n.child.name in _o: if n.name == SELECTION and n.child.name in _o:
l = parser.Unary(SELECTION, n.prop, n.child.left)
r = parser.Unary(SELECTION, n.prop, n.child.right)
left = parser.Node() return parser.Binary(n.child.name, l, r), 1
left.prop = n.prop return n, 0
left.name = n.name
left.child = n.child.left
left.kind = parser.UNARY
right = parser.Node()
right.prop = n.prop
right.name = n.name
right.child = n.child.right
right.kind = parser.UNARY
n.name = n.child.name
n.left = left
n.right = right
n.child = None
n.prop = None
n.kind = parser.BINARY
changes += 1
return changes + recoursive_scan(down_to_unions_subtractions_intersections, n)
def duplicated_projection(n: parser.Node) -> int: def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]:
'''This function locates thing like π i ( π j (R)) and replaces '''This function locates thing like π i ( π j (R)) and replaces
them with π i (R)''' them with π i (R)'''
changes = 0
if n.name == PROJECTION and n.child.name == PROJECTION: if n.name == PROJECTION and n.child.name == PROJECTION:
n.child = n.child.child return parser.Unary(
changes += 1 PROJECTION,
n.prop,
return changes + recoursive_scan(duplicated_projection, n) n.child.child), 1
return n, 0
def selection_inside_projection(n: parser.Node) -> int: def selection_inside_projection(n: parser.Node) -> Tuple[parser.Node, int]:
'''This function locates things like σ j (π k(R)) and '''This function locates things like σ j (π k(R)) and
converts them into π k(σ j (R))''' converts them into π k(σ j (R))'''
changes = 0
if n.name == SELECTION and n.child.name == PROJECTION: if n.name == SELECTION and n.child.name == PROJECTION:
changes = 1 child = parser.Unary(
temp = n.prop SELECTION,
n.prop = n.child.prop n.prop,
n.child.prop = temp n.child.child
n.name = PROJECTION )
n.child.name = SELECTION
return changes + recoursive_scan(selection_inside_projection, n) return parser.Unary(PROJECTION, n.child.prop, child), 0
return n, 0
def swap_union_renames(n: parser.Node) -> int: def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]:
'''This function locates things like '''This function locates things like
ρ ab(R) ρ ab(Q) ρ ab(R) ρ ab(Q)
and replaces them with and replaces them with
ρ ab(R Q). ρ ab(R Q).
Does the same with subtraction and intersection''' Does the same with subtraction and intersection'''
changes = 0 if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == RENAME and n.right.name == RENAME:
l_vars = n.left.get_rename_prop()
if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == n.right.name and n.left.name == RENAME: r_vars = n.right.get_rename_prop()
l_vars = {}
for i in n.left.prop.split(','):
q = i.split(ARROW)
l_vars[q[0].strip()] = q[1].strip()
r_vars = {}
for i in n.right.prop.split(','):
q = i.split(ARROW)
r_vars[q[0].strip()] = q[1].strip()
if r_vars == l_vars: if r_vars == l_vars:
changes = 1 child = parser.Binary(n.name, n.left.child, n.right.child)
return parser.Unary(RENAME, n.left.prop, child), 1
# Copying self, but child will be child of renames return n, 0
q = parser.Node()
q.name = n.name
q.kind = parser.BINARY
q.left = n.left.child
q.right = n.right.child
n.name = RENAME
n.kind = parser.UNARY
n.child = q
n.prop = n.left.prop
n.left = n.right = None
return changes + recoursive_scan(swap_union_renames, n)
def futile_renames(n: parser.Node) -> int: def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]:
'''This function purges renames like id->id''' '''This function purges renames like
changes = 0 ρ id->id,a->q (A)
into
ρ a->q (A)
or removes the operation entirely if they all get removed
'''
if n.name == RENAME: if n.name == RENAME:
# Located two nested renames. renames = n.get_rename_prop()
changes = 1 changes = False
for k, v in renames.items():
if k == v:
changes = True
del renames[k]
if len(renames) == 0: # Nothing to rename, removing the rename
return n.child, 1
elif changes:
# Changing the node in place, no need to return to cause a recursive step
n.set_rename_prop(renames)
# Creating a dictionary with the attributes return n, 0
_vars = {}
for i in n.prop.split(','):
q = i.split(ARROW)
_vars[q[0].strip()] = q[1].strip()
# Scans dictionary to locate things like "a->b,b->c" and replace them
# with "a->c"
for key in list(_vars.keys()):
value = _vars.get(key)
if key == value:
_vars.pop(value) # Removes the unused one
if len(_vars) == 0: # Nothing to rename, removing the rename op
replace_node(n, n.child)
else:
n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items())
return changes + recoursive_scan(futile_renames, n)
def subsequent_renames(n: parser.Node) -> int: def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]:
'''This function removes redoundant subsequent renames joining them into one''' '''This function removes redundant subsequent renames joining them into one
ρ .. ρ .. (A)
'''Purges renames like id->id Since it's needed to be performed BEFORE this one into
so it is not in the list with the other optimizations''' ρ ... (A)
futile_renames(n) '''
changes = 0
if n.name == RENAME and n.child.name == RENAME: if n.name == RENAME and n.child.name == RENAME:
# Located two nested renames. # Located two nested renames.
changes = 1 prop = n.prop + ',' + n.child.prop
# Joining the attribute into one child = n.child.child
n.prop += ',' + n.child.prop n = parser.Unary(RENAME, prop, child)
n.child = n.child.child
# Creating a dictionary with the attributes # Creating a dictionary with the attributes
_vars = {} renames = n.get_rename_prop()
for i in n.prop.split(','):
q = i.split(ARROW)
_vars[q[0].strip()] = q[1].strip()
# Scans dictionary to locate things like "a->b,b->c" and replace them # Scans dictionary to locate things like "a->b,b->c" and replace them
# with "a->c" # with "a->c"
for key in list(_vars.keys()): for key, value in tuple(renames.items()):
value = _vars.get(key)
if value in _vars.keys(): if value in renames:
if _vars[value] != key: if renames[value] != key:
# Double rename on attribute # Double rename on attribute
_vars[key] = _vars[_vars[key]] # Sets value renames[key] = renames[renames[key]] # Sets value
_vars.pop(value) # Removes the unused one del renames[value] # Removes the unused one
else: # Cycle rename a->b,b->a else: # Cycle rename a->b,b->a
_vars.pop(value) # Removes the unused one del renames[value] # Removes the unused one
_vars.pop(key) # Removes the unused one del renames[key] # Removes the unused one
if len(_vars) == 0: # Nothing to rename, removing the rename op if len(renames) == 0: # Nothing to rename, removing the rename op
replace_node(n, n.child) return n.child, 1
else: else:
n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items()) n.set_rename_prop(renames)
return n, 1
return changes + recoursive_scan(subsequent_renames, n) return n, 0
class level_string(str): class level_string(str):
@ -411,101 +329,84 @@ def tokenize_select(expression):
return l return l
def swap_rename_projection(n: parser.Node) -> int: def swap_rename_projection(n: parser.Node) -> Tuple[parser.Node, int]:
'''This function locates things like π k(ρ j(R)) '''This function locates things like
and replaces them with ρ j(π k(R)). π k(ρ j(R))
and replaces them with
ρ j(π k(R)).
This will let rename work on a hopefully smaller set This will let rename work on a hopefully smaller set
and more important, will hopefully allow further optimizations. and more important, will hopefully allow further optimizations.
Will also eliminate fields in the rename that are cutted in the projection.
Will also eliminate fields in the rename that are cut in the projection.
''' '''
changes = 0
if n.name == PROJECTION and n.child.name == RENAME: if n.name == PROJECTION and n.child.name == RENAME:
changes = 1
# π index,name(ρ id➡index(R)) # π index,name(ρ id➡index(R))
_vars = {} renames = n.child.get_rename_prop()
for i in n.child.prop.split(','): projections = set(n.get_projection_prop())
q = i.split(ARROW)
_vars[q[1].strip()] = q[0].strip()
_pr = n.prop.split(',') # Use pre-rename names in the projection
for i in range(len(_pr)): for k, v in renames.items():
try: if v in projections:
_pr[i] = _vars[_pr[i].strip()] projections.remove(v)
except: projections.add(k)
pass
_pr_reborn = n.prop.split(',') # Eliminate fields
for i in list(_vars.keys()): for i in list(renames.keys()):
if i not in _pr_reborn: if i not in projections:
_vars.pop(i) del renames[i]
n.name = n.child.name
n.prop = ','.join('%s%s%s' % (i[1], ARROW, i[0]) for i in _vars.items()) child = parser.Unary(PROJECTION,'' , n.child.child)
child.set_projection_prop(projections)
n = parser.Unary(RENAME, '', child)
n.set_rename_prop(renames)
return n, 1
n.child.name = PROJECTION return n, 0
n.child.prop = ''
for i in _pr:
n.child.prop += i + ','
n.child.prop = n.child.prop[:-1]
return changes + recoursive_scan(swap_rename_projection, n)
def swap_rename_select(n: parser.Node) -> int: def swap_rename_select(n: parser.Node) -> int:
'''This function locates things like σ k(ρ j(R)) and replaces '''This function locates things like
them with ρ j(σ k(R)). Renaming the attributes used in the σ k(ρ j(R))
and replaces them with
ρ j(σ k(R)).
Renaming the attributes used in the
selection, so the operation is still valid.''' selection, so the operation is still valid.'''
changes = 0
if n.name == SELECTION and n.child.name == RENAME: if n.name == SELECTION and n.child.name == RENAME:
changes = 1 # This is an inverse mapping for the rename
# Dictionary containing attributes of rename renames = {v: k for k, v in n.child.get_rename_prop().items()}
_vars = {}
for i in n.child.prop.split(','):
q = i.split(ARROW)
_vars[q[1].strip()] = q[0].strip()
# tokenizes expression in select # tokenizes expression in select
_tokens = tokenize_select(n.prop) tokens = tokenize_select(n.prop)
# Renaming stuff # Renaming stuff, no enum because I edit the tokens
for i in range(len(_tokens)): for i in range(len(tokens)):
splitted = _tokens[i].split('.', 1) splitted = tokens[i].split('.', 1)
if splitted[0] in _vars: if splitted[0] in renames:
if len(splitted) == 1: tokens[i] = renames[splitted[0]]
_tokens[i] = _vars[_tokens[i].split('.')[0]] if len(splitted) > 1:
else: tokens[i] += '.' + splitted[1]
_tokens[i] = _vars[
_tokens[i].split('.')[0]] + '.' + splitted[1]
# Swapping operators child = parser.Unary(SELECTION, ' '.join(tokens), n.child.child)
n.name = RENAME return parser.Unary(RENAME, n.child.prop, child), 1
n.child.name = SELECTION return n, 0
n.prop = n.child.prop
n.child.prop = ' '.join(_tokens)
return changes + recoursive_scan(swap_rename_select, n)
def select_union_intersect_subtract(n: parser.Node) -> int: def select_union_intersect_subtract(n: parser.Node) -> int:
'''This function locates things like σ i(a) σ q(a) '''This function locates things like
and replaces them with σ (i OR q) (a) σ i(a) σ q(a)
and replaces them with
σ (i OR q) (a)
Removing a O() operation like the union''' Removing a O() operation like the union'''
changes = 0
if n.name in {UNION, INTERSECTION, DIFFERENCE} and \ if n.name in {UNION, INTERSECTION, DIFFERENCE} and \
n.left.name == SELECTION and \ n.left.name == SELECTION and \
n.right.name == SELECTION and \ n.right.name == SELECTION and \
n.left.child == n.right.child: n.left.child == n.right.child:
changes = 1
d = {UNION: 'or', INTERSECTION: 'and', DIFFERENCE: 'and not'} d = {UNION: 'or', INTERSECTION: 'and', DIFFERENCE: 'and not'}
op = d[n.name] op = d[n.name]
newnode = parser.Node()
if n.left.prop.startswith('(') or n.right.prop.startswith('('): if n.left.prop.startswith('(') or n.right.prop.startswith('('):
t_str = '(' t_str = '('
if n.left.prop.startswith('('): if n.left.prop.startswith('('):
@ -519,54 +420,34 @@ def select_union_intersect_subtract(n: parser.Node) -> int:
t_str += '%s' t_str += '%s'
t_str += ')' t_str += ')'
newnode.prop = t_str % (n.left.prop, op, n.right.prop) prop = t_str % (n.left.prop, op, n.right.prop)
else: else:
newnode.prop = '%s %s %s' % (n.left.prop, op, n.right.prop) prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
newnode.name = SELECTION return parser.Unary(SELECTION, prop, n.left.child), 1
newnode.child = n.left.child return n, 0
newnode.kind = parser.UNARY
replace_node(n, newnode)
return changes + recoursive_scan(select_union_intersect_subtract, n)
def union_and_product(n: parser.Node) -> int: def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]:
''' '''
A * B A * C = A * (B C) A * B A * C = A * (B C)
Same thing with inner join Same thing with inner join
''' '''
changes = 0
if n.name == UNION and n.left.name in {PRODUCT, JOIN} and n.left.name == n.right.name: if n.name == UNION and n.left.name in {PRODUCT, JOIN} and n.left.name == n.right.name:
newnode = parser.Node()
newnode.kind = parser.BINARY
newnode.name = n.left.name
newchild = parser.Node()
newchild.kind = parser.BINARY
newchild.name = UNION
if n.left.left == n.right.left or n.left.left == n.right.right: if n.left.left == n.right.left or n.left.left == n.right.right:
newnode.left = n.left.left l = n.left.right
newnode.right = newchild r = n.right.left if n.left.left == n.right.right else n.right.right
newchild = parser.Binary(UNION, l, r)
newchild.left = n.left.right return parser.Binary(n.left.name, n.left.left, newchild), 1
newchild.right = n.right.left if n.left.left == n.right.right else n.right.right
replace_node(n, newnode)
changes = 1
elif n.left.right == n.right.left or n.left.left == n.right.right: elif n.left.right == n.right.left or n.left.left == n.right.right:
newnode.left = n.left.right l = n.left.left
newnode.right = newchild r = n.right.left if n.right.left == n.right.right else n.right.right
newchild = parser.Binary(UNION, l, r)
newchild.left = n.left.left return parser.Binary(n.left.name, n.left.right, newchild), 1
newchild.right = n.right.left if n.right.left == n.right.right else n.right.right return n, 0
replace_node(n, newnode)
changes = 1
return changes + recoursive_scan(union_and_product, n)
def projection_and_union(n, rels): def projection_and_union(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
''' '''
Turns Turns
π a,b,c(A) π a,b,c(B) π a,b,c(A) π a,b,c(B)
@ -581,28 +462,16 @@ def projection_and_union(n, rels):
n.left.name == PROJECTION and \ n.left.name == PROJECTION and \
n.right.name == PROJECTION and \ n.right.name == PROJECTION and \
set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)): set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)):
newchild = parser.Node()
newchild.kind = parser.BINARY child = parser.Binary(UNION, n.left.child, n.right.child)
newchild.name = UNION return parser.Unary(PROJECTION, n.right.prop, child), 0
newchild.left = n.left.child return n, 0
newchild.right = n.right.child
newnode = parser.Node()
newnode.child = newchild
newnode.kind = parser.UNARY
newnode.name = PROJECTION
newnode.prop = n.right.prop
replace_node(n, newnode)
changes = 1
return changes + recoursive_scan(projection_and_union, n, rels)
def selection_and_product(n, rels): def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.Node:
'''This function locates things like σ k (R*Q) and converts them into '''This function locates things like σ k (R*Q) and converts them into
σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R, σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
i contains attributes belonging to Q and l contains attributes belonging to both''' i contains attributes belonging to Q and l contains attributes belonging to both'''
changes = 0
if n.name == SELECTION and n.child.name in (PRODUCT, JOIN): if n.name == SELECTION and n.child.name in (PRODUCT, JOIN):
l_attr = n.child.left.result_format(rels) l_attr = n.child.left.result_format(rels)
@ -637,76 +506,71 @@ def selection_and_product(n, rels):
if j in r_attr: # Field in right if j in r_attr: # Field in right
r_fields = True r_fields = True
if l_fields and r_fields: # Fields in both if l_fields and not r_fields:
both.append(i)
elif l_fields:
left.append(i) left.append(i)
elif r_fields: elif r_fields and not l_fields:
right.append(i) right.append(i)
else: # Unknown.. adding in both else: # Unknown.. adding in both
both.append(i) both.append(i)
# Preparing left selection # Preparing left selection
if len(left) > 0: if left:
changes = 1 l_prop = ''
l_node = parser.Node()
l_node.name = SELECTION
l_node.kind = parser.UNARY
l_node.child = n.child.left
l_node.prop = ''
n.child.left = l_node
while len(left) > 0: while len(left) > 0:
c = left.pop(0) c = left.pop(0)
for i in c: for i in c:
l_node.prop += i + ' ' l_prop += i + ' '
if len(left) > 0: if len(left) > 0:
l_node.prop += ' and ' l_prop += ' and '
if '(' in l_node.prop: if '(' in l_prop:
l_node.prop = '(%s)' % l_node.prop l_prop = '(%s)' % l_prop
l_node = parser.Unary(SELECTION, l_prop, n.child.left)
else:
l_node = n.child.left
# Preparing right selection # Preparing right selection
if len(right) > 0: if right:
changes = 1 r_prop = ''
r_node = parser.Node()
r_node.name = SELECTION
r_node.prop = ''
r_node.kind = parser.UNARY
r_node.child = n.child.right
n.child.right = r_node
while len(right) > 0: while len(right) > 0:
c = right.pop(0) c = right.pop(0)
r_node.prop += ' '.join(c) r_prop += ' '.join(c)
if len(right) > 0: if len(right) > 0:
r_node.prop += ' and ' r_prop += ' and '
if '(' in r_node.prop: if '(' in r_prop:
r_node.prop = '(%s)' % r_node.prop r_prop = '(%s)' % r_prop
r_node = parser.Unary(SELECTION, r_prop, n.child.right)
else:
r_node = n.child.right
b_node = parser.Binary(n.child.name, l_node, r_node)
# Changing main selection # Changing main selection
n.prop = '' if both:
if len(both) != 0: both_prop = ''
while len(both) > 0: while len(both) > 0:
c = both.pop(0) c = both.pop(0)
n.prop += ' '.join(c) both_prop += ' '.join(c)
if len(both) > 0: if len(both) > 0:
n.prop += ' and ' both_prop += ' and '
if '(' in n.prop: if '(' in both_prop:
n.prop = '(%s)' % n.prop both_prop = '(%s)' % both_prop
r = parser.Unary(SELECTION, both_prop, b_node)
return r, len(left) + len(right)
else: # No need for general select else: # No need for general select
replace_node(n, n.child) return b_node, 1
return changes + recoursive_scan(selection_and_product, n, rels) return n, 0
def useless_projection(n, rels) -> int: def useless_projection(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
''' '''
Removes projections that are over all the fields Removes projections that are over all the fields
''' '''
changes = 0
if n.name == PROJECTION and \ if n.name == PROJECTION and \
set(n.child.result_format(rels)) == set(i.strip() for i in n.prop.split(',')): set(n.child.result_format(rels)) == set(i.strip() for i in n.prop.split(',')):
changes = 1 return n.child, 1
replace_node(n, n.child)
return changes + recoursive_scan(useless_projection, n, rels) return n, 0
general_optimizations = [ general_optimizations = [
duplicated_select, duplicated_select,
@ -714,6 +578,7 @@ general_optimizations = [
duplicated_projection, duplicated_projection,
selection_inside_projection, selection_inside_projection,
subsequent_renames, subsequent_renames,
futile_renames,
swap_rename_select, swap_rename_select,
futile_union_intersection_subtraction, futile_union_intersection_subtraction,
swap_union_renames, swap_union_renames,
@ -726,6 +591,3 @@ specific_optimizations = [
projection_and_union, projection_and_union,
useless_projection, useless_projection,
] ]
if __name__ == "__main__":
print (tokenize_select("skill == 'C' and id % 2 == 0"))

View File

@ -1,5 +1,5 @@
# Relational # Relational
# Copyright (C) 2008-2016 Salvo "LtWorf" Tomaselli # Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
# #
# Relational is free software: you can redistribute it and/or modify # Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
@ -22,23 +22,22 @@
# relational query, or it can be a parse tree for a relational expression (ie: class parser.node). # relational query, or it can be a parse tree for a relational expression (ie: class parser.node).
# The functions will always return a string with the optimized query, but if a parse tree was provided, # The functions will always return a string with the optimized query, but if a parse tree was provided,
# the parse tree itself will be modified accordingly. # the parse tree itself will be modified accordingly.
from typing import Union, Optional, Dict, Any from typing import Union, Optional, Dict, Any, Tuple
from relational.relation import Relation
from relational import optimizations from relational import optimizations
from relational.parser import Node, RELATION, UNARY, BINARY, op_functions, tokenize, tree from relational.parser import Node, Variable, Unary, Binary, op_functions, tokenize, tree
from relational import querysplit from relational import querysplit
from relational.maintenance import UserInterface from relational.maintenance import UserInterface
ContextDict = Dict[str,Any]
def optimize_program(code, rels: Dict[str, Relation]):
def optimize_program(code, rels: ContextDict):
''' '''
Optimize an entire program, composed by multiple expressions Optimize an entire program, composed by multiple expressions
and assignments. and assignments.
''' '''
lines = code.split('\n') lines = code.split('\n')
context = {} # type: ContextDict context = {}
for line in lines: for line in lines:
line = line.strip() line = line.strip()
@ -53,7 +52,7 @@ def optimize_program(code, rels: ContextDict):
return querysplit.split(node, rels) return querysplit.split(node, rels)
def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]: def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
'''This function performs all the available optimizations. '''This function performs all the available optimizations.
expression : see documentation of this module expression : see documentation of this module
@ -82,23 +81,23 @@ def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool
total = 0 total = 0
if specific: if specific:
for i in optimizations.specific_optimizations: for i in optimizations.specific_optimizations:
res = i(n, rels) # Performs the optimization n, c = recursive_scan(i, n, rels)
if res != 0 and dbg: if c != 0 and dbg:
debug.append(str(n)) debug.append(str(n))
total += res total += c
if general: if general:
for i in optimizations.general_optimizations: for i in optimizations.general_optimizations:
res = i(n) # Performs the optimization n, c = recursive_scan(i, n, None)
if res != 0 and dbg: if c != 0 and dbg:
debug.append(str(n)) debug.append(str(n))
total += res total += c
if tostr: if tostr:
return str(n) return str(n)
else: else:
return n return n
def specific_optimize(expression, rels: ContextDict): def specific_optimize(expression, rels: Dict[str, Relation]):
'''This function performs specific optimizations. Means that it will need to '''This function performs specific optimizations. Means that it will need to
know the fields used by the relations. know the fields used by the relations.
@ -117,3 +116,35 @@ def general_optimize(expression):
Return value: this will return an optimized version of the expression''' Return value: this will return an optimized version of the expression'''
return optimize_all(expression, None, specific=False, general=True) return optimize_all(expression, None, specific=False, general=True)
def recursive_scan(function, node, rels) -> Tuple[Node, int]:
'''Does a recursive optimization on the tree.
This function will recursively execute the function given
as "function" parameter starting from node to all the tree.
if rels is provided it will be passed as argument to the function.
Otherwise the function will be called just on the node.
Result value: function is supposed to return the amount of changes
it has performed on the tree.
The various result will be added up and this final value will be the
returned value.'''
args = []
if rels:
args.append(rels)
changes = 0
node, c = function(node, *args)
changes += c
if isinstance(node, Unary):
node.child, c = recursive_scan(function, node.child, rels)
changes += c
elif isinstance(node, Binary):
node.left, c = recursive_scan(function, node.left, rels)
changes += c
node.right, c = recursive_scan(function, node.right, rels)
changes += c
return node, changes

View File

@ -1,5 +1,5 @@
# Relational # Relational
# Copyright (C) 2008-2017 Salvo "LtWorf" Tomaselli # Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
# #
# Relational is free software: you can redistribute it and/or modify # Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
@ -24,14 +24,11 @@
# #
# Language definition here: # Language definition here:
# http://ltworf.github.io/relational/grammar.html # http://ltworf.github.io/relational/grammar.html
from typing import Optional, Union, List, Any from typing import Optional, Union, List, Any, Dict
from dataclasses import dataclass
from relational import rtypes from relational import rtypes
RELATION = 0
UNARY = 1
BINARY = 2
PRODUCT = '*' PRODUCT = '*'
DIFFERENCE = '-' DIFFERENCE = '-'
UNION = '' UNION = ''
@ -84,9 +81,8 @@ class CallableString(str):
''' '''
return eval(self, context) return eval(self, context)
@dataclass
class Node: class Node:
'''This class is a node of a relational expression. Leaves are relations '''This class is a node of a relational expression. Leaves are relations
and internal nodes are operations. and internal nodes are operations.
@ -102,72 +98,12 @@ class Node:
operation. operation.
This class is used to convert an expression into python code.''' This class is used to convert an expression into python code.'''
kind = None # type: Optional[int] name: str
__hash__ = None # type: None
def __init__(self, expression: Optional[list] = None) -> None: def __init__(self, name: str) -> None:
'''Generates the tree from the tokenized expression raise NotImplementedError('This is supposed to be an abstract class')
If no expression is specified then it will create an empty node'''
if expression is None or len(expression) == 0:
return
# If the list contains only a list, it will consider the lower level list. def toCode(self): #FIXME return type
# This will allow things like ((((((a))))) to work
while len(expression) == 1 and isinstance(expression[0], list):
expression = expression[0]
# The list contains only 1 string. Means it is the name of a relation
if len(expression) == 1:
self.kind = RELATION
self.name = expression[0]
if not rtypes.is_valid_relation_name(self.name):
raise ParserException(
u"'%s' is not a valid relation name" % self.name)
return
# Expression from right to left, searching for binary operators
# this means that binary operators have lesser priority than
# unary operators.
# It finds the operator with lesser priority, uses it as root of this
# (sub)tree using everything on its left as left parameter (so building
# a left subtree with the part of the list located on left) and doing
# the same on right.
# Since it searches for strings, and expressions into parenthesis are
# within sub-lists, they won't be found here, ensuring that they will
# have highest priority.
for i in range(len(expression) - 1, -1, -1):
if expression[i] in b_operators: # Binary operator
self.kind = BINARY
self.name = expression[i]
if len(expression[:i]) == 0:
raise ParserException(
u"Expected left operand for '%s'" % self.name)
if len(expression[i + 1:]) == 0:
raise ParserException(
u"Expected right operand for '%s'" % self.name)
self.left = node(expression[:i])
self.right = node(expression[i + 1:])
return
'''Searches for unary operators, parsing from right to left'''
for i in range(len(expression) - 1, -1, -1):
if expression[i] in u_operators: # Unary operator
self.kind = UNARY
self.name = expression[i]
if len(expression) <= i + 2:
raise ParserException(
u"Expected more tokens in '%s'" % self.name)
self.prop = expression[1 + i].strip()
self.child = node(expression[2 + i])
return
raise ParserException("Expected operator in '%s'" % expression)
def toCode(self):
'''This method converts the AST into a python code object''' '''This method converts the AST into a python code object'''
code = self._toPython() code = self._toPython()
return compile(code, '<relational_expression>', 'eval') return compile(code, '<relational_expression>', 'eval')
@ -181,25 +117,7 @@ class Node:
return CallableString(self._toPython()) return CallableString(self._toPython())
def _toPython(self) -> str: def _toPython(self) -> str:
''' raise NotImplementedError()
Same as toPython but returns a regular string
'''
if self.name in b_operators:
return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
elif self.name in u_operators:
prop = self.prop
# Converting parameters
if self.name == PROJECTION:
prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"')
elif self.name == RENAME:
prop = '{\"%s\"}' % prop.replace(
',', '\",\"').replace(ARROW, '\":\"').replace(' ', '')
else: # Selection
prop = repr(prop)
return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
return self.name
def printtree(self, level: int = 0) -> str: def printtree(self, level: int = 0) -> str:
'''returns a representation of the tree using indentation''' '''returns a representation of the tree using indentation'''
@ -216,27 +134,20 @@ class Node:
return '\n' + r return '\n' + r
def get_left_leaf(self) -> 'Node': def get_left_leaf(self) -> 'Node':
'''This function returns the leftmost leaf in the tree.''' raise NotImplementedError()
if self.kind == RELATION:
return self
elif self.kind == UNARY:
return self.child.get_left_leaf()
elif self.kind == BINARY:
return self.left.get_left_leaf()
raise ValueError('What kind of alien object is this?')
def result_format(self, rels: dict) -> list: def result_format(self, rels: dict) -> list: #FIXME types
'''This function returns a list containing the fields that the resulting relation will have. '''This function returns a list containing the fields that the resulting relation will have.
It requires a dictionary where keys are the names of the relations and the values are It requires a dictionary where keys are the names of the relations and the values are
the relation objects.''' the relation objects.'''
if not isinstance(rels, dict): if not isinstance(rels, dict):
raise TypeError('Can\'t be of None type') raise TypeError('Can\'t be of None type')
if self.kind == RELATION: if isinstance(self, Variable): #FIXME this is ugly
return list(rels[self.name].header) return list(rels[self.name].header)
elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION): elif isinstance(self, Binary) and self.name in (DIFFERENCE, UNION, INTERSECTION):
return self.left.result_format(rels) return self.left.result_format(rels)
elif self.kind == BINARY and self.name == DIVISION: elif isinstance(self, Binary) and self.name == DIVISION:
return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels))) return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
elif self.name == PROJECTION: elif self.name == PROJECTION:
return [i.strip() for i in self.prop.split(',')] return [i.strip() for i in self.prop.split(',')]
@ -259,7 +170,7 @@ class Node:
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels)))) return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
raise ValueError('What kind of alien object is this?') raise ValueError('What kind of alien object is this?')
def __eq__(self, other): def __eq__(self, other): #FIXME
if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind): if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
return False return False
@ -271,22 +182,151 @@ class Node:
return self.left == other.left and self.right == other.right return self.left == other.left and self.right == other.right
return True return True
@dataclass
class Variable(Node):
    '''Leaf of the expression tree: a relation referred to by its name.'''

    def _toPython(self) -> str:
        '''Returns the python code for this leaf, which is its name.'''
        return self.name

    def __str__(self) -> str:
        '''Returns the name of the relation.'''
        return self.name

    def get_left_leaf(self) -> Node:
        '''Returns this node, since a leaf is its own leftmost leaf.'''
        return self
def _find_matching_parenthesis(expression: str, start=0, openpar=u'(', closepar=u')') -> Optional[int]: @dataclass
class Binary(Node):
    '''Internal node for a binary operation: name is the operator and
    left and right are the two operand subtrees.'''
    left: Node
    right: Node

    def get_left_leaf(self) -> Node:
        '''Returns the leftmost leaf, found by descending the left operand.'''
        return self.left.get_left_leaf()

    def _toPython(self) -> str:
        '''Returns the python expression that calls the method of the
        operator on the left operand, passing the right operand.'''
        left_code = self.left._toPython()
        method = op_functions[self.name]
        right_code = self.right._toPython()
        return f'{left_code}.{method}({right_code})'

    def __str__(self):
        '''Returns the expression as text. A right operand that is itself
        a binary operation is wrapped in parentheses.'''
        left_text = str(self.left)
        right_text = str(self.right)
        if isinstance(self.right, Binary):
            right_text = f'({right_text})'
        return f'{left_text}{self.name}{right_text}'
@dataclass
class Unary(Node):
    '''Internal node for a unary operation: name is the operator, prop
    is its parameter as text and child is the subtree it applies to.'''
    prop: str
    child: Node

    def get_left_leaf(self) -> Node:
        '''Returns the leftmost leaf, found by descending the child.'''
        return self.child.get_left_leaf()

    def __str__(self):
        '''Returns the expression as text: operator, parameter and the
        child between parentheses.'''
        return f'{self.name} {self.prop} ({self.child})'

    def _toPython(self) -> str:
        '''Returns the python expression that calls the method of the
        operator on the child, with the parameter converted to the form
        that the method expects.'''
        if self.name == PROJECTION:
            # Every field becomes a separate quoted argument
            fields = self.prop.replace(' ', '').replace(',', '","')
            argument = f'"{fields}"'
        elif self.name == RENAME:
            argument = repr(self.get_rename_prop())
        else:  # Selection
            argument = repr(self.prop)
        return f'{self.child._toPython()}.{op_functions[self.name]}({argument})'

    def get_projection_prop(self) -> List[str]:
        '''Returns the list of fields of a projection.

        Raises ValueError if the node is not a projection.'''
        if self.name != PROJECTION:
            raise ValueError('This is only supported on projection nodes')
        return [field.strip() for field in self.prop.split(',')]

    def set_projection_prop(self, p: List[str]) -> None:
        '''Sets the prop field from a list of fields.

        Raises ValueError if the node is not a projection.'''
        if self.name != PROJECTION:
            raise ValueError('This is only supported on projection nodes')
        self.prop = ','.join(p)

    def get_rename_prop(self) -> Dict[str, str]:
        '''
        Returns the dictionary that the rename operation wants

        Raises ValueError if the node is not a rename.
        '''
        if self.name != RENAME:
            raise ValueError('This is only supported on rename nodes')
        pairs = (item.split(ARROW) for item in self.prop.split(','))
        return {pair[0].strip(): pair[1].strip() for pair in pairs}

    def set_rename_prop(self, renames: Dict[str, str]) -> None:
        '''
        Sets the prop field based on the dictionary for renames

        Raises ValueError if the node is not a rename.
        '''
        if self.name != RENAME:
            raise ValueError('This is only supported on rename nodes')
        items = [f'{old}{ARROW}{new}' for old, new in renames.items()]
        self.prop = ','.join(items)
def parse_tokens(expression: List[Union[list, str]]) -> Node:
    '''Generates the tree from the tokenized expression.

    expression is the list of tokens; an expression within
    parenthesis is a nested list.

    Return value: the root node of the tree, which is a Variable, a
    Binary or a Unary.

    Raises ParserException if a relation name is not valid, if an
    operator lacks an operand or if no operator is found, which
    includes the case of an empty expression.'''

    # If the list contains only a list, it will consider the lower level list.
    # This will allow things like ((((((a))))) to work
    while len(expression) == 1 and isinstance(expression[0], list):
        expression = expression[0]

    # The list contains only 1 string. Means it is the name of a relation
    if len(expression) == 1:
        if not rtypes.is_valid_relation_name(expression[0]):
            raise ParserException(
                f"'{expression[0]}' is not a valid relation name")
        return Variable(expression[0])  # FIXME Move validation in the object

    # Expression from right to left, searching for binary operators
    # this means that binary operators have lesser priority than
    # unary operators.
    # It finds the operator with lesser priority, uses it as root of this
    # (sub)tree using everything on its left as left parameter (so building
    # a left subtree with the part of the list located on left) and doing
    # the same on right.
    # Since it searches for strings, and expressions into parenthesis are
    # within sub-lists, they won't be found here, ensuring that they will
    # have highest priority.
    for i in range(len(expression) - 1, -1, -1):
        if expression[i] in b_operators:  # Binary operator
            # This is a plain function, there is no self: the operator
            # to report is the token itself.
            operator = expression[i]
            left = expression[:i]
            right = expression[i + 1:]
            if not left:
                raise ParserException(
                    f"Expected left operand for '{operator}'")
            if not right:
                raise ParserException(
                    f"Expected right operand for '{operator}'")
            return Binary(operator, parse_tokens(left), parse_tokens(right))

    # Searches for unary operators, parsing from right to left
    for i in range(len(expression) - 1, -1, -1):
        if expression[i] in u_operators:  # Unary operator
            operator = expression[i]
            # The operator must be followed by its parameter and its child
            if len(expression) <= i + 2:
                raise ParserException(
                    f"Expected more tokens in '{operator}'")
            return Unary(
                operator,
                prop=expression[1 + i].strip(),
                child=parse_tokens(expression[2 + i])
            )
    raise ParserException('Parse error')  # FIXME more details
def _find_matching_parenthesis(expression: str, start=0, openpar='(', closepar=')') -> Optional[int]:
'''This function returns the position of the matching '''This function returns the position of the matching
close parenthesis to the 1st open parenthesis found close parenthesis to the 1st open parenthesis found
starting from start (0 by default)''' starting from start (0 by default)'''
@ -391,7 +431,7 @@ def tokenize(expression: str) -> list:
def tree(expression: str) -> Node:
    '''Parses a relational algebra expression and returns the root
    node of the resulting AST.'''
    tokens = tokenize(expression)
    return parse_tokens(tokens)
def parse(expr: str) -> CallableString: def parse(expr: str) -> CallableString:
@ -400,11 +440,3 @@ def parse(expr: str) -> CallableString:
Python expression. Python expression.
''' '''
return tree(expr).toPython() return tree(expr).toPython()
if __name__ == "__main__":
while True:
e = input("Expression: ")
print (parse(e))
# Backwards compatibility
node = Node

View File

@ -0,0 +1 @@
ρ name➡n,age➡a(σTrue(people)) ρ age➡a,name➡n(people)

View File

@ -0,0 +1,9 @@
id,n,chief,a
0,jack,0,22
1,carl,0,20
2,john,1,30
3,dean,1,33
4,eve,0,25
5,duncan,4,30
6,paul,4,30
7,alia,1,28

View File

@ -0,0 +1 @@
σ i%2==0 (ρ id➡i (people))

View File

@ -0,0 +1,5 @@
i,name,chief,age
0,jack,0,22
2,john,1,30
4,eve,0,25
6,paul,4,30