commit
5d3823d0ea
@ -1,4 +1,7 @@
|
|||||||
2.6
|
3.0
|
||||||
|
- Refactored parser to use better typing
|
||||||
|
- Refactored and fixed some optimizations
|
||||||
|
- Added more test cases
|
||||||
- Improved survey sending
|
- Improved survey sending
|
||||||
- Prevent relation/field names from being reserved keywords
|
- Prevent relation/field names from being reserved keywords
|
||||||
- Fixed issue in cli where loading an invalid file would lead to a crash
|
- Fixed issue in cli where loading an invalid file would lead to a crash
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# Relational
|
# Relational
|
||||||
# Copyright (C) 2009-2018 Salvo "LtWorf" Tomaselli
|
# Copyright (C) 2009-2020 Salvo "LtWorf" Tomaselli
|
||||||
#
|
#
|
||||||
# Relational is free software: you can redistribute it and/or modify
|
# Relational is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
@ -30,8 +30,9 @@
|
|||||||
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from tokenize import generate_tokens
|
from tokenize import generate_tokens
|
||||||
|
from typing import Tuple, Dict
|
||||||
|
|
||||||
|
from relational.relation import Relation
|
||||||
from relational import parser
|
from relational import parser
|
||||||
|
|
||||||
sel_op = (
|
sel_op = (
|
||||||
@ -98,36 +99,7 @@ def replace_node(replace, replacement):
|
|||||||
replace.left = replacement.left
|
replace.left = replacement.left
|
||||||
|
|
||||||
|
|
||||||
def recoursive_scan(function, node, rels=None):
|
def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''Does a recoursive optimization on the tree.
|
|
||||||
|
|
||||||
This function will recoursively execute the function given
|
|
||||||
as "function" parameter starting from node to all the tree.
|
|
||||||
if rels is provided it will be passed as argument to the function.
|
|
||||||
Otherwise the function will be called just on the node.
|
|
||||||
|
|
||||||
Result value: function is supposed to return the amount of changes
|
|
||||||
it has performed on the tree.
|
|
||||||
The various result will be added up and this final value will be the
|
|
||||||
returned value.'''
|
|
||||||
changes = 0
|
|
||||||
# recoursive scan
|
|
||||||
if node.kind == parser.UNARY:
|
|
||||||
if rels != None:
|
|
||||||
changes += function(node.child, rels)
|
|
||||||
else:
|
|
||||||
changes += function(node.child)
|
|
||||||
elif node.kind == parser.BINARY:
|
|
||||||
if rels != None:
|
|
||||||
changes += function(node.right, rels)
|
|
||||||
changes += function(node.left, rels)
|
|
||||||
else:
|
|
||||||
changes += function(node.right)
|
|
||||||
changes += function(node.left)
|
|
||||||
return changes
|
|
||||||
|
|
||||||
|
|
||||||
def duplicated_select(n: parser.Node) -> int:
|
|
||||||
'''This function locates and deletes things like
|
'''This function locates and deletes things like
|
||||||
σ a ( σ a(C)) and the ones like σ a ( σ b(C))
|
σ a ( σ a(C)) and the ones like σ a ( σ b(C))
|
||||||
replacing the 1st one with a single select and
|
replacing the 1st one with a single select and
|
||||||
@ -135,243 +107,189 @@ def duplicated_select(n: parser.Node) -> int:
|
|||||||
in and
|
in and
|
||||||
'''
|
'''
|
||||||
changes = 0
|
changes = 0
|
||||||
if n.name == SELECTION and n.child.name == SELECTION:
|
while n.name == SELECTION and n.child.name == SELECTION:
|
||||||
|
changes += 1
|
||||||
|
prop = n.prop
|
||||||
|
|
||||||
if n.prop != n.child.prop: # Nested but different, joining them
|
if n.prop != n.child.prop: # Nested but different, joining them
|
||||||
n.prop = n.prop + " and " + n.child.prop
|
prop = n.prop + " and " + n.child.prop
|
||||||
|
|
||||||
# This adds parenthesis if they are needed
|
# This adds parenthesis if they are needed
|
||||||
if n.child.prop.startswith('(') or n.prop.startswith('('):
|
if n.child.prop.startswith('(') or n.prop.startswith('('):
|
||||||
n.prop = '(%s)' % n.prop
|
prop = '(%s)' % prop
|
||||||
|
n = parser.Unary(
|
||||||
n.child = n.child.child
|
SELECTION,
|
||||||
changes = 1
|
prop,
|
||||||
changes += duplicated_select(n)
|
n.child.child,
|
||||||
|
)
|
||||||
return changes + recoursive_scan(duplicated_select, n)
|
return n, changes
|
||||||
|
|
||||||
|
|
||||||
def futile_union_intersection_subtraction(n: parser.Node) -> int:
|
def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''This function locates things like r ᑌ r, and replaces them with r.
|
'''This function locates things like r ∪ r, and replaces them with r.
|
||||||
R ᑌ R --> R
|
R ∪ R --> R
|
||||||
R ᑎ R --> R
|
R ∩ R --> R
|
||||||
R - R --> σ False (R)
|
R - R --> σ False (R)
|
||||||
σ k (R) - R --> σ False (R)
|
σ k (R) - R --> σ False (R)
|
||||||
R - σ k (R) --> σ not k (R)
|
R - σ k (R) --> σ not k (R)
|
||||||
σ k (R) ᑌ R --> R
|
σ k (R) ∪ R --> R
|
||||||
σ k (R) ᑎ R --> σ k (R)
|
σ k (R) ∩ R --> σ k (R)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
changes = 0
|
changes = 0
|
||||||
|
|
||||||
# Union and intersection of the same thing
|
# Union and intersection of the same thing
|
||||||
if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right:
|
if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right:
|
||||||
changes = 1
|
return n.left, 1
|
||||||
replace_node(n, n.left)
|
|
||||||
|
|
||||||
# selection and union of the same thing
|
# selection and union of the same thing
|
||||||
elif (n.name == UNION):
|
elif (n.name == UNION):
|
||||||
if n.left.name == SELECTION and n.left.child == n.right:
|
if n.left.name == SELECTION and n.left.child == n.right:
|
||||||
changes = 1
|
return n.right, 1
|
||||||
replace_node(n, n.right)
|
|
||||||
elif n.right.name == SELECTION and n.right.child == n.left:
|
elif n.right.name == SELECTION and n.right.child == n.left:
|
||||||
changes = 1
|
return n.left, 1
|
||||||
replace_node(n, n.left)
|
|
||||||
|
|
||||||
# selection and intersection of the same thing
|
# selection and intersection of the same thing
|
||||||
elif n.name == INTERSECTION:
|
elif n.name == INTERSECTION:
|
||||||
if n.left.name == SELECTION and n.left.child == n.right:
|
if n.left.name == SELECTION and n.left.child == n.right:
|
||||||
changes = 1
|
return n.left, 1
|
||||||
replace_node(n, n.left)
|
|
||||||
elif n.right.name == SELECTION and n.right.child == n.left:
|
elif n.right.name == SELECTION and n.right.child == n.left:
|
||||||
changes = 1
|
return n.right, 1
|
||||||
replace_node(n, n.right)
|
|
||||||
|
|
||||||
# Subtraction and selection of the same thing
|
# Subtraction and selection of the same thing
|
||||||
elif n.name == DIFFERENCE and \
|
elif n.name == DIFFERENCE and \
|
||||||
n.right.name == SELECTION and \
|
n.right.name == SELECTION and \
|
||||||
n.right.child == n.left:
|
n.right.child == n.left:
|
||||||
n.name = n.right.name
|
return parser.Unary(
|
||||||
n.kind = n.right.kind
|
SELECTION,
|
||||||
n.child = n.right.child
|
'(not (%s))' % n.right.prop,
|
||||||
n.prop = '(not (%s))' % n.right.prop
|
n.right.child), 1
|
||||||
n.left = n.right = None
|
|
||||||
|
|
||||||
# Subtraction of the same thing or with selection on the left child
|
# Subtraction of the same thing or with selection on the left child
|
||||||
elif n.name == DIFFERENCE and (n.left == n.right or (n.left.name == SELECTION and n.left.child == n.right)):
|
elif n.name == DIFFERENCE and (n.left == n.right or (n.left.name == SELECTION and n.left.child == n.right)):
|
||||||
changes = 1
|
return parser.Unary(
|
||||||
n.kind = parser.UNARY
|
SELECTION,
|
||||||
n.name = SELECTION
|
'False',
|
||||||
n.prop = 'False'
|
n.get_left_leaf()
|
||||||
n.child = n.left.get_left_leaf()
|
), 1
|
||||||
# n.left=n.right=None
|
return n, 0
|
||||||
|
|
||||||
return changes + recoursive_scan(futile_union_intersection_subtraction, n)
|
|
||||||
|
|
||||||
|
|
||||||
def down_to_unions_subtractions_intersections(n: parser.Node) -> int:
|
def down_to_unions_subtractions_intersections(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''This funcion locates things like σ i==2 (c ᑌ d), where the union
|
'''This funcion locates things like σ i==2 (c ∪ d), where the union
|
||||||
can be a subtraction and an intersection and replaces them with
|
can be a subtraction and an intersection and replaces them with
|
||||||
σ i==2 (c) ᑌ σ i==2(d).
|
σ i==2 (c) ∪ σ i==2(d).
|
||||||
'''
|
'''
|
||||||
changes = 0
|
changes = 0
|
||||||
_o = (UNION, DIFFERENCE, INTERSECTION)
|
_o = (UNION, DIFFERENCE, INTERSECTION)
|
||||||
if n.name == SELECTION and n.child.name in _o:
|
if n.name == SELECTION and n.child.name in _o:
|
||||||
|
l = parser.Unary(SELECTION, n.prop, n.child.left)
|
||||||
|
r = parser.Unary(SELECTION, n.prop, n.child.right)
|
||||||
|
|
||||||
left = parser.Node()
|
return parser.Binary(n.child.name, l, r), 1
|
||||||
left.prop = n.prop
|
return n, 0
|
||||||
left.name = n.name
|
|
||||||
left.child = n.child.left
|
|
||||||
left.kind = parser.UNARY
|
|
||||||
right = parser.Node()
|
|
||||||
right.prop = n.prop
|
|
||||||
right.name = n.name
|
|
||||||
right.child = n.child.right
|
|
||||||
right.kind = parser.UNARY
|
|
||||||
|
|
||||||
n.name = n.child.name
|
|
||||||
n.left = left
|
|
||||||
n.right = right
|
|
||||||
n.child = None
|
|
||||||
n.prop = None
|
|
||||||
n.kind = parser.BINARY
|
|
||||||
changes += 1
|
|
||||||
|
|
||||||
return changes + recoursive_scan(down_to_unions_subtractions_intersections, n)
|
|
||||||
|
|
||||||
|
|
||||||
def duplicated_projection(n: parser.Node) -> int:
|
def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''This function locates thing like π i ( π j (R)) and replaces
|
'''This function locates thing like π i ( π j (R)) and replaces
|
||||||
them with π i (R)'''
|
them with π i (R)'''
|
||||||
changes = 0
|
|
||||||
|
|
||||||
if n.name == PROJECTION and n.child.name == PROJECTION:
|
if n.name == PROJECTION and n.child.name == PROJECTION:
|
||||||
n.child = n.child.child
|
return parser.Unary(
|
||||||
changes += 1
|
PROJECTION,
|
||||||
|
n.prop,
|
||||||
return changes + recoursive_scan(duplicated_projection, n)
|
n.child.child), 1
|
||||||
|
return n, 0
|
||||||
|
|
||||||
|
|
||||||
def selection_inside_projection(n: parser.Node) -> int:
|
def selection_inside_projection(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''This function locates things like σ j (π k(R)) and
|
'''This function locates things like σ j (π k(R)) and
|
||||||
converts them into π k(σ j (R))'''
|
converts them into π k(σ j (R))'''
|
||||||
changes = 0
|
|
||||||
|
|
||||||
if n.name == SELECTION and n.child.name == PROJECTION:
|
if n.name == SELECTION and n.child.name == PROJECTION:
|
||||||
changes = 1
|
child = parser.Unary(
|
||||||
temp = n.prop
|
SELECTION,
|
||||||
n.prop = n.child.prop
|
n.prop,
|
||||||
n.child.prop = temp
|
n.child.child
|
||||||
n.name = PROJECTION
|
)
|
||||||
n.child.name = SELECTION
|
|
||||||
|
|
||||||
return changes + recoursive_scan(selection_inside_projection, n)
|
return parser.Unary(PROJECTION, n.child.prop, child), 0
|
||||||
|
return n, 0
|
||||||
|
|
||||||
|
|
||||||
def swap_union_renames(n: parser.Node) -> int:
|
def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''This function locates things like
|
'''This function locates things like
|
||||||
ρ a➡b(R) ᑌ ρ a➡b(Q)
|
ρ a➡b(R) ∪ ρ a➡b(Q)
|
||||||
and replaces them with
|
and replaces them with
|
||||||
ρ a➡b(R ᑌ Q).
|
ρ a➡b(R ∪ Q).
|
||||||
Does the same with subtraction and intersection'''
|
Does the same with subtraction and intersection'''
|
||||||
changes = 0
|
if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == RENAME and n.right.name == RENAME:
|
||||||
|
l_vars = n.left.get_rename_prop()
|
||||||
if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == n.right.name and n.left.name == RENAME:
|
r_vars = n.right.get_rename_prop()
|
||||||
l_vars = {}
|
|
||||||
for i in n.left.prop.split(','):
|
|
||||||
q = i.split(ARROW)
|
|
||||||
l_vars[q[0].strip()] = q[1].strip()
|
|
||||||
|
|
||||||
r_vars = {}
|
|
||||||
for i in n.right.prop.split(','):
|
|
||||||
q = i.split(ARROW)
|
|
||||||
r_vars[q[0].strip()] = q[1].strip()
|
|
||||||
|
|
||||||
if r_vars == l_vars:
|
if r_vars == l_vars:
|
||||||
changes = 1
|
child = parser.Binary(n.name, n.left.child, n.right.child)
|
||||||
|
return parser.Unary(RENAME, n.left.prop, child), 1
|
||||||
# Copying self, but child will be child of renames
|
return n, 0
|
||||||
q = parser.Node()
|
|
||||||
q.name = n.name
|
|
||||||
q.kind = parser.BINARY
|
|
||||||
q.left = n.left.child
|
|
||||||
q.right = n.right.child
|
|
||||||
|
|
||||||
n.name = RENAME
|
|
||||||
n.kind = parser.UNARY
|
|
||||||
n.child = q
|
|
||||||
n.prop = n.left.prop
|
|
||||||
n.left = n.right = None
|
|
||||||
|
|
||||||
return changes + recoursive_scan(swap_union_renames, n)
|
|
||||||
|
|
||||||
|
|
||||||
def futile_renames(n: parser.Node) -> int:
|
def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''This function purges renames like id->id'''
|
'''This function purges renames like
|
||||||
changes = 0
|
ρ id->id,a->q (A)
|
||||||
|
into
|
||||||
|
ρ a->q (A)
|
||||||
|
|
||||||
|
or removes the operation entirely if they all get removed
|
||||||
|
'''
|
||||||
if n.name == RENAME:
|
if n.name == RENAME:
|
||||||
# Located two nested renames.
|
renames = n.get_rename_prop()
|
||||||
changes = 1
|
changes = False
|
||||||
|
for k, v in renames.items():
|
||||||
|
if k == v:
|
||||||
|
changes = True
|
||||||
|
del renames[k]
|
||||||
|
if len(renames) == 0: # Nothing to rename, removing the rename
|
||||||
|
return n.child, 1
|
||||||
|
elif changes:
|
||||||
|
# Changing the node in place, no need to return to cause a recursive step
|
||||||
|
n.set_rename_prop(renames)
|
||||||
|
|
||||||
# Creating a dictionary with the attributes
|
return n, 0
|
||||||
_vars = {}
|
|
||||||
for i in n.prop.split(','):
|
|
||||||
q = i.split(ARROW)
|
|
||||||
_vars[q[0].strip()] = q[1].strip()
|
|
||||||
# Scans dictionary to locate things like "a->b,b->c" and replace them
|
|
||||||
# with "a->c"
|
|
||||||
for key in list(_vars.keys()):
|
|
||||||
value = _vars.get(key)
|
|
||||||
if key == value:
|
|
||||||
_vars.pop(value) # Removes the unused one
|
|
||||||
|
|
||||||
if len(_vars) == 0: # Nothing to rename, removing the rename op
|
|
||||||
replace_node(n, n.child)
|
|
||||||
else:
|
|
||||||
n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items())
|
|
||||||
|
|
||||||
return changes + recoursive_scan(futile_renames, n)
|
|
||||||
|
|
||||||
|
|
||||||
def subsequent_renames(n: parser.Node) -> int:
|
def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''This function removes redoundant subsequent renames joining them into one'''
|
'''This function removes redundant subsequent renames joining them into one
|
||||||
|
ρ .. ρ .. (A)
|
||||||
'''Purges renames like id->id Since it's needed to be performed BEFORE this one
|
into
|
||||||
so it is not in the list with the other optimizations'''
|
ρ ... (A)
|
||||||
futile_renames(n)
|
'''
|
||||||
changes = 0
|
|
||||||
|
|
||||||
if n.name == RENAME and n.child.name == RENAME:
|
if n.name == RENAME and n.child.name == RENAME:
|
||||||
# Located two nested renames.
|
# Located two nested renames.
|
||||||
changes = 1
|
prop = n.prop + ',' + n.child.prop
|
||||||
# Joining the attribute into one
|
child = n.child.child
|
||||||
n.prop += ',' + n.child.prop
|
n = parser.Unary(RENAME, prop, child)
|
||||||
n.child = n.child.child
|
|
||||||
|
|
||||||
# Creating a dictionary with the attributes
|
# Creating a dictionary with the attributes
|
||||||
_vars = {}
|
renames = n.get_rename_prop()
|
||||||
for i in n.prop.split(','):
|
|
||||||
q = i.split(ARROW)
|
|
||||||
_vars[q[0].strip()] = q[1].strip()
|
|
||||||
# Scans dictionary to locate things like "a->b,b->c" and replace them
|
# Scans dictionary to locate things like "a->b,b->c" and replace them
|
||||||
# with "a->c"
|
# with "a->c"
|
||||||
for key in list(_vars.keys()):
|
for key, value in tuple(renames.items()):
|
||||||
value = _vars.get(key)
|
|
||||||
if value in _vars.keys():
|
if value in renames:
|
||||||
if _vars[value] != key:
|
if renames[value] != key:
|
||||||
# Double rename on attribute
|
# Double rename on attribute
|
||||||
_vars[key] = _vars[_vars[key]] # Sets value
|
renames[key] = renames[renames[key]] # Sets value
|
||||||
_vars.pop(value) # Removes the unused one
|
del renames[value] # Removes the unused one
|
||||||
else: # Cycle rename a->b,b->a
|
else: # Cycle rename a->b,b->a
|
||||||
_vars.pop(value) # Removes the unused one
|
del renames[value] # Removes the unused one
|
||||||
_vars.pop(key) # Removes the unused one
|
del renames[key] # Removes the unused one
|
||||||
|
|
||||||
if len(_vars) == 0: # Nothing to rename, removing the rename op
|
if len(renames) == 0: # Nothing to rename, removing the rename op
|
||||||
replace_node(n, n.child)
|
return n.child, 1
|
||||||
else:
|
else:
|
||||||
n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items())
|
n.set_rename_prop(renames)
|
||||||
|
return n, 1
|
||||||
|
|
||||||
return changes + recoursive_scan(subsequent_renames, n)
|
return n, 0
|
||||||
|
|
||||||
|
|
||||||
class level_string(str):
|
class level_string(str):
|
||||||
@ -411,101 +329,84 @@ def tokenize_select(expression):
|
|||||||
return l
|
return l
|
||||||
|
|
||||||
|
|
||||||
def swap_rename_projection(n: parser.Node) -> int:
|
def swap_rename_projection(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''This function locates things like π k(ρ j(R))
|
'''This function locates things like
|
||||||
and replaces them with ρ j(π k(R)).
|
π k(ρ j(R))
|
||||||
|
and replaces them with
|
||||||
|
ρ j(π k(R)).
|
||||||
This will let rename work on a hopefully smaller set
|
This will let rename work on a hopefully smaller set
|
||||||
and more important, will hopefully allow further optimizations.
|
and more important, will hopefully allow further optimizations.
|
||||||
Will also eliminate fields in the rename that are cutted in the projection.
|
|
||||||
|
Will also eliminate fields in the rename that are cut in the projection.
|
||||||
'''
|
'''
|
||||||
changes = 0
|
|
||||||
|
|
||||||
if n.name == PROJECTION and n.child.name == RENAME:
|
if n.name == PROJECTION and n.child.name == RENAME:
|
||||||
changes = 1
|
|
||||||
|
|
||||||
# π index,name(ρ id➡index(R))
|
# π index,name(ρ id➡index(R))
|
||||||
_vars = {}
|
renames = n.child.get_rename_prop()
|
||||||
for i in n.child.prop.split(','):
|
projections = set(n.get_projection_prop())
|
||||||
q = i.split(ARROW)
|
|
||||||
_vars[q[1].strip()] = q[0].strip()
|
|
||||||
|
|
||||||
_pr = n.prop.split(',')
|
# Use pre-rename names in the projection
|
||||||
for i in range(len(_pr)):
|
for k, v in renames.items():
|
||||||
try:
|
if v in projections:
|
||||||
_pr[i] = _vars[_pr[i].strip()]
|
projections.remove(v)
|
||||||
except:
|
projections.add(k)
|
||||||
pass
|
|
||||||
|
|
||||||
_pr_reborn = n.prop.split(',')
|
# Eliminate fields
|
||||||
for i in list(_vars.keys()):
|
for i in list(renames.keys()):
|
||||||
if i not in _pr_reborn:
|
if i not in projections:
|
||||||
_vars.pop(i)
|
del renames[i]
|
||||||
n.name = n.child.name
|
|
||||||
|
|
||||||
n.prop = ','.join('%s%s%s' % (i[1], ARROW, i[0]) for i in _vars.items())
|
child = parser.Unary(PROJECTION,'' , n.child.child)
|
||||||
|
child.set_projection_prop(projections)
|
||||||
|
n = parser.Unary(RENAME, '', child)
|
||||||
|
n.set_rename_prop(renames)
|
||||||
|
return n, 1
|
||||||
|
|
||||||
n.child.name = PROJECTION
|
return n, 0
|
||||||
n.child.prop = ''
|
|
||||||
for i in _pr:
|
|
||||||
n.child.prop += i + ','
|
|
||||||
n.child.prop = n.child.prop[:-1]
|
|
||||||
|
|
||||||
return changes + recoursive_scan(swap_rename_projection, n)
|
|
||||||
|
|
||||||
|
|
||||||
def swap_rename_select(n: parser.Node) -> int:
|
def swap_rename_select(n: parser.Node) -> int:
|
||||||
'''This function locates things like σ k(ρ j(R)) and replaces
|
'''This function locates things like
|
||||||
them with ρ j(σ k(R)). Renaming the attributes used in the
|
σ k(ρ j(R))
|
||||||
|
and replaces them with
|
||||||
|
ρ j(σ k(R)).
|
||||||
|
Renaming the attributes used in the
|
||||||
selection, so the operation is still valid.'''
|
selection, so the operation is still valid.'''
|
||||||
changes = 0
|
|
||||||
|
|
||||||
if n.name == SELECTION and n.child.name == RENAME:
|
if n.name == SELECTION and n.child.name == RENAME:
|
||||||
changes = 1
|
# This is an inverse mapping for the rename
|
||||||
# Dictionary containing attributes of rename
|
renames = {v: k for k, v in n.child.get_rename_prop().items()}
|
||||||
_vars = {}
|
|
||||||
for i in n.child.prop.split(','):
|
|
||||||
q = i.split(ARROW)
|
|
||||||
_vars[q[1].strip()] = q[0].strip()
|
|
||||||
|
|
||||||
# tokenizes expression in select
|
# tokenizes expression in select
|
||||||
_tokens = tokenize_select(n.prop)
|
tokens = tokenize_select(n.prop)
|
||||||
|
|
||||||
# Renaming stuff
|
# Renaming stuff, no enum because I edit the tokens
|
||||||
for i in range(len(_tokens)):
|
for i in range(len(tokens)):
|
||||||
splitted = _tokens[i].split('.', 1)
|
splitted = tokens[i].split('.', 1)
|
||||||
if splitted[0] in _vars:
|
if splitted[0] in renames:
|
||||||
if len(splitted) == 1:
|
tokens[i] = renames[splitted[0]]
|
||||||
_tokens[i] = _vars[_tokens[i].split('.')[0]]
|
if len(splitted) > 1:
|
||||||
else:
|
tokens[i] += '.' + splitted[1]
|
||||||
_tokens[i] = _vars[
|
|
||||||
_tokens[i].split('.')[0]] + '.' + splitted[1]
|
|
||||||
|
|
||||||
# Swapping operators
|
child = parser.Unary(SELECTION, ' '.join(tokens), n.child.child)
|
||||||
n.name = RENAME
|
return parser.Unary(RENAME, n.child.prop, child), 1
|
||||||
n.child.name = SELECTION
|
return n, 0
|
||||||
|
|
||||||
n.prop = n.child.prop
|
|
||||||
n.child.prop = ' '.join(_tokens)
|
|
||||||
|
|
||||||
return changes + recoursive_scan(swap_rename_select, n)
|
|
||||||
|
|
||||||
|
|
||||||
def select_union_intersect_subtract(n: parser.Node) -> int:
|
def select_union_intersect_subtract(n: parser.Node) -> int:
|
||||||
'''This function locates things like σ i(a) ᑌ σ q(a)
|
'''This function locates things like
|
||||||
and replaces them with σ (i OR q) (a)
|
σ i(a) ∪ σ q(a)
|
||||||
|
and replaces them with
|
||||||
|
σ (i OR q) (a)
|
||||||
Removing a O(n²) operation like the union'''
|
Removing a O(n²) operation like the union'''
|
||||||
changes = 0
|
|
||||||
if n.name in {UNION, INTERSECTION, DIFFERENCE} and \
|
if n.name in {UNION, INTERSECTION, DIFFERENCE} and \
|
||||||
n.left.name == SELECTION and \
|
n.left.name == SELECTION and \
|
||||||
n.right.name == SELECTION and \
|
n.right.name == SELECTION and \
|
||||||
n.left.child == n.right.child:
|
n.left.child == n.right.child:
|
||||||
changes = 1
|
|
||||||
|
|
||||||
d = {UNION: 'or', INTERSECTION: 'and', DIFFERENCE: 'and not'}
|
d = {UNION: 'or', INTERSECTION: 'and', DIFFERENCE: 'and not'}
|
||||||
op = d[n.name]
|
op = d[n.name]
|
||||||
|
|
||||||
newnode = parser.Node()
|
|
||||||
|
|
||||||
if n.left.prop.startswith('(') or n.right.prop.startswith('('):
|
if n.left.prop.startswith('(') or n.right.prop.startswith('('):
|
||||||
t_str = '('
|
t_str = '('
|
||||||
if n.left.prop.startswith('('):
|
if n.left.prop.startswith('('):
|
||||||
@ -519,54 +420,34 @@ def select_union_intersect_subtract(n: parser.Node) -> int:
|
|||||||
t_str += '%s'
|
t_str += '%s'
|
||||||
t_str += ')'
|
t_str += ')'
|
||||||
|
|
||||||
newnode.prop = t_str % (n.left.prop, op, n.right.prop)
|
prop = t_str % (n.left.prop, op, n.right.prop)
|
||||||
else:
|
else:
|
||||||
newnode.prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
|
prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
|
||||||
newnode.name = SELECTION
|
return parser.Unary(SELECTION, prop, n.left.child), 1
|
||||||
newnode.child = n.left.child
|
return n, 0
|
||||||
newnode.kind = parser.UNARY
|
|
||||||
replace_node(n, newnode)
|
|
||||||
|
|
||||||
return changes + recoursive_scan(select_union_intersect_subtract, n)
|
|
||||||
|
|
||||||
|
|
||||||
def union_and_product(n: parser.Node) -> int:
|
def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]:
|
||||||
'''
|
'''
|
||||||
A * B ∪ A * C = A * (B ∪ C)
|
A * B ∪ A * C = A * (B ∪ C)
|
||||||
Same thing with inner join
|
Same thing with inner join
|
||||||
'''
|
'''
|
||||||
|
|
||||||
changes = 0
|
|
||||||
if n.name == UNION and n.left.name in {PRODUCT, JOIN} and n.left.name == n.right.name:
|
if n.name == UNION and n.left.name in {PRODUCT, JOIN} and n.left.name == n.right.name:
|
||||||
|
|
||||||
newnode = parser.Node()
|
|
||||||
newnode.kind = parser.BINARY
|
|
||||||
newnode.name = n.left.name
|
|
||||||
|
|
||||||
newchild = parser.Node()
|
|
||||||
newchild.kind = parser.BINARY
|
|
||||||
newchild.name = UNION
|
|
||||||
|
|
||||||
if n.left.left == n.right.left or n.left.left == n.right.right:
|
if n.left.left == n.right.left or n.left.left == n.right.right:
|
||||||
newnode.left = n.left.left
|
l = n.left.right
|
||||||
newnode.right = newchild
|
r = n.right.left if n.left.left == n.right.right else n.right.right
|
||||||
|
newchild = parser.Binary(UNION, l, r)
|
||||||
newchild.left = n.left.right
|
return parser.Binary(n.left.name, n.left.left, newchild), 1
|
||||||
newchild.right = n.right.left if n.left.left == n.right.right else n.right.right
|
|
||||||
replace_node(n, newnode)
|
|
||||||
changes = 1
|
|
||||||
elif n.left.right == n.right.left or n.left.left == n.right.right:
|
elif n.left.right == n.right.left or n.left.left == n.right.right:
|
||||||
newnode.left = n.left.right
|
l = n.left.left
|
||||||
newnode.right = newchild
|
r = n.right.left if n.right.left == n.right.right else n.right.right
|
||||||
|
newchild = parser.Binary(UNION, l, r)
|
||||||
newchild.left = n.left.left
|
return parser.Binary(n.left.name, n.left.right, newchild), 1
|
||||||
newchild.right = n.right.left if n.right.left == n.right.right else n.right.right
|
return n, 0
|
||||||
replace_node(n, newnode)
|
|
||||||
changes = 1
|
|
||||||
return changes + recoursive_scan(union_and_product, n)
|
|
||||||
|
|
||||||
|
|
||||||
def projection_and_union(n, rels):
|
def projection_and_union(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
|
||||||
'''
|
'''
|
||||||
Turns
|
Turns
|
||||||
π a,b,c(A) ∪ π a,b,c(B)
|
π a,b,c(A) ∪ π a,b,c(B)
|
||||||
@ -581,28 +462,16 @@ def projection_and_union(n, rels):
|
|||||||
n.left.name == PROJECTION and \
|
n.left.name == PROJECTION and \
|
||||||
n.right.name == PROJECTION and \
|
n.right.name == PROJECTION and \
|
||||||
set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)):
|
set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)):
|
||||||
newchild = parser.Node()
|
|
||||||
|
|
||||||
newchild.kind = parser.BINARY
|
child = parser.Binary(UNION, n.left.child, n.right.child)
|
||||||
newchild.name = UNION
|
return parser.Unary(PROJECTION, n.right.prop, child), 0
|
||||||
newchild.left = n.left.child
|
return n, 0
|
||||||
newchild.right = n.right.child
|
|
||||||
|
|
||||||
newnode = parser.Node()
|
|
||||||
newnode.child = newchild
|
|
||||||
newnode.kind = parser.UNARY
|
|
||||||
newnode.name = PROJECTION
|
|
||||||
newnode.prop = n.right.prop
|
|
||||||
replace_node(n, newnode)
|
|
||||||
changes = 1
|
|
||||||
return changes + recoursive_scan(projection_and_union, n, rels)
|
|
||||||
|
|
||||||
|
|
||||||
def selection_and_product(n, rels):
|
def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.Node:
|
||||||
'''This function locates things like σ k (R*Q) and converts them into
|
'''This function locates things like σ k (R*Q) and converts them into
|
||||||
σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
|
σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
|
||||||
i contains attributes belonging to Q and l contains attributes belonging to both'''
|
i contains attributes belonging to Q and l contains attributes belonging to both'''
|
||||||
changes = 0
|
|
||||||
|
|
||||||
if n.name == SELECTION and n.child.name in (PRODUCT, JOIN):
|
if n.name == SELECTION and n.child.name in (PRODUCT, JOIN):
|
||||||
l_attr = n.child.left.result_format(rels)
|
l_attr = n.child.left.result_format(rels)
|
||||||
@ -637,76 +506,71 @@ def selection_and_product(n, rels):
|
|||||||
if j in r_attr: # Field in right
|
if j in r_attr: # Field in right
|
||||||
r_fields = True
|
r_fields = True
|
||||||
|
|
||||||
if l_fields and r_fields: # Fields in both
|
if l_fields and not r_fields:
|
||||||
both.append(i)
|
|
||||||
elif l_fields:
|
|
||||||
left.append(i)
|
left.append(i)
|
||||||
elif r_fields:
|
elif r_fields and not l_fields:
|
||||||
right.append(i)
|
right.append(i)
|
||||||
else: # Unknown.. adding in both
|
else: # Unknown.. adding in both
|
||||||
both.append(i)
|
both.append(i)
|
||||||
|
|
||||||
# Preparing left selection
|
# Preparing left selection
|
||||||
if len(left) > 0:
|
if left:
|
||||||
changes = 1
|
l_prop = ''
|
||||||
l_node = parser.Node()
|
|
||||||
l_node.name = SELECTION
|
|
||||||
l_node.kind = parser.UNARY
|
|
||||||
l_node.child = n.child.left
|
|
||||||
l_node.prop = ''
|
|
||||||
n.child.left = l_node
|
|
||||||
while len(left) > 0:
|
while len(left) > 0:
|
||||||
c = left.pop(0)
|
c = left.pop(0)
|
||||||
for i in c:
|
for i in c:
|
||||||
l_node.prop += i + ' '
|
l_prop += i + ' '
|
||||||
if len(left) > 0:
|
if len(left) > 0:
|
||||||
l_node.prop += ' and '
|
l_prop += ' and '
|
||||||
if '(' in l_node.prop:
|
if '(' in l_prop:
|
||||||
l_node.prop = '(%s)' % l_node.prop
|
l_prop = '(%s)' % l_prop
|
||||||
|
l_node = parser.Unary(SELECTION, l_prop, n.child.left)
|
||||||
|
else:
|
||||||
|
l_node = n.child.left
|
||||||
|
|
||||||
# Preparing right selection
|
# Preparing right selection
|
||||||
if len(right) > 0:
|
if right:
|
||||||
changes = 1
|
r_prop = ''
|
||||||
r_node = parser.Node()
|
|
||||||
r_node.name = SELECTION
|
|
||||||
r_node.prop = ''
|
|
||||||
r_node.kind = parser.UNARY
|
|
||||||
r_node.child = n.child.right
|
|
||||||
n.child.right = r_node
|
|
||||||
while len(right) > 0:
|
while len(right) > 0:
|
||||||
c = right.pop(0)
|
c = right.pop(0)
|
||||||
r_node.prop += ' '.join(c)
|
r_prop += ' '.join(c)
|
||||||
if len(right) > 0:
|
if len(right) > 0:
|
||||||
r_node.prop += ' and '
|
r_prop += ' and '
|
||||||
if '(' in r_node.prop:
|
if '(' in r_prop:
|
||||||
r_node.prop = '(%s)' % r_node.prop
|
r_prop = '(%s)' % r_prop
|
||||||
|
r_node = parser.Unary(SELECTION, r_prop, n.child.right)
|
||||||
|
else:
|
||||||
|
r_node = n.child.right
|
||||||
|
|
||||||
|
b_node = parser.Binary(n.child.name, l_node, r_node)
|
||||||
|
|
||||||
# Changing main selection
|
# Changing main selection
|
||||||
n.prop = ''
|
if both:
|
||||||
if len(both) != 0:
|
both_prop = ''
|
||||||
while len(both) > 0:
|
while len(both) > 0:
|
||||||
c = both.pop(0)
|
c = both.pop(0)
|
||||||
n.prop += ' '.join(c)
|
both_prop += ' '.join(c)
|
||||||
if len(both) > 0:
|
if len(both) > 0:
|
||||||
n.prop += ' and '
|
both_prop += ' and '
|
||||||
if '(' in n.prop:
|
if '(' in both_prop:
|
||||||
n.prop = '(%s)' % n.prop
|
both_prop = '(%s)' % both_prop
|
||||||
|
r = parser.Unary(SELECTION, both_prop, b_node)
|
||||||
|
return r, len(left) + len(right)
|
||||||
else: # No need for general select
|
else: # No need for general select
|
||||||
replace_node(n, n.child)
|
return b_node, 1
|
||||||
|
|
||||||
return changes + recoursive_scan(selection_and_product, n, rels)
|
return n, 0
|
||||||
|
|
||||||
|
|
||||||
def useless_projection(n, rels) -> int:
|
def useless_projection(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
|
||||||
'''
|
'''
|
||||||
Removes projections that are over all the fields
|
Removes projections that are over all the fields
|
||||||
'''
|
'''
|
||||||
changes = 0
|
|
||||||
if n.name == PROJECTION and \
|
if n.name == PROJECTION and \
|
||||||
set(n.child.result_format(rels)) == set(i.strip() for i in n.prop.split(',')):
|
set(n.child.result_format(rels)) == set(i.strip() for i in n.prop.split(',')):
|
||||||
changes = 1
|
return n.child, 1
|
||||||
replace_node(n, n.child)
|
|
||||||
|
|
||||||
return changes + recoursive_scan(useless_projection, n, rels)
|
return n, 0
|
||||||
|
|
||||||
general_optimizations = [
|
general_optimizations = [
|
||||||
duplicated_select,
|
duplicated_select,
|
||||||
@ -714,6 +578,7 @@ general_optimizations = [
|
|||||||
duplicated_projection,
|
duplicated_projection,
|
||||||
selection_inside_projection,
|
selection_inside_projection,
|
||||||
subsequent_renames,
|
subsequent_renames,
|
||||||
|
futile_renames,
|
||||||
swap_rename_select,
|
swap_rename_select,
|
||||||
futile_union_intersection_subtraction,
|
futile_union_intersection_subtraction,
|
||||||
swap_union_renames,
|
swap_union_renames,
|
||||||
@ -726,6 +591,3 @@ specific_optimizations = [
|
|||||||
projection_and_union,
|
projection_and_union,
|
||||||
useless_projection,
|
useless_projection,
|
||||||
]
|
]
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
print (tokenize_select("skill == 'C' and id % 2 == 0"))
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# Relational
|
# Relational
|
||||||
# Copyright (C) 2008-2016 Salvo "LtWorf" Tomaselli
|
# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
|
||||||
#
|
#
|
||||||
# Relational is free software: you can redistribute it and/or modify
|
# Relational is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
@ -22,23 +22,22 @@
|
|||||||
# relational query, or it can be a parse tree for a relational expression (ie: class parser.node).
|
# relational query, or it can be a parse tree for a relational expression (ie: class parser.node).
|
||||||
# The functions will always return a string with the optimized query, but if a parse tree was provided,
|
# The functions will always return a string with the optimized query, but if a parse tree was provided,
|
||||||
# the parse tree itself will be modified accordingly.
|
# the parse tree itself will be modified accordingly.
|
||||||
from typing import Union, Optional, Dict, Any
|
from typing import Union, Optional, Dict, Any, Tuple
|
||||||
|
|
||||||
|
from relational.relation import Relation
|
||||||
from relational import optimizations
|
from relational import optimizations
|
||||||
from relational.parser import Node, RELATION, UNARY, BINARY, op_functions, tokenize, tree
|
from relational.parser import Node, Variable, Unary, Binary, op_functions, tokenize, tree
|
||||||
from relational import querysplit
|
from relational import querysplit
|
||||||
from relational.maintenance import UserInterface
|
from relational.maintenance import UserInterface
|
||||||
|
|
||||||
ContextDict = Dict[str,Any]
|
|
||||||
|
|
||||||
|
def optimize_program(code, rels: Dict[str, Relation]):
|
||||||
def optimize_program(code, rels: ContextDict):
|
|
||||||
'''
|
'''
|
||||||
Optimize an entire program, composed by multiple expressions
|
Optimize an entire program, composed by multiple expressions
|
||||||
and assignments.
|
and assignments.
|
||||||
'''
|
'''
|
||||||
lines = code.split('\n')
|
lines = code.split('\n')
|
||||||
context = {} # type: ContextDict
|
context = {}
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
@ -53,7 +52,7 @@ def optimize_program(code, rels: ContextDict):
|
|||||||
return querysplit.split(node, rels)
|
return querysplit.split(node, rels)
|
||||||
|
|
||||||
|
|
||||||
def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
|
def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
|
||||||
'''This function performs all the available optimizations.
|
'''This function performs all the available optimizations.
|
||||||
|
|
||||||
expression : see documentation of this module
|
expression : see documentation of this module
|
||||||
@ -82,23 +81,23 @@ def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool
|
|||||||
total = 0
|
total = 0
|
||||||
if specific:
|
if specific:
|
||||||
for i in optimizations.specific_optimizations:
|
for i in optimizations.specific_optimizations:
|
||||||
res = i(n, rels) # Performs the optimization
|
n, c = recursive_scan(i, n, rels)
|
||||||
if res != 0 and dbg:
|
if c != 0 and dbg:
|
||||||
debug.append(str(n))
|
debug.append(str(n))
|
||||||
total += res
|
total += c
|
||||||
if general:
|
if general:
|
||||||
for i in optimizations.general_optimizations:
|
for i in optimizations.general_optimizations:
|
||||||
res = i(n) # Performs the optimization
|
n, c = recursive_scan(i, n, None)
|
||||||
if res != 0 and dbg:
|
if c != 0 and dbg:
|
||||||
debug.append(str(n))
|
debug.append(str(n))
|
||||||
total += res
|
total += c
|
||||||
if tostr:
|
if tostr:
|
||||||
return str(n)
|
return str(n)
|
||||||
else:
|
else:
|
||||||
return n
|
return n
|
||||||
|
|
||||||
|
|
||||||
def specific_optimize(expression, rels: ContextDict):
|
def specific_optimize(expression, rels: Dict[str, Relation]):
|
||||||
'''This function performs specific optimizations. Means that it will need to
|
'''This function performs specific optimizations. Means that it will need to
|
||||||
know the fields used by the relations.
|
know the fields used by the relations.
|
||||||
|
|
||||||
@ -117,3 +116,35 @@ def general_optimize(expression):
|
|||||||
|
|
||||||
Return value: this will return an optimized version of the expression'''
|
Return value: this will return an optimized version of the expression'''
|
||||||
return optimize_all(expression, None, specific=False, general=True)
|
return optimize_all(expression, None, specific=False, general=True)
|
||||||
|
|
||||||
|
|
||||||
|
def recursive_scan(function, node, rels) -> Tuple[Node, int]:
    '''Does a recursive optimization on the tree.

    This function will recursively execute the function given
    as "function" parameter starting from node to all the tree.

    function: the optimization to apply. It is called as
              function(node, rels) if rels is provided, and as
              function(node) otherwise. It must return a tuple
              (node, changes), where node is the node to keep in the
              tree at that position and changes is the amount of
              changes it has performed.
    node: the root of the (sub)tree to scan.
    rels: passed as second argument to function, unless it is None.

    Return value: a tuple (node, changes), where node is the root of
    the scanned tree, as returned by function, and changes is the sum
    of the changes reported by every call to function.'''

    # Only None means "no relations": an empty dictionary must still be
    # passed on, otherwise a function that requires rels would be called
    # with a missing argument.
    args = () if rels is None else (rels,)

    node, changes = function(node, *args)

    # The children of the node returned by function are scanned, and
    # replaced with whatever the scan returns for them.
    if isinstance(node, Unary):
        node.child, c = recursive_scan(function, node.child, rels)
        changes += c
    elif isinstance(node, Binary):
        node.left, c = recursive_scan(function, node.left, rels)
        changes += c
        node.right, c = recursive_scan(function, node.right, rels)
        changes += c
    return node, changes
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# Relational
|
# Relational
|
||||||
# Copyright (C) 2008-2017 Salvo "LtWorf" Tomaselli
|
# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
|
||||||
#
|
#
|
||||||
# Relational is free software: you can redistribute it and/or modify
|
# Relational is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
@ -24,14 +24,11 @@
|
|||||||
#
|
#
|
||||||
# Language definition here:
|
# Language definition here:
|
||||||
# http://ltworf.github.io/relational/grammar.html
|
# http://ltworf.github.io/relational/grammar.html
|
||||||
from typing import Optional, Union, List, Any
|
from typing import Optional, Union, List, Any, Dict
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from relational import rtypes
|
from relational import rtypes
|
||||||
|
|
||||||
RELATION = 0
|
|
||||||
UNARY = 1
|
|
||||||
BINARY = 2
|
|
||||||
|
|
||||||
PRODUCT = '*'
|
PRODUCT = '*'
|
||||||
DIFFERENCE = '-'
|
DIFFERENCE = '-'
|
||||||
UNION = '∪'
|
UNION = '∪'
|
||||||
@ -84,9 +81,8 @@ class CallableString(str):
|
|||||||
'''
|
'''
|
||||||
return eval(self, context)
|
return eval(self, context)
|
||||||
|
|
||||||
|
@dataclass
|
||||||
class Node:
|
class Node:
|
||||||
|
|
||||||
'''This class is a node of a relational expression. Leaves are relations
|
'''This class is a node of a relational expression. Leaves are relations
|
||||||
and internal nodes are operations.
|
and internal nodes are operations.
|
||||||
|
|
||||||
@ -102,72 +98,12 @@ class Node:
|
|||||||
operation.
|
operation.
|
||||||
|
|
||||||
This class is used to convert an expression into python code.'''
|
This class is used to convert an expression into python code.'''
|
||||||
kind = None # type: Optional[int]
|
name: str
|
||||||
__hash__ = None # type: None
|
|
||||||
|
|
||||||
def __init__(self, expression: Optional[list] = None) -> None:
|
def __init__(self, name: str) -> None:
|
||||||
'''Generates the tree from the tokenized expression
|
raise NotImplementedError('This is supposed to be an abstract class')
|
||||||
If no expression is specified then it will create an empty node'''
|
|
||||||
if expression is None or len(expression) == 0:
|
|
||||||
return
|
|
||||||
|
|
||||||
# If the list contains only a list, it will consider the lower level list.
|
def toCode(self): #FIXME return type
|
||||||
# This will allow things like ((((((a))))) to work
|
|
||||||
while len(expression) == 1 and isinstance(expression[0], list):
|
|
||||||
expression = expression[0]
|
|
||||||
|
|
||||||
# The list contains only 1 string. Means it is the name of a relation
|
|
||||||
if len(expression) == 1:
|
|
||||||
self.kind = RELATION
|
|
||||||
self.name = expression[0]
|
|
||||||
if not rtypes.is_valid_relation_name(self.name):
|
|
||||||
raise ParserException(
|
|
||||||
u"'%s' is not a valid relation name" % self.name)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Expression from right to left, searching for binary operators
|
|
||||||
# this means that binary operators have lesser priority than
|
|
||||||
# unary operators.
|
|
||||||
# It finds the operator with lesser priority, uses it as root of this
|
|
||||||
# (sub)tree using everything on its left as left parameter (so building
|
|
||||||
# a left subtree with the part of the list located on left) and doing
|
|
||||||
# the same on right.
|
|
||||||
# Since it searches for strings, and expressions into parenthesis are
|
|
||||||
# within sub-lists, they won't be found here, ensuring that they will
|
|
||||||
# have highest priority.
|
|
||||||
for i in range(len(expression) - 1, -1, -1):
|
|
||||||
if expression[i] in b_operators: # Binary operator
|
|
||||||
self.kind = BINARY
|
|
||||||
self.name = expression[i]
|
|
||||||
|
|
||||||
if len(expression[:i]) == 0:
|
|
||||||
raise ParserException(
|
|
||||||
u"Expected left operand for '%s'" % self.name)
|
|
||||||
|
|
||||||
if len(expression[i + 1:]) == 0:
|
|
||||||
raise ParserException(
|
|
||||||
u"Expected right operand for '%s'" % self.name)
|
|
||||||
|
|
||||||
self.left = node(expression[:i])
|
|
||||||
self.right = node(expression[i + 1:])
|
|
||||||
return
|
|
||||||
'''Searches for unary operators, parsing from right to left'''
|
|
||||||
for i in range(len(expression) - 1, -1, -1):
|
|
||||||
if expression[i] in u_operators: # Unary operator
|
|
||||||
self.kind = UNARY
|
|
||||||
self.name = expression[i]
|
|
||||||
|
|
||||||
if len(expression) <= i + 2:
|
|
||||||
raise ParserException(
|
|
||||||
u"Expected more tokens in '%s'" % self.name)
|
|
||||||
|
|
||||||
self.prop = expression[1 + i].strip()
|
|
||||||
self.child = node(expression[2 + i])
|
|
||||||
|
|
||||||
return
|
|
||||||
raise ParserException("Expected operator in '%s'" % expression)
|
|
||||||
|
|
||||||
def toCode(self):
|
|
||||||
'''This method converts the AST into a python code object'''
|
'''This method converts the AST into a python code object'''
|
||||||
code = self._toPython()
|
code = self._toPython()
|
||||||
return compile(code, '<relational_expression>', 'eval')
|
return compile(code, '<relational_expression>', 'eval')
|
||||||
@ -181,25 +117,7 @@ class Node:
|
|||||||
return CallableString(self._toPython())
|
return CallableString(self._toPython())
|
||||||
|
|
||||||
def _toPython(self) -> str:
|
def _toPython(self) -> str:
|
||||||
'''
|
raise NotImplementedError()
|
||||||
Same as toPython but returns a regular string
|
|
||||||
'''
|
|
||||||
if self.name in b_operators:
|
|
||||||
return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
|
|
||||||
elif self.name in u_operators:
|
|
||||||
prop = self.prop
|
|
||||||
|
|
||||||
# Converting parameters
|
|
||||||
if self.name == PROJECTION:
|
|
||||||
prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"')
|
|
||||||
elif self.name == RENAME:
|
|
||||||
prop = '{\"%s\"}' % prop.replace(
|
|
||||||
',', '\",\"').replace(ARROW, '\":\"').replace(' ', '')
|
|
||||||
else: # Selection
|
|
||||||
prop = repr(prop)
|
|
||||||
|
|
||||||
return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
|
|
||||||
return self.name
|
|
||||||
|
|
||||||
def printtree(self, level: int = 0) -> str:
|
def printtree(self, level: int = 0) -> str:
|
||||||
'''returns a representation of the tree using indentation'''
|
'''returns a representation of the tree using indentation'''
|
||||||
@ -216,27 +134,20 @@ class Node:
|
|||||||
return '\n' + r
|
return '\n' + r
|
||||||
|
|
||||||
def get_left_leaf(self) -> 'Node':
|
def get_left_leaf(self) -> 'Node':
|
||||||
'''This function returns the leftmost leaf in the tree.'''
|
raise NotImplementedError()
|
||||||
if self.kind == RELATION:
|
|
||||||
return self
|
|
||||||
elif self.kind == UNARY:
|
|
||||||
return self.child.get_left_leaf()
|
|
||||||
elif self.kind == BINARY:
|
|
||||||
return self.left.get_left_leaf()
|
|
||||||
raise ValueError('What kind of alien object is this?')
|
|
||||||
|
|
||||||
def result_format(self, rels: dict) -> list:
|
def result_format(self, rels: dict) -> list: #FIXME types
|
||||||
'''This function returns a list containing the fields that the resulting relation will have.
|
'''This function returns a list containing the fields that the resulting relation will have.
|
||||||
It requires a dictionary where keys are the names of the relations and the values are
|
It requires a dictionary where keys are the names of the relations and the values are
|
||||||
the relation objects.'''
|
the relation objects.'''
|
||||||
if not isinstance(rels, dict):
|
if not isinstance(rels, dict):
|
||||||
raise TypeError('Can\'t be of None type')
|
raise TypeError('Can\'t be of None type')
|
||||||
|
|
||||||
if self.kind == RELATION:
|
if isinstance(self, Variable): #FIXME this is ugly
|
||||||
return list(rels[self.name].header)
|
return list(rels[self.name].header)
|
||||||
elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION):
|
elif isinstance(self, Binary) and self.name in (DIFFERENCE, UNION, INTERSECTION):
|
||||||
return self.left.result_format(rels)
|
return self.left.result_format(rels)
|
||||||
elif self.kind == BINARY and self.name == DIVISION:
|
elif isinstance(self, Binary) and self.name == DIVISION:
|
||||||
return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
|
return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
|
||||||
elif self.name == PROJECTION:
|
elif self.name == PROJECTION:
|
||||||
return [i.strip() for i in self.prop.split(',')]
|
return [i.strip() for i in self.prop.split(',')]
|
||||||
@ -259,7 +170,7 @@ class Node:
|
|||||||
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
|
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
|
||||||
raise ValueError('What kind of alien object is this?')
|
raise ValueError('What kind of alien object is this?')
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other): #FIXME
|
||||||
if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
|
if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -271,22 +182,151 @@ class Node:
|
|||||||
return self.left == other.left and self.right == other.right
|
return self.left == other.left and self.right == other.right
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Variable(Node):
|
||||||
|
def _toPython(self) -> str:
|
||||||
|
return self.name
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if (self.kind == RELATION):
|
return self.name
|
||||||
return self.name
|
|
||||||
elif (self.kind == UNARY):
|
def get_left_leaf(self) -> Node:
|
||||||
return self.name + " " + self.prop + " (" + self.child.__str__() + ")"
|
return self
|
||||||
elif (self.kind == BINARY):
|
|
||||||
le = self.left.__str__()
|
|
||||||
if self.right.kind != BINARY:
|
|
||||||
re = self.right.__str__()
|
|
||||||
else:
|
|
||||||
re = "(" + self.right.__str__() + ")"
|
|
||||||
return (le + self.name + re)
|
|
||||||
raise ValueError('What kind of alien object is this?')
|
|
||||||
|
|
||||||
|
|
||||||
def _find_matching_parenthesis(expression: str, start=0, openpar=u'(', closepar=u')') -> Optional[int]:
|
@dataclass
class Binary(Node):
    '''Node with two children, left and right.'''
    left: Node
    right: Node

    def get_left_leaf(self) -> Node:
        '''Returns the leftmost leaf, searching in the left child.'''
        return self.left.get_left_leaf()

    def _toPython(self) -> str:
        '''Returns the python expression for this node, as a string.'''
        left_code = self.left._toPython()
        method = op_functions[self.name]
        right_code = self.right._toPython()
        return f'{left_code}.{method}({right_code})'

    def __str__(self):
        left = str(self.left)
        right = str(self.right)
        # A binary node on the right is put within parenthesis
        if isinstance(self.right, Binary):
            right = f'({right})'
        return f'{left}{self.name}{right}'
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Unary(Node):
    '''Node with a single child and a textual property (prop).'''
    prop: str
    child: Node

    def get_left_leaf(self) -> Node:
        '''Returns the leftmost leaf, searching in the child.'''
        return self.child.get_left_leaf()

    def __str__(self):
        return f'{self.name} {self.prop} ({str(self.child)})'

    def _toPython(self) -> str:
        '''Returns the python expression for this node, as a string.'''
        # Converting parameters
        if self.name == PROJECTION:
            fields = self.prop.replace(' ', '').replace(',', '","')
            param = f'"{fields}"'
        elif self.name == RENAME:
            param = repr(self.get_rename_prop())
        else:  # Selection
            param = repr(self.prop)

        child_code = self.child._toPython()
        return f'{child_code}.{op_functions[self.name]}({param})'

    def get_projection_prop(self) -> List[str]:
        '''
        Returns the list of fields in prop, split on commas and stripped
        '''
        if self.name != PROJECTION:
            raise ValueError('This is only supported on projection nodes')
        return [field.strip() for field in self.prop.split(',')]

    def set_projection_prop(self, p: List[str]) -> None:
        '''
        Sets the prop field, joining the list of fields with commas
        '''
        if self.name != PROJECTION:
            raise ValueError('This is only supported on projection nodes')
        self.prop = ','.join(p)

    def get_rename_prop(self) -> Dict[str, str]:
        '''
        Returns the dictionary that the rename operation wants
        '''
        if self.name != RENAME:
            raise ValueError('This is only supported on rename nodes')
        pairs = (item.split(ARROW) for item in self.prop.split(','))
        return {pair[0].strip(): pair[1].strip() for pair in pairs}

    def set_rename_prop(self, renames: Dict[str, str]) -> None:
        '''
        Sets the prop field based on the dictionary for renames
        '''
        if self.name != RENAME:
            raise ValueError('This is only supported on rename nodes')
        self.prop = ','.join(
            f'{old}{ARROW}{new}' for old, new in renames.items()
        )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def parse_tokens(expression: List[Union[list, str]]) -> Node:
    '''Generates the tree from the tokenized expression

    expression: the list of tokens. A token that is itself a list is a
                sub-expression.

    Return value: the root node of the tree; a Variable, a Binary or
    a Unary node.

    Raises ParserException if the expression contains an invalid relation
    name, an operator without the tokens it needs, or no operator at all
    (which includes the case of an empty expression).'''

    # If the list contains only a list, it will consider the lower level list.
    # This will allow things like ((((((a))))) to work
    while len(expression) == 1 and isinstance(expression[0], list):
        expression = expression[0]

    # The list contains only 1 string. Means it is the name of a relation
    if len(expression) == 1:
        if not rtypes.is_valid_relation_name(expression[0]):
            raise ParserException(
                u"'%s' is not a valid relation name" % expression[0])
        return Variable(expression[0]) #FIXME Move validation in the object

    # Expression from right to left, searching for binary operators
    # this means that binary operators have lesser priority than
    # unary operators.
    # It finds the operator with lesser priority, uses it as root of this
    # (sub)tree using everything on its left as left parameter (so building
    # a left subtree with the part of the list located on left) and doing
    # the same on right.
    # Since it searches for strings, and expressions into parenthesis are
    # within sub-lists, they won't be found here, ensuring that they will
    # have highest priority.
    for i in range(len(expression) - 1, -1, -1):
        if expression[i] in b_operators:  # Binary operator
            # This is a plain function, there is no self here: the
            # operator is named in the messages using the token itself.
            if not expression[:i]:
                raise ParserException(
                    u"Expected left operand for '%s'" % expression[i])

            if not expression[i + 1:]:
                raise ParserException(
                    u"Expected right operand for '%s'" % expression[i])
            return Binary(expression[i], parse_tokens(expression[:i]), parse_tokens(expression[i + 1:]))

    # Searches for unary operators, parsing from right to left
    for i in range(len(expression) - 1, -1, -1):
        if expression[i] in u_operators:  # Unary operator
            # The operator must be followed by its property and its child
            if len(expression) <= i + 2:
                raise ParserException(
                    u"Expected more tokens in '%s'" % expression[i])

            return Unary(
                expression[i],
                prop=expression[1 + i].strip(),
                child=parse_tokens(expression[2 + i])
            )
    raise ParserException('Parse error') #FIXME more details
|
||||||
|
|
||||||
|
|
||||||
|
def _find_matching_parenthesis(expression: str, start=0, openpar='(', closepar=')') -> Optional[int]:
|
||||||
'''This function returns the position of the matching
|
'''This function returns the position of the matching
|
||||||
close parenthesis to the 1st open parenthesis found
|
close parenthesis to the 1st open parenthesis found
|
||||||
starting from start (0 by default)'''
|
starting from start (0 by default)'''
|
||||||
@ -391,7 +431,7 @@ def tokenize(expression: str) -> list:
|
|||||||
def tree(expression: str) -> Node:
|
def tree(expression: str) -> Node:
|
||||||
'''This function parses a relational algebra expression into a AST and returns
|
'''This function parses a relational algebra expression into a AST and returns
|
||||||
the root node using the Node class.'''
|
the root node using the Node class.'''
|
||||||
return Node(tokenize(expression))
|
return parse_tokens(tokenize(expression))
|
||||||
|
|
||||||
|
|
||||||
def parse(expr: str) -> CallableString:
|
def parse(expr: str) -> CallableString:
|
||||||
@ -400,11 +440,3 @@ def parse(expr: str) -> CallableString:
|
|||||||
Python expression.
|
Python expression.
|
||||||
'''
|
'''
|
||||||
return tree(expr).toPython()
|
return tree(expr).toPython()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
while True:
|
|
||||||
e = input("Expression: ")
|
|
||||||
print (parse(e))
|
|
||||||
|
|
||||||
# Backwards compatibility
|
|
||||||
node = Node
|
|
||||||
|
1
tests_dir/people_rename.query
Normal file
1
tests_dir/people_rename.query
Normal file
@ -0,0 +1 @@
|
|||||||
|
ρ name➡n,age➡a(σTrue(people)) ∪ ρ age➡a,name➡n(people)
|
9
tests_dir/people_rename.result
Normal file
9
tests_dir/people_rename.result
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
id,n,chief,a
|
||||||
|
0,jack,0,22
|
||||||
|
1,carl,0,20
|
||||||
|
2,john,1,30
|
||||||
|
3,dean,1,33
|
||||||
|
4,eve,0,25
|
||||||
|
5,duncan,4,30
|
||||||
|
6,paul,4,30
|
||||||
|
7,alia,1,28
|
1
tests_dir/people_rename_select.query
Normal file
1
tests_dir/people_rename_select.query
Normal file
@ -0,0 +1 @@
|
|||||||
|
σ i%2==0 (ρ id➡i (people))
|
5
tests_dir/people_rename_select.result
Normal file
5
tests_dir/people_rename_select.result
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
i,name,chief,age
|
||||||
|
0,jack,0,22
|
||||||
|
2,john,1,30
|
||||||
|
4,eve,0,25
|
||||||
|
6,paul,4,30
|
Loading…
x
Reference in New Issue
Block a user