Merge pull request #20 from ltworf/refactor_parser

Refactor parser
2020-06-09 23:49:11 +02:00
parent 2480c955ae d7145171de
commit 5d3823d0ea
8 changed files with 436 additions and 492 deletions
--- a/5
+++ b/5
@@ -1,4 +1,7 @@
-2.6
+3.0
 - Refactored parser to use better typing
 - Refactored and fixed some optimizations
 - Added more test cases
 - Improved survey sending
 - Prevent relation/field names from being reserved keywords
 - Fixed issue in cli where loading an invalid file would lead to a crash
--- a/relational/optimizations.py
+++ b/relational/optimizations.py
@@ -1,5 +1,5 @@
 # Relational
-# Copyright (C) 2009-2018  Salvo "LtWorf" Tomaselli
+# Copyright (C) 2009-2020  Salvo "LtWorf" Tomaselli
 #
 # Relational is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -30,8 +30,9 @@
 from io import StringIO
 from tokenize import generate_tokens
 from typing import Tuple, Dict
-
+from relational.relation import Relation
 from relational import parser
 sel_op = (
@@ -98,36 +99,7 @@ def replace_node(replace, replacement):
        replace.left = replacement.left
-def recoursive_scan(function, node, rels=None):
+def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]:
    '''Does a recoursive optimization on the tree.
    This function will recoursively execute the function given
    as "function" parameter starting from node to all the tree.
    if rels is provided it will be passed as argument to the function.
    Otherwise the function will be called just on the node.
    Result value: function is supposed to return the amount of changes
    it has performed on the tree.
    The various result will be added up and this final value will be the
    returned value.'''
    changes = 0
    # recoursive scan
    if node.kind == parser.UNARY:
        if rels != None:
            changes += function(node.child, rels)
        else:
            changes += function(node.child)
    elif node.kind == parser.BINARY:
        if rels != None:
            changes += function(node.right, rels)
            changes += function(node.left, rels)
        else:
            changes += function(node.right)
            changes += function(node.left)
    return changes
 def duplicated_select(n: parser.Node) -> int:
    '''This function locates and deletes things like
    σ a ( σ a(C)) and the ones like σ a ( σ b(C))
    replacing the 1st one with a single select and
@@ -135,243 +107,189 @@ def duplicated_select(n: parser.Node) -> int:
    in and
    '''
    changes = 0
-    if n.name == SELECTION and n.child.name == SELECTION:
+    while n.name == SELECTION and n.child.name == SELECTION:
        changes += 1
        prop = n.prop
        if n.prop != n.child.prop:  # Nested but different, joining them
-            n.prop = n.prop + " and " + n.child.prop
+            prop = n.prop + " and " + n.child.prop
            # This adds parenthesis if they are needed
            if n.child.prop.startswith('(') or n.prop.startswith('('):
-                n.prop = '(%s)' % n.prop
+                prop = '(%s)' % prop
-
+        n = parser.Unary(
-        n.child = n.child.child
+            SELECTION,
-        changes = 1
+            prop,
-        changes += duplicated_select(n)
+            n.child.child,
-
+        )
-    return changes + recoursive_scan(duplicated_select, n)
+    return n, changes
-def futile_union_intersection_subtraction(n: parser.Node) -> int:
+def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, int]:
-    '''This function locates things like r ᑌ r, and replaces them with r.
+    '''This function locates things like r ∪ r, and replaces them with r.
-    R ᑌ R  --> R
+    R ∪ R  --> R
-    R ᑎ R --> R
+    R ∩ R --> R
    R - R --> σ False (R)
    σ k (R) - R --> σ False (R)
    R - σ k (R) --> σ not k (R)
-    σ k (R) ᑌ R --> R
+    σ k (R) ∪ R --> R
-    σ k (R) ᑎ R --> σ k (R)
+    σ k (R) ∩ R --> σ k (R)
    '''
    changes = 0
    # Union and intersection of the same thing
    if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right:
-        changes = 1
+        return n.left, 1
        replace_node(n, n.left)
    # selection and union of the same thing
    elif (n.name == UNION):
        if n.left.name == SELECTION and n.left.child == n.right:
-            changes = 1
+            return n.right, 1
            replace_node(n, n.right)
        elif n.right.name == SELECTION and n.right.child == n.left:
-            changes = 1
+            return n.left, 1
            replace_node(n, n.left)
    # selection and intersection of the same thing
    elif n.name == INTERSECTION:
        if n.left.name == SELECTION and n.left.child == n.right:
-            changes = 1
+            return n.left, 1
            replace_node(n, n.left)
        elif n.right.name == SELECTION and n.right.child == n.left:
-            changes = 1
+            return n.right, 1
            replace_node(n, n.right)
    # Subtraction and selection of the same thing
    elif n.name == DIFFERENCE and \
            n.right.name == SELECTION and \
            n.right.child == n.left:
-        n.name = n.right.name
+        return parser.Unary(
-        n.kind = n.right.kind
+            SELECTION,
-        n.child = n.right.child
+            '(not (%s))' % n.right.prop,
-        n.prop = '(not (%s))' % n.right.prop
+            n.right.child), 1
        n.left = n.right = None
    # Subtraction of the same thing or with selection on the left child
    elif n.name == DIFFERENCE and (n.left == n.right or (n.left.name == SELECTION and n.left.child == n.right)):
-        changes = 1
+        return parser.Unary(
-        n.kind = parser.UNARY
+            SELECTION,
-        n.name = SELECTION
+            'False',
-        n.prop = 'False'
+            n.get_left_leaf()
-        n.child = n.left.get_left_leaf()
+        ), 1
-        # n.left=n.right=None
+    return n, 0
    return changes + recoursive_scan(futile_union_intersection_subtraction, n)
-def down_to_unions_subtractions_intersections(n: parser.Node) -> int:
+def down_to_unions_subtractions_intersections(n: parser.Node) -> Tuple[parser.Node, int]:
-    '''This funcion locates things like σ i==2 (c ᑌ d), where the union
+    '''This funcion locates things like σ i==2 (c ∪ d), where the union
    can be a subtraction and an intersection and replaces them with
-    σ i==2 (c) ᑌ σ i==2(d).
+    σ i==2 (c) ∪ σ i==2(d).
    '''
    changes = 0
    _o = (UNION, DIFFERENCE, INTERSECTION)
    if n.name == SELECTION and n.child.name in _o:
        l = parser.Unary(SELECTION, n.prop, n.child.left)
        r = parser.Unary(SELECTION, n.prop, n.child.right)
-        left = parser.Node()
+        return parser.Binary(n.child.name, l, r), 1
-        left.prop = n.prop
+    return n, 0
        left.name = n.name
        left.child = n.child.left
        left.kind = parser.UNARY
        right = parser.Node()
        right.prop = n.prop
        right.name = n.name
        right.child = n.child.right
        right.kind = parser.UNARY
        n.name = n.child.name
        n.left = left
        n.right = right
        n.child = None
        n.prop = None
        n.kind = parser.BINARY
        changes += 1
    return changes + recoursive_scan(down_to_unions_subtractions_intersections, n)
-def duplicated_projection(n: parser.Node) -> int:
+def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates thing like π i ( π j (R)) and replaces
    them with π i (R)'''
    changes = 0
    if n.name == PROJECTION and n.child.name == PROJECTION:
-        n.child = n.child.child
+        return parser.Unary(
-        changes += 1
+            PROJECTION,
-
+            n.prop,
-    return changes + recoursive_scan(duplicated_projection, n)
+            n.child.child), 1
    return n, 0
-def selection_inside_projection(n: parser.Node) -> int:
+def selection_inside_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like  σ j (π k(R)) and
    converts them into π k(σ j (R))'''
    changes = 0
    if n.name == SELECTION and n.child.name == PROJECTION:
-        changes = 1
+        child = parser.Unary(
-        temp = n.prop
+            SELECTION,
-        n.prop = n.child.prop
+            n.prop,
-        n.child.prop = temp
+            n.child.child
-        n.name = PROJECTION
+        )
        n.child.name = SELECTION
-    return changes + recoursive_scan(selection_inside_projection, n)
+        return parser.Unary(PROJECTION, n.child.prop, child), 0
    return n, 0
-def swap_union_renames(n: parser.Node) -> int:
+def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like
-    ρ a➡b(R) ᑌ ρ a➡b(Q)
+    ρ a➡b(R) ∪ ρ a➡b(Q)
    and replaces them with
-    ρ a➡b(R ᑌ Q).
+    ρ a➡b(R ∪ Q).
    Does the same with subtraction and intersection'''
-    changes = 0
+    if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == RENAME and n.right.name == RENAME:
-
+        l_vars = n.left.get_rename_prop()
-    if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == n.right.name and n.left.name == RENAME:
+        r_vars = n.right.get_rename_prop()
        l_vars = {}
        for i in n.left.prop.split(','):
            q = i.split(ARROW)
            l_vars[q[0].strip()] = q[1].strip()
        r_vars = {}
        for i in n.right.prop.split(','):
            q = i.split(ARROW)
            r_vars[q[0].strip()] = q[1].strip()
        if r_vars == l_vars:
-            changes = 1
+            child = parser.Binary(n.name, n.left.child, n.right.child)
-
+            return parser.Unary(RENAME, n.left.prop, child), 1
-            # Copying self, but child will be child of renames
+    return n, 0
            q = parser.Node()
            q.name = n.name
            q.kind = parser.BINARY
            q.left = n.left.child
            q.right = n.right.child
            n.name = RENAME
            n.kind = parser.UNARY
            n.child = q
            n.prop = n.left.prop
            n.left = n.right = None
    return changes + recoursive_scan(swap_union_renames, n)
-def futile_renames(n: parser.Node) -> int:
+def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]:
-    '''This function purges renames like id->id'''
+    '''This function purges renames like
-    changes = 0
+    ρ id->id,a->q (A)
    into
    ρ a->q (A)
    or removes the operation entirely if they all get removed
    '''
    if n.name == RENAME:
-        # Located two nested renames.
+        renames = n.get_rename_prop()
-        changes = 1
+        changes = False
        for k, v in renames.items():
            if k == v:
                changes = True
                del renames[k]
        if len(renames) == 0: # Nothing to rename, removing the rename
            return n.child, 1
        elif changes:
            # Changing the node in place, no need to return to cause a recursive step
            n.set_rename_prop(renames)
-        # Creating a dictionary with the attributes
+    return n, 0
        _vars = {}
        for i in n.prop.split(','):
            q = i.split(ARROW)
            _vars[q[0].strip()] = q[1].strip()
        # Scans dictionary to locate things like "a->b,b->c" and replace them
        # with "a->c"
        for key in list(_vars.keys()):
            value = _vars.get(key)
            if key == value:
                _vars.pop(value)  # Removes the unused one
        if len(_vars) == 0: # Nothing to rename, removing the rename op
            replace_node(n, n.child)
        else:
            n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items())
    return changes + recoursive_scan(futile_renames, n)
-def subsequent_renames(n: parser.Node) -> int:
+def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]:
-    '''This function removes redoundant subsequent renames joining them into one'''
+    '''This function removes redundant subsequent renames joining them into one
-
+    ρ .. ρ .. (A)
-    '''Purges renames like id->id Since it's needed to be performed BEFORE this one
+    into
-    so it is not in the list with the other optimizations'''
+    ρ ... (A)
-    futile_renames(n)
+    '''
    changes = 0
    if n.name == RENAME and n.child.name == RENAME:
        # Located two nested renames.
-        changes = 1
+        prop = n.prop + ',' + n.child.prop
-        # Joining the attribute into one
+        child = n.child.child
-        n.prop += ',' + n.child.prop
+        n = parser.Unary(RENAME, prop, child)
        n.child = n.child.child
        # Creating a dictionary with the attributes
-        _vars = {}
+        renames = n.get_rename_prop()
-        for i in n.prop.split(','):
+
            q = i.split(ARROW)
            _vars[q[0].strip()] = q[1].strip()
        # Scans dictionary to locate things like "a->b,b->c" and replace them
        # with "a->c"
-        for key in list(_vars.keys()):
+        for key, value in tuple(renames.items()):
-            value = _vars.get(key)
+
-            if value in _vars.keys():
+            if value in renames:
-                if _vars[value] != key:
+                if renames[value] != key:
                    # Double rename on attribute
-                    _vars[key] = _vars[_vars[key]]  # Sets value
+                    renames[key] = renames[renames[key]]  # Sets value
-                    _vars.pop(value)  # Removes the unused one
+                    del renames[value]  # Removes the unused one
                else:  # Cycle rename a->b,b->a
-                    _vars.pop(value)  # Removes the unused one
+                    del renames[value] # Removes the unused one
-                    _vars.pop(key)  # Removes the unused one
+                    del renames[key] # Removes the unused one
-        if len(_vars) == 0:  # Nothing to rename, removing the rename op
+        if len(renames) == 0:  # Nothing to rename, removing the rename op
-            replace_node(n, n.child)
+            return n.child, 1
        else:
-            n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items())
+            n.set_rename_prop(renames)
            return n, 1
-    return changes + recoursive_scan(subsequent_renames, n)
+    return n, 0
 class level_string(str):
@@ -411,101 +329,84 @@ def tokenize_select(expression):
    return l
-def swap_rename_projection(n: parser.Node) -> int:
+def swap_rename_projection(n: parser.Node) -> Tuple[parser.Node, int]:
-    '''This function locates things like π k(ρ j(R))
+    '''This function locates things like
-    and replaces them with ρ j(π k(R)).
+    π k(ρ j(R))
    and replaces them with
    ρ j(π k(R)).
    This will let rename work on a hopefully smaller set
    and more important, will hopefully allow further optimizations.
-    Will also eliminate fields in the rename that are cutted in the projection.
+
    Will also eliminate fields in the rename that are cut in the projection.
    '''
    changes = 0
    if n.name == PROJECTION and n.child.name == RENAME:
        changes = 1
        # π index,name(ρ id➡index(R))
-        _vars = {}
+        renames = n.child.get_rename_prop()
-        for i in n.child.prop.split(','):
+        projections = set(n.get_projection_prop())
            q = i.split(ARROW)
            _vars[q[1].strip()] = q[0].strip()
-        _pr = n.prop.split(',')
+        # Use pre-rename names in the projection
-        for i in range(len(_pr)):
+        for k, v in renames.items():
-            try:
+            if v in projections:
-                _pr[i] = _vars[_pr[i].strip()]
+                projections.remove(v)
-            except:
+                projections.add(k)
                pass
-        _pr_reborn = n.prop.split(',')
+        # Eliminate fields
-        for i in list(_vars.keys()):
+        for i in list(renames.keys()):
-            if i not in _pr_reborn:
+            if i not in projections:
-                _vars.pop(i)
+                del renames[i]
        n.name = n.child.name
-        n.prop = ','.join('%s%s%s' % (i[1], ARROW, i[0]) for i in _vars.items())
+        child = parser.Unary(PROJECTION,'' , n.child.child)
        child.set_projection_prop(projections)
        n = parser.Unary(RENAME, '', child)
        n.set_rename_prop(renames)
        return n, 1
-        n.child.name = PROJECTION
+    return n, 0
        n.child.prop = ''
        for i in _pr:
            n.child.prop += i + ','
        n.child.prop = n.child.prop[:-1]
    return changes + recoursive_scan(swap_rename_projection, n)
 def swap_rename_select(n: parser.Node) -> int:
-    '''This function locates things like σ k(ρ j(R)) and replaces
+    '''This function locates things like
-    them with ρ j(σ k(R)). Renaming the attributes used in the
+    σ k(ρ j(R))
    and replaces them with
    ρ j(σ k(R)).
    Renaming the attributes used in the
    selection, so the operation is still valid.'''
    changes = 0
    if n.name == SELECTION and n.child.name == RENAME:
-        changes = 1
+        # This is an inverse mapping for the rename
-        # Dictionary containing attributes of rename
+        renames = {v: k for k, v in n.child.get_rename_prop().items()}
        _vars = {}
        for i in n.child.prop.split(','):
            q = i.split(ARROW)
            _vars[q[1].strip()] = q[0].strip()
        # tokenizes expression in select
-        _tokens = tokenize_select(n.prop)
+        tokens = tokenize_select(n.prop)
-        # Renaming stuff
+        # Renaming stuff, no enum because I edit the tokens
-        for i in range(len(_tokens)):
+        for i in range(len(tokens)):
-            splitted = _tokens[i].split('.', 1)
+            splitted = tokens[i].split('.', 1)
-            if splitted[0] in _vars:
+            if splitted[0] in renames:
-                if len(splitted) == 1:
+                tokens[i] = renames[splitted[0]]
-                    _tokens[i] = _vars[_tokens[i].split('.')[0]]
+                if len(splitted) > 1:
-                else:
+                    tokens[i] += '.' + splitted[1]
                    _tokens[i] = _vars[
                        _tokens[i].split('.')[0]] + '.' + splitted[1]
-        # Swapping operators
+        child = parser.Unary(SELECTION, ' '.join(tokens), n.child.child)
-        n.name = RENAME
+        return parser.Unary(RENAME, n.child.prop, child), 1
-        n.child.name = SELECTION
+    return n, 0
        n.prop = n.child.prop
        n.child.prop = ' '.join(_tokens)
    return changes + recoursive_scan(swap_rename_select, n)
 def select_union_intersect_subtract(n: parser.Node) -> int:
-    '''This function locates things like σ i(a) ᑌ σ q(a)
+    '''This function locates things like
-    and replaces them with σ (i OR q) (a)
+    σ i(a) ∪ σ q(a)
    and replaces them with
    σ (i OR q) (a)
    Removing a O(n²) operation like the union'''
    changes = 0
    if n.name in {UNION, INTERSECTION, DIFFERENCE} and \
                n.left.name == SELECTION and \
                n.right.name == SELECTION and \
                n.left.child == n.right.child:
        changes = 1
        d = {UNION: 'or', INTERSECTION: 'and', DIFFERENCE: 'and not'}
        op = d[n.name]
        newnode = parser.Node()
        if n.left.prop.startswith('(') or n.right.prop.startswith('('):
            t_str = '('
            if n.left.prop.startswith('('):
@@ -519,54 +420,34 @@ def select_union_intersect_subtract(n: parser.Node) -> int:
                t_str += '%s'
            t_str += ')'
-            newnode.prop = t_str % (n.left.prop, op, n.right.prop)
+            prop = t_str % (n.left.prop, op, n.right.prop)
        else:
-            newnode.prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
+            prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
-        newnode.name = SELECTION
+        return parser.Unary(SELECTION, prop, n.left.child), 1
-        newnode.child = n.left.child
+    return n, 0
        newnode.kind = parser.UNARY
        replace_node(n, newnode)
    return changes + recoursive_scan(select_union_intersect_subtract, n)
-def union_and_product(n: parser.Node) -> int:
+def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]:
    '''
    A * B ∪ A * C = A * (B ∪ C)
    Same thing with inner join
    '''
    changes = 0
    if n.name == UNION and n.left.name in {PRODUCT, JOIN} and n.left.name == n.right.name:
        newnode = parser.Node()
        newnode.kind = parser.BINARY
        newnode.name = n.left.name
        newchild = parser.Node()
        newchild.kind = parser.BINARY
        newchild.name = UNION
        if n.left.left == n.right.left or n.left.left == n.right.right:
-            newnode.left = n.left.left
+            l = n.left.right
-            newnode.right = newchild
+            r = n.right.left if n.left.left == n.right.right else n.right.right
-
+            newchild = parser.Binary(UNION, l, r)
-            newchild.left = n.left.right
+            return parser.Binary(n.left.name, n.left.left, newchild), 1
            newchild.right = n.right.left if n.left.left == n.right.right else n.right.right
            replace_node(n, newnode)
            changes = 1
        elif n.left.right == n.right.left or n.left.left == n.right.right:
-            newnode.left = n.left.right
+            l = n.left.left
-            newnode.right = newchild
+            r = n.right.left if n.right.left == n.right.right else n.right.right
-
+            newchild = parser.Binary(UNION, l, r)
-            newchild.left = n.left.left
+            return parser.Binary(n.left.name, n.left.right, newchild), 1
-            newchild.right = n.right.left if n.right.left == n.right.right else n.right.right
+    return n, 0
            replace_node(n, newnode)
            changes = 1
    return changes + recoursive_scan(union_and_product, n)
-def projection_and_union(n, rels):
+def projection_and_union(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
    '''
    Turns
        π a,b,c(A) ∪ π a,b,c(B)
@@ -581,28 +462,16 @@ def projection_and_union(n, rels):
            n.left.name == PROJECTION and \
            n.right.name == PROJECTION and \
            set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)):
        newchild = parser.Node()
-        newchild.kind = parser.BINARY
+        child = parser.Binary(UNION, n.left.child, n.right.child)
-        newchild.name = UNION
+        return parser.Unary(PROJECTION, n.right.prop, child), 0
-        newchild.left = n.left.child
+    return n, 0
        newchild.right = n.right.child
        newnode = parser.Node()
        newnode.child = newchild
        newnode.kind = parser.UNARY
        newnode.name = PROJECTION
        newnode.prop = n.right.prop
        replace_node(n, newnode)
        changes = 1
    return changes + recoursive_scan(projection_and_union, n, rels)
-def selection_and_product(n, rels):
+def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.Node:
    '''This function locates things like σ k (R*Q) and converts them into
    σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
    i contains attributes belonging to Q and l contains attributes belonging to both'''
    changes = 0
    if n.name == SELECTION and n.child.name in (PRODUCT, JOIN):
        l_attr = n.child.left.result_format(rels)
@@ -637,76 +506,71 @@ def selection_and_product(n, rels):
                if j in r_attr:  # Field in right
                    r_fields = True
-            if l_fields and r_fields:  # Fields in both
+            if l_fields and not r_fields:
                both.append(i)
            elif l_fields:
                left.append(i)
-            elif r_fields:
+            elif r_fields and not l_fields:
                right.append(i)
            else:  # Unknown.. adding in both
                both.append(i)
        # Preparing left selection
-        if len(left) > 0:
+        if left:
-            changes = 1
+            l_prop = ''
            l_node = parser.Node()
            l_node.name = SELECTION
            l_node.kind = parser.UNARY
            l_node.child = n.child.left
            l_node.prop = ''
            n.child.left = l_node
            while len(left) > 0:
                c = left.pop(0)
                for i in c:
-                    l_node.prop += i + ' '
+                    l_prop += i + ' '
                if len(left) > 0:
-                    l_node.prop += ' and '
+                    l_prop += ' and '
-            if '(' in l_node.prop:
+            if '(' in l_prop:
-                l_node.prop = '(%s)' % l_node.prop
+                l_prop = '(%s)' % l_prop
            l_node = parser.Unary(SELECTION, l_prop, n.child.left)
        else:
            l_node = n.child.left
        # Preparing right selection
-        if len(right) > 0:
+        if right:
-            changes = 1
+            r_prop = ''
            r_node = parser.Node()
            r_node.name = SELECTION
            r_node.prop = ''
            r_node.kind = parser.UNARY
            r_node.child = n.child.right
            n.child.right = r_node
            while len(right) > 0:
                c = right.pop(0)
-                r_node.prop += ' '.join(c)
+                r_prop += ' '.join(c)
                if len(right) > 0:
-                    r_node.prop += ' and '
+                    r_prop += ' and '
-            if '(' in r_node.prop:
+            if '(' in r_prop:
-                r_node.prop = '(%s)' % r_node.prop
+                r_prop = '(%s)' % r_prop
            r_node = parser.Unary(SELECTION, r_prop, n.child.right)
        else:
            r_node = n.child.right
        b_node = parser.Binary(n.child.name, l_node, r_node)
        # Changing main selection
-        n.prop = ''
+        if both:
-        if len(both) != 0:
+            both_prop = ''
            while len(both) > 0:
                c = both.pop(0)
-                n.prop += ' '.join(c)
+                both_prop += ' '.join(c)
                if len(both) > 0:
-                    n.prop += ' and '
+                    both_prop += ' and '
-            if '(' in n.prop:
+            if '(' in both_prop:
-                n.prop = '(%s)' % n.prop
+                both_prop = '(%s)' % both_prop
            r = parser.Unary(SELECTION, both_prop, b_node)
            return r, len(left) + len(right)
        else:  # No need for general select
-            replace_node(n, n.child)
+            return b_node, 1
-    return changes + recoursive_scan(selection_and_product, n, rels)
+    return n, 0
-def useless_projection(n, rels) -> int:
+def useless_projection(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
    '''
    Removes projections that are over all the fields
    '''
    changes = 0
    if n.name == PROJECTION and \
            set(n.child.result_format(rels)) == set(i.strip() for i in n.prop.split(',')):
-        changes = 1
+        return n.child, 1
        replace_node(n, n.child)
-    return changes + recoursive_scan(useless_projection, n, rels)
+    return n, 0
 general_optimizations = [
    duplicated_select,
@@ -714,6 +578,7 @@ general_optimizations = [
    duplicated_projection,
    selection_inside_projection,
    subsequent_renames,
    futile_renames,
    swap_rename_select,
    futile_union_intersection_subtraction,
    swap_union_renames,
@@ -726,6 +591,3 @@ specific_optimizations = [
    projection_and_union,
    useless_projection,
 ]
 if __name__ == "__main__":
    print (tokenize_select("skill == 'C' and  id % 2 == 0"))
--- a/relational/optimizer.py
+++ b/relational/optimizer.py
@@ -1,5 +1,5 @@
 # Relational
-# Copyright (C) 2008-2016  Salvo "LtWorf" Tomaselli
+# Copyright (C) 2008-2020  Salvo "LtWorf" Tomaselli
 #
 # Relational is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -22,23 +22,22 @@
 # relational query, or it can be a parse tree for a relational expression (ie: class parser.node).
 # The functions will always return a string with the optimized query, but if a parse tree was provided,
 # the parse tree itself will be modified accordingly.
-from typing import Union, Optional, Dict, Any
+from typing import Union, Optional, Dict, Any, Tuple
 from relational.relation import Relation
 from relational import optimizations
-from relational.parser import Node, RELATION, UNARY, BINARY, op_functions, tokenize, tree
+from relational.parser import Node, Variable, Unary, Binary, op_functions, tokenize, tree
 from relational import querysplit
 from relational.maintenance import UserInterface
 ContextDict = Dict[str,Any]
-
+def optimize_program(code, rels: Dict[str, Relation]):
 def optimize_program(code, rels: ContextDict):
    '''
    Optimize an entire program, composed by multiple expressions
    and assignments.
    '''
    lines = code.split('\n')
-    context = {} #  type: ContextDict
+    context = {}
    for line in  lines:
        line = line.strip()
@@ -53,7 +52,7 @@ def optimize_program(code, rels: ContextDict):
    return querysplit.split(node, rels)
-def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
+def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
    '''This function performs all the available optimizations.
    expression : see documentation of this module
@@ -82,23 +81,23 @@ def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool
        total = 0
        if specific:
            for i in optimizations.specific_optimizations:
-                res = i(n, rels)  # Performs the optimization
+                n, c = recursive_scan(i, n, rels)
-                if res != 0 and dbg:
+                if c != 0 and dbg:
                    debug.append(str(n))
-                total += res
+                total += c
        if general:
            for i in optimizations.general_optimizations:
-                res = i(n)  # Performs the optimization
+                n, c = recursive_scan(i, n, None)
-                if res != 0 and dbg:
+                if c != 0 and dbg:
                    debug.append(str(n))
-                total += res
+                total += c
    if tostr:
        return str(n)
    else:
        return n
-def specific_optimize(expression, rels: ContextDict):
+def specific_optimize(expression, rels: Dict[str, Relation]):
    '''This function performs specific optimizations. Means that it will need to
    know the fields used by the relations.
@@ -117,3 +116,35 @@ def general_optimize(expression):
    Return value: this will return an optimized version of the expression'''
    return optimize_all(expression, None, specific=False, general=True)
 def recursive_scan(function, node, rels) -> Tuple[Node, int]:
    '''Applies an optimization function recursively over a tree.

    "function" is called on node first; it must return a tuple
    (replacement_node, changes), where replacement_node is the node
    that takes the place of the one passed in (possibly the very same
    object) and changes is the amount of changes it performed.

    The scan then descends into the children of the replacement node
    and stores the (possibly replaced) children back into it.

    If rels is not None, it is passed to "function" as second
    argument; otherwise "function" is called with the node alone.

    Return value: a tuple with the node that replaces the one passed
    in and the sum of the changes performed on the whole subtree.'''
    # Testing against None rather than truthiness: an empty dictionary
    # of relations is still a valid argument for the functions that
    # expect rels, and must not be silently dropped.
    args = []
    if rels is not None:
        args.append(rels)

    node, changes = function(node, *args)

    # Descend into the replacement node, not the original one, so that
    # newly created subtrees get scanned as well.
    if isinstance(node, Unary):
        node.child, c = recursive_scan(function, node.child, rels)
        changes += c
    elif isinstance(node, Binary):
        node.left, c = recursive_scan(function, node.left, rels)
        changes += c
        node.right, c = recursive_scan(function, node.right, rels)
        changes += c
    return node, changes
--- a/relational/parser.py
+++ b/relational/parser.py
@@ -1,5 +1,5 @@
 # Relational
-# Copyright (C) 2008-2017  Salvo "LtWorf" Tomaselli
+# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
 #
 # Relational is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -24,14 +24,11 @@
 #
 # Language definition here:
 # http://ltworf.github.io/relational/grammar.html
-from typing import Optional, Union, List, Any
+from typing import Optional, Union, List, Any, Dict
 from dataclasses import dataclass
 from relational import rtypes
 RELATION = 0
 UNARY = 1
 BINARY = 2
 PRODUCT = '*'
 DIFFERENCE = '-'
 UNION = '∪'
@@ -84,9 +81,8 @@ class CallableString(str):
        '''
        return eval(self, context)
-
+@dataclass
 class Node:
    '''This class is a node of a relational expression. Leaves are relations
    and internal nodes are operations.
@@ -102,72 +98,12 @@ class Node:
    operation.
    This class is used to convert an expression into python code.'''
-    kind = None #  type: Optional[int]
+    name: str
    __hash__ = None #  type: None
-    def __init__(self, expression: Optional[list] = None) -> None:
+    def __init__(self, name: str) -> None:
-        '''Generates the tree from the tokenized expression
+        raise NotImplementedError('This is supposed to be an abstract class')
        If no expression is specified then it will create an empty node'''
        if expression is None or len(expression) == 0:
            return
-        # If the list contains only a list, it will consider the lower level list.
+    def toCode(self): #FIXME return type
        # This will allow things like ((((((a))))) to work
        while len(expression) == 1 and isinstance(expression[0], list):
            expression = expression[0]
        # The list contains only 1 string. Means it is the name of a relation
        if len(expression) == 1:
            self.kind = RELATION
            self.name = expression[0]
            if not rtypes.is_valid_relation_name(self.name):
                raise ParserException(
                    u"'%s' is not a valid relation name" % self.name)
            return
        # Expression from right to left, searching for binary operators
        # this means that binary operators have lesser priority than
        # unary operators.
        # It finds the operator with lesser priority, uses it as root of this
        # (sub)tree using everything on its left as left parameter (so building
        # a left subtree with the part of the list located on left) and doing
        # the same on right.
        # Since it searches for strings, and expressions into parenthesis are
        # within sub-lists, they won't be found here, ensuring that they will
        # have highest priority.
        for i in range(len(expression) - 1, -1, -1):
            if expression[i] in b_operators:  # Binary operator
                self.kind = BINARY
                self.name = expression[i]
                if len(expression[:i]) == 0:
                    raise ParserException(
                        u"Expected left operand for '%s'" % self.name)
                if len(expression[i + 1:]) == 0:
                    raise ParserException(
                        u"Expected right operand for '%s'" % self.name)
                self.left = node(expression[:i])
                self.right = node(expression[i + 1:])
                return
        '''Searches for unary operators, parsing from right to left'''
        for i in range(len(expression) - 1, -1, -1):
            if expression[i] in u_operators:  # Unary operator
                self.kind = UNARY
                self.name = expression[i]
                if len(expression) <= i + 2:
                    raise ParserException(
                        u"Expected more tokens in '%s'" % self.name)
                self.prop = expression[1 + i].strip()
                self.child = node(expression[2 + i])
                return
        raise ParserException("Expected operator in '%s'" % expression)
    def toCode(self):
        '''This method converts the AST into a python code object'''
        code = self._toPython()
        return compile(code, '<relational_expression>', 'eval')
@@ -181,25 +117,7 @@ class Node:
        return CallableString(self._toPython())
    def _toPython(self) -> str:
-        '''
+        raise NotImplementedError()
        Same as toPython but returns a regular string
        '''
        if self.name in b_operators:
            return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
        elif self.name in u_operators:
            prop = self.prop
            # Converting parameters
            if self.name == PROJECTION:
                prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"')
            elif self.name == RENAME:
                prop = '{\"%s\"}' % prop.replace(
                    ',', '\",\"').replace(ARROW, '\":\"').replace(' ', '')
            else:  # Selection
                prop = repr(prop)
            return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
        return self.name
    def printtree(self, level: int = 0) -> str:
        '''returns a representation of the tree using indentation'''
@@ -216,27 +134,20 @@ class Node:
        return '\n' + r
    def get_left_leaf(self) -> 'Node':
-        '''This function returns the leftmost leaf in the tree.'''
+        raise NotImplementedError()
        if self.kind == RELATION:
            return self
        elif self.kind == UNARY:
            return self.child.get_left_leaf()
        elif self.kind == BINARY:
            return self.left.get_left_leaf()
        raise ValueError('What kind of alien object is this?')
-    def result_format(self, rels: dict) -> list:
+    def result_format(self, rels: dict) -> list: #FIXME types
        '''This function returns a list containing the fields that the resulting relation will have.
        It requires a dictionary where keys are the names of the relations and the values are
        the relation objects.'''
        if not isinstance(rels, dict):
            raise TypeError('Can\'t be of None type')
-        if self.kind == RELATION:
+        if isinstance(self, Variable):  #FIXME this is ugly
            return list(rels[self.name].header)
-        elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION):
+        elif isinstance(self, Binary) and self.name in (DIFFERENCE, UNION, INTERSECTION):
            return self.left.result_format(rels)
-        elif self.kind == BINARY and self.name == DIVISION:
+        elif isinstance(self, Binary) and self.name == DIVISION:
            return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
        elif self.name == PROJECTION:
            return [i.strip() for i in self.prop.split(',')]
@@ -259,7 +170,7 @@ class Node:
            return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
        raise ValueError('What kind of alien object is this?')
-    def __eq__(self, other):
+    def __eq__(self, other): #FIXME
        if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
            return False
@@ -271,22 +182,151 @@ class Node:
            return self.left == other.left and self.right == other.right
        return True
@dataclass
 class Variable(Node):
    def _toPython(self) -> str:
        return self.name
    def __str__(self):
-        if (self.kind == RELATION):
+        return self.name
-            return self.name
+
-        elif (self.kind == UNARY):
+    def get_left_leaf(self) -> Node:
-            return self.name + " " + self.prop + " (" + self.child.__str__() + ")"
+        return self
        elif (self.kind == BINARY):
            le = self.left.__str__()
            if self.right.kind != BINARY:
                re = self.right.__str__()
            else:
                re = "(" + self.right.__str__() + ")"
            return (le + self.name + re)
        raise ValueError('What kind of alien object is this?')
-def _find_matching_parenthesis(expression: str, start=0, openpar=u'(', closepar=u')') -> Optional[int]:
+@dataclass
 class Binary(Node):
    # Operands of the binary operation; the operator itself is in self.name
    left: Node
    right: Node

    def __str__(self):
        '''Returns the expression as a string. The right operand is wrapped
        in parenthesis when it is a binary operation itself.'''
        lhs = str(self.left)
        rhs = str(self.right)
        if isinstance(self.right, Binary):
            rhs = "(" + rhs + ")"
        return ''.join((lhs, self.name, rhs)) #TODO use fstrings

    def _toPython(self) -> str:
        '''Returns the python expression that calls the method implementing
        this operator on the left operand, passing the right operand.'''
        method = op_functions[self.name]
        receiver = self.left._toPython()
        argument = self.right._toPython()
        return '%s.%s(%s)' % (receiver, method, argument)

    def get_left_leaf(self) -> Node:
        '''Returns the leftmost leaf, which is found in the left subtree.'''
        leftmost = self.left.get_left_leaf()
        return leftmost
@dataclass
 class Unary(Node):
    # Parameter of the operation, kept in its textual form
    prop: str
    # Expression the operation is applied to
    child: Node

    def get_left_leaf(self) -> Node:
        '''Returns the leftmost leaf, which is found below the child.'''
        leftmost = self.child.get_left_leaf()
        return leftmost

    def __str__(self):
        '''Returns the expression as a string, with the child in parenthesis.'''
        wrapped_child = "(" + str(self.child) + ")"
        return " ".join((self.name, self.prop, wrapped_child)) #TODO use fstrings

    def _toPython(self) -> str:
        '''Returns the python expression that calls the method implementing
        this operator on the child, passing the converted parameter.'''
        # Converting parameters
        if self.name == PROJECTION:
            # Every field name becomes a quoted positional argument
            fields = self.prop.replace(' ', '').replace(',', '","')
            argument = '"%s"' % fields
        elif self.name == RENAME:
            argument = repr(self.get_rename_prop())
        else:  # Selection
            argument = repr(self.prop)
        return '%s.%s(%s)' % (self.child._toPython(), op_functions[self.name], argument)

    def get_projection_prop(self) -> List[str]:
        '''
        Returns the list of field names the projection operation wants
        '''
        if not self.name == PROJECTION:
            raise ValueError('This is only supported on projection nodes')
        return list(map(str.strip, self.prop.split(',')))

    def set_projection_prop(self, p: List[str]) -> None:
        '''
        Sets the prop field based on the list of field names
        '''
        if not self.name == PROJECTION:
            raise ValueError('This is only supported on projection nodes')
        self.prop = ','.join(p)

    def get_rename_prop(self) -> Dict[str, str]:
        '''
        Returns the dictionary that the rename operation wants
        '''
        if not self.name == RENAME:
            raise ValueError('This is only supported on rename nodes')
        # Each comma separated item is in the form "old ARROW new"
        pairs = (item.split(ARROW) for item in self.prop.split(','))
        return {pair[0].strip(): pair[1].strip() for pair in pairs}

    def set_rename_prop(self, renames: Dict[str, str]) -> None:
        '''
        Sets the prop field based on the dictionary for renames
        '''
        if not self.name == RENAME:
            raise ValueError('This is only supported on rename nodes')
        items = [f'{old}{ARROW}{new}' for old, new in renames.items()]
        self.prop = ','.join(items)
 def parse_tokens(expression: List[Union[list, str]]) -> Node:
    '''Generates the tree from the tokenized expression.

    expression is a list of tokens, where a token is either a string or
    a nested list holding the tokens of a sub-expression in parenthesis.

    Returns a Variable when the expression is a single relation name, a
    Binary when a binary operator is found, a Unary when a unary operator
    is found.

    Raises ParserException if the relation name is not valid, if an
    operator is missing its operands, or if no operator is found (this
    includes an empty expression).'''

    # If the list contains only a list, it will consider the lower level list.
    # This will allow things like ((((((a))))) to work
    while len(expression) == 1 and isinstance(expression[0], list):
        expression = expression[0]

    # The list contains only 1 string. Means it is the name of a relation
    if len(expression) == 1:
        if not rtypes.is_valid_relation_name(expression[0]):
            raise ParserException(
                u"'%s' is not a valid relation name" % expression[0])
        return Variable(expression[0]) #FIXME Move validation in the object

    # Expression from right to left, searching for binary operators
    # this means that binary operators have lesser priority than
    # unary operators.
    # It finds the operator with lesser priority, uses it as root of this
    # (sub)tree using everything on its left as left parameter (so building
    # a left subtree with the part of the list located on left) and doing
    # the same on right.
    # Since it searches for strings, and expressions into parenthesis are
    # within sub-lists, they won't be found here, ensuring that they will
    # have highest priority.
    for i in range(len(expression) - 1, -1, -1):
        operator = expression[i]
        if operator in b_operators:  # Binary operator
            # This is a plain function, there is no self here: the operator
            # token itself is what gets reported in the error messages.
            if len(expression[:i]) == 0:
                raise ParserException(
                    u"Expected left operand for '%s'" % operator)

            if len(expression[i + 1:]) == 0:
                raise ParserException(
                    u"Expected right operand for '%s'" % operator)
            return Binary(operator, parse_tokens(expression[:i]), parse_tokens(expression[i + 1:]))

    # Searches for unary operators, parsing from right to left
    for i in range(len(expression) - 1, -1, -1):
        operator = expression[i]
        if operator in u_operators:  # Unary operator
            # A unary operator needs two more tokens: its parameter and
            # the expression it is applied to.
            if len(expression) <= i + 2:
                raise ParserException(
                    u"Expected more tokens in '%s'" % operator)

            return Unary(
                operator,
                prop=expression[1 + i].strip(),
                child=parse_tokens(expression[2 + i])
            )
    raise ParserException('Parse error') #FIXME more details
 def _find_matching_parenthesis(expression: str, start=0, openpar='(', closepar=')') -> Optional[int]:
    '''This function returns the position of the matching
    close parenthesis to the 1st open parenthesis found
    starting from start (0 by default)'''
@@ -391,7 +431,7 @@ def tokenize(expression: str) -> list:
 def tree(expression: str) -> Node:
    '''This function parses a relational algebra expression into a AST and returns
    the root node using the Node class.'''
-    return Node(tokenize(expression))
+    return parse_tokens(tokenize(expression))
 def parse(expr: str) -> CallableString:
@@ -400,11 +440,3 @@ def parse(expr: str) -> CallableString:
    Python expression.
    '''
    return tree(expr).toPython()
 if __name__ == "__main__":
    while True:
        e = input("Expression: ")
        print (parse(e))
 # Backwards compatibility
 node = Node
--- a/tests_dir/people_rename.query
+++ b/tests_dir/people_rename.query
@@ -0,0 +1 @@
 ρ name➡n,age➡a(σTrue(people)) ∪ ρ age➡a,name➡n(people)
--- a/tests_dir/people_rename.result
+++ b/tests_dir/people_rename.result
@@ -0,0 +1,9 @@
 id,n,chief,a
 0,jack,0,22
 1,carl,0,20
 2,john,1,30
 3,dean,1,33
 4,eve,0,25
 5,duncan,4,30
 6,paul,4,30
 7,alia,1,28
--- a/tests_dir/people_rename_select.query
+++ b/tests_dir/people_rename_select.query
@@ -0,0 +1 @@
 σ i%2==0 (ρ id➡i (people))
--- a/tests_dir/people_rename_select.result
+++ b/tests_dir/people_rename_select.result
@@ -0,0 +1,5 @@
 i,name,chief,age
 0,jack,0,22
 2,john,1,30
 4,eve,0,25
 6,paul,4,30
		`@@ -0,0 +1 @@`
							`ρ name➡n,age➡a(σTrue(people)) ∪ ρ age➡a,name➡n(people)`