Merge pull request #20 from ltworf/refactor_parser

Refactor parser
2020-06-09 23:49:11 +02:00
parent 2480c955ae d7145171de
commit 5d3823d0ea
8 changed files with 436 additions and 492 deletions
--- a/5
+++ b/5
@@ -1,4 +1,7 @@
-2.6
+3.0
+- Refactored parser to use better typing
+- Refactored and fixed some optimizations
+- Added more test cases
 - Improved survey sending
 - Prevent relation/field names from being reserved keywords
 - Fixed issue in cli where loading an invalid file would lead to a crash
--- a/relational/optimizations.py
+++ b/relational/optimizations.py
@@ -1,5 +1,5 @@
 # Relational
-# Copyright (C) 2009-2018  Salvo "LtWorf" Tomaselli
+# Copyright (C) 2009-2020  Salvo "LtWorf" Tomaselli
 #
 # Relational is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -30,8 +30,9 @@

 from io import StringIO
 from tokenize import generate_tokens
+from typing import Tuple, Dict

-
+from relational.relation import Relation
 from relational import parser

 sel_op = (
@@ -98,36 +99,7 @@ def replace_node(replace, replacement):
        replace.left = replacement.left


-def recoursive_scan(function, node, rels=None):
-    '''Does a recoursive optimization on the tree.
-
-    This function will recoursively execute the function given
-    as "function" parameter starting from node to all the tree.
-    if rels is provided it will be passed as argument to the function.
-    Otherwise the function will be called just on the node.
-
-    Result value: function is supposed to return the amount of changes
-    it has performed on the tree.
-    The various result will be added up and this final value will be the
-    returned value.'''
-    changes = 0
-    # recoursive scan
-    if node.kind == parser.UNARY:
-        if rels != None:
-            changes += function(node.child, rels)
-        else:
-            changes += function(node.child)
-    elif node.kind == parser.BINARY:
-        if rels != None:
-            changes += function(node.right, rels)
-            changes += function(node.left, rels)
-        else:
-            changes += function(node.right)
-            changes += function(node.left)
-    return changes
-
-
-def duplicated_select(n: parser.Node) -> int:
+def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates and deletes things like
    σ a ( σ a(C)) and the ones like σ a ( σ b(C))
    replacing the 1st one with a single select and
@@ -135,243 +107,189 @@ def duplicated_select(n: parser.Node) -> int:
    in and
    '''
    changes = 0
-    if n.name == SELECTION and n.child.name == SELECTION:
+    while n.name == SELECTION and n.child.name == SELECTION:
+        changes += 1
+        prop = n.prop
+
        if n.prop != n.child.prop:  # Nested but different, joining them
-            n.prop = n.prop + " and " + n.child.prop
+            prop = n.prop + " and " + n.child.prop

            # This adds parenthesis if they are needed
            if n.child.prop.startswith('(') or n.prop.startswith('('):
-                n.prop = '(%s)' % n.prop
-
-        n.child = n.child.child
-        changes = 1
-        changes += duplicated_select(n)
-
-    return changes + recoursive_scan(duplicated_select, n)
+                prop = '(%s)' % prop
+        n = parser.Unary(
+            SELECTION,
+            prop,
+            n.child.child,
+        )
+    return n, changes


-def futile_union_intersection_subtraction(n: parser.Node) -> int:
-    '''This function locates things like r ᑌ r, and replaces them with r.
-    R ᑌ R  --> R
-    R ᑎ R --> R
+def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, int]:
+    '''This function locates things like r ∪ r, and replaces them with r.
+    R ∪ R  --> R
+    R ∩ R --> R
    R - R --> σ False (R)
    σ k (R) - R --> σ False (R)
    R - σ k (R) --> σ not k (R)
-    σ k (R) ᑌ R --> R
-    σ k (R) ᑎ R --> σ k (R)
+    σ k (R) ∪ R --> R
+    σ k (R) ∩ R --> σ k (R)
    '''

    changes = 0

    # Union and intersection of the same thing
    if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right:
-        changes = 1
-        replace_node(n, n.left)
+        return n.left, 1

    # selection and union of the same thing
    elif (n.name == UNION):
        if n.left.name == SELECTION and n.left.child == n.right:
-            changes = 1
-            replace_node(n, n.right)
+            return n.right, 1
        elif n.right.name == SELECTION and n.right.child == n.left:
-            changes = 1
-            replace_node(n, n.left)
+            return n.left, 1

    # selection and intersection of the same thing
    elif n.name == INTERSECTION:
        if n.left.name == SELECTION and n.left.child == n.right:
-            changes = 1
-            replace_node(n, n.left)
+            return n.left, 1
        elif n.right.name == SELECTION and n.right.child == n.left:
-            changes = 1
-            replace_node(n, n.right)
+            return n.right, 1

    # Subtraction and selection of the same thing
    elif n.name == DIFFERENCE and \
            n.right.name == SELECTION and \
            n.right.child == n.left:
-        n.name = n.right.name
-        n.kind = n.right.kind
-        n.child = n.right.child
-        n.prop = '(not (%s))' % n.right.prop
-        n.left = n.right = None
+        return parser.Unary(
+            SELECTION,
+            '(not (%s))' % n.right.prop,
+            n.right.child), 1

    # Subtraction of the same thing or with selection on the left child
    elif n.name == DIFFERENCE and (n.left == n.right or (n.left.name == SELECTION and n.left.child == n.right)):
-        changes = 1
-        n.kind = parser.UNARY
-        n.name = SELECTION
-        n.prop = 'False'
-        n.child = n.left.get_left_leaf()
-        # n.left=n.right=None
-
-    return changes + recoursive_scan(futile_union_intersection_subtraction, n)
+        return parser.Unary(
+            SELECTION,
+            'False',
+            n.get_left_leaf()
+        ), 1
+    return n, 0


-def down_to_unions_subtractions_intersections(n: parser.Node) -> int:
-    '''This funcion locates things like σ i==2 (c ᑌ d), where the union
+def down_to_unions_subtractions_intersections(n: parser.Node) -> Tuple[parser.Node, int]:
+    '''This function locates things like σ i==2 (c ∪ d), where the union
    can be a subtraction and an intersection and replaces them with
-    σ i==2 (c) ᑌ σ i==2(d).
+    σ i==2 (c) ∪ σ i==2(d).
    '''
    changes = 0
    _o = (UNION, DIFFERENCE, INTERSECTION)
    if n.name == SELECTION and n.child.name in _o:
+        l = parser.Unary(SELECTION, n.prop, n.child.left)
+        r = parser.Unary(SELECTION, n.prop, n.child.right)

-        left = parser.Node()
-        left.prop = n.prop
-        left.name = n.name
-        left.child = n.child.left
-        left.kind = parser.UNARY
-        right = parser.Node()
-        right.prop = n.prop
-        right.name = n.name
-        right.child = n.child.right
-        right.kind = parser.UNARY
-
-        n.name = n.child.name
-        n.left = left
-        n.right = right
-        n.child = None
-        n.prop = None
-        n.kind = parser.BINARY
-        changes += 1
-
-    return changes + recoursive_scan(down_to_unions_subtractions_intersections, n)
+        return parser.Binary(n.child.name, l, r), 1
+    return n, 0


-def duplicated_projection(n: parser.Node) -> int:
+def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates thing like π i ( π j (R)) and replaces
    them with π i (R)'''
-    changes = 0

    if n.name == PROJECTION and n.child.name == PROJECTION:
-        n.child = n.child.child
-        changes += 1
-
-    return changes + recoursive_scan(duplicated_projection, n)
+        return parser.Unary(
+            PROJECTION,
+            n.prop,
+            n.child.child), 1
+    return n, 0


-def selection_inside_projection(n: parser.Node) -> int:
+def selection_inside_projection(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like  σ j (π k(R)) and
    converts them into π k(σ j (R))'''
-    changes = 0
-
    if n.name == SELECTION and n.child.name == PROJECTION:
-        changes = 1
-        temp = n.prop
-        n.prop = n.child.prop
-        n.child.prop = temp
-        n.name = PROJECTION
-        n.child.name = SELECTION
+        child = parser.Unary(
+            SELECTION,
+            n.prop,
+            n.child.child
+        )

-    return changes + recoursive_scan(selection_inside_projection, n)
+        return parser.Unary(PROJECTION, n.child.prop, child), 0
+    return n, 0


-def swap_union_renames(n: parser.Node) -> int:
+def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]:
    '''This function locates things like
-    ρ a➡b(R) ᑌ ρ a➡b(Q)
+    ρ a➡b(R) ∪ ρ a➡b(Q)
    and replaces them with
-    ρ a➡b(R ᑌ Q).
+    ρ a➡b(R ∪ Q).
    Does the same with subtraction and intersection'''
-    changes = 0
-
-    if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == n.right.name and n.left.name == RENAME:
-        l_vars = {}
-        for i in n.left.prop.split(','):
-            q = i.split(ARROW)
-            l_vars[q[0].strip()] = q[1].strip()
-
-        r_vars = {}
-        for i in n.right.prop.split(','):
-            q = i.split(ARROW)
-            r_vars[q[0].strip()] = q[1].strip()
-
+    if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == RENAME and n.right.name == RENAME:
+        l_vars = n.left.get_rename_prop()
+        r_vars = n.right.get_rename_prop()
        if r_vars == l_vars:
-            changes = 1
-
-            # Copying self, but child will be child of renames
-            q = parser.Node()
-            q.name = n.name
-            q.kind = parser.BINARY
-            q.left = n.left.child
-            q.right = n.right.child
-
-            n.name = RENAME
-            n.kind = parser.UNARY
-            n.child = q
-            n.prop = n.left.prop
-            n.left = n.right = None
-
-    return changes + recoursive_scan(swap_union_renames, n)
+            child = parser.Binary(n.name, n.left.child, n.right.child)
+            return parser.Unary(RENAME, n.left.prop, child), 1
+    return n, 0


-def futile_renames(n: parser.Node) -> int:
-    '''This function purges renames like id->id'''
-    changes = 0
+def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]:
+    '''This function purges renames like
+    ρ id->id,a->q (A)
+    into
+    ρ a->q (A)

+    or removes the operation entirely if they all get removed
+    '''
    if n.name == RENAME:
-        # Located two nested renames.
-        changes = 1
+        renames = n.get_rename_prop()
+        changes = False
+        for k, v in renames.items():
+            if k == v:
+                changes = True
+                del renames[k]
+        if len(renames) == 0: # Nothing to rename, removing the rename
+            return n.child, 1
+        elif changes:
+            # Changing the node in place, no need to return to cause a recursive step
+            n.set_rename_prop(renames)

-        # Creating a dictionary with the attributes
-        _vars = {}
-        for i in n.prop.split(','):
-            q = i.split(ARROW)
-            _vars[q[0].strip()] = q[1].strip()
-        # Scans dictionary to locate things like "a->b,b->c" and replace them
-        # with "a->c"
-        for key in list(_vars.keys()):
-            value = _vars.get(key)
-            if key == value:
-                _vars.pop(value)  # Removes the unused one
-
-        if len(_vars) == 0: # Nothing to rename, removing the rename op
-            replace_node(n, n.child)
-        else:
-            n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items())
-
-    return changes + recoursive_scan(futile_renames, n)
+    return n, 0


-def subsequent_renames(n: parser.Node) -> int:
-    '''This function removes redoundant subsequent renames joining them into one'''
-
-    '''Purges renames like id->id Since it's needed to be performed BEFORE this one
-    so it is not in the list with the other optimizations'''
-    futile_renames(n)
-    changes = 0
-
+def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]:
+    '''This function removes redundant subsequent renames joining them into one
+    ρ .. ρ .. (A)
+    into
+    ρ ... (A)
+    '''
    if n.name == RENAME and n.child.name == RENAME:
        # Located two nested renames.
-        changes = 1
-        # Joining the attribute into one
-        n.prop += ',' + n.child.prop
-        n.child = n.child.child
+        prop = n.prop + ',' + n.child.prop
+        child = n.child.child
+        n = parser.Unary(RENAME, prop, child)

        # Creating a dictionary with the attributes
-        _vars = {}
-        for i in n.prop.split(','):
-            q = i.split(ARROW)
-            _vars[q[0].strip()] = q[1].strip()
+        renames = n.get_rename_prop()
+
        # Scans dictionary to locate things like "a->b,b->c" and replace them
        # with "a->c"
-        for key in list(_vars.keys()):
-            value = _vars.get(key)
-            if value in _vars.keys():
-                if _vars[value] != key:
+        for key, value in tuple(renames.items()):
+
+            if value in renames:
+                if renames[value] != key:
                    # Double rename on attribute
-                    _vars[key] = _vars[_vars[key]]  # Sets value
-                    _vars.pop(value)  # Removes the unused one
+                    renames[key] = renames[renames[key]]  # Sets value
+                    del renames[value]  # Removes the unused one
                else:  # Cycle rename a->b,b->a
-                    _vars.pop(value)  # Removes the unused one
-                    _vars.pop(key)  # Removes the unused one
+                    del renames[value] # Removes the unused one
+                    del renames[key] # Removes the unused one

-        if len(_vars) == 0:  # Nothing to rename, removing the rename op
-            replace_node(n, n.child)
+        if len(renames) == 0:  # Nothing to rename, removing the rename op
+            return n.child, 1
        else:
-            n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items())
+            n.set_rename_prop(renames)
+            return n, 1

-    return changes + recoursive_scan(subsequent_renames, n)
+    return n, 0


 class level_string(str):
@@ -411,101 +329,84 @@ def tokenize_select(expression):
    return l


-def swap_rename_projection(n: parser.Node) -> int:
-    '''This function locates things like π k(ρ j(R))
-    and replaces them with ρ j(π k(R)).
+def swap_rename_projection(n: parser.Node) -> Tuple[parser.Node, int]:
+    '''This function locates things like
+    π k(ρ j(R))
+    and replaces them with
+    ρ j(π k(R)).
    This will let rename work on a hopefully smaller set
    and more important, will hopefully allow further optimizations.
-    Will also eliminate fields in the rename that are cutted in the projection.
+
+    Will also eliminate fields in the rename that are cut in the projection.
    '''
-    changes = 0

    if n.name == PROJECTION and n.child.name == RENAME:
-        changes = 1
-
        # π index,name(ρ id➡index(R))
-        _vars = {}
-        for i in n.child.prop.split(','):
-            q = i.split(ARROW)
-            _vars[q[1].strip()] = q[0].strip()
+        renames = n.child.get_rename_prop()
+        projections = set(n.get_projection_prop())

-        _pr = n.prop.split(',')
-        for i in range(len(_pr)):
-            try:
-                _pr[i] = _vars[_pr[i].strip()]
-            except:
-                pass
+        # Use pre-rename names in the projection
+        for k, v in renames.items():
+            if v in projections:
+                projections.remove(v)
+                projections.add(k)

-        _pr_reborn = n.prop.split(',')
-        for i in list(_vars.keys()):
-            if i not in _pr_reborn:
-                _vars.pop(i)
-        n.name = n.child.name
+        # Eliminate fields
+        for i in list(renames.keys()):
+            if i not in projections:
+                del renames[i]

-        n.prop = ','.join('%s%s%s' % (i[1], ARROW, i[0]) for i in _vars.items())
+        child = parser.Unary(PROJECTION,'' , n.child.child)
+        child.set_projection_prop(projections)
+        n = parser.Unary(RENAME, '', child)
+        n.set_rename_prop(renames)
+        return n, 1

-        n.child.name = PROJECTION
-        n.child.prop = ''
-        for i in _pr:
-            n.child.prop += i + ','
-        n.child.prop = n.child.prop[:-1]
-
-    return changes + recoursive_scan(swap_rename_projection, n)
+    return n, 0


 def swap_rename_select(n: parser.Node) -> int:
-    '''This function locates things like σ k(ρ j(R)) and replaces
-    them with ρ j(σ k(R)). Renaming the attributes used in the
+    '''This function locates things like
+    σ k(ρ j(R))
+    and replaces them with
+    ρ j(σ k(R)).
+    Renaming the attributes used in the
    selection, so the operation is still valid.'''
-    changes = 0

    if n.name == SELECTION and n.child.name == RENAME:
-        changes = 1
-        # Dictionary containing attributes of rename
-        _vars = {}
-        for i in n.child.prop.split(','):
-            q = i.split(ARROW)
-            _vars[q[1].strip()] = q[0].strip()
+        # This is an inverse mapping for the rename
+        renames = {v: k for k, v in n.child.get_rename_prop().items()}

        # tokenizes expression in select
-        _tokens = tokenize_select(n.prop)
+        tokens = tokenize_select(n.prop)

-        # Renaming stuff
-        for i in range(len(_tokens)):
-            splitted = _tokens[i].split('.', 1)
-            if splitted[0] in _vars:
-                if len(splitted) == 1:
-                    _tokens[i] = _vars[_tokens[i].split('.')[0]]
-                else:
-                    _tokens[i] = _vars[
-                        _tokens[i].split('.')[0]] + '.' + splitted[1]
+        # Renaming stuff, no enum because I edit the tokens
+        for i in range(len(tokens)):
+            splitted = tokens[i].split('.', 1)
+            if splitted[0] in renames:
+                tokens[i] = renames[splitted[0]]
+                if len(splitted) > 1:
+                    tokens[i] += '.' + splitted[1]

-        # Swapping operators
-        n.name = RENAME
-        n.child.name = SELECTION
-
-        n.prop = n.child.prop
-        n.child.prop = ' '.join(_tokens)
-
-    return changes + recoursive_scan(swap_rename_select, n)
+        child = parser.Unary(SELECTION, ' '.join(tokens), n.child.child)
+        return parser.Unary(RENAME, n.child.prop, child), 1
+    return n, 0


 def select_union_intersect_subtract(n: parser.Node) -> int:
-    '''This function locates things like σ i(a) ᑌ σ q(a)
-    and replaces them with σ (i OR q) (a)
+    '''This function locates things like
+    σ i(a) ∪ σ q(a)
+    and replaces them with
+    σ (i OR q) (a)
    Removing a O(n²) operation like the union'''
-    changes = 0
    if n.name in {UNION, INTERSECTION, DIFFERENCE} and \
                n.left.name == SELECTION and \
                n.right.name == SELECTION and \
                n.left.child == n.right.child:
-        changes = 1

        d = {UNION: 'or', INTERSECTION: 'and', DIFFERENCE: 'and not'}
        op = d[n.name]

-        newnode = parser.Node()
-
        if n.left.prop.startswith('(') or n.right.prop.startswith('('):
            t_str = '('
            if n.left.prop.startswith('('):
@@ -519,54 +420,34 @@ def select_union_intersect_subtract(n: parser.Node) -> int:
                t_str += '%s'
            t_str += ')'

-            newnode.prop = t_str % (n.left.prop, op, n.right.prop)
+            prop = t_str % (n.left.prop, op, n.right.prop)
        else:
-            newnode.prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
-        newnode.name = SELECTION
-        newnode.child = n.left.child
-        newnode.kind = parser.UNARY
-        replace_node(n, newnode)
-
-    return changes + recoursive_scan(select_union_intersect_subtract, n)
+            prop = '%s %s %s' % (n.left.prop, op, n.right.prop)
+        return parser.Unary(SELECTION, prop, n.left.child), 1
+    return n, 0


-def union_and_product(n: parser.Node) -> int:
+def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]:
    '''
    A * B ∪ A * C = A * (B ∪ C)
    Same thing with inner join
    '''
-
-    changes = 0
    if n.name == UNION and n.left.name in {PRODUCT, JOIN} and n.left.name == n.right.name:

-        newnode = parser.Node()
-        newnode.kind = parser.BINARY
-        newnode.name = n.left.name
-
-        newchild = parser.Node()
-        newchild.kind = parser.BINARY
-        newchild.name = UNION
-
        if n.left.left == n.right.left or n.left.left == n.right.right:
-            newnode.left = n.left.left
-            newnode.right = newchild
-
-            newchild.left = n.left.right
-            newchild.right = n.right.left if n.left.left == n.right.right else n.right.right
-            replace_node(n, newnode)
-            changes = 1
+            l = n.left.right
+            r = n.right.left if n.left.left == n.right.right else n.right.right
+            newchild = parser.Binary(UNION, l, r)
+            return parser.Binary(n.left.name, n.left.left, newchild), 1
        elif n.left.right == n.right.left or n.left.left == n.right.right:
-            newnode.left = n.left.right
-            newnode.right = newchild
-
-            newchild.left = n.left.left
-            newchild.right = n.right.left if n.right.left == n.right.right else n.right.right
-            replace_node(n, newnode)
-            changes = 1
-    return changes + recoursive_scan(union_and_product, n)
+            l = n.left.left
+            r = n.right.left if n.right.left == n.right.right else n.right.right
+            newchild = parser.Binary(UNION, l, r)
+            return parser.Binary(n.left.name, n.left.right, newchild), 1
+    return n, 0


-def projection_and_union(n, rels):
+def projection_and_union(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
    '''
    Turns
        π a,b,c(A) ∪ π a,b,c(B)
@@ -581,28 +462,16 @@ def projection_and_union(n, rels):
            n.left.name == PROJECTION and \
            n.right.name == PROJECTION and \
            set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)):
-        newchild = parser.Node()

-        newchild.kind = parser.BINARY
-        newchild.name = UNION
-        newchild.left = n.left.child
-        newchild.right = n.right.child
-
-        newnode = parser.Node()
-        newnode.child = newchild
-        newnode.kind = parser.UNARY
-        newnode.name = PROJECTION
-        newnode.prop = n.right.prop
-        replace_node(n, newnode)
-        changes = 1
-    return changes + recoursive_scan(projection_and_union, n, rels)
+        child = parser.Binary(UNION, n.left.child, n.right.child)
+        return parser.Unary(PROJECTION, n.right.prop, child), 0
+    return n, 0


-def selection_and_product(n, rels):
+def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.Node:
    '''This function locates things like σ k (R*Q) and converts them into
    σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R,
    i contains attributes belonging to Q and l contains attributes belonging to both'''
-    changes = 0

    if n.name == SELECTION and n.child.name in (PRODUCT, JOIN):
        l_attr = n.child.left.result_format(rels)
@@ -637,76 +506,71 @@ def selection_and_product(n, rels):
                if j in r_attr:  # Field in right
                    r_fields = True

-            if l_fields and r_fields:  # Fields in both
-                both.append(i)
-            elif l_fields:
+            if l_fields and not r_fields:
                left.append(i)
-            elif r_fields:
+            elif r_fields and not l_fields:
                right.append(i)
            else:  # Unknown.. adding in both
                both.append(i)

        # Preparing left selection
-        if len(left) > 0:
-            changes = 1
-            l_node = parser.Node()
-            l_node.name = SELECTION
-            l_node.kind = parser.UNARY
-            l_node.child = n.child.left
-            l_node.prop = ''
-            n.child.left = l_node
+        if left:
+            l_prop = ''
            while len(left) > 0:
                c = left.pop(0)
                for i in c:
-                    l_node.prop += i + ' '
+                    l_prop += i + ' '
                if len(left) > 0:
-                    l_node.prop += ' and '
-            if '(' in l_node.prop:
-                l_node.prop = '(%s)' % l_node.prop
+                    l_prop += ' and '
+            if '(' in l_prop:
+                l_prop = '(%s)' % l_prop
+            l_node = parser.Unary(SELECTION, l_prop, n.child.left)
+        else:
+            l_node = n.child.left

        # Preparing right selection
-        if len(right) > 0:
-            changes = 1
-            r_node = parser.Node()
-            r_node.name = SELECTION
-            r_node.prop = ''
-            r_node.kind = parser.UNARY
-            r_node.child = n.child.right
-            n.child.right = r_node
+        if right:
+            r_prop = ''
            while len(right) > 0:
                c = right.pop(0)
-                r_node.prop += ' '.join(c)
+                r_prop += ' '.join(c)
                if len(right) > 0:
-                    r_node.prop += ' and '
-            if '(' in r_node.prop:
-                r_node.prop = '(%s)' % r_node.prop
+                    r_prop += ' and '
+            if '(' in r_prop:
+                r_prop = '(%s)' % r_prop
+            r_node = parser.Unary(SELECTION, r_prop, n.child.right)
+        else:
+            r_node = n.child.right
+
+        b_node = parser.Binary(n.child.name, l_node, r_node)
+
        # Changing main selection
-        n.prop = ''
-        if len(both) != 0:
+        if both:
+            both_prop = ''
            while len(both) > 0:
                c = both.pop(0)
-                n.prop += ' '.join(c)
+                both_prop += ' '.join(c)
                if len(both) > 0:
-                    n.prop += ' and '
-            if '(' in n.prop:
-                n.prop = '(%s)' % n.prop
+                    both_prop += ' and '
+            if '(' in both_prop:
+                both_prop = '(%s)' % both_prop
+            r = parser.Unary(SELECTION, both_prop, b_node)
+            return r, len(left) + len(right)
        else:  # No need for general select
-            replace_node(n, n.child)
+            return b_node, 1

-    return changes + recoursive_scan(selection_and_product, n, rels)
+    return n, 0


-def useless_projection(n, rels) -> int:
+def useless_projection(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]:
    '''
    Removes projections that are over all the fields
    '''
-    changes = 0
    if n.name == PROJECTION and \
            set(n.child.result_format(rels)) == set(i.strip() for i in n.prop.split(',')):
-        changes = 1
-        replace_node(n, n.child)
+        return n.child, 1

-    return changes + recoursive_scan(useless_projection, n, rels)
+    return n, 0

 general_optimizations = [
    duplicated_select,
@@ -714,6 +578,7 @@ general_optimizations = [
    duplicated_projection,
    selection_inside_projection,
    subsequent_renames,
+    futile_renames,
    swap_rename_select,
    futile_union_intersection_subtraction,
    swap_union_renames,
@@ -726,6 +591,3 @@ specific_optimizations = [
    projection_and_union,
    useless_projection,
 ]
-
-if __name__ == "__main__":
-    print (tokenize_select("skill == 'C' and  id % 2 == 0"))
--- a/relational/optimizer.py
+++ b/relational/optimizer.py
@@ -1,5 +1,5 @@
 # Relational
-# Copyright (C) 2008-2016  Salvo "LtWorf" Tomaselli
+# Copyright (C) 2008-2020  Salvo "LtWorf" Tomaselli
 #
 # Relational is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -22,23 +22,22 @@
 # relational query, or it can be a parse tree for a relational expression (ie: class parser.node).
 # The functions will always return a string with the optimized query, but if a parse tree was provided,
 # the parse tree itself will be modified accordingly.
-from typing import Union, Optional, Dict, Any
+from typing import Union, Optional, Dict, Any, Tuple

+from relational.relation import Relation
 from relational import optimizations
-from relational.parser import Node, RELATION, UNARY, BINARY, op_functions, tokenize, tree
+from relational.parser import Node, Variable, Unary, Binary, op_functions, tokenize, tree
 from relational import querysplit
 from relational.maintenance import UserInterface

-ContextDict = Dict[str,Any]

-
-def optimize_program(code, rels: ContextDict):
+def optimize_program(code, rels: Dict[str, Relation]):
    '''
    Optimize an entire program, composed by multiple expressions
    and assignments.
    '''
    lines = code.split('\n')
-    context = {} #  type: ContextDict
+    context = {}

    for line in  lines:
        line = line.strip()
@@ -53,7 +52,7 @@ def optimize_program(code, rels: ContextDict):
    return querysplit.split(node, rels)


-def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
+def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
    '''This function performs all the available optimizations.

    expression : see documentation of this module
@@ -82,23 +81,23 @@ def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool
        total = 0
        if specific:
            for i in optimizations.specific_optimizations:
-                res = i(n, rels)  # Performs the optimization
-                if res != 0 and dbg:
+                n, c = recursive_scan(i, n, rels)
+                if c != 0 and dbg:
                    debug.append(str(n))
-                total += res
+                total += c
        if general:
            for i in optimizations.general_optimizations:
-                res = i(n)  # Performs the optimization
-                if res != 0 and dbg:
+                n, c = recursive_scan(i, n, None)
+                if c != 0 and dbg:
                    debug.append(str(n))
-                total += res
+                total += c
    if tostr:
        return str(n)
    else:
        return n


-def specific_optimize(expression, rels: ContextDict):
+def specific_optimize(expression, rels: Dict[str, Relation]):
    '''This function performs specific optimizations. Means that it will need to
    know the fields used by the relations.

@@ -117,3 +116,35 @@ def general_optimize(expression):

    Return value: this will return an optimized version of the expression'''
    return optimize_all(expression, None, specific=False, general=True)
+
+
+def recursive_scan(function, node, rels) -> Tuple[Node, int]:
+    '''Applies an optimization to a node and, recursively, to its subtree.
+
+    function is called on node first and must return a tuple made of the
+    (possibly replaced) node and the amount of changes it performed.
+    If rels is not None it is passed to function as second argument,
+    otherwise function is called with just the node.
+
+    The children of the returned node are then scanned in the same way.
+
+    Return value: the resulting node and the total amount of changes
+    performed on the whole subtree.'''
+
+    # Compare against None: an empty dictionary is still a context and
+    # the specific optimizations require it as an argument.
+    args = [] if rels is None else [rels]
+
+    # The function may replace the node, so the children are taken
+    # from the node it returned.
+    node, changes = function(node, *args)
+
+    if isinstance(node, Unary):
+        node.child, c = recursive_scan(function, node.child, rels)
+        changes += c
+    elif isinstance(node, Binary):
+        node.left, c = recursive_scan(function, node.left, rels)
+        changes += c
+        node.right, c = recursive_scan(function, node.right, rels)
+        changes += c
+    return node, changes
--- a/relational/parser.py
+++ b/relational/parser.py
@@ -1,5 +1,5 @@
 # Relational
-# Copyright (C) 2008-2017  Salvo "LtWorf" Tomaselli
+# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli
 #
 # Relational is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -24,14 +24,11 @@
 #
 # Language definition here:
 # http://ltworf.github.io/relational/grammar.html
-from typing import Optional, Union, List, Any
+from typing import Optional, Union, List, Any, Dict
+from dataclasses import dataclass

 from relational import rtypes

-RELATION = 0
-UNARY = 1
-BINARY = 2
-
 PRODUCT = '*'
 DIFFERENCE = '-'
 UNION = '∪'
@@ -84,9 +81,8 @@ class CallableString(str):
        '''
        return eval(self, context)

-
+@dataclass
 class Node:
-
    '''This class is a node of a relational expression. Leaves are relations
    and internal nodes are operations.

@@ -102,72 +98,12 @@ class Node:
    operation.

    This class is used to convert an expression into python code.'''
-    kind = None #  type: Optional[int]
-    __hash__ = None #  type: None
+    name: str

-    def __init__(self, expression: Optional[list] = None) -> None:
-        '''Generates the tree from the tokenized expression
-        If no expression is specified then it will create an empty node'''
-        if expression is None or len(expression) == 0:
-            return
+    def __init__(self, name: str) -> None:
+        raise NotImplementedError('This is supposed to be an abstract class')

-        # If the list contains only a list, it will consider the lower level list.
-        # This will allow things like ((((((a))))) to work
-        while len(expression) == 1 and isinstance(expression[0], list):
-            expression = expression[0]
-
-        # The list contains only 1 string. Means it is the name of a relation
-        if len(expression) == 1:
-            self.kind = RELATION
-            self.name = expression[0]
-            if not rtypes.is_valid_relation_name(self.name):
-                raise ParserException(
-                    u"'%s' is not a valid relation name" % self.name)
-            return
-
-        # Expression from right to left, searching for binary operators
-        # this means that binary operators have lesser priority than
-        # unary operators.
-        # It finds the operator with lesser priority, uses it as root of this
-        # (sub)tree using everything on its left as left parameter (so building
-        # a left subtree with the part of the list located on left) and doing
-        # the same on right.
-        # Since it searches for strings, and expressions into parenthesis are
-        # within sub-lists, they won't be found here, ensuring that they will
-        # have highest priority.
-        for i in range(len(expression) - 1, -1, -1):
-            if expression[i] in b_operators:  # Binary operator
-                self.kind = BINARY
-                self.name = expression[i]
-
-                if len(expression[:i]) == 0:
-                    raise ParserException(
-                        u"Expected left operand for '%s'" % self.name)
-
-                if len(expression[i + 1:]) == 0:
-                    raise ParserException(
-                        u"Expected right operand for '%s'" % self.name)
-
-                self.left = node(expression[:i])
-                self.right = node(expression[i + 1:])
-                return
-        '''Searches for unary operators, parsing from right to left'''
-        for i in range(len(expression) - 1, -1, -1):
-            if expression[i] in u_operators:  # Unary operator
-                self.kind = UNARY
-                self.name = expression[i]
-
-                if len(expression) <= i + 2:
-                    raise ParserException(
-                        u"Expected more tokens in '%s'" % self.name)
-
-                self.prop = expression[1 + i].strip()
-                self.child = node(expression[2 + i])
-
-                return
-        raise ParserException("Expected operator in '%s'" % expression)
-
-    def toCode(self):
+    def toCode(self): #FIXME return type
        '''This method converts the AST into a python code object'''
        code = self._toPython()
        return compile(code, '<relational_expression>', 'eval')
@@ -181,25 +117,7 @@ class Node:
        return CallableString(self._toPython())

    def _toPython(self) -> str:
-        '''
-        Same as toPython but returns a regular string
-        '''
-        if self.name in b_operators:
-            return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
-        elif self.name in u_operators:
-            prop = self.prop
-
-            # Converting parameters
-            if self.name == PROJECTION:
-                prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"')
-            elif self.name == RENAME:
-                prop = '{\"%s\"}' % prop.replace(
-                    ',', '\",\"').replace(ARROW, '\":\"').replace(' ', '')
-            else:  # Selection
-                prop = repr(prop)
-
-            return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
-        return self.name
+        raise NotImplementedError()

    def printtree(self, level: int = 0) -> str:
        '''returns a representation of the tree using indentation'''
@@ -216,27 +134,20 @@ class Node:
        return '\n' + r

    def get_left_leaf(self) -> 'Node':
-        '''This function returns the leftmost leaf in the tree.'''
-        if self.kind == RELATION:
-            return self
-        elif self.kind == UNARY:
-            return self.child.get_left_leaf()
-        elif self.kind == BINARY:
-            return self.left.get_left_leaf()
-        raise ValueError('What kind of alien object is this?')
+        raise NotImplementedError()

-    def result_format(self, rels: dict) -> list:
+    def result_format(self, rels: dict) -> list: #FIXME types
        '''This function returns a list containing the fields that the resulting relation will have.
        It requires a dictionary where keys are the names of the relations and the values are
        the relation objects.'''
        if not isinstance(rels, dict):
            raise TypeError('Can\'t be of None type')

-        if self.kind == RELATION:
+        if isinstance(self, Variable):  #FIXME this is ugly
            return list(rels[self.name].header)
-        elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION):
+        elif isinstance(self, Binary) and self.name in (DIFFERENCE, UNION, INTERSECTION):
            return self.left.result_format(rels)
-        elif self.kind == BINARY and self.name == DIVISION:
+        elif isinstance(self, Binary) and self.name == DIVISION:
            return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
        elif self.name == PROJECTION:
            return [i.strip() for i in self.prop.split(',')]
@@ -259,7 +170,7 @@ class Node:
            return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
        raise ValueError('What kind of alien object is this?')

-    def __eq__(self, other):
+    def __eq__(self, other): #FIXME
        if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind):
            return False

@@ -271,22 +182,151 @@ class Node:
            return self.left == other.left and self.right == other.right
        return True

+
+@dataclass
+class Variable(Node):
+    def _toPython(self) -> str:
+        return self.name
+
    def __str__(self):
-        if (self.kind == RELATION):
-            return self.name
-        elif (self.kind == UNARY):
-            return self.name + " " + self.prop + " (" + self.child.__str__() + ")"
-        elif (self.kind == BINARY):
-            le = self.left.__str__()
-            if self.right.kind != BINARY:
-                re = self.right.__str__()
-            else:
-                re = "(" + self.right.__str__() + ")"
-            return (le + self.name + re)
-        raise ValueError('What kind of alien object is this?')
+        return self.name
+
+    def get_left_leaf(self) -> Node:
+        return self


-def _find_matching_parenthesis(expression: str, start=0, openpar=u'(', closepar=u')') -> Optional[int]:
+@dataclass
+class Binary(Node):
+    '''Internal node: a binary operator applied to two subexpressions.'''
+    left: Node
+    right: Node
+
+    def get_left_leaf(self) -> Node:
+        return self.left.get_left_leaf()
+
+    def _toPython(self) -> str:
+        lhs = self.left._toPython()
+        return '%s.%s(%s)' % (lhs, op_functions[self.name], self.right._toPython())
+
+    def __str__(self):
+        lhs = str(self.left)
+        # Only a right operand that is itself binary gets parenthesis
+        rhs = f'({self.right})' if isinstance(self.right, Binary) else str(self.right)
+        return f'{lhs}{self.name}{rhs}'
+
+
+@dataclass
+class Unary(Node):
+    '''Internal node: a unary operator, its parameter and its operand.'''
+    prop: str
+    child: Node
+
+    def get_left_leaf(self) -> Node:
+        '''Returns the leftmost leaf, which is searched below the child.'''
+        return self.child.get_left_leaf()
+
+    def __str__(self):
+        return f'{self.name} {self.prop} ({self.child})'
+
+    def _toPython(self) -> str:
+        # The parameter is rendered in a different way for each operator
+        if self.name == PROJECTION:
+            fields = self.prop.replace(' ', '').replace(',', '\",\"')
+            param = '\"%s\"' % fields
+        elif self.name == RENAME:
+            param = repr(self.get_rename_prop())
+        else:  # Selection
+            param = repr(self.prop)
+
+        return '%s.%s(%s)' % (self.child._toPython(), op_functions[self.name], param)
+
+    def get_projection_prop(self) -> List[str]:
+        '''Returns the projected fields with surrounding whitespace removed.'''
+        if self.name != PROJECTION:
+            raise ValueError('This is only supported on projection nodes')
+        return [field.strip() for field in self.prop.split(',')]
+
+    def set_projection_prop(self, p: List[str]) -> None:
+        '''Sets the prop field joining the given fields with commas.'''
+        if self.name != PROJECTION:
+            raise ValueError('This is only supported on projection nodes')
+        self.prop = ','.join(p)
+
+    def get_rename_prop(self) -> Dict[str, str]:
+        '''
+        Returns the dictionary that the rename operation wants: the name
+        on the left of every arrow is mapped to the name on its right
+        '''
+        if self.name != RENAME:
+            raise ValueError('This is only supported on rename nodes')
+        pairs = (item.split(ARROW) for item in self.prop.split(','))
+        return {q[0].strip(): q[1].strip() for q in pairs}
+
+    def set_rename_prop(self, renames: Dict[str, str]) -> None:
+        '''Sets the prop field based on the dictionary for renames'''
+        if self.name != RENAME:
+            raise ValueError('This is only supported on rename nodes')
+        entries = (f'{old}{ARROW}{new}' for old, new in renames.items())
+        self.prop = ','.join(entries)
+
+
+
+
+def parse_tokens(expression: List[Union[list, str]]) -> Node:
+    '''Generates the tree from the tokenized expression. Raises ParserException when
+    a relation name is invalid, an operand is missing or no operator is found.'''
+
+    # If the list contains only a list, it will consider the lower level list.
+    # This will allow things like ((((((a))))) to work
+    while len(expression) == 1 and isinstance(expression[0], list):
+        expression = expression[0]
+
+    # The list contains only 1 string. Means it is the name of a relation
+    if len(expression) == 1:
+
+        if not rtypes.is_valid_relation_name(expression[0]):
+            raise ParserException(
+                u"'%s' is not a valid relation name" % expression[0])
+        return Variable(expression[0]) #FIXME Move validation in the object
+
+    # Expression from right to left, searching for binary operators
+    # this means that binary operators have lesser priority than
+    # unary operators.
+    # It finds the operator with lesser priority, uses it as root of this
+    # (sub)tree using everything on its left as left parameter (so building
+    # a left subtree with the part of the list located on left) and doing
+    # the same on right.
+    # Since it searches for strings, and expressions into parenthesis are
+    # within sub-lists, they won't be found here, ensuring that they will
+    # have highest priority.
+    for i in range(len(expression) - 1, -1, -1):
+        if expression[i] in b_operators:  # Binary operator
+            # This is a plain function and there is no self: the operator
+            # shown in the error messages is taken from the token list.
+            if len(expression[:i]) == 0:
+                raise ParserException(
+                    u"Expected left operand for '%s'" % expression[i])
+
+            if len(expression[i + 1:]) == 0:
+                raise ParserException(
+                    u"Expected right operand for '%s'" % expression[i])
+            return Binary(expression[i], parse_tokens(expression[:i]), parse_tokens(expression[i + 1:]))
+    # Searches for unary operators, parsing from right to left
+    for i in range(len(expression) - 1, -1, -1):
+        if expression[i] in u_operators:  # Unary operator
+            if len(expression) <= i + 2:
+                raise ParserException(
+                    u"Expected more tokens in '%s'" % expression[i])
+
+            return Unary(
+                expression[i],
+                prop=expression[1 + i].strip(),
+                child=parse_tokens(expression[2 + i])
+            )
+    raise ParserException('Parse error') #FIXME more details
+
+
+def _find_matching_parenthesis(expression: str, start=0, openpar='(', closepar=')') -> Optional[int]:
    '''This function returns the position of the matching
    close parenthesis to the 1st open parenthesis found
    starting from start (0 by default)'''
@@ -391,7 +431,7 @@ def tokenize(expression: str) -> list:
 def tree(expression: str) -> Node:
    '''This function parses a relational algebra expression into a AST and returns
    the root node using the Node class.'''
-    return Node(tokenize(expression))
+    return parse_tokens(tokenize(expression))


 def parse(expr: str) -> CallableString:
@@ -400,11 +440,3 @@ def parse(expr: str) -> CallableString:
    Python expression.
    '''
    return tree(expr).toPython()
-
-if __name__ == "__main__":
-    while True:
-        e = input("Expression: ")
-        print (parse(e))
-
-# Backwards compatibility
-node = Node
--- a/tests_dir/people_rename.query
+++ b/tests_dir/people_rename.query
@@ -0,0 +1 @@
+ρ name➡n,age➡a(σTrue(people)) ∪ ρ age➡a,name➡n(people)
--- a/tests_dir/people_rename.result
+++ b/tests_dir/people_rename.result
@@ -0,0 +1,9 @@
+id,n,chief,a
+0,jack,0,22
+1,carl,0,20
+2,john,1,30
+3,dean,1,33
+4,eve,0,25
+5,duncan,4,30
+6,paul,4,30
+7,alia,1,28
--- a/tests_dir/people_rename_select.query
+++ b/tests_dir/people_rename_select.query
@@ -0,0 +1 @@
+σ i%2==0 (ρ id➡i (people))
--- a/tests_dir/people_rename_select.result
+++ b/tests_dir/people_rename_select.result
@@ -0,0 +1,5 @@
+i,name,chief,age
+0,jack,0,22
+2,john,1,30
+4,eve,0,25
+6,paul,4,30
				`@@ -0,0 +1 @@`
				`ρ name➡n,age➡a(σTrue(people)) ∪ ρ age➡a,name➡n(people)`