Merge pull request #35 from ltworf/multiline_optimization

Fix and test multiline optimizer
2020-08-26 17:39:53 +02:00
parent 8755236f94 9d6402b48c
commit ea22350463
3 changed files with 68 additions and 51 deletions
--- a/relational/optimizer.py
+++ b/relational/optimizer.py
@@ -31,28 +31,51 @@ from relational import querysplit
 from relational.maintenance import UserInterface


-def optimize_program(code, rels: Dict[str, Relation]):
+def optimize_program(code: str, rels: Dict[str, Relation]) -> str:
    '''
    Optimize an entire program, composed by multiple expressions
    and assignments.
+
+    code: the program, one statement per line; blank lines and
+          lines starting with ';' are skipped
+    rels: dict with relation name as key, and relation instance as value
+
+    Return value: the program produced by querysplit.split for the
+    optimized expression of the last statement, or an empty string
+    when code contains no statement at all.
    '''
-    raise NotImplementedError()
    lines = code.split('\n')
-    context = {}
+    context: Dict[str, Node] = {}
+    last_res: Optional[str] = None

    for line in  lines:
+        # skip comments or empty lines
        line = line.strip()
        if line.startswith(';') or not line:
            continue
+
        res, query = UserInterface.split_query(line)
        last_res = res
        parsed = tree(query)
-        optimizations.replace_leaves(parsed, context)
+        _replace_leaves(parsed, context)
+        # _replace_leaves only rewrites children: a statement made of
+        # a single name (e.g. "b = a") must be resolved here.
+        if isinstance(parsed, Variable) and parsed.name in context:
+            parsed = context[parsed.name]
        context[res] = parsed
+
+    if last_res is None:
+        # Only comments/blank lines: there is nothing to optimize
+        return ''
    node = optimize_all(context[last_res], rels, tostr=False)
    return querysplit.split(node, rels)


+def _replace_leaves(node: Node, context: Dict[str, Node]) -> None:
+    '''
+    Substitute, in place, the named children of node.
+
+    The tree is walked recursively; each child of a Unary or Binary
+    node that is a Variable whose name is a key of context gets
+    replaced by the corresponding subtree of context. node itself
+    is never replaced, only its descendants are.
+    '''
+    if isinstance(node, Unary):
+        slots = ('child',)
+    elif isinstance(node, Binary):
+        slots = ('left', 'right')
+    else:
+        # Any other kind of node is left untouched
+        return
+
+    # Descend into every child first, then patch the direct children
+    for slot in slots:
+        _replace_leaves(getattr(node, slot), context)
+    for slot in slots:
+        branch = getattr(node, slot)
+        if isinstance(branch, Variable) and branch.name in context:
+            setattr(node, slot, context[branch.name])
+
+
 def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
    '''This function performs all the available optimizations.

@@ -70,20 +93,20 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif
    elif isinstance(expression, Node):
        n = expression
    else:
-        raise (TypeError("expression must be a string or a node"))
+        raise TypeError('expression must be a string or a node')

    total = 1
    while total != 0:
        total = 0
        if specific:
            for i in optimizations.specific_optimizations:
-                n, c = recursive_scan(i, n, rels)
+                n, c = _recursive_scan(i, n, rels)
                if c != 0 and isinstance(debug, list):
                    debug.append(str(n))
                total += c
        if general:
            for j in optimizations.general_optimizations:
-                n, c = recursive_scan(j, n, None)
+                n, c = _recursive_scan(j, n, None)
                if c != 0 and isinstance(debug, list):
                    debug.append(str(n))
                total += c
@@ -93,28 +116,7 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif
        return n


-def specific_optimize(expression, rels: Dict[str, Relation]):
-    '''This function performs specific optimizations. Means that it will need to
-    know the fields used by the relations.
-
-    expression : see documentation of this module
-    rels: dic with relation name as key, and relation istance as value
-
-    Return value: this will return an optimized version of the expression'''
-    return optimize_all(expression, rels, specific=True, general=False)
-
-
-def general_optimize(expression):
-    '''This function performs general optimizations. Means that it will not need to
-    know the fields used by the relations
-
-    expression : see documentation of this module
-
-    Return value: this will return an optimized version of the expression'''
-    return optimize_all(expression, None, specific=False, general=True)
-
-
-def recursive_scan(function, node, rels) -> Tuple[Node, int]:
+def _recursive_scan(function, node: Node, rels: Optional[Dict[str, Any]]) -> Tuple[Node, int]:
    '''Does a recursive optimization on the tree.

    This function will recursively execute the function given
@@ -128,7 +130,7 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]:
    returned value.'''

    args = []
-    if rels:
+    if rels is not None:
        args.append(rels)

    changes = 0
@@ -136,11 +138,11 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]:
    changes += c

    if isinstance(node, Unary):
-        node.child, c = recursive_scan(function, node.child, rels)
+        node.child, c = _recursive_scan(function, node.child, rels)
        changes += c
    elif isinstance(node, Binary):
-        node.left, c = recursive_scan(function, node.left, rels)
+        node.left, c = _recursive_scan(function, node.left, rels)
        changes += c
-        node.right, c = recursive_scan(function, node.right, rels)
+        node.right, c = _recursive_scan(function, node.right, rels)
        changes += c
    return node, changes
--- a/relational/querysplit.py
+++ b/relational/querysplit.py
@@ -1,5 +1,5 @@
 # Relational
-# Copyright (C) 2016  Salvo "LtWorf" Tomaselli
+# Copyright (C) 2016-2020  Salvo "LtWorf" Tomaselli
 #
 # Relational is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -18,15 +18,19 @@
 #
 # This module splits a query into a program.

+from typing import List, Dict, Tuple

-from relational import parser
+from relational.parser import Node, Binary, Unary, Variable
+
+
+__all__ = ['split']


 class Program:
-    def __init__(self, rels):
-        self.queries = []
-        self.dictionary = {} # Key is the query, value is the relation
-        self.vgen = vargen(rels, 'optm_')
+    def __init__(self, rels) -> None:
+        self.queries: List[Tuple[str, Node]] = []
+        self.dictionary: Dict[str, Node] = {} # Key is the query, value is the relation
+        self.vgen = _vargen(rels, 'optm_')

    def __str__(self):
        r = ''
@@ -34,7 +38,7 @@ class Program:
            r += '%s = %s' % (q[0], q[1]) + '\n'
        return r.rstrip()

-    def append_query(self, node):
+    def append_query(self, node: Node) -> Node:
        strnode = str(node)

        rel = self.dictionary.get(strnode)
@@ -43,29 +47,29 @@ class Program:

        qname = next(self.vgen)
        self.queries.append((qname, node))
-        n = parser.Node()
-        n.kind = parser.RELATION
-        n.name = qname
+        n = Variable(qname)
        self.dictionary[strnode] = n
        return n

-def _separate(node, program):
-    if node.kind == parser.UNARY and node.child.kind != parser.RELATION:
+
+def _separate(node: Node, program: Program) -> None:
+    '''
+    Recursively register node and its compound sub-expressions
+    as queries of program.
+
+    Every child that is not a plain Variable is separated first,
+    appended to the program, and then replaced in the tree by the
+    node that append_query returns. node is modified in place and
+    is itself appended to the program at the end.
+    '''
+    # A child that already is a Variable needs no query of its own;
+    # this is the same rule applied to both sides in the Binary branch.
+    if isinstance(node, Unary) and not isinstance(node.child, Variable):
        _separate(node.child, program)
        rel = program.append_query(node.child)
        node.child = rel
-    elif node.kind == parser.BINARY:
-        if node.left.kind != parser.RELATION:
+    elif isinstance(node, Binary):
+        if not isinstance(node.left, Variable):
            _separate(node.left, program)
            rel = program.append_query(node.left)
            node.left = rel
-        if node.right.kind != parser.RELATION:
+        if not isinstance(node.right, Variable):
            _separate(node.right, program)
            rel = program.append_query(node.right)
            node.right = rel
    program.append_query(node)

-def vargen(avoid, prefix=''):
+
+def _vargen(avoid: str, prefix: str=''):
    '''
    Generates temp variables.

@@ -87,12 +91,15 @@ def vargen(avoid, prefix=''):
            yield r
        count += 1

-def split(node, rels):
+
+def split(node, rels) -> str:
    '''
    Split a query into a program.

-    The idea is that if there are duplicated subdtrees they
+    The idea is that if there are duplicated subtrees they
    get executed only once.
+
+    This is used by the optimizer module.
    '''
    p = Program(rels)
    _separate(node, p)
--- a/tests_dir/multiline_optimization.py
+++ b/tests_dir/multiline_optimization.py
@@ -0,0 +1,8 @@
+# Checks optimize_program on a multi-line program in which the same
+# join (people ⧓skills) is written more than once: the expected
+# output contains that join only once.
+from relational.optimizer import optimize_program
+
+program = '''ppl_skills = people ⧓skills
+ppl_skills1 = ppl_skills ∪ (people ⧓skills)
+ppl_skills ∩ ppl_skills1 ⧓ dates'''
+
+expected = '''optm_a = people⧓skills
+optm_b = optm_a⧓dates'''
+
+# An empty dict is passed as the relations
+a = optimize_program(program, {})
+
+assert a == expected