Merge pull request #35 from ltworf/multiline_optimization

Fix and test multiline optimizer
This commit is contained in:
Salvo 'LtWorf' Tomaselli 2020-08-26 17:39:53 +02:00 committed by GitHub
commit ea22350463
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 68 additions and 51 deletions

View File

@ -31,28 +31,51 @@ from relational import querysplit
from relational.maintenance import UserInterface
def optimize_program(code: str, rels: Dict[str, Relation]) -> str:
    '''
    Optimize an entire program, composed by multiple expressions
    and assignments.

    code: the program source, one statement per line. Empty lines and
          lines starting with ';' are skipped.
    rels: dict with relation name as key, and relation instance as value

    Return value: the optimized form of the last statement, turned into
    a program by querysplit.split. An empty string is returned when the
    program contains no statements at all.
    '''
    context: Dict[str, Node] = {}
    last_res = None

    for line in code.split('\n'):
        # skip comments or empty lines
        line = line.strip()
        if line.startswith(';') or not line:
            continue

        res, query = UserInterface.split_query(line)
        last_res = res
        parsed = tree(query)
        _replace_leaves(parsed, context)

        # _replace_leaves rewrites children in place, so it cannot
        # substitute the root itself. A statement made of just a
        # previously assigned name has to be resolved here.
        if isinstance(parsed, Variable) and parsed.name in context:
            parsed = context[parsed.name]

        context[res] = parsed

    # Nothing but comments/blank lines: there is no expression to optimize.
    if last_res is None:
        return ''

    node = optimize_all(context[last_res], rels, tostr=False)
    return querysplit.split(node, rels)
def _replace_leaves(node: Node, context: Dict[str, Node]) -> None:
    '''
    Substitute, in place, the variables of a parse tree that are
    defined in context.

    Every child of node (at any depth) that is a Variable whose name
    is a key of context gets replaced by the subtree stored under
    that name. The root node itself is never replaced.
    '''
    if isinstance(node, Unary):
        slots = ('child',)
    elif isinstance(node, Binary):
        slots = ('left', 'right')
    else:
        # Any other node has no children to visit
        return

    # Visit the existing subtrees first, so that the subtrees taken
    # from context are inserted afterwards and not scanned again.
    for slot in slots:
        _replace_leaves(getattr(node, slot), context)

    for slot in slots:
        current = getattr(node, slot)
        if isinstance(current, Variable) and current.name in context:
            setattr(node, slot, context[current.name])
def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
'''This function performs all the available optimizations.
@ -70,20 +93,20 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif
elif isinstance(expression, Node):
n = expression
else:
raise (TypeError("expression must be a string or a node"))
raise TypeError('expression must be a string or a node')
total = 1
while total != 0:
total = 0
if specific:
for i in optimizations.specific_optimizations:
n, c = recursive_scan(i, n, rels)
n, c = _recursive_scan(i, n, rels)
if c != 0 and isinstance(debug, list):
debug.append(str(n))
total += c
if general:
for j in optimizations.general_optimizations:
n, c = recursive_scan(j, n, None)
n, c = _recursive_scan(j, n, None)
if c != 0 and isinstance(debug, list):
debug.append(str(n))
total += c
@ -93,28 +116,7 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif
return n
def specific_optimize(expression, rels: Dict[str, Relation]):
    '''Run only the specific optimizations on an expression.

    Specific optimizations are the ones that need to know the fields
    used by the relations.

    expression : see documentation of this module
    rels: dict with relation name as key, and relation instance as value

    Return value: an optimized version of the expression'''
    only_specific = {'specific': True, 'general': False}
    return optimize_all(expression, rels, **only_specific)
def general_optimize(expression):
    '''Run only the general optimizations on an expression.

    General optimizations are the ones that do not need to know the
    fields used by the relations, so no relations are passed along.

    expression : see documentation of this module

    Return value: an optimized version of the expression'''
    only_general = {'specific': False, 'general': True}
    return optimize_all(expression, None, **only_general)
def recursive_scan(function, node, rels) -> Tuple[Node, int]:
def _recursive_scan(function, node: Node, rels: Optional[Dict[str, Any]]) -> Tuple[Node, int]:
'''Does a recursive optimization on the tree.
This function will recursively execute the function given
@ -128,7 +130,7 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]:
returned value.'''
args = []
if rels:
if rels is not None:
args.append(rels)
changes = 0
@ -136,11 +138,11 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]:
changes += c
if isinstance(node, Unary):
node.child, c = recursive_scan(function, node.child, rels)
node.child, c = _recursive_scan(function, node.child, rels)
changes += c
elif isinstance(node, Binary):
node.left, c = recursive_scan(function, node.left, rels)
node.left, c = _recursive_scan(function, node.left, rels)
changes += c
node.right, c = recursive_scan(function, node.right, rels)
node.right, c = _recursive_scan(function, node.right, rels)
changes += c
return node, changes

View File

@ -1,5 +1,5 @@
# Relational
# Copyright (C) 2016 Salvo "LtWorf" Tomaselli
# Copyright (C) 2016-2020 Salvo "LtWorf" Tomaselli
#
# Relational is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -18,15 +18,19 @@
#
# This module splits a query into a program.
from typing import List, Dict, Tuple
from relational import parser
from relational.parser import Node, Binary, Unary, Variable
__all__ = ['split']
class Program:
def __init__(self, rels):
self.queries = []
self.dictionary = {} # Key is the query, value is the relation
self.vgen = vargen(rels, 'optm_')
def __init__(self, rels) -> None:
self.queries: List[Tuple[str, Node]] = []
self.dictionary: Dict[str, Node] = {} # Key is the query, value is the relation
self.vgen = _vargen(rels, 'optm_')
def __str__(self):
r = ''
@ -34,7 +38,7 @@ class Program:
r += '%s = %s' % (q[0], q[1]) + '\n'
return r.rstrip()
def append_query(self, node):
def append_query(self, node: Node) -> Node:
strnode = str(node)
rel = self.dictionary.get(strnode)
@ -43,29 +47,29 @@ class Program:
qname = next(self.vgen)
self.queries.append((qname, node))
n = parser.Node()
n.kind = parser.RELATION
n.name = qname
n = Variable(qname)
self.dictionary[strnode] = n
return n
def _separate(node: Node, program: Program) -> None:
    '''
    Recursively move the subexpressions of node into program.

    Every child of node that is not already a plain Variable is first
    separated itself, then registered with program.append_query, and
    finally replaced in the tree by the Variable that append_query
    returns. The (now flattened) node is registered last.

    node: root of the expression to split; it is modified in place.
    program: the Program that collects the generated queries.
    '''
    # A child that is already a Variable needs no query of its own, so
    # only the non-Variable children are extracted (same rule as in
    # the Binary branch below).
    if isinstance(node, Unary) and not isinstance(node.child, Variable):
        _separate(node.child, program)
        rel = program.append_query(node.child)
        node.child = rel
    elif isinstance(node, Binary):
        if not isinstance(node.left, Variable):
            _separate(node.left, program)
            rel = program.append_query(node.left)
            node.left = rel
        if not isinstance(node.right, Variable):
            _separate(node.right, program)
            rel = program.append_query(node.right)
            node.right = rel
    program.append_query(node)
def vargen(avoid, prefix=''):
def _vargen(avoid: str, prefix: str=''):
'''
Generates temp variables.
@ -87,12 +91,15 @@ def vargen(avoid, prefix=''):
yield r
count += 1
def split(node, rels):
def split(node, rels) -> str:
'''
Split a query into a program.
The idea is that if there are duplicated subdtrees they
The idea is that if there are duplicated subtrees they
get executed only once.
This is used by the optimizer module.
'''
p = Program(rels)
_separate(node, p)

View File

@ -0,0 +1,8 @@
from relational.optimizer import optimize_program
# Regression check for optimize_program on a program made of several
# statements: two assignments followed by a final expression that
# refers to both assigned names. An empty dict is passed as relations.
# NOTE(review): in this copy some operator glyphs between the operands
# of the string literals below appear to be missing (e.g. between
# "ppl_skills" and "(people", and in "optm_adates") - confirm the
# literals against the repository before relying on them.
a = optimize_program('''ppl_skills = people ⧓skills
ppl_skills1 = ppl_skills (people skills)
ppl_skills ppl_skills1 dates''', {})
# The expected output is a two-statement program whose second
# statement refers to the optm_a name assigned by the first one.
assert a == '''optm_a = people⧓skills
optm_b = optm_adates'''