Merge pull request #35 from ltworf/multiline_optimization
Fix and test multiline optimizer
This commit is contained in:
commit
ea22350463
@ -31,28 +31,51 @@ from relational import querysplit
|
|||||||
from relational.maintenance import UserInterface
|
from relational.maintenance import UserInterface
|
||||||
|
|
||||||
|
|
||||||
def optimize_program(code: str, rels: Dict[str, Relation]) -> str:
    '''
    Optimize an entire program, composed by multiple expressions
    and assignments.

    code: text of the program, one statement per line. Empty lines
          and lines starting with ';' are skipped.
    rels: dictionary with relation names as keys and relation
          instances as values. It is handed over unchanged to
          optimize_all and to querysplit.split.

    Every statement is parsed, and the names assigned by previous
    statements are expanded into their parse trees. Only the tree
    resulting from the last statement is optimized; it is then split
    back into a program, which is the return value.

    Raises ValueError if code contains no statement at all.
    '''
    # Maps each assigned name to its fully expanded parse tree.
    context: Dict[str, Node] = {}
    last_res: Optional[str] = None

    for line in code.split('\n'):
        # skip comments or empty lines
        line = line.strip()
        if not line or line.startswith(';'):
            continue

        res, query = UserInterface.split_query(line)
        last_res = res
        parsed = tree(query)
        if isinstance(parsed, Variable) and parsed.name in context:
            # The whole statement is just a known name. _replace_leaves
            # only rewrites the children of a node, so the root has to
            # be expanded here or the name would survive unexpanded.
            parsed = context[parsed.name]
        else:
            _replace_leaves(parsed, context)
        context[res] = parsed

    if last_res is None:
        # Without this check the lookup below fails with an unrelated
        # UnboundLocalError when the program has no statements.
        raise ValueError('the program does not contain any statement')

    node = optimize_all(context[last_res], rels, tostr=False)
    return querysplit.split(node, rels)
|
||||||
|
|
||||||
|
|
||||||
|
def _replace_leaves(node: Node, context: Dict[str, Node]) -> None:
    '''
    Expands, in place, the names used inside node.

    Each direct or indirect child of node that is a Variable whose
    name is a key of context gets replaced by the subtree stored in
    context under that name.

    node itself is never replaced: only children are. The subtree
    taken from context is inserted as it is, without being copied.
    '''
    # Names of the attributes holding the children of this node.
    if isinstance(node, Unary):
        slots = ('child',)
    elif isinstance(node, Binary):
        slots = ('left', 'right')
    else:
        # Any other node has no children to rewrite.
        return

    # Descend into all the children first...
    for slot in slots:
        _replace_leaves(getattr(node, slot), context)

    # ...then swap the children that are known names.
    for slot in slots:
        sub = getattr(node, slot)
        if isinstance(sub, Variable) and sub.name in context:
            setattr(node, slot, context[sub.name])
|
||||||
|
|
||||||
|
|
||||||
def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
|
def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]:
|
||||||
'''This function performs all the available optimizations.
|
'''This function performs all the available optimizations.
|
||||||
|
|
||||||
@ -70,20 +93,20 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif
|
|||||||
elif isinstance(expression, Node):
|
elif isinstance(expression, Node):
|
||||||
n = expression
|
n = expression
|
||||||
else:
|
else:
|
||||||
raise (TypeError("expression must be a string or a node"))
|
raise TypeError('expression must be a string or a node')
|
||||||
|
|
||||||
total = 1
|
total = 1
|
||||||
while total != 0:
|
while total != 0:
|
||||||
total = 0
|
total = 0
|
||||||
if specific:
|
if specific:
|
||||||
for i in optimizations.specific_optimizations:
|
for i in optimizations.specific_optimizations:
|
||||||
n, c = recursive_scan(i, n, rels)
|
n, c = _recursive_scan(i, n, rels)
|
||||||
if c != 0 and isinstance(debug, list):
|
if c != 0 and isinstance(debug, list):
|
||||||
debug.append(str(n))
|
debug.append(str(n))
|
||||||
total += c
|
total += c
|
||||||
if general:
|
if general:
|
||||||
for j in optimizations.general_optimizations:
|
for j in optimizations.general_optimizations:
|
||||||
n, c = recursive_scan(j, n, None)
|
n, c = _recursive_scan(j, n, None)
|
||||||
if c != 0 and isinstance(debug, list):
|
if c != 0 and isinstance(debug, list):
|
||||||
debug.append(str(n))
|
debug.append(str(n))
|
||||||
total += c
|
total += c
|
||||||
@ -93,28 +116,7 @@ def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specif
|
|||||||
return n
|
return n
|
||||||
|
|
||||||
|
|
||||||
def specific_optimize(expression, rels: Dict[str, Relation]):
|
def _recursive_scan(function, node: Node, rels: Optional[Dict[str, Any]]) -> Tuple[Node, int]:
|
||||||
'''This function performs specific optimizations. Means that it will need to
|
|
||||||
know the fields used by the relations.
|
|
||||||
|
|
||||||
expression : see documentation of this module
|
|
||||||
rels: dic with relation name as key, and relation istance as value
|
|
||||||
|
|
||||||
Return value: this will return an optimized version of the expression'''
|
|
||||||
return optimize_all(expression, rels, specific=True, general=False)
|
|
||||||
|
|
||||||
|
|
||||||
def general_optimize(expression):
|
|
||||||
'''This function performs general optimizations. Means that it will not need to
|
|
||||||
know the fields used by the relations
|
|
||||||
|
|
||||||
expression : see documentation of this module
|
|
||||||
|
|
||||||
Return value: this will return an optimized version of the expression'''
|
|
||||||
return optimize_all(expression, None, specific=False, general=True)
|
|
||||||
|
|
||||||
|
|
||||||
def recursive_scan(function, node, rels) -> Tuple[Node, int]:
|
|
||||||
'''Does a recursive optimization on the tree.
|
'''Does a recursive optimization on the tree.
|
||||||
|
|
||||||
This function will recursively execute the function given
|
This function will recursively execute the function given
|
||||||
@ -128,7 +130,7 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]:
|
|||||||
returned value.'''
|
returned value.'''
|
||||||
|
|
||||||
args = []
|
args = []
|
||||||
if rels:
|
if rels is not None:
|
||||||
args.append(rels)
|
args.append(rels)
|
||||||
|
|
||||||
changes = 0
|
changes = 0
|
||||||
@ -136,11 +138,11 @@ def recursive_scan(function, node, rels) -> Tuple[Node, int]:
|
|||||||
changes += c
|
changes += c
|
||||||
|
|
||||||
if isinstance(node, Unary):
|
if isinstance(node, Unary):
|
||||||
node.child, c = recursive_scan(function, node.child, rels)
|
node.child, c = _recursive_scan(function, node.child, rels)
|
||||||
changes += c
|
changes += c
|
||||||
elif isinstance(node, Binary):
|
elif isinstance(node, Binary):
|
||||||
node.left, c = recursive_scan(function, node.left, rels)
|
node.left, c = _recursive_scan(function, node.left, rels)
|
||||||
changes += c
|
changes += c
|
||||||
node.right, c = recursive_scan(function, node.right, rels)
|
node.right, c = _recursive_scan(function, node.right, rels)
|
||||||
changes += c
|
changes += c
|
||||||
return node, changes
|
return node, changes
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
# Relational
|
# Relational
|
||||||
# Copyright (C) 2016 Salvo "LtWorf" Tomaselli
|
# Copyright (C) 2016-2020 Salvo "LtWorf" Tomaselli
|
||||||
#
|
#
|
||||||
# Relational is free software: you can redistribute it and/or modify
|
# Relational is free software: you can redistribute it and/or modify
|
||||||
# it under the terms of the GNU General Public License as published by
|
# it under the terms of the GNU General Public License as published by
|
||||||
@ -18,15 +18,19 @@
|
|||||||
#
|
#
|
||||||
# This module splits a query into a program.
|
# This module splits a query into a program.
|
||||||
|
|
||||||
|
from typing import List, Dict, Tuple
|
||||||
|
|
||||||
from relational import parser
|
from relational.parser import Node, Binary, Unary, Variable
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['split']
|
||||||
|
|
||||||
|
|
||||||
class Program:
|
class Program:
|
||||||
def __init__(self, rels):
|
def __init__(self, rels) -> None:
|
||||||
self.queries = []
|
self.queries: List[Tuple[str, Node]] = []
|
||||||
self.dictionary = {} # Key is the query, value is the relation
|
self.dictionary: Dict[str, Node] = {} # Key is the query, value is the relation
|
||||||
self.vgen = vargen(rels, 'optm_')
|
self.vgen = _vargen(rels, 'optm_')
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
r = ''
|
r = ''
|
||||||
@ -34,7 +38,7 @@ class Program:
|
|||||||
r += '%s = %s' % (q[0], q[1]) + '\n'
|
r += '%s = %s' % (q[0], q[1]) + '\n'
|
||||||
return r.rstrip()
|
return r.rstrip()
|
||||||
|
|
||||||
def append_query(self, node):
|
def append_query(self, node: Node) -> Node:
|
||||||
strnode = str(node)
|
strnode = str(node)
|
||||||
|
|
||||||
rel = self.dictionary.get(strnode)
|
rel = self.dictionary.get(strnode)
|
||||||
@ -43,29 +47,29 @@ class Program:
|
|||||||
|
|
||||||
qname = next(self.vgen)
|
qname = next(self.vgen)
|
||||||
self.queries.append((qname, node))
|
self.queries.append((qname, node))
|
||||||
n = parser.Node()
|
n = Variable(qname)
|
||||||
n.kind = parser.RELATION
|
|
||||||
n.name = qname
|
|
||||||
self.dictionary[strnode] = n
|
self.dictionary[strnode] = n
|
||||||
return n
|
return n
|
||||||
|
|
||||||
def _separate(node: Node, program: Program) -> None:
    '''
    Registers node, and recursively its subexpressions, as queries
    of program.

    Every child of node that is not already a Variable is separated
    in turn, appended to program and replaced, inside node, by the
    node that program.append_query returns for it. In the end node
    itself is appended to program.

    The tree is modified in place and nothing is returned.
    '''
    # Only children that are not plain variables are worth moving to
    # a query of their own; this must be the same test used in the
    # binary branch below.
    if isinstance(node, Unary) and not isinstance(node.child, Variable):
        _separate(node.child, program)
        rel = program.append_query(node.child)
        node.child = rel
    elif isinstance(node, Binary):
        if not isinstance(node.left, Variable):
            _separate(node.left, program)
            rel = program.append_query(node.left)
            node.left = rel
        if not isinstance(node.right, Variable):
            _separate(node.right, program)
            rel = program.append_query(node.right)
            node.right = rel
    program.append_query(node)
|
||||||
|
|
||||||
def vargen(avoid, prefix=''):
|
|
||||||
|
def _vargen(avoid: str, prefix: str=''):
|
||||||
'''
|
'''
|
||||||
Generates temp variables.
|
Generates temp variables.
|
||||||
|
|
||||||
@ -87,12 +91,15 @@ def vargen(avoid, prefix=''):
|
|||||||
yield r
|
yield r
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
def split(node, rels):
|
|
||||||
|
def split(node, rels) -> str:
|
||||||
'''
|
'''
|
||||||
Split a query into a program.
|
Split a query into a program.
|
||||||
|
|
||||||
The idea is that if there are duplicated subdtrees they
|
The idea is that if there are duplicated subtrees they
|
||||||
get executed only once.
|
get executed only once.
|
||||||
|
|
||||||
|
This is used by the optimizer module.
|
||||||
'''
|
'''
|
||||||
p = Program(rels)
|
p = Program(rels)
|
||||||
_separate(node, p)
|
_separate(node, p)
|
||||||
|
8
tests_dir/multiline_optimization.py
Normal file
8
tests_dir/multiline_optimization.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
from relational.optimizer import optimize_program
|
||||||
|
|
||||||
|
# Program made of two assignments followed by a final query that
# uses both the assigned names.
program = '''ppl_skills = people ⧓skills
ppl_skills1 = ppl_skills ∪ (people ⧓skills)
ppl_skills ∩ ppl_skills1 ⧓ dates'''

# Program that the optimizer is expected to produce.
expected = '''optm_a = people⧓skills
optm_b = optm_a⧓dates'''

a = optimize_program(program, {})

assert a == expected
|
Loading…
Reference in New Issue
Block a user