From b5081495831feaede9ce765c154dfe66af11e956 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 01:37:19 +0200 Subject: [PATCH 01/29] Major refactory of the parser It uses subclasses rather than a kind variable. This is way more typesafe. --- relational/parser.py | 240 +++++++++++++++++++++++-------------------- 1 file changed, 127 insertions(+), 113 deletions(-) diff --git a/relational/parser.py b/relational/parser.py index c09eb46..2b0471a 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -1,5 +1,5 @@ # Relational -# Copyright (C) 2008-2017 Salvo "LtWorf" Tomaselli +# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli # # Relational is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,6 +25,7 @@ # Language definition here: # http://ltworf.github.io/relational/grammar.html from typing import Optional, Union, List, Any +from dataclasses import dataclass from relational import rtypes @@ -84,9 +85,8 @@ class CallableString(str): ''' return eval(self, context) - +@dataclass class Node: - '''This class is a node of a relational expression. Leaves are relations and internal nodes are operations. @@ -102,72 +102,12 @@ class Node: operation. This class is used to convert an expression into python code.''' - kind = None # type: Optional[int] - __hash__ = None # type: None + name: str - def __init__(self, expression: Optional[list] = None) -> None: - '''Generates the tree from the tokenized expression - If no expression is specified then it will create an empty node''' - if expression is None or len(expression) == 0: - return + def __init__(self, name: str) -> None: + raise NotImplementedError('This is supposed to be an abstract class') - # If the list contains only a list, it will consider the lower level list. - # This will allow things like ((((((a))))) to work - while len(expression) == 1 and isinstance(expression[0], list): - expression = expression[0] - - # The list contains only 1 string. Means it is the name of a relation - if len(expression) == 1: - self.kind = RELATION - self.name = expression[0] - if not rtypes.is_valid_relation_name(self.name): - raise ParserException( - u"'%s' is not a valid relation name" % self.name) - return - - # Expression from right to left, searching for binary operators - # this means that binary operators have lesser priority than - # unary operators. - # It finds the operator with lesser priority, uses it as root of this - # (sub)tree using everything on its left as left parameter (so building - # a left subtree with the part of the list located on left) and doing - # the same on right. - # Since it searches for strings, and expressions into parenthesis are - # within sub-lists, they won't be found here, ensuring that they will - # have highest priority. - for i in range(len(expression) - 1, -1, -1): - if expression[i] in b_operators: # Binary operator - self.kind = BINARY - self.name = expression[i] - - if len(expression[:i]) == 0: - raise ParserException( - u"Expected left operand for '%s'" % self.name) - - if len(expression[i + 1:]) == 0: - raise ParserException( - u"Expected right operand for '%s'" % self.name) - - self.left = node(expression[:i]) - self.right = node(expression[i + 1:]) - return - '''Searches for unary operators, parsing from right to left''' - for i in range(len(expression) - 1, -1, -1): - if expression[i] in u_operators: # Unary operator - self.kind = UNARY - self.name = expression[i] - - if len(expression) <= i + 2: - raise ParserException( - u"Expected more tokens in '%s'" % self.name) - - self.prop = expression[1 + i].strip() - self.child = node(expression[2 + i]) - - return - raise ParserException("Expected operator in '%s'" % expression) - - def toCode(self): + def toCode(self): #FIXME return type '''This method converts the AST into a python code object''' code = self._toPython() return compile(code, '', 'eval') @@ -181,25 +121,7 @@ class Node: return CallableString(self._toPython()) def _toPython(self) -> str: - ''' - Same as toPython but returns a regular string - ''' - if self.name in b_operators: - return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython()) - elif self.name in u_operators: - prop = self.prop - - # Converting parameters - if self.name == PROJECTION: - prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"') - elif self.name == RENAME: - prop = '{\"%s\"}' % prop.replace( - ',', '\",\"').replace(ARROW, '\":\"').replace(' ', '') - else: # Selection - prop = repr(prop) - - return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop) - return self.name + raise NotImplementedError() def printtree(self, level: int = 0) -> str: '''returns a representation of the tree using indentation''' @@ -216,27 +138,20 @@ class Node: return '\n' + r def get_left_leaf(self) -> 'Node': - '''This function returns the leftmost leaf in the tree.''' - if self.kind == RELATION: - return self - elif self.kind == UNARY: - return self.child.get_left_leaf() - elif self.kind == BINARY: - return self.left.get_left_leaf() - raise ValueError('What kind of alien object is this?') + raise NotImplementedError() - def result_format(self, rels: dict) -> list: + def result_format(self, rels: dict) -> list: #FIXME types '''This function returns a list containing the fields that the resulting relation will have. It requires a dictionary where keys are the names of the relations and the values are the relation objects.''' if not isinstance(rels, dict): raise TypeError('Can\'t be of None type') - if self.kind == RELATION: + if isinstance(self, Variable): #FIXME this is ugly return list(rels[self.name].header) - elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION): + elif isinstance(self, Binary) and self.name in (DIFFERENCE, UNION, INTERSECTION): return self.left.result_format(rels) - elif self.kind == BINARY and self.name == DIVISION: + elif isinstance(self, Binary) and self.name == DIVISION: return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels))) elif self.name == PROJECTION: return [i.strip() for i in self.prop.split(',')] @@ -259,7 +174,7 @@ class Node: return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels)))) raise ValueError('What kind of alien object is this?') - def __eq__(self, other): + def __eq__(self, other): #FIXME if not (isinstance(other, node) and self.name == other.name and self.kind == other.kind): return False @@ -271,22 +186,121 @@ class Node: return self.left == other.left and self.right == other.right return True + +@dataclass +class Variable(Node): + def _toPython(self) -> str: + return self.name + def __str__(self): - if (self.kind == RELATION): - return self.name - elif (self.kind == UNARY): - return self.name + " " + self.prop + " (" + self.child.__str__() + ")" - elif (self.kind == BINARY): - le = self.left.__str__() - if self.right.kind != BINARY: - re = self.right.__str__() - else: - re = "(" + self.right.__str__() + ")" - return (le + self.name + re) - raise ValueError('What kind of alien object is this?') + return self.name + + def get_left_leaf(self) -> Node: + return self -def _find_matching_parenthesis(expression: str, start=0, openpar=u'(', closepar=u')') -> Optional[int]: +@dataclass +class Binary(Node): + left: Node + right: Node + + def get_left_leaf(self) -> Node: + return self.left.get_left_leaf() + + def _toPython(self) -> str: + return '%s.%s(%s)' % (self.left._toPython(), op_functions[self.name], self.right._toPython()) + + def __str__(self): + le = self.left.__str__() + if isinstance(self.right, Binary): + re = "(" + self.right.__str__() + ")" + else: + re = self.right.__str__() + return (le + self.name + re) #TODO use fstrings + + +@dataclass +class Unary(Node): + prop: str + child: Node + + def get_left_leaf(self) -> Node: + return self.child.get_left_leaf() + + def __str__(self): + return self.name + " " + self.prop + " (" + self.child.__str__() + ")" #TODO use fstrings + + def _toPython(self) -> str: + prop = self.prop + + # Converting parameters + if self.name == PROJECTION: + prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"') + elif self.name == RENAME: + prop = '{\"%s\"}' % prop.replace( + ',', '\",\"').replace(ARROW, '\":\"').replace(' ', '') + else: # Selection + prop = repr(prop) + + return '%s.%s(%s)' % (self.child._toPython(), op_functions[self.name], prop) + + + +def parse_tokens(expression: List[Union[list, str]]) -> Node: + '''Generates the tree from the tokenized expression + If no expression is specified then it will create an empty node''' + + # If the list contains only a list, it will consider the lower level list. + # This will allow things like ((((((a))))) to work + while len(expression) == 1 and isinstance(expression[0], list): + expression = expression[0] + + # The list contains only 1 string. Means it is the name of a relation + if len(expression) == 1: + + if not rtypes.is_valid_relation_name(expression[0]): + raise ParserException( + u"'%s' is not a valid relation name" % expression[0]) + return Variable(expression[0]) #FIXME Move validation in the object + + # Expression from right to left, searching for binary operators + # this means that binary operators have lesser priority than + # unary operators. + # It finds the operator with lesser priority, uses it as root of this + # (sub)tree using everything on its left as left parameter (so building + # a left subtree with the part of the list located on left) and doing + # the same on right. + # Since it searches for strings, and expressions into parenthesis are + # within sub-lists, they won't be found here, ensuring that they will + # have highest priority. + for i in range(len(expression) - 1, -1, -1): + if expression[i] in b_operators: # Binary operator + + + if len(expression[:i]) == 0: + raise ParserException( + u"Expected left operand for '%s'" % self.name) + + if len(expression[i + 1:]) == 0: + raise ParserException( + u"Expected right operand for '%s'" % self.name) + return Binary(expression[i], parse_tokens(expression[:i]), parse_tokens(expression[i + 1:])) + '''Searches for unary operators, parsing from right to left''' + for i in range(len(expression) - 1, -1, -1): + if expression[i] in u_operators: # Unary operator + if len(expression) <= i + 2: + raise ParserException( + u"Expected more tokens in '%s'" % self.name) + + return Unary( + expression[i], + prop=expression[1 + i].strip(), + child=parse_tokens(expression[2 + i]) + ) + raise ParserException('Parse error') #FIXME more details + + +def _find_matching_parenthesis(expression: str, start=0, openpar='(', closepar=')') -> Optional[int]: '''This function returns the position of the matching close parenthesis to the 1st open parenthesis found starting from start (0 by default)''' @@ -391,7 +405,7 @@ def tokenize(expression: str) -> list: def tree(expression: str) -> Node: '''This function parses a relational algebra expression into a AST and returns the root node using the Node class.''' - return Node(tokenize(expression)) + return parse_tokens(tokenize(expression)) def parse(expr: str) -> CallableString: From a8426840dad0018b7af88da21e45738454955d7b Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 02:06:57 +0200 Subject: [PATCH 02/29] Disable all optimizations for now They are in need of a refactor --- CHANGELOG | 3 +++ relational/optimizations.py | 30 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index eaf8226..605c77a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,6 @@ +3.0 +- Refactored parser to use better typing + 2.6 - Improved survey sending - Prevent relation/field names from being reserved keywords diff --git a/relational/optimizations.py b/relational/optimizations.py index 42c141e..07909a6 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -1,5 +1,5 @@ # Relational -# Copyright (C) 2009-2018 Salvo "LtWorf" Tomaselli +# Copyright (C) 2009-2020 Salvo "LtWorf" Tomaselli # # Relational is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -709,22 +709,22 @@ def useless_projection(n, rels) -> int: return changes + recoursive_scan(useless_projection, n, rels) general_optimizations = [ - duplicated_select, - down_to_unions_subtractions_intersections, - duplicated_projection, - selection_inside_projection, - subsequent_renames, - swap_rename_select, - futile_union_intersection_subtraction, - swap_union_renames, - swap_rename_projection, - select_union_intersect_subtract, - union_and_product, + #duplicated_select, + #down_to_unions_subtractions_intersections, + #duplicated_projection, + #selection_inside_projection, + #subsequent_renames, + #swap_rename_select, + #futile_union_intersection_subtraction, + #swap_union_renames, + #swap_rename_projection, + #select_union_intersect_subtract, + #union_and_product, ] specific_optimizations = [ - selection_and_product, - projection_and_union, - useless_projection, + #selection_and_product, + #projection_and_union, + #useless_projection, ] if __name__ == "__main__": From 6c677452e8df3d50dbc1ed4a3bb63dbaed10582a Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 10:14:19 +0200 Subject: [PATCH 03/29] Remove old things --- relational/parser.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/relational/parser.py b/relational/parser.py index 2b0471a..c2b386c 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -414,11 +414,3 @@ def parse(expr: str) -> CallableString: Python expression. ''' return tree(expr).toPython() - -if __name__ == "__main__": - while True: - e = input("Expression: ") - print (parse(e)) - -# Backwards compatibility -node = Node From 3e524278e87ba75debdd84050213ea8320357584 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 10:51:40 +0200 Subject: [PATCH 04/29] Remove unused --- relational/parser.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/relational/parser.py b/relational/parser.py index c2b386c..7f0b682 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -29,10 +29,6 @@ from dataclasses import dataclass from relational import rtypes -RELATION = 0 -UNARY = 1 -BINARY = 2 - PRODUCT = '*' DIFFERENCE = '-' UNION = '∪' From 0dcd639c9d3cf7580a595b08c410bff1f446fe78 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 11:18:33 +0200 Subject: [PATCH 05/29] Enable one optimization Due to the fact that the nodes are different classes, this required some refactor. --- relational/optimizations.py | 57 +++++++++++-------------------------- relational/optimizer.py | 44 ++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 47 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 07909a6..1133907 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -30,6 +30,7 @@ from io import StringIO from tokenize import generate_tokens +from typing import Tuple from relational import parser @@ -98,36 +99,7 @@ def replace_node(replace, replacement): replace.left = replacement.left -def recoursive_scan(function, node, rels=None): - '''Does a recoursive optimization on the tree. - - This function will recoursively execute the function given - as "function" parameter starting from node to all the tree. - if rels is provided it will be passed as argument to the function. - Otherwise the function will be called just on the node. - - Result value: function is supposed to return the amount of changes - it has performed on the tree. - The various result will be added up and this final value will be the - returned value.''' - changes = 0 - # recoursive scan - if node.kind == parser.UNARY: - if rels != None: - changes += function(node.child, rels) - else: - changes += function(node.child) - elif node.kind == parser.BINARY: - if rels != None: - changes += function(node.right, rels) - changes += function(node.left, rels) - else: - changes += function(node.right) - changes += function(node.left) - return changes - - -def duplicated_select(n: parser.Node) -> int: +def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]: '''This function locates and deletes things like σ a ( σ a(C)) and the ones like σ a ( σ b(C)) replacing the 1st one with a single select and @@ -135,19 +107,22 @@ def duplicated_select(n: parser.Node) -> int: in and ''' changes = 0 - if n.name == SELECTION and n.child.name == SELECTION: + while n.name == SELECTION and n.child.name == SELECTION: + changes += 1 + prop = n.prop + if n.prop != n.child.prop: # Nested but different, joining them - n.prop = n.prop + " and " + n.child.prop + prop = n.prop + " and " + n.child.prop # This adds parenthesis if they are needed if n.child.prop.startswith('(') or n.prop.startswith('('): - n.prop = '(%s)' % n.prop - - n.child = n.child.child - changes = 1 - changes += duplicated_select(n) - - return changes + recoursive_scan(duplicated_select, n) + prop = '(%s)' % prop + n = parser.Unary( + SELECTION, + prop, + n.child.child, + ) + return n, changes def futile_union_intersection_subtraction(n: parser.Node) -> int: @@ -706,10 +681,10 @@ def useless_projection(n, rels) -> int: changes = 1 replace_node(n, n.child) - return changes + recoursive_scan(useless_projection, n, rels) + return changes + recursive_scan(useless_projection, n, rels) general_optimizations = [ - #duplicated_select, + duplicated_select, #down_to_unions_subtractions_intersections, #duplicated_projection, #selection_inside_projection, diff --git a/relational/optimizer.py b/relational/optimizer.py index 3e8ccd5..30c24bd 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -1,5 +1,5 @@ # Relational -# Copyright (C) 2008-2016 Salvo "LtWorf" Tomaselli +# Copyright (C) 2008-2020 Salvo "LtWorf" Tomaselli # # Relational is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,10 +22,10 @@ # relational query, or it can be a parse tree for a relational expression (ie: class parser.node). # The functions will always return a string with the optimized query, but if a parse tree was provided, # the parse tree itself will be modified accordingly. -from typing import Union, Optional, Dict, Any +from typing import Union, Optional, Dict, Any, Tuple from relational import optimizations -from relational.parser import Node, RELATION, UNARY, BINARY, op_functions, tokenize, tree +from relational.parser import Node, Variable, Unary, Binary, op_functions, tokenize, tree from relational import querysplit from relational.maintenance import UserInterface @@ -88,10 +88,10 @@ def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool total += res if general: for i in optimizations.general_optimizations: - res = i(n) # Performs the optimization - if res != 0 and dbg: + n, c = recursive_scan(i, n, None) + if c != 0 and dbg: debug.append(str(n)) - total += res + total += c if tostr: return str(n) else: @@ -117,3 +117,35 @@ def general_optimize(expression): Return value: this will return an optimized version of the expression''' return optimize_all(expression, None, specific=False, general=True) + + +def recursive_scan(function, node, rels) -> Tuple[Node, int]: + '''Does a recursive optimization on the tree. + + This function will recursively execute the function given + as "function" parameter starting from node to all the tree. + if rels is provided it will be passed as argument to the function. + Otherwise the function will be called just on the node. + + Result value: function is supposed to return the amount of changes + it has performed on the tree. + The various result will be added up and this final value will be the + returned value.''' + + args = [] + if rels: + args.append(rels) + + changes = 0 + node, c = function(node, *args) + changes += c + + if isinstance(node, Unary): + node.child, c = recursive_scan(function, node.child, rels) + changes += c + elif isinstance(node, Binary): + node.left, c = recursive_scan(function, node.left, rels) + changes += c + node.right, c = recursive_scan(function, node.right, rels) + changes += c + return node, changes From 50647294cb98c6bf3415e50c9620db3c09715bb1 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 11:43:14 +0200 Subject: [PATCH 06/29] Enable futile_union_intersection_subtraction --- relational/optimizations.py | 42 +++++++++++++++---------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 1133907..0c1ca08 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -125,7 +125,7 @@ def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]: return n, changes -def futile_union_intersection_subtraction(n: parser.Node) -> int: +def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, int]: '''This function locates things like r ᑌ r, and replaces them with r. R ᑌ R --> R R ᑎ R --> R @@ -140,47 +140,39 @@ def futile_union_intersection_subtraction(n: parser.Node) -> int: # Union and intersection of the same thing if n.name in (UNION, INTERSECTION, JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) and n.left == n.right: - changes = 1 - replace_node(n, n.left) + return n.left, 1 # selection and union of the same thing elif (n.name == UNION): if n.left.name == SELECTION and n.left.child == n.right: - changes = 1 - replace_node(n, n.right) + return n.right, 1 elif n.right.name == SELECTION and n.right.child == n.left: - changes = 1 - replace_node(n, n.left) + return n.left, 1 # selection and intersection of the same thing elif n.name == INTERSECTION: if n.left.name == SELECTION and n.left.child == n.right: - changes = 1 - replace_node(n, n.left) + return n.left, 1 elif n.right.name == SELECTION and n.right.child == n.left: - changes = 1 - replace_node(n, n.right) + return n.right, 1 # Subtraction and selection of the same thing elif n.name == DIFFERENCE and \ n.right.name == SELECTION and \ n.right.child == n.left: - n.name = n.right.name - n.kind = n.right.kind - n.child = n.right.child - n.prop = '(not (%s))' % n.right.prop - n.left = n.right = None + return parser.Unary( + SELECTION, + '(not (%s))' % n.right.prop, + n.right.child), 1 # Subtraction of the same thing or with selection on the left child elif n.name == DIFFERENCE and (n.left == n.right or (n.left.name == SELECTION and n.left.child == n.right)): - changes = 1 - n.kind = parser.UNARY - n.name = SELECTION - n.prop = 'False' - n.child = n.left.get_left_leaf() - # n.left=n.right=None - - return changes + recoursive_scan(futile_union_intersection_subtraction, n) + return parser.Unary( + SELECTION, + 'False', + n.get_left_leaf() + ), 1 + return n, 0 def down_to_unions_subtractions_intersections(n: parser.Node) -> int: @@ -690,7 +682,7 @@ general_optimizations = [ #selection_inside_projection, #subsequent_renames, #swap_rename_select, - #futile_union_intersection_subtraction, + futile_union_intersection_subtraction, #swap_union_renames, #swap_rename_projection, #select_union_intersect_subtract, From 403b7b9962d5839a09ceafeb2004336923e14b5b Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 11:45:35 +0200 Subject: [PATCH 07/29] Enable duplicated_projection --- relational/optimizations.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 0c1ca08..ceca4b9 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -206,16 +206,16 @@ def down_to_unions_subtractions_intersections(n: parser.Node) -> int: return changes + recoursive_scan(down_to_unions_subtractions_intersections, n) -def duplicated_projection(n: parser.Node) -> int: +def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]: '''This function locates thing like π i ( π j (R)) and replaces them with π i (R)''' - changes = 0 if n.name == PROJECTION and n.child.name == PROJECTION: - n.child = n.child.child - changes += 1 - - return changes + recoursive_scan(duplicated_projection, n) + return parser.Unary( + PROJECTION, + n.prop, + n.child.child), 1 + return n, 0 def selection_inside_projection(n: parser.Node) -> int: @@ -678,7 +678,7 @@ def useless_projection(n, rels) -> int: general_optimizations = [ duplicated_select, #down_to_unions_subtractions_intersections, - #duplicated_projection, + duplicated_projection, #selection_inside_projection, #subsequent_renames, #swap_rename_select, From 418e293c1d90f7ad036ea2836182a0737305b87c Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 11:50:58 +0200 Subject: [PATCH 08/29] Enable down_to_unions_subtractions_intersections --- relational/optimizations.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index ceca4b9..7d1ff54 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -175,7 +175,7 @@ def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, return n, 0 -def down_to_unions_subtractions_intersections(n: parser.Node) -> int: +def down_to_unions_subtractions_intersections(n: parser.Node) -> Tuple[parser.Node, int]: '''This funcion locates things like σ i==2 (c ᑌ d), where the union can be a subtraction and an intersection and replaces them with σ i==2 (c) ᑌ σ i==2(d). @@ -183,27 +183,11 @@ def down_to_unions_subtractions_intersections(n: parser.Node) -> int: changes = 0 _o = (UNION, DIFFERENCE, INTERSECTION) if n.name == SELECTION and n.child.name in _o: + l = parser.Unary(SELECTION, n.prop, n.child.left) + r = parser.Unary(SELECTION, n.prop, n.child.right) - left = parser.Node() - left.prop = n.prop - left.name = n.name - left.child = n.child.left - left.kind = parser.UNARY - right = parser.Node() - right.prop = n.prop - right.name = n.name - right.child = n.child.right - right.kind = parser.UNARY - - n.name = n.child.name - n.left = left - n.right = right - n.child = None - n.prop = None - n.kind = parser.BINARY - changes += 1 - - return changes + recoursive_scan(down_to_unions_subtractions_intersections, n) + return parser.Binary(n.child.name, l, r), 1 + return n, 0 def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]: @@ -677,7 +661,7 @@ def useless_projection(n, rels) -> int: general_optimizations = [ duplicated_select, - #down_to_unions_subtractions_intersections, + down_to_unions_subtractions_intersections, duplicated_projection, #selection_inside_projection, #subsequent_renames, From 6622ba947ea7f8392146418b61a1b4d6e513b0bd Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 11:55:25 +0200 Subject: [PATCH 09/29] selection_inside_projection --- relational/optimizations.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 7d1ff54..ee6c94f 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -202,20 +202,18 @@ def duplicated_projection(n: parser.Node) -> Tuple[parser.Node, int]: return n, 0 -def selection_inside_projection(n: parser.Node) -> int: +def selection_inside_projection(n: parser.Node) -> Tuple[parser.Node, int]: '''This function locates things like σ j (π k(R)) and converts them into π k(σ j (R))''' - changes = 0 - if n.name == SELECTION and n.child.name == PROJECTION: - changes = 1 - temp = n.prop - n.prop = n.child.prop - n.child.prop = temp - n.name = PROJECTION - n.child.name = SELECTION + child = parser.Unary( + SELECTION, + n.prop, + n.child.child + ) - return changes + recoursive_scan(selection_inside_projection, n) + return parser.Unary(PROJECTION, n.child.prop, child), 0 + return n, 0 def swap_union_renames(n: parser.Node) -> int: @@ -663,7 +661,7 @@ general_optimizations = [ duplicated_select, down_to_unions_subtractions_intersections, duplicated_projection, - #selection_inside_projection, + selection_inside_projection, #subsequent_renames, #swap_rename_select, futile_union_intersection_subtraction, From 72c4746578042edce393bbf913720ea7d3e4b65c Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 12:05:36 +0200 Subject: [PATCH 10/29] Add function to convert the rename property to a python dict Probably less error prone, and I can remove code duplication. --- relational/parser.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/relational/parser.py b/relational/parser.py index 7f0b682..593272e 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -24,7 +24,7 @@ # # Language definition here: # http://ltworf.github.io/relational/grammar.html -from typing import Optional, Union, List, Any +from typing import Optional, Union, List, Any, Dict from dataclasses import dataclass from relational import rtypes @@ -233,13 +233,24 @@ class Unary(Node): if self.name == PROJECTION: prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"') elif self.name == RENAME: - prop = '{\"%s\"}' % prop.replace( - ',', '\",\"').replace(ARROW, '\":\"').replace(' ', '') + prop = repr(self.rename_dict()) else: # Selection prop = repr(prop) return '%s.%s(%s)' % (self.child._toPython(), op_functions[self.name], prop) + def rename_dict(self) -> Dict[str, str]: + ''' + Returns the dictionary that the rename operation wants + ''' + if self.name != RENAME: + raise ValueError('This is only supported on rename nodes') + r = {} + for i in self.prop.split(','): + q = i.split(ARROW) + r[q[0].strip()] = q[1].strip() + return r + def parse_tokens(expression: List[Union[list, str]]) -> Node: From 6208333f48e851d7c220003f0e241845bb589761 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 12:32:09 +0200 Subject: [PATCH 11/29] Use correct symbols The unicode symbols are some strange script, they look similar but are not the actual symbols for math operations. They are leftovers from version 1. The actual values were fixed but not the comments. --- relational/optimizations.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index ee6c94f..f018f5b 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -126,14 +126,14 @@ def duplicated_select(n: parser.Node) -> Tuple[parser.Node, int]: def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, int]: - '''This function locates things like r ᑌ r, and replaces them with r. - R ᑌ R --> R - R ᑎ R --> R + '''This function locates things like r ∪ r, and replaces them with r. + R ∪ R --> R + R ∩ R --> R R - R --> σ False (R) σ k (R) - R --> σ False (R) R - σ k (R) --> σ not k (R) - σ k (R) ᑌ R --> R - σ k (R) ᑎ R --> σ k (R) + σ k (R) ∪ R --> R + σ k (R) ∩ R --> σ k (R) ''' changes = 0 @@ -176,9 +176,9 @@ def futile_union_intersection_subtraction(n: parser.Node) -> Tuple[parser.Node, def down_to_unions_subtractions_intersections(n: parser.Node) -> Tuple[parser.Node, int]: - '''This funcion locates things like σ i==2 (c ᑌ d), where the union + '''This funcion locates things like σ i==2 (c ∪ d), where the union can be a subtraction and an intersection and replaces them with - σ i==2 (c) ᑌ σ i==2(d). + σ i==2 (c) ∪ σ i==2(d). ''' changes = 0 _o = (UNION, DIFFERENCE, INTERSECTION) @@ -440,7 +440,7 @@ def swap_rename_select(n: parser.Node) -> int: def select_union_intersect_subtract(n: parser.Node) -> int: - '''This function locates things like σ i(a) ᑌ σ q(a) + '''This function locates things like σ i(a) ∪ σ q(a) and replaces them with σ (i OR q) (a) Removing a O(n²) operation like the union''' changes = 0 From 454f4161bc6e16a25ab29cd12cd318b2765d2680 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 12:34:07 +0200 Subject: [PATCH 12/29] Add test for swap_union_renames --- tests_dir/people_rename.query | 1 + tests_dir/people_rename.result | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests_dir/people_rename.query create mode 100644 tests_dir/people_rename.result diff --git a/tests_dir/people_rename.query b/tests_dir/people_rename.query new file mode 100644 index 0000000..b2fba90 --- /dev/null +++ b/tests_dir/people_rename.query @@ -0,0 +1 @@ +ρ name➡n,age➡a(σTrue(people)) ∪ ρ age➡a,name➡n(people) diff --git a/tests_dir/people_rename.result b/tests_dir/people_rename.result new file mode 100644 index 0000000..6e4a1ba --- /dev/null +++ b/tests_dir/people_rename.result @@ -0,0 +1,9 @@ +id,n,chief,a +0,jack,0,22 +1,carl,0,20 +2,john,1,30 +3,dean,1,33 +4,eve,0,25 +5,duncan,4,30 +6,paul,4,30 +7,alia,1,28 From 4a56b8eaac113ff79182c60d7f7ae3f45aac5b1e Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 12:38:29 +0200 Subject: [PATCH 13/29] swap_union_renames --- relational/optimizations.py | 43 +++++++++---------------------------- 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index f018f5b..19dda72 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -216,42 +216,19 @@ def selection_inside_projection(n: parser.Node) -> Tuple[parser.Node, int]: return n, 0 -def swap_union_renames(n: parser.Node) -> int: +def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]: '''This function locates things like - ρ a➡b(R) ᑌ ρ a➡b(Q) + ρ a➡b(R) ∪ ρ a➡b(Q) and replaces them with - ρ a➡b(R ᑌ Q). + ρ a➡b(R ∪ Q). Does the same with subtraction and intersection''' - changes = 0 - - if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == n.right.name and n.left.name == RENAME: - l_vars = {} - for i in n.left.prop.split(','): - q = i.split(ARROW) - l_vars[q[0].strip()] = q[1].strip() - - r_vars = {} - for i in n.right.prop.split(','): - q = i.split(ARROW) - r_vars[q[0].strip()] = q[1].strip() - + if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == RENAME and n.right.name == RENAME: + l_vars = n.left.rename_dict() + r_vars = n.right.rename_dict() if r_vars == l_vars: - changes = 1 - - # Copying self, but child will be child of renames - q = parser.Node() - q.name = n.name - q.kind = parser.BINARY - q.left = n.left.child - q.right = n.right.child - - n.name = RENAME - n.kind = parser.UNARY - n.child = q - n.prop = n.left.prop - n.left = n.right = None - - return changes + recoursive_scan(swap_union_renames, n) + child = parser.Binary(n.name, n.left.child, n.right.child) + return parser.Unary(RENAME, n.left.prop, child), 1 + return n, 0 def futile_renames(n: parser.Node) -> int: @@ -665,7 +642,7 @@ general_optimizations = [ #subsequent_renames, #swap_rename_select, futile_union_intersection_subtraction, - #swap_union_renames, + swap_union_renames, #swap_rename_projection, #select_union_intersect_subtract, #union_and_product, From e4d62e957135608a88a27ccc4de4401723692680 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 15:34:40 +0200 Subject: [PATCH 14/29] futile_renames, subsequent_renames --- relational/optimizations.py | 100 +++++++++++++++++------------------- 1 file changed, 47 insertions(+), 53 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 19dda72..362cd16 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -231,73 +231,66 @@ def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]: return n, 0 -def futile_renames(n: parser.Node) -> int: - '''This function purges renames like id->id''' - changes = 0 +def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]: + '''This function purges renames like + ρ id->id,a->q (A) + into + ρ a->q (A) + or removes the operation entirely if they all get removed + ''' if n.name == RENAME: - # Located two nested renames. - changes = 1 + renames = n.rename_dict() + changes = False + for k, v in renames.items(): + if k == v: + changes = True + del renames[k] + if len(renames) == 0: # Nothing to rename, removing the rename + return n.child, 1 + elif changes: + # Changing the node in place, no need to return to cause a recursive step + n.prop = ','.join(f'{k}{ARROW}{v}' for k, v in renames.items()) - # Creating a dictionary with the attributes - _vars = {} - for i in n.prop.split(','): - q = i.split(ARROW) - _vars[q[0].strip()] = q[1].strip() - # Scans dictionary to locate things like "a->b,b->c" and replace them - # with "a->c" - for key in list(_vars.keys()): - value = _vars.get(key) - if key == value: - _vars.pop(value) # Removes the unused one - - if len(_vars) == 0: # Nothing to rename, removing the rename op - replace_node(n, n.child) - else: - n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items()) - - return changes + recoursive_scan(futile_renames, n) + return n, 0 -def subsequent_renames(n: parser.Node) -> int: - '''This function removes redoundant subsequent renames joining them into one''' - - '''Purges renames like id->id Since it's needed to be performed BEFORE this one - so it is not in the list with the other optimizations''' - futile_renames(n) - changes = 0 - +def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]: + '''This function removes redundant subsequent renames joining them into one + ρ .. ρ .. (A) + into + ρ ... (A) + ''' if n.name == RENAME and n.child.name == RENAME: # Located two nested renames. - changes = 1 - # Joining the attribute into one - n.prop += ',' + n.child.prop - n.child = n.child.child + prop = n.prop + ',' + n.child.prop + child = n.child.child + n = parser.Unary(RENAME, prop, child) # Creating a dictionary with the attributes - _vars = {} - for i in n.prop.split(','): - q = i.split(ARROW) - _vars[q[0].strip()] = q[1].strip() + renames = n.rename_dict() + # Scans dictionary to locate things like "a->b,b->c" and replace them # with "a->c" - for key in list(_vars.keys()): - value = _vars.get(key) - if value in _vars.keys(): - if _vars[value] != key: + changes = False + for key, value in tuple(renames.items()): + if value in renames: + changes = True + if renames[value] != key: # Double rename on attribute - _vars[key] = _vars[_vars[key]] # Sets value - _vars.pop(value) # Removes the unused one + renames[key] = renames[renames[key]] # Sets value + renames.pop(value) # Removes the unused one else: # Cycle rename a->b,b->a - _vars.pop(value) # Removes the unused one - _vars.pop(key) # Removes the unused one + renames.pop(value) # Removes the unused one + renames.pop(key) # Removes the unused one - if len(_vars) == 0: # Nothing to rename, removing the rename op - replace_node(n, n.child) - else: - n.prop = ','.join('%s%s%s' % (i[0], ARROW, i[1]) for i in _vars.items()) + if len(renames) == 0: # Nothing to rename, removing the rename op + return n, 1 + elif changes: + n.prop = ','.join(f'{k}{ARROW}{v}' for k, v in renames.items()) + return n, 1 - return changes + recoursive_scan(subsequent_renames, n) + return n, 0 class level_string(str): @@ -639,7 +632,8 @@ general_optimizations = [ down_to_unions_subtractions_intersections, duplicated_projection, selection_inside_projection, - #subsequent_renames, + subsequent_renames, + futile_renames, #swap_rename_select, futile_union_intersection_subtraction, swap_union_renames, From b3b5afec637fc58004aa471ad75a35b513637035 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 15:56:16 +0200 Subject: [PATCH 15/29] Some convenience methods --- relational/optimizations.py | 12 ++++++------ relational/parser.py | 23 +++++++++++++++++++++-- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 362cd16..63b6810 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -223,8 +223,8 @@ def swap_union_renames(n: parser.Node) -> Tuple[parser.Node, int]: ρ a➡b(R ∪ Q). Does the same with subtraction and intersection''' if n.name in (DIFFERENCE, UNION, INTERSECTION) and n.left.name == RENAME and n.right.name == RENAME: - l_vars = n.left.rename_dict() - r_vars = n.right.rename_dict() + l_vars = n.left.get_rename_prop() + r_vars = n.right.get_rename_prop() if r_vars == l_vars: child = parser.Binary(n.name, n.left.child, n.right.child) return parser.Unary(RENAME, n.left.prop, child), 1 @@ -240,7 +240,7 @@ def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]: or removes the operation entirely if they all get removed ''' if n.name == RENAME: - renames = n.rename_dict() + renames = n.get_rename_prop() changes = False for k, v in renames.items(): if k == v: @@ -250,7 +250,7 @@ def futile_renames(n: parser.Node) -> Tuple[parser.Node, int]: return n.child, 1 elif changes: # Changing the node in place, no need to return to cause a recursive step - n.prop = ','.join(f'{k}{ARROW}{v}' for k, v in renames.items()) + n.set_rename_prop(renames) return n, 0 @@ -286,8 +286,8 @@ def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]: if len(renames) == 0: # Nothing to rename, removing the rename op return n, 1 - elif changes: - n.prop = ','.join(f'{k}{ARROW}{v}' for k, v in renames.items()) + else: + n.set_rename_prop(renames) return n, 1 return n, 0 diff --git a/relational/parser.py b/relational/parser.py index 593272e..a5d6d34 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -233,13 +233,23 @@ class Unary(Node): if self.name == PROJECTION: prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"') elif self.name == RENAME: - prop = repr(self.rename_dict()) + prop = repr(self.get_rename_prop()) else: # Selection prop = repr(prop) return '%s.%s(%s)' % (self.child._toPython(), op_functions[self.name], prop) - def rename_dict(self) -> Dict[str, str]: + def get_projection_prop(self) -> List[str]: + if self.name != PROJECTION: + raise ValueError('This is only supported on projection nodes') + return [i.strip() for i in self.prop.split(',')] + + def set_projection_prop(self, p: List[str]) -> None: + if self.name != PROJECTION: + raise ValueError('This is only supported on projection nodes') + self.prop = ','.join(p) + + def get_rename_prop(self) -> Dict[str, str]: ''' Returns the dictionary that the rename operation wants ''' @@ -251,6 +261,15 @@ class Unary(Node): r[q[0].strip()] = q[1].strip() return r + def set_rename_prop(self, renames: Dict[str, str]) -> None: + ''' + Sets the prop field based on the dictionary for renames + ''' + if self.name != RENAME: + raise ValueError('This is only supported on rename nodes') + self.prop = ','.join(f'{k}{ARROW}{v}' for k, v in renames.items()) + + def parse_tokens(expression: List[Union[list, str]]) -> Node: From b17bb103f67ea91dc2b50753922dae17331833e3 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 18:26:03 +0200 Subject: [PATCH 16/29] Small fixes --- relational/optimizations.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 63b6810..53d0977 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -268,24 +268,23 @@ def subsequent_renames(n: parser.Node) -> Tuple[parser.Node, int]: n = parser.Unary(RENAME, prop, child) # Creating a dictionary with the attributes - renames = n.rename_dict() + renames = n.get_rename_prop() # Scans dictionary to locate things like "a->b,b->c" and replace them # with "a->c" - changes = False for key, value in tuple(renames.items()): + if value in renames: - changes = True if renames[value] != key: # Double rename on attribute renames[key] = renames[renames[key]] # Sets value - renames.pop(value) # Removes the unused one + del renames[value] # Removes the unused one else: # Cycle rename a->b,b->a - renames.pop(value) # Removes the unused one - renames.pop(key) # Removes the unused one + del renames[value] # Removes the unused one + del renames[key] # Removes the unused one if len(renames) == 0: # Nothing to rename, removing the rename op - return n, 1 + return n.child, 1 else: n.set_rename_prop(renames) return n, 1 From cac990b598e6dfc72c8d3b5fe29efd5426df2657 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 18:27:11 +0200 Subject: [PATCH 17/29] swap_rename_projection --- relational/optimizations.py | 56 +++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 53d0977..e274b7c 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -329,46 +329,40 @@ def tokenize_select(expression): return l -def swap_rename_projection(n: parser.Node) -> int: - '''This function locates things like π k(ρ j(R)) - and replaces them with ρ j(π k(R)). +def swap_rename_projection(n: parser.Node) -> Tuple[parser.Node, int]: + '''This function locates things like + π k(ρ j(R)) + and replaces them with + ρ j(π k(R)). This will let rename work on a hopefully smaller set and more important, will hopefully allow further optimizations. - Will also eliminate fields in the rename that are cutted in the projection. + + Will also eliminate fields in the rename that are cut in the projection. ''' - changes = 0 if n.name == PROJECTION and n.child.name == RENAME: - changes = 1 - # π index,name(ρ id➡index(R)) - _vars = {} - for i in n.child.prop.split(','): - q = i.split(ARROW) - _vars[q[1].strip()] = q[0].strip() + renames = n.child.get_rename_prop() + projections = set(n.get_projection_prop()) - _pr = n.prop.split(',') - for i in range(len(_pr)): - try: - _pr[i] = _vars[_pr[i].strip()] - except: - pass + # Use pre-rename names in the projection + for k, v in renames.items(): + if v in projections: + projections.remove(v) + projections.add(k) - _pr_reborn = n.prop.split(',') - for i in list(_vars.keys()): - if i not in _pr_reborn: - _vars.pop(i) - n.name = n.child.name + # Eliminate fields + for i in list(renames.keys()): + if i not in projections: + del renames[i] - n.prop = ','.join('%s%s%s' % (i[1], ARROW, i[0]) for i in _vars.items()) + child = parser.Unary(PROJECTION,'' , n.child.child) + child.set_projection_prop(projections) + n = parser.Unary(RENAME, '', child) + n.set_rename_prop(renames) + return n, 1 - n.child.name = PROJECTION - n.child.prop = '' - for i in _pr: - n.child.prop += i + ',' - n.child.prop = n.child.prop[:-1] - - return changes + recoursive_scan(swap_rename_projection, n) + return n, 0 def swap_rename_select(n: parser.Node) -> int: @@ -636,7 +630,7 @@ general_optimizations = [ #swap_rename_select, futile_union_intersection_subtraction, swap_union_renames, - #swap_rename_projection, + swap_rename_projection, #select_union_intersect_subtract, #union_and_product, ] From 2ad28ba9ad4b880eba0f6e1bd44542a7b2325a1e Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 18:40:01 +0200 Subject: [PATCH 18/29] Add test to hit swap_rename_select --- tests_dir/people_rename_select.query | 1 + tests_dir/people_rename_select.result | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests_dir/people_rename_select.query create mode 100644 tests_dir/people_rename_select.result diff --git a/tests_dir/people_rename_select.query b/tests_dir/people_rename_select.query new file mode 100644 index 0000000..d728fc8 --- /dev/null +++ b/tests_dir/people_rename_select.query @@ -0,0 +1 @@ +σ i%2==0 (ρ id➡i (people)) diff --git a/tests_dir/people_rename_select.result b/tests_dir/people_rename_select.result new file mode 100644 index 0000000..83a690c --- /dev/null +++ b/tests_dir/people_rename_select.result @@ -0,0 +1,5 @@ +i,name,chief,age +0,jack,0,22 +2,john,1,30 +4,eve,0,25 +6,paul,4,30 From 34ed9405ea0d673fc63224cd88a232cc414a2adf Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:05:20 +0200 Subject: [PATCH 19/29] swap_rename_select --- relational/optimizations.py | 47 +++++++++++++++---------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index e274b7c..a23f7ed 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -366,40 +366,31 @@ def swap_rename_projection(n: parser.Node) -> Tuple[parser.Node, int]: def swap_rename_select(n: parser.Node) -> int: - '''This function locates things like σ k(ρ j(R)) and replaces - them with ρ j(σ k(R)). Renaming the attributes used in the + '''This function locates things like + σ k(ρ j(R)) + and replaces them with + ρ j(σ k(R)). + Renaming the attributes used in the selection, so the operation is still valid.''' - changes = 0 if n.name == SELECTION and n.child.name == RENAME: - changes = 1 - # Dictionary containing attributes of rename - _vars = {} - for i in n.child.prop.split(','): - q = i.split(ARROW) - _vars[q[1].strip()] = q[0].strip() + # This is an inverse mapping for the rename + renames = {v: k for k, v in n.child.get_rename_prop().items()} # tokenizes expression in select - _tokens = tokenize_select(n.prop) + tokens = tokenize_select(n.prop) - # Renaming stuff - for i in range(len(_tokens)): - splitted = _tokens[i].split('.', 1) - if splitted[0] in _vars: - if len(splitted) == 1: - _tokens[i] = _vars[_tokens[i].split('.')[0]] - else: - _tokens[i] = _vars[ - _tokens[i].split('.')[0]] + '.' + splitted[1] + # Renaming stuff, no enum because I edit the tokens + for i in range(len(tokens)): + splitted = tokens[i].split('.', 1) + if splitted[0] in renames: + tokens[i] = renames[splitted[0]] + if len(splitted) > 1: + tokens[i] += '.' + splitted[1] - # Swapping operators - n.name = RENAME - n.child.name = SELECTION - - n.prop = n.child.prop - n.child.prop = ' '.join(_tokens) - - return changes + recoursive_scan(swap_rename_select, n) + child = parser.Unary(SELECTION, ' '.join(tokens), n.child.child) + return parser.Unary(RENAME, n.child.prop, child), 1 + return n, 0 def select_union_intersect_subtract(n: parser.Node) -> int: @@ -627,7 +618,7 @@ general_optimizations = [ selection_inside_projection, subsequent_renames, futile_renames, - #swap_rename_select, + swap_rename_select, futile_union_intersection_subtraction, swap_union_renames, swap_rename_projection, From 7806a0a27e3e61059091e748d0a15acae97cd50e Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:15:34 +0200 Subject: [PATCH 20/29] union_and_product_ --- relational/optimizations.py | 40 +++++++++++-------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index a23f7ed..d38976e 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -433,40 +433,24 @@ def select_union_intersect_subtract(n: parser.Node) -> int: return changes + recoursive_scan(select_union_intersect_subtract, n) -def union_and_product(n: parser.Node) -> int: +def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]: ''' A * B ∪ A * C = A * (B ∪ C) Same thing with inner join ''' - - changes = 0 if n.name == UNION and n.left.name in {PRODUCT, JOIN} and n.left.name == n.right.name: - newnode = parser.Node() - newnode.kind = parser.BINARY - newnode.name = n.left.name - - newchild = parser.Node() - newchild.kind = parser.BINARY - newchild.name = UNION - if n.left.left == n.right.left or n.left.left == n.right.right: - newnode.left = n.left.left - newnode.right = newchild - - newchild.left = n.left.right - newchild.right = n.right.left if n.left.left == n.right.right else n.right.right - replace_node(n, newnode) - changes = 1 + l = n.left.right + r = n.right.left if n.left.left == n.right.right else n.right.right + newchild = parser.Binary(UNION, l, r) + return parser.Binary(n.left.name, n.left.left, newchild), 1 elif n.left.right == n.right.left or n.left.left == n.right.right: - newnode.left = n.left.right - newnode.right = newchild - - newchild.left = n.left.left - newchild.right = n.right.left if n.right.left == n.right.right else n.right.right - replace_node(n, newnode) - changes = 1 - return changes + recoursive_scan(union_and_product, n) + l = n.left.left + r = n.right.left if n.right.left == n.right.right else n.right.right + newchild = parser.Binary(UNION, l, r) + return parser.Binary(n.left.name, n.left.right, newchild), 1 + return n, 0 def projection_and_union(n, rels): @@ -498,7 +482,7 @@ def projection_and_union(n, rels): newnode.prop = n.right.prop replace_node(n, newnode) changes = 1 - return changes + recoursive_scan(projection_and_union, n, rels) + return n, 0 def selection_and_product(n, rels): @@ -623,7 +607,7 @@ general_optimizations = [ swap_union_renames, swap_rename_projection, #select_union_intersect_subtract, - #union_and_product, + union_and_product, ] specific_optimizations = [ #selection_and_product, From ff2f890b984984a94302fd87331de65f99c714ab Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:16:02 +0200 Subject: [PATCH 21/29] Remove legacy thing --- relational/optimizations.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index d38976e..eeb7fcb 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -614,6 +614,3 @@ specific_optimizations = [ #projection_and_union, #useless_projection, ] - -if __name__ == "__main__": - print (tokenize_select("skill == 'C' and id % 2 == 0")) From a2111a90c12bfc8fb1b44610af22b5a809cf5fcf Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:27:47 +0200 Subject: [PATCH 22/29] select_union_intersect_subtract --- relational/optimizations.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index eeb7fcb..b372c84 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -394,21 +394,19 @@ def swap_rename_select(n: parser.Node) -> int: def select_union_intersect_subtract(n: parser.Node) -> int: - '''This function locates things like σ i(a) ∪ σ q(a) - and replaces them with σ (i OR q) (a) + '''This function locates things like + σ i(a) ∪ σ q(a) + and replaces them with + σ (i OR q) (a) Removing a O(n²) operation like the union''' - changes = 0 if n.name in {UNION, INTERSECTION, DIFFERENCE} and \ n.left.name == SELECTION and \ n.right.name == SELECTION and \ n.left.child == n.right.child: - changes = 1 d = {UNION: 'or', INTERSECTION: 'and', DIFFERENCE: 'and not'} op = d[n.name] - newnode = parser.Node() - if n.left.prop.startswith('(') or n.right.prop.startswith('('): t_str = '(' if n.left.prop.startswith('('): @@ -422,15 +420,11 @@ def select_union_intersect_subtract(n: parser.Node) -> int: t_str += '%s' t_str += ')' - newnode.prop = t_str % (n.left.prop, op, n.right.prop) + prop = t_str % (n.left.prop, op, n.right.prop) else: - newnode.prop = '%s %s %s' % (n.left.prop, op, n.right.prop) - newnode.name = SELECTION - newnode.child = n.left.child - newnode.kind = parser.UNARY - replace_node(n, newnode) - - return changes + recoursive_scan(select_union_intersect_subtract, n) + prop = '%s %s %s' % (n.left.prop, op, n.right.prop) + return parser.Unary(SELECTION, prop, n.left.child), 1 + return n, 0 def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]: @@ -606,7 +600,7 @@ general_optimizations = [ futile_union_intersection_subtraction, swap_union_renames, swap_rename_projection, - #select_union_intersect_subtract, + select_union_intersect_subtract, union_and_product, ] specific_optimizations = [ From 4337e6073e2657a8fa1408a0990d2b08cdbfc244 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:31:56 +0200 Subject: [PATCH 23/29] CHANGELOG --- CHANGELOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 605c77a..5168f1d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,7 @@ 3.0 - Refactored parser to use better typing - -2.6 +- Refactored and fixed some optimizations +- Added more test cases - Improved survey sending - Prevent relation/field names from being reserved keywords - Fixed issue in cli where loading an invalid file would lead to a crash From 67d050e07de168c64640d03892edd0fe26145d18 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:34:26 +0200 Subject: [PATCH 24/29] Execute context optimizations --- relational/optimizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/relational/optimizer.py b/relational/optimizer.py index 30c24bd..7ff62b1 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -82,10 +82,10 @@ def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool total = 0 if specific: for i in optimizations.specific_optimizations: - res = i(n, rels) # Performs the optimization - if res != 0 and dbg: + n, c = recursive_scan(i, n, rels) + if c != 0 and dbg: debug.append(str(n)) - total += res + total += c if general: for i in optimizations.general_optimizations: n, c = recursive_scan(i, n, None) From f139257d2c8be8e0240efcc099100d504f815f90 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:37:00 +0200 Subject: [PATCH 25/29] Typing --- relational/optimizer.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/relational/optimizer.py b/relational/optimizer.py index 7ff62b1..7a4c6d7 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -24,21 +24,20 @@ # the parse tree itself will be modified accordingly. from typing import Union, Optional, Dict, Any, Tuple +from relational.relation import Relation from relational import optimizations from relational.parser import Node, Variable, Unary, Binary, op_functions, tokenize, tree from relational import querysplit from relational.maintenance import UserInterface -ContextDict = Dict[str,Any] - -def optimize_program(code, rels: ContextDict): +def optimize_program(code, rels: Dict[str, Relation]): ''' Optimize an entire program, composed by multiple expressions and assignments. ''' lines = code.split('\n') - context = {} # type: ContextDict + context = {} for line in lines: line = line.strip() @@ -53,7 +52,7 @@ def optimize_program(code, rels: ContextDict): return querysplit.split(node, rels) -def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]: +def optimize_all(expression: Union[str, Node], rels: Dict[str, Relation], specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]: '''This function performs all the available optimizations. expression : see documentation of this module @@ -98,7 +97,7 @@ def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool return n -def specific_optimize(expression, rels: ContextDict): +def specific_optimize(expression, rels: Dict[str, Relation]): '''This function performs specific optimizations. Means that it will need to know the fields used by the relations. From b987180d1cc2e412c61d069d98a9992abfccca00 Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:37:57 +0200 Subject: [PATCH 26/29] useless_projection --- relational/optimizations.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index b372c84..1ce87f0 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -30,9 +30,9 @@ from io import StringIO from tokenize import generate_tokens -from typing import Tuple - +from typing import Tuple, Dict +from relational.relation import Relation from relational import parser sel_op = ( @@ -577,17 +577,15 @@ def selection_and_product(n, rels): return changes + recoursive_scan(selection_and_product, n, rels) -def useless_projection(n, rels) -> int: +def useless_projection(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]: ''' Removes projections that are over all the fields ''' - changes = 0 if n.name == PROJECTION and \ set(n.child.result_format(rels)) == set(i.strip() for i in n.prop.split(',')): - changes = 1 - replace_node(n, n.child) + return n.child, 1 - return changes + recursive_scan(useless_projection, n, rels) + return n, 0 general_optimizations = [ duplicated_select, @@ -606,5 +604,5 @@ general_optimizations = [ specific_optimizations = [ #selection_and_product, #projection_and_union, - #useless_projection, + useless_projection, ] From f4a8344f3ed781e67734e8b081c0e8c0f07d85da Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 19:43:56 +0200 Subject: [PATCH 27/29] projection_and_union --- relational/optimizations.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 1ce87f0..bdf31aa 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -447,7 +447,7 @@ def union_and_product(n: parser.Node) -> Tuple[parser.Node, int]: return n, 0 -def projection_and_union(n, rels): +def projection_and_union(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]: ''' Turns π a,b,c(A) ∪ π a,b,c(B) @@ -462,20 +462,9 @@ def projection_and_union(n, rels): n.left.name == PROJECTION and \ n.right.name == PROJECTION and \ set(n.left.child.result_format(rels)) == set(n.right.child.result_format(rels)): - newchild = parser.Node() - newchild.kind = parser.BINARY - newchild.name = UNION - newchild.left = n.left.child - newchild.right = n.right.child - - newnode = parser.Node() - newnode.child = newchild - newnode.kind = parser.UNARY - newnode.name = PROJECTION - newnode.prop = n.right.prop - replace_node(n, newnode) - changes = 1 + child = parser.Binary(UNION, n.left.child, n.right.child) + return parser.Unary(PROJECTION, n.right.prop, child), 0 return n, 0 @@ -603,6 +592,6 @@ general_optimizations = [ ] specific_optimizations = [ #selection_and_product, - #projection_and_union, + projection_and_union, useless_projection, ] From f4a4b847afd8a2bb61b107c83b8a1c3b07952cff Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 22:47:54 +0200 Subject: [PATCH 28/29] selection_and_product partial --- relational/optimizations.py | 62 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index bdf31aa..6d61150 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -468,11 +468,10 @@ def projection_and_union(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[par return n, 0 -def selection_and_product(n, rels): +def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.Node: '''This function locates things like σ k (R*Q) and converts them into σ l (σ j (R) * σ i (Q)). Where j contains only attributes belonging to R, i contains attributes belonging to Q and l contains attributes belonging to both''' - changes = 0 if n.name == SELECTION and n.child.name in (PRODUCT, JOIN): l_attr = n.child.left.result_format(rels) @@ -518,52 +517,51 @@ def selection_and_product(n, rels): # Preparing left selection if len(left) > 0: - changes = 1 - l_node = parser.Node() - l_node.name = SELECTION - l_node.kind = parser.UNARY - l_node.child = n.child.left - l_node.prop = '' - n.child.left = l_node + l_prop = '' while len(left) > 0: c = left.pop(0) for i in c: - l_node.prop += i + ' ' + l_prop += i + ' ' if len(left) > 0: - l_node.prop += ' and ' - if '(' in l_node.prop: - l_node.prop = '(%s)' % l_node.prop + l_prop += ' and ' + if '(' in l_prop: + l_prop = '(%s)' % l_prop + l_node = parser.Unary(SELECTION, l_prop, n.child.left) + else: + l_node = n.child.left # Preparing right selection if len(right) > 0: - changes = 1 - r_node = parser.Node() - r_node.name = SELECTION - r_node.prop = '' - r_node.kind = parser.UNARY - r_node.child = n.child.right - n.child.right = r_node + + r_prop = '' while len(right) > 0: c = right.pop(0) - r_node.prop += ' '.join(c) + r_prop += ' '.join(c) if len(right) > 0: - r_node.prop += ' and ' - if '(' in r_node.prop: - r_node.prop = '(%s)' % r_node.prop + r_prop += ' and ' + if '(' in r_prop: + r_prop = '(%s)' % r_prop + r_node = parser.Unary(SELECTION, r_prop, n.child.right) + else: + r_node = n.child.right + + b_node = parser.Binary(n.child.name, l_node, r_node) + # Changing main selection - n.prop = '' + both_prop = '' if len(both) != 0: while len(both) > 0: c = both.pop(0) - n.prop += ' '.join(c) + both_prop += ' '.join(c) if len(both) > 0: - n.prop += ' and ' - if '(' in n.prop: - n.prop = '(%s)' % n.prop + both_prop += ' and ' + if '(' in both_prop: + both_prop = '(%s)' % both_prop + return parser.Unary(SELECTION, both_prop, b_node), 1 else: # No need for general select - replace_node(n, n.child) + return b_node, 1 - return changes + recoursive_scan(selection_and_product, n, rels) + return n, 0 def useless_projection(n: parser.Node, rels: Dict[str, Relation]) -> Tuple[parser.Node, int]: @@ -591,7 +589,7 @@ general_optimizations = [ union_and_product, ] specific_optimizations = [ - #selection_and_product, + selection_and_product, projection_and_union, useless_projection, ] From d7145171de13707edbc31213b87a39b03ceebbcb Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Tue, 9 Jun 2020 23:47:04 +0200 Subject: [PATCH 29/29] Finish fixing the optimization I wrote it ages ago, I was not as skilled as I am now. --- relational/optimizations.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/relational/optimizations.py b/relational/optimizations.py index 6d61150..2f3cfe3 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -506,17 +506,15 @@ def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.N if j in r_attr: # Field in right r_fields = True - if l_fields and r_fields: # Fields in both - both.append(i) - elif l_fields: + if l_fields and not r_fields: left.append(i) - elif r_fields: + elif r_fields and not l_fields: right.append(i) else: # Unknown.. adding in both both.append(i) # Preparing left selection - if len(left) > 0: + if left: l_prop = '' while len(left) > 0: c = left.pop(0) @@ -531,8 +529,7 @@ def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.N l_node = n.child.left # Preparing right selection - if len(right) > 0: - + if right: r_prop = '' while len(right) > 0: c = right.pop(0) @@ -548,8 +545,8 @@ def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.N b_node = parser.Binary(n.child.name, l_node, r_node) # Changing main selection - both_prop = '' - if len(both) != 0: + if both: + both_prop = '' while len(both) > 0: c = both.pop(0) both_prop += ' '.join(c) @@ -557,7 +554,8 @@ def selection_and_product(n: parser.Node, rels: Dict[str, Relation]) -> parser.N both_prop += ' and ' if '(' in both_prop: both_prop = '(%s)' % both_prop - return parser.Unary(SELECTION, both_prop, b_node), 1 + r = parser.Unary(SELECTION, both_prop, b_node) + return r, len(left) + len(right) else: # No need for general select return b_node, 1