From fc49ba5bbe6e8757c2fa5663dfd8016e1de4504c Mon Sep 17 00:00:00 2001 From: LtWorf Date: Mon, 29 Jun 2009 13:12:45 +0000 Subject: [PATCH] Selection can now accept expressions with parenthesis git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@196 014f5005-505e-4b48-8d0a-63407b615a7c --- CHANGELOG | 3 +- relational/optimizations.py | 82 +++++++++++++++++++++++++++++-------- relational/parser.py | 54 +++++++++++++----------- relational/rtypes.py | 5 ++- 4 files changed, 99 insertions(+), 45 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d366b8f..4d89646 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -86,4 +86,5 @@ - Implemented futile_union_intersection_subtraction general optimization - Implemented swap_rename_projection general optimization - Replaced old relational algebra to python compiler with new one based on the new tokenizer/parser (Rev 188) -- Code refactory to move the new parser into parser.py out of optimizer.py, that will still be compatible (Rev 190) \ No newline at end of file +- Code refactory to move the new parser into parser.py out of optimizer.py, that will still be compatible (Rev 190) +- Selection can now accept expressions with parenthesis \ No newline at end of file diff --git a/relational/optimizations.py b/relational/optimizations.py index dd659f2..d378d00 100644 --- a/relational/optimizations.py +++ b/relational/optimizations.py @@ -31,6 +31,8 @@ A function will have to return the number of changes performed on the tree. ''' import optimizer +import parser +sel_op=('//=','**=','and','not','in','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=','(',')',',','[',']') def replace_node(replace,replacement): '''This function replaces "replace" node with the node "with", @@ -287,44 +289,82 @@ def subsequent_renames(n): return changes+recoursive_scan(subsequent_renames,n) +class level_string(str): + level=0 + def tokenize_select(expression): '''This function returns the list of tokens present in a - selection. The expression can't contain parenthesis.''' - op=('//=','**=','and','not','//','**','<<','>>','==','!=','>=','<=','+=','-=','*=','/=','%=','or','+','-','*','/','&','|','^','~','<','>','%','=') + selection. The expression can contain parenthesis. + It will use a subclass of str with the attribute level, which + will specify the nesting level of the token into parenthesis.''' + + l=0 + while l!=len(expression): + l=len(expression) + if expression.startswith('(') and parser.find_matching_parenthesis(expression)+1==len(expression): + expression= expression[1:-1] + tokens=[] temp='' + level=0 while len(expression)!=0: expression=expression.strip() - if expression[0:3] in op:#3char op - tokens.append(temp) + + if expression[0:1]=='(': #Expression into parenthesis + level+=1 + elif expression[0:1]==')': + level-=1 + + if expression[0:3] in sel_op:#3char op + t=level_string(temp) + t.level=level + tokens.append(t) temp='' - tokens.append(expression[0:3]) + t=level_string(expression[0:3]) + t.level=level + tokens.append(t) expression=expression[3:] - elif expression[0:2] in op:#2char op - tokens.append(temp) + elif expression[0:2] in sel_op:#2char op + t=level_string(temp) + t.level=level + tokens.append(t) temp='' - tokens.append(expression[0:2]) + t=level_string(expression[0:2]) + t.level=level + tokens.append(t) expression=expression[2:] - elif expression[0:1] in op:#1char op - tokens.append(temp) + elif expression[0:1] in sel_op:#1char op + t=level_string(temp) + t.level=level + tokens.append(t) temp='' - tokens.append(expression[0:1]) + t=level_string(expression[0:1]) + t.level=level + tokens.append(t) expression=expression[1:] elif expression[0:1]=="'":#String end=expression.index("'",1) while expression[end-1]=='\\': end=expression.index("'",end+1) - #Add string to list - tokens.append(expression[0:end+1]) + t=level_string(expression[0:end+1]) + t.level=level + tokens.append(t) expression=expression[end+1:] else: temp+=expression[0:1] expression=expression[1:] pass if len(temp)!=0: - tokens.append(temp) + t=level_string(temp) + t.level=level + tokens.append(t) + while True: + try: + tokens.remove('') + except: + break return tokens def swap_rename_projection(n): @@ -425,7 +465,7 @@ def selection_and_product(n,rels): temp=[] for i in tokens: - if i=='and': + if i=='and' and i.level==0: groups.append(temp) temp=[] else: @@ -442,7 +482,7 @@ def selection_and_product(n,rels): l_fields=False #has fields in left? r_fields=False #has fields in left? - for j in i: + for j in set(i).difference(sel_op): j=j.split('.')[0] if j in l_attr:#Field in left l_fields=True @@ -473,6 +513,8 @@ def selection_and_product(n,rels): l_node.prop+=i+ ' ' if len(left)>0: l_node.prop+=' and ' + if '(' in l_node.prop: + l_node.prop='(%s)' % l_node.prop #Preparing right selection if len(right)>0: @@ -489,7 +531,8 @@ def selection_and_product(n,rels): r_node.prop+=i+ ' ' if len(right)>0: r_node.prop+=' and ' - + if '(' in r_node.prop: + r_node.prop='(%s)' % r_node.prop #Changing main selection n.prop='' if len(both)!=0: @@ -498,7 +541,9 @@ def selection_and_product(n,rels): for i in c: n.prop+=i+ ' ' if len(both)>0: - n.prop+=' and ' + n.prop+=' and ' + if '(' in n.prop: + n.prop='(%s)' % n.prop else:#No need for general select replace_node(n,n.child) @@ -506,3 +551,4 @@ def selection_and_product(n,rels): general_optimizations=[duplicated_select,down_to_unions_subtractions_intersections,duplicated_projection,selection_inside_projection,subsequent_renames,swap_rename_select,futile_union_intersection_subtraction,swap_union_renames,swap_rename_projection] specific_optimizations=[selection_and_product] + \ No newline at end of file diff --git a/relational/parser.py b/relational/parser.py index d39acb8..4603026 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -36,16 +36,17 @@ class node (object): If the node is a binary operator, it will have left and right properties. If the node is a unary operator, it will have a child, pointing to the child node and a prop containing - the string with the props of the operation.''' + the string with the props of the operation. + + This class is used to convert an expression into python code.''' kind=None def __init__(self,expression=None): - + '''Generates the tree from the tokenized expression + If no expression is specified then it will create an empty node''' if expression==None or len(expression)==0: return - '''Generates the tree from the tokenized expression''' - #If the list contains only a list, it will consider the lower level list. #This will allow things like ((((((a))))) to work while len(expression)==1 and isinstance(expression[0],list): @@ -85,7 +86,8 @@ class node (object): return pass def toPython(self): - '''This method converts the expression into python code''' + '''This method converts the expression into python code, which will require the + relation module to be executed.''' if self.name in b_operators: return '%s.%s(%s)' % (self.left.toPython(),op_functions[self.name],self.right.toPython()) elif self.name in u_operators: @@ -136,10 +138,6 @@ class node (object): return _fields elif self.name in ('ᐅᐊ','ᐅLEFTᐊ','ᐅRIGHTᐊ','ᐅFULLᐊ'): return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels)))) - - - pass - def __eq__(self,other): if not (isinstance(other,node) and self.name==other.name and self.kind==other.kind): return False @@ -168,6 +166,19 @@ class node (object): return (le+ self.name +re) +def find_matching_parenthesis(expression,start=0): + '''This function returns the position of the matching + close parenthesis to the 1st open parenthesis found + starting from start (0 by default)''' + par_count=0 #Count of parenthesis + for i in range(start,len(expression)): + if expression[i]=='(': + par_count+=1 + elif expression[i]==')': + par_count-=1 + if par_count==0: + return i #Closing parenthesis of the parameter + def tokenize(expression): '''This function converts an expression into a list where every token of the expression is an item of a list. Expressions into @@ -198,17 +209,7 @@ def tokenize(expression): while len(expression)>0: if expression.startswith('('): #Parenthesis state state=2 - par_count=0 #Count of parenthesis - end=0 - - for i in range(len(expression)): - if expression[i]=='(': - par_count+=1 - elif expression[i]==')': - par_count-=1 - if par_count==0: - end=i - break + end=find_matching_parenthesis(expression) #Appends the tokenization of the content of the parenthesis items.append(tokenize(expression[1:end])) #Removes the entire parentesis and content from the expression @@ -217,9 +218,13 @@ def tokenize(expression): elif expression.startswith("σ") or expression.startswith("π") or expression.startswith("ρ"): #Unary 2 bytes items.append(expression[0:2]) #Adding operator in the top of the list expression=expression[2:].strip() #Removing operator from the expression - par=expression.find('(') - - items.append(expression[:par]) #Inserting parameter of the operator + + if expression.startswith('('): #Expression with parenthesis, so adding what's between open and close without tokenization + par=expression.find('(',find_matching_parenthesis(expression)) + else: #Expression without parenthesis, so adding what's between start and parenthesis as whole + par=expression.find('(') + + items.append(expression[:par].strip()) #Inserting parameter of the operator expression=expression[par:].strip() #Removing parameter from the expression elif expression.startswith("*") or expression.startswith("-"): # Binary 1 byte items.append(expression[0]) @@ -290,5 +295,4 @@ def parse(expr): if __name__=="__main__": while True: e=raw_input("Expression: ") - print parse(e) - \ No newline at end of file + print parse(e) \ No newline at end of file diff --git a/relational/rtypes.py b/relational/rtypes.py index b9ca810..8727251 100644 --- a/relational/rtypes.py +++ b/relational/rtypes.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Relational # Copyright (C) 2008 Salvo "LtWorf" Tomaselli # @@ -16,7 +17,9 @@ # # author Salvo "LtWorf" Tomaselli -'''Custom types for relational algebra''' +'''Custom types for relational algebra. +Purpose of this module is having the isFloat function and +implementing dates to use in selection.''' import datetime