Changed about and README to not point to galileo anymore
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
__all__ = (
|
||||
|
||||
"relation",
|
||||
"parser",
|
||||
"optimizer",
|
||||
"optimizations",
|
||||
"rtypes",
|
||||
"parallel",
|
||||
"relation",
|
||||
"parser",
|
||||
"optimizer",
|
||||
"optimizations",
|
||||
"rtypes",
|
||||
"parallel",
|
||||
|
||||
|
||||
)
|
||||
|
@@ -23,23 +23,21 @@ import httplib
|
||||
import urllib
|
||||
import relation
|
||||
|
||||
|
||||
def send_survey(data):
|
||||
'''Sends the survey. Data must be a dictionary.
|
||||
returns the http response'''
|
||||
|
||||
post=''
|
||||
post = ''
|
||||
for i in data.keys():
|
||||
post+='%s: %s\n' %(i,data[i])
|
||||
|
||||
#sends the string
|
||||
params = urllib.urlencode({'survey':post})
|
||||
headers = {"Content-type": "application/x-www-form-urlencoded","Accept": "text/plain"}
|
||||
#connection = httplib.HTTPConnection('galileo.dmi.unict.it')
|
||||
#connection.request("POST","/~ltworf/survey.php",params,headers)
|
||||
post += '%s: %s\n' % (i, data[i])
|
||||
|
||||
# sends the string
|
||||
params = urllib.urlencode({'survey': post})
|
||||
headers = {"Content-type":
|
||||
"application/x-www-form-urlencoded", "Accept": "text/plain"}
|
||||
connection = httplib.HTTPConnection('feedback-ltworf.appspot.com')
|
||||
connection.request("POST","/feedback/relational",params,headers)
|
||||
|
||||
connection.request("POST", "/feedback/relational", params, headers)
|
||||
|
||||
return connection.getresponse()
|
||||
|
||||
@@ -49,46 +47,47 @@ def check_latest_version():
|
||||
Heavily dependent on server and server configurations
|
||||
not guaranteed to work forever.'''
|
||||
connection = httplib.HTTPConnection('feedback-ltworf.appspot.com')
|
||||
connection.request("GET","/version/relational")
|
||||
r=connection.getresponse()
|
||||
connection.request("GET", "/version/relational")
|
||||
r = connection.getresponse()
|
||||
|
||||
#html
|
||||
s=r.read()
|
||||
if len(s)==0:
|
||||
# html
|
||||
s = r.read()
|
||||
if len(s) == 0:
|
||||
return None
|
||||
return s.strip()
|
||||
|
||||
|
||||
class interface (object):
|
||||
|
||||
'''It is used to provide services to the user interfaces, in order to
|
||||
reduce the amount of duplicated code present in different user interfaces.
|
||||
'''
|
||||
|
||||
def __init__(self):
|
||||
self.rels= {}
|
||||
self.rels = {}
|
||||
|
||||
def load(self,filename,name):
|
||||
def load(self, filename, name):
|
||||
'''Loads a relation from file, and gives it a name to
|
||||
be used in subsequent queries.'''
|
||||
pass
|
||||
|
||||
def unload(self,name):
|
||||
def unload(self, name):
|
||||
'''Unloads an existing relation.'''
|
||||
pass
|
||||
|
||||
def store(self,filename,name):
|
||||
def store(self, filename, name):
|
||||
'''Stores a relation to file.'''
|
||||
pass
|
||||
|
||||
def get_relation(self,name):
|
||||
def get_relation(self, name):
|
||||
'''Returns the relation corresponding to name.'''
|
||||
pass
|
||||
|
||||
def set_relation(self,name,rel):
|
||||
def set_relation(self, name, rel):
|
||||
'''Sets the relation corresponding to name.'''
|
||||
pass
|
||||
|
||||
def execute(self,query,relname='last_'):
|
||||
def execute(self, query, relname='last_'):
|
||||
'''Executes a query, returns the result and if
|
||||
relname is not None, adds the result to the
|
||||
dictionary, with the name given in relname.'''
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -2,20 +2,20 @@
|
||||
# coding=UTF-8
|
||||
# Relational
|
||||
# Copyright (C) 2008 Salvo "LtWorf" Tomaselli
|
||||
#
|
||||
#
|
||||
# Relational is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
#
|
||||
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
||||
#
|
||||
# This module optimizes relational expressions into ones that require less time to be executed.
|
||||
@@ -29,23 +29,24 @@ import optimizations
|
||||
import parser
|
||||
|
||||
|
||||
#Stuff that was here before, keeping it for compatibility
|
||||
RELATION=parser.RELATION
|
||||
UNARY=parser.UNARY
|
||||
BINARY=parser.BINARY
|
||||
# Stuff that was here before, keeping it for compatibility
|
||||
RELATION = parser.RELATION
|
||||
UNARY = parser.UNARY
|
||||
BINARY = parser.BINARY
|
||||
|
||||
|
||||
b_operators=parser.b_operators
|
||||
u_operators=parser.u_operators
|
||||
op_functions=parser.op_functions
|
||||
node=parser.node
|
||||
tokenize=parser.tokenize
|
||||
tree=parser.tree
|
||||
#End of the stuff
|
||||
b_operators = parser.b_operators
|
||||
u_operators = parser.u_operators
|
||||
op_functions = parser.op_functions
|
||||
node = parser.node
|
||||
tokenize = parser.tokenize
|
||||
tree = parser.tree
|
||||
# End of the stuff
|
||||
|
||||
def optimize_all(expression,rels,specific=True,general=True,debug=None):
|
||||
|
||||
def optimize_all(expression, rels, specific=True, general=True, debug=None):
|
||||
'''This function performs all the available optimizations.
|
||||
|
||||
|
||||
expression : see documentation of this module
|
||||
rels: dict with relation name as key, and relation instance as value
|
||||
specific: True if it has to perform specific optimizations
|
||||
@@ -53,65 +54,70 @@ def optimize_all(expression,rels,specific=True,general=True,debug=None):
|
||||
debug: if a list is provided here, after the end of the function, it
|
||||
will contain the query repeated many times to show the performed
|
||||
steps.
|
||||
|
||||
|
||||
Return value: this will return an optimized version of the expression'''
|
||||
if isinstance(expression,unicode):
|
||||
n=tree(expression) #Gets the tree
|
||||
elif isinstance(expression,node):
|
||||
n=expression
|
||||
if isinstance(expression, unicode):
|
||||
n = tree(expression) # Gets the tree
|
||||
elif isinstance(expression, node):
|
||||
n = expression
|
||||
else:
|
||||
raise (TypeError("expression must be a unicode string or a node"))
|
||||
|
||||
if isinstance(debug,list):
|
||||
dbg=True
|
||||
|
||||
if isinstance(debug, list):
|
||||
dbg = True
|
||||
else:
|
||||
dbg=False
|
||||
|
||||
total=1
|
||||
while total!=0:
|
||||
total=0
|
||||
dbg = False
|
||||
|
||||
total = 1
|
||||
while total != 0:
|
||||
total = 0
|
||||
if specific:
|
||||
for i in optimizations.specific_optimizations:
|
||||
res=i(n,rels) #Performs the optimization
|
||||
if res!=0 and dbg: debug.append(n.__str__())
|
||||
total+=res
|
||||
res = i(n, rels) # Performs the optimization
|
||||
if res != 0 and dbg:
|
||||
debug.append(n.__str__())
|
||||
total += res
|
||||
if general:
|
||||
for i in optimizations.general_optimizations:
|
||||
res=i(n) #Performs the optimization
|
||||
if res!=0 and dbg: debug.append(n.__str__())
|
||||
total+=res
|
||||
res = i(n) # Performs the optimization
|
||||
if res != 0 and dbg:
|
||||
debug.append(n.__str__())
|
||||
total += res
|
||||
return n.__str__()
|
||||
|
||||
def specific_optimize(expression,rels):
|
||||
|
||||
def specific_optimize(expression, rels):
|
||||
'''This function performs specific optimizations. Means that it will need to
|
||||
know the fields used by the relations.
|
||||
|
||||
|
||||
expression : see documentation of this module
|
||||
rels: dict with relation name as key, and relation instance as value
|
||||
|
||||
|
||||
Return value: this will return an optimized version of the expression'''
|
||||
return optimize_all(expression,rels,specific=True,general=False)
|
||||
|
||||
return optimize_all(expression, rels, specific=True, general=False)
|
||||
|
||||
|
||||
def general_optimize(expression):
|
||||
'''This function performs general optimizations. Means that it will not need to
|
||||
know the fields used by the relations
|
||||
|
||||
expression : see documentation of this module
|
||||
|
||||
Return value: this will return an optimized version of the expression'''
|
||||
return optimize_all(expression,None,specific=False,general=True)
|
||||
|
||||
if __name__=="__main__":
|
||||
#n=node(u"((a ᑌ b) - c ᑌ d) - b")
|
||||
#n=node(u"π a,b (d-a*b)")
|
||||
|
||||
#print n.__str__()
|
||||
#a= tokenize("(a - (a ᑌ b) * π a,b (a-b)) - ρ 123 (a)")
|
||||
#a= tokenize(u"π a,b (a*b)")
|
||||
#a=tokenize("(a-b*c)*(b-c)")
|
||||
|
||||
import relation,optimizations
|
||||
|
||||
expression : see documentation of this module
|
||||
|
||||
Return value: this will return an optimized version of the expression'''
|
||||
return optimize_all(expression, None, specific=False, general=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
# n=node(u"((a ᑌ b) - c ᑌ d) - b")
|
||||
# n=node(u"π a,b (d-a*b)")
|
||||
|
||||
# print n.__str__()
|
||||
# a= tokenize("(a - (a ᑌ b) * π a,b (a-b)) - ρ 123 (a)")
|
||||
# a= tokenize(u"π a,b (a*b)")
|
||||
# a=tokenize("(a-b*c)*(b-c)")
|
||||
|
||||
import relation
|
||||
import optimizations
|
||||
|
||||
'''rels={}
|
||||
rels["P1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/people.csv")
|
||||
rels["P2"]= relation.relation("/home/salvo/dev/relational/trunk/samples/people.csv")
|
||||
@@ -120,27 +126,28 @@ if __name__=="__main__":
|
||||
rels["D1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/dates.csv")
|
||||
rels["S1"]= relation.relation("/home/salvo/dev/relational/trunk/samples/skillo.csv")
|
||||
print rels'''
|
||||
n=tree(u"π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))")
|
||||
#n=tree("σ id==3 and indice==2 and name==5 or name<2(P1 * S1)")
|
||||
n = tree(u"π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))")
|
||||
# n=tree("σ id==3 and indice==2 and name==5 or name<2(P1 * S1)")
|
||||
print n
|
||||
print n.toPython()
|
||||
|
||||
#print optimizations.selection_and_product(n,rels)
|
||||
|
||||
|
||||
# print optimizations.selection_and_product(n,rels)
|
||||
|
||||
'''
|
||||
σ skill=='C' (π id,name,chief,age (σ chief==i and age>a (ρ id➡i,age➡a(π id,age(people))*people)) ᐅᐊ skills)
|
||||
(π id,name,chief,age (σ chief == i and age > a ((ρ age➡a,id➡i (π id,age (people)))*people)))ᐅᐊ(σ skill == 'C' (skills))
|
||||
(π id,name,chief,age (σ chief == i and age > a ((ρ age➡a,id➡i (π id,age (people)))*people)))ᐅᐊ(σ skill == 'C' (skills))
|
||||
'''
|
||||
|
||||
#print specific_optimize("σ name==skill and age>21 and id==indice and skill=='C'(P1ᐅᐊS1)",rels)
|
||||
|
||||
#print n
|
||||
#print n.result_format(rels)
|
||||
|
||||
# print specific_optimize("σ name==skill and age>21 and id==indice and
|
||||
# skill=='C'(P1ᐅᐊS1)",rels)
|
||||
|
||||
# print n
|
||||
# print n.result_format(rels)
|
||||
'''σ k (r) ᑌ r with r
|
||||
σ k (r) ᑎ r with σ k (r)'''
|
||||
|
||||
#a=general_optimize('π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))')
|
||||
#a=general_optimize("σ i==2 (σ b>5 (d))")
|
||||
#print a
|
||||
#print node(a)
|
||||
#print tokenize("(a)")
|
||||
|
||||
# a=general_optimize('π indice,qq,name (ρ age➡qq,id➡indice (P1-P2))')
|
||||
# a=general_optimize("σ i==2 (σ b>5 (d))")
|
||||
# print a
|
||||
# print node(a)
|
||||
# print tokenize("(a)")
|
||||
|
@@ -1,111 +1,117 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Relational
|
||||
# Copyright (C) 2009 Salvo "LtWorf" Tomaselli
|
||||
#
|
||||
#
|
||||
# Relational is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
#
|
||||
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
||||
#
|
||||
#
|
||||
# This module offers capability of executing relational queries in parallel.
|
||||
|
||||
import optimizer
|
||||
import multiprocessing
|
||||
import parser
|
||||
|
||||
def execute(tree,rels):
|
||||
|
||||
def execute(tree, rels):
|
||||
'''This function executes a query in parallel.
|
||||
Tree is the tree describing the query (usually obtained with
|
||||
parser.tree(querystring))
|
||||
rels is a dictionary containing the relations associated with the names'''
|
||||
|
||||
q = multiprocessing.Queue()
|
||||
p = multiprocessing.Process(target=__p_exec__, args=(tree,rels,q,))
|
||||
p = multiprocessing.Process(target=__p_exec__, args=(tree, rels, q,))
|
||||
p.start()
|
||||
result= q.get()
|
||||
result = q.get()
|
||||
p.join()
|
||||
return result
|
||||
|
||||
def __p_exec__(tree,rels,q):
|
||||
|
||||
def __p_exec__(tree, rels, q):
|
||||
'''q is the queue used for communication'''
|
||||
if tree.kind==parser.RELATION:
|
||||
if tree.kind == parser.RELATION:
|
||||
q.put(rels[tree.name])
|
||||
elif tree.kind==parser.UNARY:
|
||||
|
||||
#Obtain the relation
|
||||
elif tree.kind == parser.UNARY:
|
||||
|
||||
# Obtain the relation
|
||||
temp_q = multiprocessing.Queue()
|
||||
__p_exec__(tree.child,rels,temp_q)
|
||||
rel=temp_q.get()
|
||||
|
||||
#Execute the query
|
||||
result=__p_exec_unary__(tree,rel)
|
||||
__p_exec__(tree.child, rels, temp_q)
|
||||
rel = temp_q.get()
|
||||
|
||||
# Execute the query
|
||||
result = __p_exec_unary__(tree, rel)
|
||||
q.put(result)
|
||||
elif tree.kind==parser.BINARY:
|
||||
elif tree.kind == parser.BINARY:
|
||||
left_q = multiprocessing.Queue()
|
||||
left_p = multiprocessing.Process(target=__p_exec__, args=(tree.left,rels,left_q,))
|
||||
left_p = multiprocessing.Process(
|
||||
target=__p_exec__, args=(tree.left, rels, left_q,))
|
||||
right_q = multiprocessing.Queue()
|
||||
right_p = multiprocessing.Process(target=__p_exec__, args=(tree.right,rels,right_q,))
|
||||
|
||||
|
||||
#Spawn the children
|
||||
right_p = multiprocessing.Process(
|
||||
target=__p_exec__, args=(tree.right, rels, right_q,))
|
||||
|
||||
# Spawn the children
|
||||
left_p.start()
|
||||
right_p.start()
|
||||
|
||||
#Get the left and right relations
|
||||
left= left_q.get()
|
||||
right= right_q.get()
|
||||
|
||||
#Wait for the children to terminate
|
||||
|
||||
# Get the left and right relations
|
||||
left = left_q.get()
|
||||
right = right_q.get()
|
||||
|
||||
# Wait for the children to terminate
|
||||
left_p.join()
|
||||
right_p.join()
|
||||
|
||||
result = __p_exec_binary__(tree,left,right)
|
||||
|
||||
result = __p_exec_binary__(tree, left, right)
|
||||
q.put(result)
|
||||
return
|
||||
def __p_exec_binary__(tree,left,right):
|
||||
if tree.name=='*':
|
||||
|
||||
|
||||
def __p_exec_binary__(tree, left, right):
|
||||
if tree.name == '*':
|
||||
return left.product(right)
|
||||
elif tree.name=='-':
|
||||
elif tree.name == '-':
|
||||
return left.difference(right)
|
||||
elif tree.name=='ᑌ':
|
||||
elif tree.name == 'ᑌ':
|
||||
return left.union(right)
|
||||
elif tree.name=='ᑎ':
|
||||
elif tree.name == 'ᑎ':
|
||||
return left.intersection(right)
|
||||
elif tree.name=='÷':
|
||||
elif tree.name == '÷':
|
||||
return left.division(right)
|
||||
elif tree.name=='ᐅᐊ':
|
||||
elif tree.name == 'ᐅᐊ':
|
||||
return left.join(right)
|
||||
elif tree.name=='ᐅLEFTᐊ':
|
||||
elif tree.name == 'ᐅLEFTᐊ':
|
||||
return left.outer_left(right)
|
||||
elif tree.name=='ᐅRIGHTᐊ':
|
||||
elif tree.name == 'ᐅRIGHTᐊ':
|
||||
return left.outer_right(right)
|
||||
else: # tree.name=='ᐅFULLᐊ':
|
||||
else: # tree.name=='ᐅFULLᐊ':
|
||||
return left.outer(right)
|
||||
|
||||
def __p_exec_unary__(tree,rel):
|
||||
if tree.name=='π':#Projection
|
||||
tree.prop=tree.prop.replace(' ','').split(',')
|
||||
result= rel.projection(tree.prop)
|
||||
elif tree.name=="ρ": #Rename
|
||||
#tree.prop='{\"%s\"}' % tree.prop.replace(',','\",\"').replace('➡','\":\"').replace(' ','')
|
||||
d={}
|
||||
tree.prop=tree.prop.replace(' ','')
|
||||
|
||||
|
||||
def __p_exec_unary__(tree, rel):
|
||||
if tree.name == 'π': # Projection
|
||||
tree.prop = tree.prop.replace(' ', '').split(',')
|
||||
result = rel.projection(tree.prop)
|
||||
elif tree.name == "ρ": # Rename
|
||||
# tree.prop='{\"%s\"}' %
|
||||
# tree.prop.replace(',','\",\"').replace('➡','\":\"').replace(' ','')
|
||||
d = {}
|
||||
tree.prop = tree.prop.replace(' ', '')
|
||||
for i in tree.prop.split(','):
|
||||
rename_=i.split('➡')
|
||||
d[rename_[0]]=rename_[1]
|
||||
|
||||
result= rel.rename(d)
|
||||
else: #Selection
|
||||
result= rel.selection(tree.prop)
|
||||
rename_ = i.split('➡')
|
||||
d[rename_[0]] = rename_[1]
|
||||
|
||||
result = rel.rename(d)
|
||||
else: # Selection
|
||||
result = rel.selection(tree.prop)
|
||||
return result
|
||||
|
@@ -2,20 +2,20 @@
|
||||
# coding=UTF-8
|
||||
# Relational
|
||||
# Copyright (C) 2008 Salvo "LtWorf" Tomaselli
|
||||
#
|
||||
#
|
||||
# Relational is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
#
|
||||
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
||||
#
|
||||
#
|
||||
@@ -25,15 +25,15 @@
|
||||
# of the expression.
|
||||
#
|
||||
# The input must be provided in UTF-8
|
||||
#
|
||||
#
|
||||
#
|
||||
# Language definition:
|
||||
# Query := Ident
|
||||
# Query := Query BinaryOp Query
|
||||
# Query := (Query)
|
||||
# Query := σ PYExprWithoutParenthesis (Query) | σ (PYExpr) (Query)
|
||||
# Query := π FieldList (Query)
|
||||
# Query := ρ RenameList (Query)
|
||||
# Query := (Query)
|
||||
# Query := σ PYExprWithoutParenthesis (Query) | σ (PYExpr) (Query)
|
||||
# Query := π FieldList (Query)
|
||||
# Query := ρ RenameList (Query)
|
||||
# FieldList := Ident | Ident , FieldList
|
||||
# RenameList := Ident ➡ Ident | Ident ➡ Ident , RenameList
|
||||
# BinaryOp := * | - | ᑌ | ᑎ | ÷ | ᐅᐊ | ᐅLEFTᐊ | ᐅRIGHTᐊ | ᐅFULLᐊ
|
||||
@@ -43,246 +43,264 @@
|
||||
import re
|
||||
import rtypes
|
||||
|
||||
RELATION=0
|
||||
UNARY=1
|
||||
BINARY=2
|
||||
RELATION = 0
|
||||
UNARY = 1
|
||||
BINARY = 2
|
||||
|
||||
PRODUCT=u'*'
|
||||
DIFFERENCE=u'-'
|
||||
UNION=u'ᑌ'
|
||||
INTERSECTION=u'ᑎ'
|
||||
DIVISION=u'÷'
|
||||
JOIN=u'ᐅᐊ'
|
||||
JOIN_LEFT=u'ᐅLEFTᐊ'
|
||||
JOIN_RIGHT=u'ᐅRIGHTᐊ'
|
||||
JOIN_FULL=u'ᐅFULLᐊ'
|
||||
PROJECTION=u'π'
|
||||
SELECTION=u'σ'
|
||||
RENAME=u'ρ'
|
||||
ARROW=u'➡'
|
||||
PRODUCT = u'*'
|
||||
DIFFERENCE = u'-'
|
||||
UNION = u'ᑌ'
|
||||
INTERSECTION = u'ᑎ'
|
||||
DIVISION = u'÷'
|
||||
JOIN = u'ᐅᐊ'
|
||||
JOIN_LEFT = u'ᐅLEFTᐊ'
|
||||
JOIN_RIGHT = u'ᐅRIGHTᐊ'
|
||||
JOIN_FULL = u'ᐅFULLᐊ'
|
||||
PROJECTION = u'π'
|
||||
SELECTION = u'σ'
|
||||
RENAME = u'ρ'
|
||||
ARROW = u'➡'
|
||||
|
||||
b_operators=(PRODUCT,DIFFERENCE,UNION,INTERSECTION,DIVISION,JOIN,JOIN_LEFT,JOIN_RIGHT,JOIN_FULL) # List of binary operators
|
||||
u_operators=(PROJECTION,SELECTION,RENAME) # List of unary operators
|
||||
b_operators = (PRODUCT, DIFFERENCE, UNION, INTERSECTION, DIVISION,
|
||||
JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL) # List of binary operators
|
||||
u_operators = (PROJECTION, SELECTION, RENAME) # List of unary operators
|
||||
|
||||
# Associates operator with python method
|
||||
op_functions={PRODUCT:'product',DIFFERENCE:'difference',UNION:'union',INTERSECTION:'intersection',DIVISION:'division',JOIN:'join',JOIN_LEFT:'outer_left',JOIN_RIGHT:'outer_right',JOIN_FULL:'outer',PROJECTION:'projection',SELECTION:'selection',RENAME:'rename'}
|
||||
op_functions = {
|
||||
PRODUCT: 'product', DIFFERENCE: 'difference', UNION: 'union', INTERSECTION: 'intersection', DIVISION: 'division', JOIN: 'join',
|
||||
JOIN_LEFT: 'outer_left', JOIN_RIGHT: 'outer_right', JOIN_FULL: 'outer', PROJECTION: 'projection', SELECTION: 'selection', RENAME: 'rename'}
|
||||
|
||||
|
||||
class TokenizerException (Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ParserException (Exception):
|
||||
pass
|
||||
|
||||
|
||||
class node (object):
|
||||
|
||||
'''This class is a node of a relational expression. Leaves are relations and internal nodes are operations.
|
||||
|
||||
|
||||
The kind property says if the node is a binary operator, unary operator or relation.
|
||||
Since relations are leaves, a relation node will have no attribute for children.
|
||||
|
||||
|
||||
If the node is a binary operator, it will have left and right properties.
|
||||
|
||||
|
||||
If the node is a unary operator, it will have a child, pointing to the child node and a prop containing
|
||||
the string with the props of the operation.
|
||||
|
||||
|
||||
This class is used to convert an expression into python code.'''
|
||||
kind=None
|
||||
__hash__=None
|
||||
|
||||
def __init__(self,expression=None):
|
||||
kind = None
|
||||
__hash__ = None
|
||||
|
||||
def __init__(self, expression=None):
|
||||
'''Generates the tree from the tokenized expression
|
||||
If no expression is specified then it will create an empty node'''
|
||||
if expression==None or len(expression)==0:
|
||||
if expression == None or len(expression) == 0:
|
||||
return
|
||||
|
||||
#If the list contains only a list, it will consider the lower level list.
|
||||
#This will allow things like ((((((a))))) to work
|
||||
while len(expression)==1 and isinstance(expression[0],list):
|
||||
expression=expression[0]
|
||||
|
||||
#The list contains only 1 string. Means it is the name of a relation
|
||||
if len(expression)==1 and isinstance(expression[0],unicode):
|
||||
self.kind=RELATION
|
||||
self.name=expression[0]
|
||||
|
||||
# If the list contains only a list, it will consider the lower level list.
|
||||
# This will allow things like ((((((a))))) to work
|
||||
while len(expression) == 1 and isinstance(expression[0], list):
|
||||
expression = expression[0]
|
||||
|
||||
# The list contains only 1 string. Means it is the name of a relation
|
||||
if len(expression) == 1 and isinstance(expression[0], unicode):
|
||||
self.kind = RELATION
|
||||
self.name = expression[0]
|
||||
if not rtypes.is_valid_relation_name(self.name):
|
||||
raise ParserException(u"'%s' is not a valid relation name" % self.name)
|
||||
raise ParserException(
|
||||
u"'%s' is not a valid relation name" % self.name)
|
||||
return
|
||||
|
||||
|
||||
'''Expression from right to left, searching for binary operators
|
||||
this means that binary operators have lower priority than
|
||||
unary operators.
|
||||
It finds the operator with lowest priority, uses it as root of this
|
||||
(sub)tree using everything on its left as left parameter (so building
|
||||
a left subtree with the part of the list located on left) and doing
|
||||
a left subtree with the part of the list located on left) and doing
|
||||
the same on right.
|
||||
Since it searches for strings, and expressions into parenthesis are
|
||||
within sub-lists, they won't be found here, ensuring that they will
|
||||
have highest priority.'''
|
||||
for i in xrange(len(expression)-1,-1,-1):
|
||||
if expression[i] in b_operators: #Binary operator
|
||||
self.kind=BINARY
|
||||
self.name=expression[i]
|
||||
|
||||
if len(expression[:i])==0:
|
||||
raise ParserException(u"Expected left operand for '%s'" % self.name)
|
||||
|
||||
if len(expression[i+1:])==0:
|
||||
raise ParserException(u"Expected right operand for '%s'" % self.name)
|
||||
|
||||
self.left=node(expression[:i])
|
||||
self.right=node(expression[i+1:])
|
||||
for i in xrange(len(expression) - 1, -1, -1):
|
||||
if expression[i] in b_operators: # Binary operator
|
||||
self.kind = BINARY
|
||||
self.name = expression[i]
|
||||
|
||||
if len(expression[:i]) == 0:
|
||||
raise ParserException(
|
||||
u"Expected left operand for '%s'" % self.name)
|
||||
|
||||
if len(expression[i + 1:]) == 0:
|
||||
raise ParserException(
|
||||
u"Expected right operand for '%s'" % self.name)
|
||||
|
||||
self.left = node(expression[:i])
|
||||
self.right = node(expression[i + 1:])
|
||||
return
|
||||
'''Searches for unary operators, parsing from right to left'''
|
||||
for i in xrange(len(expression)-1,-1,-1):
|
||||
if expression[i] in u_operators: #Unary operator
|
||||
self.kind=UNARY
|
||||
self.name=expression[i]
|
||||
|
||||
if len(expression)<=i+2:
|
||||
raise ParserException(u"Expected more tokens in '%s'"%self.name)
|
||||
|
||||
self.prop=expression[1+i].strip()
|
||||
self.child=node(expression[2+i])
|
||||
|
||||
for i in xrange(len(expression) - 1, -1, -1):
|
||||
if expression[i] in u_operators: # Unary operator
|
||||
self.kind = UNARY
|
||||
self.name = expression[i]
|
||||
|
||||
if len(expression) <= i + 2:
|
||||
raise ParserException(
|
||||
u"Expected more tokens in '%s'" % self.name)
|
||||
|
||||
self.prop = expression[1 + i].strip()
|
||||
self.child = node(expression[2 + i])
|
||||
|
||||
return
|
||||
raise ParserException(u"Unable to parse tokens")
|
||||
pass
|
||||
|
||||
def toCode(self):
|
||||
'''This method converts the tree into a python code object'''
|
||||
code = self.toPython()
|
||||
return compile(code,'<relational_expression>','eval')
|
||||
|
||||
return compile(code, '<relational_expression>', 'eval')
|
||||
|
||||
def toPython(self):
|
||||
'''This method converts the expression into a python code string, which
|
||||
'''This method converts the expression into a python code string, which
|
||||
will require the relation module to be executed.'''
|
||||
if self.name in b_operators:
|
||||
return '%s.%s(%s)' % (self.left.toPython(),op_functions[self.name],self.right.toPython())
|
||||
return '%s.%s(%s)' % (self.left.toPython(), op_functions[self.name], self.right.toPython())
|
||||
elif self.name in u_operators:
|
||||
prop =self.prop
|
||||
|
||||
#Converting parameters
|
||||
if self.name==PROJECTION:
|
||||
prop='\"%s\"' % prop.replace(' ','').replace(',','\",\"')
|
||||
elif self.name==RENAME:
|
||||
prop='{\"%s\"}' % prop.replace(',','\",\"').replace(ARROW,'\":\"').replace(' ','')
|
||||
else: #Selection
|
||||
prop='\"%s\"' % prop
|
||||
|
||||
return '%s.%s(%s)' % (self.child.toPython(),op_functions[self.name],prop)
|
||||
prop = self.prop
|
||||
|
||||
# Converting parameters
|
||||
if self.name == PROJECTION:
|
||||
prop = '\"%s\"' % prop.replace(' ', '').replace(',', '\",\"')
|
||||
elif self.name == RENAME:
|
||||
prop = '{\"%s\"}' % prop.replace(
|
||||
',', '\",\"').replace(ARROW, '\":\"').replace(' ', '')
|
||||
else: # Selection
|
||||
prop = '\"%s\"' % prop
|
||||
|
||||
return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop)
|
||||
else:
|
||||
return self.name
|
||||
pass
|
||||
def printtree(self,level=0):
|
||||
|
||||
def printtree(self, level=0):
|
||||
'''returns a representation of the tree using indentation'''
|
||||
r=''
|
||||
r = ''
|
||||
for i in range(level):
|
||||
r+=' '
|
||||
r+=self.name
|
||||
r += ' '
|
||||
r += self.name
|
||||
if self.name in b_operators:
|
||||
r+=self.left.printtree(level+1)
|
||||
r+=self.right.printtree(level+1)
|
||||
r += self.left.printtree(level + 1)
|
||||
r += self.right.printtree(level + 1)
|
||||
elif self.name in u_operators:
|
||||
r+='\t%s\n' % self.prop
|
||||
r+=self.child.printtree(level+1)
|
||||
|
||||
return '\n'+r
|
||||
r += '\t%s\n' % self.prop
|
||||
r += self.child.printtree(level + 1)
|
||||
|
||||
return '\n' + r
|
||||
|
||||
def get_left_leaf(self):
|
||||
'''This function returns the leftmost leaf in the tree. It is needed by some optimizations.'''
|
||||
if self.kind==RELATION:
|
||||
if self.kind == RELATION:
|
||||
return self
|
||||
elif self.kind==UNARY:
|
||||
elif self.kind == UNARY:
|
||||
return self.child.get_left_leaf()
|
||||
elif self.kind==BINARY:
|
||||
elif self.kind == BINARY:
|
||||
return self.left.get_left_leaf()
|
||||
|
||||
|
||||
def result_format(self,rels):
|
||||
|
||||
def result_format(self, rels):
|
||||
'''This function returns a list containing the fields that the resulting relation will have.
|
||||
It requires a dictionary where keys are the names of the relations and the values are
|
||||
the relation objects.'''
|
||||
if rels==None:
|
||||
if rels == None:
|
||||
return
|
||||
|
||||
if self.kind==RELATION:
|
||||
|
||||
if self.kind == RELATION:
|
||||
return list(rels[self.name].header.attributes)
|
||||
elif self.kind==BINARY and self.name in (DIFFERENCE,UNION,INTERSECTION):
|
||||
elif self.kind == BINARY and self.name in (DIFFERENCE, UNION, INTERSECTION):
|
||||
return self.left.result_format(rels)
|
||||
elif self.kind==BINARY and self.name==DIVISION:
|
||||
elif self.kind == BINARY and self.name == DIVISION:
|
||||
return list(set(self.left.result_format(rels)) - set(self.right.result_format(rels)))
|
||||
elif self.name==PROJECTION:
|
||||
l=[]
|
||||
elif self.name == PROJECTION:
|
||||
l = []
|
||||
for i in self.prop.split(','):
|
||||
l.append(i.strip())
|
||||
return l
|
||||
elif self.name==PRODUCT:
|
||||
return self.left.result_format(rels)+self.right.result_format(rels)
|
||||
elif self.name==SELECTION:
|
||||
elif self.name == PRODUCT:
|
||||
return self.left.result_format(rels) + self.right.result_format(rels)
|
||||
elif self.name == SELECTION:
|
||||
return self.child.result_format(rels)
|
||||
elif self.name==RENAME:
|
||||
_vars={}
|
||||
elif self.name == RENAME:
|
||||
_vars = {}
|
||||
for i in self.prop.split(','):
|
||||
q=i.split(ARROW)
|
||||
_vars[q[0].strip()]=q[1].strip()
|
||||
|
||||
_fields=self.child.result_format(rels)
|
||||
q = i.split(ARROW)
|
||||
_vars[q[0].strip()] = q[1].strip()
|
||||
|
||||
_fields = self.child.result_format(rels)
|
||||
for i in range(len(_fields)):
|
||||
if _fields[i] in _vars:
|
||||
_fields[i]=_vars[_fields[i]]
|
||||
_fields[i] = _vars[_fields[i]]
|
||||
return _fields
|
||||
elif self.name in (JOIN,JOIN_LEFT,JOIN_RIGHT,JOIN_FULL):
|
||||
elif self.name in (JOIN, JOIN_LEFT, JOIN_RIGHT, JOIN_FULL):
|
||||
return list(set(self.left.result_format(rels)).union(set(self.right.result_format(rels))))
|
||||
def __eq__(self,other):
|
||||
if not (isinstance(other,node) and self.name==other.name and self.kind==other.kind):
|
||||
return False
|
||||
|
||||
if self.kind==UNARY:
|
||||
if other.prop!=self.prop:
|
||||
return False
|
||||
return self.child==other.child
|
||||
if self.kind==BINARY:
|
||||
return self.left==other.left and self.right==other.right
|
||||
return True
|
||||
|
||||
def __str__(self):
|
||||
if (self.kind==RELATION):
|
||||
return self.name
|
||||
elif (self.kind==UNARY):
|
||||
return self.name + " "+ self.prop+ " (" + self.child.__str__() +")"
|
||||
elif (self.kind==BINARY):
|
||||
if self.left.kind==RELATION:
|
||||
le=self.left.__str__()
|
||||
else:
|
||||
le="("+self.left.__str__()+")"
|
||||
if self.right.kind==RELATION:
|
||||
re=self.right.__str__()
|
||||
else:
|
||||
re="("+self.right.__str__()+")"
|
||||
|
||||
return (le+ self.name +re)
|
||||
|
||||
def _find_matching_parenthesis(expression,start=0,openpar=u'(',closepar=u')'):
|
||||
def __eq__(self, other):
    '''Compares two expression trees.
    They are equal when other is a node with the same name and kind and,
    depending on the kind, the same property and child (unary) or the
    same left and right subtrees (binary).'''
    comparable = (isinstance(other, node)
                  and self.name == other.name
                  and self.kind == other.kind)
    if not comparable:
        return False

    if self.kind == UNARY:
        # Same operator: parameters first, then the operand subtree
        if other.prop != self.prop:
            return False
        return self.child == other.child

    if self.kind == BINARY:
        return self.left == other.left and self.right == other.right

    # Any other kind has nothing left to compare besides name and kind
    return True
|
||||
|
||||
def __str__(self):
    '''Returns the expression represented by the tree rooted in this node.
    A relation is rendered with its name, a unary operator as
    "name prop (child)" and a binary operator as left, name and right
    joined together, where every operand that is not a plain relation
    is wrapped in parenthesis.
    Nothing is returned for any other kind of node.'''
    if self.kind == RELATION:
        return self.name

    if self.kind == UNARY:
        return self.name + " " + self.prop + " (" + self.child.__str__() + ")"

    if self.kind == BINARY:
        rendered = []
        for operand in (self.left, self.right):
            text = operand.__str__()
            if operand.kind != RELATION:
                # Sub-expressions need parenthesis to keep their priority
                text = "(" + text + ")"
            rendered.append(text)
        return (rendered[0] + self.name + rendered[1])
|
||||
|
||||
|
||||
def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')'):
    '''This function returns the position of the matching
    close parenthesis to the 1st open parenthesis found
    starting from start (0 by default).

    Nested parenthesis are taken into account. openpar and closepar
    are the characters used as open and close parenthesis.
    Returns None if there is no open parenthesis or if it is never
    closed.'''
    depth = 0  # Count of parenthesis currently open
    for position in range(start, len(expression)):
        char = expression[position]
        if char == openpar:
            depth += 1
        elif char == closepar and depth > 0:
            # A close parenthesis met before any open one is skipped:
            # counting it would make the counter negative and the
            # real match would never bring it back to 0
            depth -= 1
            if depth == 0:
                return position  # Closing parenthesis of the parameter
    return None
|
||||
|
||||
|
||||
def tokenize(expression):
|
||||
'''This function converts an expression into a list where
|
||||
every token of the expression is an item of a list. Expressions into
|
||||
parenthesis will be converted into sublists.'''
|
||||
if not isinstance(expression,unicode):
|
||||
if not isinstance(expression, unicode):
|
||||
raise TokenizerException('expected unicode')
|
||||
|
||||
items=[] #List for the tokens
|
||||
|
||||
|
||||
items = [] # List for the tokens
|
||||
|
||||
'''This is a state machine. Initial status is determined by the starting of the
|
||||
expression. There are the following statuses:
|
||||
|
||||
|
||||
relation: this is the status if the expressions begins with something else than an
|
||||
operator or a parenthesis.
|
||||
binary operator: this is the status when parsing a binary operator, nothing much to say
|
||||
@@ -290,9 +308,9 @@ def tokenize(expression):
|
||||
sub-expression.
|
||||
sub-expression: this status is entered when finding a '(' and will be exited when finding a ')'.
|
||||
means that the others open must be counted to determine which close is the right one.'''
|
||||
|
||||
expression=expression.strip() #Removes initial and endind spaces
|
||||
state=0
|
||||
|
||||
expression = expression.strip() # Removes initial and endind spaces
|
||||
state = 0
|
||||
'''
|
||||
0 initial and useless
|
||||
1 previous stuff was a relation
|
||||
@@ -301,83 +319,92 @@ def tokenize(expression):
|
||||
4 previous stuff was a binary operator
|
||||
'''
|
||||
|
||||
while len(expression)>0:
|
||||
|
||||
if expression.startswith('('): #Parenthesis state
|
||||
state=2
|
||||
end=_find_matching_parenthesis(expression)
|
||||
if end==None:
|
||||
raise TokenizerException("Missing matching ')' in '%s'" %expression)
|
||||
#Appends the tokenization of the content of the parenthesis
|
||||
while len(expression) > 0:
|
||||
|
||||
if expression.startswith('('): # Parenthesis state
|
||||
state = 2
|
||||
end = _find_matching_parenthesis(expression)
|
||||
if end == None:
|
||||
raise TokenizerException(
|
||||
"Missing matching ')' in '%s'" % expression)
|
||||
# Appends the tokenization of the content of the parenthesis
|
||||
items.append(tokenize(expression[1:end]))
|
||||
#Removes the entire parentesis and content from the expression
|
||||
expression=expression[end+1:].strip()
|
||||
|
||||
elif expression.startswith((u"σ",u"π",u"ρ")): #Unary 2 bytes
|
||||
items.append(expression[0:1]) #Adding operator in the top of the list
|
||||
expression=expression[1:].strip() #Removing operator from the expression
|
||||
|
||||
if expression.startswith('('): #Expression with parenthesis, so adding what's between open and close without tokenization
|
||||
par=expression.find('(',_find_matching_parenthesis(expression))
|
||||
else: #Expression without parenthesis, so adding what's between start and parenthesis as whole
|
||||
par=expression.find('(')
|
||||
|
||||
items.append(expression[:par].strip()) #Inserting parameter of the operator
|
||||
expression=expression[par:].strip() #Removing parameter from the expression
|
||||
elif expression.startswith((u"÷",u"ᑎ",u"ᑌ",u"*",u"-")):
|
||||
# Removes the entire parentesis and content from the expression
|
||||
expression = expression[end + 1:].strip()
|
||||
|
||||
elif expression.startswith((u"σ", u"π", u"ρ")): # Unary 2 bytes
|
||||
items.append(expression[0:1])
|
||||
#Adding operator in the top of the list
|
||||
expression = expression[
|
||||
1:].strip() # Removing operator from the expression
|
||||
|
||||
if expression.startswith('('): # Expression with parenthesis, so adding what's between open and close without tokenization
|
||||
par = expression.find(
|
||||
'(', _find_matching_parenthesis(expression))
|
||||
else: # Expression without parenthesis, so adding what's between start and parenthesis as whole
|
||||
par = expression.find('(')
|
||||
|
||||
items.append(expression[:par].strip())
|
||||
#Inserting parameter of the operator
|
||||
expression = expression[
|
||||
par:].strip() # Removing parameter from the expression
|
||||
elif expression.startswith((u"÷", u"ᑎ", u"ᑌ", u"*", u"-")):
|
||||
items.append(expression[0])
|
||||
expression=expression[1:].strip() #1 char from the expression
|
||||
state=4
|
||||
elif expression.startswith(u"ᐅ"): #Binary long
|
||||
i=expression.find(u"ᐊ")
|
||||
if i==-1:
|
||||
expression = expression[1:].strip() # 1 char from the expression
|
||||
state = 4
|
||||
elif expression.startswith(u"ᐅ"): # Binary long
|
||||
i = expression.find(u"ᐊ")
|
||||
if i == -1:
|
||||
raise TokenizerException(u"Expected ᐊ in %s" % (expression,))
|
||||
items.append(expression[:i+1])
|
||||
expression=expression[i+1:].strip()
|
||||
state=4
|
||||
elif re.match(r'[_0-9A-Za-z]',expression[0])==None: #At this point we only have relation names, so we raise errors for anything else
|
||||
raise TokenizerException("Unexpected '%c' in '%s'" % (expression[0],expression))
|
||||
else: #Relation (hopefully)
|
||||
if state==1: #Previous was a relation, appending to the last token
|
||||
i=items.pop()
|
||||
items.append(i+expression[0])
|
||||
expression=expression[1:].strip() #1 char from the expression
|
||||
items.append(expression[:i + 1])
|
||||
expression = expression[i + 1:].strip()
|
||||
state = 4
|
||||
elif re.match(r'[_0-9A-Za-z]', expression[0]) == None: # At this point we only have relation names, so we raise errors for anything else
|
||||
raise TokenizerException(
|
||||
"Unexpected '%c' in '%s'" % (expression[0], expression))
|
||||
else: # Relation (hopefully)
|
||||
if state == 1: # Previous was a relation, appending to the last token
|
||||
i = items.pop()
|
||||
items.append(i + expression[0])
|
||||
expression = expression[
|
||||
1:].strip() # 1 char from the expression
|
||||
else:
|
||||
state=1
|
||||
state = 1
|
||||
items.append(expression[0])
|
||||
expression=expression[1:].strip() #1 char from the expression
|
||||
|
||||
expression = expression[
|
||||
1:].strip() # 1 char from the expression
|
||||
|
||||
return items
|
||||
|
||||
|
||||
def tree(expression):
    '''Parses a relational algebra expression and returns the root node
    of the resulting tree. The expression is split with tokenize and
    the tokens are handed to the node class defined in this module.'''
    tokens = tokenize(expression)
    return node(tokens)
|
||||
|
||||
|
||||
|
||||
def parse(expr):
|
||||
'''This function parses a relational algebra expression, converting it into python,
|
||||
'''This function parses a relational algebra expression, converting it into python,
|
||||
executable by eval function to get the result of the expression.
|
||||
It has 2 class of operators:
|
||||
without parameters
|
||||
*, -, ᑌ, ᑎ, ᐅᐊ, ᐅLEFTᐊ, ᐅRIGHTᐊ, ᐅFULLᐊ
|
||||
with parameters:
|
||||
σ, π, ρ
|
||||
|
||||
|
||||
Syntax for operators without parameters is:
|
||||
relation operator relation
|
||||
|
||||
|
||||
Syntax for operators with parameters is:
|
||||
operator parameters (relation)
|
||||
|
||||
|
||||
Since a*b is a relation itself, you can parse π a,b (a*b).
|
||||
And since π a,b (A) is a relation, you can parse π a,b (A) ᑌ B.
|
||||
|
||||
|
||||
You can use parenthesis to change priority: a ᐅᐊ (q ᑌ d).
|
||||
|
||||
|
||||
IMPORTANT: all strings must be unicode
|
||||
|
||||
|
||||
EXAMPLES
|
||||
σage > 25 and rank == weight(A)
|
||||
Q ᐅᐊ π a,b(A) ᐅᐊ B
|
||||
@@ -387,13 +414,13 @@ def parse(expr):
|
||||
A ᐅᐊ B
|
||||
'''
|
||||
return tree(expr).toPython()
|
||||
|
||||
if __name__=="__main__":
|
||||
|
||||
if __name__ == "__main__":
    # Interactive loop used to try the parser by hand: reads an expression
    # from the standard input, decodes it from utf-8 and prints what
    # parse() returns for it. The loop has no exit condition.
    #
    # Sample expression: σ age>1 and skill=='C' (peopleᐅᐊskills)
    while True:
        typed = raw_input("Expression: ")
        print(parse(unicode(typed, 'utf-8')))
|
||||
|
@@ -1,39 +1,42 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Relational
|
||||
# Copyright (C) 2008 Salvo "LtWorf" Tomaselli
|
||||
#
|
||||
#
|
||||
# Relational is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
#
|
||||
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
||||
#
|
||||
# This module provides a classes to represent queries
|
||||
|
||||
import parser
|
||||
|
||||
|
||||
class TypeException(Exception):
    '''Exception raised when a query of the wrong type is provided.'''
|
||||
|
||||
|
||||
class Query(object):
    '''Represents a query: keeps the original text, the tree obtained by
    parsing it and the slots for the optimized versions.'''

    def __init__(self, query):
        '''Creates the query from a unicode string.
        Raises TypeException if query is not unicode.'''
        if isinstance(query, unicode):
            self.query = query
            self.tree = parser.tree(query)
        else:
            raise TypeException('Expected unicode')
        # TODO self.query_code = parser

        # Nothing has been optimized yet
        self.optimized = self.optimized_query = self.optimized_code = None
|
||||
|
@@ -1,20 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Relational
|
||||
# Copyright (C) 2008 Salvo "LtWorf" Tomaselli
|
||||
#
|
||||
#
|
||||
# Relational is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
#
|
||||
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
||||
#
|
||||
# This module provides a classes to represent relations and to perform
|
||||
@@ -23,410 +23,420 @@
|
||||
from rtypes import *
|
||||
import csv
|
||||
|
||||
|
||||
class relation (object):
|
||||
|
||||
'''This objects defines a relation (as a group of consistent tuples) and operations
|
||||
A relation can be represented using a table
|
||||
Calling an operation and providing a non relation parameter when it is expected will
|
||||
result in a None value'''
|
||||
__hash__=None
|
||||
|
||||
def __init__(self,filename=""):
|
||||
result in a None value'''
|
||||
__hash__ = None
|
||||
|
||||
def __init__(self, filename=""):
    '''Creates a relation, accepts a filename and then it will load the relation from
    that file. If no parameter is supplied an empty relation is created. Empty
    relations are used in internal operations.
    By default the file will be handled like a comma separated as described in
    RFC4180.

    The first row of the file is used as header, every following row
    becomes a tuple of the relation.'''
    self._readonly = False

    if len(filename) == 0:  # Empty relation
        self.content = set()
        self.header = header([])
        return

    # The with statement closes the file even when reading fails
    # (for example on an empty file, that has no header row)
    with open(filename) as fp:
        reader = csv.reader(fp)  # Creating a csv reader
        self.header = header(next(reader))  # read 1st line
        self.content = set()
        for row in reader:  # Iterating rows
            self.content.add(tuple(row))
|
||||
|
||||
|
||||
def _make_writable(self):
    '''Makes sure the content can be modified: when the relation is
    marked as readonly its content is replaced with a copy and the
    mark is removed. Does nothing on a writable relation.'''
    if not self._readonly:
        return
    self.content = set(self.content)
    self._readonly = False
|
||||
|
||||
def save(self, filename):
    '''Saves the relation in a file, using the csv format as defined in
    RFC4180. The first row contains the names of the attributes, every
    following row is a tuple of the relation.
    '''
    # The with statement closes the file even when writing fails
    with open(filename, 'w') as fp:  # Opening file in write mode
        writer = csv.writer(fp)  # Creating csv writer
        writer.writerow(self.header.attributes)
        # Writing content, already in the correct format
        writer.writerows(self.content)
|
||||
|
||||
def _rearrange_(self, other):
    '''Returns other with its attributes in the same order as the ones
    of this relation, using a projection.
    It is an internal helper, not an operation of relational algebra.
    Returns None if other is of a different class or if the two
    relations do not share exactly the same attributes.'''
    if self.__class__ != other.__class__:
        return None

    own = self.header.attributes
    shared = self.header.sharedAttributes(other.header)
    if shared == len(own) == len(other.header.attributes):
        return other.projection(list(own))
    return None
|
||||
|
||||
def _autocast(self,string):
|
||||
|
||||
def _autocast(self, string):
    '''Wraps the string in an rstring and casts it according to the
    first test it passes: int, float or rdate. Empty strings and
    strings passing no test are returned as rstring.'''
    value = rstring(string)
    if len(value) == 0:
        # Nothing to cast in an empty string
        return value
    if value.isInt():
        return int(value)
    if value.isFloat():
        return float(value)
    if value.isDate():
        return rdate(value)
    return value
|
||||
|
||||
def selection(self,expr):
|
||||
|
||||
def selection(self, expr):
    '''Selection, expr must be a valid boolean expression, can contain field names,
    constant, math operations and boolean ones.
    Returns a new relation with the same header, holding the tuples
    for which the expression is true. Raises an Exception if the
    evaluation of the expression fails on a tuple.

    NOTE(review): expr is executed with eval(), it must never come
    from an untrusted source.'''
    names = self.header.attributes
    context = {}
    result = relation()
    result.header = header(list(names))

    for row in self.content:
        # Makes the values of the tuple available under the attribute names
        for position, name in enumerate(names):
            context[name] = self._autocast(row[position])

        try:
            if eval(expr, context):
                result.content.add(row)
        except Exception as e:
            raise Exception(
                "Failed to evaluate %s\n%s" % (expr, e.__str__()))
    return result
|
||||
def product (self,other):
|
||||
|
||||
def product(self, other):
    '''Cartesian product: every tuple of this relation is combined with
    every tuple of other.
    The two relations must not share attributes, otherwise (or if other
    is of a different class) an Exception is raised. Rename can be used
    on the colliding attributes before the product.'''
    colliding = (self.__class__ != other.__class__) or (
        self.header.sharedAttributes(other.header) != 0)
    if colliding:
        raise Exception(
            'Unable to perform product on relations with colliding attributes')

    result = relation()
    result.header = header(self.header.attributes + other.header.attributes)
    result.content.update(
        left + right for left in self.content for right in other.content)
    return result
|
||||
|
||||
|
||||
def projection(self,* attributes):
|
||||
|
||||
def projection(self, * attributes):
    '''Projection operator, takes many parameters, for each field to use.
    Can also use a single parameter with a list.
    Will delete duplicate items.
    Raises an Exception if an empty list or no parameters are provided,
    or if the header does not return one id for each requested
    attribute.'''
    # Parameters are supplied in a list, instead with multiple parameters.
    # Checking for emptiness first, so that a call without parameters
    # ends in the exception below rather than in an IndexError
    if attributes and isinstance(attributes[0], list):
        attributes = attributes[0]

    # Avoiding duplicated attributes, the requested order is kept
    wanted = []
    for name in attributes:
        if name not in wanted:
            wanted.append(name)

    if len(wanted) == 0:
        raise Exception('Invalid attributes for projection')

    ids = self.header.getAttributesId(wanted)
    if len(ids) == 0 or len(ids) != len(wanted):
        raise Exception('Invalid attributes for projection')

    newt = relation()
    # Create the header
    newt.header = header([self.header.attributes[i] for i in ids])

    # Create the body, the set takes care of the duplicated rows
    for row in self.content:
        newt.content.add(tuple([row[i] for i in ids]))
    return newt
|
||||
|
||||
def rename(self,params):
|
||||
|
||||
def rename(self, params):
    '''Operation rename. Takes a dictionary mapping the current names
    of the attributes to the new ones.
    For example if you want to rename a to b, provide {"a":"b"}
    Raises an Exception if the header reports a failed rename.
    The returned relation shares its content with this one and is
    marked as readonly.
    '''
    renamed = relation()
    renamed.header = header(list(self.header.attributes))

    for old in params:
        if renamed.header.rename(old, params[old]) == False:
            raise Exception('Unable to find attribute: %s' % old)

    # No copy of the tuples: only the header differs
    renamed.content = self.content
    renamed._readonly = True
    return renamed
|
||||
|
||||
def intersection(self,other):
|
||||
|
||||
def intersection(self, other):
    '''Intersection operation. The result will contain items present in both
    operands.
    Will return an empty one if there are no common items.
    Raises an Exception if the relations have different attributes.
    It is possible to use projection and rename to make headers match.'''
    # Same attributes' order as self, None when attributes don't match
    other = self._rearrange_(other)
    mismatch = (self.__class__ != other.__class__) or (
        self.header != other.header)
    if mismatch:
        raise Exception(
            'Unable to perform intersection on relations with different attributes')

    result = relation()
    result.header = header(list(self.header.attributes))
    result.content = self.content.intersection(other.content)
    return result
|
||||
|
||||
def difference(self,other):
|
||||
|
||||
def difference(self, other):
    '''Difference operation. The result will contain items present in first
    operand but not in second one.
    Will return an empty one if the second is a superset of first.
    Raises an Exception if the relations have different attributes.
    It is possible to use projection and rename to make headers match.'''
    # Same attributes' order as self, None when attributes don't match
    other = self._rearrange_(other)
    mismatch = (self.__class__ != other.__class__) or (
        self.header != other.header)
    if mismatch:
        raise Exception(
            'Unable to perform difference on relations with different attributes')

    result = relation()
    result.header = header(list(self.header.attributes))
    result.content = self.content.difference(other.content)
    return result
|
||||
def division(self,other):
|
||||
|
||||
def division(self, other):
    '''Division operator
    The division is a binary operation that is written as R ÷ S. The
    result consists of the restrictions of tuples in R to the
    attribute names unique to R, i.e., in the header of R but not in the
    header of S, for which it holds that all their combinations with tuples
    in S are present in R.
    '''
    # d_headers are the headers from self that aren't also headers in other
    d_headers = list(
        set(self.header.attributes) - set(other.header.attributes))

    # Following the definition given by Wikipedia, where a1,...,an are
    # the d_headers, R is self and S is other:
    #
    #   T := projection a1,...,an (R) x S
    #   U := T - R
    #   V := projection a1,...,an (U)
    #   W := projection a1,...,an (R) - V
    #
    # W is the result that we want
    restricted = self.projection(d_headers)
    t = restricted.product(other)
    v = t.difference(self).projection(d_headers)
    return restricted.difference(v)
|
||||
|
||||
def union(self,other):
|
||||
|
||||
def union(self, other):
    '''Union operation. The result will contain items present in first
    and second operands.
    Will return an empty one if both are empty.
    Will not insert duplicated items.
    Raises an Exception if the relations have different attributes.
    It is possible to use projection and rename to make headers match.'''
    # Same attributes' order as self, None when attributes don't match
    other = self._rearrange_(other)
    mismatch = (self.__class__ != other.__class__) or (
        self.header != other.header)
    if mismatch:
        raise Exception(
            'Unable to perform union on relations with different attributes')

    result = relation()
    result.header = header(list(self.header.attributes))
    result.content = self.content.union(other.content)
    return result
|
||||
def thetajoin(self,other,expr):
|
||||
|
||||
def thetajoin(self, other, expr):
    '''Theta join: the product of this relation with other, followed by
    a selection with the given expression.'''
    combined = self.product(other)
    return combined.selection(expr)
|
||||
|
||||
def outer(self,other):
|
||||
|
||||
def outer(self, other):
    '''Full outer join: the union between the right outer join and the
    left outer join of this relation with other.'''
    return self.outer_right(other).union(self.outer_left(other))
|
||||
|
||||
def outer_right(self,other):
|
||||
|
||||
def outer_right(self, other):
    '''Outer right join. Considers self as left and param as right.
    It is obtained by calling the outer left join of other, with this
    relation as parameter.'''
    # Swapping the operands turns the left join into the right one
    left_operand, right_operand = other, self
    return left_operand.outer_left(right_operand)
|
||||
|
||||
def outer_left(self,other,swap=False):
|
||||
'''Outer left join. Considers self as left and param as right. If the
|
||||
tuple has no corrispondence, empty attributes are filled with a "---"
|
||||
|
||||
def outer_left(self, other, swap=False):
    '''Outer left join. Considers self as left and param as right.
    Tuples are joined on the attributes shared by the two relations.
    Every tuple of self without a correspondence in other is kept, and
    the attributes coming from other are filled with a "---" string.
    The swap parameter is not used.'''
    # Attributes in common, in the order they have in self
    shared = [name for name in self.header.attributes
              if name in other.header.attributes]

    result = relation()  # Creates the new relation

    # All the attributes of the 1st relation, then the ones of the 2nd
    # that are not shared
    result.header = header(list(self.header.attributes))
    for name in other.header.attributes:
        if name not in shared:
            result.header.attributes.append(name)

    sid = self.header.getAttributesId(shared)   # Shared ids of self
    oid = other.header.getAttributesId(shared)  # Shared ids of other

    # Non shared ids of the other relation
    noid = [pos for pos in range(len(other.header.attributes))
            if pos not in oid]

    for left in self.content:
        joined = False  # Did the tuple take part in the join?
        for right in other.content:
            if all(left[s] == right[o] for s, o in zip(sid, oid)):
                result.content.add(
                    tuple(list(left) + [right[pos] for pos in noid]))
                joined = True

        # Tuples that found no match are added with the filler
        if not joined:
            result.content.add(tuple(list(left) + ["---"] * len(noid)))

    return result
|
||||
|
||||
def join(self,other):
|
||||
|
||||
def join(self, other):
    '''Natural join, joins on shared attributes (one or more). If there are no
    shared attributes, it will behave as cartesian product.'''
    # List of attributes in common between the relations
    shared = list(set(self.header.attributes)
                  .intersection(set(other.header.attributes)))

    result = relation()  # Creates the new relation

    # The header is built by hand because the order is needed: the
    # attributes of self, then the ones of other that are not shared
    result.header = header(list(self.header.attributes))
    for name in other.header.attributes:
        if name not in shared:
            result.header.attributes.append(name)

    sid = self.header.getAttributesId(shared)   # Shared ids of self
    oid = other.header.getAttributesId(shared)  # Shared ids of other

    # Non shared ids of the other relation
    noid = [pos for pos in range(len(other.header.attributes))
            if pos not in oid]

    for left in self.content:
        for right in other.content:
            # Tuples are joined when they agree on every shared attribute
            if all(left[s] == right[o] for s, o in zip(sid, oid)):
                result.content.add(
                    tuple(list(left) + [right[pos] for pos in noid]))

    return result
|
||||
def __eq__(self, other):
    '''Tells whether two relations are the same: same class, same header
    and same tuples, ignoring the order of the tuples.

    Before comparing, other is passed through self._rearrange_ so that the
    order of its attributes allows comparing the tuples directly.'''
    # NOTE(review): _rearrange_ is defined outside of this block; presumably
    # it returns other with its columns in the same order as self -- confirm
    # what it does when other is not a compatible relation.
    other = self._rearrange_(other)

    # Both parameters must be a relation
    if self.__class__ != other.__class__:
        return False
    if self.header != other.header:
        return False

    if set(self.header.attributes) != set(other.header.attributes):
        return False

    # Headers agree, so the relations are equal when their contents are
    return self.content == other.content
|
||||
|
||||
def __str__(self):
    '''Returns a string representation of the relation, can be printed with
    monospaced fonts.

    The first line holds the names of the attributes, every following line
    holds one tuple. Every field is left-justified and padded to the width
    of the longest string of its column plus two spaces.'''
    names = self.header.attributes

    # Width of every column: the longest among its name and its values
    widths = [len(name) for name in names]
    for row in self.content:
        for col, value in enumerate(row):
            if len(value) > widths[col]:
                widths[col] = len(value)

    def pad(fields):
        # Joins the fields of one line, each padded to its column's width
        return "".join([field.ljust(2 + width)
                        for field, width in zip(fields, widths)])

    lines = [pad(names)]
    for row in self.content:
        lines.append(pad(row))

    # Joining once avoids rebuilding the string for every field
    return "\n".join(lines)
|
||||
|
||||
def update(self, expr, dic):
    '''Update, expr must be a valid boolean expression, can contain field names,
    constant, math operations and boolean ones.

    expr is evaluated once for every tuple, with every field name bound to
    the value that self._autocast returns for that field of the tuple.
    dic maps the names of the fields to modify to their new values; every
    value is converted into a string.
    This operation will change the relation itself instead of generating a
    new one, replacing all the tuples that make expr true.
    Returns the number of affected rows.'''
    self._make_writable()

    names = self.header.attributes
    # A real list, because it gets indexed below (dict views can't be)
    keys = list(dic)  # List of headers to modify
    # NOTE(review): getAttributesId skips the names it doesn't find, so this
    # assumes that every key of dic is an attribute of the relation.
    f_ids = self.header.getAttributesId(
        keys)  # List of indexes corresponding to keys

    affected = 0
    attributes = {}
    # The new content is built apart: adding and removing items from a set
    # while iterating over it can skip tuples, visit them twice or fail.
    new_content = set()
    for row in self.content:
        for j, name in enumerate(names):
            attributes[name] = self._autocast(row[j])

        # WARNING: expr is run with eval(), it must come from a trusted
        # source.
        if eval(expr, attributes):  # If expr is true, changing the tuple
            affected += 1
            # A new tuple is created instead of changing the old one, so
            # other relations holding the old tuple are not affected.
            new_tuple = list(row)
            for k in range(len(keys)):
                new_tuple[f_ids[k]] = str(dic[keys[k]])
            new_content.add(tuple(new_tuple))
        else:
            new_content.add(row)

    self.content = new_content
    return affected
|
||||
|
||||
def insert(self, values):
    '''Adds one tuple to the relation.

    values holds one value for every attribute of the relation; each of
    them is converted to a string before being stored.
    Duplicate tuples are not inserted.
    Returns the number of inserted rows.'''

    # Nothing is inserted if the values don't fit the number of attributes
    if len(self.header.attributes) != len(values):
        return 0

    self._make_writable()

    # The stored tuple contains only strings
    row = tuple([str(value) for value in values])

    # The content only grows if the tuple was not already in it
    size_before = len(self.content)
    self.content.add(row)
    size_after = len(self.content)
    return size_after - size_before
|
||||
|
||||
def delete(self, expr):
    '''Delete: removes from the relation all the tuples that make expr true.

    expr must be a valid boolean expression, it can contain field names,
    constants, math operations and boolean ones. It is evaluated once for
    every tuple, with every field name bound to the value that
    self._autocast returns for that field of the tuple.
    This operation changes the relation itself instead of generating a new
    one.
    Returns the number of affected rows.'''
    self._make_writable()

    names = self.header.attributes
    attributes = {}
    removed = 0
    kept = set()  # New content of the relation
    for row in self.content:
        for pos in range(len(names)):
            attributes[names[pos]] = self._autocast(row[pos])

        # expr is run with eval(), once per tuple
        if eval(expr, attributes):
            removed += 1
        else:
            kept.add(row)

    self.content = kept
    return removed
|
||||
|
||||
|
||||
|
||||
class header (object):

    '''This class defines the header of a relation.
    It is used within relations to know if requested operations are accepted.

    The names of the attributes are kept, in order, in self.attributes.'''

    # Since headers are mutable (rename changes them in place) we explicitly
    # block hashing them
    __hash__ = None

    def __init__(self, attributes):
        '''Accepts a list with attributes' names. Names MUST be unique.

        The list is stored as it is, without copying it.
        Raises an Exception if one of the names is not valid.'''
        self.attributes = attributes

        for name in attributes:
            if not is_valid_relation_name(name):
                raise Exception('"%s" is not a valid attribute name' % name)

    def __repr__(self):
        return "header(%s)" % (repr(self.attributes))

    def rename(self, old, new):
        '''Renames a field. Doesn't check if it is a duplicate.
        Returns True if the field was renamed, False if there is no field
        named old.
        Raises an Exception if new is not a valid attribute name.'''

        if not is_valid_relation_name(new):
            raise Exception('%s is not a valid attribute name' % new)

        # Only a missing field means "not renamed"; any other error is a
        # real problem and must not be silenced.
        try:
            id_ = self.attributes.index(old)
        except ValueError:
            return False

        self.attributes[id_] = new
        return True

    def sharedAttributes(self, other):
        '''Returns how many attributes this header has in common with a given one'''
        return len(set(self.attributes).intersection(set(other.attributes)))

    def __str__(self):
        '''Returns String representation of the field's list'''
        return str(self.attributes)

    def __eq__(self, other):
        '''Headers are equal when they have the same attributes in the same order'''
        return self.attributes == other.attributes

    def __ne__(self, other):
        '''Opposite of __eq__'''
        return self.attributes != other.attributes

    def getAttributesId(self, param):
        '''Returns a list with numeric index corresponding to field's name.

        The indexes follow the order of the names in param; names that are
        not attributes of this header are skipped.'''
        return [pos
                for name in param
                for pos, attribute in enumerate(self.attributes)
                if name == attribute]
|
||||
|
@@ -1,20 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Relational
|
||||
# Copyright (C) 2008 Salvo "LtWorf" Tomaselli
|
||||
#
|
||||
#
|
||||
# Relation is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
#
|
||||
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
|
||||
#
|
||||
# Custom types for relational algebra.
|
||||
@@ -24,29 +24,33 @@
|
||||
import datetime
|
||||
import re
|
||||
|
||||
|
||||
class rstring (str):
|
||||
|
||||
'''String subclass with some custom methods'''
|
||||
|
||||
def isInt(self):
    r'''Returns true if the string represents an int number
    it only considers as int numbers the strings matching
    the following regexp:
    r'^[\+\-]{0,1}[0-9]+$'
    '''
    # An optional sign followed by one or more digits
    return re.match(r'^[\+\-]{0,1}[0-9]+$', self) is not None
|
||||
|
||||
def isFloat(self):
    r'''Returns true if the string represents a float number
    it only considers as float numbers, the strings matching
    the following regexp:
    r'^[\+\-]{0,1}[0-9]+(\.([0-9])+)?$'
    '''
    # An optional sign, one or more digits and, optionally, a dot followed
    # by one or more digits
    return re.match(r'^[\+\-]{0,1}[0-9]+(\.([0-9])+)?$', self) is not None
|
||||
|
||||
|
||||
def isDate(self):
|
||||
'''Returns true if the string represents a date,
|
||||
in the format YYYY-MM-DD. as separators '-' , '\', '/' are allowed.
|
||||
@@ -57,24 +61,25 @@ class rstring (str):
|
||||
return self._isdate
|
||||
except:
|
||||
pass
|
||||
|
||||
r= re.match(r'^([0-9]{1,4})(\\|-|/)([0-9]{1,2})(\\|-|/)([0-9]{1,2})$',self)
|
||||
if r==None:
|
||||
self._isdate=False
|
||||
self._date=None
|
||||
|
||||
r = re.match(
|
||||
r'^([0-9]{1,4})(\\|-|/)([0-9]{1,2})(\\|-|/)([0-9]{1,2})$', self)
|
||||
if r == None:
|
||||
self._isdate = False
|
||||
self._date = None
|
||||
return False
|
||||
|
||||
try: #Any of the following operations can generate an exception, if it happens, we aren't dealing with a date
|
||||
year=int(r.group(1))
|
||||
month=int(r.group(3))
|
||||
day=int(r.group(5))
|
||||
d=datetime.date(year,month,day)
|
||||
self._isdate=True
|
||||
self._date=d
|
||||
|
||||
try: # Any of the following operations can generate an exception, if it happens, we aren't dealing with a date
|
||||
year = int(r.group(1))
|
||||
month = int(r.group(3))
|
||||
day = int(r.group(5))
|
||||
d = datetime.date(year, month, day)
|
||||
self._isdate = True
|
||||
self._date = d
|
||||
return True
|
||||
except:
|
||||
self._isdate=False
|
||||
self._date=None
|
||||
self._isdate = False
|
||||
self._date = None
|
||||
return False
|
||||
|
||||
def getDate(self):
|
||||
@@ -84,46 +89,59 @@ class rstring (str):
|
||||
except:
|
||||
self.isDate()
|
||||
return self._date
|
||||
|
||||
|
||||
class rdate (object):

    '''Represents a date.

    The date itself is kept in self.intdate; day, month, weekday and year
    are copied from it when the object is created.'''

    def __init__(self, date):
        '''date: A string representing a date'''
        # Anything that is not an rstring gets wrapped into one, so that
        # getDate can be used on it
        source = date if isinstance(date, rstring) else rstring(date)

        parsed = source.getDate()
        self.intdate = parsed
        self.day = parsed.day
        self.month = parsed.month
        self.weekday = parsed.weekday()
        self.year = parsed.year

    def __hash__(self):
        '''Same hash as the internal date'''
        return hash(self.intdate)

    def __str__(self):
        '''String representation of the internal date'''
        return str(self.intdate)

    def __add__(self, days):
        '''Returns a new rdate, shifted by the given number of days'''
        shifted = self.intdate + datetime.timedelta(days)
        return rdate(str(shifted))

    # The comparisons are delegated to the internal dates

    def __eq__(self, other):
        return self.intdate == other.intdate

    def __ge__(self, other):
        return self.intdate >= other.intdate

    def __gt__(self, other):
        return self.intdate > other.intdate

    def __le__(self, other):
        return self.intdate <= other.intdate

    def __lt__(self, other):
        return self.intdate < other.intdate

    def __ne__(self, other):
        return self.intdate != other.intdate

    def __sub__(self, other):
        '''Returns the number of days between the two dates'''
        difference = self.intdate - other.intdate
        return difference.days
|
||||
|
||||
|
||||
def is_valid_relation_name(name):
    '''Checks if a name is valid for a relation.

    A valid name starts with a letter or an underscore, followed only by
    letters, digits and underscores.
    Returns boolean'''
    return re.match(r'^[_a-zA-Z]+[_a-zA-Z0-9]*$', name) is not None
|
||||
|
Reference in New Issue
Block a user