- Float type recognition is more robust, now handled using a regexp

- Date type recognition is more robust, now using a combination of a regexp and a date object
- The result of parsing a string that represents a date is now cached, so the string no longer has to be parsed twice



git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@270 014f5005-505e-4b48-8d0a-63407b615a7c
This commit is contained in:
LtWorf 2011-03-03 08:46:04 +00:00
parent ba564b55d3
commit 3b0392e867
5 changed files with 75 additions and 52 deletions

View File

@ -12,6 +12,9 @@
- Module parallel does something, can execute queries in parallel - Module parallel does something, can execute queries in parallel
- Set hash method for the classes - Set hash method for the classes
- Implemented select_union_intersect_subtract general optimization - Implemented select_union_intersect_subtract general optimization
- Float type recognition is more robust, now handled using a regexp
- Date type recognition is more robust, now using a combination of a regexp and a date object
- The result of parsing a string that represents a date is now cached, so the string no longer has to be parsed twice
0.11 0.11
- Font is set only on windows (Rev 206) - Font is set only on windows (Rev 206)

View File

@ -118,7 +118,7 @@ class node (object):
return self.name return self.name
pass pass
def printtree(self,level=0): def printtree(self,level=0):
'''Prints a representation of the tree using indentation''' '''returns a representation of the tree using indentation'''
r='' r=''
for i in range(level): for i in range(level):
r+=' ' r+=' '

View File

@ -102,14 +102,16 @@ class relation (object):
for i in self.content: for i in self.content:
#Fills the attributes dictionary with the values of the tuple #Fills the attributes dictionary with the values of the tuple
for j in range(len(self.header.attributes)): for j in range(len(self.header.attributes)):
if len(i[j])>0 and i[j].isdigit(): tmpstring=rstring(i[j])
attributes[self.header.attributes[j]]=int(i[j])
elif len(i[j])>0 and rstring(i[j]).isFloat(): if len(tmpstring)>0 and tmpstring.isdigit():
attributes[self.header.attributes[j]]=float(i[j]) attributes[self.header.attributes[j]]=int(tmpstring)
elif len(i[j])>0 and isDate(i[j]): elif len(tmpstring)>0 and tmpstring.isFloat():
attributes[self.header.attributes[j]]=rdate(i[j]) attributes[self.header.attributes[j]]=float(tmpstring)
elif len(tmpstring)>0 and tmpstring.isDate():
attributes[self.header.attributes[j]]=rdate(tmpstring)
else: else:
attributes[self.header.attributes[j]]=i[j] attributes[self.header.attributes[j]]=tmpstring
try: try:
if eval(expr,attributes): if eval(expr,attributes):

View File

@ -22,33 +22,66 @@ Purpose of this module is having the isFloat function and
implementing dates to use in selection.''' implementing dates to use in selection.'''
import datetime import datetime
import re
class rstring (str): class rstring (str):
'''String subclass with some custom methods''' '''String subclass with some custom methods'''
def isFloat(self): def isFloat(self):
'''True if the string is a float number, false otherwise''' '''Returns true if the string represents a float number
lst=('0','1','2','3','4','5','6','7','8','9','.') it only considers as float numbers, the strings matching
for i in self: the following regexp:
if i not in lst: r'^[0-9]+(\.([0-9])+)?$'
return False; '''
return True; if re.match(r'^[0-9]+(\.([0-9])+)?$',self)==None:
return False
else:
return True
def isDate(self):
    '''Returns True if the string represents a valid calendar date.

    The accepted format is YYYY-MM-DD (year of 1 to 4 digits, month and
    day of 1 or 2 digits). The separators can be '-', '/' or a backslash;
    the two separators are matched independently, so they may differ.

    As a side effect, the outcome is cached on the instance: _isdate holds
    the boolean result and _date holds the datetime.date object (or None),
    so the string never has to be parsed again.
    '''
    try:
        # Cached result from a previous call
        return self._isdate
    except AttributeError:
        pass

    # \Z instead of $ so that a trailing newline is not silently accepted
    r = re.match(
        r'^([0-9]{1,4})(\\|-|/)([0-9]{1,2})(\\|-|/)([0-9]{1,2})\Z', self)
    if r is None:
        self._isdate = False
        self._date = None
        return False

    try:
        # The groups are digits only, so int() cannot fail here; the date
        # constructor raises ValueError for impossible dates such as
        # year 0, month 13 or the 30th of February.
        d = datetime.date(int(r.group(1)), int(r.group(3)), int(r.group(5)))
    except ValueError:
        self._isdate = False
        self._date = None
        return False

    self._isdate = True
    self._date = d
    return True
def getDate(self):
    '''Returns the datetime.date object for this string, or None.

    The value is read from the cache filled in by isDate(). If isDate()
    has not been called on this string yet, it is called now so that the
    cache exists. None is returned when the string is not a date.
    '''
    try:
        return self._date
    except AttributeError:
        # Nothing cached yet: isDate() stores _date as a side effect
        self.isDate()
        return self._date
class rdate (object): class rdate (object):
'''Represents a date''' '''Represents a date'''
def __init__(self,date): def __init__(self,date):
sep=('-','/','\\') '''date: A string representing a date'''
splitter=None if not isinstance(date,rstring):
for i in sep: date=rstring(date)
if i in date:
splitter=i
break;
elems=date.split(splitter)
year=int(elems[0]) self.intdate=date.getDate()
month=int(elems[1])
day=int(elems[2])
self.intdate=datetime.date(year,month,day)
self.day= self.intdate.day self.day= self.intdate.day
self.month=self.intdate.month self.month=self.intdate.month
self.weekday=self.intdate.weekday() self.weekday=self.intdate.weekday()
@ -75,29 +108,5 @@ class rdate (object):
return self.intdate!=other.intdate return self.intdate!=other.intdate
def __sub__ (self,other): def __sub__ (self,other):
return (self.intdate-other.intdate).days return (self.intdate-other.intdate).days
def isDate(date):
sep=('-','/','\\')
splitter=None
for i in sep:
if i in date:
splitter=i
break;
elems=date.split(splitter)
if len(elems)!=3:
return False #Wrong number of elements
year=elems[0]
month=elems[1]
day=elems[2]
if not (year.isdigit() and month.isdigit() and day.isdigit()):
return False
year=int(year)
month=int(month)
day=int(day)
if year<datetime.MINYEAR or year>datetime.MAXYEAR:
return False
if month<1 or month>12:
return False
if day<1 or day >31:
return False
return True

9
samples/ratings.csv Normal file
View File

@ -0,0 +1,9 @@
id,rating
0,5.3
1,6
2,5.7
3,3.3
4,9.1
5,4.4
6,5.1.1
7,4.9
1 id rating
2 0 5.3
3 1 6
4 2 5.7
5 3 3.3
6 4 9.1
7 5 4.4
8 6 5.1.1
9 7 4.9