- Float type recognition is more robust, now handled using a regexp

- Date type recognition is more robust, now using a combination of regexp plus date object
- Parsing of strings representing dates is now cached, eliminating the need for double parse



git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@270 014f5005-505e-4b48-8d0a-63407b615a7c
This commit is contained in:
LtWorf 2011-03-03 08:46:04 +00:00
parent ba564b55d3
commit 3b0392e867
5 changed files with 75 additions and 52 deletions

View File

@ -12,6 +12,9 @@
- Module parallel does something, can execute queries in parallel
- Set hash method for the classes
- Implemented select_union_intersect_subtract general optimization
- Float type recognition is more robust: it is now handled using a regexp
- Date type recognition is more robust: it now combines a regexp with validation through a date object
- Parsing of strings representing dates is now cached, eliminating the need to parse them twice
0.11
- Font is set only on windows (Rev 206)

View File

@ -118,7 +118,7 @@ class node (object):
return self.name
pass
def printtree(self,level=0):
'''Prints a representation of the tree using indentation'''
'''returns a representation of the tree using indentation'''
r=''
for i in range(level):
r+=' '

View File

@ -102,14 +102,16 @@ class relation (object):
for i in self.content:
#Fills the attributes dictionary with the values of the tuple
for j in range(len(self.header.attributes)):
if len(i[j])>0 and i[j].isdigit():
attributes[self.header.attributes[j]]=int(i[j])
elif len(i[j])>0 and rstring(i[j]).isFloat():
attributes[self.header.attributes[j]]=float(i[j])
elif len(i[j])>0 and isDate(i[j]):
attributes[self.header.attributes[j]]=rdate(i[j])
tmpstring=rstring(i[j])
if len(tmpstring)>0 and tmpstring.isdigit():
attributes[self.header.attributes[j]]=int(tmpstring)
elif len(tmpstring)>0 and tmpstring.isFloat():
attributes[self.header.attributes[j]]=float(tmpstring)
elif len(tmpstring)>0 and tmpstring.isDate():
attributes[self.header.attributes[j]]=rdate(tmpstring)
else:
attributes[self.header.attributes[j]]=i[j]
attributes[self.header.attributes[j]]=tmpstring
try:
if eval(expr,attributes):

View File

@ -22,33 +22,66 @@ Purpose of this module is having the isFloat function and
implementing dates to use in selection.'''
import datetime
import re
class rstring (str):
    '''String subclass with some custom methods.

    It can tell whether its text represents a float number or a date.
    The result of the date parsing is cached on the instance (attributes
    _isdate and _date), so every string is parsed at most once.'''

    # Patterns are compiled once, since the checks run once per cell
    # when a relation is scanned.
    _float_re = re.compile(r'^[0-9]+(\.([0-9])+)?$')
    _date_re = re.compile(
        r'^([0-9]{1,4})(\\|-|/)([0-9]{1,2})(\\|-|/)([0-9]{1,2})$')

    def isFloat(self):
        r'''Returns True if the string represents a float number,
        False otherwise.

        It only considers as float numbers the strings matching
        the following regexp:
        r'^[0-9]+(\.([0-9])+)?$'
        '''
        return self._float_re.match(self) is not None

    def isDate(self):
        '''Returns True if the string represents a date, False otherwise.

        The expected format is YYYY-MM-DD (1 to 4 digits for the year,
        1 or 2 for month and day). Dash, slash and backslash are allowed
        as separators; the regexp checks each separator independently.
        A string with the right shape but an impossible date (for example
        the 30th of February) is not a date.

        As side-effect, the datetime.date object (or None) is stored for
        future usage, so no more parsings are needed.
        '''
        try:
            return self._isdate
        except AttributeError:
            pass  # Not parsed yet

        # Pessimistic defaults, overwritten only by a successful parse
        self._isdate = False
        self._date = None

        r = self._date_re.match(self)
        if r is None:
            return False
        try:
            # Groups 2 and 4 are the separators
            self._date = datetime.date(
                int(r.group(1)), int(r.group(3)), int(r.group(5)))
        except ValueError:
            # Out of range year, month or day: not a real date
            return False
        self._isdate = True
        return True

    def getDate(self):
        '''Returns the datetime.date object represented by the string,
        or None if the string is not a date.'''
        try:
            return self._date
        except AttributeError:
            # Not parsed yet: isDate() always sets _date
            self.isDate()
            return self._date
class rdate (object):
'''Represents a date'''
def __init__(self,date):
sep=('-','/','\\')
splitter=None
for i in sep:
if i in date:
splitter=i
break;
elems=date.split(splitter)
'''date: A string representing a date'''
if not isinstance(date,rstring):
date=rstring(date)
year=int(elems[0])
month=int(elems[1])
day=int(elems[2])
self.intdate=datetime.date(year,month,day)
self.intdate=date.getDate()
self.day= self.intdate.day
self.month=self.intdate.month
self.weekday=self.intdate.weekday()
@ -75,29 +108,5 @@ class rdate (object):
return self.intdate!=other.intdate
def __sub__ (self,other):
'''Returns the days attribute of the difference between this object's intdate and other's intdate (presumably other is another rdate; verify against callers)'''
return (self.intdate-other.intdate).days
def isDate(date):
    '''Returns True if the string represents a valid calendar date,
    False otherwise.

    date: a string in the form year-month-day, where the separator is
    the first one of '-', '/' and backslash found in the string. The
    three fields must be made of digits only.

    The fields are validated by building a datetime.date, so impossible
    dates such as the 30th of February are rejected, and not just days
    outside the 1-31 range.
    '''
    # First separator present in the string, in priority order. When
    # there is none, split(None) splits on whitespace, as it always did.
    splitter = next((s for s in ('-', '/', '\\') if s in date), None)
    elems = date.split(splitter)

    if len(elems) != 3:
        return False  # Wrong number of elements
    if not all(e.isdigit() for e in elems):
        return False

    try:
        datetime.date(int(elems[0]), int(elems[1]), int(elems[2]))
    except (ValueError, OverflowError):
        # ValueError: field out of range or day not existing in that month
        # OverflowError: number too large to be handled by datetime
        return False
    return True

9
samples/ratings.csv Normal file
View File

@ -0,0 +1,9 @@
id,rating
0,5.3
1,6
2,5.7
3,3.3
4,9.1
5,4.4
6,5.1.1
7,4.9
1 id rating
2 0 5.3
3 1 6
4 2 5.7
5 3 3.3
6 4 9.1
7 5 4.4
8 6 5.1.1
9 7 4.9