- Float type recognition is more robust, now handled using a regexp

- Date type recognition is more robust, now using a combination of a regexp and a date object
- Parsing of strings representing dates is now cached, eliminating the need for a double parse

git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@270 014f5005-505e-4b48-8d0a-63407b615a7c
2011-03-03 08:46:04 +00:00
parent ba564b55d3
commit 3b0392e867
5 changed files with 75 additions and 52 deletions
--- a/3
+++ b/3
@@ -12,6 +12,9 @@
 - Module parallel does something, can execute queries in parallel
 - Set hash method for the classes
 - Implemented select_union_intersect_subtract general optimization
 - Float type recognition is more robust, now handled using a regexp
 - Date type recognition is more robust, now using a combination of regexp plus date object
 - Parsing of strings representing dates is now cached, eliminating the need for double parse
 0.11
 - Font is set only on windows (Rev 206)
--- a/relational/parser.py
+++ b/relational/parser.py
@@ -118,7 +118,7 @@ class node (object):
            return self.name
        pass
    def printtree(self,level=0):
-        '''Prints a representation of the tree using indentation'''
+        '''returns a representation of the tree using indentation'''
        r=''
        for i in range(level):
            r+='  '
--- a/relational/relation.py
+++ b/relational/relation.py
@@ -102,14 +102,16 @@ class relation (object):
        for i in self.content:
 	    #Fills the attributes dictionary with the values of the tuple
            for j in range(len(self.header.attributes)):
-                if len(i[j])>0 and i[j].isdigit():
+                tmpstring=rstring(i[j])
-                    attributes[self.header.attributes[j]]=int(i[j])
+                
-                elif len(i[j])>0 and rstring(i[j]).isFloat():
+                if len(tmpstring)>0 and tmpstring.isdigit():
-                    attributes[self.header.attributes[j]]=float(i[j])
+                    attributes[self.header.attributes[j]]=int(tmpstring)
-                elif len(i[j])>0 and isDate(i[j]):
+                elif len(tmpstring)>0 and tmpstring.isFloat():
-                    attributes[self.header.attributes[j]]=rdate(i[j])
+                    attributes[self.header.attributes[j]]=float(tmpstring)
                elif len(tmpstring)>0 and tmpstring.isDate():
                    attributes[self.header.attributes[j]]=rdate(tmpstring)
                else:
-                    attributes[self.header.attributes[j]]=i[j]
+                    attributes[self.header.attributes[j]]=tmpstring
            try:
                if eval(expr,attributes):
--- a/relational/rtypes.py
+++ b/relational/rtypes.py
@@ -22,33 +22,66 @@ Purpose of this module is having the isFloat function and
 implementing dates to use in selection.'''
 import datetime
 import re
 class rstring (str):
    '''String subclass with some custom methods'''
    def isFloat(self):
-        '''True if the string is a float number, false otherwise'''
+        '''Returns true if the string represents a float number
-        lst=('0','1','2','3','4','5','6','7','8','9','.')
+        it only considers as float numbers, the strings matching
-        for i in self:
+        the following regexp:
-            if i not in lst:
+            r'^[0-9]+(\.([0-9])+)?$'
-                return False;
+        '''
-        return True;
+        if re.match(r'^[0-9]+(\.([0-9])+)?$',self)==None:
            return False
        else:
            return True
    def isDate(self):
        '''Returns True if the string represents a valid calendar date
        in the format YYYY-MM-DD, False otherwise.

        Allowed separators are '-', '\\' and '/'. Year may have 1 to 4
        digits, month and day 1 or 2 digits; the values must also form a
        date accepted by datetime.date (e.g. month 13 is rejected).

        As a side effect the outcome is cached on the instance: _isdate
        holds the boolean result and _date holds the datetime.date object
        (or None), so the string is parsed only once.
        '''
        try:
            return self._isdate
        except AttributeError:
            pass  # First call on this instance: nothing cached yet

        # Assume failure until the string is proven to be a date, so the
        # cache is consistent on every exit path
        self._isdate = False
        self._date = None

        r = re.match(r'^([0-9]{1,4})(\\|-|/)([0-9]{1,2})(\\|-|/)([0-9]{1,2})$', self)
        if r is None:
            return False

        try:
            # datetime.date raises ValueError for out-of-range components
            parsed = datetime.date(int(r.group(1)), int(r.group(3)), int(r.group(5)))
        except ValueError:
            return False

        self._date = parsed
        self._isdate = True
        return True
    def getDate(self):
        '''Returns the datetime.date object for this string, or None if
        the string does not represent a date.

        The value cached in _date is used when present; otherwise
        isDate() is called once to parse the string and fill the cache.
        '''
        try:
            return self._date
        except AttributeError:
            # Nothing cached yet: isDate() sets _date as a side effect
            self.isDate()
            return self._date
 class rdate (object):
    '''Represents a date'''
    def __init__(self,date):
-        sep=('-','/','\\')
+        '''date: A string representing a date'''
-        splitter=None
+        if not isinstance(date,rstring):
-        for i in sep:
+            date=rstring(date)
            if i in date:
                splitter=i
                break;
        elems=date.split(splitter)
-        year=int(elems[0])
+        self.intdate=date.getDate()
        month=int(elems[1])
        day=int(elems[2])
        self.intdate=datetime.date(year,month,day)
        self.day= self.intdate.day
        self.month=self.intdate.month
        self.weekday=self.intdate.weekday()
@@ -75,29 +108,5 @@ class rdate (object):
        return self.intdate!=other.intdate
    def __sub__ (self,other):
        return (self.intdate-other.intdate).days
 def isDate(date):
    sep=('-','/','\\')
    splitter=None
    for i in sep:
        if i in date:
            splitter=i
            break;
    elems=date.split(splitter)
    if len(elems)!=3:
        return False #Wrong number of elements
    year=elems[0]
    month=elems[1]
    day=elems[2]
    if not (year.isdigit() and month.isdigit() and day.isdigit()):
        return False
    year=int(year)
    month=int(month)
    day=int(day)
-    if year<datetime.MINYEAR or year>datetime.MAXYEAR:
+        
        return False
    if month<1 or month>12:
        return False
    if day<1 or day >31:
        return False
    return True
--- a/samples/ratings.csv
+++ b/samples/ratings.csv
@@ -0,0 +1,9 @@
 id,rating
 0,5.3
 1,6
 2,5.7
 3,3.3
 4,9.1
 5,4.4
 6,5.1.1
 7,4.9