From 3b0392e8671e0fa90ce0f9bba9f4cab8f3333767 Mon Sep 17 00:00:00 2001
From: LtWorf <LtWorf@014f5005-505e-4b48-8d0a-63407b615a7c>
Date: Thu, 3 Mar 2011 08:46:04 +0000
Subject: [PATCH] - Float type recognition is more robust, now handled using a
 regexp - Date type recognition is more robust, now using a combination of
 regexp plus date object - Parsing of strings representing dates is now
 cached, eliminating the need for double parse

git-svn-id: http://galileo.dmi.unict.it/svn/relational/trunk@270 014f5005-505e-4b48-8d0a-63407b615a7c
---
 CHANGELOG              |  3 ++
 relational/parser.py   |  2 +-
 relational/relation.py | 16 ++++---
 relational/rtypes.py   | 97 +++++++++++++++++++++++-------------------
 samples/ratings.csv    |  9 ++++
 5 files changed, 75 insertions(+), 52 deletions(-)
 create mode 100644 samples/ratings.csv

diff --git a/CHANGELOG b/CHANGELOG
index 6a74818..a5ed7b0 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,6 +12,9 @@
 - Module parallel does something, can execute queries in parallel
 - Set hash method for the classes
 - Implemented select_union_intersect_subtract general optimization
+- Float type recognition is more robust, now handled using a regexp
+- Date type recognition is more robust, now using a combination of a regexp and a date object
+- Parsing of strings representing dates is now cached, eliminating the need to parse them twice
 
 0.11
 - Font is set only on windows (Rev 206)
diff --git a/relational/parser.py b/relational/parser.py
index f9e2564..1da85ed 100644
--- a/relational/parser.py
+++ b/relational/parser.py
@@ -118,7 +118,7 @@ class node (object):
             return self.name
         pass
     def printtree(self,level=0):
-        '''Prints a representation of the tree using indentation'''
+        '''Returns a representation of the tree using indentation'''
         r=''
         for i in range(level):
             r+='  '
diff --git a/relational/relation.py b/relational/relation.py
index 6475fd2..aaab379 100644
--- a/relational/relation.py
+++ b/relational/relation.py
@@ -102,14 +102,16 @@ class relation (object):
         for i in self.content:
 	    #Fills the attributes dictionary with the values of the tuple
             for j in range(len(self.header.attributes)):
-                if len(i[j])>0 and i[j].isdigit():
-                    attributes[self.header.attributes[j]]=int(i[j])
-                elif len(i[j])>0 and rstring(i[j]).isFloat():
-                    attributes[self.header.attributes[j]]=float(i[j])
-                elif len(i[j])>0 and isDate(i[j]):
-                    attributes[self.header.attributes[j]]=rdate(i[j])
+                tmpstring=rstring(i[j])
+                
+                if len(tmpstring)>0 and tmpstring.isdigit():
+                    attributes[self.header.attributes[j]]=int(tmpstring)
+                elif len(tmpstring)>0 and tmpstring.isFloat():
+                    attributes[self.header.attributes[j]]=float(tmpstring)
+                elif len(tmpstring)>0 and tmpstring.isDate():
+                    attributes[self.header.attributes[j]]=rdate(tmpstring)
                 else:
-                    attributes[self.header.attributes[j]]=i[j]
+                    attributes[self.header.attributes[j]]=tmpstring
                 
             try:
                 if eval(expr,attributes):
diff --git a/relational/rtypes.py b/relational/rtypes.py
index 0f38bf4..9d2ff8d 100644
--- a/relational/rtypes.py
+++ b/relational/rtypes.py
@@ -22,33 +22,66 @@ Purpose of this module is having the isFloat function and
 implementing dates to use in selection.'''
 
 import datetime
+import re
 
 class rstring (str):
     '''String subclass with some custom methods'''
     def isFloat(self):
-        '''True if the string is a float number, false otherwise'''
-        lst=('0','1','2','3','4','5','6','7','8','9','.')
-        for i in self:
-            if i not in lst:
-                return False;
-        return True;
+        '''Returns True if the string represents a float number.
+        Only strings matching the following regexp are considered
+        float numbers (the fractional part is optional):
+            r'^[0-9]+(\.([0-9])+)?$'
+        '''
+        if re.match(r'^[0-9]+(\.([0-9])+)?$',self)==None:
+            return False
+        else:
+            return True
+    
+    def isDate(self):
+        '''Returns True if the string represents a date in the format
+        YYYY-MM-DD. Allowed separators are '-', '\\' and '/'.
+        As a side effect, the result and the parsed date object are cached
+        on the instance, so the string is never parsed twice.
+        '''
+        try:
+            return self._isdate
+        except:
+            pass
+        
+        r= re.match(r'^([0-9]{1,4})(\\|-|/)([0-9]{1,2})(\\|-|/)([0-9]{1,2})$',self)
+        if r==None:
+            self._isdate=False
+            self._date=None
+            return False
+    
+        try: #Any of the following operations can generate an exception, if it happens, we aren't dealing with a date
+            year=int(r.group(1))
+            month=int(r.group(3))
+            day=int(r.group(5))
+            d=datetime.date(year,month,day)
+            self._isdate=True
+            self._date=d
+            return True
+        except:
+            self._isdate=False
+            self._date=None
+            return False
 
+    def getDate(self):
+        '''Returns the datetime.date object or None'''
+        try:
+            return self._date
+        except:
+            self.isDate()
+            return self._date
 class rdate (object):
     '''Represents a date'''
     def __init__(self,date):
-        sep=('-','/','\\')
-        splitter=None
-        for i in sep:
-            if i in date:
-                splitter=i
-                break;
-        elems=date.split(splitter)
+        '''date: A string representing a date'''
+        if not isinstance(date,rstring):
+            date=rstring(date)
         
-        year=int(elems[0])
-        month=int(elems[1])
-        day=int(elems[2])
-        
-        self.intdate=datetime.date(year,month,day)
+        self.intdate=date.getDate()
         self.day= self.intdate.day
         self.month=self.intdate.month
         self.weekday=self.intdate.weekday()
@@ -75,29 +108,5 @@ class rdate (object):
         return self.intdate!=other.intdate
     def __sub__ (self,other):
         return (self.intdate-other.intdate).days
-def isDate(date):
-    sep=('-','/','\\')
-    splitter=None
-    for i in sep:
-        if i in date:
-            splitter=i
-            break;
-    elems=date.split(splitter)
-    if len(elems)!=3:
-        return False #Wrong number of elements
-    year=elems[0]
-    month=elems[1]
-    day=elems[2]
-    if not (year.isdigit() and month.isdigit() and day.isdigit()):
-        return False
-    year=int(year)
-    month=int(month)
-    day=int(day)
-    
-    if year<datetime.MINYEAR or year>datetime.MAXYEAR:
-        return False
-    if month<1 or month>12:
-        return False
-    if day<1 or day >31:
-        return False
-    return True
\ No newline at end of file
+
+        
\ No newline at end of file
diff --git a/samples/ratings.csv b/samples/ratings.csv
new file mode 100644
index 0000000..39fd952
--- /dev/null
+++ b/samples/ratings.csv
@@ -0,0 +1,9 @@
+id,rating
+0,5.3
+1,6
+2,5.7
+3,3.3
+4,9.1
+5,4.4
+6,5.1.1
+7,4.9