From 12f44596828fe0dc46b9fe2f7561e89038cd78ed Mon Sep 17 00:00:00 2001 From: Salvo 'LtWorf' Tomaselli Date: Sat, 24 Jun 2017 13:01:22 +0200 Subject: [PATCH] More type information Add type information, change some imports and some style. --- relational/maintenance.py | 45 ++++++++++++++++---------------- relational/optimizer.py | 27 +++++++------------ relational/parser.py | 39 +++++++++++++-------------- relational/relation.py | 55 ++++++++++++++++++++------------------- relational/rtypes.py | 9 ++++--- 5 files changed, 83 insertions(+), 92 deletions(-) diff --git a/relational/maintenance.py b/relational/maintenance.py index 89b2229..3a4e501 100644 --- a/relational/maintenance.py +++ b/relational/maintenance.py @@ -23,8 +23,9 @@ import urllib.parse import os.path import pickle import base64 +from typing import Optional, Tuple -from relational.relation import relation +from relational.relation import Relation from relational import parser from relational.rtypes import is_valid_relation_name @@ -32,7 +33,7 @@ from relational.rtypes import is_valid_relation_name SWEARWORDS = {'fuck', 'shit', 'suck', 'merda', 'mierda', 'merde'} -def send_survey(data): +def send_survey(data) -> int: '''Sends the survey. Data must be a dictionary. returns the http response. @@ -60,7 +61,7 @@ def send_survey(data): return 0 -def check_latest_version(): +def check_latest_version() -> Optional[str]: '''Returns the latest version available. Heavely dependent on server and server configurations not granted to work forever.''' @@ -78,30 +79,30 @@ def check_latest_version(): return s.decode().strip() -class UserInterface (object): +class UserInterface: '''It is used to provide services to the user interfaces, in order to reduce the amount of duplicated code present in different user interfaces. ''' - def __init__(self): + def __init__(self) -> None: self.session_reset() - def load(self, filename, name): + def load(self, filename: str, name: str) -> None: '''Loads a relation from file, and gives it a name to be used in subsequent queries.''' - rel = relation(filename) + rel = Relation(filename) self.set_relation(name, rel) - def unload(self, name): + def unload(self, name: str) -> None: '''Unloads an existing relation.''' del self.relations[name] - def store(self, filename, name): + def store(self, filename: str, name: str) -> None: '''Stores a relation to file.''' - pass + raise Exception('Not implemented') - def session_dump(self, filename=None): + def session_dump(self, filename: Optional[str] = None) -> Optional[str]: ''' Dumps the session. @@ -109,7 +110,7 @@ class UserInterface (object): inside the file, and None is returned. If no filename is specified, the session is returned - as bytes. + as string. ''' if filename: with open(filename, 'w') as f: @@ -117,11 +118,11 @@ class UserInterface (object): return None return base64.b64encode(pickle.dumps(self.relations)).decode() - def session_restore(self, session=None, filename=None): + def session_restore(self, session: Optional[bytes] = None, filename: Optional[str] = None) -> None: ''' Restores a session. - Either from bytes or from a file + Either from bytes or from a file. ''' if session: try: @@ -132,23 +133,23 @@ class UserInterface (object): with open(filename) as f: self.relations = pickle.load(f) - def session_reset(self): + def session_reset(self) -> None: ''' Resets the session to a clean one ''' self.relations = {} - def get_relation(self, name): + def get_relation(self, name: str) -> Relation: '''Returns the relation corresponding to name.''' return self.relations[name] - def set_relation(self, name, rel): + def set_relation(self, name: str, rel: Relation) -> None: '''Sets the relation corresponding to name.''' if not is_valid_relation_name(name): raise Exception('Invalid name for destination relation') self.relations[name] = rel - def suggest_name(self, filename): + def suggest_name(self, filename: str) -> Optional[str]: ''' Returns a possible name for a relation, given a filename. @@ -167,7 +168,7 @@ class UserInterface (object): return None return name - def execute(self, query, relname='last_'): + def execute(self, query: str, relname: str = 'last_') -> Relation: '''Executes a query, returns the result and if relname is not None, adds the result to the dictionary, with the name given in relname.''' @@ -180,7 +181,7 @@ class UserInterface (object): return result @staticmethod - def split_query(query, default_name='last_'): + def split_query(query: str, default_name='last_') -> Tuple[str, str]: ''' Accepts a query which might have an initial value assignment @@ -196,14 +197,14 @@ class UserInterface (object): query = sq[1].strip() return default_name, query - def multi_execute(self, query): + def multi_execute(self, query: str) -> Relation: '''Executes multiple queries, separated by \n They can have a syntax of [varname =] query to assign the result to a new relation ''' - r = relation() + r = Relation() queries = query.split('\n') for query in queries: if query.strip() == '': diff --git a/relational/optimizer.py b/relational/optimizer.py index 1fb2ee1..3e8ccd5 100644 --- a/relational/optimizer.py +++ b/relational/optimizer.py @@ -22,32 +22,23 @@ # relational query, or it can be a parse tree for a relational expression (ie: class parser.node). # The functions will always return a string with the optimized query, but if a parse tree was provided, # the parse tree itself will be modified accordingly. +from typing import Union, Optional, Dict, Any from relational import optimizations -from relational import parser +from relational.parser import Node, RELATION, UNARY, BINARY, op_functions, tokenize, tree from relational import querysplit from relational.maintenance import UserInterface - -# Stuff that was here before, keeping it for compatibility -RELATION = parser.RELATION -UNARY = parser.UNARY -BINARY = parser.BINARY - -op_functions = parser.op_functions -node = parser.node -tokenize = parser.tokenize -tree = parser.tree -# End of the stuff +ContextDict = Dict[str,Any] -def optimize_program(code, rels): +def optimize_program(code, rels: ContextDict): ''' Optimize an entire program, composed by multiple expressions and assignments. ''' lines = code.split('\n') - context = {} + context = {} # type: ContextDict for line in lines: line = line.strip() @@ -55,14 +46,14 @@ def optimize_program(code, rels): continue res, query = UserInterface.split_query(line) last_res = res - parsed = parser.tree(query) + parsed = tree(query) optimizations.replace_leaves(parsed, context) context[res] = parsed node = optimize_all(context[last_res], rels, tostr=False) return querysplit.split(node, rels) -def optimize_all(expression, rels, specific=True, general=True, debug=None,tostr=True): +def optimize_all(expression: Union[str, Node], rels: ContextDict, specific: bool = True, general: bool = True, debug: Optional[list] = None, tostr: bool = True) -> Union[str, Node]: '''This function performs all the available optimizations. expression : see documentation of this module @@ -76,7 +67,7 @@ def optimize_all(expression, rels, specific=True, general=True, debug=None,tostr Return value: this will return an optimized version of the expression''' if isinstance(expression, str): n = tree(expression) # Gets the tree - elif isinstance(expression, node): + elif isinstance(expression, Node): n = expression else: raise (TypeError("expression must be a string or a node")) @@ -107,7 +98,7 @@ def optimize_all(expression, rels, specific=True, general=True, debug=None,tostr return n -def specific_optimize(expression, rels): +def specific_optimize(expression, rels: ContextDict): '''This function performs specific optimizations. Means that it will need to know the fields used by the relations. diff --git a/relational/parser.py b/relational/parser.py index 340ed8a..8409bf8 100644 --- a/relational/parser.py +++ b/relational/parser.py @@ -24,7 +24,7 @@ # # Language definition here: # http://ltworf.github.io/relational/grammar.html -from typing import Optional +from typing import Optional, Union, List, Any from relational import rtypes @@ -85,7 +85,7 @@ class CallableString(str): return eval(self, context) -class Node (object): +class Node: '''This class is a node of a relational expression. Leaves are relations and internal nodes are operations. @@ -105,7 +105,7 @@ class Node (object): kind = None # type: Optional[int] __hash__ = None # type: None - def __init__(self, expression=None): + def __init__(self, expression: Optional[list] = None) -> None: '''Generates the tree from the tokenized expression If no expression is specified then it will create an empty node''' if expression == None or len(expression) == 0: @@ -172,7 +172,7 @@ class Node (object): code = self._toPython() return compile(code, '', 'eval') - def toPython(self): + def toPython(self) -> CallableString: '''This method converts the AST into a python code string, which will require the relation module to be executed. @@ -180,7 +180,7 @@ class Node (object): directly called.''' return CallableString(self._toPython()) - def _toPython(self): + def _toPython(self) -> str: ''' Same as toPython but returns a regular string ''' @@ -201,7 +201,7 @@ class Node (object): return '%s.%s(%s)' % (self.child.toPython(), op_functions[self.name], prop) return self.name - def printtree(self, level=0): + def printtree(self, level: int = 0) -> str: '''returns a representation of the tree using indentation''' r = '' for i in range(level): @@ -213,10 +213,9 @@ class Node (object): elif self.name in u_operators: r += '\t%s\n' % self.prop r += self.child.printtree(level + 1) - return '\n' + r - def get_left_leaf(self): + def get_left_leaf(self) -> 'Node': '''This function returns the leftmost leaf in the tree.''' if self.kind == RELATION: return self @@ -225,12 +224,12 @@ class Node (object): elif self.kind == BINARY: return self.left.get_left_leaf() - def result_format(self, rels): + def result_format(self, rels: dict) -> list: '''This function returns a list containing the fields that the resulting relation will have. It requires a dictionary where keys are the names of the relations and the values are the relation objects.''' - if rels == None: - return + if not isinstance(rels, dict): + raise TypeError('Can\'t be of None type') if self.kind == RELATION: return list(rels[self.name].header) @@ -285,7 +284,7 @@ class Node (object): return (le + self.name + re) -def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')'): +def _find_matching_parenthesis(expression: str, start=0, openpar=u'(', closepar=u')') -> int: '''This function returns the position of the matching close parenthesis to the 1st open parenthesis found starting from start (0 by default)''' @@ -304,7 +303,6 @@ def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')') if string: continue - if expression[i] == openpar: par_count += 1 elif expression[i] == closepar: @@ -312,7 +310,7 @@ def _find_matching_parenthesis(expression, start=0, openpar=u'(', closepar=u')') if par_count == 0: return i # Closing parenthesis of the parameter -def _find_token(haystack, needle): +def _find_token(haystack: str, needle: str) -> int: ''' Like the string function find, but ignores tokens that are within a string @@ -337,17 +335,17 @@ def _find_token(haystack, needle): return r -def tokenize(expression): +def tokenize(expression: str) -> list: '''This function converts a relational expression into a list where every token of the expression is an item of a list. Expressions into parenthesis will be converted into sublists.''' - items = [] # List for the tokens + # List for the tokens + items = [] # type: List[Union[str,list]] expression = expression.strip() # Removes initial and ending spaces while len(expression) > 0: - if expression.startswith('('): # Parenthesis state end = _find_matching_parenthesis(expression) if end == None: @@ -384,17 +382,16 @@ def tokenize(expression): break items.append(expression[:r]) expression = expression[r:].strip() - return items -def tree(expression): +def tree(expression: str) -> Node: '''This function parses a relational algebra expression into a AST and returns the root node using the Node class.''' - return node(tokenize(expression)) + return Node(tokenize(expression)) -def parse(expr): +def parse(expr: str) -> CallableString: '''This function parses a relational algebra expression, and returns a CallableString (a string that can be called) whith the corresponding Python expression. diff --git a/relational/relation.py b/relational/relation.py index dd88afb..52b3986 100644 --- a/relational/relation.py +++ b/relational/relation.py @@ -22,6 +22,7 @@ import csv from itertools import chain, repeat from collections import deque +from typing import List, Union from relational.rtypes import * @@ -52,9 +53,9 @@ class Relation (object): ''' __hash__ = None # type: None - def __init__(self, filename=""): + def __init__(self, filename : str = '') -> None: self._readonly = False - self.content = set() + self.content = set() # type: Set[tuple] if len(filename) == 0: # Empty relation self.header = Header([]) @@ -65,14 +66,14 @@ class Relation (object): iterator = ((self.insert(i) for i in reader)) deque(iterator, maxlen=0) - def _make_duplicate(self, copy): + def _make_duplicate(self, copy: 'Relation') -> None: '''Flag that the relation "copy" is pointing to the same set as this relation.''' self._readonly = True copy._readonly = True - def _make_writable(self, copy_content=True): + def _make_writable(self, copy_content : bool = True) -> None: '''If this relation is marked as readonly, this method will copy the content to make it writable too @@ -91,7 +92,7 @@ class Relation (object): def __contains__(self, key): return key in self.content - def save(self, filename): + def save(self, filename: str) -> None: ''' Saves the relation in a file. Will save using the csv format as defined in RFC4180. @@ -107,7 +108,7 @@ class Relation (object): # Writing content, already in the correct format writer.writerows(self.content) - def _rearrange(self, other): + def _rearrange(self, other: 'Relation') -> 'Relation': '''If two relations share the same attributes in a different order, this method will use projection to make them have the same attributes' order. It is not exactely related to relational algebra. Just a method used @@ -123,7 +124,7 @@ class Relation (object): ','.join(self.header), ','.join(other.header) )) - def selection(self, expr): + def selection(self, expr: str) -> 'Relation': ''' Selection, expr must be a valid Python expression; can contain field names. ''' @@ -149,7 +150,7 @@ class Relation (object): "Failed to evaluate %s\n%s" % (expr, e.__str__())) return newt - def product(self, other): + def product(self, other: 'Relation') -> 'Relation': ''' Cartesian product. Attributes of the relations must differ. ''' @@ -168,7 +169,7 @@ class Relation (object): newt.content.add(i + j) return newt - def projection(self, * attributes): + def projection(self, * attributes) -> 'Relation': ''' Can be called in two different ways: a.projection('field1','field2') @@ -199,7 +200,7 @@ class Relation (object): newt.content.add(tuple(row)) return newt - def rename(self, params): + def rename(self, params: 'Relation') -> 'Relation': ''' Takes a dictionary. @@ -215,7 +216,7 @@ class Relation (object): self._make_duplicate(newt) return newt - def intersection(self, other): + def intersection(self, other: 'Relation') -> 'Relation': ''' Intersection operation. The result will contain items present in both operands. @@ -228,7 +229,7 @@ class Relation (object): newt.content = self.content.intersection(other.content) return newt - def difference(self, other): + def difference(self, other: 'Relation') -> 'Relation': '''Difference operation. The result will contain items present in first operand but not in second one. ''' @@ -239,7 +240,7 @@ class Relation (object): newt.content = self.content.difference(other.content) return newt - def division(self, other): + def division(self, other: 'Relation') -> 'Relation': '''Division operator The division is a binary operation that is written as R รท S. The result consists of the restrictions of tuples in R to the @@ -265,7 +266,7 @@ class Relation (object): t = self.projection(d_headers).product(other) return self.projection(d_headers).difference(t.difference(self).projection(d_headers)) - def union(self, other): + def union(self, other: 'Relation') -> 'Relation': '''Union operation. The result will contain items present in first and second operands. ''' @@ -276,18 +277,18 @@ class Relation (object): newt.content = self.content.union(other.content) return newt - def thetajoin(self, other, expr): + def thetajoin(self, other: 'Relation', expr: str) -> 'Relation': '''Defined as product and then selection with the given expression.''' return self.product(other).selection(expr) - def outer(self, other): + def outer(self, other: 'Relation') -> 'Relation': '''Does a left and a right outer join and returns their union.''' a = self.outer_right(other) b = self.outer_left(other) return a.union(b) - def outer_right(self, other): + def outer_right(self, other: 'Relation') -> 'Relation': ''' Outer right join. Considers self as left and param as right. If the tuple has no corrispondence, empy attributes are filled with a "---" @@ -297,7 +298,7 @@ class Relation (object): ''' return other.outer_left(self) - def outer_left(self, other, swap=False): + def outer_left(self, other: 'Relation', swap=False) -> 'Relation': ''' See documentation for outer_right ''' @@ -338,7 +339,7 @@ class Relation (object): return newt - def join(self, other): + def join(self, other: 'Relation') -> 'Relation': ''' Natural join, joins on shared attributes (one or more). If there are no shared attributes, it will behave as the cartesian product. @@ -412,7 +413,7 @@ class Relation (object): return res - def update(self, expr, dic): + def update(self, expr: str, dic: dict) -> int: ''' Updates certain values of a relation. @@ -444,7 +445,7 @@ class Relation (object): self.content = not_affected.content return len(affected) - def insert(self, values): + def insert(self, values: Union[list,tuple]) -> int: ''' Inserts a tuple in the relation. This function will not insert duplicate tuples. @@ -468,7 +469,7 @@ class Relation (object): self.content.add(tuple(map(rstring, values))) return len(self.content) - prevlen - def delete(self, expr): + def delete(self, expr: str) -> int: ''' Delete, expr must be a valid Python expression; can contain field names. @@ -504,7 +505,7 @@ class Header(tuple): def __repr__(self): return "Header(%s)" % super(Header, self).__repr__() - def rename(self, params): + def rename(self, params) -> 'Header': '''Returns a new header, with renamed fields. params is a dictionary of {old:new} names @@ -520,19 +521,19 @@ class Header(tuple): raise Exception('Field not found: %s' % old) return Header(attrs) - def sharedAttributes(self, other): + def sharedAttributes(self, other: 'Header') -> int: '''Returns how many attributes this header has in common with a given one''' return len(set(self).intersection(set(other))) - def union(self, other): + def union(self, other) -> set: '''Returns the union of the sets of attributes with another header.''' return set(self).union(set(other)) - def intersection(self, other): + def intersection(self, other) -> set: '''Returns the set of common attributes with another header.''' return set(self).intersection(set(other)) - def getAttributesId(self, param): + def getAttributesId(self, param) -> List[int]: '''Returns a list with numeric index corresponding to field's name''' try: return [self.index(i) for i in param] diff --git a/relational/rtypes.py b/relational/rtypes.py index dea4a44..001be15 100644 --- a/relational/rtypes.py +++ b/relational/rtypes.py @@ -23,11 +23,12 @@ import datetime import keyword import re +from typing import Union RELATION_NAME_REGEXP = re.compile(r'^[_a-z][_a-z0-9]*$', re.IGNORECASE) -class Rstring (str): +class Rstring(str): '''String subclass with some custom methods''' @@ -37,7 +38,7 @@ class Rstring (str): r'^([0-9]{1,4})(\\|-|/)([0-9]{1,2})(\\|-|/)([0-9]{1,2})$' ) - def autocast(self): + def autocast(self) -> Union[int, float, 'Rdate', 'Rstring']: ''' Returns the automatic cast for this value. @@ -47,7 +48,7 @@ class Rstring (str): except: pass - self._autocast = self + self._autocast = self # type: Union[int, float, 'Rdate', 'Rstring'] if len(self) > 0: if self.isInt(): self._autocast = int(self) @@ -80,7 +81,7 @@ class Rstring (str): no more parsings are needed ''' try: - return self._isdate + return self._isdate # type: ignore except: pass