Package web2py :: Package gluon :: Module validators
Source Code for Module web2py.gluon.validators

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3   
   4  """ 
   5  This file is part of web2py Web Framework (Copyrighted, 2007-2010). 
   6  Developed by Massimo Di Pierro <mdipierro@cs.depaul.edu>. 
   7  License: GPL v2 
   8   
   9  Thanks to ga2arch for help with IS_IN_DB and IS_NOT_IN_DB on GAE 
  10  """ 
  11   
  12  import os 
  13  import re 
  14  import datetime 
  15  import time 
  16  import cgi 
  17  import hmac 
  18  import urllib 
  19  import struct 
  20  import decimal 
  21  import unicodedata 
  22  from cStringIO import StringIO 
  23  from utils import hash, get_digest 
  24   
  25   
  26  __all__ = [ 
  27      'CLEANUP', 
  28      'CRYPT', 
  29      'IS_ALPHANUMERIC', 
  30      'IS_DATE_IN_RANGE', 
  31      'IS_DATE', 
  32      'IS_DATETIME_IN_RANGE', 
  33      'IS_DATETIME', 
  34      'IS_DECIMAL_IN_RANGE', 
  35      'IS_EMAIL', 
  36      'IS_EMPTY_OR', 
  37      'IS_EXPR', 
  38      'IS_FLOAT_IN_RANGE', 
  39      'IS_IMAGE', 
  40      'IS_IN_DB', 
  41      'IS_IN_SET', 
  42      'IS_INT_IN_RANGE', 
  43      'IS_IPV4', 
  44      'IS_LENGTH', 
  45      'IS_LIST_OF', 
  46      'IS_LOWER', 
  47      'IS_MATCH', 
  48      'IS_NOT_EMPTY', 
  49      'IS_NOT_IN_DB', 
  50      'IS_NULL_OR', 
  51      'IS_SLUG', 
  52      'IS_STRONG', 
  53      'IS_TIME', 
  54      'IS_UPLOAD_FILENAME', 
  55      'IS_UPPER', 
  56      'IS_URL', 
  57      ] 
  58   
  59 -def options_sorter(x,y): 
  60      return (str(x[1]).upper()>str(y[1]).upper() and 1) or -1 
  61   
  62 -class Validator(object): 
  63      """ 
  64      Root for all validators, mainly for documentation purposes. 
  65   
  66      Validators are classes used to validate input fields (including forms 
  67      generated from database tables). 
  68   
  69      Here is an example of using a validator with a FORM:: 
  70   
  71          INPUT(_name='a', requires=IS_INT_IN_RANGE(0, 10)) 
  72   
  73      Here is an example of how to require a validator for a table field:: 
  74   
  75          db.define_table('person', SQLField('name')) 
  76          db.person.name.requires=IS_NOT_EMPTY() 
  77   
  78      Validators are always assigned using the requires attribute of a field. A 
  79      field can have a single validator or multiple validators. Multiple 
  80      validators are made part of a list:: 
  81   
  82          db.person.name.requires=[IS_NOT_EMPTY(), IS_NOT_IN_DB(db, 'person.id')] 
  83   
  84      Validators are called by the function accepts on a FORM or other HTML 
  85      helper object that contains a form. They are always called in the order in 
  86      which they are listed. 
  87   
  88      Built-in validators have constructors that take the optional argument error 
  89      message which allows you to change the default error message. 
  90      Here is an example of a validator on a database table:: 
  91   
  92          db.person.name.requires=IS_NOT_EMPTY(error_message=T('fill this')) 
  93   
  94      where we have used the translation operator T to allow for 
  95      internationalization. 
  96   
  97      Notice that default error messages are not translated. 
  98      """ 
  99   
 100 -    def formatter(self, value): 
 101          """ 
 102          For some validators returns a formatted version (matching the validator) 
 103          of value. Otherwise just returns the value. 
 104          """ 
 105          return value 
 106   
 107   
 108 -class IS_MATCH(Validator): 
 109      """ 
 110      example:: 
 111   
 112          INPUT(_type='text', _name='name', requires=IS_MATCH('.+')) 
 113   
 114      the argument of IS_MATCH is a regular expression:: 
 115   
 116          >>> IS_MATCH('.+')('hello') 
 117          ('hello', None) 
 118   
 119          >>> IS_MATCH('.+')('') 
 120          ('', 'invalid expression') 
 121      """ 
 122   
 123 -    def __init__(self, expression, error_message='invalid expression'): 
 124          self.regex = re.compile(expression) 
 125          self.error_message = error_message 
 126   
 127 -    def __call__(self, value): 
 128          match = self.regex.match(value) 
 129          if match: 
 130              return (match.group(), None) 
 131          return (value, self.error_message) 
 132   
 133   
 134 -class IS_EXPR(Validator): 
 135      """ 
 136      example:: 
 137   
 138          INPUT(_type='text', _name='name', 
 139              requires=IS_EXPR('5 < int(value) < 10')) 
 140   
 141      the argument of IS_EXPR must be python condition:: 
 142   
 143          >>> IS_EXPR('int(value) < 2')('1') 
 144          ('1', None) 
 145   
 146          >>> IS_EXPR('int(value) < 2')('2') 
 147          ('2', 'invalid expression') 
 148      """ 
 149   
 150 -    def __init__(self, expression, error_message='invalid expression'): 
 151          self.expression = expression 
 152          self.error_message = error_message 
 153   
 154 -    def __call__(self, value): 
 155          environment = {'value': value} 
 156          exec '__ret__=' + self.expression in environment 
 157          if environment['__ret__']: 
 158              return (value, None) 
 159          return (value, self.error_message) 
 160   
 161   
 162 -class IS_LENGTH(Validator): 
 163      """ 
 164      Checks if length of field's value fits between given boundaries. Works 
 165      for both text and file inputs. 
 166   
 167      Arguments: 
 168   
 169      maxsize: maximum allowed length / size 
 170      minsize: minimum allowed length / size 
 171   
 172      Examples:: 
 173   
 174          #Check if text string is shorter than 33 characters: 
 175          INPUT(_type='text', _name='name', requires=IS_LENGTH(32)) 
 176   
 177          #Check if password string is longer than 5 characters: 
 178          INPUT(_type='password', _name='name', requires=IS_LENGTH(minsize=6)) 
 179   
 180          #Check if uploaded file has size between 1KB and 1MB: 
 181          INPUT(_type='file', _name='name', requires=IS_LENGTH(1048576, 1024)) 
 182   
 183          >>> IS_LENGTH()('') 
 184          ('', None) 
 185          >>> IS_LENGTH()('1234567890') 
 186          ('1234567890', None) 
 187          >>> IS_LENGTH(maxsize=5, minsize=0)('1234567890')  # too long 
 188          ('1234567890', 'enter from 0 to 5 characters') 
 189          >>> IS_LENGTH(maxsize=50, minsize=20)('1234567890')  # too short 
 190          ('1234567890', 'enter from 20 to 50 characters') 
 191      """ 
 192   
 193 -    def __init__(self, maxsize=255, minsize=0, error_message='enter from %(min)s to %(max)s characters'): 
 194          self.maxsize = maxsize 
 195          self.minsize = minsize 
 196          self.error_message = error_message % dict(min=minsize, max=maxsize) 
 197   
 198 -    def __call__(self, value): 
 199          if isinstance(value, cgi.FieldStorage): 
 200              if value.file: 
 201                  value.file.seek(0, os.SEEK_END) 
 202                  length = value.file.tell() 
 203                  value.file.seek(0, os.SEEK_SET) 
 204              else: 
 205                  val = value.value 
 206                  if val: 
 207                      length = len(val) 
 208                  else: 
 209                      length = 0 
 210              if self.minsize <= length <= self.maxsize: 
 211                  return (value, None) 
 212          elif isinstance(value, (str, unicode, list)): 
 213              if self.minsize <= len(value) <= self.maxsize: 
 214                  return (value, None) 
 215          elif self.minsize <= len(str(value)) <= self.maxsize: 
 216              try: 
 217                  value.decode('utf8') 
 218                  return (value, None) 
 219              except: 
 220                  pass 
 221          return (value, self.error_message) 
 222   
 223   
 224 -class IS_IN_SET(Validator): 
 225      """ 
 226      example:: 
 227   
 228          INPUT(_type='text', _name='name', 
 229                requires=IS_IN_SET(['max', 'john'],zero='')) 
 230   
 231      the argument of IS_IN_SET must be a list or set 
 232   
 233          >>> IS_IN_SET(['max', 'john'])('max') 
 234          ('max', None) 
 235          >>> IS_IN_SET(['max', 'john'])('massimo') 
 236          ('massimo', 'value not allowed') 
 237          >>> IS_IN_SET(['max', 'john'], multiple=True)(('max', 'john')) 
 238          ('|max|john|', None) 
 239          >>> IS_IN_SET(['max', 'john'], multiple=True)(('bill', 'john')) 
 240          (('bill', 'john'), 'value not allowed') 
 241      """ 
 242   
 243 -    def __init__( 
 244          self, 
 245          theset, 
 246          labels=None, 
 247          error_message='value not allowed', 
 248          multiple=False, 
 249          zero='', 
 250          sort=False, 
 251          ): 
 252          self.multiple = multiple 
 253          self.theset = [str(item) for item in theset] 
 254          if isinstance(theset, dict): 
 255              self.labels = theset.values() 
 256          elif theset and (isinstance(theset[0], list) or  \ 
 257                             isinstance(theset[0], tuple)) \ 
 258                             and len(theset[0])==2: 
 259              self.theset = [str(item) for item,label in theset] 
 260              self.labels = [str(label) for item,label in theset] 
 261          else: 
 262              self.theset = [str(item) for item in theset] 
 263              self.labels = labels 
 264          self.error_message = error_message 
 265          self.zero = zero 
 266          self.sort = sort 
 267   
 268 -    def options(self): 
 269          if not self.labels: 
 270              items = [(k, k) for (i, k) in enumerate(self.theset)] 
 271          else: 
 272              items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)] 
 273          if self.sort: 
 274              items.sort(options_sorter) 
 275          if self.zero != None and not self.multiple: 
 276              items.insert(0,('',self.zero)) 
 277          return items 
 278   
 279 -    def __call__(self, value): 
 280          if self.multiple: 
 281              values = re.compile("[\w\-:]+").findall(str(value)) 
 282          else: 
 283              values = [value] 
 284          failures = [x for x in values if not x in self.theset] 
 285          if failures: 
 286              if self.multiple and value == None: 
 287                  return (value, None) 
 288              return (value, self.error_message) 
 289          if self.multiple: 
 290              return ('|%s|' % '|'.join(values), None) 
 291          return (value, None) 
 292   
 293   
 294  regex1 = re.compile('[\w_]+\.[\w_]+') 
 295  regex2 = re.compile('%\((?P<name>[^\)]+)\)s') 
 296   
 297   
 298 -class IS_IN_DB(Validator): 
 299      """ 
 300      example:: 
 301   
 302          INPUT(_type='text', _name='name', 
 303                requires=IS_IN_DB(db, db.table, zero='')) 
 304   
 305      used for reference fields, rendered as a dropbox 
 306      """ 
 307   
 308 -    def __init__( 
 309          self, 
 310          dbset, 
 311          field, 
 312          label=None, 
 313          error_message='value not in database', 
 314          orderby=None, 
 315          cache=None, 
 316          multiple=False, 
 317          zero='', 
 318          sort=False, 
 319          _and=None, 
 320          ): 
 321          if hasattr(dbset, 'define_table'): 
 322              self.dbset = dbset() 
 323          else: 
 324              self.dbset = dbset 
 325          self.field = field 
 326          (ktable, kfield) = str(self.field).split('.') 
 327          if not label: 
 328              label = '%%(%s)s' % kfield 
 329          if isinstance(label,str): 
 330              if regex1.match(str(label)): 
 331                  label = '%%(%s)s' % str(label).split('.')[-1] 
 332              ks = regex2.findall(label) 
 333              if not kfield in ks: 
 334                  ks += [kfield] 
 335              fields = ['%s.%s' % (ktable, k) for k in ks] 
 336          else: 
 337              ks = [kfield] 
 338              fields =[str(f) for f in self.dbset._db[ktable]] 
 339          self.fields = fields 
 340          self.label = label 
 341          self.ktable = ktable 
 342          self.kfield = kfield 
 343          self.ks = ks 
 344          self.error_message = error_message 
 345          self.theset = None 
 346          self.orderby = orderby 
 347          self.cache = cache 
 348          self.multiple = multiple 
 349          self.zero = zero 
 350          self.sort = sort 
 351          self._and = _and 
 352   
 353 -    def set_self_id(self, id): 
 354          if self._and: 
 355              self._and.record_id = id 
 356   
 357 -    def build_set(self): 
 358          if self.dbset._db._dbname != 'gql': 
 359              orderby = self.orderby or ', '.join(self.fields) 
 360              dd = dict(orderby=orderby, cache=self.cache) 
 361              records = self.dbset.select(*self.fields, **dd) 
 362          else: 
 363              import contrib.gql 
 364              orderby = self.orderby\ 
 365                   or contrib.gql.SQLXorable('|'.join([k for k in self.ks 
 366                      if k != 'id'])) 
 367              dd = dict(orderby=orderby, cache=self.cache) 
 368              records = \ 
 369                  self.dbset.select(self.dbset._db[self.ktable].ALL, **dd) 
 370          self.theset = [str(r[self.kfield]) for r in records] 
 371          if isinstance(self.label,str): 
 372              self.labels = [self.label % dict(r) for r in records] 
 373          else: 
 374              self.labels = [self.label(r) for r in records] 
 375   
 376 -    def options(self): 
 377          self.build_set() 
 378          items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)] 
 379          if self.sort: 
 380              items.sort(options_sorter) 
 381          if self.zero != None and not self.multiple: 
 382              items.insert(0,('',self.zero)) 
 383          return items 
 384   
 385 -    def __call__(self, value): 
 386          if self.multiple: 
 387              values = re.compile("[\w\-:]+").findall(str(value)) 
 388              if not [x for x in values if not x in self.theset]: 
 389                  return ('|%s|' % '|'.join(values), None) 
 390          elif self.theset: 
 391              if value in self.theset: 
 392                  if self._and: 
 393                      return self._and(value) 
 394                  else: 
 395                      return (value, None) 
 396          else: 
 397              (ktable, kfield) = str(self.field).split('.') 
 398              field = self.dbset._db[ktable][kfield] 
 399              if self.dbset(field == value).count(): 
 400                  if self._and: 
 401                      return self._and(value) 
 402                  else: 
 403                      return (value, None) 
 404          return (value, self.error_message) 
 405   
 406   
 407 -class IS_NOT_IN_DB(Validator): 
 408      """ 
 409      example:: 
 410   
 411          INPUT(_type='text', _name='name', requires=IS_NOT_IN_DB(db, db.table)) 
 412   
 413      makes the field unique 
 414      """ 
 415   
 416 -    def __init__( 
 417          self, 
 418          dbset, 
 419          field, 
 420          error_message='value already in database', 
 421          allowed_override=[], 
 422          ): 
 423          if hasattr(dbset, 'define_table'): 
 424              self.dbset = dbset() 
 425          else: 
 426              self.dbset = dbset 
 427          self.field = field 
 428          self.error_message = error_message 
 429          self.record_id = 0 
 430          self.allowed_override = allowed_override 
 431   
 432 -    def set_self_id(self, id): 
 433          self.record_id = id 
 434   
 435 -    def __call__(self, value): 
 436          if value in self.allowed_override: 
 437              return (value, None) 
 438          (tablename, fieldname) = str(self.field).split('.') 
 439          field = self.dbset._db[tablename][fieldname] 
 440          rows = self.dbset(field == value).select(limitby=(0, 1)) 
 441          if len(rows) > 0: 
 442              if isinstance(self.record_id, dict): 
 443                  for f in self.record_id: 
 444                      if str(getattr(rows[0], f)) != str(self.record_id[f]): 
 445                          return (value, self.error_message) 
 446              elif str(rows[0].id) != str(self.record_id): 
 447                  return (value, self.error_message) 
 448          return (value, None) 
 449   
 450   
 451 -class IS_INT_IN_RANGE(Validator): 
 452      """ 
 453      example:: 
 454   
 455          INPUT(_type='text', _name='name', requires=IS_INT_IN_RANGE(0, 10)) 
 456   
 457          >>> IS_INT_IN_RANGE(1,5)('4') 
 458          (4, None) 
 459          >>> IS_INT_IN_RANGE(1,5)(4) 
 460          (4, None) 
 461          >>> IS_INT_IN_RANGE(1,5)(1) 
 462          (1, None) 
 463          >>> IS_INT_IN_RANGE(1,5)(5) 
 464          (5, 'enter an integer between 1 and 4') 
 465          >>> IS_INT_IN_RANGE(1,5)(5) 
 466          (5, 'enter an integer between 1 and 4') 
 467          >>> IS_INT_IN_RANGE(1,5)(3.5) 
 468          (3, 'enter an integer between 1 and 4') 
 469      """ 
 470   
 471 -    def __init__( 
 472          self, 
 473          minimum, 
 474          maximum, 
 475          error_message = 'enter an integer between %(min)s and %(max)s', 
 476          ): 
 477          self.minimum = int(minimum) 
 478          self.maximum = int(maximum) 
 479          self.error_message = error_message % dict(min=self.minimum, max=self.maximum-1) 
 480   
 481 -    def __call__(self, value): 
 482          try: 
 483              fvalue = float(value) 
 484              value = int(value) 
 485              if value == fvalue and self.minimum <= value < self.maximum: 
 486                  return (value, None) 
 487          except ValueError: 
 488              pass 
 489          return (value, self.error_message) 
 490   
 491   
 492 -class IS_FLOAT_IN_RANGE(Validator): 
 493      """ 
 494      example:: 
 495   
 496          INPUT(_type='text', _name='name', requires=IS_FLOAT_IN_RANGE(0, 10)) 
 497   
 498          >>> IS_FLOAT_IN_RANGE(1,5)('4') 
 499          (4.0, None) 
 500          >>> IS_FLOAT_IN_RANGE(1,5)(4) 
 501          (4.0, None) 
 502          >>> IS_FLOAT_IN_RANGE(1,5)(1) 
 503          (1.0, None) 
 504          >>> IS_FLOAT_IN_RANGE(1,5)(5.1) 
 505          (5.0999999999999996, 'enter a number between 1.0 and 5.0') 
 506          >>> IS_FLOAT_IN_RANGE(1,5)(6.0) 
 507          (6.0, 'enter a number between 1.0 and 5.0') 
 508          >>> IS_FLOAT_IN_RANGE(1,5)(3.5) 
 509          (3.5, None) 
 510      """ 
 511   
 512 -    def __init__( 
 513          self, 
 514          minimum, 
 515          maximum, 
 516          error_message = 'enter a number between %(min)s and %(max)s', 
 517          ): 
 518          self.minimum = float(minimum) 
 519          self.maximum = float(maximum) 
 520          self.error_message = error_message % dict(min=self.minimum, max=self.maximum) 
 521   
 522 -    def __call__(self, value): 
 523          try: 
 524              value = float(value) 
 525              if self.minimum <= value <= self.maximum: 
 526                  return (value, None) 
 527          except (ValueError, TypeError): 
 528              pass 
 529          return (value, self.error_message) 
 530   
 531   
 532 -class IS_DECIMAL_IN_RANGE(Validator): 
 533      """ 
 534      example:: 
 535   
 536          INPUT(_type='text', _name='name', requires=IS_DECIMAL_IN_RANGE(0, 10)) 
 537   
 538          >>> IS_DECIMAL_IN_RANGE(1,5)('4') 
 539          ('4', None) 
 540          >>> IS_DECIMAL_IN_RANGE(1,5)(4) 
 541          (4, None) 
 542          >>> IS_DECIMAL_IN_RANGE(1,5)(1) 
 543          (1, None) 
 544          >>> IS_DECIMAL_IN_RANGE(1,5)(5.1) 
 545          (5.0999999999999996, 'enter a number between 1 and 5') 
 546          >>> IS_DECIMAL_IN_RANGE(5.1,6)(5.1) 
 547          (5.0999999999999996, None) 
 548          >>> IS_DECIMAL_IN_RANGE(5.1,6)('5.1') 
 549          ('5.1', None) 
 550          >>> IS_DECIMAL_IN_RANGE(1,5)(6.0) 
 551          (6.0, 'enter a number between 1 and 5') 
 552          >>> IS_DECIMAL_IN_RANGE(1,5)(3.5) 
 553          (3.5, None) 
 554          >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(3.5) 
 555          (3.5, None) 
 556          >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(6.5) 
 557          (6.5, 'enter a number between 1.5 and 5.5') 
 558      """ 
 559   
 560 -    def __init__( 
 561          self, 
 562          minimum, 
 563          maximum, 
 564          error_message = 'enter a number between %(min)s and %(max)s', 
 565          ): 
 566          self.minimum = decimal.Decimal(str(minimum)) 
 567          self.maximum = decimal.Decimal(str(maximum)) 
 568          self.error_message = error_message % dict(min=self.minimum, max=self.maximum) 
 569   
 570 -    def __call__(self, value): 
 571          try: 
 572              v = decimal.Decimal(str(value)) 
 573              if self.minimum <= v <= self.maximum: 
 574                  return (value, None) 
 575          except (ValueError, TypeError): 
 576              pass 
 577          return (value, self.error_message) 
 578   
 579   
 580 -def is_empty(value, empty_regex=None): 
 581      "test empty field" 
 582      if isinstance(value, (str, unicode)): 
 583          value = value.strip() 
 584          if empty_regex is not None and empty_regex.match(value): 
 585              value = '' 
 586      if value == None or value == '' or value == []: 
 587          return (value, True) 
 588      return (value, False) 
 589   
 590 -class IS_NOT_EMPTY(Validator): 
 591      """ 
 592      example:: 
 593   
 594          INPUT(_type='text', _name='name', requires=IS_NOT_EMPTY()) 
 595   
 596          >>> IS_NOT_EMPTY()(1) 
 597          (1, None) 
 598          >>> IS_NOT_EMPTY()(0) 
 599          (0, None) 
 600          >>> IS_NOT_EMPTY()('x') 
 601          ('x', None) 
 602          >>> IS_NOT_EMPTY()(' x ') 
 603          ('x', None) 
 604          >>> IS_NOT_EMPTY()(None) 
 605          (None, 'enter a value') 
 606          >>> IS_NOT_EMPTY()('') 
 607          ('', 'enter a value') 
 608          >>> IS_NOT_EMPTY()('  ') 
 609          ('', 'enter a value') 
 610          >>> IS_NOT_EMPTY()(' \\n\\t') 
 611          ('', 'enter a value') 
 612          >>> IS_NOT_EMPTY()([]) 
 613          ([], 'enter a value') 
 614          >>> IS_NOT_EMPTY(empty_regex='def')('def') 
 615          ('', 'enter a value') 
 616          >>> IS_NOT_EMPTY(empty_regex='de[fg]')('deg') 
 617          ('', 'enter a value') 
 618          >>> IS_NOT_EMPTY(empty_regex='def')('abc') 
 619          ('abc', None) 
 620      """ 
 621   
 622 -    def __init__(self, error_message='enter a value', empty_regex=None): 
 623          self.error_message = error_message 
 624          if empty_regex is not None: 
 625              self.empty_regex = re.compile(empty_regex) 
 626          else: 
 627              self.empty_regex = None 
 628   
 629 -    def __call__(self, value): 
 630          value, empty = is_empty(value, empty_regex=self.empty_regex) 
 631          if empty: 
 632              return (value, self.error_message) 
 633          return (value, None) 
 634   
 635   
 636 -class IS_ALPHANUMERIC(IS_MATCH): 
 637      """ 
 638      example:: 
 639   
 640          INPUT(_type='text', _name='name', requires=IS_ALPHANUMERIC()) 
 641   
 642          >>> IS_ALPHANUMERIC()('1') 
 643          ('1', None) 
 644          >>> IS_ALPHANUMERIC()('') 
 645          ('', None) 
 646          >>> IS_ALPHANUMERIC()('A_a') 
 647          ('A_a', None) 
 648          >>> IS_ALPHANUMERIC()('!') 
 649          ('!', 'enter only letters, numbers, and underscore') 
 650      """ 
 651   
 652 -    def __init__(self, error_message='enter only letters, numbers, and underscore'): 
 653          IS_MATCH.__init__(self, '^[\w]*$', error_message) 
 654   
 655   
 656 -class IS_EMAIL(Validator): 
 657      """ 
 658      Checks if field's value is a valid email address. Can be set to disallow 
 659      or force addresses from certain domain(s). 
 660   
 661      Email regex adapted from 
 662      http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx, 
 663      generally following the RFCs, except that we disallow quoted strings 
 664      and permit underscores and leading numerics in subdomain labels 
 665   
 666      Arguments: 
 667   
 668      - banned: regex text for disallowed address domains 
 669      - forced: regex text for required address domains 
 670   
 671      Both arguments can also be custom objects with a match(value) method. 
 672   
 673      Examples:: 
 674   
 675          #Check for valid email address: 
 676          INPUT(_type='text', _name='name', 
 677              requires=IS_EMAIL()) 
 678   
 679          #Check for valid email address that can't be from a .com domain: 
 680          INPUT(_type='text', _name='name', 
 681              requires=IS_EMAIL(banned='^.*\.com(|\..*)$')) 
 682   
 683          #Check for valid email address that must be from a .edu domain: 
 684          INPUT(_type='text', _name='name', 
 685              requires=IS_EMAIL(forced='^.*\.edu(|\..*)$')) 
 686   
 687          >>> IS_EMAIL()('a@b.com') 
 688          ('a@b.com', None) 
 689          >>> IS_EMAIL()('abc@def.com') 
 690          ('abc@def.com', None) 
 691          >>> IS_EMAIL()('abc@3def.com') 
 692          ('abc@3def.com', None) 
 693          >>> IS_EMAIL()('abc@def.us') 
 694          ('abc@def.us', None) 
 695          >>> IS_EMAIL()('abc@d_-f.us') 
 696          ('abc@d_-f.us', None) 
 697          >>> IS_EMAIL()('@def.com')           # missing name 
 698          ('@def.com', 'enter a valid email address') 
 699          >>> IS_EMAIL()('"abc@def".com')      # quoted name 
 700          ('"abc@def".com', 'enter a valid email address') 
 701          >>> IS_EMAIL()('abc+def.com')        # no @ 
 702          ('abc+def.com', 'enter a valid email address') 
 703          >>> IS_EMAIL()('abc@def.x')          # one-char TLD 
 704          ('abc@def.x', 'enter a valid email address') 
 705          >>> IS_EMAIL()('abc@def.12')         # numeric TLD 
 706          ('abc@def.12', 'enter a valid email address') 
 707          >>> IS_EMAIL()('abc@def..com')       # double-dot in domain 
 708          ('abc@def..com', 'enter a valid email address') 
 709          >>> IS_EMAIL()('abc@.def.com')       # dot starts domain 
 710          ('abc@.def.com', 'enter a valid email address') 
 711          >>> IS_EMAIL()('abc@def.c_m')        # underscore in TLD 
 712          ('abc@def.c_m', 'enter a valid email address') 
 713          >>> IS_EMAIL()('NotAnEmail')         # missing @ 
 714          ('NotAnEmail', 'enter a valid email address') 
 715          >>> IS_EMAIL()('abc@NotAnEmail')     # missing TLD 
 716          ('abc@NotAnEmail', 'enter a valid email address') 
 717          >>> IS_EMAIL()('customer/department@example.com') 
 718          ('customer/department@example.com', None) 
 719          >>> IS_EMAIL()('$A12345@example.com') 
 720          ('$A12345@example.com', None) 
 721          >>> IS_EMAIL()('!def!xyz%abc@example.com') 
 722          ('!def!xyz%abc@example.com', None) 
 723          >>> IS_EMAIL()('_Yosemite.Sam@example.com') 
 724          ('_Yosemite.Sam@example.com', None) 
 725          >>> IS_EMAIL()('~@example.com') 
 726          ('~@example.com', None) 
 727          >>> IS_EMAIL()('.wooly@example.com')       # dot starts name 
 728          ('.wooly@example.com', 'enter a valid email address') 
 729          >>> IS_EMAIL()('wo..oly@example.com')      # adjacent dots in name 
 730          ('wo..oly@example.com', 'enter a valid email address') 
 731          >>> IS_EMAIL()('pootietang.@example.com')  # dot ends name 
 732          ('pootietang.@example.com', 'enter a valid email address') 
 733          >>> IS_EMAIL()('.@example.com')            # name is bare dot 
 734          ('.@example.com', 'enter a valid email address') 
 735          >>> IS_EMAIL()('Ima.Fool@example.com') 
 736          ('Ima.Fool@example.com', None) 
 737          >>> IS_EMAIL()('Ima Fool@example.com')     # space in name 
 738          ('Ima Fool@example.com', 'enter a valid email address') 
 739      """ 
 740   
 741      regex = re.compile(''' 
 742          ^(?!\.)                            # name may not begin with a dot 
 743          ( 
 744            [-a-z0-9!\#$%&'*+/=?^_`{|}~]     # all legal characters except dot 
 745            | 
 746            (?<!\.)\.                        # single dots only 
 747          )+ 
 748          (?<!\.)                            # name may not end with a dot 
 749          @ 
 750          ( 
 751            [a-z0-9]                         # [sub]domain begins with alphanumeric 
 752            ( 
 753              [-\w]*                         # alphanumeric, underscore, dot, hyphen 
 754              [a-z0-9]                       # ending alphanumeric 
 755            )? 
 756            \.                               # ending dot 
 757          )+ 
 758          [a-z]{2,}$                         # TLD alpha-only 
 759      ''', re.VERBOSE|re.IGNORECASE) 
 760   
 761 -    def __init__(self, 
 762                   banned=None, 
 763                   forced=None, 
 764                   error_message='enter a valid email address'): 
 765          if isinstance(banned, str): 
 766              banned = re.compile(banned) 
 767          if isinstance(forced, str): 
 768              forced = re.compile(forced) 
 769          self.banned = banned 
 770          self.forced = forced 
 771          self.error_message = error_message 
 772   
 773 -    def __call__(self, value): 
 774          match = self.regex.match(value) 
 775          if match: 
 776              domain = value.split('@')[1] 
 777              if (not self.banned or not self.banned.match(domain)) \ 
 778                      and (not self.forced or self.forced.match(domain)): 
 779                  return (value, None) 
 780          return (value, self.error_message) 
 781   
 782   
 783  # URL scheme source: 
 784  # <http://en.wikipedia.org/wiki/URI_scheme> obtained on 2008-Nov-10 
 785   
 786  official_url_schemes = [ 
 787      'aaa', 
 788      'aaas', 
 789      'acap', 
 790      'cap', 
 791      'cid', 
 792      'crid', 
 793      'data', 
 794      'dav', 
 795      'dict', 
 796      'dns', 
 797      'fax', 
 798      'file', 
 799      'ftp', 
 800      'go', 
 801      'gopher', 
 802      'h323', 
 803      'http', 
 804      'https', 
 805      'icap', 
 806      'im', 
 807      'imap', 
 808      'info', 
 809      'ipp', 
 810      'iris', 
 811      'iris.beep', 
 812      'iris.xpc', 
 813      'iris.xpcs', 
 814      'iris.lws', 
 815      'ldap', 
 816      'mailto', 
 817      'mid', 
 818      'modem', 
 819      'msrp', 
 820      'msrps', 
 821      'mtqp', 
 822      'mupdate', 
 823      'news', 
 824      'nfs', 
 825      'nntp', 
 826      'opaquelocktoken', 
 827      'pop', 
 828      'pres', 
 829      'prospero', 
 830      'rtsp', 
 831      'service', 
 832      'shttp', 
 833      'sip', 
 834      'sips', 
 835      'snmp', 
 836      'soap.beep', 
 837      'soap.beeps', 
 838      'tag', 
 839      'tel', 
 840      'telnet', 
 841      'tftp', 
 842      'thismessage', 
 843      'tip', 
 844      'tv', 
 845      'urn', 
 846      'vemmi', 
 847      'wais', 
 848      'xmlrpc.beep', 
 849      'xmlrpc.beep', 
 850      'xmpp', 
 851      'z39.50r', 
 852      'z39.50s', 
 853      ] 
 854  unofficial_url_schemes = [ 
 855      'about', 
 856      'adiumxtra', 
 857      'aim', 
 858      'afp', 
 859      'aw', 
 860      'callto', 
 861      'chrome', 
 862      'cvs', 
 863      'ed2k', 
 864      'feed', 
 865      'fish', 
 866      'gg', 
 867      'gizmoproject', 
 868      'iax2', 
 869      'irc', 
 870      'ircs', 
 871      'itms', 
 872      'jar', 
 873      'javascript', 
 874      'keyparc', 
 875      'lastfm', 
 876      'ldaps', 
 877      'magnet', 
 878      'mms', 
 879      'msnim', 
 880      'mvn', 
 881      'notes', 
 882      'nsfw', 
 883      'psyc', 
 884      'paparazzi:http', 
 885      'rmi', 
 886      'rsync', 
 887      'secondlife', 
 888      'sgn', 
 889      'skype', 
 890      'ssh', 
 891      'sftp', 
 892      'smb', 
 893      'sms', 
 894      'soldat', 
 895      'steam', 
 896      'svn', 
 897      'teamspeak', 
 898      'unreal', 
 899      'ut2004', 
 900      'ventrilo', 
 901      'view-source', 
 902      'webcal', 
 903      'wyciwyg', 
 904      'xfire', 
 905      'xri', 
 906      'ymsgr', 
 907      ] 
 908  all_url_schemes = [None] + official_url_schemes + unofficial_url_schemes 
 909  http_schemes = [None, 'http', 'https'] 
 910   
 911   
 912  # This regex comes from RFC 2396, Appendix B. It's used to split a URL into 
 913  # its component parts 
 914  # Here are the regex groups that it extracts: 
 915  #    scheme = group(2) 
 916  #    authority = group(4) 
 917  #    path = group(5) 
 918  #    query = group(7) 
 919  #    fragment = group(9) 
 920   
 921  url_split_regex = \ 
 922      re.compile('^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?') 
 923   
 924  # Defined in RFC 3490, Section 3.1, Requirement #1 
 925  # Use this regex to split the authority component of a unicode URL into 
 926  # its component labels 
 927  label_split_regex = re.compile(u'[\u002e\u3002\uff0e\uff61]') 
 928   
 929   
 930 -def escape_unicode(string): 
 931      ''' 
 932      Converts a unicode string into US-ASCII, using a simple conversion scheme. 
 933      Each unicode character that does not have a US-ASCII equivalent is 
 934      converted into a URL escaped form based on its hexadecimal value. 
 935      For example, the unicode character '\u4e86' will become the string '%4e%86' 
 936   
 937      :param string: unicode string, the unicode string to convert into an 
 938          escaped US-ASCII form 
 939      :returns: the US-ASCII escaped form of the inputted string 
 940      :rtype: string 
 941   
 942      @author: Jonathan Benn 
 943      ''' 
 944      returnValue = StringIO() 
 945   
 946      for character in string: 
 947          code = ord(character) 
 948          if code > 0x7F: 
 949              hexCode = hex(code) 
 950              returnValue.write('%' + hexCode[2:4] + '%' + hexCode[4:6]) 
 951          else: 
 952              returnValue.write(character) 
 953   
 954      return returnValue.getvalue() 
 955   
 956   
 957 -def unicode_to_ascii_authority(authority): 
 958      ''' 
 959      Follows the steps in RFC 3490, Section 4 to convert a unicode authority 
 960      string into its ASCII equivalent. 
 961      For example, u'www.Alliancefran\xe7aise.nu' will be converted into 
 962      'www.xn--alliancefranaise-npb.nu' 
 963   
 964      :param authority: unicode string, the URL authority component to convert, 
 965                        e.g. u'www.Alliancefran\xe7aise.nu' 
 966      :returns: the US-ASCII character equivalent to the inputed authority, 
 967               e.g. 'www.xn--alliancefranaise-npb.nu' 
 968      :rtype: string 
 969      :raises Exception: if the function is not able to convert the inputed 
 970          authority 
 971   
 972      @author: Jonathan Benn 
 973      ''' 
 974      #RFC 3490, Section 4, Step 1 
 975      #The encodings.idna Python module assumes that AllowUnassigned == True 
 976   
 977      #RFC 3490, Section 4, Step 2 
 978      labels = label_split_regex.split(authority) 
 979   
 980      #RFC 3490, Section 4, Step 3 
 981      #The encodings.idna Python module assumes that UseSTD3ASCIIRules == False 
 982   
 983      #RFC 3490, Section 4, Step 4 
 984      #We use the ToASCII operation because we are about to put the authority 
 985      #into an IDN-unaware slot 
 986      asciiLabels = [] 
 987      try: 
 988          import encodings.idna 
 989          for label in labels: 
 990              if label: 
 991                  asciiLabels.append(encodings.idna.ToASCII(label)) 
 992              else: 
 993                   #encodings.idna.ToASCII does not accept an empty string, but 
 994                   #it is necessary for us to allow for empty labels so that we 
 995                   #don't modify the URL 
 996                  asciiLabels.append('') 
 997      except: 
 998          asciiLabels=[str(label) for label in labels] 
 999      #RFC 3490, Section 4, Step 5 
1000      return str(reduce(lambda x, y: x + unichr(0x002E) + y, asciiLabels)) 
1001   
1002   
def unicode_to_ascii_url(url, prepend_scheme):
    '''
    Converts the input unicode url into a US-ASCII equivalent. This function
    goes a little beyond RFC 3490, which is limited in scope to the domain name
    (authority) only. Here, the functionality is expanded to what was observed
    on Wikipedia on 2009-Jan-22:

       Component    Can Use Unicode?
       ---------    ----------------
       scheme       No
       authority    Yes
       path         Yes
       query        Yes
       fragment     No

    The authority component gets converted to punycode, but occurrences of
    unicode in the path and query get converted into URI escapes by
    escape_unicode(). Testing with Firefox v3.0.5 has shown that it can
    understand this kind of URI encoding.

    A scheme that is already part of ``url`` is always kept. A scheme that
    had to be added only to make the URL parseable is kept in the result
    only when ``prepend_scheme`` is set. A URL that starts with '//' (an
    authority without a scheme) keeps its leading '//'.

    :param url: unicode string, the URL to convert from unicode into US-ASCII
    :param prepend_scheme: string, a protocol scheme to prepend to the URL if
        we're having trouble parsing it.
        e.g. "http". Input None to disable this functionality
    :returns: a US-ASCII equivalent of the input url
    :rtype: string
    :raises Exception: if no authority component can be found, even after
        prepending a scheme

    @author: Jonathan Benn
    '''
    #convert the authority component of the URL into an ASCII punycode string,
    #but encode the rest using the regular URI character encoding

    groups = url_split_regex.match(url).groups()
    scheme_was_added = False
    #If no authority was found
    if not groups[3]:
        #Try prepending a scheme to see if that fixes the problem
        scheme_to_prepend = prepend_scheme or 'http'
        groups = url_split_regex.match(
            unicode(scheme_to_prepend) + u'://' + url).groups()
        scheme_was_added = True
    #if we still can't find the authority
    if not groups[3]:
        raise Exception('No authority component found, '
                        'could not decode unicode to US-ASCII')

    #We're here if we found an authority, let's rebuild the URL
    scheme = groups[1]
    authority = groups[3]
    path = groups[4] or ''
    #groups[5] and groups[7] include the leading '?' and '#' respectively
    query = groups[5] or ''
    fragment = groups[7] or ''

    if scheme is None:
        #'//authority/...' input: there is no scheme to write back, so keep
        #the URL's own '//' instead of emitting the text 'None://'
        prefix = '//'
    elif scheme_was_added and not prepend_scheme:
        #the scheme was only borrowed for parsing and prepending is disabled
        prefix = ''
    else:
        #the scheme came from the URL itself, or prepending is enabled
        prefix = str(scheme) + '://'
    return prefix + unicode_to_ascii_authority(authority) + \
        escape_unicode(path) + escape_unicode(query) + str(fragment)
1061   
1062   
class IS_GENERIC_URL(Validator):
    """
    Rejects a URL string if any of the following is true:
       * The string is empty or None
       * The string uses characters that are not allowed in a URL
       * The URL scheme specified (if one is specified) is not valid

    Based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html

    This function only checks the URL's syntax. It does not check that the URL
    points to a real document, for example, or that it otherwise makes sense
    semantically. This function does automatically prepend 'http://' in front
    of a URL if and only if that's necessary to successfully parse the URL.
    Please note that a scheme will be prepended only for rare cases
    (e.g. 'google.ca:80')

    The list of allowed schemes is customizable with the allowed_schemes
    parameter. If you exclude None from the list, then abbreviated URLs
    (lacking a scheme such as 'http') will be rejected.

    The default prepended scheme is customizable with the prepend_scheme
    parameter. If you set prepend_scheme to None then prepending will be
    disabled. URLs that require prepending to parse will still be accepted,
    but the return value will not be modified.

    @author: Jonathan Benn
    """

    # Finds any misuse of '%', i.e. a '%' that is not followed by exactly
    # two hexadecimal digits. Compiled once instead of on every call.
    _bad_escape_regex = re.compile(
        r"%[^0-9A-Fa-f]{2}|%[^0-9A-Fa-f][0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]|%$|%[0-9A-Fa-f]$|%[^0-9A-Fa-f]$")

    # Matches a non-empty string made only of characters allowed in a URL
    _valid_chars_regex = re.compile(
        r"[A-Za-z0-9;/?:@&=+$,\-_\.!~*'\(\)%#]+$")

    def __init__(
        self,
        error_message='enter a valid URL',
        allowed_schemes=None,
        prepend_scheme=None,
        ):
        """
        :param error_message: a string, the error message to give the end user
            if the URL does not validate
        :param allowed_schemes: a list containing strings or None. Each element
            is a scheme the input URL is allowed to use. When omitted, every
            scheme in all_url_schemes is allowed
        :param prepend_scheme: a string, this scheme is prepended if it's
            necessary to make the URL valid
        :raises SyntaxError: if prepend_scheme is not one of the allowed
            schemes
        """

        self.error_message = error_message
        if allowed_schemes is None:
            self.allowed_schemes = all_url_schemes
        else:
            self.allowed_schemes = allowed_schemes
        self.prepend_scheme = prepend_scheme
        if self.prepend_scheme not in self.allowed_schemes:
            raise SyntaxError(
                "prepend_scheme='%s' is not in allowed_schemes=%s"
                % (self.prepend_scheme, self.allowed_schemes))

    def __call__(self, value):
        """
        :param value: a string, the URL to validate
        :returns: a tuple, where tuple[0] is the input value (possibly
            prepended with prepend_scheme), and tuple[1] is either
            None (success!) or the string error_message
        """
        try:
            # the URL must not misuse the '%' character ...
            # ... and must be composed of valid characters only
            if not self._bad_escape_regex.search(value) \
                    and self._valid_chars_regex.match(value):
                # Split up the URL into its components and check on the
                # scheme
                scheme = url_split_regex.match(value).group(2)
                # Clean up the scheme before we check it
                if scheme is not None:
                    scheme = urllib.unquote(scheme).lower()
                # If the scheme really exists, then the URL is valid
                if scheme in self.allowed_schemes:
                    return (value, None)
                # else, for the possible case of abbreviated URLs with
                # ports, check to see if adding a valid scheme fixes the
                # problem (but only do this if it doesn't have one already!)
                if '://' not in value and None in self.allowed_schemes:
                    scheme_to_use = self.prepend_scheme or 'http'
                    prepend_test = self.__call__(
                        scheme_to_use + '://' + value)
                    # if the prepend test succeeded
                    if prepend_test[1] is None:
                        # if prepending in the output is enabled
                        if self.prepend_scheme:
                            return prepend_test
                        # else return the original, non-prepended value
                        return (value, None)
        except Exception:
            # Deliberate: anything that cannot even be examined (e.g. None
            # or another non-string value) is reported as an invalid URL
            # instead of raising out of the validator.
            pass
        # else the URL is not valid
        return (value, self.error_message)
1165   
# Sources (obtained 2008-Nov-11):
#    http://en.wikipedia.org/wiki/Top-level_domain
#    http://www.iana.org/domains/root/db/
#
# The domains are written as one whitespace-separated block, grouped by
# first letter, and split into a list of plain strings. The order of the
# entries is the same as in the one-per-line form.

official_top_level_domains = """
    ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az
    ba bb bd be bf bg bh bi biz bj bl bm bn bo br bs bt bv bw by bz
    ca cat cc cd cf cg ch ci ck cl cm cn co com coop cr cu cv cx cy cz
    de dj dk dm do dz
    ec edu ee eg eh er es et eu example
    fi fj fk fm fo fr
    ga gb gd ge gf gg gh gi gl gm gn gov gp gq gr gs gt gu gw gy
    hk hm hn hr ht hu
    id ie il im in info int invalid io iq ir is it
    je jm jo jobs jp
    ke kg kh ki km kn kp kr kw ky kz
    la lb lc li lk localhost lr ls lt lu lv ly
    ma mc md me mf mg mh mil mk ml mm mn mo mobi mp mq mr ms mt mu museum
    mv mw mx my mz
    na name nc ne net nf ng ni nl no np nr nu nz
    om org
    pa pe pf pg ph pk pl pm pn pr pro ps pt pw py
    qa
    re ro rs ru rw
    sa sb sc sd se sg sh si sj sk sl sm sn so sr st su sv sy sz
    tc td tel test tf tg th tj tk tl tm tn to tp tr travel tt tv tw tz
    ua ug uk um us uy uz
    va vc ve vg vi vn vu
    wf ws
    xn--0zwm56d xn--11b5bs3a9aj6g xn--80akhbyknj4f xn--9t4b11yi5a
    xn--deba0ad xn--g6w251d xn--hgbk6aj7f53bba xn--hlcj6aya9esc7a
    xn--jxalpdlp xn--kgbechtv xn--zckzah
    ye yt yu
    za zm zw
    """.split()
1460   
1461   
class IS_HTTP_URL(Validator):
    """
    Rejects a URL string if any of the following is true:
       * The string is empty or None
       * The string uses characters that are not allowed in a URL
       * The string breaks any of the HTTP syntactic rules
       * The URL scheme specified (if one is specified) is not 'http' or 'https'
       * The top-level domain (if a host name is specified) does not exist

    Based on RFC 2616: http://www.faqs.org/rfcs/rfc2616.html

    This function only checks the URL's syntax. It does not check that the URL
    points to a real document, for example, or that it otherwise makes sense
    semantically. This function does automatically prepend 'http://' in front
    of a URL in the case of an abbreviated URL (e.g. 'google.ca').

    The list of allowed schemes is customizable with the allowed_schemes
    parameter. If you exclude None from the list, then abbreviated URLs
    (lacking a scheme such as 'http') will be rejected.

    The default prepended scheme is customizable with the prepend_scheme
    parameter. If you set prepend_scheme to None then prepending will be
    disabled. URLs that require prepending to parse will still be accepted,
    but the return value will not be modified.

    @author: Jonathan Benn
    """

    # Authority written as four dot-separated groups of digits, optionally
    # followed by port parts. The digit groups are not range-checked.
    _ip_authority_regex = re.compile(r'\d+\.\d+\.\d+\.\d+(:\d*)*$')

    # Authority written as a domain name: any number of dot-terminated
    # labels, then the top-level domain (group 1), an optional trailing dot
    # and optional port parts.
    #
    # This accepts the same strings as the earlier pattern
    #   (([A-Za-z0-9]+[A-Za-z0-9\-]*[A-Za-z0-9]+\.)*([A-Za-z0-9]+\.)*)*(...)
    # but each label can be matched in only one way. The earlier nested
    # repetition could match the same label through two alternatives, which
    # makes the matching time grow exponentially with the number of labels
    # whenever the overall match fails.
    _domain_authority_regex = re.compile(
        r'(?:[A-Za-z0-9](?:[A-Za-z0-9\-]*[A-Za-z0-9])?\.)*'
        r'([A-Za-z][A-Za-z0-9\-]*[A-Za-z0-9])\.?(:\d*)*$')

    def __init__(
        self,
        error_message='enter a valid URL',
        allowed_schemes=None,
        prepend_scheme='http',
        ):
        """
        :param error_message: a string, the error message to give the end user
            if the URL does not validate
        :param allowed_schemes: a list containing strings or None. Each element
            is a scheme the input URL is allowed to use. When omitted,
            http_schemes is used
        :param prepend_scheme: a string, this scheme is prepended if it's
            necessary to make the URL valid
        :raises SyntaxError: if an allowed scheme is not an HTTP scheme, or if
            prepend_scheme is not one of the allowed schemes
        """

        self.error_message = error_message
        if allowed_schemes is None:
            self.allowed_schemes = http_schemes
        else:
            self.allowed_schemes = allowed_schemes
        self.prepend_scheme = prepend_scheme

        for i in self.allowed_schemes:
            if i not in http_schemes:
                raise SyntaxError(
                    "allowed_scheme value '%s' is not in %s" %
                    (i, http_schemes))

        if self.prepend_scheme not in self.allowed_schemes:
            raise SyntaxError(
                "prepend_scheme='%s' is not in allowed_schemes=%s" %
                (self.prepend_scheme, self.allowed_schemes))

    def __call__(self, value):
        """
        :param value: a string, the URL to validate
        :returns: a tuple, where tuple[0] is the input value
            (possibly prepended with prepend_scheme), and tuple[1] is either
            None (success!) or the string error_message
        """

        try:
            # if the URL passes generic validation
            generic = IS_GENERIC_URL(error_message=self.error_message,
                                     allowed_schemes=self.allowed_schemes,
                                     prepend_scheme=self.prepend_scheme)
            if generic(value)[1] is None:
                components = url_split_regex.match(value)
                authority = components.group(4)
                # if there is an authority component
                if authority:
                    # if authority looks like an IP address
                    if self._ip_authority_regex.match(authority):
                        # Then this HTTP URL is valid
                        return (value, None)
                    # else if authority is a valid domain name whose
                    # top-level domain really exists
                    domain = self._domain_authority_regex.match(authority)
                    if domain and domain.group(1).lower() \
                            in official_top_level_domains:
                        # Then this HTTP URL is valid
                        return (value, None)
                else:
                    # else this is a relative/abbreviated URL, which will
                    # parse into the URL's path component
                    path = components.group(5)
                    # relative case: if this is a valid path (if it starts
                    # with a slash)
                    if path.startswith('/'):
                        # Then this HTTP URL is valid
                        return (value, None)
                    # abbreviated case: if we haven't already, prepend a
                    # scheme and see if it fixes the problem
                    if '://' not in value:
                        scheme_to_use = self.prepend_scheme or 'http'
                        prepend_test = self.__call__(
                            scheme_to_use + '://' + value)
                        # if the prepend test succeeded
                        if prepend_test[1] is None:
                            # if prepending in the output is enabled
                            if self.prepend_scheme:
                                return prepend_test
                            # else return the original, non-prepended value
                            return (value, None)
        except Exception:
            # Deliberate: any failure while examining the value (including
            # a rejected scheme configuration in IS_GENERIC_URL) is reported
            # as an invalid URL instead of raising out of the validator.
            pass
        # else the HTTP URL is not valid
        return (value, self.error_message)
1587   
1588   
class IS_URL(Validator):
    """
    Rejects a URL string if any of the following is true:
       * The string is empty or None
       * The string uses characters that are not allowed in a URL
       * The string breaks any of the HTTP syntactic rules
       * The URL scheme specified (if one is specified) is not 'http' or 'https'
       * The top-level domain (if a host name is specified) does not exist

    (These rules are based on RFC 2616: http://www.faqs.org/rfcs/rfc2616.html)

    This function only checks the URL's syntax. It does not check that the URL
    points to a real document, for example, or that it otherwise makes sense
    semantically. This function does automatically prepend 'http://' in front
    of a URL in the case of an abbreviated URL (e.g. 'google.ca').

    If the parameter mode='generic' is used, then this function's behavior
    changes. It then rejects a URL string if any of the following is true:
       * The string is empty or None
       * The string uses characters that are not allowed in a URL
       * The URL scheme specified (if one is specified) is not valid

    (These rules are based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html)

    The list of allowed schemes is customizable with the allowed_schemes
    parameter. If you exclude None from the list, then abbreviated URLs
    (lacking a scheme such as 'http') will be rejected.

    The default prepended scheme is customizable with the prepend_scheme
    parameter. If you set prepend_scheme to None then prepending will be
    disabled. URLs that require prepending to parse will still be accepted,
    but the return value will not be modified.

    IS_URL is compatible with the Internationalized Domain Name (IDN) standard
    specified in RFC 3490 (http://tools.ietf.org/html/rfc3490). As a result,
    URLs can be regular strings or unicode strings.
    If the URL's domain component (e.g. google.ca) contains non-US-ASCII
    letters, then the domain will be converted into Punycode (defined in
    RFC 3492, http://tools.ietf.org/html/rfc3492). IS_URL goes a bit beyond
    the standards, and allows non-US-ASCII characters to be present in the path
    and query components of the URL as well. These non-US-ASCII characters will
    be escaped using the standard '%20' type syntax. e.g. the unicode
    character with hex code 0x4e86 will become '%4e%86'

    Code Examples::

        INPUT(_type='text', _name='name', requires=IS_URL())
        INPUT(_type='text', _name='name', requires=IS_URL(mode='generic'))
        INPUT(_type='text', _name='name',
            requires=IS_URL(allowed_schemes=['https']))
        INPUT(_type='text', _name='name',
            requires=IS_URL(prepend_scheme='https'))
        INPUT(_type='text', _name='name',
            requires=IS_URL(mode='generic', allowed_schemes=['ftps', 'https'],
                prepend_scheme='https'))

    @author: Jonathan Benn
    """

    def __init__(
        self,
        error_message='enter a valid URL',
        mode='http',
        allowed_schemes=None,
        prepend_scheme='http',
        ):
        """
        :param error_message: a string, the error message to give the end user
            if the URL does not validate
        :param mode: 'http' (the default) or 'generic', case-insensitive;
            selects which set of rules is applied
        :param allowed_schemes: a list containing strings or None. Each element
            is a scheme the input URL is allowed to use
        :param prepend_scheme: a string, this scheme is prepended if it's
            necessary to make the URL valid
        :raises SyntaxError: if mode is unknown, or if allowed_schemes is
            given and does not contain prepend_scheme
        """

        self.error_message = error_message
        self.mode = mode.lower()
        if self.mode not in ['generic', 'http']:
            raise SyntaxError("invalid mode '%s' in IS_URL" % self.mode)
        self.allowed_schemes = allowed_schemes

        # if allowed_schemes is None (or empty), then testing
        # prepend_scheme's validity is deferred to the sub-validator
        if self.allowed_schemes \
                and prepend_scheme not in self.allowed_schemes:
            raise SyntaxError(
                "prepend_scheme='%s' is not in allowed_schemes=%s"
                % (prepend_scheme, self.allowed_schemes))

        self.prepend_scheme = prepend_scheme

    def __call__(self, value):
        """
        :param value: a unicode or regular string, the URL to validate
        :returns: a (string, string) tuple, where tuple[0] is the modified
            input value and tuple[1] is either None (success!) or the
            string error_message. The input value will never be modified in the
            case of an error. However, if there is success then the input URL
            may be modified to (1) prepend a scheme, and/or (2) convert a
            non-compliant unicode URL into a compliant US-ASCII version.
        """

        # self.mode is checked again here in case it was changed after
        # construction
        if self.mode == 'generic':
            validator_class = IS_GENERIC_URL
        elif self.mode == 'http':
            validator_class = IS_HTTP_URL
        else:
            raise SyntaxError("invalid mode '%s' in IS_URL" % self.mode)
        subMethod = validator_class(error_message=self.error_message,
                                    allowed_schemes=self.allowed_schemes,
                                    prepend_scheme=self.prepend_scheme)

        # isinstance (rather than an exact type comparison) so that
        # subclasses of unicode also go through the IDN conversion
        if not isinstance(value, unicode):
            return subMethod(value)

        try:
            asciiValue = unicode_to_ascii_url(value, self.prepend_scheme)
        except Exception:
            #If we are not able to convert the unicode url into a
            # US-ASCII URL, then the URL is not valid
            return (value, self.error_message)

        methodResult = subMethod(asciiValue)
        #if the validation of the US-ASCII version of the value failed
        if methodResult[1] is not None:
            # then return the original input value, not the US-ASCII version
            return (value, methodResult[1])
        return methodResult
1720   
1721   
# Pattern used by IS_TIME to pick a time of day apart.  Named groups:
#   h - hours (one or more digits, required)
#   m - minutes, preceded by a separator made of non-digit, non-space chars
#   s - seconds, preceded by a separator that also excludes 'a' and 'p'
#   d - the optional 'am' / 'pm' designator
# The pattern is not anchored at the end; IS_TIME applies it with match().
regex_time = re.compile(
    '((?P<h>[0-9]+))([^0-9 ]+(?P<m>[0-9 ]+))?([^0-9ap ]+(?P<s>[0-9]*))?((?P<d>[ap]m))?')
1724   
1725   
class IS_TIME(Validator):
    """
    example::

        INPUT(_type='text', _name='name', requires=IS_TIME())

    understands the following formats
    hh:mm:ss [am/pm]
    hh:mm [am/pm]
    hh [am/pm]

    [am/pm] is optional, ':' can be replaced by any other non-space non-digit

    On success the value is converted to a datetime.time; on failure the
    original input is returned together with the error message.

        >>> IS_TIME()('21:30')
        (datetime.time(21, 30), None)
        >>> IS_TIME()('21-30')
        (datetime.time(21, 30), None)
        >>> IS_TIME()('21.30')
        (datetime.time(21, 30), None)
        >>> IS_TIME()('21:30:59')
        (datetime.time(21, 30, 59), None)
        >>> IS_TIME()('5:30')
        (datetime.time(5, 30), None)
        >>> IS_TIME()('5:30 am')
        (datetime.time(5, 30), None)
        >>> IS_TIME()('5:30 pm')
        (datetime.time(17, 30), None)
        >>> IS_TIME()('12:30 am')
        (datetime.time(0, 30), None)
        >>> IS_TIME()('12:30 pm')
        (datetime.time(12, 30), None)
        >>> IS_TIME()('5:30 whatever')
        ('5:30 whatever', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('5:30 20')
        ('5:30 20', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('24:30')
        ('24:30', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('21:60')
        ('21:60', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('21:30::')
        ('21:30::', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('')
        ('', 'enter time as hh:mm:ss (seconds, am, pm optional)')
    """

    def __init__(self, error_message='enter time as hh:mm:ss (seconds, am, pm optional)'):
        self.error_message = error_message

    def __call__(self, value):
        """
        :param value: the time string to parse
        :returns: (datetime.time, None) on success, otherwise
            (original value, error_message)
        """
        ivalue = value
        try:
            match = regex_time.match(value.lower())
            (h, m, s) = (int(match.group('h')), 0, 0)
            if match.group('m') is not None:
                m = int(match.group('m'))
            if match.group('s') is not None:
                s = int(match.group('s'))
            designator = match.group('d')
            if designator == 'pm' and 0 < h < 12:
                h = h + 12
            elif designator == 'am' and h == 12:
                # 12:xx am is the first hour of the day, not noon
                h = 0
            if 0 <= h < 24 and 0 <= m < 60 and 0 <= s < 60:
                return (datetime.time(h, m, s), None)
        except (AttributeError, ValueError):
            # AttributeError: value is not a string, or the pattern did not
            # match (match is None).  ValueError: a captured group is not a
            # valid integer (e.g. empty seconds after a stray separator).
            pass
        return (ivalue, self.error_message)
1792   
1793   
class IS_DATE(Validator):
    """
    example::

        INPUT(_type='text', _name='name', requires=IS_DATE())

    date has to be in the ISO 8601 format YYYY-MM-DD, unless a different
    strptime ``format`` is passed to the constructor.

    On success the value is converted to a datetime.date.  The error message
    may contain ``%(format)s``, which is replaced by a sample date rendered
    by IS_DATETIME.nice().
    """

    def __init__(self, format='%Y-%m-%d',
                 error_message='enter date as %(format)s'):
        self.format = str(format)
        self.error_message = str(error_message)

    def __call__(self, value):
        """
        :param value: the date string to parse with ``self.format``
        :returns: (datetime.date, None) on success, otherwise
            (original value, formatted error message)
        """
        try:
            (y, m, d) = time.strptime(value, str(self.format))[:3]
            return (datetime.date(y, m, d), None)
        except Exception:
            # Any parsing problem means invalid input; unlike a bare except
            # this lets KeyboardInterrupt / SystemExit propagate.
            return (value, self.error_message % IS_DATETIME.nice(self.format))

    def formatter(self, value):
        """Render a date object back into a string using ``self.format``."""
        return value.strftime(self.format)
1819   
1820   
class IS_DATETIME(Validator):
    """
    example::

        INPUT(_type='text', _name='name', requires=IS_DATETIME())

    datetime has to be in the ISO 8601 format YYYY-MM-DD hh:mm:ss, unless a
    different strptime ``format`` is passed to the constructor.

    On success the value is converted to a datetime.datetime.  The error
    message may contain ``%(format)s``, which is replaced by a sample
    date/time rendered by nice().
    """

    isodatetime = '%Y-%m-%d %H:%M:%S'

    @staticmethod
    def nice(format):
        """
        Turn a strptime format into a human readable sample.

        Each supported directive is replaced by a sample value, e.g.
        '%Y-%m-%d' becomes '1963-08-28'.

        :param format: a strptime format string
        :returns: dict(format=<sample string>), ready to be interpolated
            into an error message containing ``%(format)s``
        """
        code = (('%Y', '1963'),
                ('%y', '63'),
                ('%d', '28'),
                ('%m', '08'),
                ('%b', 'Aug'),
                ('%B', 'August'),   # full month name (was a duplicate '%b')
                ('%H', '14'),
                ('%I', '02'),
                ('%p', 'PM'),
                ('%M', '30'),
                ('%S', '59'))
        for (directive, sample) in code:
            format = format.replace(directive, sample)
        return dict(format=format)

    def __init__(self, format='%Y-%m-%d %H:%M:%S',
                 error_message='enter date and time as %(format)s'):
        self.format = str(format)
        self.error_message = str(error_message)

    def __call__(self, value):
        """
        :param value: the date/time string to parse with ``self.format``
        :returns: (datetime.datetime, None) on success, otherwise
            (original value, formatted error message)
        """
        try:
            (y, m, d, hh, mm, ss) = time.strptime(value, str(self.format))[:6]
            return (datetime.datetime(y, m, d, hh, mm, ss), None)
        except Exception:
            # Any parsing problem means invalid input; unlike a bare except
            # this lets KeyboardInterrupt / SystemExit propagate.
            return (value, self.error_message % IS_DATETIME.nice(self.format))

    def formatter(self, value):
        """Render a datetime object back into a string using ``self.format``."""
        return value.strftime(self.format)
1865   
class IS_DATE_IN_RANGE(IS_DATE):
    """
    Parse a date like IS_DATE and additionally require it to lie strictly
    between ``minimum`` and ``maximum``.

    Both bounds are exclusive: a date equal to ``minimum`` or ``maximum`` is
    rejected.  A bound that is None (or otherwise false) is not checked.
    ``error_message`` may use ``%(min)s`` and ``%(max)s``; they are
    interpolated once, in the constructor.
    """

    def __init__(self, minimum=None, maximum=None, format='%Y-%m-%d',
                 error_message="enter date in range %(min)s %(max)s"):
        self.minimum = minimum
        self.maximum = maximum
        bounds = {'min': minimum, 'max': maximum}
        IS_DATE.__init__(self, format=format,
                         error_message=error_message % bounds)

    def __call__(self, value):
        """
        :returns: (datetime.date, None) when the date parses and is inside
            the range, otherwise (value, error message)
        """
        (value, msg) = IS_DATE.__call__(self, value)
        if msg is not None:
            # parsing failed; value is still the raw input
            return (value, msg)
        below = self.minimum and self.minimum >= value
        if below or (self.maximum and value >= self.maximum):
            return (value, self.error_message)
        return (value, None)
1889   
1890   
class IS_DATETIME_IN_RANGE(IS_DATETIME):
    """
    Parse a date/time like IS_DATETIME and additionally require it to lie
    strictly between ``minimum`` and ``maximum``.

    Both bounds are exclusive: a value equal to ``minimum`` or ``maximum``
    is rejected.  A bound that is None (or otherwise false) is not checked.
    ``error_message`` may use ``%(min)s`` and ``%(max)s``; they are
    interpolated once, in the constructor.
    """

    def __init__(self, minimum=None, maximum=None,
                 format='%Y-%m-%d %H:%M:%S',
                 error_message="enter date and time in range %(min)s %(max)s"):
        self.minimum = minimum
        self.maximum = maximum
        bounds = {'min': minimum, 'max': maximum}
        IS_DATETIME.__init__(self, format=format,
                             error_message=error_message % bounds)

    def __call__(self, value):
        """
        :returns: (datetime.datetime, None) when the value parses and is
            inside the range, otherwise (value, error message)
        """
        (value, msg) = IS_DATETIME.__call__(self, value)
        if msg is not None:
            # parsing failed; value is still the raw input
            return (value, msg)
        below = self.minimum and self.minimum >= value
        if below or (self.maximum and value >= self.maximum):
            return (value, self.error_message)
        return (value, None)
1915   
1916   
class IS_LIST_OF(Validator):
    """
    Apply another validator to every item of a list.

    A value that is not a list is treated as a one-item list.  Validation
    stops at the first failing item.
    """

    def __init__(self, other):
        # other: the validator applied to each item
        self.other = other

    def __call__(self, value):
        """
        :returns: (list of validated items, None) on success, otherwise
            (original value, error of the first failing item)
        """
        if isinstance(value, list):
            items = value
        else:
            items = [value]
        validated = []
        for item in items:
            (result, error) = self.other(item)
            if error:
                return (value, error)
            validated.append(result)
        return (validated, None)
1934   
1935   
class IS_LOWER(Validator):
    """
    convert to lower case

    The value is decoded as UTF-8, lower-cased and encoded back to UTF-8;
    this validator never reports an error.

    >>> IS_LOWER()('ABC')
    ('abc', None)
    >>> IS_LOWER()('Ñ')
    ('\\xc3\\xb1', None)
    """

    def __call__(self, value):
        text = value.decode('utf8')
        lowered = text.lower()
        return (lowered.encode('utf8'), None)
1948   
1949   
class IS_UPPER(Validator):
    """
    convert to upper case

    The value is decoded as UTF-8, upper-cased and encoded back to UTF-8;
    this validator never reports an error.

    >>> IS_UPPER()('abc')
    ('ABC', None)
    >>> IS_UPPER()('ñ')
    ('\\xc3\\x91', None)
    """

    def __call__(self, value):
        text = value.decode('utf8')
        raised = text.upper()
        return (raised.encode('utf8'), None)
1962   
1963   
class IS_SLUG(Validator):
    """
    convert arbitrary text string to a slug

    With ``check=False`` (default) the value is converted to its slug.  With
    ``check=True`` the value must already be a slug, otherwise
    ``error_message`` is returned.

    >>> IS_SLUG()('abc123')
    ('abc123', None)
    >>> IS_SLUG()('ABC123')
    ('abc123', None)
    >>> IS_SLUG()('abc-123')
    ('abc-123', None)
    >>> IS_SLUG()('abc--123')
    ('abc-123', None)
    >>> IS_SLUG()('abc 123')
    ('abc-123', None)
    >>> IS_SLUG()('abc\\t123')
    ('abc-123', None)
    >>> IS_SLUG()('-abc-')
    ('abc', None)
    >>> IS_SLUG()('abc&amp;123')
    ('abc123', None)
    >>> IS_SLUG()('abc&amp;123&amp;def')
    ('abc123def', None)
    >>> IS_SLUG()('ñ')
    ('n', None)
    >>> IS_SLUG(maxlen=4)('abc123')
    ('abc1', None)
    """

    def __init__(self, maxlen=80, check=False, error_message='must be slug'):
        self.maxlen = maxlen
        self.check = check
        self.error_message = error_message

    @staticmethod
    def urlify(value, maxlen=80):
        """
        Build the slug for a UTF-8 encoded string.

        :param value: UTF-8 encoded byte string
        :param maxlen: maximum length of the returned slug
        :returns: lowercase ASCII string made of a-z, 0-9 and single hyphens
        """
        s = value.decode('utf-8').lower()     # to lowercase unicode
        s = unicodedata.normalize('NFKD', s)  # normalize eg è => e, ñ => n
        s = s.encode('ASCII', 'ignore')       # encode as ASCII
        s = re.sub(r'&\w+?;', '', s)          # strip html entities
        s = re.sub(r'[^a-z0-9\-\s]', '', s)   # strip all but alphanumeric/hyphen/whitespace
        s = re.sub(r'\s+', '-', s)            # any whitespace (not only ' ') to hyphens
        s = re.sub(r'--+', '-', s)            # collapse strings of hyphens
        s = s.strip('-')                      # remove leading and trailing hyphens
        return s[:maxlen].strip('-')          # enforce maximum length

    def __call__(self, value):
        """
        :returns: (slug, None), or (value, error_message) when ``check`` is
            set and the value is not already a slug
        """
        slug = IS_SLUG.urlify(value, self.maxlen)
        if self.check and value != slug:
            return (value, self.error_message)
        return (slug, None)
2011   
class IS_EMPTY_OR(Validator):
    """
    Accept an empty value, or else delegate to another validator.

    If is_empty() reports the value as empty, validation succeeds and the
    ``null`` replacement is returned; otherwise the result of ``other`` is
    returned.  ``empty_regex`` is compiled and passed on to is_empty().

    >>> IS_EMPTY_OR(IS_EMAIL())('abc@def.com')
    ('abc@def.com', None)
    >>> IS_EMPTY_OR(IS_EMAIL())('   ')
    (None, None)
    >>> IS_EMPTY_OR(IS_EMAIL(), null='abc')('   ')
    ('abc', None)
    >>> IS_EMPTY_OR(IS_EMAIL(), null='abc', empty_regex='def')('def')
    ('abc', None)
    >>> IS_EMPTY_OR(IS_EMAIL())('abc')
    ('abc', 'enter a valid email address')
    >>> IS_EMPTY_OR(IS_EMAIL())(' abc ')
    ('abc', 'enter a valid email address')
    """

    def __init__(self, other, null=None, empty_regex=None):
        (self.other, self.null) = (other, null)
        if empty_regex is not None:
            self.empty_regex = re.compile(empty_regex)
        else:
            self.empty_regex = None
        # Mirror the optional 'multiple' / 'options' members of the wrapped
        # validator, but only when it actually provides them.
        if hasattr(other, 'multiple'):
            self.multiple = other.multiple
        if hasattr(other, 'options'):
            self.options = self._options

    def _options(self):
        """
        Return the options of the wrapped validator, with an empty
        ('', '') choice inserted at the front for single-value validators.
        """
        options = self.other.options()
        # 'multiple' is only set when the wrapped validator defines it;
        # treat a missing attribute as "not multiple" instead of failing.
        multiple = getattr(self, 'multiple', False)
        if (not options or options[0][0] != '') and not multiple:
            options.insert(0, ('', ''))
        return options

    def set_self_id(self, id):
        """Forward the record id to the wrapped validator if it accepts one."""
        if hasattr(self.other, 'set_self_id'):
            self.other.set_self_id(id)

    def __call__(self, value):
        """
        :returns: (null, None) for an empty value, otherwise whatever the
            wrapped validator returns for the cleaned value
        """
        value, empty = is_empty(value, empty_regex=self.empty_regex)
        if empty:
            return (self.null, None)
        return self.other(value)

    def formatter(self, value):
        """Use the wrapped validator's formatter when it has one."""
        if hasattr(self.other, 'formatter'):
            return self.other.formatter(value)
        return value


IS_NULL_OR = IS_EMPTY_OR    # for backward compatibility
2064   
2065   
class CLEANUP(Validator):
    """
    example::

        INPUT(_type='text', _name='name', requires=CLEANUP())

    removes special characters on validation: the value is converted with
    str(), stripped of surrounding whitespace, and every match of ``regex``
    is deleted.  This validator never reports an error.
    """

    def __init__(self, regex='[^ \n\w]'):
        self.regex = re.compile(regex)

    def __call__(self, value):
        stripped = str(value).strip()
        cleaned = self.regex.sub('', stripped)
        return (cleaned, None)
2081   
2082   
class CRYPT(object):
    """
    example::

        INPUT(_type='text', _name='name', requires=CRYPT())

    encodes the value on validation with a digest.

    If no arguments are provided CRYPT uses the MD5 algorithm.
    If the key argument is provided the HMAC+MD5 algorithm is used.
    If the digest_alg is specified this is used to replace the
    MD5 with, for example, SHA512. The digest_alg can be
    the name of a hashlib algorithm as a string or the algorithm itself.

    A key of the form 'alg:secret' (exactly one colon) with no explicit
    digest_alg is split into the algorithm name and the actual key.
    """

    def __init__(self, key=None, digest_alg=None):
        if key and not digest_alg and key.count(':') == 1:
            (digest_alg, key) = key.split(':')
        self.key = key
        # NOTE(review): md5 stays the default for backward compatibility
        self.digest_alg = digest_alg or 'md5'

    def __call__(self, value):
        """
        :returns: (hex digest of value, None); keyed with HMAC when a key
            was configured, a plain hash otherwise
        """
        if not self.key:
            return (hash(value, self.digest_alg), None)
        digest = get_digest(self.digest_alg)
        return (hmac.new(self.key, value, digest).hexdigest(), None)
2113   
2114   
class IS_STRONG(object):
    """
    example::

        INPUT(_type='password', _name='passwd',
            requires=IS_STRONG(min=10, special=2, upper=2))

    enforces complexity requirements on a field

    A rule is only applied when its setting is exactly an int (``invalid``
    when it is non-empty).  For ``upper``, ``lower`` and ``number`` a value
    of 0 or less forbids that character class.  ``special`` counts how many
    distinct characters of ``specials`` occur in the value.  When no
    ``error_message`` is given, all failed rules are reported, joined by
    '<br />' and wrapped in gluon.html.XML.
    """

    def __init__(self, min=8, max=20, upper=1, lower=1, number=1,
                 special=1, specials=r'~!@#$%^&*()_+-=?<>,.:;{}[]|',
                 invalid=' "', error_message=None):
        (self.min, self.max) = (min, max)
        (self.upper, self.lower, self.number) = (upper, lower, number)
        (self.special, self.specials) = (special, specials)
        self.invalid = invalid
        self.error_message = error_message

    def __call__(self, value):
        """
        :returns: (value, None) when every rule passes, otherwise
            (value, error_message or the XML list of failed rules)
        """
        problems = []

        # length limits
        if type(self.min) == int and self.min > 0:
            if len(value) < self.min:
                problems.append("Minimum length is %s" % self.min)
        if type(self.max) == int and self.max > 0:
            if len(value) > self.max:
                problems.append("Maximum length is %s" % self.max)

        # required special characters (distinct ones found in value)
        if type(self.special) == int:
            present = [ch for ch in self.specials if ch in value]
            if self.special > 0 and len(present) < self.special:
                problems.append("Must include at least %s of the following : %s" % (self.special, self.specials))

        # forbidden characters
        if self.invalid:
            forbidden = [ch for ch in self.invalid if ch in value]
            if forbidden:
                problems.append("May not contain any of the following: %s" \
                    % self.invalid)

        # upper case letters
        if type(self.upper) == int:
            uppers = len(re.findall("[A-Z]", value))
            if self.upper > 0:
                if uppers < self.upper:
                    problems.append("Must include at least %s upper case" \
                        % str(self.upper))
            elif uppers > 0:
                problems.append("May not include any upper case letters")

        # lower case letters
        if type(self.lower) == int:
            lowers = len(re.findall("[a-z]", value))
            if self.lower > 0:
                if lowers < self.lower:
                    problems.append("Must include at least %s lower case" \
                        % str(self.lower))
            elif lowers > 0:
                problems.append("May not include any lower case letters")

        # digits
        if type(self.number) == int:
            digits = len(re.findall("[0-9]", value))
            if self.number > 0:
                if digits < self.number:
                    noun = "number"
                    if self.number > 1:
                        noun = "numbers"
                    problems.append("Must include at least %s %s" \
                        % (str(self.number), noun))
            elif digits > 0:
                problems.append("May not include any numbers")

        if not problems:
            return (value, None)
        if self.error_message:
            return (value, self.error_message)
        from gluon.html import XML
        return (value, XML('<br />'.join(problems)))
2193   
2194   
class IS_IN_SUBSET(IS_IN_SET):
    """
    Require every word of the value to be accepted by IS_IN_SET.

    The value is converted with str() and split into runs of word
    characters; each run is validated individually by IS_IN_SET.  The value
    itself is returned unchanged.
    """

    def __init__(self, *a, **b):
        IS_IN_SET.__init__(self, *a, **b)

    def __call__(self, value):
        rejected = []
        for word in re.findall("\w+", str(value)):
            error = IS_IN_SET.__call__(self, word)[1]
            if error:
                rejected.append(word)
        if rejected:
            return (value, self.error_message)
        return (value, None)
2206   
2207   
class IS_IMAGE(Validator):
    """
    Checks if file uploaded through file input was saved in one of selected
    image formats and has dimensions (width and height) within given boundaries.

    Does *not* check for maximum file size (use IS_LENGTH for that). Returns
    validation failure if no data was uploaded.

    Supported file formats: BMP, GIF, JPEG, PNG.

    Code parts taken from
    http://mail.python.org/pipermail/python-list/2007-June/617126.html

    Arguments:

    extensions: iterable containing allowed *lowercase* image file extensions
    ('jpg' extension of uploaded file counts as 'jpeg')
    maxsize: iterable containing maximum width and height of the image
    minsize: iterable containing minimum width and height of the image

    Use (-1, -1) as minsize to pass image size check.

    Examples::

        #Check if uploaded file is in any of supported image formats:
        INPUT(_type='file', _name='name', requires=IS_IMAGE())

        #Check if uploaded file is either JPEG or PNG:
        INPUT(_type='file', _name='name',
            requires=IS_IMAGE(extensions=('jpeg', 'png')))

        #Check if uploaded file is PNG with maximum size of 200x200 pixels:
        INPUT(_type='file', _name='name',
            requires=IS_IMAGE(extensions=('png',), maxsize=(200, 200)))
    """

    def __init__(self,
                 extensions=('bmp', 'gif', 'jpeg', 'png'),
                 maxsize=(10000, 10000),
                 minsize=(0, 0),
                 error_message='invalid image'):

        self.extensions = extensions
        self.maxsize = maxsize
        self.minsize = minsize
        self.error_message = error_message

    def __call__(self, value):
        """
        :param value: upload object exposing ``filename`` and ``file``
        :returns: (value, None) when the extension is allowed and the
            dimensions read from the file header are within bounds,
            otherwise (value, error_message)
        """
        failure = (value, self.error_message)
        try:
            dot = value.filename.rfind('.')
            # Explicit checks instead of assert: assert statements are
            # removed under "python -O", which would disable validation.
            if dot < 0:
                return failure
            extension = value.filename[dot + 1:].lower()
            if extension == 'jpg':
                extension = 'jpeg'
            if extension not in self.extensions:
                return failure
            if extension == 'bmp':
                width, height = self.__bmp(value.file)
            elif extension == 'gif':
                width, height = self.__gif(value.file)
            elif extension == 'jpeg':
                width, height = self.__jpeg(value.file)
            elif extension == 'png':
                width, height = self.__png(value.file)
            else:
                # allowed extension without a header parser
                width = -1
                height = -1
            if not (self.minsize[0] <= width <= self.maxsize[0]
                    and self.minsize[1] <= height <= self.maxsize[1]):
                return failure
            # rewind so later consumers read the upload from the start
            value.file.seek(0)
            return (value, None)
        except Exception:
            # missing attributes, truncated headers (struct.error), I/O
            # errors: all mean "not a valid image upload"
            return failure

    def __bmp(self, stream):
        """Return (width, height) from a BMP header, or (-1, -1)."""
        if stream.read(2) == 'BM':
            stream.read(16)     # skip to the width/height fields
            return struct.unpack("<LL", stream.read(8))
        return (-1, -1)

    def __gif(self, stream):
        """Return (width, height) from a GIF header, or (-1, -1)."""
        if stream.read(6) in ('GIF87a', 'GIF89a'):
            header = stream.read(5)
            if len(header) == 5:
                # two little-endian shorts; the trailing byte is dropped
                return tuple(struct.unpack("<HHB", header)[:-1])
        return (-1, -1)

    def __jpeg(self, stream):
        """Return (width, height) from a JPEG frame segment, or (-1, -1)."""
        if stream.read(2) == '\xFF\xD8':
            while True:
                (marker, code, length) = struct.unpack("!BBH", stream.read(4))
                if marker != 0xFF:
                    break
                elif code >= 0xC0 and code <= 0xC3:
                    # the segment stores height before width
                    return tuple(reversed(
                        struct.unpack("!xHH", stream.read(5))))
                else:
                    # skip the rest of this segment
                    stream.read(length - 2)
        return (-1, -1)

    def __png(self, stream):
        """Return (width, height) from a PNG IHDR chunk, or (-1, -1)."""
        if stream.read(8) == '\211PNG\r\n\032\n':
            stream.read(4)      # skip the 4 bytes before the chunk type
            if stream.read(4) == "IHDR":
                return struct.unpack("!LL", stream.read(8))
        return (-1, -1)
2313   
2314   
class IS_UPLOAD_FILENAME(Validator):
    """
    Checks if name and extension of file uploaded through file input matches
    given criteria.

    Does *not* ensure the file type in any way. Returns validation failure
    if no data was uploaded.

    Arguments::

    filename: filename (before dot) regex
    extension: extension (after dot) regex
    lastdot: which dot should be used as a filename / extension separator:
             True means last dot, eg. file.png -> file / png
             False means first dot, eg. file.tar.gz -> file / tar.gz
    case: 0 - keep the case, 1 - transform the string into lowercase (default),
          2 - transform the string into uppercase

    If there is no dot present, extension checks will be done against empty
    string and filename checks against whole value.

    Both regexes are applied with match(), i.e. anchored at the start of the
    name / extension only.

    Examples::

        #Check if file has a pdf extension (case insensitive):
        INPUT(_type='file', _name='name',
            requires=IS_UPLOAD_FILENAME(extension='pdf'))

        #Check if file has a tar.gz extension and name starting with backup:
        INPUT(_type='file', _name='name',
            requires=IS_UPLOAD_FILENAME(filename='backup.*',
                extension='tar.gz', lastdot=False))

        #Check if file has no extension and name matching README
        #(case sensitive):
        INPUT(_type='file', _name='name',
            requires=IS_UPLOAD_FILENAME(filename='^README$',
                extension='^$', case=0))
    """

    def __init__(self, filename=None, extension=None, lastdot=True, case=1,
            error_message='enter valid filename'):
        # plain strings are compiled; anything else is stored as given
        if isinstance(filename, str):
            filename = re.compile(filename)
        if isinstance(extension, str):
            extension = re.compile(extension)
        (self.filename, self.extension) = (filename, extension)
        (self.lastdot, self.case) = (lastdot, case)
        self.error_message = error_message

    def __call__(self, value):
        """
        :param value: upload object exposing ``filename``
        :returns: (value, None) when name and extension match, otherwise
            (value, error_message)
        """
        rejected = (value, self.error_message)
        try:
            name = value.filename
        except:
            # nothing was uploaded (value has no usable filename)
            return rejected
        if self.case == 1:
            name = name.lower()
        elif self.case == 2:
            name = name.upper()
        if self.lastdot:
            split_at = name.rfind('.')
        else:
            split_at = name.find('.')
        if split_at == -1:
            # no dot: whole string is the name, extension is empty
            split_at = len(name)
        stem = name[:split_at]
        suffix = name[split_at + 1:]
        if self.filename and not self.filename.match(stem):
            return rejected
        if self.extension and not self.extension.match(suffix):
            return rejected
        return (value, None)
2387   
2388   
2389 -class IS_IPV4(Validator): 
2390      """ 
2391      Checks if field's value is an IP version 4 address in decimal form. Can 
2392      be set to force addresses from certain range. 
2393   
2394      IPv4 regex taken from: http://regexlib.com/REDetails.aspx?regexp_id=1411 
2395   
2396      Arguments: 
2397   
2398      minip: lowest allowed address; accepts: 
2399             str, eg. 192.168.0.1 
2400             list or tuple of octets, eg. [192, 168, 0, 1] 
2401      maxip: highest allowed address; same as above 
2402      invert: True to allow addresses only from outside of given range; note 
2403              that range boundaries are not matched this way 
2404      is_localhost: localhost address treatment: 
2405                    None (default): indifferent 
2406                    True (enforce): query address must match localhost address 
2407                                    (127.0.0.1) 
2408                    False (forbid): query address must not match localhost 
2409                                    address 
2410      is_private: same as above, except that query address is checked against 
2411                  two address ranges: 172.16.0.0 - 172.31.255.255 and 
2412                  192.168.0.0 - 192.168.255.255 
2413      is_automatic: same as above, except that query address is checked against 
2414                    one address range: 169.254.0.0 - 169.254.255.255 
2415   
2416      Minip and maxip may also be lists or tuples of addresses in all above 
2417      forms (str, int, list / tuple), allowing setup of multiple address ranges: 
2418   
2419          minip = (minip1, minip2, ... minipN) 
2420                     |       |           | 
2421                     |       |           | 
2422          maxip = (maxip1, maxip2, ... maxipN) 
2423   
2424      Longer iterable will be truncated to match length of shorter one. 
2425   
2426      Examples:: 
2427   
2428          #Check for valid IPv4 address: 
2429          INPUT(_type='text', _name='name', requires=IS_IPV4()) 
2430   
2431          #Check for valid IPv4 address belonging to specific range: 
2432          INPUT(_type='text', _name='name', 
2433              requires=IS_IPV4(minip='100.200.0.0', maxip='100.200.255.255')) 
2434   
2435          #Check for valid IPv4 address belonging to either 100.110.0.0 - 
2436          #100.110.255.255 or 200.50.0.0 - 200.50.0.255 address range: 
2437          INPUT(_type='text', _name='name', 
2438              requires=IS_IPV4(minip=('100.110.0.0', '200.50.0.0'), 
2439                               maxip=('100.110.255.255', '200.50.0.255'))) 
2440   
2441          #Check for valid IPv4 address belonging to private address space: 
2442          INPUT(_type='text', _name='name', requires=IS_IPV4(is_private=True)) 
2443   
2444          #Check for valid IPv4 address that is not a localhost address: 
2445          INPUT(_type='text', _name='name', requires=IS_IPV4(is_localhost=False)) 
2446   
2447      >>> IS_IPV4()('1.2.3.4') 
2448      ('1.2.3.4', None) 
2449      >>> IS_IPV4()('255.255.255.255') 
2450      ('255.255.255.255', None) 
2451      >>> IS_IPV4()('1.2.3.4 ') 
2452      ('1.2.3.4 ', 'enter valid IPv4 address') 
2453      >>> IS_IPV4()('1.2.3.4.5') 
2454      ('1.2.3.4.5', 'enter valid IPv4 address') 
2455      >>> IS_IPV4()('123.123') 
2456      ('123.123', 'enter valid IPv4 address') 
2457      >>> IS_IPV4()('1111.2.3.4') 
2458      ('1111.2.3.4', 'enter valid IPv4 address') 
2459      >>> IS_IPV4()('0111.2.3.4') 
2460      ('0111.2.3.4', 'enter valid IPv4 address') 
2461      >>> IS_IPV4()('256.2.3.4') 
2462      ('256.2.3.4', 'enter valid IPv4 address') 
2463      >>> IS_IPV4()('300.2.3.4') 
2464      ('300.2.3.4', 'enter valid IPv4 address') 
2465      >>> IS_IPV4(minip='1.2.3.4', maxip='1.2.3.4')('1.2.3.4') 
2466      ('1.2.3.4', None) 
2467      >>> IS_IPV4(minip='1.2.3.5', maxip='1.2.3.9', error_message='bad ip')('1.2.3.4') 
2468      ('1.2.3.4', 'bad ip') 
2469      >>> IS_IPV4(maxip='1.2.3.4', invert=True)('127.0.0.1') 
2470      ('127.0.0.1', None) 
2471      >>> IS_IPV4(maxip='1.2.3.4', invert=True)('1.2.3.4') 
2472      ('1.2.3.4', 'enter valid IPv4 address') 
2473      >>> IS_IPV4(is_localhost=True)('127.0.0.1') 
2474      ('127.0.0.1', None) 
2475      >>> IS_IPV4(is_localhost=True)('1.2.3.4') 
2476      ('1.2.3.4', 'enter valid IPv4 address') 
2477      >>> IS_IPV4(is_localhost=False)('127.0.0.1') 
2478      ('127.0.0.1', 'enter valid IPv4 address') 
2479      >>> IS_IPV4(maxip='100.0.0.0', is_localhost=True)('127.0.0.1') 
2480      ('127.0.0.1', 'enter valid IPv4 address') 
2481      """ 
2482   
2483      regex = re.compile( 
2484          '^(([1-9]?\d|1\d\d|2[0-4]\d|25[0-5])\.){3}([1-9]?\d|1\d\d|2[0-4]\d|25[0-5])$') 
2485      numbers = (16777216, 65536, 256, 1) 
2486      localhost = 2130706433 
2487      private = ((2886729728L, 2886795263L), (3232235520L, 3232301055L)) 
2488      automatic = (2851995648L, 2852061183L) 
2489   
2490 -    def __init__( 
2491          self, 
2492          minip='0.0.0.0', 
2493          maxip='255.255.255.255', 
2494          invert=False, 
2495          is_localhost=None, 
2496          is_private=None, 
2497          is_automatic=None, 
2498          error_message='enter valid IPv4 address'): 
2499          for n, value in enumerate((minip, maxip)): 
2500              temp = [] 
2501              if isinstance(value, str): 
2502                  temp.append(value.split('.')) 
2503              elif isinstance(value, (list, tuple)): 
2504                  if len(value) == len(filter(lambda item: isinstance(item, int), value)) == 4: 
2505                      temp.append(value) 
2506                  else: 
2507                      for item in value: 
2508                          if isinstance(item, str): 
2509                              temp.append(item.split('.')) 
2510                          elif isinstance(item, (list, tuple)): 
2511                              temp.append(item) 
2512              numbers = [] 
2513              for item in temp: 
2514                  number = 0 
2515                  for i, j in zip(self.numbers, item): 
2516                      number += i * int(j) 
2517                  numbers.append(number) 
2518              if n == 0: 
2519                  self.minip = numbers 
2520              else: 
2521                  self.maxip = numbers 
2522          self.invert = invert 
2523          self.is_localhost = is_localhost 
2524          self.is_private = is_private 
2525          self.is_automatic = is_automatic 
2526          self.error_message = error_message 
2527   
2528 -    def __call__(self, value): 
2529          if self.regex.match(value): 
2530              number = 0 
2531              for i, j in zip(self.numbers, value.split('.')): 
2532                  number += i * int(j) 
2533              ok = False 
2534              for bottom, top in zip(self.minip, self.maxip): 
2535                  if self.invert != (bottom <= number <= top): 
2536                      ok = True 
2537              if not (self.is_localhost == None or self.is_localhost == \ 
2538                  (number == self.localhost)): 
2539                      ok = False 
2540              if not (self.is_private == None or self.is_private == \ 
2541                  (sum([number[0] <= number <= number[1] for number in self.private]) > 0)): 
2542                      ok = False 
2543              if not (self.is_automatic == None or self.is_automatic == \ 
2544                  (self.automatic[0] <= number <= self.automatic[1])): 
2545                      ok = False 
2546              if ok: 
2547                  return (value, None) 
2548          return (value, self.error_message) 
2549   
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module's
    # docstrings.
    from doctest import testmod
    testmod()
2553