Package web2py :: Package gluon :: Module validators
[hide private]
[frames] | [no frames]

Source Code for Module web2py.gluon.validators

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3   
   4  """ 
   5  This file is part of web2py Web Framework (Copyrighted, 2007-2010). 
   6  Developed by Massimo Di Pierro <mdipierro@cs.depaul.edu>. 
   7  License: GPL v2 
   8   
   9  Thanks to ga2arch for help with IS_IN_DB and IS_NOT_IN_DB on GAE 
  10  """ 
  11   
  12  import os 
  13  import re 
  14  import datetime 
  15  import time 
  16  import cgi 
  17  import hmac 
  18  import urllib 
  19  import struct 
  20  import decimal 
  21  import unicodedata 
  22  from cStringIO import StringIO 
  23  from utils import hash, get_digest 
  24   
  25   
  26  __all__ = [ 
  27      'CLEANUP', 
  28      'CRYPT', 
  29      'IS_ALPHANUMERIC', 
  30      'IS_DATE_IN_RANGE', 
  31      'IS_DATE', 
  32      'IS_DATETIME_IN_RANGE', 
  33      'IS_DATETIME', 
  34      'IS_DECIMAL_IN_RANGE', 
  35      'IS_EMAIL', 
  36      'IS_EMPTY_OR', 
  37      'IS_EXPR', 
  38      'IS_FLOAT_IN_RANGE', 
  39      'IS_IMAGE', 
  40      'IS_IN_DB', 
  41      'IS_IN_SET', 
  42      'IS_INT_IN_RANGE', 
  43      'IS_IPV4', 
  44      'IS_LENGTH', 
  45      'IS_LIST_OF', 
  46      'IS_LOWER', 
  47      'IS_MATCH', 
  48      'IS_NOT_EMPTY', 
  49      'IS_NOT_IN_DB', 
  50      'IS_NULL_OR', 
  51      'IS_SLUG', 
  52      'IS_STRONG', 
  53      'IS_TIME', 
  54      'IS_UPLOAD_FILENAME', 
  55      'IS_UPPER', 
  56      'IS_URL', 
  57      ] 
def options_sorter(x, y):
    """
    cmp-style comparator that orders option pairs by label, ignoring case.

    :param x: an option pair ``(key, label)``
    :param y: an option pair ``(key, label)``
    :returns: 1 if the upper-cased label of x sorts after that of y, -1 if it
              sorts before, and 0 if the two labels are equal ignoring case
    """
    left = str(x[1]).upper()
    right = str(y[1]).upper()
    # A comparator has to report equality: answering -1 for both (x, y) and
    # (y, x) hands list.sort() contradictory information.
    return (left > right) - (left < right)
61
class Validator(object):
    """
    Common base class of every validator; it exists mostly to carry this
    documentation.

    A validator checks the value of an input field (including the fields of
    forms generated from database tables).

    Using a validator with a FORM::

        INPUT(_name='a', requires=IS_INT_IN_RANGE(0, 10))

    Requiring a validator for a table field::

        db.define_table('person', SQLField('name'))
        db.person.name.requires=IS_NOT_EMPTY()

    Validators are always assigned through the ``requires`` attribute of a
    field. A field takes either one validator or several; several validators
    are given as a list::

        db.person.name.requires=[IS_NOT_EMPTY(), IS_NOT_IN_DB(db, 'person.id')]

    The ``accepts`` function of a FORM (or of another HTML helper object that
    contains a form) calls the validators, always in the order in which they
    are listed.

    Built-in validators accept an optional ``error_message`` constructor
    argument that replaces the default error message, e.g. on a database
    table::

        db.person.name.requires=IS_NOT_EMPTY(error_message=T('fill this'))

    where the translation operator T is used to allow for
    internationalization.

    Notice that default error messages are not translated.
    """

    def formatter(self, value):
        """
        Return the representation of value that matches this validator.

        This base implementation hands the value back untouched; a validator
        that needs a formatted version overrides it.
        """
        return value
106
class IS_MATCH(Validator):
    """
    Accepts a value when a regular expression matches at its beginning.

    example::

        INPUT(_type='text', _name='name', requires=IS_MATCH('.+'))

    the argument of IS_MATCH is a regular expression::

        >>> IS_MATCH('.+')('hello')
        ('hello', None)

        >>> IS_MATCH('.+')('')
        ('', 'invalid expression')
    """

    def __init__(self, expression, error_message='invalid expression'):
        """
        :param expression: regular expression source, compiled once here
        :param error_message: message returned when the value does not match
        """
        self.regex = re.compile(expression)
        self.error_message = error_message

    def __call__(self, value):
        """
        :returns: ``(matched_text, None)`` on success, otherwise
                  ``(value, error_message)``
        """
        found = self.regex.match(value)
        if not found:
            return (value, self.error_message)
        # Only the text covered by the match is handed back, which can be
        # shorter than the submitted value.
        return (found.group(), None)
132
class IS_EXPR(Validator):
    """
    Accepts a value when a Python condition written in terms of ``value``
    evaluates to something true.

    example::

        INPUT(_type='text', _name='name',
            requires=IS_EXPR('5 < int(value) < 10'))

    the argument of IS_EXPR must be python condition::

        >>> IS_EXPR('int(value) < 2')('1')
        ('1', None)

        >>> IS_EXPR('int(value) < 2')('2')
        ('2', 'invalid expression')
    """

    def __init__(self, expression, error_message='invalid expression'):
        """
        :param expression: source text of the condition; the submitted value
                           is visible inside it under the name ``value``
        :param error_message: message returned when the condition is false
        """
        self.error_message = error_message
        self.expression = expression

    def __call__(self, value):
        """
        :returns: ``(value, None)`` if the condition holds, otherwise
                  ``(value, error_message)``
        """
        namespace = {'value': value}
        # NOTE: the expression is executed as Python code, so it must be
        # written by the developer and never built from user input.
        exec('__ret__=' + self.expression, namespace)
        if not namespace['__ret__']:
            return (value, self.error_message)
        return (value, None)
160
class IS_LENGTH(Validator):
    """
    Checks that the length of a field's value lies between two bounds. It
    handles text inputs as well as file inputs.

    Arguments:

    maxsize: maximum allowed length / size
    minsize: minimum allowed length / size

    Examples::

        #Check if text string is shorter than 33 characters:
        INPUT(_type='text', _name='name', requires=IS_LENGTH(32))

        #Check if password string is longer than 5 characters:
        INPUT(_type='password', _name='name', requires=IS_LENGTH(minsize=6))

        #Check if uploaded file has size between 1KB and 1MB:
        INPUT(_type='file', _name='name', requires=IS_LENGTH(1048576, 1024))

        >>> IS_LENGTH()('')
        ('', None)
        >>> IS_LENGTH()('1234567890')
        ('1234567890', None)
        >>> IS_LENGTH(maxsize=5, minsize=0)('1234567890')  # too long
        ('1234567890', 'enter from 0 to 5 characters')
        >>> IS_LENGTH(maxsize=50, minsize=20)('1234567890')  # too short
        ('1234567890', 'enter from 20 to 50 characters')
    """

    def __init__(self, maxsize=255, minsize=0,
                 error_message='enter from %(min)s to %(max)s characters'):
        """
        :param maxsize: largest accepted length (inclusive)
        :param minsize: smallest accepted length (inclusive)
        :param error_message: template; ``%(min)s`` and ``%(max)s`` are
                              filled in once, here
        """
        bounds = dict(min=minsize, max=maxsize)
        self.maxsize = maxsize
        self.minsize = minsize
        self.error_message = error_message % bounds

    def __call__(self, value):
        """
        :returns: ``(value, None)`` when the length is within bounds,
                  otherwise ``(value, error_message)``
        """
        accepted = (value, None)
        rejected = (value, self.error_message)

        if isinstance(value, cgi.FieldStorage):
            if value.file:
                # Measure the upload by seeking to its end, then rewind so
                # later readers start from the first byte again.
                stream = value.file
                stream.seek(0, os.SEEK_END)
                length = stream.tell()
                stream.seek(0, os.SEEK_SET)
            else:
                payload = value.value
                length = len(payload) if payload else 0
            if self.minsize <= length <= self.maxsize:
                return accepted
            return rejected

        if isinstance(value, (str, unicode, list)):
            if self.minsize <= len(value) <= self.maxsize:
                return accepted
            return rejected

        # Any other type is measured through its str() form and is accepted
        # only if it also offers a working decode('utf8').
        if self.minsize <= len(str(value)) <= self.maxsize:
            try:
                value.decode('utf8')
                return accepted
            except:
                pass
        return rejected
222
class IS_IN_SET(Validator):
    """
    Accepts only values that belong to a fixed collection of options.

    example::

        INPUT(_type='text', _name='name',
              requires=IS_IN_SET(['max', 'john'],zero=''))

    the argument of IS_IN_SET must be a list or set; a dict (keys are the
    values, dict values are the labels) and a list of ``(value, label)``
    pairs are recognised as well

        >>> IS_IN_SET(['max', 'john'])('max')
        ('max', None)
        >>> IS_IN_SET(['max', 'john'])('massimo')
        ('massimo', 'value not allowed')
        >>> IS_IN_SET(['max', 'john'], multiple=True)(('max', 'john'))
        ('|max|john|', None)
        >>> IS_IN_SET(['max', 'john'], multiple=True)(('bill', 'john'))
        (('bill', 'john'), 'value not allowed')
    """

    # Tokens extracted from str(value) when several values are submitted.
    _value_regex = re.compile(r"[\w\-:]+")

    def __init__(
        self,
        theset,
        labels=None,
        error_message='value not allowed',
        multiple=False,
        zero='',
        sort=False,
        ):
        """
        :param theset: the allowed values (list, set, dict or list of pairs)
        :param labels: optional labels, parallel to theset; ignored when
                       theset is a dict or a list of pairs
        :param error_message: message returned for a value outside the set
        :param multiple: if true, several values may be submitted at once
        :param zero: label of the empty option added by options(); None
                     suppresses that option
        :param sort: if true, options() sorts the options by label
        """
        self.multiple = multiple
        if isinstance(theset, dict):
            self.theset = [str(item) for item in theset]
            # list() keeps the labels indexable for options().
            self.labels = list(theset.values())
        else:
            # Materialise first: a set (explicitly allowed by the docstring)
            # cannot be indexed, so peeking at theset[0] used to raise.
            theset = list(theset)
            if theset and isinstance(theset[0], (list, tuple)) \
                    and len(theset[0]) == 2:
                self.theset = [str(item) for item, label in theset]
                self.labels = [str(label) for item, label in theset]
            else:
                self.theset = [str(item) for item in theset]
                self.labels = labels
        self.error_message = error_message
        self.zero = zero
        self.sort = sort

    def options(self):
        """
        :returns: list of ``(value, label)`` pairs, optionally sorted by
                  label and, for single selection, preceded by the empty
                  option ``('', zero)``
        """
        if not self.labels:
            items = [(k, k) for k in self.theset]
        else:
            items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)]
        if self.sort:
            items.sort(options_sorter)
        if self.zero is not None and not self.multiple:
            items.insert(0, ('', self.zero))
        return items

    def __call__(self, value):
        """
        :returns: ``(value, None)`` for an allowed single value,
                  ``('|v1|v2|', None)`` for allowed multiple values,
                  otherwise ``(value, error_message)``
        """
        if self.multiple:
            values = self._value_regex.findall(str(value))
        else:
            values = [value]
        failures = [x for x in values if x not in self.theset]
        if failures:
            # Nothing submitted at all is acceptable for a multiple choice.
            if self.multiple and value is None:
                return (value, None)
            return (value, self.error_message)
        if self.multiple:
            return ('|%s|' % '|'.join(values), None)
        return (value, None)
# Matches a dotted ``table.field`` reference at the start of a string.
regex1 = re.compile(r'[\w_]+\.[\w_]+')
# Captures every name used as a ``%(name)s`` placeholder in a format string.
regex2 = re.compile(r'%\((?P<name>[^\)]+)\)s')
class IS_IN_DB(Validator):
    """
    Accepts only values that exist in a given database field.

    example::

        INPUT(_type='text', _name='name',
              requires=IS_IN_DB(db, db.table, zero=''))

    used for reference fields, rendered as a dropbox
    """

    # Tokens extracted from str(value) when several values are submitted.
    _value_regex = re.compile(r"[\w\-:]+")

    def __init__(
        self,
        dbset,
        field,
        label=None,
        error_message='value not in database',
        orderby=None,
        cache=None,
        multiple=False,
        zero='',
        sort=False,
        _and=None,
        ):
        """
        :param dbset: a database (anything with ``define_table``, which is
                      then called to obtain a set) or a set of records
        :param field: the ``table.field`` to look the value up in
        :param label: format string with ``%(fieldname)s`` placeholders, a
                      bare ``table.field`` name, or a callable receiving the
                      record; defaults to the key field itself
        :param error_message: message returned for a value not in the field
        :param orderby: ordering passed to select() by build_set()
        :param cache: cache argument passed to select() by build_set()
        :param multiple: if true, several values may be submitted at once
        :param zero: label of the empty option added by options(); None
                     suppresses that option
        :param sort: if true, options() sorts the options by label
        :param _and: optional further validator applied to an accepted
                     single value
        """
        if hasattr(dbset, 'define_table'):
            self.dbset = dbset()
        else:
            self.dbset = dbset
        self.field = field
        (ktable, kfield) = str(self.field).split('.')
        if not label:
            label = '%%(%s)s' % kfield
        if isinstance(label, str):
            # A bare 'table.field' label is turned into a placeholder.
            if regex1.match(str(label)):
                label = '%%(%s)s' % str(label).split('.')[-1]
            ks = regex2.findall(label)
            if kfield not in ks:
                ks += [kfield]
            fields = ['%s.%s' % (ktable, k) for k in ks]
        else:
            # A callable label may read any column, so select them all.
            ks = [kfield]
            fields = [str(f) for f in self.dbset._db[ktable]]
        self.fields = fields
        self.label = label
        self.ktable = ktable
        self.kfield = kfield
        self.ks = ks
        self.error_message = error_message
        self.theset = None
        self.orderby = orderby
        self.cache = cache
        self.multiple = multiple
        self.zero = zero
        self.sort = sort
        self._and = _and

    def set_self_id(self, id):
        """Forward the id of the record being edited to the ``_and`` validator."""
        if self._and:
            self._and.record_id = id

    def build_set(self):
        """
        Query the database and store the accepted values in ``self.theset``
        and their labels, in the same order, in ``self.labels``.
        """
        if self.dbset._db._dbname != 'gql':
            orderby = self.orderby or ', '.join(self.fields)
            dd = dict(orderby=orderby, cache=self.cache)
            records = self.dbset.select(*self.fields, **dd)
        else:
            import contrib.gql
            orderby = self.orderby \
                or contrib.gql.SQLXorable('|'.join([k for k in self.ks
                                                    if k != 'id']))
            dd = dict(orderby=orderby, cache=self.cache)
            records = \
                self.dbset.select(self.dbset._db[self.ktable].ALL, **dd)
        self.theset = [str(r[self.kfield]) for r in records]
        if isinstance(self.label, str):
            self.labels = [self.label % dict(r) for r in records]
        else:
            self.labels = [self.label(r) for r in records]

    def options(self):
        """
        :returns: list of ``(value, label)`` pairs freshly read from the
                  database, optionally sorted by label and, for single
                  selection, preceded by the empty option ``('', zero)``
        """
        self.build_set()
        items = [(k, self.labels[i]) for (i, k) in enumerate(self.theset)]
        if self.sort:
            items.sort(options_sorter)
        if self.zero is not None and not self.multiple:
            items.insert(0, ('', self.zero))
        return items

    def _in_db(self, value):
        """Return a true value if at least one record has field == value."""
        (ktable, kfield) = str(self.field).split('.')
        field = self.dbset._db[ktable][kfield]
        return self.dbset(field == value).count()

    def __call__(self, value):
        """
        :returns: ``('|v1|v2|', None)`` for accepted multiple values, the
                  result of ``_and`` (or ``(value, None)``) for an accepted
                  single value, otherwise ``(value, error_message)``
        """
        if self.multiple:
            values = self._value_regex.findall(str(value))
            if self.theset is None:
                # build_set() has not run yet; testing membership in None
                # used to raise TypeError, so ask the database directly.
                missing = [x for x in values if not self._in_db(x)]
            else:
                missing = [x for x in values if x not in self.theset]
            if not missing:
                return ('|%s|' % '|'.join(values), None)
        elif self.theset:
            if value in self.theset:
                if self._and:
                    return self._and(value)
                else:
                    return (value, None)
        else:
            if self._in_db(value):
                if self._and:
                    return self._and(value)
                else:
                    return (value, None)
        return (value, self.error_message)
405
class IS_NOT_IN_DB(Validator):
    """
    Rejects values that are already stored in a given database field.

    example::

        INPUT(_type='text', _name='name', requires=IS_NOT_IN_DB(db, db.table))

    makes the field unique
    """

    def __init__(
        self,
        dbset,
        field,
        error_message='value already in database',
        allowed_override=None,
        ):
        """
        :param dbset: a database (anything with ``define_table``, which is
                      then called to obtain a set) or a set of records
        :param field: the ``table.field`` that has to stay unique
        :param error_message: message returned for a duplicate value
        :param allowed_override: values that are always accepted without
                                 consulting the database (default: none)
        """
        if hasattr(dbset, 'define_table'):
            self.dbset = dbset()
        else:
            self.dbset = dbset
        self.field = field
        self.error_message = error_message
        self.record_id = 0
        # A fresh list per instance: a literal [] default would be one
        # object shared by every validator created without this argument.
        if allowed_override is None:
            allowed_override = []
        self.allowed_override = allowed_override

    def set_self_id(self, id):
        """
        Remember which record is being edited so that its own current value
        is not reported as a duplicate. ``id`` is either a record id or a
        dict mapping field names to the values identifying the record.
        """
        self.record_id = id

    def __call__(self, value):
        """
        :returns: ``(value, None)`` if the value is free (or belongs to the
                  record being edited), otherwise ``(value, error_message)``
        """
        if value in self.allowed_override:
            return (value, None)
        (tablename, fieldname) = str(self.field).split('.')
        field = self.dbset._db[tablename][fieldname]
        rows = self.dbset(field == value).select(limitby=(0, 1))
        if len(rows) > 0:
            found = rows[0]
            if isinstance(self.record_id, dict):
                # The match is the edited record only if every identifying
                # field agrees.
                for f in self.record_id:
                    if str(getattr(found, f)) != str(self.record_id[f]):
                        return (value, self.error_message)
            elif str(found.id) != str(self.record_id):
                return (value, self.error_message)
        return (value, None)
449
class IS_INT_IN_RANGE(Validator):
    """
    Accepts integers in the half-open range ``minimum <= value < maximum``.

    example::

        INPUT(_type='text', _name='name', requires=IS_INT_IN_RANGE(0, 10))

        >>> IS_INT_IN_RANGE(1,5)('4')
        (4, None)
        >>> IS_INT_IN_RANGE(1,5)(4)
        (4, None)
        >>> IS_INT_IN_RANGE(1,5)(1)
        (1, None)
        >>> IS_INT_IN_RANGE(1,5)(5)
        (5, 'enter an integer between 1 and 4')
        >>> IS_INT_IN_RANGE(1,5)(5)
        (5, 'enter an integer between 1 and 4')
        >>> IS_INT_IN_RANGE(1,5)(3.5)
        (3, 'enter an integer between 1 and 4')
    """

    def __init__(
        self,
        minimum,
        maximum,
        error_message = 'enter an integer between %(min)s and %(max)s',
        ):
        """
        :param minimum: smallest accepted value (inclusive)
        :param maximum: upper bound (exclusive)
        :param error_message: template; ``%(min)s`` is the minimum and
                              ``%(max)s`` the largest accepted integer,
                              i.e. maximum - 1
        """
        self.minimum = int(minimum)
        self.maximum = int(maximum)
        self.error_message = error_message % dict(min=self.minimum,
                                                  max=self.maximum - 1)

    def __call__(self, value):
        """
        :returns: ``(int_value, None)`` when value is a whole number within
                  range, otherwise ``(value, error_message)``; value has
                  already been converted to int if that conversion worked
        """
        try:
            fvalue = float(value)
            value = int(value)
            # Comparing against the float form rejects 3.5, which int()
            # would silently truncate to 3.
            if value == fvalue and self.minimum <= value < self.maximum:
                return (value, None)
        except (ValueError, TypeError, OverflowError):
            # TypeError: None and other non-numeric objects;
            # OverflowError: int() of an infinite float.
            pass
        return (value, self.error_message)
490
class IS_FLOAT_IN_RANGE(Validator):
    """
    Accepts numbers in the closed range ``minimum <= value <= maximum``.

    example::

        INPUT(_type='text', _name='name', requires=IS_FLOAT_IN_RANGE(0, 10))

        >>> IS_FLOAT_IN_RANGE(1,5)('4')
        (4.0, None)
        >>> IS_FLOAT_IN_RANGE(1,5)(4)
        (4.0, None)
        >>> IS_FLOAT_IN_RANGE(1,5)(1)
        (1.0, None)
        >>> IS_FLOAT_IN_RANGE(1,5)(5.1)
        (5.0999999999999996, 'enter a number between 1.0 and 5.0')
        >>> IS_FLOAT_IN_RANGE(1,5)(6.0)
        (6.0, 'enter a number between 1.0 and 5.0')
        >>> IS_FLOAT_IN_RANGE(1,5)(3.5)
        (3.5, None)
    """

    def __init__(
        self,
        minimum,
        maximum,
        error_message = 'enter a number between %(min)s and %(max)s',
        ):
        """
        :param minimum: smallest accepted value (inclusive)
        :param maximum: largest accepted value (inclusive)
        :param error_message: template; ``%(min)s`` and ``%(max)s`` are
                              filled in with the float bounds
        """
        low = float(minimum)
        high = float(maximum)
        self.minimum = low
        self.maximum = high
        self.error_message = error_message % dict(min=low, max=high)

    def __call__(self, value):
        """
        :returns: ``(float_value, None)`` when within range, otherwise
                  ``(value, error_message)``; value has already been
                  converted to float if that conversion worked
        """
        try:
            value = float(value)
        except (ValueError, TypeError):
            return (value, self.error_message)
        if self.minimum <= value <= self.maximum:
            return (value, None)
        return (value, self.error_message)
530
class IS_DECIMAL_IN_RANGE(Validator):
    """
    Accepts values whose decimal reading lies in the closed range
    ``minimum <= value <= maximum``. The value itself is returned unchanged.

    example::

        INPUT(_type='text', _name='name', requires=IS_DECIMAL_IN_RANGE(0, 10))

        >>> IS_DECIMAL_IN_RANGE(1,5)('4')
        ('4', None)
        >>> IS_DECIMAL_IN_RANGE(1,5)(4)
        (4, None)
        >>> IS_DECIMAL_IN_RANGE(1,5)(1)
        (1, None)
        >>> IS_DECIMAL_IN_RANGE(1,5)(5.1)
        (5.0999999999999996, 'enter a number between 1 and 5')
        >>> IS_DECIMAL_IN_RANGE(5.1,6)(5.1)
        (5.0999999999999996, None)
        >>> IS_DECIMAL_IN_RANGE(5.1,6)('5.1')
        ('5.1', None)
        >>> IS_DECIMAL_IN_RANGE(1,5)(6.0)
        (6.0, 'enter a number between 1 and 5')
        >>> IS_DECIMAL_IN_RANGE(1,5)(3.5)
        (3.5, None)
        >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(3.5)
        (3.5, None)
        >>> IS_DECIMAL_IN_RANGE(1.5,5.5)(6.5)
        (6.5, 'enter a number between 1.5 and 5.5')
        >>> IS_DECIMAL_IN_RANGE(1,5)('abc')
        ('abc', 'enter a number between 1 and 5')
    """

    def __init__(
        self,
        minimum,
        maximum,
        error_message = 'enter a number between %(min)s and %(max)s',
        ):
        """
        :param minimum: smallest accepted value (inclusive)
        :param maximum: largest accepted value (inclusive)
        :param error_message: template; ``%(min)s`` and ``%(max)s`` are
                              filled in with the Decimal bounds
        """
        # Going through str() makes float bounds such as 5.1 read as the
        # decimal 5.1 instead of their binary approximation.
        self.minimum = decimal.Decimal(str(minimum))
        self.maximum = decimal.Decimal(str(maximum))
        self.error_message = error_message % dict(min=self.minimum,
                                                  max=self.maximum)

    def __call__(self, value):
        """
        :returns: ``(value, None)`` when within range, otherwise
                  ``(value, error_message)``
        """
        try:
            v = decimal.Decimal(str(value))
            if self.minimum <= v <= self.maximum:
                return (value, None)
        except (ValueError, TypeError, decimal.InvalidOperation):
            # Decimal() signals unparsable text with InvalidOperation, which
            # is neither a ValueError nor a TypeError.
            pass
        return (value, self.error_message)
578
def is_empty(value, empty_regex=None):
    """
    Normalise value and tell whether it counts as an empty field.

    Strings are stripped of surrounding whitespace; a stripped string that
    empty_regex matches is replaced by ''. None, '' and [] are empty.

    :param value: the submitted value
    :param empty_regex: optional compiled regular expression describing
                        strings to be treated as empty
    :returns: tuple ``(normalised_value, True or False)``
    """
    if isinstance(value, (str, unicode)):
        value = value.strip()
        treat_as_blank = empty_regex is not None and empty_regex.match(value)
        if treat_as_blank:
            value = ''
    if value == None or value == '' or value == []:
        empty = True
    else:
        empty = False
    return (value, empty)
589
class IS_NOT_EMPTY(Validator):
    """
    Rejects empty values, as judged by is_empty().

    example::

        INPUT(_type='text', _name='name', requires=IS_NOT_EMPTY())

        >>> IS_NOT_EMPTY()(1)
        (1, None)
        >>> IS_NOT_EMPTY()(0)
        (0, None)
        >>> IS_NOT_EMPTY()('x')
        ('x', None)
        >>> IS_NOT_EMPTY()(' x ')
        ('x', None)
        >>> IS_NOT_EMPTY()(None)
        (None, 'enter a value')
        >>> IS_NOT_EMPTY()('')
        ('', 'enter a value')
        >>> IS_NOT_EMPTY()(' ')
        ('', 'enter a value')
        >>> IS_NOT_EMPTY()(' \\n\\t')
        ('', 'enter a value')
        >>> IS_NOT_EMPTY()([])
        ([], 'enter a value')
        >>> IS_NOT_EMPTY(empty_regex='def')('def')
        ('', 'enter a value')
        >>> IS_NOT_EMPTY(empty_regex='de[fg]')('deg')
        ('', 'enter a value')
        >>> IS_NOT_EMPTY(empty_regex='def')('abc')
        ('abc', None)
    """

    def __init__(self, error_message='enter a value', empty_regex=None):
        """
        :param error_message: message returned for an empty value
        :param empty_regex: optional regular expression source; strings it
                            matches are treated as empty
        """
        self.error_message = error_message
        self.empty_regex = None
        if empty_regex is not None:
            self.empty_regex = re.compile(empty_regex)

    def __call__(self, value):
        """
        :returns: ``(normalised_value, None)`` for a non-empty value,
                  otherwise ``(normalised_value, error_message)``
        """
        value, empty = is_empty(value, empty_regex=self.empty_regex)
        if not empty:
            return (value, None)
        return (value, self.error_message)
634
class IS_ALPHANUMERIC(IS_MATCH):
    """
    Accepts strings made only of word characters; the empty string passes.

    example::

        INPUT(_type='text', _name='name', requires=IS_ALPHANUMERIC())

        >>> IS_ALPHANUMERIC()('1')
        ('1', None)
        >>> IS_ALPHANUMERIC()('')
        ('', None)
        >>> IS_ALPHANUMERIC()('A_a')
        ('A_a', None)
        >>> IS_ALPHANUMERIC()('!')
        ('!', 'enter only letters, numbers, and underscore')
    """

    def __init__(self, error_message='enter only letters, numbers, and underscore'):
        """
        :param error_message: message returned when another character occurs
        """
        pattern = r'^[\w]*$'
        super(IS_ALPHANUMERIC, self).__init__(pattern, error_message)
654
class IS_EMAIL(Validator):
    r"""
    Checks that a field's value is a valid email address. It can also be
    told to refuse, or to insist on, addresses from certain domain(s).

    Email regex adapted from
    http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx,
    generally following the RFCs, except that we disallow quoted strings
    and permit underscores and leading numerics in subdomain labels

    Arguments:

    - banned: regex text for disallowed address domains
    - forced: regex text for required address domains

    Both arguments can also be custom objects with a match(value) method.

    Examples::

        #Check for valid email address:
        INPUT(_type='text', _name='name',
            requires=IS_EMAIL())

        #Check for valid email address that can't be from a .com domain:
        INPUT(_type='text', _name='name',
            requires=IS_EMAIL(banned='^.*\.com(|\..*)$'))

        #Check for valid email address that must be from a .edu domain:
        INPUT(_type='text', _name='name',
            requires=IS_EMAIL(forced='^.*\.edu(|\..*)$'))

        >>> IS_EMAIL()('a@b.com')
        ('a@b.com', None)
        >>> IS_EMAIL()('abc@def.com')
        ('abc@def.com', None)
        >>> IS_EMAIL()('abc@3def.com')
        ('abc@3def.com', None)
        >>> IS_EMAIL()('abc@def.us')
        ('abc@def.us', None)
        >>> IS_EMAIL()('abc@d_-f.us')
        ('abc@d_-f.us', None)
        >>> IS_EMAIL()('@def.com')           # missing name
        ('@def.com', 'enter a valid email address')
        >>> IS_EMAIL()('"abc@def".com')      # quoted name
        ('"abc@def".com', 'enter a valid email address')
        >>> IS_EMAIL()('abc+def.com')        # no @
        ('abc+def.com', 'enter a valid email address')
        >>> IS_EMAIL()('abc@def.x')          # one-char TLD
        ('abc@def.x', 'enter a valid email address')
        >>> IS_EMAIL()('abc@def.12')         # numeric TLD
        ('abc@def.12', 'enter a valid email address')
        >>> IS_EMAIL()('abc@def..com')       # double-dot in domain
        ('abc@def..com', 'enter a valid email address')
        >>> IS_EMAIL()('abc@.def.com')       # dot starts domain
        ('abc@.def.com', 'enter a valid email address')
        >>> IS_EMAIL()('abc@def.c_m')        # underscore in TLD
        ('abc@def.c_m', 'enter a valid email address')
        >>> IS_EMAIL()('NotAnEmail')         # missing @
        ('NotAnEmail', 'enter a valid email address')
        >>> IS_EMAIL()('abc@NotAnEmail')     # missing TLD
        ('abc@NotAnEmail', 'enter a valid email address')
        >>> IS_EMAIL()('customer/department@example.com')
        ('customer/department@example.com', None)
        >>> IS_EMAIL()('$A12345@example.com')
        ('$A12345@example.com', None)
        >>> IS_EMAIL()('!def!xyz%abc@example.com')
        ('!def!xyz%abc@example.com', None)
        >>> IS_EMAIL()('_Yosemite.Sam@example.com')
        ('_Yosemite.Sam@example.com', None)
        >>> IS_EMAIL()('~@example.com')
        ('~@example.com', None)
        >>> IS_EMAIL()('.wooly@example.com')       # dot starts name
        ('.wooly@example.com', 'enter a valid email address')
        >>> IS_EMAIL()('wo..oly@example.com')      # adjacent dots in name
        ('wo..oly@example.com', 'enter a valid email address')
        >>> IS_EMAIL()('pootietang.@example.com')  # dot ends name
        ('pootietang.@example.com', 'enter a valid email address')
        >>> IS_EMAIL()('.@example.com')            # name is bare dot
        ('.@example.com', 'enter a valid email address')
        >>> IS_EMAIL()('Ima.Fool@example.com')
        ('Ima.Fool@example.com', None)
        >>> IS_EMAIL()('Ima Fool@example.com')     # space in name
        ('Ima Fool@example.com', 'enter a valid email address')
    """

    regex = re.compile(r'''
        ^(?!\.)                            # name may not begin with a dot
        (
          [-a-z0-9!\#$%&'*+/=?^_`{|}~]     # all legal characters except dot
          |
          (?<!\.)\.                        # single dots only
        )+
        (?<!\.)                            # name may not end with a dot
        @
        (
          [a-z0-9]                         # [sub]domain begins with alphanumeric
          (
            [-\w]*                         # alphanumeric, underscore, dot, hyphen
            [a-z0-9]                       # ending alphanumeric
          )?
        \.                                 # ending dot
        )+
        [a-z]{2,}$                         # TLD alpha-only
    ''', re.VERBOSE|re.IGNORECASE)

    def __init__(self,
                 banned=None,
                 forced=None,
                 error_message='enter a valid email address'):
        """
        :param banned: regex text (compiled here) or an object with a
                       ``match`` method; domains it matches are refused
        :param forced: regex text (compiled here) or an object with a
                       ``match`` method; only domains it matches are allowed
        :param error_message: message returned for a rejected address
        """
        self.banned = re.compile(banned) if isinstance(banned, str) else banned
        self.forced = re.compile(forced) if isinstance(forced, str) else forced
        self.error_message = error_message

    def __call__(self, value):
        """
        :returns: ``(value, None)`` for a well-formed address whose domain
                  passes the banned/forced filters, otherwise
                  ``(value, error_message)``
        """
        rejected = (value, self.error_message)
        if not self.regex.match(value):
            return rejected
        domain = value.split('@')[1]
        if self.banned and self.banned.match(domain):
            return rejected
        if self.forced and not self.forced.match(domain):
            return rejected
        return (value, None)
# URL scheme source:
# <http://en.wikipedia.org/wiki/URI_scheme> obtained on 2008-Nov-10

official_url_schemes = [
    'aaa', 'aaas', 'acap', 'cap', 'cid', 'crid', 'data', 'dav', 'dict',
    'dns', 'fax', 'file', 'ftp', 'go', 'gopher', 'h323', 'http', 'https',
    'icap', 'im', 'imap', 'info', 'ipp', 'iris', 'iris.beep', 'iris.xpc',
    'iris.xpcs', 'iris.lws', 'ldap', 'mailto', 'mid', 'modem', 'msrp',
    'msrps', 'mtqp', 'mupdate', 'news', 'nfs', 'nntp', 'opaquelocktoken',
    'pop', 'pres', 'prospero', 'rtsp', 'service', 'shttp', 'sip', 'sips',
    'snmp', 'soap.beep', 'soap.beeps', 'tag', 'tel', 'telnet', 'tftp',
    'thismessage', 'tip', 'tv', 'urn', 'vemmi', 'wais',
    'xmlrpc.beep',
    # The list used to repeat 'xmlrpc.beep' here, leaving the secure
    # variant of the scheme unrecognised.
    'xmlrpc.beeps',
    'xmpp', 'z39.50r', 'z39.50s',
    ]
unofficial_url_schemes = [
    'about', 'adiumxtra', 'aim', 'afp', 'aw', 'callto', 'chrome', 'cvs',
    'ed2k', 'feed', 'fish', 'gg', 'gizmoproject', 'iax2', 'irc', 'ircs',
    'itms', 'jar', 'javascript', 'keyparc', 'lastfm', 'ldaps', 'magnet',
    'mms', 'msnim', 'mvn', 'notes', 'nsfw', 'psyc', 'paparazzi:http',
    'rmi', 'rsync', 'secondlife', 'sgn', 'skype', 'ssh', 'sftp', 'smb',
    'sms', 'soldat', 'steam', 'svn', 'teamspeak', 'unreal', 'ut2004',
    'ventrilo', 'view-source', 'webcal', 'wyciwyg', 'xfire', 'xri',
    'ymsgr',
    ]
# None stands for "no scheme given", i.e. an abbreviated URL.
all_url_schemes = [None] + official_url_schemes + unofficial_url_schemes
http_schemes = [None, 'http', 'https']


# This regex comes from RFC 2396, Appendix B. It's used to split a URL into
# its component parts
# Here are the regex groups that it extracts:
#    scheme = group(2)
#    authority = group(4)
#    path = group(5)
#    query = group(7)
#    fragment = group(9)

url_split_regex = \
    re.compile(r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?')
# Defined in RFC 3490, Section 3.1, Requirement #1
# Use this regex to split the authority component of a unicode URL into
# its component labels
label_split_regex = re.compile(u'[\u002e\u3002\uff0e\uff61]')


def escape_unicode(string):
    '''
    Converts a unicode string into US-ASCII, using a simple conversion scheme.
    Characters up to U+007F are copied as they are. Every other character is
    replaced by two URL escapes built from the hexadecimal digits of its code
    point: digits 1-2 form the first escape and digits 3-4 the second.
    For example, the character U+4E86 becomes the string '%4e%86'.

    NOTE(review): a code point with fewer than four hex digits yields a short
    or empty second escape (U+00E7 gives '%e7%'), and digits beyond the fourth
    are dropped -- confirm whether callers depend on this.

    :param string: unicode string, the unicode string to convert into an
                   escaped US-ASCII form
    :returns: the US-ASCII escaped form of the inputted string
    :rtype: string

    @author: Jonathan Benn
    '''
    escaped = StringIO()

    for symbol in string:
        codepoint = ord(symbol)
        if codepoint <= 0x7F:
            escaped.write(symbol)
            continue
        digits = '%x' % codepoint
        escaped.write('%' + digits[0:2] + '%' + digits[2:4])

    return escaped.getvalue()
955
def unicode_to_ascii_authority(authority):
    '''
    Converts a unicode authority string into its ASCII equivalent by
    following the steps of RFC 3490, Section 4.
    For example, u'www.Alliancefran\\xe7aise.nu' will be converted into
    'www.xn--alliancefranaise-npb.nu'

    :param authority: unicode string, the URL authority component to convert,
                      e.g. u'www.Alliancefran\\xe7aise.nu'
    :returns: the US-ASCII character equivalent to the inputed authority,
             e.g. 'www.xn--alliancefranaise-npb.nu'
    :rtype: string
    :raises Exception: if the function is not able to convert the inputed
                       authority

    @author: Jonathan Benn
    '''
    # RFC 3490, Section 4, Step 1
    # The encodings.idna Python module assumes that AllowUnassigned == True

    # RFC 3490, Section 4, Step 2: split the authority into its labels
    pieces = label_split_regex.split(authority)

    # RFC 3490, Section 4, Step 3
    # The encodings.idna Python module assumes that UseSTD3ASCIIRules == False

    # RFC 3490, Section 4, Step 4
    # ToASCII is the right operation because the authority is about to be
    # put into an IDN-unaware slot
    try:
        import encodings.idna
        converted = []
        for piece in pieces:
            if not piece:
                # encodings.idna.ToASCII refuses an empty string, yet empty
                # labels have to survive so that the URL is not modified
                converted.append('')
                continue
            converted.append(encodings.idna.ToASCII(piece))
    except:
        # Fall back to a plain conversion of every label
        converted = [str(piece) for piece in pieces]

    # RFC 3490, Section 4, Step 5: put the labels back together with dots
    return str('.'.join(converted))
1001
def unicode_to_ascii_url(url, prepend_scheme):
    '''
    Converts the inputed unicode url into a US-ASCII equivalent. This function
    goes a little beyond RFC 3490, which is limited in scope to the domain name
    (authority) only. Here, the functionality is expanded to what was observed
    on Wikipedia on 2009-Jan-22:

       Component    Can Use Unicode?
       ---------    ----------------
       scheme       No
       authority    Yes
       path         Yes
       query        Yes
       fragment     No

    The authority component gets converted to punycode, but occurrences of
    unicode in other components get converted into a pair of URI escapes (we
    assume 4-byte unicode). E.g. the unicode character U+4E2D will be
    converted into '%4e%2d'. Testing with Firefox v3.0.5 has shown that it can
    understand this kind of URI encoding.

    :param url: unicode string, the URL to convert from unicode into US-ASCII
    :param prepend_scheme: string, a protocol scheme to prepend to the URL if
                           we're having trouble parsing it.
                           e.g. "http". Input None to disable this
                           functionality; the returned URL then carries no
                           scheme at all
    :returns: a US-ASCII equivalent of the inputed url
    :rtype: string
    :raises Exception: if no authority component can be found, even after
                       trying with a scheme prepended

    @author: Jonathan Benn
    '''
    # convert the authority component of the URL into an ASCII punycode
    # string, but encode the rest using the regular URI character encoding

    groups = url_split_regex.match(url).groups()
    # If no authority was found
    if not groups[3]:
        # Try prepending a scheme to see if that fixes the problem
        scheme_to_prepend = prepend_scheme or 'http'
        groups = url_split_regex.match(
            unicode(scheme_to_prepend) + u'://' + url).groups()
    # if we still can't find the authority
    if not groups[3]:
        raise Exception('No authority component found, ' +
                        'could not decode unicode to US-ASCII')

    # We're here if we found an authority, let's rebuild the URL.
    # groups[5] and groups[7] still carry their leading '?' and '#'.
    scheme = groups[1]
    authority = groups[3]
    path = groups[4] or ''
    query = groups[5] or ''
    fragment = groups[7] or ''

    if prepend_scheme:
        # A scheme-relative URL ('//host/path') has an authority but no
        # scheme; fall back to prepend_scheme instead of emitting 'None://'.
        scheme = str(scheme or prepend_scheme) + '://'
    else:
        scheme = ''
    return scheme + unicode_to_ascii_authority(authority) + \
        escape_unicode(path) + escape_unicode(query) + str(fragment)
1061
1062 1063 -class IS_GENERIC_URL(Validator):
1064 """ 1065 Rejects a URL string if any of the following is true: 1066 * The string is empty or None 1067 * The string uses characters that are not allowed in a URL 1068 * The URL scheme specified (if one is specified) is not valid 1069 1070 Based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html 1071 1072 This function only checks the URL's syntax. It does not check that the URL 1073 points to a real document, for example, or that it otherwise makes sense 1074 semantically. This function does automatically prepend 'http://' in front 1075 of a URL if and only if that's necessary to successfully parse the URL. 1076 Please note that a scheme will be prepended only for rare cases 1077 (e.g. 'google.ca:80') 1078 1079 The list of allowed schemes is customizable with the allowed_schemes 1080 parameter. If you exclude None from the list, then abbreviated URLs 1081 (lacking a scheme such as 'http') will be rejected. 1082 1083 The default prepended scheme is customizable with the prepend_scheme 1084 parameter. If you set prepend_scheme to None then prepending will be 1085 disabled. URLs that require prepending to parse will still be accepted, 1086 but the return value will not be modified. 1087 1088 @author: Jonathan Benn 1089 """ 1090
1091 - def __init__( 1092 self, 1093 error_message='enter a valid URL', 1094 allowed_schemes=None, 1095 prepend_scheme=None, 1096 ):
1097 """ 1098 :param error_message: a string, the error message to give the end user 1099 if the URL does not validate 1100 :param allowed_schemes: a list containing strings or None. Each element 1101 is a scheme the inputed URL is allowed to use 1102 :param prepend_scheme: a string, this scheme is prepended if it's 1103 necessary to make the URL valid 1104 """ 1105 1106 self.error_message = error_message 1107 if allowed_schemes == None: 1108 self.allowed_schemes = all_url_schemes 1109 else: 1110 self.allowed_schemes = allowed_schemes 1111 self.prepend_scheme = prepend_scheme 1112 if self.prepend_scheme not in self.allowed_schemes: 1113 raise SyntaxError, \ 1114 "prepend_scheme='%s' is not in allowed_schemes=%s" \ 1115 % (self.prepend_scheme, self.allowed_schemes)
1116
def __call__(self, value):
    """
    Validate the syntax of a URL.

    :param value: a string, the URL to validate
    :returns: a tuple, where tuple[0] is the input value (possibly
        prepended with prepend_scheme), and tuple[1] is either
        None (success!) or the string error_message
    """
    failure = (value, self.error_message)
    try:
        # Reject any misuse of the '%' character (malformed or truncated
        # percent-escapes).
        if re.compile(
            r"%[^0-9A-Fa-f]{2}|%[^0-9A-Fa-f][0-9A-Fa-f]|%[0-9A-Fa-f][^0-9A-Fa-f]|%$|%[0-9A-Fa-f]$|%[^0-9A-Fa-f]$"
            ).search(value):
            return failure
        # Reject anything containing characters not allowed in a URL.
        if not re.compile(
            r"[A-Za-z0-9;/?:@&=+$,\-_\.!~*'\(\)%#]+$").match(value):
            return failure
        # Split the URL into its components and inspect the scheme.
        scheme = url_split_regex.match(value).group(2)
        if scheme is not None:
            # Clean up the scheme before we check it.
            scheme = urllib.unquote(scheme).lower()
        if scheme in self.allowed_schemes:
            return (value, None)
        # For abbreviated URLs with ports (e.g. 'google.ca:80'), check
        # whether adding a scheme fixes the problem -- but only when the
        # value has no scheme separator yet and scheme-less URLs are
        # allowed at all.
        if re.compile('://').search(value) or \
                None not in self.allowed_schemes:
            return failure
        scheme_to_use = self.prepend_scheme or 'http'
        prepend_test = self.__call__(scheme_to_use + '://' + value)
        if prepend_test[1] is None:
            if self.prepend_scheme:
                # Prepending in the output is enabled.
                return prepend_test
            # Otherwise hand back the original, non-prepended value.
            return (value, None)
    except Exception:
        # Best effort: any failure while parsing (for instance a value that
        # is not a string) means the URL is not valid. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        pass
    return failure
# Sources (obtained 2008-Nov-11):
#    http://en.wikipedia.org/wiki/Top-level_domain
#    http://www.iana.org/domains/root/db/

# Top-level domains accepted by the HTTP URL validator. Kept as a list of
# lower-case strings, one row per leading letter.
official_top_level_domains = (
    'ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az '
    'ba bb bd be bf bg bh bi biz bj bl bm bn bo br bs bt bv bw by bz '
    'ca cat cc cd cf cg ch ci ck cl cm cn co com coop cr cu cv cx cy cz '
    'de dj dk dm do dz '
    'ec edu ee eg eh er es et eu example '
    'fi fj fk fm fo fr '
    'ga gb gd ge gf gg gh gi gl gm gn gov gp gq gr gs gt gu gw gy '
    'hk hm hn hr ht hu '
    'id ie il im in info int invalid io iq ir is it '
    'je jm jo jobs jp '
    'ke kg kh ki km kn kp kr kw ky kz '
    'la lb lc li lk localhost lr ls lt lu lv ly '
    'ma mc md me mf mg mh mil mk ml mm mn mo mobi mp mq mr ms mt mu '
    'museum mv mw mx my mz '
    'na name nc ne net nf ng ni nl no np nr nu nz '
    'om org '
    'pa pe pf pg ph pk pl pm pn pr pro ps pt pw py '
    'qa '
    're ro rs ru rw '
    'sa sb sc sd se sg sh si sj sk sl sm sn so sr st su sv sy sz '
    'tc td tel test tf tg th tj tk tl tm tn to tp tr travel tt tv tw tz '
    'ua ug uk um us uy uz '
    'va vc ve vg vi vn vu '
    'wf ws '
    'xn--0zwm56d xn--11b5bs3a9aj6g xn--80akhbyknj4f xn--9t4b11yi5a '
    'xn--deba0ad xn--g6w251d xn--hgbk6aj7f53bba xn--hlcj6aya9esc7a '
    'xn--jxalpdlp xn--kgbechtv xn--zckzah '
    'ye yt yu '
    'za zm zw'
).split()
class IS_HTTP_URL(Validator):
    """
    Rejects a URL string if any of the following is true:
       * The string is empty or None
       * The string uses characters that are not allowed in a URL
       * The string breaks any of the HTTP syntactic rules
       * The URL scheme specified (if one is specified) is not 'http' or 'https'
       * The top-level domain (if a host name is specified) does not exist

    Based on RFC 2616: http://www.faqs.org/rfcs/rfc2616.html

    This function only checks the URL's syntax. It does not check that the URL
    points to a real document, for example, or that it otherwise makes sense
    semantically. This function does automatically prepend 'http://' in front
    of a URL in the case of an abbreviated URL (e.g. 'google.ca').

    The list of allowed schemes is customizable with the allowed_schemes
    parameter. If you exclude None from the list, then abbreviated URLs
    (lacking a scheme such as 'http') will be rejected.

    The default prepended scheme is customizable with the prepend_scheme
    parameter. If you set prepend_scheme to None then prepending will be
    disabled. URLs that require prepending to parse will still be accepted,
    but the return value will not be modified.

    @author: Jonathan Benn
    """

    def __init__(
        self,
        error_message='enter a valid URL',
        allowed_schemes=None,
        prepend_scheme='http',
        ):
        """
        :param error_message: a string, the error message to give the end user
            if the URL does not validate
        :param allowed_schemes: a list containing strings or None. Each element
            is a scheme the input URL is allowed to use. When omitted, the
            module-level ``http_schemes`` list is used.
        :param prepend_scheme: a string, this scheme is prepended if it's
            necessary to make the URL valid
        :raises SyntaxError: if an allowed scheme is not in ``http_schemes``
            or prepend_scheme is not in the allowed schemes
        """

        self.error_message = error_message
        if allowed_schemes is None:
            self.allowed_schemes = http_schemes
        else:
            self.allowed_schemes = allowed_schemes
        self.prepend_scheme = prepend_scheme

        for i in self.allowed_schemes:
            if i not in http_schemes:
                raise SyntaxError(
                    "allowed_scheme value '%s' is not in %s"
                    % (i, http_schemes))

        if self.prepend_scheme not in self.allowed_schemes:
            raise SyntaxError(
                "prepend_scheme='%s' is not in allowed_schemes=%s"
                % (self.prepend_scheme, self.allowed_schemes))

    def __call__(self, value):
        """
        :param value: a string, the URL to validate
        :returns: a tuple, where tuple[0] is the input value
            (possibly prepended with prepend_scheme), and tuple[1] is either
            None (success!) or the string error_message
        """

        try:
            # The URL must first pass generic validation.
            x = IS_GENERIC_URL(error_message=self.error_message,
                               allowed_schemes=self.allowed_schemes,
                               prepend_scheme=self.prepend_scheme)
            if x(value)[1] is None:
                componentsMatch = url_split_regex.match(value)
                authority = componentsMatch.group(4)
                if authority:
                    # Authority written as a dotted-quad IP address
                    # (optionally followed by a port).
                    if re.compile(
                        r'\d+\.\d+\.\d+\.\d+(:\d*)*$').match(authority):
                        return (value, None)
                    # Otherwise the authority must be a domain name whose
                    # top-level domain is officially registered.
                    # NOTE(review): this pattern nests quantifiers and may
                    # backtrack heavily on long non-matching input --
                    # confirm whether that matters for your deployment.
                    domainMatch = re.compile(
                        r'(([A-Za-z0-9]+[A-Za-z0-9\-]*[A-Za-z0-9]+\.)*([A-Za-z0-9]+\.)*)*([A-Za-z]+[A-Za-z0-9\-]*[A-Za-z0-9]+)\.?(:\d*)*$'
                        ).match(authority)
                    if domainMatch and domainMatch.group(4).lower() \
                            in official_top_level_domains:
                        return (value, None)
                else:
                    # No authority: this is a relative/abbreviated URL,
                    # which parses into the URL's path component.
                    path = componentsMatch.group(5)
                    # Relative case: a valid path starts with a slash.
                    if re.compile('/').match(path):
                        return (value, None)
                    # Abbreviated case: if we haven't already, prepend a
                    # scheme and see if it fixes the problem.
                    if not re.compile('://').search(value):
                        schemeToUse = self.prepend_scheme or 'http'
                        prependTest = self.__call__(schemeToUse
                                                    + '://' + value)
                        if prependTest[1] is None:
                            if self.prepend_scheme:
                                # Prepending in the output is enabled.
                                return prependTest
                            # Otherwise return the original value.
                            return (value, None)
        except Exception:
            # Best effort: any parsing failure means the URL is invalid.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        # else the HTTP URL is not valid
        return (value, self.error_message)
1587
class IS_URL(Validator):
    """
    Rejects a URL string if any of the following is true:
       * The string is empty or None
       * The string uses characters that are not allowed in a URL
       * The string breaks any of the HTTP syntactic rules
       * The URL scheme specified (if one is specified) is not 'http' or 'https'
       * The top-level domain (if a host name is specified) does not exist

    (These rules are based on RFC 2616: http://www.faqs.org/rfcs/rfc2616.html)

    This function only checks the URL's syntax. It does not check that the URL
    points to a real document, for example, or that it otherwise makes sense
    semantically. This function does automatically prepend 'http://' in front
    of a URL in the case of an abbreviated URL (e.g. 'google.ca').

    If the parameter mode='generic' is used, then this function's behavior
    changes. It then rejects a URL string if any of the following is true:
       * The string is empty or None
       * The string uses characters that are not allowed in a URL
       * The URL scheme specified (if one is specified) is not valid

    (These rules are based on RFC 2396: http://www.faqs.org/rfcs/rfc2396.html)

    The list of allowed schemes is customizable with the allowed_schemes
    parameter. If you exclude None from the list, then abbreviated URLs
    (lacking a scheme such as 'http') will be rejected.

    The default prepended scheme is customizable with the prepend_scheme
    parameter. If you set prepend_scheme to None then prepending will be
    disabled. URLs that require prepending to parse will still be accepted,
    but the return value will not be modified.

    IS_URL is compatible with the Internationalized Domain Name (IDN) standard
    specified in RFC 3490 (http://tools.ietf.org/html/rfc3490). As a result,
    URLs can be regular strings or unicode strings.
    If the URL's domain component (e.g. google.ca) contains non-US-ASCII
    letters, then the domain will be converted into Punycode (defined in
    RFC 3492, http://tools.ietf.org/html/rfc3492). IS_URL goes a bit beyond
    the standards, and allows non-US-ASCII characters to be present in the path
    and query components of the URL as well. These non-US-ASCII characters will
    be escaped using the standard '%20' type syntax. e.g. the unicode
    character with hex code 0x4e86 will become '%4e%86'

    Code Examples::

        INPUT(_type='text', _name='name', requires=IS_URL())
        INPUT(_type='text', _name='name', requires=IS_URL(mode='generic'))
        INPUT(_type='text', _name='name',
            requires=IS_URL(allowed_schemes=['https']))
        INPUT(_type='text', _name='name',
            requires=IS_URL(prepend_scheme='https'))
        INPUT(_type='text', _name='name',
            requires=IS_URL(mode='generic', allowed_schemes=['ftps', 'https'],
                prepend_scheme='https'))

    @author: Jonathan Benn
    """

    def __init__(
        self,
        error_message='enter a valid URL',
        mode='http',
        allowed_schemes=None,
        prepend_scheme='http',
        ):
        """
        :param error_message: a string, the error message to give the end user
            if the URL does not validate
        :param mode: 'http' or 'generic' (case-insensitive); selects which
            sub-validator performs the check
        :param allowed_schemes: a list containing strings or None. Each element
            is a scheme the input URL is allowed to use
        :param prepend_scheme: a string, this scheme is prepended if it's
            necessary to make the URL valid
        :raises SyntaxError: if mode is unknown, or if allowed_schemes is
            given and prepend_scheme is not one of them
        """

        self.error_message = error_message
        self.mode = mode.lower()
        if self.mode not in ['generic', 'http']:
            raise SyntaxError("invalid mode '%s' in IS_URL" % self.mode)
        self.allowed_schemes = allowed_schemes

        if self.allowed_schemes:
            if prepend_scheme not in self.allowed_schemes:
                raise SyntaxError(
                    "prepend_scheme='%s' is not in allowed_schemes=%s"
                    % (prepend_scheme, self.allowed_schemes))

        # if allowed_schemes is None, then we will defer testing
        # prepend_scheme's validity to a sub-method

        self.prepend_scheme = prepend_scheme

    def __call__(self, value):
        """
        :param value: a unicode or regular string, the URL to validate
        :returns: a (string, string) tuple, where tuple[0] is the modified
            input value and tuple[1] is either None (success!) or the
            string error_message. The input value will never be modified in the
            case of an error. However, if there is success then the input URL
            may be modified to (1) prepend a scheme, and/or (2) convert a
            non-compliant unicode URL into a compliant US-ASCII version.
        """

        if self.mode == 'generic':
            subMethod = IS_GENERIC_URL(error_message=self.error_message,
                                       allowed_schemes=self.allowed_schemes,
                                       prepend_scheme=self.prepend_scheme)
        elif self.mode == 'http':
            subMethod = IS_HTTP_URL(error_message=self.error_message,
                                    allowed_schemes=self.allowed_schemes,
                                    prepend_scheme=self.prepend_scheme)
        else:
            raise SyntaxError("invalid mode '%s' in IS_URL" % self.mode)

        # isinstance (rather than an exact type comparison) also routes
        # unicode subclasses through the US-ASCII conversion below.
        if not isinstance(value, unicode):
            return subMethod(value)

        try:
            asciiValue = unicode_to_ascii_url(value, self.prepend_scheme)
        except Exception:
            # If we are not able to convert the unicode url into a
            # US-ASCII URL, then the URL is not valid
            return (value, self.error_message)

        methodResult = subMethod(asciiValue)
        # if the validation of the US-ASCII version of the value failed
        if methodResult[1] is not None:
            # then return the original input value, not the US-ASCII version
            return (value, methodResult[1])
        return methodResult
# Named groups: h = hours, m = minutes, s = seconds, d = 'am'/'pm' designator.
# Any non-digit, non-space separator is accepted between the fields.
regex_time = re.compile(
    '((?P<h>[0-9]+))([^0-9 ]+(?P<m>[0-9 ]+))?([^0-9ap ]+(?P<s>[0-9]*))?((?P<d>[ap]m))?')


class IS_TIME(Validator):
    """
    example::

        INPUT(_type='text', _name='name', requires=IS_TIME())

    understands the following formats
    hh:mm:ss [am/pm]
    hh:mm [am/pm]
    hh [am/pm]

    [am/pm] is optional, ':' can be replaced by any other non-space non-digit

        >>> IS_TIME()('21:30')
        (datetime.time(21, 30), None)
        >>> IS_TIME()('21-30')
        (datetime.time(21, 30), None)
        >>> IS_TIME()('21.30')
        (datetime.time(21, 30), None)
        >>> IS_TIME()('21:30:59')
        (datetime.time(21, 30, 59), None)
        >>> IS_TIME()('5:30')
        (datetime.time(5, 30), None)
        >>> IS_TIME()('5:30 am')
        (datetime.time(5, 30), None)
        >>> IS_TIME()('5:30 pm')
        (datetime.time(17, 30), None)
        >>> IS_TIME()('12:30 am')
        (datetime.time(0, 30), None)
        >>> IS_TIME()('5:30 whatever')
        ('5:30 whatever', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('5:30 20')
        ('5:30 20', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('24:30')
        ('24:30', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('21:60')
        ('21:60', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('21:30::')
        ('21:30::', 'enter time as hh:mm:ss (seconds, am, pm optional)')
        >>> IS_TIME()('')
        ('', 'enter time as hh:mm:ss (seconds, am, pm optional)')
    """

    def __init__(self, error_message='enter time as hh:mm:ss (seconds, am, pm optional)'):
        self.error_message = error_message

    def __call__(self, value):
        """
        Parse ``value`` into a ``datetime.time``.

        :param value: the time as a string
        :returns: ``(datetime.time, None)`` on success, otherwise
            ``(original value, error_message)``
        """
        ivalue = value
        try:
            match = regex_time.match(value.lower())
            (h, m, s) = (int(match.group('h')), 0, 0)
            if match.group('m') is not None:
                m = int(match.group('m'))
            if match.group('s') is not None:
                s = int(match.group('s'))
            designator = match.group('d')
            if designator == 'pm' and 0 < h < 12:
                h = h + 12
            elif designator == 'am' and h == 12:
                # 12:xx am is the first hour of the day, not noon.
                h = 0
            if not (0 <= h < 24 and 0 <= m < 60 and 0 <= s < 60):
                raise ValueError(
                    'Hours or minutes or seconds are outside of allowed range')
            return (datetime.time(h, m, s), None)
        except AttributeError:
            # value has no lower() (not a string) or the regex did not match
            pass
        except ValueError:
            # a field was not an integer, or was out of range
            pass
        return (ivalue, self.error_message)
1792
class IS_DATE(Validator):
    """
    example::

        INPUT(_type='text', _name='name', requires=IS_DATE())

    by default the date has to be in the ISO 8601 format YYYY-MM-DD;
    a different strptime/strftime format can be passed as ``format``
    """

    def __init__(self, format='%Y-%m-%d',
                 error_message='enter date as %(format)s'):
        self.format = str(format)
        self.error_message = str(error_message)

    def __call__(self, value):
        """
        Parse ``value`` with ``self.format``.

        :returns: ``(datetime.date, None)`` on success, otherwise
            ``(original value, error message)`` where ``%(format)s`` in the
            message is replaced by a sample rendering of the format
        """
        try:
            # Only year, month and day are needed from the parsed struct.
            (y, m, d) = time.strptime(value, str(self.format))[:3]
            value = datetime.date(y, m, d)
            return (value, None)
        except Exception:
            # Any parse failure (wrong text, wrong type, out-of-range date)
            # is a validation error; KeyboardInterrupt/SystemExit propagate.
            return (value, self.error_message % IS_DATETIME.nice(self.format))

    def formatter(self, value):
        """Render a date object back to text using ``self.format``."""
        return value.strftime(self.format)
1819
class IS_DATETIME(Validator):
    """
    example::

        INPUT(_type='text', _name='name', requires=IS_DATETIME())

    by default the datetime has to be in the ISO 8601 format
    YYYY-MM-DD hh:mm:ss; a different strptime/strftime format can be
    passed as ``format``
    """

    isodatetime = '%Y-%m-%d %H:%M:%S'

    @staticmethod
    def nice(format):
        """
        Turn a strptime format into a human-readable sample.

        Each supported directive is replaced by an example value, e.g.
        '%Y-%m-%d' becomes '1963-08-28'.

        :returns: a dict ``{'format': sample}`` suitable for ``%``
            interpolation into an error message
        """
        code=(('%Y','1963'),
              ('%y','63'),
              ('%d','28'),
              ('%m','08'),
              ('%b','Aug'),
              # %B is the full month name (was mistakenly listed as a second
              # '%b', so '%B' was never substituted)
              ('%B','August'),
              ('%H','14'),
              ('%I','02'),
              ('%p','PM'),
              ('%M','30'),
              ('%S','59'))
        for (a,b) in code:
            format=format.replace(a,b)
        return dict(format=format)

    def __init__(self, format='%Y-%m-%d %H:%M:%S',
                 error_message='enter date and time as %(format)s'):
        self.format = str(format)
        self.error_message = str(error_message)

    def __call__(self, value):
        """
        Parse ``value`` with ``self.format``.

        :returns: ``(datetime.datetime, None)`` on success, otherwise
            ``(original value, error message)``
        """
        try:
            (y, m, d, hh, mm, ss) = \
                time.strptime(value, str(self.format))[:6]
            value = datetime.datetime(y, m, d, hh, mm, ss)
            return (value, None)
        except Exception:
            # Any parse failure is a validation error;
            # KeyboardInterrupt/SystemExit propagate.
            return (value, self.error_message % IS_DATETIME.nice(self.format))

    def formatter(self, value):
        """Render a datetime object back to text using ``self.format``."""
        return value.strftime(self.format)
1865
class IS_DATE_IN_RANGE(IS_DATE):
    """
    Validate a date (see IS_DATE) and require it to lie between ``minimum``
    and ``maximum``, both bounds included. A bound left as None is not
    checked.

    example::

        INPUT(_type='text', _name='name',
              requires=IS_DATE_IN_RANGE(minimum=datetime.date(2008,1,1),
                                        maximum=datetime.date(2009,12,31)))
    """

    def __init__(self,
                 minimum = None,
                 maximum = None,
                 format='%Y-%m-%d',
                 error_message = "enter date in range %(min)s %(max)s"):
        self.minimum = minimum
        self.maximum = maximum
        d = dict(min=minimum, max=maximum)
        # The range is interpolated into the message once, here.
        IS_DATE.__init__(self,
                         format = format,
                         error_message = error_message % d)

    def __call__(self, value):
        """
        :returns: ``(datetime.date, None)`` when the value parses and is
            within range, otherwise ``(value, error message)``
        """
        (value, msg) = IS_DATE.__call__(self,value)
        if msg is not None:
            return (value, msg)
        # Strict comparisons: a value equal to a bound is in range.
        if self.minimum and value < self.minimum:
            return (value, self.error_message)
        if self.maximum and value > self.maximum:
            return (value, self.error_message)
        return (value, None)
1889
class IS_DATETIME_IN_RANGE(IS_DATETIME):
    """
    Validate a datetime (see IS_DATETIME) and require it to lie between
    ``minimum`` and ``maximum``, both bounds included. A bound left as None
    is not checked.
    """

    def __init__(self,
                 minimum = None,
                 maximum = None,
                 format = '%Y-%m-%d %H:%M:%S',
                 error_message = \
                     "enter date and time in range %(min)s %(max)s"):
        self.minimum = minimum
        self.maximum = maximum
        d = dict(min = minimum, max = maximum)
        # The range is interpolated into the message once, here.
        IS_DATETIME.__init__(self,
                             format = format,
                             error_message = error_message % d)

    def __call__(self, value):
        """
        :returns: ``(datetime.datetime, None)`` when the value parses and is
            within range, otherwise ``(value, error message)``
        """
        (value, msg) = IS_DATETIME.__call__(self, value)
        if msg is not None:
            return (value, msg)
        # Strict comparisons: a value equal to a bound is in range.
        if self.minimum and value < self.minimum:
            return (value, self.error_message)
        if self.maximum and value > self.maximum:
            return (value, self.error_message)
        return (value, None)
1915
class IS_LIST_OF(Validator):
    """
    Apply another validator to every item of a list.

    A value that is not a list is validated as a one-item list. The result
    is the list of validated items, or the original value together with
    the first error reported by the wrapped validator.
    """

    def __init__(self, other):
        self.other = other

    def __call__(self, value):
        if isinstance(value, list):
            items = value
        else:
            items = [value]
        validated = []
        for item in items:
            (result, error) = self.other(item)
            if error:
                # stop at the first failing item
                return (value, error)
            validated.append(result)
        return (validated, None)
1934
class IS_LOWER(Validator):
    """
    convert to lower case (the value is treated as a UTF-8 encoded string)

    >>> IS_LOWER()('ABC')
    ('abc', None)
    >>> IS_LOWER()('Ñ')
    ('\\xc3\\xb1', None)
    """

    def __call__(self, value):
        # decode first so that non-ASCII letters are lowered correctly
        text = value.decode('utf8')
        lowered = text.lower()
        return (lowered.encode('utf8'), None)
1948
class IS_UPPER(Validator):
    """
    convert to upper case (the value is treated as a UTF-8 encoded string)

    >>> IS_UPPER()('abc')
    ('ABC', None)
    >>> IS_UPPER()('ñ')
    ('\\xc3\\x91', None)
    """

    def __call__(self, value):
        # decode first so that non-ASCII letters are raised correctly
        text = value.decode('utf8')
        raised = text.upper()
        return (raised.encode('utf8'), None)
1962
class IS_SLUG(Validator):
    """
    convert arbitrary text string to a slug

    With ``check=True`` the value is not converted silently: a value that
    is not already a slug is rejected with ``error_message``.

    >>> IS_SLUG()('abc123')
    ('abc123', None)
    >>> IS_SLUG()('ABC123')
    ('abc123', None)
    >>> IS_SLUG()('abc-123')
    ('abc-123', None)
    >>> IS_SLUG()('abc--123')
    ('abc-123', None)
    >>> IS_SLUG()('abc 123')
    ('abc-123', None)
    >>> IS_SLUG()('-abc-')
    ('abc', None)
    >>> IS_SLUG()('abc&amp;123')
    ('abc123', None)
    >>> IS_SLUG()('abc&amp;123&amp;def')
    ('abc123def', None)
    >>> IS_SLUG()('ñ')
    ('n', None)
    >>> IS_SLUG(maxlen=4)('abc123')
    ('abc1', None)
    """

    def __init__(self, maxlen=80, check=False, error_message='must be slug'):
        self.maxlen = maxlen
        self.check = check
        self.error_message = error_message

    @staticmethod
    def urlify(value, maxlen=80):
        """Return the slug form of the UTF-8 string ``value``."""
        s = value.decode('utf-8').lower()       # to lowercase utf-8
        s = unicodedata.normalize('NFKD', s)    # normalize eg è => e, ñ => n
        s = s.encode('ASCII', 'ignore')         # encode as ASCII
        s = re.sub(r'&\w+?;', '', s)            # strip html entities
        s = re.sub(r'[^a-z0-9\-\s]', '', s)     # strip all but alphanumeric/hyphen/space
        s = s.replace(' ', '-')                 # spaces to hyphens
        s = re.sub('--+', '-', s)               # collapse strings of hyphens
        s = s.strip('-')                        # remove leading and trailing hyphens
        return s[:maxlen].strip('-')            # enforce maximum length

    def __call__(self,value):
        # Compute the slug once; it serves both the check and the result.
        slug = IS_SLUG.urlify(value, self.maxlen)
        if self.check and value != slug:
            return (value, self.error_message)
        return (slug, None)
2011
class IS_EMPTY_OR(Validator):
    """
    Accept an empty value, otherwise delegate to another validator.

    An empty value (as decided by the module's ``is_empty`` helper,
    optionally with ``empty_regex``) validates to ``null``; any other value
    is passed to ``other``.

    >>> IS_EMPTY_OR(IS_EMAIL())('abc@def.com')
    ('abc@def.com', None)
    >>> IS_EMPTY_OR(IS_EMAIL())('   ')
    (None, None)
    >>> IS_EMPTY_OR(IS_EMAIL(), null='abc')('   ')
    ('abc', None)
    >>> IS_EMPTY_OR(IS_EMAIL(), null='abc', empty_regex='def')('def')
    ('abc', None)
    >>> IS_EMPTY_OR(IS_EMAIL())('abc')
    ('abc', 'enter a valid email address')
    >>> IS_EMPTY_OR(IS_EMAIL())(' abc ')
    ('abc', 'enter a valid email address')
    """

    def __init__(self, other, null=None, empty_regex=None):
        (self.other, self.null) = (other, null)
        if empty_regex is not None:
            self.empty_regex = re.compile(empty_regex)
        else:
            self.empty_regex = None
        # Mirror the optional attributes of the wrapped validator only when
        # it has them, so hasattr() checks on this object behave the same
        # as on the wrapped one.
        if hasattr(other, 'multiple'):
            self.multiple = other.multiple
        if hasattr(other, 'options'):
            self.options=self._options

    def _options(self):
        """Options of the wrapped validator, with a leading empty choice
        unless multiple selection is enabled."""
        options = self.other.options()
        # The wrapped validator may offer options() without a 'multiple'
        # attribute; treat that as single selection instead of failing.
        multiple = getattr(self, 'multiple', False)
        if (not options or options[0][0]!='') and not multiple:
            options.insert(0,('',''))
        return options

    def set_self_id(self, id):
        if hasattr(self.other, 'set_self_id'):
            self.other.set_self_id(id)

    def __call__(self, value):
        value, empty = is_empty(value, empty_regex=self.empty_regex)
        if empty:
            return (self.null, None)
        return self.other(value)

    def formatter(self, value):
        if hasattr(self.other, 'formatter'):
            return self.other.formatter(value)
        return value


IS_NULL_OR = IS_EMPTY_OR    # for backward compatibility
class CLEANUP(Validator):
    """
    example::

        INPUT(_type='text', _name='name', requires=CLEANUP())

    strips surrounding whitespace and removes every character matched by
    ``regex``; by default that is anything other than a space, a newline
    or a word character. This validator never fails.
    """

    def __init__(self, regex='[^ \n\\w]'):
        self.regex = re.compile(regex)

    def __call__(self, value):
        stripped = str(value).strip()
        return (self.regex.sub('', stripped), None)
2081
class CRYPT(object):
    """
    example::

        INPUT(_type='text', _name='name', requires=CRYPT())

    encodes the value on validation with a digest.

    If no arguments are provided CRYPT uses the MD5 algorithm.
    If the key argument is provided the HMAC+MD5 algorithm is used.
    If the digest_alg is specified this is used to replace the
    MD5 with, for example, SHA512. The digest_alg can be
    the name of a hashlib algorithm as a string or the algorithm itself.

    A key of the form 'alg:secret' (exactly one colon, no explicit
    digest_alg) is split into the algorithm name and the actual key.
    """

    def __init__(self, key=None, digest_alg=None):
        if key and not digest_alg and key.count(':') == 1:
            digest_alg, key = key.split(':')
        self.key = key
        # md5 remains the fallback for backward compatibility
        self.digest_alg = digest_alg or 'md5'

    def __call__(self, value):
        if not self.key:
            return (hash(value, self.digest_alg), None)
        digest = get_digest(self.digest_alg)
        return (hmac.new(self.key, value, digest).hexdigest(), None)
2113
class IS_STRONG(object):
    """
    example::

        INPUT(_type='password', _name='passwd',
                requires=IS_STRONG(min=10, special=2, upper=2))

    enforces complexity requirements on a field

    Each of min, max, upper, lower, number and special is only enforced
    when it is an int. For upper, lower and number a value of 0 (or less)
    forbids that character class altogether. When error_message is not set,
    the individual failures are joined with '<br />' and returned as XML.
    """

    def __init__(self, min=8, max=20, upper=1, lower=1, number=1,
                 special=1, specials=r'~!@#$%^&*()_+-=?<>,.:;{}[]|',
                 invalid=' "', error_message=None):
        self.min = min
        self.max = max
        self.upper = upper
        self.lower = lower
        self.number = number
        self.special = special
        self.specials = specials
        self.invalid = invalid
        self.error_message = error_message

    def __call__(self, value):
        problems = []
        length = None

        # length limits
        if type(self.min) == int and self.min > 0:
            length = len(value)
            if length < self.min:
                problems.append("Minimum length is %s" % self.min)
        if type(self.max) == int and self.max > 0:
            length = len(value)
            if length > self.max:
                problems.append("Maximum length is %s" % self.max)

        # special characters: counts how many distinct specials are present
        if type(self.special) == int:
            present = [ch for ch in self.specials if ch in value]
            if self.special > 0 and len(present) < self.special:
                problems.append("Must include at least %s of the following : %s" % (self.special, self.specials))

        # forbidden characters
        if self.invalid:
            forbidden = [ch for ch in self.invalid if ch in value]
            if forbidden:
                problems.append("May not contain any of the following: %s" \
                    % self.invalid)

        # upper case letters
        if type(self.upper) == int:
            found = len(re.findall("[A-Z]", value))
            if self.upper > 0:
                if found < self.upper:
                    problems.append("Must include at least %s upper case" \
                        % str(self.upper))
            elif found > 0:
                problems.append("May not include any upper case letters")

        # lower case letters
        if type(self.lower) == int:
            found = len(re.findall("[a-z]", value))
            if self.lower > 0:
                if found < self.lower:
                    problems.append("Must include at least %s lower case" \
                        % str(self.lower))
            elif found > 0:
                problems.append("May not include any lower case letters")

        # digits
        if type(self.number) == int:
            found = len(re.findall("[0-9]", value))
            if self.number > 0:
                if self.number > 1:
                    noun = "numbers"
                else:
                    noun = "number"
                if found < self.number:
                    problems.append("Must include at least %s %s" \
                        % (str(self.number), noun))
            elif found > 0:
                problems.append("May not include any numbers")

        if not problems:
            return (value, None)
        if self.error_message:
            return (value, self.error_message)
        from gluon.html import XML
        return (value, XML('<br />'.join(problems)))
2193
class IS_IN_SUBSET(IS_IN_SET):
    """
    Validate that every word in the value is accepted by IS_IN_SET.

    The value is converted to a string and split into words (runs of word
    characters); the value is rejected if IS_IN_SET reports an error for
    any of them. On success the original value is returned unchanged.
    """

    def __init__(self, *a, **b):
        IS_IN_SET.__init__(self, *a, **b)

    def __call__(self, value):
        words = re.findall(r"\w+", str(value))
        rejected = [word for word in words
                    if IS_IN_SET.__call__(self, word)[1]]
        if not rejected:
            return (value, None)
        return (value, self.error_message)
2206
class IS_IMAGE(Validator):
    """
    Checks if file uploaded through file input was saved in one of selected
    image formats and has dimensions (width and height) within given boundaries.

    Does *not* check for maximum file size (use IS_LENGTH for that). Returns
    validation failure if no data was uploaded.

    Supported file formats: BMP, GIF, JPEG, PNG.

    Code parts taken from
    http://mail.python.org/pipermail/python-list/2007-June/617126.html

    Arguments:

    extensions: iterable containing allowed *lowercase* image file extensions
    ('jpg' extension of uploaded file counts as 'jpeg')
    maxsize: iterable containing maximum width and height of the image
    minsize: iterable containing minimum width and height of the image

    Use (-1, -1) as minsize to pass image size check.

    Examples::

        #Check if uploaded file is in any of supported image formats:
        INPUT(_type='file', _name='name', requires=IS_IMAGE())

        #Check if uploaded file is either JPEG or PNG:
        INPUT(_type='file', _name='name',
            requires=IS_IMAGE(extensions=('jpeg', 'png')))

        #Check if uploaded file is PNG with maximum size of 200x200 pixels:
        INPUT(_type='file', _name='name',
            requires=IS_IMAGE(extensions=('png',), maxsize=(200, 200)))
    """

    def __init__(self,
                 extensions=('bmp', 'gif', 'jpeg', 'png'),
                 maxsize=(10000, 10000),
                 minsize=(0, 0),
                 error_message='invalid image'):

        self.extensions = extensions
        self.maxsize = maxsize
        self.minsize = minsize
        self.error_message = error_message

    def __call__(self, value):
        """
        :param value: the upload object; its ``filename`` and ``file``
            attributes are read
        :returns: ``(value, None)`` when the image is acceptable (the file
            is rewound to its start), otherwise ``(value, error_message)``
        """
        failure = (value, self.error_message)
        try:
            # Explicit checks instead of assert statements: asserts are
            # stripped when Python runs with -O, which would silently
            # disable the validation.
            dot = value.filename.rfind('.')
            if dot < 0:
                return failure
            extension = value.filename[dot + 1:].lower()
            if extension == 'jpg':
                extension = 'jpeg'
            if extension not in self.extensions:
                return failure
            if extension == 'bmp':
                width, height = self.__bmp(value.file)
            elif extension == 'gif':
                width, height = self.__gif(value.file)
            elif extension == 'jpeg':
                width, height = self.__jpeg(value.file)
            elif extension == 'png':
                width, height = self.__png(value.file)
            else:
                # allowed extension without a size reader
                width = -1
                height = -1
            if not (self.minsize[0] <= width <= self.maxsize[0]
                    and self.minsize[1] <= height <= self.maxsize[1]):
                return failure
            value.file.seek(0)
            return (value, None)
        except Exception:
            # Missing attributes, truncated headers and the like all mean
            # "not a valid image"; KeyboardInterrupt/SystemExit propagate.
            return failure

    def __bmp(self, stream):
        """Read (width, height) from a BMP header, or (-1, -1)."""
        if stream.read(2) == 'BM':
            stream.read(16)
            return struct.unpack("<LL", stream.read(8))
        return (-1, -1)

    def __gif(self, stream):
        """Read (width, height) from a GIF header, or (-1, -1)."""
        if stream.read(6) in ('GIF87a', 'GIF89a'):
            stream = stream.read(5)
            if len(stream) == 5:
                return tuple(struct.unpack("<HHB", stream)[:-1])
        return (-1, -1)

    def __jpeg(self, stream):
        """Walk the JPEG segments up to a frame header with marker code
        0xC0-0xC3 and read (width, height) from it, or return (-1, -1)."""
        if stream.read(2) == '\xFF\xD8':
            while True:
                (marker, code, length) = struct.unpack("!BBH", stream.read(4))
                if marker != 0xFF:
                    break
                elif code >= 0xC0 and code <= 0xC3:
                    # the frame header stores height before width
                    return tuple(reversed(
                        struct.unpack("!xHH", stream.read(5))))
                else:
                    stream.read(length - 2)
        return (-1, -1)

    def __png(self, stream):
        """Read (width, height) from a PNG IHDR chunk, or (-1, -1)."""
        if stream.read(8) == '\211PNG\r\n\032\n':
            stream.read(4)
            if stream.read(4) == "IHDR":
                return struct.unpack("!LL", stream.read(8))
        return (-1, -1)
2313
class IS_UPLOAD_FILENAME(Validator):
    """
    Checks if name and extension of file uploaded through file input matches
    given criteria.

    Does *not* ensure the file type in any way. Returns validation failure
    if no data was uploaded.

    Arguments::

    filename: filename (before dot) regex
    extension: extension (after dot) regex
    lastdot: which dot should be used as a filename / extension separator:
        True means last dot, eg. file.png -> file / png
        False means first dot, eg. file.tar.gz -> file / tar.gz
    case: 0 - keep the case, 1 - transform the string into lowercase (default),
        2 - transform the string into uppercase

    If there is no dot present, extension checks will be done against empty
    string and filename checks against whole value.

    Both regexes are applied with ``match``, i.e. anchored at the start of
    the string only; end the pattern with ``$`` to require a full match.

    Examples::

        #Check if file has a pdf extension (case insensitive):
        INPUT(_type='file', _name='name',
            requires=IS_UPLOAD_FILENAME(extension='pdf'))

        #Check if file has a tar.gz extension and name starting with backup:
        INPUT(_type='file', _name='name',
            requires=IS_UPLOAD_FILENAME(filename='backup.*',
                extension='tar.gz', lastdot=False))

        #Check if file has no extension and name matching README
        #(case sensitive):
        INPUT(_type='file', _name='name',
            requires=IS_UPLOAD_FILENAME(filename='^README$',
                extension='^$', case=0))
    """

    def __init__(self, filename=None, extension=None, lastdot=True, case=1,
                 error_message='enter valid filename'):
        # Compile anything that is not already a pattern object, so unicode
        # patterns work as well as str ones; None and precompiled patterns
        # are stored untouched.
        if filename is not None and not hasattr(filename, 'match'):
            filename = re.compile(filename)
        if extension is not None and not hasattr(extension, 'match'):
            extension = re.compile(extension)
        self.filename = filename
        self.extension = extension
        self.lastdot = lastdot
        self.case = case
        self.error_message = error_message

    def __call__(self, value):
        """
        Validate the name of an uploaded file.

        Returns (value, None) when the name satisfies both regexes, otherwise
        (value, self.error_message).
        """
        # A value without a `filename` attribute, or with filename None (as
        # on a form field that is not a file upload), means no file was
        # uploaded.
        string = getattr(value, 'filename', None)
        if string is None:
            return (value, self.error_message)
        if self.case == 1:
            string = string.lower()
        elif self.case == 2:
            string = string.upper()
        if self.lastdot:
            dot = string.rfind('.')
        else:
            dot = string.find('.')
        if dot == -1:
            # No dot: whole string is the name, extension is empty.
            dot = len(string)
        if self.filename is not None and \
                not self.filename.match(string[:dot]):
            return (value, self.error_message)
        elif self.extension is not None and \
                not self.extension.match(string[dot + 1:]):
            return (value, self.error_message)
        else:
            return (value, None)
2387
class IS_IPV4(Validator):
    """
    Checks if field's value is an IP version 4 address in decimal form. Can
    be set to force addresses from certain range.

    IPv4 regex taken from: http://regexlib.com/REDetails.aspx?regexp_id=1411

    Arguments:

    minip: lowest allowed address; accepts:
           str, eg. 192.168.0.1
           list or tuple of octets, eg. [192, 168, 0, 1]
    maxip: highest allowed address; same as above
    invert: True to allow addresses only from outside of given range; note
            that range boundaries are not matched this way
    is_localhost: localhost address treatment:
                  None (default): indifferent
                  True (enforce): query address must match localhost address
                                  (127.0.0.1)
                  False (forbid): query address must not match localhost
                                  address
    is_private: same as above, except that query address is checked against
                two address ranges: 172.16.0.0 - 172.31.255.255 and
                192.168.0.0 - 192.168.255.255
    is_automatic: same as above, except that query address is checked against
                  one address range: 169.254.0.0 - 169.254.255.255

    Minip and maxip may also be lists or tuples of addresses in all above
    forms (str, list / tuple), allowing setup of multiple address ranges:

    minip = (minip1, minip2, ... minipN)
               |       |           |
               |       |           |
    maxip = (maxip1, maxip2, ... maxipN)

    Longer iterable will be truncated to match length of shorter one.

    With several ranges, invert=True accepts only addresses that lie outside
    every one of them.

    Examples::

        #Check for valid IPv4 address:
        INPUT(_type='text', _name='name', requires=IS_IPV4())

        #Check for valid IPv4 address belonging to specific range:
        INPUT(_type='text', _name='name',
            requires=IS_IPV4(minip='100.200.0.0', maxip='100.200.255.255'))

        #Check for valid IPv4 address belonging to either 100.110.0.0 -
        #100.110.255.255 or 200.50.0.0 - 200.50.0.255 address range:
        INPUT(_type='text', _name='name',
            requires=IS_IPV4(minip=('100.110.0.0', '200.50.0.0'),
                             maxip=('100.110.255.255', '200.50.0.255')))

        #Check for valid IPv4 address belonging to private address space:
        INPUT(_type='text', _name='name', requires=IS_IPV4(is_private=True))

        #Check for valid IPv4 address that is not a localhost address:
        INPUT(_type='text', _name='name', requires=IS_IPV4(is_localhost=False))

    >>> IS_IPV4()('1.2.3.4')
    ('1.2.3.4', None)
    >>> IS_IPV4()('255.255.255.255')
    ('255.255.255.255', None)
    >>> IS_IPV4()('1.2.3.4 ')
    ('1.2.3.4 ', 'enter valid IPv4 address')
    >>> IS_IPV4()('1.2.3.4.5')
    ('1.2.3.4.5', 'enter valid IPv4 address')
    >>> IS_IPV4()('123.123')
    ('123.123', 'enter valid IPv4 address')
    >>> IS_IPV4()('1111.2.3.4')
    ('1111.2.3.4', 'enter valid IPv4 address')
    >>> IS_IPV4()('0111.2.3.4')
    ('0111.2.3.4', 'enter valid IPv4 address')
    >>> IS_IPV4()('256.2.3.4')
    ('256.2.3.4', 'enter valid IPv4 address')
    >>> IS_IPV4()('300.2.3.4')
    ('300.2.3.4', 'enter valid IPv4 address')
    >>> IS_IPV4(minip='1.2.3.4', maxip='1.2.3.4')('1.2.3.4')
    ('1.2.3.4', None)
    >>> IS_IPV4(minip='1.2.3.5', maxip='1.2.3.9', error_message='bad ip')('1.2.3.4')
    ('1.2.3.4', 'bad ip')
    >>> IS_IPV4(maxip='1.2.3.4', invert=True)('127.0.0.1')
    ('127.0.0.1', None)
    >>> IS_IPV4(maxip='1.2.3.4', invert=True)('1.2.3.4')
    ('1.2.3.4', 'enter valid IPv4 address')
    >>> IS_IPV4(minip=('1.0.0.0', '3.0.0.0'), maxip=('1.255.255.255', '3.255.255.255'), invert=True)('1.2.3.4')
    ('1.2.3.4', 'enter valid IPv4 address')
    >>> IS_IPV4(is_localhost=True)('127.0.0.1')
    ('127.0.0.1', None)
    >>> IS_IPV4(is_localhost=True)('1.2.3.4')
    ('1.2.3.4', 'enter valid IPv4 address')
    >>> IS_IPV4(is_localhost=False)('127.0.0.1')
    ('127.0.0.1', 'enter valid IPv4 address')
    >>> IS_IPV4(maxip='100.0.0.0', is_localhost=True)('127.0.0.1')
    ('127.0.0.1', 'enter valid IPv4 address')
    >>> IS_IPV4(is_private=True)('192.168.1.1')
    ('192.168.1.1', None)
    >>> IS_IPV4(is_private=True)('172.31.255.255')
    ('172.31.255.255', None)
    >>> IS_IPV4(is_private=True)('1.2.3.4')
    ('1.2.3.4', 'enter valid IPv4 address')
    >>> IS_IPV4(is_private=False)('192.168.1.1')
    ('192.168.1.1', 'enter valid IPv4 address')
    >>> IS_IPV4(is_private=False, is_automatic=True)('169.254.10.20')
    ('169.254.10.20', None)
    """

    regex = re.compile(
        r'^(([1-9]?\d|1\d\d|2[0-4]\d|25[0-5])\.){3}([1-9]?\d|1\d\d|2[0-4]\d|25[0-5])$')
    # Weight of each octet when an address is folded into a single integer.
    numbers = (16777216, 65536, 256, 1)
    # 127.0.0.1
    localhost = 2130706433
    # 172.16.0.0 - 172.31.255.255 and 192.168.0.0 - 192.168.255.255
    # NOTE(review): 10.0.0.0/8 is also private per RFC 1918 but is not part
    # of the documented contract of is_private; confirm before adding it.
    private = ((2886729728, 2887778303), (3232235520, 3232301055))
    # 169.254.0.0 - 169.254.255.255
    automatic = (2851995648, 2852061183)

    def __init__(
        self,
        minip='0.0.0.0',
        maxip='255.255.255.255',
        invert=False,
        is_localhost=None,
        is_private=None,
        is_automatic=None,
        error_message='enter valid IPv4 address'):
        self.minip = self._parse_addresses(minip)
        self.maxip = self._parse_addresses(maxip)
        self.invert = invert
        self.is_localhost = is_localhost
        self.is_private = is_private
        self.is_automatic = is_automatic
        self.error_message = error_message

    def _address_to_number(self, octets):
        """Fold a sequence of octets (int or numeric str) into one integer."""
        number = 0
        for weight, octet in zip(self.numbers, octets):
            number += weight * int(octet)
        return number

    def _parse_addresses(self, value):
        """
        Turn a minip / maxip argument into a list of integer addresses.

        Accepts a dotted string, a list / tuple of four int octets, or a
        list / tuple of such addresses. Anything else yields an empty list.
        """
        addresses = []
        if isinstance(value, str):
            addresses.append(value.split('.'))
        elif isinstance(value, (list, tuple)):
            int_items = [item for item in value if isinstance(item, int)]
            if len(value) == len(int_items) == 4:
                # A single address given as four octets.
                addresses.append(value)
            else:
                for item in value:
                    if isinstance(item, str):
                        addresses.append(item.split('.'))
                    elif isinstance(item, (list, tuple)):
                        addresses.append(item)
        return [self._address_to_number(octets) for octets in addresses]

    def __call__(self, value):
        """
        Validate value as an IPv4 address satisfying all configured criteria.

        Returns (value, None) on success, (value, self.error_message)
        otherwise.
        """
        if not self.regex.match(value):
            return (value, self.error_message)
        number = self._address_to_number(value.split('.'))

        ranges = list(zip(self.minip, self.maxip))
        inside = False
        for bottom, top in ranges:
            if bottom <= number <= top:
                inside = True
                break
        # Without any usable range nothing is accepted. With invert the
        # address must be outside *all* ranges, not just one of them.
        ok = bool(ranges) and (inside != bool(self.invert))

        if self.is_localhost is not None and \
                self.is_localhost != (number == self.localhost):
            ok = False

        if self.is_private is not None:
            # Distinct loop names: reusing `number` here would shadow the
            # address being tested and make the comparison meaningless.
            in_private = False
            for low, high in self.private:
                if low <= number <= high:
                    in_private = True
                    break
            if self.is_private != in_private:
                ok = False

        if self.is_automatic is not None and self.is_automatic != \
                (self.automatic[0] <= number <= self.automatic[1]):
            ok = False

        if ok:
            return (value, None)
        return (value, self.error_message)
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in the docstrings.
    from doctest import testmod
    testmod()