comparison MoinMoin/support/passlib/utils/__init__.py @ 5919:efd7c0be3339

added passlib 1.6.1 to MoinMoin/support/ removed passlib's unit tests (so our test runner does not run them also)
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Fri, 18 Jan 2013 01:38:07 +0100
parents
children d72a5e95c7c0
comparison
equal deleted inserted replaced
5918:5126fadbf24f 5919:efd7c0be3339
1 """passlib.utils -- helpers for writing password hashes"""
2 #=============================================================================
3 # imports
4 #=============================================================================
5 from passlib.utils.compat import PYPY, JYTHON
6 # core
7 from base64 import b64encode, b64decode
8 from codecs import lookup as _lookup_codec
9 from functools import update_wrapper
10 import logging; log = logging.getLogger(__name__)
11 import math
12 import os
13 import sys
14 import random
15 if JYTHON: # pragma: no cover -- runtime detection
16 # Jython 2.5.2 lacks stringprep module -
17 # see http://bugs.jython.org/issue1758320
18 try:
19 import stringprep
20 except ImportError:
21 stringprep = None
22 _stringprep_missing_reason = "not present under Jython"
23 else:
24 import stringprep
25 import time
26 if stringprep:
27 import unicodedata
28 from warnings import warn
29 # site
30 # pkg
31 from passlib.exc import ExpectedStringError
32 from passlib.utils.compat import add_doc, b, bytes, join_bytes, join_byte_values, \
33 join_byte_elems, exc_err, irange, imap, PY3, u, \
34 join_unicode, unicode, byte_elem_value, PY_MIN_32, next_method_attr
35 # local
36 __all__ = [
37 # constants
38 'PYPY',
39 'JYTHON',
40 'sys_bits',
41 'unix_crypt_schemes',
42 'rounds_cost_values',
43
44 # decorators
45 "classproperty",
46 ## "deprecated_function",
47 ## "relocated_function",
48 ## "memoized_class_property",
49
50 # unicode helpers
51 'consteq',
52 'saslprep',
53
54 # bytes helpers
55 "xor_bytes",
56 "render_bytes",
57
58 # encoding helpers
59 'is_same_codec',
60 'is_ascii_safe',
61 'to_bytes',
62 'to_unicode',
63 'to_native_str',
64
65 # base64 helpers
66 "BASE64_CHARS", "HASH64_CHARS", "BCRYPT_CHARS", "AB64_CHARS",
67 "Base64Engine", "h64", "h64big",
68 "ab64_encode", "ab64_decode",
69
70 # host OS
71 'has_crypt',
72 'test_crypt',
73 'safe_crypt',
74 'tick',
75
76 # randomness
77 'rng',
78 'getrandbytes',
79 'getrandstr',
80 'generate_password',
81
82 # object type / interface tests
83 'is_crypt_handler',
84 'is_crypt_context',
85 'has_rounds_info',
86 'has_salt_info',
87 ]
88
89 #=============================================================================
90 # constants
91 #=============================================================================
92
# bitsize of system architecture (32 or 64).
# computed as log2 of the largest native index value (sys.maxsize, or
# sys.maxint when PY3 is false); adding 1.5 before truncating accounts for
# the sign bit and absorbs float rounding (e.g. log2(2**63-1) ~= 63 -> 64).
sys_bits = int(math.log(sys.maxsize if PY3 else sys.maxint, 2) + 1.5)

# list of hash algorithms supported by crypt() on at least one OS.
unix_crypt_schemes = [
    "sha512_crypt", "sha256_crypt",
    "sha1_crypt", "bcrypt",
    "md5_crypt",
    # "bsd_nthash",
    "bsdi_crypt", "des_crypt",
    ]

# list of rounds_cost constants
rounds_cost_values = [ "linear", "log2" ]

# legacy import, will be removed in 1.8
from passlib.exc import MissingBackendError

# internal helpers: empty bytes / empty unicode / single unicode space
_BEMPTY = b('')
_UEMPTY = u("")
_USPACE = u(" ")

# maximum password size which passlib will allow; see exc.PasswordSizeError.
# may be overridden through the PASSLIB_MAX_PASSWORD_SIZE environment
# variable; an unset or empty variable falls back to 4096.
MAX_PASSWORD_SIZE = int(os.environ.get("PASSLIB_MAX_PASSWORD_SIZE") or 4096)
118
119 #=============================================================================
120 # decorators and meta helpers
121 #=============================================================================
class classproperty(object):
    """Read-only descriptor combining the behaviour of classmethod + property.

    Attribute access (through the class or through an instance) invokes the
    wrapped function with the owning class and returns its result.
    """

    def __init__(self, func):
        # stored under the py2-style attribute name; ``__func__`` aliases it
        self.im_func = func

    def __get__(self, obj, cls):
        # the instance (if any) is ignored; only the class is handed over
        getter = self.im_func
        return getter(cls)

    @property
    def __func__(self):
        "py3 compatible alias for :attr:`im_func`"
        wrapped = self.im_func
        return wrapped
135
def deprecated_function(msg=None, deprecated=None, removed=None, updoc=True,
                        replacement=None, _is_method=False):
    """decorator to deprecate a function.

    The decorated callable emits a :exc:`DeprecationWarning` (attributed to
    its caller via ``stacklevel=2``) every time it is invoked, then delegates
    to the original function.

    :arg msg:
        optional warning message template, default chosen if omitted.
        It is rendered with ``%`` using the keys ``mod``, ``name``,
        ``deprecated``, ``removed`` (and ``klass`` for methods).
    :kwd deprecated: version when function was first deprecated
    :kwd removed: version when function will be removed
    :kwd replacement: alternate name / instructions for replacing this function.
    :kwd updoc: add notice to docstring (default ``True``)
    :kwd _is_method: internal flag set by :func:`deprecated_method`.

    :returns: a decorator which wraps the target function.
    """
    if msg is None:
        if _is_method:
            msg = "the method %(mod)s.%(klass)s.%(name)s() is deprecated"
        else:
            msg = "the function %(mod)s.%(name)s() is deprecated"
        if deprecated:
            msg += " as of Passlib %(deprecated)s"
        if removed:
            msg += ", and will be removed in Passlib %(removed)s"
        if replacement:
            # msg is still a %-template at this point (it gets rendered with
            # ``msg % opts`` below), so any literal '%' coming from the
            # replacement text must be escaped or rendering would fail.
            msg += (", use %s instead" % replacement).replace("%", "%%")
        msg += "."

    def build(func):
        # values available to the message template
        opts = dict(
            mod=func.__module__,
            name=func.__name__,
            deprecated=deprecated,
            removed=removed,
        )
        if _is_method:
            def wrapper(*args, **kwds):
                # class name is only known at call time (taken from ``self``)
                tmp = opts.copy()
                klass = args[0].__class__
                tmp.update(klass=klass.__name__, mod=klass.__module__)
                warn(msg % tmp, DeprecationWarning, stacklevel=2)
                return func(*args, **kwds)
        else:
            # plain functions: message can be rendered once, up front
            text = msg % opts
            def wrapper(*args, **kwds):
                warn(text, DeprecationWarning, stacklevel=2)
                return func(*args, **kwds)
        update_wrapper(wrapper, func)
        if updoc and (deprecated or removed) and wrapper.__doc__:
            # append a ".. deprecated::" notice to the docstring
            txt = deprecated or ''
            if removed or replacement:
                txt += "\n "
                if removed:
                    txt += "and will be removed in version %s" % (removed,)
                if replacement:
                    if removed:
                        txt += ", "
                    txt += "use %s instead" % replacement
                txt += "."
            if not wrapper.__doc__.strip(" ").endswith("\n"):
                wrapper.__doc__ += "\n"
            wrapper.__doc__ += "\n.. deprecated:: %s\n" % (txt,)
        return wrapper
    return build
194
def deprecated_method(msg=None, deprecated=None, removed=None, updoc=True,
                      replacement=None):
    """decorator to deprecate a method.

    Thin front-end for :func:`deprecated_function` which switches on its
    method mode, so the warning names the class of the instance being called.

    :arg msg: optional msg, default chosen if omitted
    :kwd deprecated: version when method was first deprecated
    :kwd removed: version when method will be removed
    :kwd replacement: alternate name / instructions for replacing this method.
    :kwd updoc: add notice to docstring (default ``True``)
    """
    return deprecated_function(msg=msg, deprecated=deprecated,
                               removed=removed, updoc=updoc,
                               replacement=replacement, _is_method=True)
207
class memoized_property(object):
    """decorator which computes a value once per instance, then caches it.

    On first access through an instance the wrapped method is called and its
    result is stored on the instance under the method's own name.
    """
    def __init__(self, func):
        self.im_func = func

    def __get__(self, obj, cls):
        if obj is not None:
            compute = self.im_func
            result = compute(obj)
            # store under the function's name so later lookups find the value
            setattr(obj, compute.__name__, result)
            return result
        # accessed through the class: expose the descriptor itself
        return self

    @property
    def __func__(self):
        "py3 alias for :attr:`im_func`"
        wrapped = self.im_func
        return wrapped
225
226 # works but not used
227 ##class memoized_class_property(object):
228 ## """function decorator which calls function as classmethod,
229 ## and replaces itself with result for current and all future invocations.
230 ## """
231 ## def __init__(self, func):
232 ## self.im_func = func
233 ##
234 ## def __get__(self, obj, cls):
235 ## func = self.im_func
236 ## value = func(cls)
237 ## setattr(cls, func.__name__, value)
238 ## return value
239 ##
240 ## @property
241 ## def __func__(self):
242 ## "py3 compatible alias"
243
244 #=============================================================================
245 # unicode helpers
246 #=============================================================================
247
def consteq(left, right):
    """Check two strings/bytes for equality.
    This is functionally equivalent to ``left == right``,
    but attempts to take constant time relative to the size of the righthand input.

    The purpose of this function is to help prevent timing attacks
    during digest comparisons: the standard ``==`` operator aborts
    after the first mismatched character, causing its runtime to be
    proportional to the longest prefix shared by the two inputs.
    If an attacker is able to predict and control one of the two
    inputs, repeated queries can be leveraged to reveal information about
    the content of the second argument. To minimize this risk, :func:`!consteq`
    is designed to take ``THETA(len(right))`` time, regardless
    of the contents of the two strings.
    It is recommended that the attacker-controlled input
    be passed in as the left-hand value.

    :arg left: first string (unicode or bytes).
    :arg right: second string; must be the same type as *left*.

    :raises TypeError:
        if the inputs are not both unicode or both bytes.

    :returns:
        ``True`` if the two inputs have the same length and contents,
        ``False`` otherwise.

    .. warning::

        This function is *not* perfect. Various VM-dependent issues
        (e.g. the VM's integer object instantiation algorithm, internal unicode representation, etc),
        may still cause the function's run time to be affected by the inputs,
        though in a less predictable manner.
        *To minimize such risks, this function should not be passed* :class:`unicode`
        *inputs that might contain non-* ``ASCII`` *characters*.

    .. versionadded:: 1.6
    """
    # NOTE:
    # resources & discussions considered in the design of this function:
    #   hmac timing attack --
    #       http://rdist.root.org/2009/05/28/timing-attack-in-google-keyczar-library/
    #   python developer discussion surrounding similar function --
    #       http://bugs.python.org/issue15061
    #       http://bugs.python.org/issue14955

    # validate types
    # (is_py3_bytes records whether iterating the inputs yields integers,
    #  in which case the comparison loop must not call ord() on them)
    if isinstance(left, unicode):
        if not isinstance(right, unicode):
            raise TypeError("inputs must be both unicode or both bytes")
        is_py3_bytes = False
    elif isinstance(left, bytes):
        if not isinstance(right, bytes):
            raise TypeError("inputs must be both unicode or both bytes")
        is_py3_bytes = PY3
    else:
        raise TypeError("inputs must be both unicode or both bytes")

    # do size comparison.
    # NOTE: the double-if construction below is done deliberately, to ensure
    #       the same number of operations (including branches) is performed regardless
    #       of whether left & right are the same size.
    same_size = (len(left) == len(right))
    if same_size:
        # if sizes are the same, setup loop to perform actual check of contents.
        tmp = left
        result = 0
    if not same_size:
        # if sizes aren't the same, set 'result' so equality will fail regardless
        # of contents. then, to ensure we do exactly 'len(right)' iterations
        # of the loop, just compare 'right' against itself.
        tmp = right
        result = 1

    # run constant-time string comparison:
    # every element pair is xor'd and or'd into 'result', with no early exit,
    # so 'result' stays 0 only if all pairs match (and the sizes matched).
    if is_py3_bytes:
        for l,r in zip(tmp, right):
            result |= l ^ r
    else:
        for l,r in zip(tmp, right):
            result |= ord(l) ^ ord(r)
    return result == 0
320
def splitcomma(source, sep=","):
    """split comma-separated string into list of elements,
    stripping whitespace.

    :arg source: string to split.
    :arg sep:
        separator string (defaults to ``","``); may be longer than one
        character.

    :returns:
        list of elements with surrounding whitespace removed.
        A single trailing separator is ignored, and an empty (or
        whitespace-only) source yields an empty list.
    """
    source = source.strip()
    if source.endswith(sep):
        # drop the whole trailing separator, not just its last character,
        # so multi-character separators don't leak into the final element.
        source = source[:len(source) - len(sep)]
    if not source:
        return []
    return [elem.strip() for elem in source.split(sep)]
331
def saslprep(source, param="value"):
    """Normalizes unicode string using SASLPrep stringprep profile.

    The SASLPrep profile is defined in :rfc:`4013`.
    It provides a uniform scheme for normalizing unicode usernames
    and passwords before performing byte-value sensitive operations
    such as hashing. Among other things, it normalizes diacritic
    representations, removes non-printing characters, and forbids
    invalid characters such as ``\\n``.

    :arg source:
        unicode string to normalize & validate

    :param param:
        Optional noun used to identify the source parameter in error messages
        (Defaults to the string ``"value"``). This is mainly useful to make the caller's error
        messages make more sense.

    :raises TypeError:
        if *source* is not a unicode string.

    :raises ValueError:
        if any characters forbidden by the SASLPrep profile are encountered.

    :returns:
        normalized unicode string

    .. note::

        This function is not available under Jython,
        as the Jython stdlib is missing the :mod:`!stringprep` module
        (`Jython issue 1758320 <http://bugs.jython.org/issue1758320>`_).
    """
    # saslprep - http://tools.ietf.org/html/rfc4013
    # stringprep - http://tools.ietf.org/html/rfc3454
    #              http://docs.python.org/library/stringprep.html

    # validate type
    if not isinstance(source, unicode):
        raise TypeError("input must be unicode string, not %s" %
                        (type(source),))

    # mapping stage
    #   - map non-ascii spaces to U+0020 (stringprep C.1.2)
    #   - strip 'commonly mapped to nothing' chars (stringprep B.1)
    in_table_c12 = stringprep.in_table_c12
    in_table_b1 = stringprep.in_table_b1
    data = join_unicode(
        _USPACE if in_table_c12(c) else c
        for c in source
        if not in_table_b1(c)
        )

    # normalize to KC form
    data = unicodedata.normalize('NFKC', data)
    if not data:
        return _UEMPTY

    # check for invalid bi-directional strings.
    # stringprep requires the following:
    #   - chars in C.8 must be prohibited.
    #   - if any R/AL chars in string:
    #       - no L chars allowed in string
    #       - first and last must be R/AL chars
    # this checks if start/end are R/AL chars. if so, prohibited loop
    # will forbid all L chars. if not, prohibited loop will forbid all
    # R/AL chars instead. in both cases, prohibited loop takes care of C.8.
    is_ral_char = stringprep.in_table_d1
    if is_ral_char(data[0]):
        if not is_ral_char(data[-1]):
            raise ValueError("malformed bidi sequence in " + param)
        # forbid L chars within R/AL sequence.
        is_forbidden_bidi_char = stringprep.in_table_d2
    else:
        # forbid R/AL chars if start not setup correctly; L chars allowed.
        is_forbidden_bidi_char = is_ral_char

    # check for prohibited output - stringprep tables A.1, B.1, C.1.2, C.2 - C.9
    in_table_a1 = stringprep.in_table_a1
    in_table_c21_c22 = stringprep.in_table_c21_c22
    in_table_c3 = stringprep.in_table_c3
    in_table_c4 = stringprep.in_table_c4
    in_table_c5 = stringprep.in_table_c5
    in_table_c6 = stringprep.in_table_c6
    in_table_c7 = stringprep.in_table_c7
    in_table_c8 = stringprep.in_table_c8
    in_table_c9 = stringprep.in_table_c9
    for c in data:
        # sanity checks: the mapping stage should already have removed these
        assert not in_table_b1(c), "failed to strip B.1 in mapping stage"
        assert not in_table_c12(c), "failed to replace C.1.2 in mapping stage"

        # check for forbidden chars
        if in_table_a1(c):
            raise ValueError("unassigned code points forbidden in " + param)
        if in_table_c21_c22(c):
            raise ValueError("control characters forbidden in " + param)
        if in_table_c3(c):
            raise ValueError("private use characters forbidden in " + param)
        if in_table_c4(c):
            raise ValueError("non-char code points forbidden in " + param)
        if in_table_c5(c):
            raise ValueError("surrogate codes forbidden in " + param)
        if in_table_c6(c):
            raise ValueError("non-plaintext chars forbidden in " + param)
        if in_table_c7(c):
            # XXX: should these have been caught by normalize?
            # if so, should change this to an assert
            raise ValueError("non-canonical chars forbidden in " + param)
        if in_table_c8(c):
            # NOTE: trailing space after "in" is required so the message
            #       doesn't run into the parameter name.
            raise ValueError("display-modifying / deprecated chars "
                             "forbidden in " + param)
        if in_table_c9(c):
            raise ValueError("tagged characters forbidden in " + param)

        # do bidi constraint check chosen by bidi init, above
        if is_forbidden_bidi_char(c):
            raise ValueError("forbidden bidi character in " + param)

    return data
449
# when the stringprep module could not be imported, swap saslprep() for a
# stub which reports why the feature is unavailable
if stringprep is None: # pragma: no cover -- runtime detection
    def saslprep(source, param="value"):
        "stub for saslprep(); always raises NotImplementedError"
        reason = _stringprep_missing_reason
        raise NotImplementedError("saslprep() support requires the 'stringprep' "
                                  "module, which is " + reason)
456
457 #=============================================================================
458 # bytes helpers
459 #=============================================================================
def render_bytes(source, *args):
    """Perform ``%`` formatting on bytes in a uniform manner across Python 2/3.

    :class:`bytes` instances do not support ``%`` or ``{}`` formatting under
    Python 3, so this helper decodes the template and every bytes argument
    as ``latin-1``, applies the formatting on the resulting text,
    and encodes the outcome back to ``latin-1``.

    Calling ``render_bytes(source, *args)`` should function roughly the same as
    ``source % args`` under Python 2.

    :arg source: format template (bytes or unicode).
    :arg args: positional values to interpolate; bytes values are decoded first.
    :returns: the rendered result, as bytes.
    """
    template = source.decode("latin-1") if isinstance(source, bytes) else source
    values = []
    for arg in args:
        if isinstance(arg, bytes):
            arg = arg.decode("latin-1")
        values.append(arg)
    rendered = template % tuple(values)
    return rendered.encode("latin-1")
477
if PY_MIN_32:
    # int.from_bytes() / int.to_bytes() are available: use them directly
    def bytes_to_int(value):
        return int.from_bytes(value, byteorder='big')
    def int_to_bytes(value, count):
        return value.to_bytes(count, byteorder='big')
else:
    # fallback: round-trip through a hexadecimal representation
    # XXX: can any of these be sped up?
    from binascii import hexlify, unhexlify
    def bytes_to_int(value):
        hexed = hexlify(value)
        return int(hexed, 16)
    if PY3:
        # bytes lack %-formatting here, so render as text and encode to ascii
        def int_to_bytes(value, count):
            template = '%%0%dx' % (count<<1)   # two hex digits per byte
            return unhexlify((template % value).encode("ascii"))
    else:
        def int_to_bytes(value, count):
            template = '%%0%dx' % (count<<1)   # two hex digits per byte
            return unhexlify(template % value)

add_doc(bytes_to_int, "decode byte string as single big-endian integer")
add_doc(int_to_bytes, "encode integer as single big-endian byte string")
498
def xor_bytes(left, right):
    "Perform bitwise-xor of two byte strings (must be same size)"
    # xor the big-endian integer forms, then render back at len(left) bytes
    left_int = bytes_to_int(left)
    right_int = bytes_to_int(right)
    return int_to_bytes(left_int ^ right_int, len(left))
502
def repeat_string(source, size):
    "repeat or truncate <source> string, so it has length <size>"
    length = len(source)
    if size <= length:
        # already long enough: plain truncation
        return source[:size]
    # number of copies needed to reach at least <size> (ceiling division)
    copies = (size + length - 1) // length
    return (source * copies)[:size]
511
512 _BNULL = b("\x00")
513 _UNULL = u("\x00")
514
def right_pad_string(source, size, pad=None):
    """right-pad or truncate <source> string, so it has length <size>.

    When *pad* is omitted, a null character of the same type as *source*
    (unicode or bytes) is used as the fill.
    """
    length = len(source)
    if size <= length:
        return source[:size]
    if pad is None:
        pad = _UNULL if isinstance(source, unicode) else _BNULL
    missing = size - length
    return source + pad * missing
524
525 #=============================================================================
526 # encoding helpers
527 #=============================================================================
528 _ASCII_TEST_BYTES = b("\x00\n aA:#!\x7f")
529 _ASCII_TEST_UNICODE = _ASCII_TEST_BYTES.decode("ascii")
530
def is_ascii_codec(codec):
    "Test if codec is compatible with 7-bit ascii (e.g. latin-1, utf-8; but not utf-16)"
    # encode a fixed ascii sample and see whether the bytes come out unchanged
    encoded = _ASCII_TEST_UNICODE.encode(codec)
    return encoded == _ASCII_TEST_BYTES
534
def is_same_codec(left, right):
    """Check if two codec names are aliases for same codec.

    Identical values (including two empty/``None`` values) count as the same;
    a single empty/``None`` value never matches a real codec name.
    """
    if left == right:
        return True
    if not left or not right:
        return False
    # compare the canonical names reported by the codec registry
    left_info = _lookup_codec(left)
    right_info = _lookup_codec(right)
    return left_info.name == right_info.name
542
543 _B80 = b('\x80')[0]
544 _U80 = u('\x80')
def is_ascii_safe(source):
    "Check if string (bytes or unicode) contains only 7-bit ascii"
    # pick the 0x80 threshold matching the element type of <source>
    limit = _B80 if isinstance(source, bytes) else _U80
    for elem in source:
        if not (elem < limit):
            return False
    return True
549
def to_bytes(source, encoding="utf-8", param="value", source_encoding=None):
    """Helper to normalize input to bytes.

    :arg source:
        Source bytes/unicode to process.

    :arg encoding:
        Target encoding (defaults to ``"utf-8"``).

    :param param:
        Optional name of variable/noun to reference when raising errors

    :param source_encoding:
        If this is specified, and the source is bytes,
        the source will be transcoded from *source_encoding* to *encoding*
        (via unicode).

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * unicode strings will be encoded using *encoding*, and returned.
        * if *source_encoding* is not specified (or names the same codec
          as *encoding*), byte strings will be returned unchanged.
        * if *source_encoding* is specified, byte strings will be transcoded
          to *encoding*.
    """
    assert encoding
    if isinstance(source, unicode):
        return source.encode(encoding)
    if not isinstance(source, bytes):
        raise ExpectedStringError(source, param)
    # bytes input: transcode only when the two codecs really differ
    if source_encoding and not is_same_codec(source_encoding, encoding):
        text = source.decode(source_encoding)
        return text.encode(encoding)
    return source
586
def to_unicode(source, encoding="utf-8", param="value"):
    """Helper to normalize input to unicode.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding to use when decoding bytes instances.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * returns unicode strings unchanged.
        * returns bytes strings decoded using *encoding*
    """
    assert encoding
    if isinstance(source, unicode):
        return source
    if not isinstance(source, bytes):
        raise ExpectedStringError(source, param)
    return source.decode(encoding)
612
if PY3:
    def to_native_str(source, encoding="utf-8", param="value"):
        # native str is unicode here: pass text through, decode bytes
        if isinstance(source, unicode):
            return source
        if isinstance(source, bytes):
            return source.decode(encoding)
        raise ExpectedStringError(source, param)
else:
    def to_native_str(source, encoding="utf-8", param="value"):
        # native str is bytes here: encode text, pass bytes through
        if isinstance(source, unicode):
            return source.encode(encoding)
        if isinstance(source, bytes):
            return source
        raise ExpectedStringError(source, param)

add_doc(to_native_str,
    """Take in unicode or bytes, return native string.

    Python 2: encodes unicode using specified encoding, leaves bytes alone.
    Python 3: leaves unicode alone, decodes bytes using specified encoding.

    :raises TypeError: if source is not unicode or bytes.

    :arg source:
        source unicode or bytes string.

    :arg encoding:
        encoding to use when encoding unicode or decoding bytes.
        this defaults to ``"utf-8"``.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :returns: :class:`str` instance
    """)
650
@deprecated_function(deprecated="1.6", removed="1.7")
def to_hash_str(source, encoding="ascii"): # pragma: no cover -- deprecated & unused
    "deprecated, use to_native_str() instead"
    # same as to_native_str(), but defaulting to ascii and labelling errors "hash"
    return to_native_str(source, encoding=encoding, param="hash")
655
656 #=============================================================================
657 # base64-variant encoding
658 #=============================================================================
659
660 class Base64Engine(object):
661 """Provides routines for encoding/decoding base64 data using
662 arbitrary character mappings, selectable endianness, etc.
663
664 :arg charmap:
665 A string of 64 unique characters,
666 which will be used to encode successive 6-bit chunks of data.
667 A character's position within the string should correspond
668 to it's 6-bit value.
669
670 :param big:
671 Whether the encoding should be big-endian (default False).
672
673 .. note::
674 This class does not currently handle base64's padding characters
675 in any way what so ever.
676
677 Raw Bytes <-> Encoded Bytes
678 ===========================
679 The following methods convert between raw bytes,
680 and strings encoded using the engine's specific base64 variant:
681
682 .. automethod:: encode_bytes
683 .. automethod:: decode_bytes
684 .. automethod:: encode_transposed_bytes
685 .. automethod:: decode_transposed_bytes
686
687 ..
688 .. automethod:: check_repair_unused
689 .. automethod:: repair_unused
690
691 Integers <-> Encoded Bytes
692 ==========================
693 The following methods allow encoding and decoding
694 unsigned integers to and from the engine's specific base64 variant.
695 Endianess is determined by the engine's ``big`` constructor keyword.
696
697 .. automethod:: encode_int6
698 .. automethod:: decode_int6
699
700 .. automethod:: encode_int12
701 .. automethod:: decode_int12
702
703 .. automethod:: encode_int24
704 .. automethod:: decode_int24
705
706 .. automethod:: encode_int64
707 .. automethod:: decode_int64
708
709 Informational Attributes
710 ========================
711 .. attribute:: charmap
712
713 unicode string containing list of characters used in encoding;
714 position in string matches 6bit value of character.
715
716 .. attribute:: bytemap
717
718 bytes version of :attr:`charmap`
719
720 .. attribute:: big
721
722 boolean flag indicating this using big-endian encoding.
723 """
724
725 #===================================================================
726 # instance attrs
727 #===================================================================
728 # public config
729 bytemap = None # charmap as bytes
730 big = None # little or big endian
731
732 # filled in by init based on charmap.
733 # (byte elem: single byte under py2, 8bit int under py3)
734 _encode64 = None # maps 6bit value -> byte elem
735 _decode64 = None # maps byte elem -> 6bit value
736
737 # helpers filled in by init based on endianness
738 _encode_bytes = None # throws IndexError if bad value (shouldn't happen)
739 _decode_bytes = None # throws KeyError if bad char.
740
741 #===================================================================
742 # init
743 #===================================================================
def __init__(self, charmap, big=False):
    """Set up the engine's lookup tables and endian-specific helpers.

    :arg charmap: 64 unique characters (unicode or bytes).
    :param big: use big-endian encoding when true (default False).
    :raises ValueError: if charmap isn't 64 unique characters.
    """
    # normalize charmap to bytes, then validate size & uniqueness
    if isinstance(charmap, unicode):
        charmap = charmap.encode("latin-1")
    elif not isinstance(charmap, bytes):
        raise ExpectedStringError(charmap, "charmap")
    if len(charmap) != 64:
        raise ValueError("charmap must be 64 characters in length")
    if len(set(charmap)) != 64:
        raise ValueError("charmap must not contain duplicate characters")
    self.bytemap = charmap

    # 6bit value -> byte elem is plain indexing into the charmap;
    # byte elem -> 6bit value goes through a reverse-lookup dict
    self._encode64 = charmap.__getitem__
    reverse = {}
    for position, elem in enumerate(charmap):
        reverse[elem] = position
    self._decode64 = reverse.__getitem__

    # pick the helper pair matching the requested endianness
    self.big = big
    if big:
        helpers = (self._encode_bytes_big, self._decode_bytes_big)
    else:
        helpers = (self._encode_bytes_little, self._decode_bytes_little)
    self._encode_bytes, self._decode_bytes = helpers
767
768 # TODO: support padding character
769 ##if padding is not None:
770 ## if isinstance(padding, unicode):
771 ## padding = padding.encode("latin-1")
772 ## elif not isinstance(padding, bytes):
773 ## raise TypeError("padding char must be unicode or bytes")
774 ## if len(padding) != 1:
775 ## raise ValueError("padding must be single character")
776 ##self.padding = padding
777
@property
def charmap(self):
    "charmap as unicode (latin-1 decoding of :attr:`bytemap`)"
    raw = self.bytemap
    return raw.decode("latin-1")
782
783 #===================================================================
784 # encoding byte strings
785 #===================================================================
def encode_bytes(self, source):
    """encode bytes to base64 string.

    :arg source: byte string to encode.
    :raises TypeError: if source is not bytes.
    :returns: byte string containing encoded data.
    """
    if not isinstance(source, bytes):
        raise TypeError("source must be bytes, not %s" % (type(source),))
    # build a callable yielding successive byte values as integers
    if PY3:
        next_value = iter(source).__next__
    else:
        next_value = (ord(elem) for elem in source).next
    # 3 input bytes per full chunk; <tail> counts the leftover bytes
    chunks, tail = divmod(len(source), 3)
    sixbit_values = self._encode_bytes(next_value, chunks, tail)
    encoded = join_byte_elems(imap(self._encode64, sixbit_values))
    # NOTE: padding characters are not emitted (see class notes)
    return encoded
806
def _encode_bytes_little(self, next_value, chunks, tail):
    """helper used by encode_bytes() to handle little-endian encoding.

    Generator yielding 6-bit values: 4 per full 3-byte chunk, then
    2 or 3 more when <tail> is 1 or 2.
    """
    #
    # output bit layout:
    #
    # first byte:   v1 543210
    #
    # second byte:  v1 ....76
    #              +v2 3210..
    #
    # third byte:   v2 ..7654
    #              +v3 10....
    #
    # fourth byte:  v3 765432
    #
    for _ in range(chunks):
        b1 = next_value()
        b2 = next_value()
        b3 = next_value()
        yield b1 & 0x3f
        yield ((b2 & 0x0f) << 2) | (b1 >> 6)
        yield ((b3 & 0x03) << 4) | (b2 >> 4)
        yield b3 >> 2
    if not tail:
        return
    b1 = next_value()
    if tail == 1:
        # note: 4 msb of last byte are padding
        yield b1 & 0x3f
        yield b1 >> 6
    else:
        assert tail == 2
        # note: 2 msb of last byte are padding
        b2 = next_value()
        yield b1 & 0x3f
        yield ((b2 & 0x0f) << 2) | (b1 >> 6)
        yield b2 >> 4
845
def _encode_bytes_big(self, next_value, chunks, tail):
    """helper used by encode_bytes() to handle big-endian encoding.

    Generator yielding 6-bit values: 4 per full 3-byte chunk, then
    2 or 3 more when <tail> is 1 or 2.
    """
    #
    # output bit layout:
    #
    # first byte:   v1 765432
    #
    # second byte:  v1 10....
    #              +v2 ..7654
    #
    # third byte:   v2 3210..
    #              +v3 ....76
    #
    # fourth byte:  v3 543210
    #
    for _ in range(chunks):
        b1 = next_value()
        b2 = next_value()
        b3 = next_value()
        yield b1 >> 2
        yield ((b1 & 0x03) << 4) | (b2 >> 4)
        yield ((b2 & 0x0f) << 2) | (b3 >> 6)
        yield b3 & 0x3f
    if not tail:
        return
    b1 = next_value()
    if tail == 1:
        # note: 4 lsb of last byte are padding
        yield b1 >> 2
        yield (b1 & 0x03) << 4
    else:
        assert tail == 2
        # note: 2 lsb of last byte are padding
        b2 = next_value()
        yield b1 >> 2
        yield ((b1 & 0x03) << 4) | (b2 >> 4)
        yield ((b2 & 0x0f) << 2)
884
885 #===================================================================
886 # decoding byte strings
887 #===================================================================
888
def decode_bytes(self, source):
    """decode bytes from base64 string.

    :arg source: byte string to decode.
    :raises TypeError: if source is not bytes.
    :raises ValueError:
        if the length is 1 mod 4, or a character outside the charmap
        is encountered.
    :returns: byte string containing decoded data.
    """
    if not isinstance(source, bytes):
        raise TypeError("source must be bytes, not %s" % (type(source),))
    # NOTE: padding characters are not handled (see class notes)
    # 4 encoded chars per full chunk; <tail> counts the leftover chars
    chunks, tail = divmod(len(source), 4)
    if tail == 1:
        # only 6 bits left, can't encode a whole byte!
        raise ValueError("input string length cannot be == 1 mod 4")
    sixbit_values = imap(self._decode64, source)
    next_value = getattr(sixbit_values, next_method_attr)
    try:
        decoded = self._decode_bytes(next_value, chunks, tail)
        return join_byte_values(decoded)
    except KeyError:
        # raised by the _decode64 lookup for a char not in the charmap
        err = exc_err()
        raise ValueError("invalid character: %r" % (err.args[0],))
911
def _decode_bytes_little(self, next_value, chunks, tail):
    """helper used by decode_bytes() to handle little-endian encoding.

    Generator yielding byte values: 3 per full 4-char chunk, then
    1 or 2 more when <tail> is 2 or 3.
    """
    #
    # input bit layout:
    #
    # first byte:   v1 ..543210
    #              +v2 10......
    #
    # second byte:  v2 ....5432
    #              +v3 3210....
    #
    # third byte:   v3 ......54
    #              +v4 543210..
    #
    for _ in range(chunks):
        c1 = next_value()
        c2 = next_value()
        c3 = next_value()
        c4 = next_value()
        yield c1 | ((c2 & 0x3) << 6)
        yield (c2 >> 2) | ((c3 & 0xF) << 4)
        yield (c3 >> 4) | (c4 << 2)
    if not tail:
        return
    # tail is 2 or 3
    c1 = next_value()
    c2 = next_value()
    yield c1 | ((c2 & 0x3) << 6)
    # NOTE: if tail == 2, 4 msb of c2 are ignored (should be 0)
    if tail == 3:
        # NOTE: 2 msb of c3 are ignored (should be 0)
        c3 = next_value()
        yield (c2 >> 2) | ((c3 & 0xF) << 4)
946
def _decode_bytes_big(self, next_value, chunks, tail):
    """helper used by decode_bytes() to handle big-endian encoding.

    Generator yielding byte values: 3 per full 4-char chunk, then
    1 or 2 more when <tail> is 2 or 3.
    """
    #
    # input bit layout:
    #
    # first byte:   v1 543210..
    #              +v2 ......54
    #
    # second byte:  v2 3210....
    #              +v3 ....5432
    #
    # third byte:   v3 10......
    #              +v4 ..543210
    #
    for _ in range(chunks):
        c1 = next_value()
        c2 = next_value()
        c3 = next_value()
        c4 = next_value()
        yield (c1 << 2) | (c2 >> 4)
        yield ((c2 & 0xF) << 4) | (c3 >> 2)
        yield ((c3 & 0x3) << 6) | c4
    if not tail:
        return
    # tail is 2 or 3
    c1 = next_value()
    c2 = next_value()
    yield (c1 << 2) | (c2 >> 4)
    # NOTE: if tail == 2, 4 lsb of c2 are ignored (should be 0)
    if tail == 3:
        # NOTE: 2 lsb of c3 are ignored (should be 0)
        c3 = next_value()
        yield ((c2 & 0xF) << 4) | (c3 >> 2)
981
982 #===================================================================
983 # encode/decode helpers
984 #===================================================================
985
# _padinfo2/3 - (mask, padset) pairs: <mask> clears the unused padding bits
# of a string's last char, <padset> holds the last chars with no padding bits set.
988
def __make_padset(self, bits):
    """helper to generate the set of valid last chars & bytes.

    :arg bits: mask of the bits which must be clear in the last value.

    :returns:
        frozenset with every element of ``self.bytemap`` and
        ``self.charmap`` whose index has none of <bits> set.
    """
    valid = set()
    for alphabet in (self.bytemap, self.charmap):
        for index, elem in enumerate(alphabet):
            if (index & bits) == 0:
                valid.add(elem)
    return frozenset(valid)
994
@memoized_property
def _padinfo2(self):
    """padding info for strings whose length is 2 mod 4.

    :returns:
        ``(mask, padset)`` - mask which clears the padding bits of the
        last value, and the set of valid last chars & bytes.
    """
    # the last char carries 4 unused bits:
    # the low 4 bits for big-endian, the high 4 bits for little-endian.
    if self.big:
        unused = 0x0F
    else:
        unused = 0x0F << 2
    return ~unused, self.__make_padset(unused)
1001
@memoized_property
def _padinfo3(self):
    """padding info for strings whose length is 3 mod 4.

    :returns:
        ``(mask, padset)`` - mask which clears the padding bits of the
        last value, and the set of valid last chars & bytes.
    """
    # the last char carries 2 unused bits:
    # the low 2 bits for big-endian, the high 2 bits for little-endian.
    if self.big:
        unused = 0x03
    else:
        unused = 0x03 << 4
    return ~unused, self.__make_padset(unused)
1008
def check_repair_unused(self, source):
    """detect & clear stray padding bits in the last character of a string.

    :arg source:
        encoded data (as ascii bytes or unicode).

    :raises ValueError:
        if the length of *source* is 1 mod 4.

    :returns:
        `(True, result)` if the string was repaired,
        `(False, source)` if the string was ok as-is.
    """
    # the length mod 4 tells us how many padding bits the last char carries.
    remainder = len(source) & 3
    if remainder == 0:
        # complete groups only - no padding bits at all.
        return False, source
    if remainder == 1:
        raise ValueError("source length must != 1 mod 4")
    if remainder == 2:
        mask, padset = self._padinfo2
    else:
        mask, padset = self._padinfo3

    # padset contains both the bytes & unicode versions of the clean chars.
    final = source[-1]
    if final in padset:
        return False, source

    # dirty bits found - decode the last char, clear the padding bits
    # using <mask>, and encode the result again.
    is_text = isinstance(source, unicode)
    if is_text:
        charmap = self.charmap
        final = charmap[charmap.index(final) & mask]
    else:
        # NOTE: this assumes ascii-compat encoding, and that
        #       all chars used by encoding are 7-bit ascii.
        final = self._encode64(self._decode64(final) & mask)
    assert final in padset, "failed to generate valid padding char"
    if PY3 and not is_text:
        # turn the byte value back into a 1-byte string
        final = bytes([final])
    return True, source[:-1] + final
1049
def repair_unused(self, source):
    """return *source* with any stray padding bits in its last char cleared.

    convenience wrapper which discards the "was repaired" flag
    returned by :meth:`check_repair_unused`.
    """
    _, result = self.check_repair_unused(source)
    return result
1052
1053 ##def transcode(self, source, other):
1054 ## return ''.join(
1055 ## other.charmap[self.charmap.index(char)]
1056 ## for char in source
1057 ## )
1058
1059 ##def random_encoded_bytes(self, size, random=None, unicode=False):
1060 ## "return random encoded string of given size"
1061 ## data = getrandstr(random or rng,
1062 ## self.charmap if unicode else self.bytemap, size)
1063 ## return self.repair_unused(data)
1064
1065 #===================================================================
1066 # transposed encoding/decoding
1067 #===================================================================
def encode_transposed_bytes(self, source, offsets):
    """encode byte string, after first transposing it using an offset list.

    :arg source: byte string to encode.
    :arg offsets: sequence of indexes into *source*, in output order.

    :raises TypeError: if *source* is not bytes.

    :returns: result of ``encode_bytes()`` for the transposed string.
    """
    if isinstance(source, bytes):
        shuffled = join_byte_elems(source[index] for index in offsets)
        return self.encode_bytes(shuffled)
    raise TypeError("source must be bytes, not %s" % (type(source),))
1074
def decode_transposed_bytes(self, source, offsets):
    """decode byte string, then undo the transposition described by offset list.

    :arg source: encoded string, passed through ``decode_bytes()``.
    :arg offsets: the offset list that was used when encoding.

    :returns: byte string with every decoded element back at its offset.
    """
    # NOTE: if the transposition does not use all bytes of source,
    # the original can't be recovered... and join_byte_elems() will throw
    # an error because 1+ slots in <restored> will still be None.
    decoded = self.decode_bytes(source)
    restored = [None] * len(offsets)
    for slot, elem in zip(offsets, decoded):
        restored[slot] = elem
    return join_byte_elems(restored)
1085
1086 #===================================================================
1087 # integer decoding helpers - mainly used by des_crypt family
1088 #===================================================================
def _decode_int(self, source, bits):
    """decode base64 string -> integer

    :arg source: base64 string to decode (as bytes).
    :arg bits: number of bits in resulting integer.

    :raises TypeError:
        if *source* is not bytes.

    :raises ValueError:
        * if the string contains invalid base64 characters.
        * if the string is not exactly ``int(ceil(bits/6.0))``
          characters long.

    :returns:
        a integer in the range ``0 <= n < 2**bits``
    """
    if not isinstance(source, bytes):
        raise TypeError("source must be bytes, not %s" % (type(source),))
    big = self.big
    # number of padding bits needed to reach a multiple of 6
    pad = -bits % 6
    # NOTE: floor division, so the expected length stays an integer
    #       under Python 3 (where "/" is true division).
    chars = (bits+pad)//6
    if len(source) != chars:
        raise ValueError("source must be %d chars" % (chars,))
    decode = self._decode64
    out = 0
    try:
        # always accumulate most-significant char first
        for c in source if big else reversed(source):
            out = (out<<6) + decode(c)
    except KeyError:
        raise ValueError("invalid character in string: %r" % (c,))
    if pad:
        # strip padding bits
        if big:
            # padding sits in the lsb of the last char
            out >>= pad
        else:
            # padding sits in the msb of the last char
            out &= (1<<bits)-1
    return out
1124
1125 #---------------------------------------------------------------
1126 # optimized versions for common integer sizes
1127 #---------------------------------------------------------------
1128
def decode_int6(self, source):
    """decode single character -> 6 bit integer

    :arg source: byte string containing exactly 1 character.

    :raises TypeError: if *source* is not bytes.
    :raises ValueError: if *source* has the wrong length or an invalid character.
    """
    if not isinstance(source, bytes):
        raise TypeError("source must be bytes, not %s" % (type(source),))
    if len(source) != 1:
        raise ValueError("source must be exactly 1 byte")
    # under py3 the lookup is keyed by 8bit ints, so pull out the byte value
    elem = source[0] if PY3 else source
    try:
        return self._decode64(elem)
    except KeyError:
        raise ValueError("invalid character")
1142
def decode_int12(self, source):
    """decodes 2 char string -> 12-bit integer

    :arg source: byte string containing exactly 2 characters.

    :raises TypeError: if *source* is not bytes.
    :raises ValueError: if *source* has the wrong length or an invalid character.
    """
    if not isinstance(source, bytes):
        raise TypeError("source must be bytes, not %s" % (type(source),))
    if len(source) != 2:
        raise ValueError("source must be exactly 2 bytes")
    lookup = self._decode64
    try:
        # big-endian puts the most significant char first
        if self.big:
            high, low = source[0], source[1]
        else:
            low, high = source[0], source[1]
        return lookup(low) + (lookup(high) << 6)
    except KeyError:
        raise ValueError("invalid character")
1157
def decode_int24(self, source):
    """decodes 4 char string -> 24-bit integer

    :arg source: byte string containing exactly 4 characters.

    :raises TypeError: if *source* is not bytes.
    :raises ValueError: if *source* has the wrong length or an invalid character.
    """
    if not isinstance(source, bytes):
        raise TypeError("source must be bytes, not %s" % (type(source),))
    if len(source) != 4:
        raise ValueError("source must be exactly 4 bytes")
    lookup = self._decode64
    try:
        # list the elements least-significant first
        if self.big:
            ordered = (source[3], source[2], source[1], source[0])
        else:
            ordered = (source[0], source[1], source[2], source[3])
        total = 0
        shift = 0
        for elem in ordered:
            total += lookup(elem) << shift
            shift += 6
        return total
    except KeyError:
        raise ValueError("invalid character")
1174
def decode_int64(self, source):
    """decode 11 char base64 string -> 64-bit integer

    thin wrapper around ``_decode_int()`` with the size fixed at 64 bits.

    this format is used primarily by des-crypt & variants to encode
    the DES output value used as a checksum.
    """
    width = 64
    return self._decode_int(source, width)
1182
1183 #===================================================================
1184 # integer encoding helpers - mainly used by des_crypt family
1185 #===================================================================
def _encode_int(self, value, bits):
    """encode integer into base64 format

    :arg value: non-negative integer to encode
    :arg bits: number of bits to encode

    :returns:
        a string of length ``int(ceil(bits/6.0))``.
    """
    assert value >= 0, "caller did not sanitize input"
    # round the width up to a whole number of 6-bit characters
    padding = -bits % 6
    width = bits + padding
    if self.big:
        # padding goes in the lsb, so shift the value up to make room,
        # then emit the most significant character first.
        value <<= padding
        shifts = irange(width-6, -6, -6)
    else:
        # padding is the msb, so the value can be used unchanged;
        # emit the least significant character first.
        shifts = irange(0, width, 6)
    encode = self._encode64
    return join_byte_elems(encode((value >> shift) & 0x3f)
                           for shift in shifts)
1207
1208 #---------------------------------------------------------------
1209 # optimized versions for common integer sizes
1210 #---------------------------------------------------------------
1211
def encode_int6(self, value):
    """encodes 6-bit integer -> single hash64 character

    :arg value: integer in the range 0..63.

    :raises ValueError: if *value* is outside that range.
    """
    if value < 0 or value > 63:
        raise ValueError("value out of range")
    if not PY3:
        return self._encode64(value)
    # under py3, slice the bytemap so the result is bytes rather than an int
    return self.bytemap[value:value+1]
1220
def encode_int12(self, value):
    """encodes 12-bit integer -> 2 char string

    :arg value: integer in the range 0..0xFFF.

    :raises ValueError: if *value* is outside that range.
    """
    if value < 0 or value > 0xFFF:
        raise ValueError("value out of range")
    low = value & 0x3f
    high = (value>>6) & 0x3f
    # big-endian emits the most significant character first
    if self.big:
        digits = (high, low)
    else:
        digits = (low, high)
    return join_byte_elems(imap(self._encode64, digits))
1229
def encode_int24(self, value):
    """encodes 24-bit integer -> 4 char string

    :arg value: integer in the range 0..0xFFFFFF.

    :raises ValueError: if *value* is outside that range.
    """
    if value < 0 or value > 0xFFFFFF:
        raise ValueError("value out of range")
    # split into 6-bit digits, least significant first
    digits = [(value >> shift) & 0x3f for shift in (0, 6, 12, 18)]
    if self.big:
        # big-endian emits the most significant character first
        digits.reverse()
    return join_byte_elems(imap(self._encode64, digits))
1239
def encode_int64(self, value):
    """encode 64-bit integer -> 11 char hash64 string

    this format is used primarily by des-crypt & variants to encode
    the DES output value used as a checksum.

    :raises ValueError: if *value* doesn't fit in an unsigned 64-bit integer.
    """
    upper = 0xffffffffffffffff
    if value < 0 or value > upper:
        raise ValueError("value out of range")
    return self._encode_int(value, 64)
1249
1250 #===================================================================
1251 # eof
1252 #===================================================================
1253
class LazyBase64Engine(Base64Engine):
    """Base64Engine which delays initialization until it's accessed.

    The constructor only records its arguments. The first lookup of an
    attribute whose name doesn't start with an underscore runs the real
    ``Base64Engine.__init__`` with those arguments, and then switches the
    instance's class to :class:`Base64Engine`, so that the lookup hook
    defined here is no longer involved.
    """
    # (args, kwds) captured by __init__; the instance-level value is
    # deleted again by _lazy_init(), leaving this class-level default.
    _lazy_opts = None

    def __init__(self, *args, **kwds):
        "record constructor arguments, without calling the parent constructor"
        self._lazy_opts = (args, kwds)

    def _lazy_init(self):
        "run the deferred Base64Engine constructor, and become a plain Base64Engine"
        args, kwds = self._lazy_opts
        super(LazyBase64Engine, self).__init__(*args, **kwds)
        del self._lazy_opts
        # swap the class so __getattribute__ below stops intercepting lookups
        self.__class__ = Base64Engine

    def __getattribute__(self, attr):
        "initialize the engine before the first lookup of a non-underscore attribute"
        # names starting with "_" are passed straight through; this is what
        # lets _lazy_init() and _lazy_opts be reached without recursing.
        if not attr.startswith("_"):
            self._lazy_init()
        return object.__getattribute__(self, attr)
1271
# common charmaps
# (each one is a 64 character alphabet; they differ in the order of the
# characters, and in which two non-alphanumeric characters are used)
BASE64_CHARS = u("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
AB64_CHARS = u("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789./")
HASH64_CHARS = u("./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
BCRYPT_CHARS = u("./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")

# common variants
# (lazily initialized engines: h64 is created without the ``big`` option,
# while h64big and bcrypt64 are created with ``big=True``)
h64 = LazyBase64Engine(HASH64_CHARS)
h64big = LazyBase64Engine(HASH64_CHARS, big=True)
bcrypt64 = LazyBase64Engine(BCRYPT_CHARS, big=True)
1282
1283 #=============================================================================
1284 # adapted-base64 encoding
1285 #=============================================================================
# alternate characters passed to b64encode() / b64decode() by the ab64 helpers
_A64_ALTCHARS = b("./")
# characters stripped from the encoder output by ab64_encode()
_A64_STRIP = b("=\n")
# padding appended by ab64_decode() when the input length is 3 mod 4
_A64_PAD1 = b("=")
# padding appended by ab64_decode() when the input length is 2 mod 4
_A64_PAD2 = b("==")
1290
def ab64_encode(data):
    """encode using variant of base64

    the output of this function is identical to stdlib's b64encode,
    except that it uses ``.`` instead of ``+``,
    and omits trailing padding ``=`` and whitespace.

    it is primarily used by Passlib's custom pbkdf2 hashes.
    """
    encoded = b64encode(data, _A64_ALTCHARS)
    return encoded.strip(_A64_STRIP)
1301
def ab64_decode(data):
    """decode using variant of base64

    the input of this function is identical to stdlib's b64decode,
    except that it uses ``.`` instead of ``+``,
    and should not include trailing padding ``=`` or whitespace.

    it is primarily used by Passlib's custom pbkdf2 hashes.

    :raises ValueError: if the length of *data* is 1 mod 4.
    """
    remainder = len(data) & 3
    if remainder == 1:
        # a single leftover char can't hold a whole byte
        raise ValueError("invalid base64 input")
    # restore the padding that b64decode() expects
    if remainder == 2:
        padded = data + _A64_PAD2
    elif remainder == 3:
        padded = data + _A64_PAD1
    else:
        padded = data
    return b64decode(padded, _A64_ALTCHARS)
1320
1321 #=============================================================================
1322 # host OS helpers
1323 #=============================================================================
1324
# Define has_crypt and safe_crypt(). One of three safe_crypt() variants is
# chosen at import time: a stub when the stdlib crypt module can't be
# imported, otherwise a Python 3 or a Python 2 specific wrapper.
try:
    from crypt import crypt as _crypt
except ImportError: # pragma: no cover
    has_crypt = False
    def safe_crypt(secret, hash):
        # no crypt() available, so nothing can ever be hashed
        return None
else:
    has_crypt = True
    _NULL = '\x00'

    # some crypt() variants will return various constant strings when
    # an invalid/unrecognized config string is passed in; instead of
    # returning NULL / None. examples include ":", ":0", "*0", etc.
    # safe_crypt() returns None for any string starting with one of the
    # chars in this string...
    _invalid_prefixes = u("*:!")

    if PY3:
        def safe_crypt(secret, hash):
            if isinstance(secret, bytes):
                # Python 3's crypt() only accepts unicode, which is then
                # encoding using utf-8 before passing to the C-level crypt().
                # so we have to decode the secret.
                orig = secret
                try:
                    secret = secret.decode("utf-8")
                except UnicodeDecodeError:
                    # bytes which aren't valid utf-8 are reported as
                    # "couldn't be hashed"
                    return None
                assert secret.encode("utf-8") == orig, \
                            "utf-8 spec says this can't happen!"
            if _NULL in secret:
                raise ValueError("null character in secret")
            if isinstance(hash, bytes):
                hash = hash.decode("ascii")
            result = _crypt(secret, hash)
            # an empty result, or one starting with an error marker,
            # means the hash wasn't generated
            if not result or result[0] in _invalid_prefixes:
                return None
            return result
    else:
        def safe_crypt(secret, hash):
            # this variant hands crypt() byte strings, so unicode
            # input is encoded first
            if isinstance(secret, unicode):
                secret = secret.encode("utf-8")
            if _NULL in secret:
                raise ValueError("null character in secret")
            if isinstance(hash, unicode):
                hash = hash.encode("ascii")
            result = _crypt(secret, hash)
            if not result:
                return None
            # decode the result before checking it for error markers
            result = result.decode("ascii")
            if result[0] in _invalid_prefixes:
                return None
            return result
1378
# safe_crypt() is defined in three alternative places above, so its
# documentation is attached once here, to whichever variant was selected.
# NOTE(review): add_doc() comes from passlib.utils.compat; presumably it
# sets the function's __doc__ - confirm against that module.
add_doc(safe_crypt, """Wrapper around stdlib's crypt.

    This is a wrapper around stdlib's :func:`!crypt.crypt`, which attempts
    to provide uniform behavior across Python 2 and 3.

    :arg secret:
        password, as bytes or unicode (unicode will be encoded as ``utf-8``).

    :arg hash:
        hash or config string, as ascii bytes or unicode.

    :returns:
        resulting hash as ascii unicode; or ``None`` if the password
        couldn't be hashed due to one of the issues:

        * :func:`crypt()` not available on platform.

        * Under Python 3, if *secret* is specified as bytes,
          it must be use ``utf-8`` or it can't be passed
          to :func:`crypt()`.

        * Some OSes will return ``None`` if they don't recognize
          the algorithm being used (though most will simply fall
          back to des-crypt).

        * Some OSes will return an error string if the input config
          is recognized but malformed; current code converts these to ``None``
          as well.
    """)
1408
def test_crypt(secret, hash):
    """check if :func:`crypt.crypt` supports specific hash

    the check passes when :func:`safe_crypt` reproduces *hash* exactly.

    :arg secret: password to test
    :arg hash: known hash of password to use as reference
    :returns: True or False
    """
    assert secret and hash
    result = safe_crypt(secret, hash)
    return result == hash
1417
# pick best timer function to expose as "tick" - lifted from timeit module.
if sys.platform == "win32":
    # On Windows, the best timer is time.clock()...
    try:
        from time import clock as tick
    except ImportError:
        # ...but time.clock() was removed in Python 3.8,
        # where perf_counter() is the replacement.
        from time import perf_counter as tick
else:
    # On most other platforms the best timer is time.time()
    from time import time as tick
1425
1426 #=============================================================================
1427 # randomness
1428 #=============================================================================
1429
1430 #------------------------------------------------------------------------
1431 # setup rng for generating salts
1432 #------------------------------------------------------------------------
1433
1434 # NOTE:
1435 # generating salts (e.g. h64_gensalt, below) doesn't require cryptographically
1436 # strong randomness. it just requires enough range of possible outputs
1437 # that making a rainbow table is too costly. so it should be ok to
1438 # fall back on python's builtin mersenne twister prng, as long as it's seeded each time
1439 # this module is imported, using a couple of minor entropy sources.
1440
# probe os.urandom() once at import time, and remember whether it works
try:
    os.urandom(1)
except NotImplementedError: # pragma: no cover
    has_urandom = False
else:
    has_urandom = True
1446
def genseed(value=None):
    """generate prng seed value from system resources

    :arg value:
        optional extra value to mix into the seed. if it has a
        ``getrandbits()`` method (e.g. an existing rng), a block of bits
        is drawn from it and mixed in instead of the object itself.

    :returns:
        integer built from the sha512 digest of the mixed sources.
    """
    from hashlib import sha512
    # if value is rng, extract a bunch of bits from its state
    if hasattr(value, "getrandbits"):
        value = value.getrandbits(1<<15)
    # time.clock() was removed in Python 3.8; fall back to perf_counter()
    # (or time.time() where neither exists) as the second clock reading.
    clock = getattr(time, "clock", None)
    if clock is None:
        clock = getattr(time, "perf_counter", time.time)
    text = u("%s %s %s %.15f %.15f %s") % (
        # if caller specified a seed value (e.g. current rng state), mix it in
        value,

        # add current process id
        # NOTE: not available in some environments, e.g. GAE
        os.getpid() if hasattr(os, "getpid") else None,

        # id of a freshly created object.
        # (at least 1 byte of which should be hard to predict)
        id(object()),

        # the current time, to whatever precision os uses
        time.time(),
        clock(),

        # if urandom available, might as well mix some bytes in.
        os.urandom(32).decode("latin-1") if has_urandom else 0,
        )
    # hash it all up and return it as int/long
    return int(sha512(text.encode("utf-8")).hexdigest(), 16)
1474
# module-wide rng, used for generating salts & passwords
if not has_urandom: # pragma: no cover -- runtime detection
    # no os.urandom(), so fall back to a seeded mersenne twister.
    # NOTE: to reseed use ``rng.seed(genseed(rng))``
    rng = random.Random(genseed())
else:
    rng = random.SystemRandom()
1480
1481 #------------------------------------------------------------------------
1482 # some rng helpers
1483 #------------------------------------------------------------------------
def getrandbytes(rng, count):
    """return byte-string containing *count* number of randomly generated bytes, using specified rng

    :arg rng: random number generator providing ``getrandbits()``.
    :arg count: number of bytes to generate.

    :returns: byte string of length *count*.
    """
    # NOTE: would be nice if this was present in stdlib Random class

    if not count:
        return _BEMPTY
    def helper():
        # XXX: break into chunks for large number of bits?
        # draw 8 bits for every requested byte...
        value = rng.getrandbits(count<<3)
        i = 0
        while i < count:
            yield value & 0xff
            # ...and discard all 8 bits that were just emitted.
            # (shifting by less makes neighbouring bytes share bits,
            # and leaves most of the drawn bits unused)
            value >>= 8
            i += 1
    return join_byte_values(helper())
1504
def getrandstr(rng, charset, count):
    """return string containing *count* number of chars/bytes, whose elements are drawn from specified charset, using specified rng

    :arg rng: random number generator providing ``randrange()``.
    :arg charset: unicode or byte string containing the allowed elements.
    :arg count: number of elements to generate.

    :raises ValueError: if *count* is negative, or *charset* is empty.

    :returns: string of the same type as *charset*.
    """
    # NOTE: tests determined this is 4x faster than rng.sample(),
    # which is why that's not being used here.

    # validate the request, and handle the trivial 1-letter alphabet
    if count < 0:
        raise ValueError("count must be >= 0")
    letters = len(charset)
    if letters == 0:
        raise ValueError("alphabet must not be empty")
    if letters == 1:
        return charset * count

    # draw one big random number, then peel off one base-<letters>
    # digit per output element.
    # XXX: break into chunks for large number of letters?
    value = rng.randrange(0, letters**count)
    picks = []
    produced = 0
    while produced < count:
        value, digit = divmod(value, letters)
        picks.append(charset[digit])
        produced += 1

    if isinstance(charset, unicode):
        return join_unicode(picks)
    return join_byte_elems(picks)
1533
# default alphabet for generate_password(): 52 alphanumeric characters
_52charset = '2346789ABCDEFGHJKMNPQRTUVWXYZabcdefghjkmnpqrstuvwxyz'

def generate_password(size=10, charset=_52charset):
    """generate random password using given length & charset

    :param size:
        size of password.

    :param charset:
        optional string specified set of characters to draw from.

        the default charset contains all normal alphanumeric characters,
        except for the characters ``1IiLl0OoS5``, which were omitted
        due to their visual similarity.

    :returns: :class:`!str` containing randomly generated password.

    .. note::

        Using the default character set, on a OS with :class:`!SystemRandom` support,
        this function should generate passwords with 5.7 bits of entropy per character.
    """
    # all of the work is done by getrandstr(), using the module-wide rng
    return getrandstr(rng, charset=charset, count=size)
1557
1558 #=============================================================================
1559 # object type / interface tests
1560 #=============================================================================
# attributes an object must expose to be treated as a password hash handler
_handler_attrs = (
    "name",
    "setting_kwds", "context_kwds",
    "genconfig", "genhash",
    "verify", "encrypt", "identify",
    )

def is_crypt_handler(obj):
    "check if object follows the :ref:`password-hash-api`"
    # XXX: change to use isinstance(obj, PasswordHash) under py26+?
    for attr in _handler_attrs:
        if not hasattr(obj, attr):
            return False
    return True
1572
# attributes an object must expose to be treated as a CryptContext
_context_attrs = (
    "needs_update",
    "genconfig", "genhash",
    "verify", "encrypt", "identify",
    )

def is_crypt_context(obj):
    "check if object appears to be a :class:`~passlib.context.CryptContext` instance"
    # XXX: change to use isinstance(obj, CryptContext)?
    for attr in _context_attrs:
        if not hasattr(obj, attr):
            return False
    return True
1583
1584 ##def has_many_backends(handler):
1585 ## "check if handler provides multiple baceknds"
1586 ## # NOTE: should also provide get_backend(), .has_backend(), and .backends attr
1587 ## return hasattr(handler, "set_backend")
1588
def has_rounds_info(handler):
    "check if handler provides the optional :ref:`rounds information <rounds-attributes>` attributes"
    # the handler has to accept a 'rounds' setting...
    if 'rounds' not in handler.setting_kwds:
        return False
    # ...and has to define a (non-None) min_rounds attribute
    return getattr(handler, "min_rounds", None) is not None
1593
def has_salt_info(handler):
    "check if handler provides the optional :ref:`salt information <salt-attributes>` attributes"
    # the handler has to accept a 'salt' setting...
    if 'salt' not in handler.setting_kwds:
        return False
    # ...and has to define a (non-None) min_salt_size attribute
    return getattr(handler, "min_salt_size", None) is not None
1598
1599 ##def has_raw_salt(handler):
1600 ## "check if handler takes in encoded salt as unicode (False), or decoded salt as bytes (True)"
1601 ## sc = getattr(handler, "salt_chars", None)
1602 ## if sc is None:
1603 ## return None
1604 ## elif isinstance(sc, unicode):
1605 ## return False
1606 ## elif isinstance(sc, bytes):
1607 ## return True
1608 ## else:
1609 ## raise TypeError("handler.salt_chars must be None/unicode/bytes")
1610
1611 #=============================================================================
1612 # eof
1613 #=============================================================================