123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- # -*- coding: utf-8 -*-
- """
- sleekxmpp.util.stringprep_profiles
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- This module makes it easier to define profiles of stringprep,
- such as nodeprep and resourceprep for JID validation, and
- SASLprep for SASL.
- Part of SleekXMPP: The Sleek XMPP Library
- :copyright: (c) 2012 Nathanael C. Fritz, Lance J.T. Stout
- :license: MIT, see LICENSE for more details
- """
- from __future__ import unicode_literals
- import stringprep
- from unicodedata import ucd_3_2_0 as unicodedata
- from sleekxmpp.util import unicode
class StringPrepError(UnicodeError):
    """Raised when text cannot be prepared under a stringprep profile.

    Derives from :class:`UnicodeError`, so handlers written for
    ``UnicodeError`` also catch it.
    """
def b1_mapping(char):
    """Map characters from stringprep table B.1 to nothing.

    :param char: A single character to look up.
    :return: The empty string if ``char`` is in table B.1 ("commonly
             mapped to nothing"); otherwise ``None``, meaning this
             mapping does not apply to the character.
    """
    if stringprep.in_table_b1(char):
        return ''
    return None
def c12_mapping(char):
    """Map non-ASCII space characters (stringprep table C.1.2) to a space.

    :param char: A single character to look up.
    :return: ``' '`` if ``char`` is in table C.1.2; otherwise ``None``,
             meaning this mapping does not apply to the character.
    """
    if stringprep.in_table_c12(char):
        return ' '
    return None
def map_input(data, tables=None):
    """Apply the mapping step of stringprep to ``data``.

    Each character in the input stream MUST be checked against
    a mapping table. The mapping functions are tried in order and the
    first one returning something other than ``None`` supplies the
    replacement; a character no function claims is kept unchanged.

    :param data: Iterable of characters (normally a unicode string).
    :param tables: Optional iterable of mapping functions. Each takes a
                   single character and returns its replacement string,
                   or ``None`` if it does not apply. ``None`` (the
                   default) means no mapping is performed.
    :return: The mapped text as a single string.
    """
    # The default of None used to be iterated directly, raising
    # TypeError; treat it as "no mapping tables".
    if tables is None:
        tables = ()

    result = []
    for char in data:
        for mapping in tables:
            replacement = mapping(char)
            if replacement is not None:
                break
        else:
            # No table claimed this character: pass it through as-is.
            replacement = char
        result.append(replacement)
    return ''.join(result)
def normalize(data, nfkc=True):
    """Apply the normalization step of stringprep to ``data``.

    A profile can specify one of two options for Unicode normalization:

    - no normalization
    - Unicode normalization with form KC

    :param data: The unicode text to normalize.
    :param bool nfkc: If true, return the NFKC form of ``data``;
                      otherwise return ``data`` untouched.
    :return: The (possibly normalized) text.
    """
    return unicodedata.normalize('NFKC', data) if nfkc else data
def prohibit_output(data, tables=None):
    """Reject ``data`` if it contains a prohibited code point.

    Before the text can be emitted, it MUST be checked for prohibited
    code points.

    :param data: Iterable of characters (normally a unicode string).
    :param tables: Optional iterable of predicate functions. Each takes
                   a single character and returns a true value if that
                   character is prohibited. ``None`` (the default)
                   means nothing is prohibited.
    :raises: StringPrepError on the first character flagged by any
             predicate.
    :return: ``None``.
    """
    # The default of None used to be iterated directly, raising
    # TypeError; treat it as "no prohibition tables".
    if tables is None:
        tables = ()

    for char in data:
        for check in tables:
            if check(char):
                raise StringPrepError("Prohibited code point: %s" % char)
def check_bidi(data):
    """Enforce the bidirectional-text rules of stringprep.

    1) The characters in section 5.8 MUST be prohibited.
    2) If a string contains any RandALCat character, the string MUST NOT
       contain any LCat character.
    3) If a string contains any RandALCat character, a RandALCat
       character MUST be the first character of the string, and a
       RandALCat character MUST be the last character of the string.

    :param data: The unicode text to check.
    :raises: StringPrepError naming the violated rule.
    :return: ``data`` itself when it is empty; otherwise ``None``.
    """
    if not data:
        # Nothing to check (this also keeps data[0] / data[-1] below safe).
        return data

    has_lcat = False
    has_randal = False

    for c in data:
        # Rule 1: table C.8 characters are never allowed.
        if stringprep.in_table_c8(c):
            raise StringPrepError("BIDI violation: section 6 (1)")
        if stringprep.in_table_d1(c):
            has_randal = True
        elif stringprep.in_table_d2(c):
            has_lcat = True

    # Rule 2: RandALCat (table D.1) and LCat (table D.2) must not be mixed.
    if has_randal and has_lcat:
        raise StringPrepError("BIDI violation: section 6 (2)")

    # Rule 3: a string with RandALCat characters must start and end with one.
    first_randal = stringprep.in_table_d1(data[0])
    last_randal = stringprep.in_table_d1(data[-1])
    if has_randal and not (first_randal and last_randal):
        raise StringPrepError("BIDI violation: section 6 (3)")
def _check_unassigned(data, tables):
    """Raise StringPrepError if any check in ``tables`` flags a character."""
    for char in data:
        for check in tables:
            if check(char):
                raise StringPrepError("Unassigned code point: %s" % char)


def create(nfkc=True, bidi=True, mappings=None,
           prohibited=None, unassigned=None):
    """Create a profile of stringprep.

    The result is a function ``profile(data, query=False)`` that runs,
    in order: character mapping, optional NFKC normalization, the
    prohibited-output check, the optional bidirectional check and,
    when ``query`` is true and ``unassigned`` is non-empty, the
    unassigned code point check.

    :param bool nfkc:
        If `True`, perform NFKC Unicode normalization. Defaults to `True`.
    :param bool bidi:
        If `True`, perform bidirectional text checks. Defaults to `True`.
    :param list mappings:
        Optional list of functions for mapping characters to
        suitable replacements.
    :param list prohibited:
        Optional list of functions which check for the presence of
        prohibited characters.
    :param list unassigned:
        Optional list of functions for detecting the use of unassigned
        code points.

    :return: The profile function. Calling it returns the Unicode
             string of the resulting text passing the profile's
             requirements, or raises StringPrepError.
    """
    # Omitted tables mean "none"; the helpers iterate over these values,
    # so normalize None here to keep a default profile usable.
    mappings = () if mappings is None else mappings
    prohibited = () if prohibited is None else prohibited

    def profile(data, query=False):
        try:
            data = unicode(data)
        except UnicodeError:
            raise StringPrepError

        data = map_input(data, mappings)
        data = normalize(data, nfkc)
        prohibit_output(data, prohibited)
        if bidi:
            check_bidi(data)
        # NOTE(review): the check runs only when ``query`` is true, as in
        # the original code. RFC 3454 section 7 describes queries as the
        # case that MAY contain unassigned code points -- confirm that
        # this polarity is intended.
        if query and unassigned:
            # Previously called an undefined ``check_unassigned`` name.
            _check_unassigned(data, unassigned)
        return data
    return profile
|