stringprep_profiles.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. # -*- coding: utf-8 -*-
  2. """
  3. sleekxmpp.util.stringprep_profiles
  4. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  5. This module makes it easier to define profiles of stringprep,
  6. such as nodeprep and resourceprep for JID validation, and
  7. SASLprep for SASL.
  8. Part of SleekXMPP: The Sleek XMPP Library
  9. :copyright: (c) 2012 Nathanael C. Fritz, Lance J.T. Stout
  10. :license: MIT, see LICENSE for more details
  11. """
  12. from __future__ import unicode_literals
  13. import stringprep
  14. from unicodedata import ucd_3_2_0 as unicodedata
  15. from sleekxmpp.util import unicode
  16. class StringPrepError(UnicodeError):
  17. pass
  18. def b1_mapping(char):
  19. """Map characters that are commonly mapped to nothing."""
  20. return '' if stringprep.in_table_b1(char) else None
  21. def c12_mapping(char):
  22. """Map non-ASCII whitespace to spaces."""
  23. return ' ' if stringprep.in_table_c12(char) else None
  24. def map_input(data, tables=None):
  25. """
  26. Each character in the input stream MUST be checked against
  27. a mapping table.
  28. """
  29. result = []
  30. for char in data:
  31. replacement = None
  32. for mapping in tables:
  33. replacement = mapping(char)
  34. if replacement is not None:
  35. break
  36. if replacement is None:
  37. replacement = char
  38. result.append(replacement)
  39. return ''.join(result)
  40. def normalize(data, nfkc=True):
  41. """
  42. A profile can specify one of two options for Unicode normalization:
  43. - no normalization
  44. - Unicode normalization with form KC
  45. """
  46. if nfkc:
  47. data = unicodedata.normalize('NFKC', data)
  48. return data
  49. def prohibit_output(data, tables=None):
  50. """
  51. Before the text can be emitted, it MUST be checked for prohibited
  52. code points.
  53. """
  54. for char in data:
  55. for check in tables:
  56. if check(char):
  57. raise StringPrepError("Prohibited code point: %s" % char)
  58. def check_bidi(data):
  59. """
  60. 1) The characters in section 5.8 MUST be prohibited.
  61. 2) If a string contains any RandALCat character, the string MUST NOT
  62. contain any LCat character.
  63. 3) If a string contains any RandALCat character, a RandALCat
  64. character MUST be the first character of the string, and a
  65. RandALCat character MUST be the last character of the string.
  66. """
  67. if not data:
  68. return data
  69. has_lcat = False
  70. has_randal = False
  71. for c in data:
  72. if stringprep.in_table_c8(c):
  73. raise StringPrepError("BIDI violation: seciton 6 (1)")
  74. if stringprep.in_table_d1(c):
  75. has_randal = True
  76. elif stringprep.in_table_d2(c):
  77. has_lcat = True
  78. if has_randal and has_lcat:
  79. raise StringPrepError("BIDI violation: section 6 (2)")
  80. first_randal = stringprep.in_table_d1(data[0])
  81. last_randal = stringprep.in_table_d1(data[-1])
  82. if has_randal and not (first_randal and last_randal):
  83. raise StringPrepError("BIDI violation: section 6 (3)")
  84. def create(nfkc=True, bidi=True, mappings=None,
  85. prohibited=None, unassigned=None):
  86. """Create a profile of stringprep.
  87. :param bool nfkc:
  88. If `True`, perform NFKC Unicode normalization. Defaults to `True`.
  89. :param bool bidi:
  90. If `True`, perform bidirectional text checks. Defaults to `True`.
  91. :param list mappings:
  92. Optional list of functions for mapping characters to
  93. suitable replacements.
  94. :param list prohibited:
  95. Optional list of functions which check for the presence of
  96. prohibited characters.
  97. :param list unassigned:
  98. Optional list of functions for detecting the use of unassigned
  99. code points.
  100. :raises: StringPrepError
  101. :return: Unicode string of the resulting text passing the
  102. profile's requirements.
  103. """
  104. def profile(data, query=False):
  105. try:
  106. data = unicode(data)
  107. except UnicodeError:
  108. raise StringPrepError
  109. data = map_input(data, mappings)
  110. data = normalize(data, nfkc)
  111. prohibit_output(data, prohibited)
  112. if bidi:
  113. check_bidi(data)
  114. if query and unassigned:
  115. check_unassigned(data, unassigned)
  116. return data
  117. return profile