jid.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. # -*- coding: utf-8 -*-
  2. """
  3. sleekxmpp.jid
  4. ~~~~~~~~~~~~~~~~~~~~~~~
  5. This module allows for working with Jabber IDs (JIDs).
  6. Part of SleekXMPP: The Sleek XMPP Library
  7. :copyright: (c) 2011 Nathanael C. Fritz
  8. :license: MIT, see LICENSE for more details
  9. """
  10. from __future__ import unicode_literals
  11. import re
  12. import socket
  13. import stringprep
  14. import threading
  15. import encodings.idna
  16. from copy import deepcopy
  17. from sleekxmpp.util import stringprep_profiles
  18. from sleekxmpp.thirdparty import OrderedDict
  19. #: These characters are not allowed to appear in a JID.
  20. ILLEGAL_CHARS = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r' + \
  21. '\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19' + \
  22. '\x1a\x1b\x1c\x1d\x1e\x1f' + \
  23. ' !"#$%&\'()*+,./:;<=>?@[\\]^_`{|}~\x7f'
  24. #: The basic regex pattern that a JID must match in order to determine
  25. #: the local, domain, and resource parts. This regex does NOT do any
  26. #: validation, which requires application of nodeprep, resourceprep, etc.
  27. JID_PATTERN = re.compile(
  28. "^(?:([^\"&'/:<>@]{1,1023})@)?([^/@]{1,1023})(?:/(.{1,1023}))?$"
  29. )
  30. #: The set of escape sequences for the characters not allowed by nodeprep.
  31. JID_ESCAPE_SEQUENCES = set(['\\20', '\\22', '\\26', '\\27', '\\2f',
  32. '\\3a', '\\3c', '\\3e', '\\40', '\\5c'])
  33. #: A mapping of unallowed characters to their escape sequences. An escape
  34. #: sequence for '\' is also included since it must also be escaped in
  35. #: certain situations.
  36. JID_ESCAPE_TRANSFORMATIONS = {' ': '\\20',
  37. '"': '\\22',
  38. '&': '\\26',
  39. "'": '\\27',
  40. '/': '\\2f',
  41. ':': '\\3a',
  42. '<': '\\3c',
  43. '>': '\\3e',
  44. '@': '\\40',
  45. '\\': '\\5c'}
  46. #: The reverse mapping of escape sequences to their original forms.
  47. JID_UNESCAPE_TRANSFORMATIONS = {'\\20': ' ',
  48. '\\22': '"',
  49. '\\26': '&',
  50. '\\27': "'",
  51. '\\2f': '/',
  52. '\\3a': ':',
  53. '\\3c': '<',
  54. '\\3e': '>',
  55. '\\40': '@',
  56. '\\5c': '\\'}
  57. JID_CACHE = OrderedDict()
  58. JID_CACHE_LOCK = threading.Lock()
  59. JID_CACHE_MAX_SIZE = 1024
  60. def _cache(key, parts, locked):
  61. with JID_CACHE_LOCK:
  62. JID_CACHE[key] = (parts, locked)
  63. while len(JID_CACHE) > JID_CACHE_MAX_SIZE:
  64. found = None
  65. for key, item in JID_CACHE.items():
  66. if not item[1]: # if not locked
  67. found = key
  68. break
  69. if not found: # more than MAX_SIZE locked
  70. # warn?
  71. break
  72. del JID_CACHE[found]
  73. # pylint: disable=c0103
  74. #: The nodeprep profile of stringprep used to validate the local,
  75. #: or username, portion of a JID.
  76. nodeprep = stringprep_profiles.create(
  77. nfkc=True,
  78. bidi=True,
  79. mappings=[
  80. stringprep_profiles.b1_mapping,
  81. stringprep.map_table_b2],
  82. prohibited=[
  83. stringprep.in_table_c11,
  84. stringprep.in_table_c12,
  85. stringprep.in_table_c21,
  86. stringprep.in_table_c22,
  87. stringprep.in_table_c3,
  88. stringprep.in_table_c4,
  89. stringprep.in_table_c5,
  90. stringprep.in_table_c6,
  91. stringprep.in_table_c7,
  92. stringprep.in_table_c8,
  93. stringprep.in_table_c9,
  94. lambda c: c in ' \'"&/:<>@'],
  95. unassigned=[stringprep.in_table_a1])
  96. # pylint: disable=c0103
  97. #: The resourceprep profile of stringprep, which is used to validate
  98. #: the resource portion of a JID.
  99. resourceprep = stringprep_profiles.create(
  100. nfkc=True,
  101. bidi=True,
  102. mappings=[stringprep_profiles.b1_mapping],
  103. prohibited=[
  104. stringprep.in_table_c12,
  105. stringprep.in_table_c21,
  106. stringprep.in_table_c22,
  107. stringprep.in_table_c3,
  108. stringprep.in_table_c4,
  109. stringprep.in_table_c5,
  110. stringprep.in_table_c6,
  111. stringprep.in_table_c7,
  112. stringprep.in_table_c8,
  113. stringprep.in_table_c9],
  114. unassigned=[stringprep.in_table_a1])
  115. def _parse_jid(data):
  116. """
  117. Parse string data into the node, domain, and resource
  118. components of a JID, if possible.
  119. :param string data: A string that is potentially a JID.
  120. :raises InvalidJID:
  121. :returns: tuple of the validated local, domain, and resource strings
  122. """
  123. match = JID_PATTERN.match(data)
  124. if not match:
  125. raise InvalidJID('JID could not be parsed')
  126. (node, domain, resource) = match.groups()
  127. node = _validate_node(node)
  128. domain = _validate_domain(domain)
  129. resource = _validate_resource(resource)
  130. return node, domain, resource
  131. def _validate_node(node):
  132. """Validate the local, or username, portion of a JID.
  133. :raises InvalidJID:
  134. :returns: The local portion of a JID, as validated by nodeprep.
  135. """
  136. try:
  137. if node is not None:
  138. node = nodeprep(node)
  139. if not node:
  140. raise InvalidJID('Localpart must not be 0 bytes')
  141. if len(node) > 1023:
  142. raise InvalidJID('Localpart must be less than 1024 bytes')
  143. return node
  144. except stringprep_profiles.StringPrepError:
  145. raise InvalidJID('Invalid local part')
  146. def _validate_domain(domain):
  147. """Validate the domain portion of a JID.
  148. IP literal addresses are left as-is, if valid. Domain names
  149. are stripped of any trailing label separators (`.`), and are
  150. checked with the nameprep profile of stringprep. If the given
  151. domain is actually a punyencoded version of a domain name, it
  152. is converted back into its original Unicode form. Domains must
  153. also not start or end with a dash (`-`).
  154. :raises InvalidJID:
  155. :returns: The validated domain name
  156. """
  157. ip_addr = False
  158. # First, check if this is an IPv4 address
  159. try:
  160. socket.inet_aton(domain)
  161. ip_addr = True
  162. except socket.error:
  163. pass
  164. # Check if this is an IPv6 address
  165. if not ip_addr and hasattr(socket, 'inet_pton'):
  166. try:
  167. socket.inet_pton(socket.AF_INET6, domain.strip('[]'))
  168. domain = '[%s]' % domain.strip('[]')
  169. ip_addr = True
  170. except (socket.error, ValueError):
  171. pass
  172. if not ip_addr:
  173. # This is a domain name, which must be checked further
  174. if domain and domain[-1] == '.':
  175. domain = domain[:-1]
  176. domain_parts = []
  177. for label in domain.split('.'):
  178. try:
  179. label = encodings.idna.nameprep(label)
  180. encodings.idna.ToASCII(label)
  181. pass_nameprep = True
  182. except UnicodeError:
  183. pass_nameprep = False
  184. if not pass_nameprep:
  185. raise InvalidJID('Could not encode domain as ASCII')
  186. if label.startswith('xn--'):
  187. label = encodings.idna.ToUnicode(label)
  188. for char in label:
  189. if char in ILLEGAL_CHARS:
  190. raise InvalidJID('Domain contains illegal characters')
  191. if '-' in (label[0], label[-1]):
  192. raise InvalidJID('Domain started or ended with -')
  193. domain_parts.append(label)
  194. domain = '.'.join(domain_parts)
  195. if not domain:
  196. raise InvalidJID('Domain must not be 0 bytes')
  197. if len(domain) > 1023:
  198. raise InvalidJID('Domain must be less than 1024 bytes')
  199. return domain
  200. def _validate_resource(resource):
  201. """Validate the resource portion of a JID.
  202. :raises InvalidJID:
  203. :returns: The local portion of a JID, as validated by resourceprep.
  204. """
  205. try:
  206. if resource is not None:
  207. resource = resourceprep(resource)
  208. if not resource:
  209. raise InvalidJID('Resource must not be 0 bytes')
  210. if len(resource) > 1023:
  211. raise InvalidJID('Resource must be less than 1024 bytes')
  212. return resource
  213. except stringprep_profiles.StringPrepError:
  214. raise InvalidJID('Invalid resource')
  215. def _escape_node(node):
  216. """Escape the local portion of a JID."""
  217. result = []
  218. for i, char in enumerate(node):
  219. if char == '\\':
  220. if ''.join((node[i:i+3])) in JID_ESCAPE_SEQUENCES:
  221. result.append('\\5c')
  222. continue
  223. result.append(char)
  224. for i, char in enumerate(result):
  225. if char != '\\':
  226. result[i] = JID_ESCAPE_TRANSFORMATIONS.get(char, char)
  227. escaped = ''.join(result)
  228. if escaped.startswith('\\20') or escaped.endswith('\\20'):
  229. raise InvalidJID('Escaped local part starts or ends with "\\20"')
  230. _validate_node(escaped)
  231. return escaped
  232. def _unescape_node(node):
  233. """Unescape a local portion of a JID.
  234. .. note::
  235. The unescaped local portion is meant ONLY for presentation,
  236. and should not be used for other purposes.
  237. """
  238. unescaped = []
  239. seq = ''
  240. for i, char in enumerate(node):
  241. if char == '\\':
  242. seq = node[i:i+3]
  243. if seq not in JID_ESCAPE_SEQUENCES:
  244. seq = ''
  245. if seq:
  246. if len(seq) == 3:
  247. unescaped.append(JID_UNESCAPE_TRANSFORMATIONS.get(seq, char))
  248. # Pop character off the escape sequence, and ignore it
  249. seq = seq[1:]
  250. else:
  251. unescaped.append(char)
  252. unescaped = ''.join(unescaped)
  253. return unescaped
  254. def _format_jid(local=None, domain=None, resource=None):
  255. """Format the given JID components into a full or bare JID.
  256. :param string local: Optional. The local portion of the JID.
  257. :param string domain: Required. The domain name portion of the JID.
  258. :param strin resource: Optional. The resource portion of the JID.
  259. :return: A full or bare JID string.
  260. """
  261. result = []
  262. if local:
  263. result.append(local)
  264. result.append('@')
  265. if domain:
  266. result.append(domain)
  267. if resource:
  268. result.append('/')
  269. result.append(resource)
  270. return ''.join(result)
  271. class InvalidJID(ValueError):
  272. """
  273. Raised when attempting to create a JID that does not pass validation.
  274. It can also be raised if modifying an existing JID in such a way as
  275. to make it invalid, such trying to remove the domain from an existing
  276. full JID while the local and resource portions still exist.
  277. """
  278. # pylint: disable=R0903
  279. class UnescapedJID(object):
  280. """
  281. .. versionadded:: 1.1.10
  282. """
  283. def __init__(self, local, domain, resource):
  284. self._jid = (local, domain, resource)
  285. # pylint: disable=R0911
  286. def __getattr__(self, name):
  287. """Retrieve the given JID component.
  288. :param name: one of: user, server, domain, resource,
  289. full, or bare.
  290. """
  291. if name == 'resource':
  292. return self._jid[2] or ''
  293. elif name in ('user', 'username', 'local', 'node'):
  294. return self._jid[0] or ''
  295. elif name in ('server', 'domain', 'host'):
  296. return self._jid[1] or ''
  297. elif name in ('full', 'jid'):
  298. return _format_jid(*self._jid)
  299. elif name == 'bare':
  300. return _format_jid(self._jid[0], self._jid[1])
  301. elif name == '_jid':
  302. return getattr(super(JID, self), '_jid')
  303. else:
  304. return None
  305. def __str__(self):
  306. """Use the full JID as the string value."""
  307. return _format_jid(*self._jid)
  308. def __repr__(self):
  309. """Use the full JID as the representation."""
  310. return self.__str__()
  311. class JID(object):
  312. """
  313. A representation of a Jabber ID, or JID.
  314. Each JID may have three components: a user, a domain, and an optional
  315. resource. For example: user@domain/resource
  316. When a resource is not used, the JID is called a bare JID.
  317. The JID is a full JID otherwise.
  318. **JID Properties:**
  319. :jid: Alias for ``full``.
  320. :full: The string value of the full JID.
  321. :bare: The string value of the bare JID.
  322. :user: The username portion of the JID.
  323. :username: Alias for ``user``.
  324. :local: Alias for ``user``.
  325. :node: Alias for ``user``.
  326. :domain: The domain name portion of the JID.
  327. :server: Alias for ``domain``.
  328. :host: Alias for ``domain``.
  329. :resource: The resource portion of the JID.
  330. :param string jid:
  331. A string of the form ``'[user@]domain[/resource]'``.
  332. :param string local:
  333. Optional. Specify the local, or username, portion
  334. of the JID. If provided, it will override the local
  335. value provided by the `jid` parameter. The given
  336. local value will also be escaped if necessary.
  337. :param string domain:
  338. Optional. Specify the domain of the JID. If
  339. provided, it will override the domain given by
  340. the `jid` parameter.
  341. :param string resource:
  342. Optional. Specify the resource value of the JID.
  343. If provided, it will override the domain given
  344. by the `jid` parameter.
  345. :raises InvalidJID:
  346. """
  347. # pylint: disable=W0212
  348. def __init__(self, jid=None, **kwargs):
  349. locked = kwargs.get('cache_lock', False)
  350. in_local = kwargs.get('local', None)
  351. in_domain = kwargs.get('domain', None)
  352. in_resource = kwargs.get('resource', None)
  353. parts = None
  354. if in_local or in_domain or in_resource:
  355. parts = (in_local, in_domain, in_resource)
  356. # only check cache if there is a jid string, or parts, not if there
  357. # are both
  358. self._jid = None
  359. key = None
  360. if (jid is not None) and (parts is None):
  361. if isinstance(jid, JID):
  362. # it's already good to go, and there are no additions
  363. self._jid = jid._jid
  364. return
  365. key = jid
  366. self._jid, locked = JID_CACHE.get(jid, (None, locked))
  367. elif jid is None and parts is not None:
  368. key = parts
  369. self._jid, locked = JID_CACHE.get(parts, (None, locked))
  370. if not self._jid:
  371. if not jid:
  372. parsed_jid = (None, None, None)
  373. elif not isinstance(jid, JID):
  374. parsed_jid = _parse_jid(jid)
  375. else:
  376. parsed_jid = jid._jid
  377. local, domain, resource = parsed_jid
  378. if 'local' in kwargs:
  379. local = _escape_node(in_local)
  380. if 'domain' in kwargs:
  381. domain = _validate_domain(in_domain)
  382. if 'resource' in kwargs:
  383. resource = _validate_resource(in_resource)
  384. self._jid = (local, domain, resource)
  385. if key:
  386. _cache(key, self._jid, locked)
  387. def unescape(self):
  388. """Return an unescaped JID object.
  389. Using an unescaped JID is preferred for displaying JIDs
  390. to humans, and they should NOT be used for any other
  391. purposes than for presentation.
  392. :return: :class:`UnescapedJID`
  393. .. versionadded:: 1.1.10
  394. """
  395. return UnescapedJID(_unescape_node(self._jid[0]),
  396. self._jid[1],
  397. self._jid[2])
  398. def regenerate(self):
  399. """No-op
  400. .. deprecated:: 1.1.10
  401. """
  402. pass
  403. def reset(self, data):
  404. """Start fresh from a new JID string.
  405. :param string data: A string of the form ``'[user@]domain[/resource]'``.
  406. .. deprecated:: 1.1.10
  407. """
  408. self._jid = JID(data)._jid
  409. @property
  410. def resource(self):
  411. return self._jid[2] or ''
  412. @property
  413. def user(self):
  414. return self._jid[0] or ''
  415. @property
  416. def local(self):
  417. return self._jid[0] or ''
  418. @property
  419. def node(self):
  420. return self._jid[0] or ''
  421. @property
  422. def username(self):
  423. return self._jid[0] or ''
  424. @property
  425. def server(self):
  426. return self._jid[1] or ''
  427. @property
  428. def domain(self):
  429. return self._jid[1] or ''
  430. @property
  431. def host(self):
  432. return self._jid[1] or ''
  433. @property
  434. def full(self):
  435. return _format_jid(*self._jid)
  436. @property
  437. def jid(self):
  438. return _format_jid(*self._jid)
  439. @property
  440. def bare(self):
  441. return _format_jid(self._jid[0], self._jid[1])
  442. @resource.setter
  443. def resource(self, value):
  444. self._jid = JID(self, resource=value)._jid
  445. @user.setter
  446. def user(self, value):
  447. self._jid = JID(self, local=value)._jid
  448. @username.setter
  449. def username(self, value):
  450. self._jid = JID(self, local=value)._jid
  451. @local.setter
  452. def local(self, value):
  453. self._jid = JID(self, local=value)._jid
  454. @node.setter
  455. def node(self, value):
  456. self._jid = JID(self, local=value)._jid
  457. @server.setter
  458. def server(self, value):
  459. self._jid = JID(self, domain=value)._jid
  460. @domain.setter
  461. def domain(self, value):
  462. self._jid = JID(self, domain=value)._jid
  463. @host.setter
  464. def host(self, value):
  465. self._jid = JID(self, domain=value)._jid
  466. @full.setter
  467. def full(self, value):
  468. self._jid = JID(value)._jid
  469. @jid.setter
  470. def jid(self, value):
  471. self._jid = JID(value)._jid
  472. @bare.setter
  473. def bare(self, value):
  474. parsed = JID(value)._jid
  475. self._jid = (parsed[0], parsed[1], self._jid[2])
  476. def __str__(self):
  477. """Use the full JID as the string value."""
  478. return _format_jid(*self._jid)
  479. def __repr__(self):
  480. """Use the full JID as the representation."""
  481. return self.__str__()
  482. # pylint: disable=W0212
  483. def __eq__(self, other):
  484. """Two JIDs are equal if they have the same full JID value."""
  485. if isinstance(other, UnescapedJID):
  486. return False
  487. other = JID(other)
  488. return self._jid == other._jid
  489. # pylint: disable=W0212
  490. def __ne__(self, other):
  491. """Two JIDs are considered unequal if they are not equal."""
  492. return not self == other
  493. def __hash__(self):
  494. """Hash a JID based on the string version of its full JID."""
  495. return hash(self.__str__())
  496. def __copy__(self):
  497. """Generate a duplicate JID."""
  498. return JID(self)
  499. def __deepcopy__(self, memo):
  500. """Generate a duplicate JID."""
  501. return JID(deepcopy(str(self), memo))