trustroot.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. # -*- test-case-name: openid.test.test_rpverify -*-
  2. """
  3. This module contains the C{L{TrustRoot}} class, which helps handle
  4. trust root checking. This module is used by the
  5. C{L{openid.server.server}} module, but it is also available to server
  6. implementers who wish to use it for additional trust root checking.
  7. It also implements relying party return_to URL verification, based on
  8. the realm.
  9. """
  10. __all__ = [
  11. 'TrustRoot',
  12. 'RP_RETURN_TO_URL_TYPE',
  13. 'extractReturnToURLs',
  14. 'returnToMatches',
  15. 'verifyReturnTo',
  16. ]
  17. from openid import urinorm
  18. from openid.yadis import services
  19. from urllib.parse import urlparse, urlunparse
  20. import re
  21. import logging
# Module-level logger; verifyReturnTo reports failed verifications through it.
logger = logging.getLogger(__name__)
  23. ############################################
  24. _protocols = ['http', 'https']
  25. _top_level_domains = [
  26. 'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq',
  27. 'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd',
  28. 'be', 'bf', 'bg', 'bh', 'bi', 'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs',
  29. 'bt', 'bv', 'bw', 'by', 'bz', 'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch',
  30. 'ci', 'ck', 'cl', 'cm', 'cn', 'co', 'com', 'coop', 'cr', 'cu', 'cv', 'cx',
  31. 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec', 'edu', 'ee', 'eg',
  32. 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'ga', 'gb',
  33. 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq',
  34. 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu',
  35. 'id', 'ie', 'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is', 'it',
  36. 'je', 'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
  37. 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
  38. 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mil', 'mk', 'ml',
  39. 'mm', 'mn', 'mo', 'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'museum',
  40. 'mv', 'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net', 'nf', 'ng',
  41. 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org', 'pa', 'pe', 'pf',
  42. 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', 'pro', 'ps', 'pt', 'pw', 'py',
  43. 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg',
  44. 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv',
  45. 'sy', 'sz', 'tc', 'td', 'tel', 'tf', 'tg', 'th', 'tj', 'tk', 'tl', 'tm',
  46. 'tn', 'to', 'tp', 'tr', 'travel', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'uk',
  47. 'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
  48. 'xn--0zwm56d', 'xn--11b5bs3a9aj6g', 'xn--80akhbyknj4f', 'xn--9t4b11yi5a',
  49. 'xn--deba0ad', 'xn--g6w251d', 'xn--hgbk6aj7f53bba', 'xn--hlcj6aya9esc7a',
  50. 'xn--jxalpdlp', 'xn--kgbechtv', 'xn--zckzah', 'ye', 'yt', 'yu', 'za', 'zm',
  51. 'zw'
  52. ]
  53. # Build from RFC3986, section 3.2.2. Used to reject hosts with invalid
  54. # characters.
  55. host_segment_re = re.compile(
  56. r"(?:[-a-zA-Z0-9!$&'\(\)\*+,;=._~]|%[a-zA-Z0-9]{2})+$")
  57. class RealmVerificationRedirected(Exception):
  58. """Attempting to verify this realm resulted in a redirect.
  59. @since: 2.1.0
  60. """
  61. def __init__(self, relying_party_url, rp_url_after_redirects):
  62. self.relying_party_url = relying_party_url
  63. self.rp_url_after_redirects = rp_url_after_redirects
  64. def __str__(self):
  65. return ("Attempting to verify %r resulted in "
  66. "redirect to %r" % (self.relying_party_url,
  67. self.rp_url_after_redirects))
  68. def _parseURL(url):
  69. try:
  70. url = urinorm.urinorm(url)
  71. except ValueError:
  72. return None
  73. proto, netloc, path, params, query, frag = urlparse(url)
  74. if not path:
  75. # Python <2.4 does not parse URLs with no path properly
  76. if not query and '?' in netloc:
  77. netloc, query = netloc.split('?', 1)
  78. path = '/'
  79. path = urlunparse(('', '', path, params, query, frag))
  80. if ':' in netloc:
  81. try:
  82. host, port = netloc.split(':')
  83. except ValueError:
  84. return None
  85. if not re.match(r'\d+$', port):
  86. return None
  87. else:
  88. host = netloc
  89. port = ''
  90. host = host.lower()
  91. if not host_segment_re.match(host):
  92. return None
  93. return proto, host, port, path
  94. class TrustRoot(object):
  95. """
  96. This class represents an OpenID trust root. The C{L{parse}}
  97. classmethod accepts a trust root string, producing a
  98. C{L{TrustRoot}} object. The method OpenID server implementers
  99. would be most likely to use is the C{L{isSane}} method, which
  100. checks the trust root for given patterns that indicate that the
  101. trust root is too broad or points to a local network resource.
  102. @sort: parse, isSane
  103. """
  104. def __init__(self, unparsed, proto, wildcard, host, port, path):
  105. self.unparsed = unparsed
  106. self.proto = proto
  107. self.wildcard = wildcard
  108. self.host = host
  109. self.port = port
  110. self.path = path
  111. def isSane(self):
  112. """
  113. This method checks the to see if a trust root represents a
  114. reasonable (sane) set of URLs. 'http://*.com/', for example
  115. is not a reasonable pattern, as it cannot meaningfully specify
  116. the site claiming it. This function attempts to find many
  117. related examples, but it can only work via heuristics.
  118. Negative responses from this method should be treated as
  119. advisory, used only to alert the user to examine the trust
  120. root carefully.
  121. @return: Whether the trust root is sane
  122. @rtype: C{bool}
  123. """
  124. if self.host == 'localhost':
  125. return True
  126. host_parts = self.host.split('.')
  127. if self.wildcard:
  128. assert host_parts[0] == '', host_parts
  129. del host_parts[0]
  130. # If it's an absolute domain name, remove the empty string
  131. # from the end.
  132. if host_parts and not host_parts[-1]:
  133. del host_parts[-1]
  134. if not host_parts:
  135. return False
  136. # Do not allow adjacent dots
  137. if '' in host_parts:
  138. return False
  139. tld = host_parts[-1]
  140. if tld not in _top_level_domains:
  141. return False
  142. if len(host_parts) == 1:
  143. return False
  144. if self.wildcard:
  145. if len(tld) == 2 and len(host_parts[-2]) <= 3:
  146. # It's a 2-letter tld with a short second to last segment
  147. # so there needs to be more than two segments specified
  148. # (e.g. *.co.uk is insane)
  149. return len(host_parts) > 2
  150. # Passed all tests for insanity.
  151. return True
  152. def validateURL(self, url):
  153. """
  154. Validates a URL against this trust root.
  155. @param url: The URL to check
  156. @type url: C{str}
  157. @return: Whether the given URL is within this trust root.
  158. @rtype: C{bool}
  159. """
  160. url_parts = _parseURL(url)
  161. if url_parts is None:
  162. return False
  163. proto, host, port, path = url_parts
  164. if proto != self.proto:
  165. return False
  166. if port != self.port:
  167. return False
  168. if '*' in host:
  169. return False
  170. if not self.wildcard:
  171. if host != self.host:
  172. return False
  173. elif ((not host.endswith(self.host)) and ('.' + host) != self.host):
  174. return False
  175. if path != self.path:
  176. path_len = len(self.path)
  177. trust_prefix = self.path[:path_len]
  178. url_prefix = path[:path_len]
  179. # must be equal up to the length of the path, at least
  180. if trust_prefix != url_prefix:
  181. return False
  182. # These characters must be on the boundary between the end
  183. # of the trust root's path and the start of the URL's
  184. # path.
  185. if '?' in self.path:
  186. allowed = '&'
  187. else:
  188. allowed = '?/'
  189. return (self.path[-1] in allowed or path[path_len] in allowed)
  190. return True
  191. def parse(cls, trust_root):
  192. """
  193. This method creates a C{L{TrustRoot}} instance from the given
  194. input, if possible.
  195. @param trust_root: This is the trust root to parse into a
  196. C{L{TrustRoot}} object.
  197. @type trust_root: C{str}
  198. @return: A C{L{TrustRoot}} instance if trust_root parses as a
  199. trust root, C{None} otherwise.
  200. @rtype: C{NoneType} or C{L{TrustRoot}}
  201. """
  202. url_parts = _parseURL(trust_root)
  203. if url_parts is None:
  204. return None
  205. proto, host, port, path = url_parts
  206. # check for valid prototype
  207. if proto not in _protocols:
  208. return None
  209. # check for URI fragment
  210. if path.find('#') != -1:
  211. return None
  212. # extract wildcard if it is there
  213. if host.find('*', 1) != -1:
  214. # wildcard must be at start of domain: *.foo.com, not foo.*.com
  215. return None
  216. if host.startswith('*'):
  217. # Starts with star, so must have a dot after it (if a
  218. # domain is specified)
  219. if len(host) > 1 and host[1] != '.':
  220. return None
  221. host = host[1:]
  222. wilcard = True
  223. else:
  224. wilcard = False
  225. # we have a valid trust root
  226. tr = cls(trust_root, proto, wilcard, host, port, path)
  227. return tr
  228. parse = classmethod(parse)
  229. def checkSanity(cls, trust_root_string):
  230. """str -> bool
  231. is this a sane trust root?
  232. """
  233. trust_root = cls.parse(trust_root_string)
  234. if trust_root is None:
  235. return False
  236. else:
  237. return trust_root.isSane()
  238. checkSanity = classmethod(checkSanity)
  239. def checkURL(cls, trust_root, url):
  240. """quick func for validating a url against a trust root. See the
  241. TrustRoot class if you need more control."""
  242. tr = cls.parse(trust_root)
  243. return tr is not None and tr.validateURL(url)
  244. checkURL = classmethod(checkURL)
  245. def buildDiscoveryURL(self):
  246. """Return a discovery URL for this realm.
  247. This function does not check to make sure that the realm is
  248. valid. Its behaviour on invalid inputs is undefined.
  249. @rtype: str
  250. @returns: The URL upon which relying party discovery should be run
  251. in order to verify the return_to URL
  252. @since: 2.1.0
  253. """
  254. if self.wildcard:
  255. # Use "www." in place of the star
  256. assert self.host.startswith('.'), self.host
  257. www_domain = 'www' + self.host
  258. return '%s://%s%s' % (self.proto, www_domain, self.path)
  259. else:
  260. return self.unparsed
  261. def __repr__(self):
  262. return "TrustRoot(%r, %r, %r, %r, %r, %r)" % (
  263. self.unparsed, self.proto, self.wildcard, self.host, self.port,
  264. self.path)
  265. def __str__(self):
  266. return repr(self)
# The service type URI for relying party discovery, used in realm
# verification: endpoints are matched against this type to find a
# relying party's return_to URLs.
#
# XXX: This should probably live somewhere else (like in
# openid.consumer or openid.yadis somewhere)
RP_RETURN_TO_URL_TYPE = 'http://specs.openid.net/auth/2.0/return_to'
  272. def _extractReturnURL(endpoint):
  273. """If the endpoint is a relying party OpenID return_to endpoint,
  274. return the endpoint URL. Otherwise, return None.
  275. This function is intended to be used as a filter for the Yadis
  276. filtering interface.
  277. @see: C{L{openid.yadis.services}}
  278. @see: C{L{openid.yadis.filters}}
  279. @param endpoint: An XRDS BasicServiceEndpoint, as returned by
  280. performing Yadis dicovery.
  281. @returns: The endpoint URL or None if the endpoint is not a
  282. relying party endpoint.
  283. @rtype: str or NoneType
  284. """
  285. if endpoint.matchTypes([RP_RETURN_TO_URL_TYPE]):
  286. return endpoint.uri
  287. else:
  288. return None
  289. def returnToMatches(allowed_return_to_urls, return_to):
  290. """Is the return_to URL under one of the supplied allowed
  291. return_to URLs?
  292. @since: 2.1.0
  293. """
  294. for allowed_return_to in allowed_return_to_urls:
  295. # A return_to pattern works the same as a realm, except that
  296. # it's not allowed to use a wildcard. We'll model this by
  297. # parsing it as a realm, and not trying to match it if it has
  298. # a wildcard.
  299. return_realm = TrustRoot.parse(allowed_return_to)
  300. if ( # Parses as a trust root
  301. return_realm is not None and
  302. # Does not have a wildcard
  303. not return_realm.wildcard and
  304. # Matches the return_to that we passed in with it
  305. return_realm.validateURL(return_to)):
  306. return True
  307. # No URL in the list matched
  308. return False
  309. def getAllowedReturnURLs(relying_party_url):
  310. """Given a relying party discovery URL return a list of return_to URLs.
  311. @since: 2.1.0
  312. """
  313. (rp_url_after_redirects, return_to_urls) = services.getServiceEndpoints(
  314. relying_party_url, _extractReturnURL)
  315. if rp_url_after_redirects != relying_party_url:
  316. # Verification caused a redirect
  317. raise RealmVerificationRedirected(relying_party_url,
  318. rp_url_after_redirects)
  319. return return_to_urls
  320. # _vrfy parameter is there to make testing easier
  321. def verifyReturnTo(realm_str, return_to, _vrfy=getAllowedReturnURLs):
  322. """Verify that a return_to URL is valid for the given realm.
  323. This function builds a discovery URL, performs Yadis discovery on
  324. it, makes sure that the URL does not redirect, parses out the
  325. return_to URLs, and finally checks to see if the current return_to
  326. URL matches the return_to.
  327. @raises DiscoveryFailure: When Yadis discovery fails
  328. @returns: True if the return_to URL is valid for the realm
  329. @since: 2.1.0
  330. """
  331. realm = TrustRoot.parse(realm_str)
  332. if realm is None:
  333. # The realm does not parse as a URL pattern
  334. return False
  335. try:
  336. allowable_urls = _vrfy(realm.buildDiscoveryURL())
  337. except RealmVerificationRedirected as err:
  338. logger.exception(str(err))
  339. return False
  340. if returnToMatches(allowable_urls, return_to):
  341. return True
  342. else:
  343. logger.error("Failed to validate return_to %r for realm %r, was not "
  344. "in %s" % (return_to, realm_str, allowable_urls))
  345. return False