discover.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. # -*- test-case-name: openid.test.test_yadis_discover -*-
  2. __all__ = ['discover', 'DiscoveryResult', 'DiscoveryFailure']
  3. from io import StringIO
  4. from openid import fetchers
  5. from openid.yadis.constants import \
  6. YADIS_HEADER_NAME, YADIS_CONTENT_TYPE, YADIS_ACCEPT_HEADER
  7. from openid.yadis.parsehtml import MetaNotFound, findHTMLMeta
  8. class DiscoveryFailure(Exception):
  9. """Raised when a YADIS protocol error occurs in the discovery process"""
  10. identity_url = None
  11. def __init__(self, message, http_response):
  12. Exception.__init__(self, message)
  13. self.http_response = http_response
  14. class DiscoveryResult(object):
  15. """Contains the result of performing Yadis discovery on a URI"""
  16. # The URI that was passed to the fetcher
  17. request_uri = None
  18. # The result of following redirects from the request_uri
  19. normalized_uri = None
  20. # The URI from which the response text was returned (set to
  21. # None if there was no XRDS document found)
  22. xrds_uri = None
  23. # The content-type returned with the response_text
  24. content_type = None
  25. # The document returned from the xrds_uri
  26. response_text = None
  27. def __init__(self, request_uri):
  28. """Initialize the state of the object
  29. sets all attributes to None except the request_uri
  30. """
  31. self.request_uri = request_uri
  32. def usedYadisLocation(self):
  33. """Was the Yadis protocol's indirection used?"""
  34. if self.xrds_uri is None:
  35. return False
  36. return self.normalized_uri != self.xrds_uri
  37. def isXRDS(self):
  38. """Is the response text supposed to be an XRDS document?"""
  39. return (self.usedYadisLocation() or
  40. self.content_type == YADIS_CONTENT_TYPE)
  41. def discover(uri):
  42. """Discover services for a given URI.
  43. @param uri: The identity URI as a well-formed http or https
  44. URI. The well-formedness and the protocol are not checked, but
  45. the results of this function are undefined if those properties
  46. do not hold.
  47. @return: DiscoveryResult object
  48. @raises Exception: Any exception that can be raised by fetching a URL with
  49. the given fetcher.
  50. @raises DiscoveryFailure: When the HTTP response does not have a 200 code.
  51. """
  52. result = DiscoveryResult(uri)
  53. resp = fetchers.fetch(uri, headers={'Accept': YADIS_ACCEPT_HEADER})
  54. if resp.status not in (200, 206):
  55. raise DiscoveryFailure(
  56. 'HTTP Response status from identity URL host is not 200. '
  57. 'Got status %r' % (resp.status, ), resp)
  58. # Note the URL after following redirects
  59. result.normalized_uri = resp.final_url
  60. # Attempt to find out where to go to discover the document
  61. # or if we already have it
  62. result.content_type = resp.headers.get('content-type')
  63. result.xrds_uri = whereIsYadis(resp)
  64. if result.xrds_uri and result.usedYadisLocation():
  65. resp = fetchers.fetch(result.xrds_uri)
  66. if resp.status not in (200, 206):
  67. exc = DiscoveryFailure(
  68. 'HTTP Response status from Yadis host is not 200. '
  69. 'Got status %r' % (resp.status, ), resp)
  70. exc.identity_url = result.normalized_uri
  71. raise exc
  72. result.content_type = resp.headers.get('content-type')
  73. result.response_text = resp.body
  74. return result
  75. def whereIsYadis(resp):
  76. """Given a HTTPResponse, return the location of the Yadis document.
  77. May be the URL just retrieved, another URL, or None if no suitable URL can
  78. be found.
  79. [non-blocking]
  80. @returns: str or None
  81. """
  82. # Attempt to find out where to go to discover the document
  83. # or if we already have it
  84. content_type = resp.headers.get('content-type')
  85. # According to the spec, the content-type header must be an exact
  86. # match, or else we have to look for an indirection.
  87. if (content_type and
  88. content_type.split(';', 1)[0].lower() == YADIS_CONTENT_TYPE):
  89. return resp.final_url
  90. else:
  91. # Try the header
  92. yadis_loc = resp.headers.get(YADIS_HEADER_NAME.lower())
  93. if not yadis_loc:
  94. # Parse as HTML if the header is missing.
  95. #
  96. # XXX: do we want to do something with content-type, like
  97. # have a whitelist or a blacklist (for detecting that it's
  98. # HTML)?
  99. # Decode body by encoding of file
  100. content_type = content_type or ''
  101. encoding = content_type.rsplit(';', 1)
  102. if (len(encoding) == 2 and
  103. encoding[1].strip().startswith('charset=')):
  104. encoding = encoding[1].split('=', 1)[1].strip()
  105. else:
  106. encoding = 'utf-8'
  107. if isinstance(resp.body, bytes):
  108. try:
  109. content = resp.body.decode(encoding)
  110. except UnicodeError:
  111. # All right, the detected encoding has failed. Try with
  112. # UTF-8 (even if there was no detected encoding and we've
  113. # defaulted to UTF-8, it's not that expensive an operation)
  114. try:
  115. content = resp.body.decode('utf-8')
  116. except UnicodeError:
  117. # At this point the content cannot be decoded to a str
  118. # using the detected encoding or falling back to utf-8,
  119. # so we have to resort to replacing undecodable chars.
  120. # This *will* result in broken content but there isn't
  121. # anything else that can be done.
  122. content = resp.body.decode(encoding, 'replace')
  123. else:
  124. content = resp.body
  125. try:
  126. yadis_loc = findHTMLMeta(StringIO(content))
  127. except (MetaNotFound, UnicodeError):
  128. # UnicodeError: Response body could not be encoded and xrds
  129. # location could not be found before troubles occur.
  130. pass
  131. return yadis_loc