__init__.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. from bleach.linkifier import (
  2. DEFAULT_CALLBACKS,
  3. Linker,
  4. )
  5. from bleach.sanitizer import (
  6. ALLOWED_ATTRIBUTES,
  7. ALLOWED_PROTOCOLS,
  8. ALLOWED_TAGS,
  9. Cleaner,
  10. )
  11. # yyyymmdd
  12. __releasedate__ = "20220627"
  13. # x.y.z or x.y.z.dev0 -- semver
  14. __version__ = "5.0.1"
  15. __all__ = ["clean", "linkify"]
  16. def clean(
  17. text,
  18. tags=ALLOWED_TAGS,
  19. attributes=ALLOWED_ATTRIBUTES,
  20. protocols=ALLOWED_PROTOCOLS,
  21. strip=False,
  22. strip_comments=True,
  23. css_sanitizer=None,
  24. ):
  25. """Clean an HTML fragment of malicious content and return it
  26. This function is a security-focused function whose sole purpose is to
  27. remove malicious content from a string such that it can be displayed as
  28. content in a web page.
  29. This function is not designed to use to transform content to be used in
  30. non-web-page contexts.
  31. Example::
  32. import bleach
  33. better_text = bleach.clean(yucky_text)
  34. .. Note::
  35. If you're cleaning a lot of text and passing the same argument values or
  36. you want more configurability, consider using a
  37. :py:class:`bleach.sanitizer.Cleaner` instance.
  38. :arg str text: the text to clean
  39. :arg list tags: allowed list of tags; defaults to
  40. ``bleach.sanitizer.ALLOWED_TAGS``
  41. :arg dict attributes: allowed attributes; can be a callable, list or dict;
  42. defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``
  43. :arg list protocols: allowed list of protocols for links; defaults
  44. to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
  45. :arg bool strip: whether or not to strip disallowed elements
  46. :arg bool strip_comments: whether or not to strip HTML comments
  47. :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
  48. sanitizing style attribute values and style text; defaults to None
  49. :returns: cleaned text as unicode
  50. """
  51. cleaner = Cleaner(
  52. tags=tags,
  53. attributes=attributes,
  54. protocols=protocols,
  55. strip=strip,
  56. strip_comments=strip_comments,
  57. css_sanitizer=css_sanitizer,
  58. )
  59. return cleaner.clean(text)
  60. def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
  61. """Convert URL-like strings in an HTML fragment to links
  62. This function converts strings that look like URLs, domain names and email
  63. addresses in text that may be an HTML fragment to links, while preserving:
  64. 1. links already in the string
  65. 2. urls found in attributes
  66. 3. email addresses
  67. linkify does a best-effort approach and tries to recover from bad
  68. situations due to crazy text.
  69. .. Note::
  70. If you're linking a lot of text and passing the same argument values or
  71. you want more configurability, consider using a
  72. :py:class:`bleach.linkifier.Linker` instance.
  73. .. Note::
  74. If you have text that you want to clean and then linkify, consider using
  75. the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
  76. pass. That way you're not parsing the HTML twice.
  77. :arg str text: the text to linkify
  78. :arg list callbacks: list of callbacks to run when adjusting tag attributes;
  79. defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
  80. :arg list skip_tags: list of tags that you don't want to linkify the
  81. contents of; for example, you could set this to ``['pre']`` to skip
  82. linkifying contents of ``pre`` tags
  83. :arg bool parse_email: whether or not to linkify email addresses
  84. :returns: linkified text as unicode
  85. """
  86. linker = Linker(callbacks=callbacks, skip_tags=skip_tags, parse_email=parse_email)
  87. return linker.linkify(text)