unicode_utils.py 941 B

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. import unicodedata
  2. import sys
  3. # HFS Plus uses decomposed UTF-8
  4. def decompose(path):
  5. if isinstance(path, str):
  6. return unicodedata.normalize('NFD', path)
  7. try:
  8. path = path.decode('utf-8')
  9. path = unicodedata.normalize('NFD', path)
  10. path = path.encode('utf-8')
  11. except UnicodeError:
  12. pass # Not UTF-8
  13. return path
  14. def filesys_decode(path):
  15. """
  16. Ensure that the given path is decoded,
  17. NONE when no expected encoding works
  18. """
  19. if isinstance(path, str):
  20. return path
  21. fs_enc = sys.getfilesystemencoding() or 'utf-8'
  22. candidates = fs_enc, 'utf-8'
  23. for enc in candidates:
  24. try:
  25. return path.decode(enc)
  26. except UnicodeDecodeError:
  27. continue
  28. def try_encode(string, enc):
  29. "turn unicode encoding into a functional routine"
  30. try:
  31. return string.encode(enc)
  32. except UnicodeEncodeError:
  33. return None