glob.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. """
  2. Filename globbing utility. Mostly a copy of `glob` from Python 3.5.
  3. Changes include:
  4. * `yield from` and PEP3102 `*` removed.
  5. * Hidden files are not ignored.
  6. """
  7. import os
  8. import re
  9. import fnmatch
# Public API of this module: the names exported by a star-import.
__all__ = ["glob", "iglob", "escape"]
  11. def glob(pathname, recursive=False):
  12. """Return a list of paths matching a pathname pattern.
  13. The pattern may contain simple shell-style wildcards a la
  14. fnmatch. However, unlike fnmatch, filenames starting with a
  15. dot are special cases that are not matched by '*' and '?'
  16. patterns.
  17. If recursive is true, the pattern '**' will match any files and
  18. zero or more directories and subdirectories.
  19. """
  20. return list(iglob(pathname, recursive=recursive))
  21. def iglob(pathname, recursive=False):
  22. """Return an iterator which yields the paths matching a pathname pattern.
  23. The pattern may contain simple shell-style wildcards a la
  24. fnmatch. However, unlike fnmatch, filenames starting with a
  25. dot are special cases that are not matched by '*' and '?'
  26. patterns.
  27. If recursive is true, the pattern '**' will match any files and
  28. zero or more directories and subdirectories.
  29. """
  30. it = _iglob(pathname, recursive)
  31. if recursive and _isrecursive(pathname):
  32. s = next(it) # skip empty string
  33. assert not s
  34. return it
  35. def _iglob(pathname, recursive):
  36. dirname, basename = os.path.split(pathname)
  37. glob_in_dir = glob2 if recursive and _isrecursive(basename) else glob1
  38. if not has_magic(pathname):
  39. if basename:
  40. if os.path.lexists(pathname):
  41. yield pathname
  42. else:
  43. # Patterns ending with a slash should match only directories
  44. if os.path.isdir(dirname):
  45. yield pathname
  46. return
  47. if not dirname:
  48. yield from glob_in_dir(dirname, basename)
  49. return
  50. # `os.path.split()` returns the argument itself as a dirname if it is a
  51. # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
  52. # contains magic characters (i.e. r'\\?\C:').
  53. if dirname != pathname and has_magic(dirname):
  54. dirs = _iglob(dirname, recursive)
  55. else:
  56. dirs = [dirname]
  57. if not has_magic(basename):
  58. glob_in_dir = glob0
  59. for dirname in dirs:
  60. for name in glob_in_dir(dirname, basename):
  61. yield os.path.join(dirname, name)
  62. # These 2 helper functions non-recursively glob inside a literal directory.
  63. # They return a list of basenames. `glob1` accepts a pattern while `glob0`
  64. # takes a literal basename (so it only has to check for its existence).
  65. def glob1(dirname, pattern):
  66. if not dirname:
  67. if isinstance(pattern, bytes):
  68. dirname = os.curdir.encode('ASCII')
  69. else:
  70. dirname = os.curdir
  71. try:
  72. names = os.listdir(dirname)
  73. except OSError:
  74. return []
  75. return fnmatch.filter(names, pattern)
  76. def glob0(dirname, basename):
  77. if not basename:
  78. # `os.path.split()` returns an empty basename for paths ending with a
  79. # directory separator. 'q*x/' should match only directories.
  80. if os.path.isdir(dirname):
  81. return [basename]
  82. else:
  83. if os.path.lexists(os.path.join(dirname, basename)):
  84. return [basename]
  85. return []
  86. # This helper function recursively yields relative pathnames inside a literal
  87. # directory.
  88. def glob2(dirname, pattern):
  89. assert _isrecursive(pattern)
  90. yield pattern[:0]
  91. for x in _rlistdir(dirname):
  92. yield x
  93. # Recursively yields relative pathnames inside a literal directory.
  94. def _rlistdir(dirname):
  95. if not dirname:
  96. if isinstance(dirname, bytes):
  97. dirname = os.curdir.encode('ASCII')
  98. else:
  99. dirname = os.curdir
  100. try:
  101. names = os.listdir(dirname)
  102. except os.error:
  103. return
  104. for x in names:
  105. yield x
  106. path = os.path.join(dirname, x) if dirname else x
  107. for y in _rlistdir(path):
  108. yield os.path.join(x, y)
  109. magic_check = re.compile('([*?[])')
  110. magic_check_bytes = re.compile(b'([*?[])')
  111. def has_magic(s):
  112. if isinstance(s, bytes):
  113. match = magic_check_bytes.search(s)
  114. else:
  115. match = magic_check.search(s)
  116. return match is not None
  117. def _isrecursive(pattern):
  118. if isinstance(pattern, bytes):
  119. return pattern == b'**'
  120. else:
  121. return pattern == '**'
  122. def escape(pathname):
  123. """Escape all special characters.
  124. """
  125. # Escaping is done by wrapping any of "*?[" between square brackets.
  126. # Metacharacters do not work in the drive part and shouldn't be escaped.
  127. drive, pathname = os.path.splitdrive(pathname)
  128. if isinstance(pathname, bytes):
  129. pathname = magic_check_bytes.sub(br'[\1]', pathname)
  130. else:
  131. pathname = magic_check.sub(r'[\1]', pathname)
  132. return drive + pathname