prepare.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. """Prepares a distribution for installation
  2. """
  3. # The following comment should be removed at some point in the future.
  4. # mypy: strict-optional=False
  5. import logging
  6. import mimetypes
  7. import os
  8. import shutil
  9. from typing import Dict, Iterable, List, Optional
  10. from pip._vendor.packaging.utils import canonicalize_name
  11. from pip._internal.distributions import make_distribution_for_install_requirement
  12. from pip._internal.distributions.installed import InstalledDistribution
  13. from pip._internal.exceptions import (
  14. DirectoryUrlHashUnsupported,
  15. HashMismatch,
  16. HashUnpinned,
  17. InstallationError,
  18. NetworkConnectionError,
  19. PreviousBuildDirError,
  20. VcsHashUnsupported,
  21. )
  22. from pip._internal.index.package_finder import PackageFinder
  23. from pip._internal.metadata import BaseDistribution
  24. from pip._internal.models.link import Link
  25. from pip._internal.models.wheel import Wheel
  26. from pip._internal.network.download import BatchDownloader, Downloader
  27. from pip._internal.network.lazy_wheel import (
  28. HTTPRangeRequestUnsupported,
  29. dist_from_wheel_url,
  30. )
  31. from pip._internal.network.session import PipSession
  32. from pip._internal.req.req_install import InstallRequirement
  33. from pip._internal.req.req_tracker import RequirementTracker
  34. from pip._internal.utils.filesystem import copy2_fixed
  35. from pip._internal.utils.hashes import Hashes, MissingHashes
  36. from pip._internal.utils.logging import indent_log
  37. from pip._internal.utils.misc import display_path, hide_url, is_installable_dir, rmtree
  38. from pip._internal.utils.temp_dir import TempDirectory
  39. from pip._internal.utils.unpacking import unpack_file
  40. from pip._internal.vcs import vcs
  41. logger = logging.getLogger(__name__)
  42. def _get_prepared_distribution(
  43. req: InstallRequirement,
  44. req_tracker: RequirementTracker,
  45. finder: PackageFinder,
  46. build_isolation: bool,
  47. ) -> BaseDistribution:
  48. """Prepare a distribution for installation."""
  49. abstract_dist = make_distribution_for_install_requirement(req)
  50. with req_tracker.track(req):
  51. abstract_dist.prepare_distribution_metadata(finder, build_isolation)
  52. return abstract_dist.get_metadata_distribution()
  53. def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None:
  54. vcs_backend = vcs.get_backend_for_scheme(link.scheme)
  55. assert vcs_backend is not None
  56. vcs_backend.unpack(location, url=hide_url(link.url), verbosity=verbosity)
  57. class File:
  58. def __init__(self, path: str, content_type: Optional[str]) -> None:
  59. self.path = path
  60. if content_type is None:
  61. self.content_type = mimetypes.guess_type(path)[0]
  62. else:
  63. self.content_type = content_type
  64. def get_http_url(
  65. link: Link,
  66. download: Downloader,
  67. download_dir: Optional[str] = None,
  68. hashes: Optional[Hashes] = None,
  69. ) -> File:
  70. temp_dir = TempDirectory(kind="unpack", globally_managed=True)
  71. # If a download dir is specified, is the file already downloaded there?
  72. already_downloaded_path = None
  73. if download_dir:
  74. already_downloaded_path = _check_download_dir(link, download_dir, hashes)
  75. if already_downloaded_path:
  76. from_path = already_downloaded_path
  77. content_type = None
  78. else:
  79. # let's download to a tmp dir
  80. from_path, content_type = download(link, temp_dir.path)
  81. if hashes:
  82. hashes.check_against_path(from_path)
  83. return File(from_path, content_type)
  84. def _copy2_ignoring_special_files(src: str, dest: str) -> None:
  85. """Copying special files is not supported, but as a convenience to users
  86. we skip errors copying them. This supports tools that may create e.g.
  87. socket files in the project source directory.
  88. """
  89. try:
  90. copy2_fixed(src, dest)
  91. except shutil.SpecialFileError as e:
  92. # SpecialFileError may be raised due to either the source or
  93. # destination. If the destination was the cause then we would actually
  94. # care, but since the destination directory is deleted prior to
  95. # copy we ignore all of them assuming it is caused by the source.
  96. logger.warning(
  97. "Ignoring special file error '%s' encountered copying %s to %s.",
  98. str(e),
  99. src,
  100. dest,
  101. )
  102. def _copy_source_tree(source: str, target: str) -> None:
  103. target_abspath = os.path.abspath(target)
  104. target_basename = os.path.basename(target_abspath)
  105. target_dirname = os.path.dirname(target_abspath)
  106. def ignore(d: str, names: List[str]) -> List[str]:
  107. skipped: List[str] = []
  108. if d == source:
  109. # Pulling in those directories can potentially be very slow,
  110. # exclude the following directories if they appear in the top
  111. # level dir (and only it).
  112. # See discussion at https://github.com/pypa/pip/pull/6770
  113. skipped += [".tox", ".nox"]
  114. if os.path.abspath(d) == target_dirname:
  115. # Prevent an infinite recursion if the target is in source.
  116. # This can happen when TMPDIR is set to ${PWD}/...
  117. # and we copy PWD to TMPDIR.
  118. skipped += [target_basename]
  119. return skipped
  120. shutil.copytree(
  121. source,
  122. target,
  123. ignore=ignore,
  124. symlinks=True,
  125. copy_function=_copy2_ignoring_special_files,
  126. )
  127. def get_file_url(
  128. link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None
  129. ) -> File:
  130. """Get file and optionally check its hash."""
  131. # If a download dir is specified, is the file already there and valid?
  132. already_downloaded_path = None
  133. if download_dir:
  134. already_downloaded_path = _check_download_dir(link, download_dir, hashes)
  135. if already_downloaded_path:
  136. from_path = already_downloaded_path
  137. else:
  138. from_path = link.file_path
  139. # If --require-hashes is off, `hashes` is either empty, the
  140. # link's embedded hash, or MissingHashes; it is required to
  141. # match. If --require-hashes is on, we are satisfied by any
  142. # hash in `hashes` matching: a URL-based or an option-based
  143. # one; no internet-sourced hash will be in `hashes`.
  144. if hashes:
  145. hashes.check_against_path(from_path)
  146. return File(from_path, None)
  147. def unpack_url(
  148. link: Link,
  149. location: str,
  150. download: Downloader,
  151. verbosity: int,
  152. download_dir: Optional[str] = None,
  153. hashes: Optional[Hashes] = None,
  154. ) -> Optional[File]:
  155. """Unpack link into location, downloading if required.
  156. :param hashes: A Hashes object, one of whose embedded hashes must match,
  157. or HashMismatch will be raised. If the Hashes is empty, no matches are
  158. required, and unhashable types of requirements (like VCS ones, which
  159. would ordinarily raise HashUnsupported) are allowed.
  160. """
  161. # non-editable vcs urls
  162. if link.is_vcs:
  163. unpack_vcs_link(link, location, verbosity=verbosity)
  164. return None
  165. # Once out-of-tree-builds are no longer supported, could potentially
  166. # replace the below condition with `assert not link.is_existing_dir`
  167. # - unpack_url does not need to be called for in-tree-builds.
  168. #
  169. # As further cleanup, _copy_source_tree and accompanying tests can
  170. # be removed.
  171. #
  172. # TODO when use-deprecated=out-of-tree-build is removed
  173. if link.is_existing_dir():
  174. if os.path.isdir(location):
  175. rmtree(location)
  176. _copy_source_tree(link.file_path, location)
  177. return None
  178. # file urls
  179. if link.is_file:
  180. file = get_file_url(link, download_dir, hashes=hashes)
  181. # http urls
  182. else:
  183. file = get_http_url(
  184. link,
  185. download,
  186. download_dir,
  187. hashes=hashes,
  188. )
  189. # unpack the archive to the build dir location. even when only downloading
  190. # archives, they have to be unpacked to parse dependencies, except wheels
  191. if not link.is_wheel:
  192. unpack_file(file.path, location, file.content_type)
  193. return file
  194. def _check_download_dir(
  195. link: Link, download_dir: str, hashes: Optional[Hashes]
  196. ) -> Optional[str]:
  197. """Check download_dir for previously downloaded file with correct hash
  198. If a correct file is found return its path else None
  199. """
  200. download_path = os.path.join(download_dir, link.filename)
  201. if not os.path.exists(download_path):
  202. return None
  203. # If already downloaded, does its hash match?
  204. logger.info("File was already downloaded %s", download_path)
  205. if hashes:
  206. try:
  207. hashes.check_against_path(download_path)
  208. except HashMismatch:
  209. logger.warning(
  210. "Previously-downloaded file %s has bad hash. Re-downloading.",
  211. download_path,
  212. )
  213. os.unlink(download_path)
  214. return None
  215. return download_path
  216. class RequirementPreparer:
  217. """Prepares a Requirement"""
  218. def __init__(
  219. self,
  220. build_dir: str,
  221. download_dir: Optional[str],
  222. src_dir: str,
  223. build_isolation: bool,
  224. req_tracker: RequirementTracker,
  225. session: PipSession,
  226. progress_bar: str,
  227. finder: PackageFinder,
  228. require_hashes: bool,
  229. use_user_site: bool,
  230. lazy_wheel: bool,
  231. verbosity: int,
  232. in_tree_build: bool,
  233. ) -> None:
  234. super().__init__()
  235. self.src_dir = src_dir
  236. self.build_dir = build_dir
  237. self.req_tracker = req_tracker
  238. self._session = session
  239. self._download = Downloader(session, progress_bar)
  240. self._batch_download = BatchDownloader(session, progress_bar)
  241. self.finder = finder
  242. # Where still-packed archives should be written to. If None, they are
  243. # not saved, and are deleted immediately after unpacking.
  244. self.download_dir = download_dir
  245. # Is build isolation allowed?
  246. self.build_isolation = build_isolation
  247. # Should hash-checking be required?
  248. self.require_hashes = require_hashes
  249. # Should install in user site-packages?
  250. self.use_user_site = use_user_site
  251. # Should wheels be downloaded lazily?
  252. self.use_lazy_wheel = lazy_wheel
  253. # How verbose should underlying tooling be?
  254. self.verbosity = verbosity
  255. # Should in-tree builds be used for local paths?
  256. self.in_tree_build = in_tree_build
  257. # Memoized downloaded files, as mapping of url: path.
  258. self._downloaded: Dict[str, str] = {}
  259. # Previous "header" printed for a link-based InstallRequirement
  260. self._previous_requirement_header = ("", "")
  261. def _log_preparing_link(self, req: InstallRequirement) -> None:
  262. """Provide context for the requirement being prepared."""
  263. if req.link.is_file and not req.original_link_is_in_wheel_cache:
  264. message = "Processing %s"
  265. information = str(display_path(req.link.file_path))
  266. else:
  267. message = "Collecting %s"
  268. information = str(req.req or req)
  269. if (message, information) != self._previous_requirement_header:
  270. self._previous_requirement_header = (message, information)
  271. logger.info(message, information)
  272. if req.original_link_is_in_wheel_cache:
  273. with indent_log():
  274. logger.info("Using cached %s", req.link.filename)
  275. def _ensure_link_req_src_dir(
  276. self, req: InstallRequirement, parallel_builds: bool
  277. ) -> None:
  278. """Ensure source_dir of a linked InstallRequirement."""
  279. # Since source_dir is only set for editable requirements.
  280. if req.link.is_wheel:
  281. # We don't need to unpack wheels, so no need for a source
  282. # directory.
  283. return
  284. assert req.source_dir is None
  285. if req.link.is_existing_dir() and self.in_tree_build:
  286. # build local directories in-tree
  287. req.source_dir = req.link.file_path
  288. return
  289. # We always delete unpacked sdists after pip runs.
  290. req.ensure_has_source_dir(
  291. self.build_dir,
  292. autodelete=True,
  293. parallel_builds=parallel_builds,
  294. )
  295. # If a checkout exists, it's unwise to keep going. version
  296. # inconsistencies are logged later, but do not fail the
  297. # installation.
  298. # FIXME: this won't upgrade when there's an existing
  299. # package unpacked in `req.source_dir`
  300. # TODO: this check is now probably dead code
  301. if is_installable_dir(req.source_dir):
  302. raise PreviousBuildDirError(
  303. "pip can't proceed with requirements '{}' due to a"
  304. "pre-existing build directory ({}). This is likely "
  305. "due to a previous installation that failed . pip is "
  306. "being responsible and not assuming it can delete this. "
  307. "Please delete it and try again.".format(req, req.source_dir)
  308. )
  309. def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
  310. # By the time this is called, the requirement's link should have
  311. # been checked so we can tell what kind of requirements req is
  312. # and raise some more informative errors than otherwise.
  313. # (For example, we can raise VcsHashUnsupported for a VCS URL
  314. # rather than HashMissing.)
  315. if not self.require_hashes:
  316. return req.hashes(trust_internet=True)
  317. # We could check these first 2 conditions inside unpack_url
  318. # and save repetition of conditions, but then we would
  319. # report less-useful error messages for unhashable
  320. # requirements, complaining that there's no hash provided.
  321. if req.link.is_vcs:
  322. raise VcsHashUnsupported()
  323. if req.link.is_existing_dir():
  324. raise DirectoryUrlHashUnsupported()
  325. # Unpinned packages are asking for trouble when a new version
  326. # is uploaded. This isn't a security check, but it saves users
  327. # a surprising hash mismatch in the future.
  328. # file:/// URLs aren't pinnable, so don't complain about them
  329. # not being pinned.
  330. if req.original_link is None and not req.is_pinned:
  331. raise HashUnpinned()
  332. # If known-good hashes are missing for this requirement,
  333. # shim it with a facade object that will provoke hash
  334. # computation and then raise a HashMissing exception
  335. # showing the user what the hash should be.
  336. return req.hashes(trust_internet=False) or MissingHashes()
  337. def _fetch_metadata_using_lazy_wheel(
  338. self,
  339. link: Link,
  340. ) -> Optional[BaseDistribution]:
  341. """Fetch metadata using lazy wheel, if possible."""
  342. if not self.use_lazy_wheel:
  343. return None
  344. if self.require_hashes:
  345. logger.debug("Lazy wheel is not used as hash checking is required")
  346. return None
  347. if link.is_file or not link.is_wheel:
  348. logger.debug(
  349. "Lazy wheel is not used as %r does not points to a remote wheel",
  350. link,
  351. )
  352. return None
  353. wheel = Wheel(link.filename)
  354. name = canonicalize_name(wheel.name)
  355. logger.info(
  356. "Obtaining dependency information from %s %s",
  357. name,
  358. wheel.version,
  359. )
  360. url = link.url.split("#", 1)[0]
  361. try:
  362. return dist_from_wheel_url(name, url, self._session)
  363. except HTTPRangeRequestUnsupported:
  364. logger.debug("%s does not support range requests", url)
  365. return None
  366. def _complete_partial_requirements(
  367. self,
  368. partially_downloaded_reqs: Iterable[InstallRequirement],
  369. parallel_builds: bool = False,
  370. ) -> None:
  371. """Download any requirements which were only fetched by metadata."""
  372. # Download to a temporary directory. These will be copied over as
  373. # needed for downstream 'download', 'wheel', and 'install' commands.
  374. temp_dir = TempDirectory(kind="unpack", globally_managed=True).path
  375. # Map each link to the requirement that owns it. This allows us to set
  376. # `req.local_file_path` on the appropriate requirement after passing
  377. # all the links at once into BatchDownloader.
  378. links_to_fully_download: Dict[Link, InstallRequirement] = {}
  379. for req in partially_downloaded_reqs:
  380. assert req.link
  381. links_to_fully_download[req.link] = req
  382. batch_download = self._batch_download(
  383. links_to_fully_download.keys(),
  384. temp_dir,
  385. )
  386. for link, (filepath, _) in batch_download:
  387. logger.debug("Downloading link %s to %s", link, filepath)
  388. req = links_to_fully_download[link]
  389. req.local_file_path = filepath
  390. # This step is necessary to ensure all lazy wheels are processed
  391. # successfully by the 'download', 'wheel', and 'install' commands.
  392. for req in partially_downloaded_reqs:
  393. self._prepare_linked_requirement(req, parallel_builds)
  394. def prepare_linked_requirement(
  395. self, req: InstallRequirement, parallel_builds: bool = False
  396. ) -> BaseDistribution:
  397. """Prepare a requirement to be obtained from req.link."""
  398. assert req.link
  399. link = req.link
  400. self._log_preparing_link(req)
  401. with indent_log():
  402. # Check if the relevant file is already available
  403. # in the download directory
  404. file_path = None
  405. if self.download_dir is not None and link.is_wheel:
  406. hashes = self._get_linked_req_hashes(req)
  407. file_path = _check_download_dir(req.link, self.download_dir, hashes)
  408. if file_path is not None:
  409. # The file is already available, so mark it as downloaded
  410. self._downloaded[req.link.url] = file_path
  411. else:
  412. # The file is not available, attempt to fetch only metadata
  413. wheel_dist = self._fetch_metadata_using_lazy_wheel(link)
  414. if wheel_dist is not None:
  415. req.needs_more_preparation = True
  416. return wheel_dist
  417. # None of the optimizations worked, fully prepare the requirement
  418. return self._prepare_linked_requirement(req, parallel_builds)
  419. def prepare_linked_requirements_more(
  420. self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False
  421. ) -> None:
  422. """Prepare linked requirements more, if needed."""
  423. reqs = [req for req in reqs if req.needs_more_preparation]
  424. for req in reqs:
  425. # Determine if any of these requirements were already downloaded.
  426. if self.download_dir is not None and req.link.is_wheel:
  427. hashes = self._get_linked_req_hashes(req)
  428. file_path = _check_download_dir(req.link, self.download_dir, hashes)
  429. if file_path is not None:
  430. self._downloaded[req.link.url] = file_path
  431. req.needs_more_preparation = False
  432. # Prepare requirements we found were already downloaded for some
  433. # reason. The other downloads will be completed separately.
  434. partially_downloaded_reqs: List[InstallRequirement] = []
  435. for req in reqs:
  436. if req.needs_more_preparation:
  437. partially_downloaded_reqs.append(req)
  438. else:
  439. self._prepare_linked_requirement(req, parallel_builds)
  440. # TODO: separate this part out from RequirementPreparer when the v1
  441. # resolver can be removed!
  442. self._complete_partial_requirements(
  443. partially_downloaded_reqs,
  444. parallel_builds=parallel_builds,
  445. )
  446. def _prepare_linked_requirement(
  447. self, req: InstallRequirement, parallel_builds: bool
  448. ) -> BaseDistribution:
  449. assert req.link
  450. link = req.link
  451. self._ensure_link_req_src_dir(req, parallel_builds)
  452. hashes = self._get_linked_req_hashes(req)
  453. if link.is_existing_dir() and self.in_tree_build:
  454. local_file = None
  455. elif link.url not in self._downloaded:
  456. try:
  457. local_file = unpack_url(
  458. link,
  459. req.source_dir,
  460. self._download,
  461. self.verbosity,
  462. self.download_dir,
  463. hashes,
  464. )
  465. except NetworkConnectionError as exc:
  466. raise InstallationError(
  467. "Could not install requirement {} because of HTTP "
  468. "error {} for URL {}".format(req, exc, link)
  469. )
  470. else:
  471. file_path = self._downloaded[link.url]
  472. if hashes:
  473. hashes.check_against_path(file_path)
  474. local_file = File(file_path, content_type=None)
  475. # For use in later processing,
  476. # preserve the file path on the requirement.
  477. if local_file:
  478. req.local_file_path = local_file.path
  479. dist = _get_prepared_distribution(
  480. req,
  481. self.req_tracker,
  482. self.finder,
  483. self.build_isolation,
  484. )
  485. return dist
  486. def save_linked_requirement(self, req: InstallRequirement) -> None:
  487. assert self.download_dir is not None
  488. assert req.link is not None
  489. link = req.link
  490. if link.is_vcs or (link.is_existing_dir() and req.editable):
  491. # Make a .zip of the source_dir we already created.
  492. req.archive(self.download_dir)
  493. return
  494. if link.is_existing_dir():
  495. logger.debug(
  496. "Not copying link to destination directory "
  497. "since it is a directory: %s",
  498. link,
  499. )
  500. return
  501. if req.local_file_path is None:
  502. # No distribution was downloaded for this requirement.
  503. return
  504. download_location = os.path.join(self.download_dir, link.filename)
  505. if not os.path.exists(download_location):
  506. shutil.copy(req.local_file_path, download_location)
  507. download_path = display_path(download_location)
  508. logger.info("Saved %s", download_path)
  509. def prepare_editable_requirement(
  510. self,
  511. req: InstallRequirement,
  512. ) -> BaseDistribution:
  513. """Prepare an editable requirement."""
  514. assert req.editable, "cannot prepare a non-editable req as editable"
  515. logger.info("Obtaining %s", req)
  516. with indent_log():
  517. if self.require_hashes:
  518. raise InstallationError(
  519. "The editable requirement {} cannot be installed when "
  520. "requiring hashes, because there is no single file to "
  521. "hash.".format(req)
  522. )
  523. req.ensure_has_source_dir(self.src_dir)
  524. req.update_editable()
  525. dist = _get_prepared_distribution(
  526. req,
  527. self.req_tracker,
  528. self.finder,
  529. self.build_isolation,
  530. )
  531. req.check_if_exists(self.use_user_site)
  532. return dist
  533. def prepare_installed_requirement(
  534. self,
  535. req: InstallRequirement,
  536. skip_reason: str,
  537. ) -> BaseDistribution:
  538. """Prepare an already-installed requirement."""
  539. assert req.satisfied_by, "req should have been satisfied but isn't"
  540. assert skip_reason is not None, (
  541. "did not get skip reason skipped but req.satisfied_by "
  542. "is set to {}".format(req.satisfied_by)
  543. )
  544. logger.info(
  545. "Requirement %s: %s (%s)", skip_reason, req, req.satisfied_by.version
  546. )
  547. with indent_log():
  548. if self.require_hashes:
  549. logger.debug(
  550. "Since it is already installed, we are trusting this "
  551. "package without checking its hash. To ensure a "
  552. "completely repeatable environment, install into an "
  553. "empty virtualenv."
  554. )
  555. return InstalledDistribution(req).get_metadata_distribution()