PdfImagePlugin.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. #
  2. # The Python Imaging Library.
  3. # $Id$
  4. #
  5. # PDF (Acrobat) file handling
  6. #
  7. # History:
  8. # 1996-07-16 fl Created
  9. # 1997-01-18 fl Fixed header
  10. # 2004-02-21 fl Fixes for 1/L/CMYK images, etc.
  11. # 2004-02-24 fl Fixes for 1 and P images.
  12. #
  13. # Copyright (c) 1997-2004 by Secret Labs AB. All rights reserved.
  14. # Copyright (c) 1996-1997 by Fredrik Lundh.
  15. #
  16. # See the README file for information on usage and redistribution.
  17. #
  18. ##
  19. # Image plugin for PDF images (output only).
  20. ##
  21. from __future__ import annotations
  22. import io
  23. import math
  24. import os
  25. import time
  26. from typing import IO, Any
  27. from . import Image, ImageFile, ImageSequence, PdfParser, __version__, features
  28. #
  29. # --------------------------------------------------------------------
  30. # object ids:
  31. # 1. catalogue
  32. # 2. pages
  33. # 3. image
  34. # 4. page
  35. # 5. page contents
  36. def _save_all(im: Image.Image, fp: IO[bytes], filename: str | bytes) -> None:
  37. _save(im, fp, filename, save_all=True)
  38. ##
  39. # (Internal) Image save plugin for the PDF format.
  40. def _write_image(
  41. im: Image.Image,
  42. filename: str | bytes,
  43. existing_pdf: PdfParser.PdfParser,
  44. image_refs: list[PdfParser.IndirectReference],
  45. ) -> tuple[PdfParser.IndirectReference, str]:
  46. # FIXME: Should replace ASCIIHexDecode with RunLengthDecode
  47. # (packbits) or LZWDecode (tiff/lzw compression). Note that
  48. # PDF 1.2 also supports Flatedecode (zip compression).
  49. params = None
  50. decode = None
  51. #
  52. # Get image characteristics
  53. width, height = im.size
  54. dict_obj: dict[str, Any] = {"BitsPerComponent": 8}
  55. if im.mode == "1":
  56. if features.check("libtiff"):
  57. decode_filter = "CCITTFaxDecode"
  58. dict_obj["BitsPerComponent"] = 1
  59. params = PdfParser.PdfArray(
  60. [
  61. PdfParser.PdfDict(
  62. {
  63. "K": -1,
  64. "BlackIs1": True,
  65. "Columns": width,
  66. "Rows": height,
  67. }
  68. )
  69. ]
  70. )
  71. else:
  72. decode_filter = "DCTDecode"
  73. dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceGray")
  74. procset = "ImageB" # grayscale
  75. elif im.mode == "L":
  76. decode_filter = "DCTDecode"
  77. # params = f"<< /Predictor 15 /Columns {width-2} >>"
  78. dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceGray")
  79. procset = "ImageB" # grayscale
  80. elif im.mode == "LA":
  81. decode_filter = "JPXDecode"
  82. # params = f"<< /Predictor 15 /Columns {width-2} >>"
  83. procset = "ImageB" # grayscale
  84. dict_obj["SMaskInData"] = 1
  85. elif im.mode == "P":
  86. decode_filter = "ASCIIHexDecode"
  87. palette = im.getpalette()
  88. assert palette is not None
  89. dict_obj["ColorSpace"] = [
  90. PdfParser.PdfName("Indexed"),
  91. PdfParser.PdfName("DeviceRGB"),
  92. len(palette) // 3 - 1,
  93. PdfParser.PdfBinary(palette),
  94. ]
  95. procset = "ImageI" # indexed color
  96. if "transparency" in im.info:
  97. smask = im.convert("LA").getchannel("A")
  98. smask.encoderinfo = {}
  99. image_ref = _write_image(smask, filename, existing_pdf, image_refs)[0]
  100. dict_obj["SMask"] = image_ref
  101. elif im.mode == "RGB":
  102. decode_filter = "DCTDecode"
  103. dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceRGB")
  104. procset = "ImageC" # color images
  105. elif im.mode == "RGBA":
  106. decode_filter = "JPXDecode"
  107. procset = "ImageC" # color images
  108. dict_obj["SMaskInData"] = 1
  109. elif im.mode == "CMYK":
  110. decode_filter = "DCTDecode"
  111. dict_obj["ColorSpace"] = PdfParser.PdfName("DeviceCMYK")
  112. procset = "ImageC" # color images
  113. decode = [1, 0, 1, 0, 1, 0, 1, 0]
  114. else:
  115. msg = f"cannot save mode {im.mode}"
  116. raise ValueError(msg)
  117. #
  118. # image
  119. op = io.BytesIO()
  120. if decode_filter == "ASCIIHexDecode":
  121. ImageFile._save(im, op, [ImageFile._Tile("hex", (0, 0) + im.size, 0, im.mode)])
  122. elif decode_filter == "CCITTFaxDecode":
  123. im.save(
  124. op,
  125. "TIFF",
  126. compression="group4",
  127. # use a single strip
  128. strip_size=math.ceil(width / 8) * height,
  129. )
  130. elif decode_filter == "DCTDecode":
  131. Image.SAVE["JPEG"](im, op, filename)
  132. elif decode_filter == "JPXDecode":
  133. del dict_obj["BitsPerComponent"]
  134. Image.SAVE["JPEG2000"](im, op, filename)
  135. else:
  136. msg = f"unsupported PDF filter ({decode_filter})"
  137. raise ValueError(msg)
  138. stream = op.getvalue()
  139. filter: PdfParser.PdfArray | PdfParser.PdfName
  140. if decode_filter == "CCITTFaxDecode":
  141. stream = stream[8:]
  142. filter = PdfParser.PdfArray([PdfParser.PdfName(decode_filter)])
  143. else:
  144. filter = PdfParser.PdfName(decode_filter)
  145. image_ref = image_refs.pop(0)
  146. existing_pdf.write_obj(
  147. image_ref,
  148. stream=stream,
  149. Type=PdfParser.PdfName("XObject"),
  150. Subtype=PdfParser.PdfName("Image"),
  151. Width=width, # * 72.0 / x_resolution,
  152. Height=height, # * 72.0 / y_resolution,
  153. Filter=filter,
  154. Decode=decode,
  155. DecodeParms=params,
  156. **dict_obj,
  157. )
  158. return image_ref, procset
  159. def _save(
  160. im: Image.Image, fp: IO[bytes], filename: str | bytes, save_all: bool = False
  161. ) -> None:
  162. is_appending = im.encoderinfo.get("append", False)
  163. filename_str = filename.decode() if isinstance(filename, bytes) else filename
  164. if is_appending:
  165. existing_pdf = PdfParser.PdfParser(f=fp, filename=filename_str, mode="r+b")
  166. else:
  167. existing_pdf = PdfParser.PdfParser(f=fp, filename=filename_str, mode="w+b")
  168. dpi = im.encoderinfo.get("dpi")
  169. if dpi:
  170. x_resolution = dpi[0]
  171. y_resolution = dpi[1]
  172. else:
  173. x_resolution = y_resolution = im.encoderinfo.get("resolution", 72.0)
  174. info = {
  175. "title": (
  176. None if is_appending else os.path.splitext(os.path.basename(filename))[0]
  177. ),
  178. "author": None,
  179. "subject": None,
  180. "keywords": None,
  181. "creator": None,
  182. "producer": None,
  183. "creationDate": None if is_appending else time.gmtime(),
  184. "modDate": None if is_appending else time.gmtime(),
  185. }
  186. for k, default in info.items():
  187. v = im.encoderinfo.get(k) if k in im.encoderinfo else default
  188. if v:
  189. existing_pdf.info[k[0].upper() + k[1:]] = v
  190. #
  191. # make sure image data is available
  192. im.load()
  193. existing_pdf.start_writing()
  194. existing_pdf.write_header()
  195. existing_pdf.write_comment(f"created by Pillow {__version__} PDF driver")
  196. #
  197. # pages
  198. ims = [im]
  199. if save_all:
  200. append_images = im.encoderinfo.get("append_images", [])
  201. for append_im in append_images:
  202. append_im.encoderinfo = im.encoderinfo.copy()
  203. ims.append(append_im)
  204. number_of_pages = 0
  205. image_refs = []
  206. page_refs = []
  207. contents_refs = []
  208. for im in ims:
  209. im_number_of_pages = 1
  210. if save_all:
  211. im_number_of_pages = getattr(im, "n_frames", 1)
  212. number_of_pages += im_number_of_pages
  213. for i in range(im_number_of_pages):
  214. image_refs.append(existing_pdf.next_object_id(0))
  215. if im.mode == "P" and "transparency" in im.info:
  216. image_refs.append(existing_pdf.next_object_id(0))
  217. page_refs.append(existing_pdf.next_object_id(0))
  218. contents_refs.append(existing_pdf.next_object_id(0))
  219. existing_pdf.pages.append(page_refs[-1])
  220. #
  221. # catalog and list of pages
  222. existing_pdf.write_catalog()
  223. page_number = 0
  224. for im_sequence in ims:
  225. im_pages: ImageSequence.Iterator | list[Image.Image] = (
  226. ImageSequence.Iterator(im_sequence) if save_all else [im_sequence]
  227. )
  228. for im in im_pages:
  229. image_ref, procset = _write_image(im, filename, existing_pdf, image_refs)
  230. #
  231. # page
  232. existing_pdf.write_page(
  233. page_refs[page_number],
  234. Resources=PdfParser.PdfDict(
  235. ProcSet=[PdfParser.PdfName("PDF"), PdfParser.PdfName(procset)],
  236. XObject=PdfParser.PdfDict(image=image_ref),
  237. ),
  238. MediaBox=[
  239. 0,
  240. 0,
  241. im.width * 72.0 / x_resolution,
  242. im.height * 72.0 / y_resolution,
  243. ],
  244. Contents=contents_refs[page_number],
  245. )
  246. #
  247. # page contents
  248. page_contents = b"q %f 0 0 %f 0 0 cm /image Do Q\n" % (
  249. im.width * 72.0 / x_resolution,
  250. im.height * 72.0 / y_resolution,
  251. )
  252. existing_pdf.write_obj(contents_refs[page_number], stream=page_contents)
  253. page_number += 1
  254. #
  255. # trailer
  256. existing_pdf.write_xref_and_trailer()
  257. if hasattr(fp, "flush"):
  258. fp.flush()
  259. existing_pdf.close()
  260. #
  261. # --------------------------------------------------------------------
  262. Image.register_save("PDF", _save)
  263. Image.register_save_all("PDF", _save_all)
  264. Image.register_extension("PDF", ".pdf")
  265. Image.register_mime("PDF", "application/pdf")