file.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
# Copyright (C) 2003-2007  Robey Pointer <robeypointer@gmail.com>
#
# This file is part of paramiko.
#
# Paramiko is free software; you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 2.1 of the License, or (at your option)
# any later version.
#
# Paramiko is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Paramiko; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  18. from paramiko.common import (
  19. linefeed_byte_value,
  20. crlf,
  21. cr_byte,
  22. linefeed_byte,
  23. cr_byte_value,
  24. )
  25. from paramiko.py3compat import BytesIO, PY2, u, bytes_types, text_type
  26. from paramiko.util import ClosingContextManager
class BufferedFile(ClosingContextManager):
    """
    Reusable base class to implement Python-style file buffering around a
    simpler stream.

    Subclasses provide the raw stream by overriding `_read`, `_write` and
    (for append mode) `_get_size`, and call `_set_mode` to configure the
    open mode and buffering.
    """

    # Buffer size used when `_set_mode` is not given an explicit size > 1;
    # also the chunk size used by `read` when reading to EOF.
    _DEFAULT_BUFSIZE = 8192

    # ``whence`` values for `seek`: absolute, relative to the current
    # position, relative to the end of the file.
    SEEK_SET = 0
    SEEK_CUR = 1
    SEEK_END = 2

    # Bit flags stored in ``self._flags``; they are set by `_set_mode` from
    # the mode string and the requested buffer size.
    FLAG_READ = 0x1
    FLAG_WRITE = 0x2
    FLAG_APPEND = 0x4
    FLAG_BINARY = 0x10
    FLAG_BUFFERED = 0x20
    FLAG_LINE_BUFFERED = 0x40
    FLAG_UNIVERSAL_NEWLINE = 0x80
  43. def __init__(self):
  44. self.newlines = None
  45. self._flags = 0
  46. self._bufsize = self._DEFAULT_BUFSIZE
  47. self._wbuffer = BytesIO()
  48. self._rbuffer = bytes()
  49. self._at_trailing_cr = False
  50. self._closed = False
  51. # pos - position within the file, according to the user
  52. # realpos - position according the OS
  53. # (these may be different because we buffer for line reading)
  54. self._pos = self._realpos = 0
  55. # size only matters for seekable files
  56. self._size = 0
  57. def __del__(self):
  58. self.close()
  59. def __iter__(self):
  60. """
  61. Returns an iterator that can be used to iterate over the lines in this
  62. file. This iterator happens to return the file itself, since a file is
  63. its own iterator.
  64. :raises: ``ValueError`` -- if the file is closed.
  65. """
  66. if self._closed:
  67. raise ValueError("I/O operation on closed file")
  68. return self
  69. def close(self):
  70. """
  71. Close the file. Future read and write operations will fail.
  72. """
  73. self.flush()
  74. self._closed = True
  75. def flush(self):
  76. """
  77. Write out any data in the write buffer. This may do nothing if write
  78. buffering is not turned on.
  79. """
  80. self._write_all(self._wbuffer.getvalue())
  81. self._wbuffer = BytesIO()
  82. return
  83. if PY2:
  84. def next(self):
  85. """
  86. Returns the next line from the input, or raises
  87. ``StopIteration`` when EOF is hit. Unlike Python file
  88. objects, it's okay to mix calls to `next` and `readline`.
  89. :raises: ``StopIteration`` -- when the end of the file is reached.
  90. :returns: a line (`str`) read from the file.
  91. """
  92. line = self.readline()
  93. if not line:
  94. raise StopIteration
  95. return line
  96. else:
  97. def __next__(self):
  98. """
  99. Returns the next line from the input, or raises ``StopIteration``
  100. when EOF is hit. Unlike python file objects, it's okay to mix
  101. calls to `.next` and `.readline`.
  102. :raises: ``StopIteration`` -- when the end of the file is reached.
  103. :returns: a line (`str`) read from the file.
  104. """
  105. line = self.readline()
  106. if not line:
  107. raise StopIteration
  108. return line
  109. def readable(self):
  110. """
  111. Check if the file can be read from.
  112. :returns:
  113. `True` if the file can be read from. If `False`, `read` will raise
  114. an exception.
  115. """
  116. return (self._flags & self.FLAG_READ) == self.FLAG_READ
  117. def writable(self):
  118. """
  119. Check if the file can be written to.
  120. :returns:
  121. `True` if the file can be written to. If `False`, `write` will
  122. raise an exception.
  123. """
  124. return (self._flags & self.FLAG_WRITE) == self.FLAG_WRITE
  125. def seekable(self):
  126. """
  127. Check if the file supports random access.
  128. :returns:
  129. `True` if the file supports random access. If `False`, `seek` will
  130. raise an exception.
  131. """
  132. return False
  133. def readinto(self, buff):
  134. """
  135. Read up to ``len(buff)`` bytes into ``bytearray`` *buff* and return the
  136. number of bytes read.
  137. :returns:
  138. The number of bytes read.
  139. """
  140. data = self.read(len(buff))
  141. buff[: len(data)] = data
  142. return len(data)
  143. def read(self, size=None):
  144. """
  145. Read at most ``size`` bytes from the file (less if we hit the end of
  146. the file first). If the ``size`` argument is negative or omitted,
  147. read all the remaining data in the file.
  148. .. note::
  149. ``'b'`` mode flag is ignored (``self.FLAG_BINARY`` in
  150. ``self._flags``), because SSH treats all files as binary, since we
  151. have no idea what encoding the file is in, or even if the file is
  152. text data.
  153. :param int size: maximum number of bytes to read
  154. :returns:
  155. data read from the file (as bytes), or an empty string if EOF was
  156. encountered immediately
  157. """
  158. if self._closed:
  159. raise IOError("File is closed")
  160. if not (self._flags & self.FLAG_READ):
  161. raise IOError("File is not open for reading")
  162. if (size is None) or (size < 0):
  163. # go for broke
  164. result = bytearray(self._rbuffer)
  165. self._rbuffer = bytes()
  166. self._pos += len(result)
  167. while True:
  168. try:
  169. new_data = self._read(self._DEFAULT_BUFSIZE)
  170. except EOFError:
  171. new_data = None
  172. if (new_data is None) or (len(new_data) == 0):
  173. break
  174. result.extend(new_data)
  175. self._realpos += len(new_data)
  176. self._pos += len(new_data)
  177. return bytes(result)
  178. if size <= len(self._rbuffer):
  179. result = self._rbuffer[:size]
  180. self._rbuffer = self._rbuffer[size:]
  181. self._pos += len(result)
  182. return result
  183. while len(self._rbuffer) < size:
  184. read_size = size - len(self._rbuffer)
  185. if self._flags & self.FLAG_BUFFERED:
  186. read_size = max(self._bufsize, read_size)
  187. try:
  188. new_data = self._read(read_size)
  189. except EOFError:
  190. new_data = None
  191. if (new_data is None) or (len(new_data) == 0):
  192. break
  193. self._rbuffer += new_data
  194. self._realpos += len(new_data)
  195. result = self._rbuffer[:size]
  196. self._rbuffer = self._rbuffer[size:]
  197. self._pos += len(result)
  198. return result
    def readline(self, size=None):
        """
        Read one entire line from the file.  A trailing newline character is
        kept in the string (but may be absent when a file ends with an
        incomplete line).  If the size argument is present and non-negative, it
        is a maximum byte count (including the trailing newline) and an
        incomplete line may be returned.  An empty string is returned only when
        EOF is encountered immediately.

        .. note::
            Unlike stdio's ``fgets``, the returned string contains null
            characters (``'\\0'``) if they occurred in the input.

        :param int size: maximum length of returned string.
        :returns:
            next line of the file, or an empty string if the end of the
            file has been reached.

            If the file was opened in binary (``'b'``) mode: bytes are returned
            Else: the encoding of the file is assumed to be UTF-8 and character
            strings (`str`) are returned
        :raises: ``IOError`` -- if the file is closed or not open for reading.
        """
        # it's almost silly how complex this function is.
        if self._closed:
            raise IOError("File is closed")
        if not (self._flags & self.FLAG_READ):
            raise IOError("File not open for reading")
        # start from whatever earlier reads left in the read buffer.
        line = self._rbuffer
        truncated = False
        # phase 1: gather data until a line terminator is present, the size
        # limit is reached, or the stream hits EOF.
        while True:
            if (
                self._at_trailing_cr
                and self._flags & self.FLAG_UNIVERSAL_NEWLINE
                and len(line) > 0
            ):
                # edge case: the newline may be '\r\n' and we may have read
                # only the first '\r' last time.
                if line[0] == linefeed_byte_value:
                    line = line[1:]
                    self._record_newline(crlf)
                else:
                    self._record_newline(cr_byte)
                self._at_trailing_cr = False
            # check size before looking for a linefeed, in case we already have
            # enough.
            if (size is not None) and (size >= 0):
                if len(line) >= size:
                    # truncate line; the overflow is parked in _rbuffer and
                    # merged back below if a newline is found.
                    self._rbuffer = line[size:]
                    line = line[:size]
                    truncated = True
                    break
                # only ask the stream for what is still missing.
                n = size - len(line)
            else:
                n = self._bufsize
            if linefeed_byte in line or (
                self._flags & self.FLAG_UNIVERSAL_NEWLINE and cr_byte in line
            ):
                break
            try:
                new_data = self._read(n)
            except EOFError:
                new_data = None
            if (new_data is None) or (len(new_data) == 0):
                # EOF: hand back whatever was collected (possibly nothing).
                self._rbuffer = bytes()
                self._pos += len(line)
                return line if self._flags & self.FLAG_BINARY else u(line)
            line += new_data
            self._realpos += len(new_data)
        # phase 2: find the newline
        pos = line.find(linefeed_byte)
        if self._flags & self.FLAG_UNIVERSAL_NEWLINE:
            # a '\r' that comes first (or is the only terminator) wins.
            rpos = line.find(cr_byte)
            if (rpos >= 0) and (rpos < pos or pos < 0):
                pos = rpos
        if pos == -1:
            # we couldn't find a newline in the truncated string, return it
            self._pos += len(line)
            return line if self._flags & self.FLAG_BINARY else u(line)
        # xpos is the index just past the terminator; step over the '\n' of
        # a '\r\n' pair so both bytes are consumed together.
        xpos = pos + 1
        if (
            line[pos] == cr_byte_value
            and xpos < len(line)
            and line[xpos] == linefeed_byte_value
        ):
            xpos += 1
        # if the string was truncated, _rbuffer needs to have the string after
        # the newline character plus the truncated part of the line we stored
        # earlier in _rbuffer
        if truncated:
            self._rbuffer = line[xpos:] + self._rbuffer
        else:
            self._rbuffer = line[xpos:]

        # lf is the terminator as found in the data; the returned line
        # always ends in a single linefeed instead.
        lf = line[pos:xpos]
        line = line[:pos] + linefeed_byte
        if (len(self._rbuffer) == 0) and (lf == cr_byte):
            # we could read the line up to a '\r' and there could still be a
            # '\n' following that we read next time.  note that and eat it.
            self._at_trailing_cr = True
        else:
            self._record_newline(lf)
        self._pos += len(line)
        return line if self._flags & self.FLAG_BINARY else u(line)
  299. def readlines(self, sizehint=None):
  300. """
  301. Read all remaining lines using `readline` and return them as a list.
  302. If the optional ``sizehint`` argument is present, instead of reading up
  303. to EOF, whole lines totalling approximately sizehint bytes (possibly
  304. after rounding up to an internal buffer size) are read.
  305. :param int sizehint: desired maximum number of bytes to read.
  306. :returns: list of lines read from the file.
  307. """
  308. lines = []
  309. byte_count = 0
  310. while True:
  311. line = self.readline()
  312. if len(line) == 0:
  313. break
  314. lines.append(line)
  315. byte_count += len(line)
  316. if (sizehint is not None) and (byte_count >= sizehint):
  317. break
  318. return lines
  319. def seek(self, offset, whence=0):
  320. """
  321. Set the file's current position, like stdio's ``fseek``. Not all file
  322. objects support seeking.
  323. .. note::
  324. If a file is opened in append mode (``'a'`` or ``'a+'``), any seek
  325. operations will be undone at the next write (as the file position
  326. will move back to the end of the file).
  327. :param int offset:
  328. position to move to within the file, relative to ``whence``.
  329. :param int whence:
  330. type of movement: 0 = absolute; 1 = relative to the current
  331. position; 2 = relative to the end of the file.
  332. :raises: ``IOError`` -- if the file doesn't support random access.
  333. """
  334. raise IOError("File does not support seeking.")
  335. def tell(self):
  336. """
  337. Return the file's current position. This may not be accurate or
  338. useful if the underlying file doesn't support random access, or was
  339. opened in append mode.
  340. :returns: file position (`number <int>` of bytes).
  341. """
  342. return self._pos
  343. def write(self, data):
  344. """
  345. Write data to the file. If write buffering is on (``bufsize`` was
  346. specified and non-zero), some or all of the data may not actually be
  347. written yet. (Use `flush` or `close` to force buffered data to be
  348. written out.)
  349. :param data: ``str``/``bytes`` data to write
  350. """
  351. if isinstance(data, text_type):
  352. # Accept text and encode as utf-8 for compatibility only.
  353. data = data.encode("utf-8")
  354. if self._closed:
  355. raise IOError("File is closed")
  356. if not (self._flags & self.FLAG_WRITE):
  357. raise IOError("File not open for writing")
  358. if not (self._flags & self.FLAG_BUFFERED):
  359. self._write_all(data)
  360. return
  361. self._wbuffer.write(data)
  362. if self._flags & self.FLAG_LINE_BUFFERED:
  363. # only scan the new data for linefeed, to avoid wasting time.
  364. last_newline_pos = data.rfind(linefeed_byte)
  365. if last_newline_pos >= 0:
  366. wbuf = self._wbuffer.getvalue()
  367. last_newline_pos += len(wbuf) - len(data)
  368. self._write_all(wbuf[: last_newline_pos + 1])
  369. self._wbuffer = BytesIO()
  370. self._wbuffer.write(wbuf[last_newline_pos + 1 :])
  371. return
  372. # even if we're line buffering, if the buffer has grown past the
  373. # buffer size, force a flush.
  374. if self._wbuffer.tell() >= self._bufsize:
  375. self.flush()
  376. return
  377. def writelines(self, sequence):
  378. """
  379. Write a sequence of strings to the file. The sequence can be any
  380. iterable object producing strings, typically a list of strings. (The
  381. name is intended to match `readlines`; `writelines` does not add line
  382. separators.)
  383. :param sequence: an iterable sequence of strings.
  384. """
  385. for line in sequence:
  386. self.write(line)
  387. return
  388. def xreadlines(self):
  389. """
  390. Identical to ``iter(f)``. This is a deprecated file interface that
  391. predates Python iterator support.
  392. """
  393. return self
  394. @property
  395. def closed(self):
  396. return self._closed
  397. # ...overrides...
  398. def _read(self, size):
  399. """
  400. (subclass override)
  401. Read data from the stream. Return ``None`` or raise ``EOFError`` to
  402. indicate EOF.
  403. """
  404. raise EOFError()
  405. def _write(self, data):
  406. """
  407. (subclass override)
  408. Write data into the stream.
  409. """
  410. raise IOError("write not implemented")
  411. def _get_size(self):
  412. """
  413. (subclass override)
  414. Return the size of the file. This is called from within `_set_mode`
  415. if the file is opened in append mode, so the file position can be
  416. tracked and `seek` and `tell` will work correctly. If the file is
  417. a stream that can't be randomly accessed, you don't need to override
  418. this method,
  419. """
  420. return 0
  421. # ...internals...
  422. def _set_mode(self, mode="r", bufsize=-1):
  423. """
  424. Subclasses call this method to initialize the BufferedFile.
  425. """
  426. # set bufsize in any event, because it's used for readline().
  427. self._bufsize = self._DEFAULT_BUFSIZE
  428. if bufsize < 0:
  429. # do no buffering by default, because otherwise writes will get
  430. # buffered in a way that will probably confuse people.
  431. bufsize = 0
  432. if bufsize == 1:
  433. # apparently, line buffering only affects writes. reads are only
  434. # buffered if you call readline (directly or indirectly: iterating
  435. # over a file will indirectly call readline).
  436. self._flags |= self.FLAG_BUFFERED | self.FLAG_LINE_BUFFERED
  437. elif bufsize > 1:
  438. self._bufsize = bufsize
  439. self._flags |= self.FLAG_BUFFERED
  440. self._flags &= ~self.FLAG_LINE_BUFFERED
  441. elif bufsize == 0:
  442. # unbuffered
  443. self._flags &= ~(self.FLAG_BUFFERED | self.FLAG_LINE_BUFFERED)
  444. if ("r" in mode) or ("+" in mode):
  445. self._flags |= self.FLAG_READ
  446. if ("w" in mode) or ("+" in mode):
  447. self._flags |= self.FLAG_WRITE
  448. if "a" in mode:
  449. self._flags |= self.FLAG_WRITE | self.FLAG_APPEND
  450. self._size = self._get_size()
  451. self._pos = self._realpos = self._size
  452. if "b" in mode:
  453. self._flags |= self.FLAG_BINARY
  454. if "U" in mode:
  455. self._flags |= self.FLAG_UNIVERSAL_NEWLINE
  456. # built-in file objects have this attribute to store which kinds of
  457. # line terminations they've seen:
  458. # <http://www.python.org/doc/current/lib/built-in-funcs.html>
  459. self.newlines = None
  460. def _write_all(self, raw_data):
  461. # the underlying stream may be something that does partial writes (like
  462. # a socket).
  463. data = memoryview(raw_data)
  464. while len(data) > 0:
  465. count = self._write(data)
  466. data = data[count:]
  467. if self._flags & self.FLAG_APPEND:
  468. self._size += count
  469. self._pos = self._realpos = self._size
  470. else:
  471. self._pos += count
  472. self._realpos += count
  473. return None
  474. def _record_newline(self, newline):
  475. # silliness about tracking what kinds of newlines we've seen.
  476. # i don't understand why it can be None, a string, or a tuple, instead
  477. # of just always being a tuple, but we'll emulate that behavior anyway.
  478. if not (self._flags & self.FLAG_UNIVERSAL_NEWLINE):
  479. return
  480. if self.newlines is None:
  481. self.newlines = newline
  482. elif self.newlines != newline and isinstance(
  483. self.newlines, bytes_types
  484. ):
  485. self.newlines = (self.newlines, newline)
  486. elif newline not in self.newlines:
  487. self.newlines += (newline,)