text.js 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. /*
  2. * Copyright 2017 Sam Thorogood. All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5. * use this file except in compliance with the License. You may obtain a copy of
  6. * the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. * License for the specific language governing permissions and limitations under
  14. * the License.
  15. */
  16. /**
  17. * @fileoverview Polyfill for TextEncoder and TextDecoder.
  18. *
  19. * You probably want `text.min.js`, and not this file directly.
  20. */
  21. (function(scope) {
  22. 'use strict';
  23. // fail early
  24. if (scope['TextEncoder'] && scope['TextDecoder']) {
  25. return false;
  26. }
  27. // used for FastTextDecoder
  28. const validUtfLabels = ['utf-8', 'utf8', 'unicode-1-1-utf-8'];
  /**
   * UTF-8 only replacement for the native TextEncoder.
   *
   * @constructor
   */
  function FastTextEncoder() {
    // This does not accept an encoding, and always uses UTF-8:
    //   https://www.w3.org/TR/encoding/#dom-textencoder
  }

  Object.defineProperty(FastTextEncoder.prototype, 'encoding', {value: 'utf-8'});

  /**
   * Encodes a string as UTF-8.
   *
   * The input is coerced with String(), and a missing argument encodes as the
   * empty string. Unpaired surrogates (a high surrogate that is not followed by
   * a low surrogate, or a low surrogate on its own) are encoded as U+FFFD
   * REPLACEMENT CHARACTER so that the output is always well-formed UTF-8.
   *
   * @param {string=} string text to encode
   * @param {{stream: boolean}=} options only a falsy 'stream' is supported
   * @return {!Uint8Array} the UTF-8 bytes
   * @throws {Error} if options.stream is truthy
   */
  FastTextEncoder.prototype['encode'] = function(string = '', options = {stream: false}) {
    if (options && options.stream) {
      throw new Error(`Failed to encode: the 'stream' option is unsupported.`);
    }

    const input = String(string);
    const len = input.length;

    let pos = 0;  // input position, in UTF-16 code units
    let at = 0;  // output position, in bytes
    let tlen = Math.max(32, len + (len >>> 1) + 7);  // start at ~1.5x the input size
    let target = new Uint8Array((tlen >>> 3) << 3);  // ... rounded down to a multiple of 8

    while (pos < len) {
      let value = input.charCodeAt(pos++);

      if (value >= 0xd800 && value <= 0xdfff) {
        // Surrogate code unit: only "high followed by low" forms a code point.
        let paired = false;
        if (value <= 0xdbff && pos < len) {
          const extra = input.charCodeAt(pos);
          if ((extra & 0xfc00) === 0xdc00) {
            ++pos;
            value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
            paired = true;
          }
        }
        if (!paired) {
          value = 0xfffd;  // unpaired surrogate becomes the replacement character
        }
      }

      // Grow the buffer whenever the longest possible sequence (4 bytes) would
      // not fit. The new size is always at least 8 bytes larger than before.
      if (at + 4 > target.length) {
        tlen += 8;  // minimum extra
        tlen *= (1.0 + (pos / len) * 2);  // scale by how much input is consumed
        tlen = (tlen >>> 3) << 3;  // back to a multiple of 8
        const update = new Uint8Array(tlen);
        update.set(target);
        target = update;
      }

      if (value < 0x80) {  // 1-byte (ASCII)
        target[at++] = value;
        continue;
      }

      // Write the leading bytes; every multi-byte form shares the same final
      // continuation byte, which is written after this chain.
      if (value < 0x800) {  // 2-byte
        target[at++] = ((value >>> 6) & 0x1f) | 0xc0;
      } else if (value < 0x10000) {  // 3-byte
        target[at++] = ((value >>> 12) & 0x0f) | 0xe0;
        target[at++] = ((value >>> 6) & 0x3f) | 0x80;
      } else {  // 4-byte; value is at most 0x10ffff here
        target[at++] = ((value >>> 18) & 0x07) | 0xf0;
        target[at++] = ((value >>> 12) & 0x3f) | 0x80;
        target[at++] = ((value >>> 6) & 0x3f) | 0x80;
      }
      target[at++] = (value & 0x3f) | 0x80;
    }

    // Use subarray if slice isn't supported (IE11). This will use more memory
    // because the original array still exists.
    return target.slice ? target.slice(0, at) : target.subarray(0, at);
  };
  /**
   * UTF-8 only replacement for the native TextDecoder.
   *
   * @constructor
   * @param {string=} utfLabel encoding label; must be one of the UTF-8 labels
   * @param {{fatal: boolean}=} options only a falsy 'fatal' is supported
   * @throws {RangeError} if the label is not a known UTF-8 label
   * @throws {Error} if options.fatal is truthy
   */
  function FastTextDecoder(utfLabel='utf-8', options={fatal: false}) {
    const label = utfLabel.toLowerCase();
    const isUtf8Label = validUtfLabels.indexOf(label) !== -1;
    if (!isUtf8Label) {
      throw new RangeError(
          `Failed to construct 'TextDecoder': The encoding label provided ('${utfLabel}') is invalid.`);
    }

    if (options.fatal) {
      throw new Error(`Failed to construct 'TextDecoder': the 'fatal' option is unsupported.`);
    }
  }

  // Read-only attributes shared by every instance. Keys are quoted so they
  // survive property renaming by a minifier.
  Object.defineProperties(FastTextDecoder.prototype, {
    'encoding': {value: 'utf-8'},
    'fatal': {value: false},
    'ignoreBOM': {value: false},
  });
  /**
   * Decodes UTF-8 bytes through Node's Buffer, wrapping the same memory region
   * the typed array views.
   *
   * @param {!Uint8Array} bytes
   * @return {string}
   */
  function decodeBuffer(bytes) {
    const {buffer, byteOffset, byteLength} = bytes;
    const view = Buffer.from(buffer, byteOffset, byteLength);
    return view.toString('utf-8');
  }
  /**
   * Decodes UTF-8 bytes by wrapping them in a Blob and reading that Blob back
   * as text with a synchronous XMLHttpRequest.
   *
   * @param {!Uint8Array} bytes
   * @return {string}
   */
  function decodeSyncXHR(bytes) {
    const blob = new Blob([bytes], {type: 'text/plain;charset=UTF-8'});
    const blobUrl = URL.createObjectURL(blob);

    let text;
    try {
      // This hack will fail in non-Edgium Edge because sync XHRs are disabled
      // (and possibly in other places), hence the fallback in the catch.
      const request = new XMLHttpRequest();
      request.open('GET', blobUrl, false);
      request.send();
      text = request.responseText;
    } catch (e) {
      text = decodeFallback(bytes);
    } finally {
      // Always release the object URL, whichever path produced the text.
      URL.revokeObjectURL(blobUrl);
    }
    return text;
  }
  /**
   * Pure JavaScript UTF-8 decoder, used when no faster host facility exists.
   *
   * The input is parsed blindly: continuation bytes are not validated, bytes
   * missing from a truncated sequence are read as zero, and a byte that cannot
   * start a sequence is skipped without producing any output.
   *
   * @param {!Uint8Array} bytes
   * @return {string}
   */
  function decodeFallback(bytes) {
    const total = bytes.length;

    // Working buffer of UTF-16 code units. UTF-8 to UTF-16 is at most 1:1 (all
    // ASCII), so small inputs get a small buffer; large inputs are decoded in
    // chunks of at most 64Ki code units.
    const capacity = Math.min(256 * 256, total + 1);
    const units = new Uint16Array(capacity);
    // Flush while two slots are still free, so a surrogate pair is never split
    // across two chunks.
    const flushAt = capacity - 1;

    const parts = [];
    let cursor = 0;  // next input byte
    let filled = 0;  // code units currently buffered

    while (true) {
      const exhausted = cursor >= total;
      if (exhausted || filled >= flushAt) {
        // nb. .apply is slow, but spelling out every argument by hand would
        // bloat the code.
        parts.push(String.fromCharCode.apply(null, units.subarray(0, filled)));
        if (exhausted) {
          return parts.join('');
        }
        filled = 0;
      }

      const lead = bytes[cursor++];

      if ((lead & 0x80) === 0) {  // 1-byte (ASCII, including NUL)
        units[filled++] = lead;
        continue;
      }

      if ((lead & 0xe0) === 0xc0) {  // 2-byte
        const second = bytes[cursor++] & 0x3f;
        units[filled++] = ((lead & 0x1f) << 6) | second;
        continue;
      }

      if ((lead & 0xf0) === 0xe0) {  // 3-byte
        const second = bytes[cursor++] & 0x3f;
        const third = bytes[cursor++] & 0x3f;
        units[filled++] = ((lead & 0x1f) << 12) | (second << 6) | third;
        continue;
      }

      if ((lead & 0xf8) === 0xf0) {  // 4-byte
        const second = bytes[cursor++] & 0x3f;
        const third = bytes[cursor++] & 0x3f;
        const fourth = bytes[cursor++] & 0x3f;

        let codepoint = ((lead & 0x07) << 18) | (second << 12) | (third << 6) | fourth;
        if (codepoint > 0xffff) {
          // Outside the BMP: emit the high surrogate now, the low one below.
          codepoint -= 0x10000;
          units[filled++] = ((codepoint >>> 10) & 0x3ff) | 0xd800;
          codepoint = 0xdc00 | (codepoint & 0x3ff);
        }
        units[filled++] = codepoint;
      }

      // Any other leading byte is invalid and produces no output.
    }
  }
  // Decoding in plain JavaScript is slow, so prefer a host-provided decoder
  // when one is available and only otherwise use the hand-written one.
  let decodeImpl;
  if (typeof Buffer === 'function' && Buffer.from) {
    // Node: Buffer.from was added in Node v5.10.0 (2015-11-17).
    decodeImpl = decodeBuffer;
  } else if (typeof Blob === 'function' && typeof URL === 'function' && typeof URL.createObjectURL === 'function') {
    // Browsers: Blob and URL.createObjectURL are available from IE10, Safari 6,
    // Chrome 19 (all released in 2012), Firefox 19 (2013), ...
    decodeImpl = decodeSyncXHR;
  } else {
    decodeImpl = decodeFallback;
  }
  /**
   * Decodes UTF-8 bytes into a string.
   *
   * Only the bytes covered by the passed view are decoded (its byteOffset and
   * byteLength are honoured), a missing argument decodes to the empty string,
   * and one leading UTF-8 byte order mark (EF BB BF) is removed, matching the
   * `ignoreBOM: false` value this polyfill reports.
   *
   * @param {(!ArrayBuffer|!ArrayBufferView)=} buffer bytes to decode
   * @param {{stream: boolean}=} options only a falsy 'stream' is supported
   * @return {string}
   * @throws {Error} if options.stream is truthy
   */
  FastTextDecoder.prototype['decode'] = function(buffer, options={stream: false}) {
    if (options && options['stream']) {
      throw new Error(`Failed to decode: the 'stream' option is unsupported.`);
    }

    let bytes;
    if (buffer === undefined) {
      // Native decode() with no input yields the empty string.
      bytes = new Uint8Array(0);
    } else if (buffer instanceof Uint8Array) {
      // Accept Uint8Array instances as-is.
      bytes = buffer;
    } else if (buffer.buffer instanceof ArrayBuffer) {
      // Look for ArrayBufferView, which isn't a real type, but basically
      // represents all the valid TypedArray types plus DataView. They all have
      // ".buffer" as an instance of ArrayBuffer. The view may cover only part
      // of that buffer, so restrict the bytes to the view's own window.
      bytes = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength);
    } else {
      // The only other valid argument here is that "buffer" is an ArrayBuffer.
      // We also try to convert anything else passed to a Uint8Array, as this
      // catches anything that's array-like. Native code would throw here.
      bytes = new Uint8Array(buffer);
    }

    // Drop a leading BOM here so every decode implementation behaves the same.
    if (bytes.length >= 3 && bytes[0] === 0xef && bytes[1] === 0xbb && bytes[2] === 0xbf) {
      bytes = bytes.subarray(3);
    }

    return decodeImpl(/** @type {!Uint8Array} */ (bytes));
  };
  // Publish both polyfills on the detected global object. Bracket access keeps
  // the public names intact under property-renaming minifiers.
  scope['TextDecoder'] = FastTextDecoder;
  scope['TextEncoder'] = FastTextEncoder;
  242. }(typeof window !== 'undefined' ? window : (typeof global !== 'undefined' ? global : this)));