// moparser.js
  'use strict';

  // Charset conversion; `encoding.convert` is used below to decode MO strings.
  var encoding = require('encoding');
  // Project-local helpers; this file calls `formatCharset` and `parseHeader`.
  var sharedFuncs = require('./shared');
  4. /**
  5. * Parses a binary MO object into translation table
  6. *
  7. * @param {Buffer} buffer Binary MO object
  8. * @param {String} [defaultCharset] Default charset to use
  9. * @return {Object} Translation object
  10. */
  11. module.exports = function (buffer, defaultCharset) {
  12. var parser = new Parser(buffer, defaultCharset);
  13. return parser.parse();
  14. };
  /**
   * Creates a MO parser object.
   *
   * The constructor only stores its inputs and prepares an empty result table;
   * nothing is read from the buffer until `parse()` is called.
   *
   * @constructor
   * @param {Buffer} fileContents Binary MO object
   * @param {String} [defaultCharset] Default charset to use; falls back to
   *   'iso-8859-1' when omitted or falsy
   */
  function Parser (fileContents, defaultCharset) {
    this._fileContents = fileContents;

    /**
     * Method name for writing int32 values, default littleendian
     */
    this._writeFunc = 'writeUInt32LE';

    /**
     * Method name for reading int32 values, default littleendian
     */
    this._readFunc = 'readUInt32LE';

    this._charset = defaultCharset || 'iso-8859-1';

    // Result object handed back by parse(); `headers` stays undefined until a
    // header entry has been seen.
    this._table = {
      charset: this._charset,
      headers: undefined,
      translations: {}
    };
  }
  /**
   * Magic constant to check the endianness of the input file. The first four
   * bytes of the file are compared against this value in both byte orders.
   */
  Parser.prototype.MAGIC = 0x950412de;
  /**
   * Checks if number values in the input file are in big- or littleendian
   * format and selects the matching read/write method names.
   *
   * A buffer too short to hold the 4-byte magic number is reported as
   * "not detected" instead of letting the read throw.
   *
   * @return {Boolean} Return true if magic was detected
   */
  Parser.prototype._checkMagick = function () {
    var contents = this._fileContents;

    // Reading a uint32 at offset 0 needs at least 4 bytes
    if (!contents || contents.length < 4) {
      return false;
    }

    if (contents.readUInt32LE(0) === this.MAGIC) {
      this._readFunc = 'readUInt32LE';
      this._writeFunc = 'writeUInt32LE';
      return true;
    }

    if (contents.readUInt32BE(0) === this.MAGIC) {
      this._readFunc = 'readUInt32BE';
      this._writeFunc = 'writeUInt32BE';
      return true;
    }

    return false;
  };
  /**
   * Read the original strings and translations from the input MO file. Use the
   * first translation string in the file as the header when its msgid is empty.
   *
   * Relies on `_total`, `_offsetOriginals`, `_offsetTranslations` and
   * `_readFunc` having been set beforehand (parse() does this). Releases the
   * reference to the input buffer when done.
   */
  Parser.prototype._loadTranslationTable = function () {
    var contents = this._fileContents;
    var readUInt32 = this._readFunc;
    var originalsCursor = this._offsetOriginals;
    var translationsCursor = this._offsetTranslations;
    var msgid;
    var msgstr;

    // A table entry is two uint32 values: the string's byte length followed by
    // its offset in the file. Returns a view of those bytes (no copy).
    function readEntry (cursor) {
      var length = contents[readUInt32](cursor);
      var position = contents[readUInt32](cursor + 4);
      return contents.subarray(position, position + length);
    }

    for (var i = 0; i < this._total; i++) {
      // msgid string
      msgid = readEntry(originalsCursor);
      originalsCursor += 8;

      // matching msgstr
      msgstr = readEntry(translationsCursor);
      translationsCursor += 8;

      // The header must be handled before decoding anything, because it may
      // change this._charset.
      if (i === 0 && !msgid.toString()) {
        this._handleCharset(msgstr);
      }

      msgid = encoding.convert(msgid, 'utf-8', this._charset).toString('utf-8');
      msgstr = encoding.convert(msgstr, 'utf-8', this._charset).toString('utf-8');

      this._addString(msgid, msgstr);
    }

    // dump the file contents object
    this._fileContents = null;
  };
  /**
   * Detects charset for MO strings from the header, then decodes and parses
   * the header itself.
   *
   * If the header text contains a `charset=<name>` declaration, the parser's
   * charset (and the charset reported in the result table) is replaced with
   * the value returned by `sharedFuncs.formatCharset`. The header bytes are
   * then decoded with the resulting charset and stored as `_table.headers`.
   *
   * @param {Buffer} headers Header value
   */
  Parser.prototype._handleCharset = function (headers) {
    var charsetMatch = headers.toString().match(/[; ]charset\s*=\s*([\w-]+)/i);
    var decodedHeaders;

    if (charsetMatch) {
      this._charset = sharedFuncs.formatCharset(charsetMatch[1], this._charset);
      this._table.charset = this._charset;
    }

    decodedHeaders = encoding.convert(headers, 'utf-8', this._charset).toString('utf-8');
    this._table.headers = sharedFuncs.parseHeader(decodedHeaders);
  };
  /**
   * Adds a translation to the translation object.
   *
   * The raw msgid may carry a context prefix (text before the first U+0004)
   * and a plural form (text after the first U+0000); msgstr holds one or more
   * forms separated by U+0000. The entry is stored under
   * `translations[msgctxt][msgid]`, with '' as the context when none is given.
   *
   * Keys come straight from the input file, so they are always written as own
   * properties: a context or msgid such as `__proto__` or `constructor` must
   * not reach Object.prototype or other inherited objects.
   *
   * @param {String} msgid Original string
   * @param {String} msgstr Translation for the original string
   */
  Parser.prototype._addString = function (msgid, msgstr) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var translations = this._table.translations;
    var translation = {};
    var msgctxt = '';
    var contextEnd = msgid.indexOf('\u0004');
    var pluralStart;
    var msgidPlural;

    // Plain assignment would invoke the inherited `__proto__` setter for that
    // key; defineProperty always creates an ordinary own data property.
    function setOwn (target, key, value) {
      Object.defineProperty(target, key, {
        value: value,
        writable: true,
        enumerable: true,
        configurable: true
      });
    }

    if (contextEnd >= 0) {
      msgctxt = msgid.slice(0, contextEnd);
      msgid = msgid.slice(contextEnd + 1);
      translation.msgctxt = msgctxt;
    }

    pluralStart = msgid.indexOf('\u0000');
    if (pluralStart >= 0) {
      msgidPlural = msgid.slice(pluralStart + 1);
      msgid = msgid.slice(0, pluralStart);
    }

    translation.msgid = msgid;

    if (msgidPlural) {
      translation.msgid_plural = msgidPlural;
    }

    translation.msgstr = msgstr.split('\u0000');

    if (!hasOwn.call(translations, msgctxt)) {
      setOwn(translations, msgctxt, {});
    }

    setOwn(translations[msgctxt], msgid, translation);
  };
  /**
   * Parses the MO object and returns translation table.
   *
   * Reads the fixed header fields (revision, string count, and the offsets of
   * the two string tables) using the byte order chosen by `_checkMagick`, then
   * loads every string pair into `this._table`.
   *
   * @return {Object|Boolean} Translation table, or `false` when the magic
   *   number is not recognised or the header is truncated
   */
  Parser.prototype.parse = function () {
    // The last header field read below is a uint32 at offset 16
    var HEADER_BYTES_READ = 20;

    if (!this._checkMagick()) {
      return false;
    }

    // Treat a truncated header like any other unrecognised file instead of
    // letting the reads below throw.
    if (this._fileContents.length < HEADER_BYTES_READ) {
      return false;
    }

    /**
     * GetText revision nr, usually 0
     */
    this._revision = this._fileContents[this._readFunc](4);

    /**
     * Total count of translated strings
     */
    this._total = this._fileContents[this._readFunc](8);

    /**
     * Offset position for original strings table
     */
    this._offsetOriginals = this._fileContents[this._readFunc](12);

    /**
     * Offset position for translation strings table
     */
    this._offsetTranslations = this._fileContents[this._readFunc](16);

    // Load translations into this._table
    this._loadTranslationTable();

    return this._table;
  };