diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a9056d3..2d240c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,15 +3,24 @@ on: - push - pull_request jobs: + build: + name: Build with tsc + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: lts/* + - run: npm install + - run: npx tsc test: - name: Node.js ${{ matrix.node-version }} on ${{ matrix.os }} + name: Test ${{ matrix.node-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - node-version: - - 18 - - 20 + node-version: [ 18, 20 ] os: - ubuntu-latest - windows-latest @@ -21,4 +30,4 @@ jobs: with: node-version: ${{ matrix.node-version }} - run: npm install - - run: npm test + - run: npm run test:coverage diff --git a/.gitignore b/.gitignore index 61b7f20..ee8dea8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /node_modules +/lib +/@types npm-debug.log .DS_Store diff --git a/.npmignore b/.npmignore index f3611ac..478b410 100644 --- a/.npmignore +++ b/.npmignore @@ -1,3 +1,4 @@ /test +/src .eslintrc.js .gitignore diff --git a/index.js b/index.js deleted file mode 100644 index fab9b44..0000000 --- a/index.js +++ /dev/null @@ -1,17 +0,0 @@ -import * as poParser from './lib/poparser.js'; -import poCompiler from './lib/pocompiler.js'; -import moParser from './lib/moparser.js'; -import moCompiler from './lib/mocompiler.js'; - -export const po = { - parse: poParser.parse, - createParseStream: poParser.stream, - compile: poCompiler -}; - -export const mo = { - parse: moParser, - compile: moCompiler -}; - -export default { mo, po }; diff --git a/lib/pocompiler.js b/lib/pocompiler.js deleted file mode 100644 index f4baa0a..0000000 --- a/lib/pocompiler.js +++ /dev/null @@ -1,289 +0,0 @@ -import { Buffer } from 'safe-buffer'; -import encoding from 'encoding'; -import { HEADERS, foldLine, compareMsgid, formatCharset, generateHeader } from 
'./shared.js'; -import contentType from 'content-type'; - -/** - * Exposes general compiler function. Takes a translation - * object as a parameter and returns PO object - * - * @param {Object} table Translation object - * @return {Buffer} Compiled PO object - */ -export default function (table, options) { - const compiler = new Compiler(table, options); - - return compiler.compile(); -}; - -/** - * Creates a PO compiler object. - * - * @constructor - * @param {Object} table Translation table to be compiled - */ -function Compiler (table = {}, options = {}) { - this._table = table; - this._options = options; - - this._table.translations = this._table.translations || {}; - - let { headers = {} } = this._table; - - headers = Object.keys(headers).reduce((result, key) => { - const lowerKey = key.toLowerCase(); - - if (HEADERS.has(lowerKey)) { - result[HEADERS.get(lowerKey)] = headers[key]; - } else { - result[key] = headers[key]; - } - - return result; - }, {}); - - this._table.headers = headers; - - if (!('foldLength' in this._options)) { - this._options.foldLength = 76; - } - - if (!('escapeCharacters' in this._options)) { - this._options.escapeCharacters = true; - } - - if (!('sort' in this._options)) { - this._options.sort = false; - } - - if (!('eol' in this._options)) { - this._options.eol = '\n'; - } - - this._translations = []; - - this._handleCharset(); -} - -/** - * Converts a comments object to a comment string. The comment object is - * in the form of {translator:'', reference: '', extracted: '', flag: '', previous:''} - * - * @param {Object} comments A comments object - * @return {String} A comment string for the PO file - */ -Compiler.prototype._drawComments = function (comments) { - const lines = []; - const types = [{ - key: 'translator', - prefix: '# ' - }, { - key: 'reference', - prefix: '#: ' - }, { - key: 'extracted', - prefix: '#. 
' - }, { - key: 'flag', - prefix: '#, ' - }, { - key: 'previous', - prefix: '#| ' - }]; - - types.forEach(type => { - if (!comments[type.key]) { - return; - } - - comments[type.key].split(/\r?\n|\r/).forEach(line => { - lines.push(`${type.prefix}${line}`); - }); - }); - - return lines.join(this._options.eol); -}; - -/** - * Builds a PO string for a single translation object - * - * @param {Object} block Translation object - * @param {Object} [override] Properties of this object will override `block` properties - * @param {boolean} [obsolete] Block is obsolete and must be commented out - * @return {String} Translation string for a single object - */ -Compiler.prototype._drawBlock = function (block, override = {}, obsolete = false) { - const response = []; - const msgctxt = override.msgctxt || block.msgctxt; - const msgid = override.msgid || block.msgid; - const msgidPlural = override.msgid_plural || block.msgid_plural; - const msgstr = [].concat(override.msgstr || block.msgstr); - let comments = override.comments || block.comments; - - // add comments - if (comments && (comments = this._drawComments(comments))) { - response.push(comments); - } - - if (msgctxt) { - response.push(this._addPOString('msgctxt', msgctxt, obsolete)); - } - - response.push(this._addPOString('msgid', msgid || '', obsolete)); - - if (msgidPlural) { - response.push(this._addPOString('msgid_plural', msgidPlural, obsolete)); - - msgstr.forEach((msgstr, i) => { - response.push(this._addPOString(`msgstr[${i}]`, msgstr || '', obsolete)); - }); - } else { - response.push(this._addPOString('msgstr', msgstr[0] || '', obsolete)); - } - - return response.join(this._options.eol); -}; - -/** - * Escapes and joins a key and a value for the PO string - * - * @param {String} key Key name - * @param {String} value Key value - * @param {boolean} [obsolete] PO string is obsolete and must be commented out - * @return {String} Joined and escaped key-value pair - */ -Compiler.prototype._addPOString = function (key 
= '', value = '', obsolete = false) { - key = key.toString(); - if (obsolete) { - key = '#~ ' + key; - } - - let { foldLength, eol, escapeCharacters } = this._options; - - // escape newlines and quotes - if (escapeCharacters) { - value = value.toString() - .replace(/\\/g, '\\\\') - .replace(/"/g, '\\"') - .replace(/\t/g, '\\t') - .replace(/\r/g, '\\r'); - } - - value = value.replace(/\n/g, '\\n'); // need to escape new line characters regardless - - let lines = [value]; - - if (obsolete) { - eol = eol + '#~ '; - } - - if (foldLength > 0) { - lines = foldLine(value, foldLength); - } else { - // split only on new lines - if (escapeCharacters) { - lines = value.split(/\\n/g); - for (let i = 0; i < lines.length - 1; i++) { - lines[i] = `${lines[i]}\\n`; - } - if (lines.length && lines[lines.length - 1] === '') { - lines.splice(-1, 1); - } - } - } - - if (lines.length < 2) { - return `${key} "${lines.shift() || ''}"`; - } - - return `${key} ""${eol}"${lines.join(`"${eol}"`)}"`; -}; - -/** - * Handles header values, replaces or adds (if needed) a charset property - */ -Compiler.prototype._handleCharset = function () { - const ct = contentType.parse(this._table.headers['Content-Type'] || 'text/plain'); - - const charset = formatCharset(this._table.charset || ct.parameters.charset || 'utf-8'); - - // clean up content-type charset independently using fallback if missing - if (ct.parameters.charset) { - ct.parameters.charset = formatCharset(ct.parameters.charset); - } - - this._table.charset = charset; - this._table.headers['Content-Type'] = contentType.format(ct); -}; - -/** - * Flatten and sort translations object - * - * @param {Object} section Object to be prepared (translations or obsolete) - * @returns {Array} Prepared array - */ -Compiler.prototype._prepareSection = function (section) { - let response = []; - - Object.keys(section).forEach(msgctxt => { - if (typeof section[msgctxt] !== 'object') { - return; - } - - Object.keys(section[msgctxt]).forEach(msgid => { - if 
(typeof section[msgctxt][msgid] !== 'object') { - return; - } - - if (msgctxt === '' && msgid === '') { - return; - } - - response.push(section[msgctxt][msgid]); - }); - }); - - const { sort } = this._options; - - if (sort !== false) { - if (typeof sort === 'function') { - response = response.sort(sort); - } else { - response = response.sort(compareMsgid); - } - } - - return response; -}; - -/** - * Compiles translation object into a PO object - * - * @return {Buffer} Compiled PO object - */ -Compiler.prototype.compile = function () { - const headerBlock = (this._table.translations[''] && this._table.translations['']['']) || {}; - let response = []; - - const translations = this._prepareSection(this._table.translations); - response = translations.map(r => this._drawBlock(r)); - - if (typeof this._table.obsolete === 'object') { - const obsolete = this._prepareSection(this._table.obsolete); - if (obsolete.length) { - response = response.concat(obsolete.map(r => this._drawBlock(r, {}, true))); - } - } - - const { eol } = this._options; - - response.unshift(this._drawBlock(headerBlock, { - msgstr: generateHeader(this._table.headers) - })); - - if (this._table.charset === 'utf-8' || this._table.charset === 'ascii') { - return Buffer.from(response.join(eol + eol) + eol, 'utf-8'); - } - - return encoding.convert(response.join(eol + eol) + eol, this._table.charset); -}; diff --git a/package.json b/package.json index d01f014..2bd36fe 100644 --- a/package.json +++ b/package.json @@ -18,28 +18,35 @@ "node": ">=18" }, "scripts": { - "lint": "eslint lib/*.js test/*.js index.js", + "lint": "eslint src/*.js test/*.js", "test-generate-mo": "msgfmt test/fixtures/latin13.po -o test/fixtures/latin13.mo & msgfmt test/fixtures/utf8.po -o test/fixtures/utf8.mo & msgfmt test/fixtures/obsolete.po -o test/fixtures/obsolete.mo", "test": "mocha", + "test:coverage": "npx c8 --check-coverage npm run test", "preversion": "npm run lint && npm test", - "postversion": "git push && git push --tags" 
+ "postversion": "git push && git push --tags", + "prepublishOnly": "npm i && tsc && npm run lint && npm run test" }, - "main": "./index.js", + "main": "./lib/index.js", + "types": "./lib/index.d.ts", "license": "MIT", "dependencies": { "content-type": "^1.0.5", "encoding": "^0.1.13", - "readable-stream": "^4.5.2", - "safe-buffer": "^5.2.1" + "readable-stream": "^4.5.2" }, "devDependencies": { + "@types/chai": "latest", + "@types/content-type": "^1.1.8", + "@types/mocha": "latest", + "@types/readable-stream": "^4.0.11", "chai": "^5.0.3", "eslint": "^8.56.0", "eslint-config-standard": "^17.1.0", "eslint-plugin-import": "^2.29.1", "eslint-plugin-n": "^16.6.2", "eslint-plugin-promise": "^6.1.1", - "mocha": "^10.3.0" + "mocha": "^10.4.0", + "typescript": "^5.4.5" }, "keywords": [ "i18n", diff --git a/src/index.js b/src/index.js new file mode 100644 index 0000000..fe34104 --- /dev/null +++ b/src/index.js @@ -0,0 +1,25 @@ +import { poParse, poStream } from './poparser.js'; +import poCompiler from './pocompiler.js'; +import moParser from './moparser.js'; +import moCompiler from './mocompiler.js'; + +/** + * Translation parser and compiler for PO files + * @see https://www.gnu.org/software/gettext/manual/html_node/PO.html + */ +export const po = { + parse: poParse, + createParseStream: poStream, + compile: poCompiler +}; + +/** + * Translation parser and compiler for MO files + * @see https://www.gnu.org/software/gettext/manual/html_node/MO.html + */ +export const mo = { + parse: moParser, + compile: moCompiler +}; + +export default { mo, po }; diff --git a/lib/mocompiler.js b/src/mocompiler.js similarity index 57% rename from lib/mocompiler.js rename to src/mocompiler.js index 9715d63..f538a6d 100644 --- a/lib/mocompiler.js +++ b/src/mocompiler.js @@ -1,81 +1,119 @@ -import { Buffer } from 'safe-buffer'; import encoding from 'encoding'; import { HEADERS, formatCharset, generateHeader, compareMsgid } from './shared.js'; import contentType from 'content-type'; +/** + * 
@typedef {import('node:stream').Transform} Transform + * @typedef {import('./types.js').GetTextTranslation} GetTextTranslation + * @typedef {import('./types.js').GetTextTranslations} GetTextTranslations + * @typedef {import('./types.js').Translations} Translations + * @typedef {import('./types.js').WriteFunc} WriteFunc + */ + +/** + * @typedef {Object} Size Data about the size of the compiled MO object. + * @property {number} msgid The size of the msgid section. + * @property {number} msgstr The size of the msgstr section. + * @property {number} total The total size of the compiled MO object. + */ + +/** + * @typedef {{ msgid: Buffer, msgstr: Buffer }} TranslationBuffers A translation object partially parsed. + */ + /** * Exposes general compiler function. Takes a translation * object as a parameter and returns binary MO object * - * @param {Object} table Translation object + * @param {GetTextTranslations} table Translation object * @return {Buffer} Compiled binary MO object */ export default function (table) { const compiler = new Compiler(table); return compiler.compile(); -}; +} /** - * Creates a MO compiler object. 
- * - * @constructor - * @param {Object} table Translation table as defined in the README + * Prepare the header object to be compatible with MO compiler + * @param {Record} headers the headers + * @return {Record} The prepared header */ -function Compiler (table = {}) { - this._table = table; - - let { headers = {}, translations = {} } = this._table; - - headers = Object.keys(headers).reduce((result, key) => { +function prepareMoHeaders (headers) { + return Object.keys(headers).reduce((result, key) => { const lowerKey = key.toLowerCase(); if (HEADERS.has(lowerKey)) { // POT-Creation-Date is removed in MO (see https://savannah.gnu.org/bugs/?49654) if (lowerKey !== 'pot-creation-date') { - result[HEADERS.get(lowerKey)] = headers[key]; + const value = HEADERS.get(lowerKey); + if (value) { + result[value] = headers[key]; + } } } else { result[key] = headers[key]; } return result; - }, {}); + }, /** @type {Record} */ ({})); +} - // filter out empty translations - translations = Object.keys(translations).reduce((result, msgctxt) => { +/** + * Prepare the translation object to be compatible with MO compiler + * @param {Translations} translations + * @return {Translations} + */ +function prepareTranslations (translations) { + return Object.keys(translations).reduce((result, msgctxt) => { const context = translations[msgctxt]; const msgs = Object.keys(context).reduce((result, msgid) => { - const hasTranslation = context[msgid].msgstr.some(item => !!item.length); + const TranslationMsgstr = context[msgid].msgstr; + const hasTranslation = TranslationMsgstr.some(item => !!item.length); if (hasTranslation) { result[msgid] = context[msgid]; } return result; - }, {}); + }, /** @type {Record} */({})); if (Object.keys(msgs).length) { result[msgctxt] = msgs; } return result; - }, {}); + }, /** @type {Translations} */({})); +} - this._table.translations = translations; - this._table.headers = headers; +/** + * Creates a MO compiler object. 
+ * @this {Compiler & Transform} + * + * @param {GetTextTranslations} [table] Translation table as defined in the README + */ +function Compiler (table) { + /** @type {GetTextTranslations} _table The translation table */ + this._table = { + charset: undefined, + translations: prepareTranslations(table?.translations ?? {}), + headers: prepareMoHeaders(table?.headers ?? {}) + }; this._translations = []; - + /** + * @type {WriteFunc} + */ this._writeFunc = 'writeUInt32LE'; this._handleCharset(); -} -/** - * Magic bytes for the generated binary data - */ -Compiler.prototype.MAGIC = 0x950412de; + /** + * Magic bytes for the generated binary data + * @type {number} MAGIC file header magic value of mo file + */ + this.MAGIC = 0x950412de; +} /** * Handles header values, replaces or adds (if needed) a charset property @@ -96,17 +134,19 @@ Compiler.prototype._handleCharset = function () { /** * Generates an array of translation strings - * in the form of [{msgid:... , msgstr:...}] + * in the form of [{msgid:..., msgstr: ...}] * - * @return {Array} Translation strings array */ Compiler.prototype._generateList = function () { + /** @type {TranslationBuffers[]} */ const list = []; - list.push({ - msgid: Buffer.alloc(0), - msgstr: encoding.convert(generateHeader(this._table.headers), this._table.charset) - }); + if ('headers' in this._table) { + list.push({ + msgid: Buffer.alloc(0), + msgstr: encoding.convert(generateHeader(this._table.headers), this._table.charset) + }); + } Object.keys(this._table.translations).forEach(msgctxt => { if (typeof this._table.translations[msgctxt] !== 'object') { @@ -133,7 +173,7 @@ Compiler.prototype._generateList = function () { key += '\u0000' + msgidPlural; } - const value = [].concat(this._table.translations[msgctxt][msgid].msgstr || []).join('\u0000'); + const value = /** @type {string[]} */([]).concat(this._table.translations[msgctxt][msgid].msgstr ?? 
[]).join('\u0000'); list.push({ msgid: encoding.convert(key, this._table.charset), @@ -148,20 +188,19 @@ Compiler.prototype._generateList = function () { /** * Calculate buffer size for the final binary object * - * @param {Array} list An array of translation strings from _generateList - * @return {Object} Size data of {msgid, msgstr, total} + * @param {TranslationBuffers[]} list An array of translation strings from _generateList + * @return {Size} Size data of {msgid, msgstr, total} */ Compiler.prototype._calculateSize = function (list) { let msgidLength = 0; let msgstrLength = 0; - let totalLength = 0; list.forEach(translation => { msgidLength += translation.msgid.length + 1; // + extra 0x00 msgstrLength += translation.msgstr.length + 1; // + extra 0x00 }); - totalLength = 4 + // magic number + const totalLength = 4 + // magic number 4 + // revision 4 + // string count 4 + // original string table offset @@ -183,9 +222,9 @@ Compiler.prototype._calculateSize = function (list) { /** * Generates the binary MO object from the translation list * - * @param {Array} list translation list - * @param {Object} size Byte size information - * @return {Buffer} Compiled MO object + * @param {TranslationBuffers[]} list translation list + * @param {Size} size Byte size information + * @return {Buffer} Compiled MO object */ Compiler.prototype._build = function (list, size) { const returnBuffer = Buffer.alloc(size.total); @@ -214,21 +253,23 @@ Compiler.prototype._build = function (list, size) { // hash table offset returnBuffer[this._writeFunc](28 + (4 + 4) * list.length * 2, 24); - // build originals table + // Build original table curPosition = 28 + 2 * (4 + 4) * list.length; for (i = 0, len = list.length; i < len; i++) { - list[i].msgid.copy(returnBuffer, curPosition); - returnBuffer[this._writeFunc](list[i].msgid.length, 28 + i * 8); - returnBuffer[this._writeFunc](curPosition, 28 + i * 8 + 4); + const msgidLength = /** @type {Buffer} */(/** @type {unknown} */(list[i].msgid)); 
+ msgidLength.copy(returnBuffer, curPosition); + returnBuffer.writeUInt32LE(list[i].msgid.length, 28 + i * 8); + returnBuffer.writeUInt32LE(curPosition, 28 + i * 8 + 4); returnBuffer[curPosition + list[i].msgid.length] = 0x00; curPosition += list[i].msgid.length + 1; } - // build translations table + // build translation table for (i = 0, len = list.length; i < len; i++) { - list[i].msgstr.copy(returnBuffer, curPosition); - returnBuffer[this._writeFunc](list[i].msgstr.length, 28 + (4 + 4) * list.length + i * 8); - returnBuffer[this._writeFunc](curPosition, 28 + (4 + 4) * list.length + i * 8 + 4); + const msgstrLength = /** @type {Buffer} */(/** @type {unknown} */(list[i].msgstr)); + msgstrLength.copy(returnBuffer, curPosition); + returnBuffer.writeUInt32LE(list[i].msgstr.length, 28 + (4 + 4) * list.length + i * 8); + returnBuffer.writeUInt32LE(curPosition, 28 + (4 + 4) * list.length + i * 8 + 4); returnBuffer[curPosition + list[i].msgstr.length] = 0x00; curPosition += list[i].msgstr.length + 1; } @@ -237,8 +278,9 @@ Compiler.prototype._build = function (list, size) { }; /** - * Compiles translation object into a binary MO object + * Compiles a translation object into a binary MO object * + * @interface * @return {Buffer} Compiled MO object */ Compiler.prototype.compile = function () { diff --git a/lib/moparser.js b/src/moparser.js similarity index 56% rename from lib/moparser.js rename to src/moparser.js index 09c10f8..263efdb 100644 --- a/lib/moparser.js +++ b/src/moparser.js @@ -1,65 +1,77 @@ import encoding from 'encoding'; import { formatCharset, parseHeader } from './shared.js'; +/** + * @typedef {import('./types.js').GetTextTranslations} GetTextTranslations + * @typedef {import('./types.js').GetTextTranslation} GetTextTranslation + * @typedef {import('./types.js').Translations} Translations + * @typedef {import('./types.js').WriteFunc} WriteFunc + * @typedef {import('./types.js').ReadFunc} ReadFunc + */ + /** * Parses a binary MO object into translation table 
* * @param {Buffer} buffer Binary MO object - * @param {String} [defaultCharset] Default charset to use - * @return {Object} Translation object + * @param {string} [defaultCharset] Default charset to use */ export default function (buffer, defaultCharset) { const parser = new Parser(buffer, defaultCharset); return parser.parse(); -}; +} /** * Creates a MO parser object. * * @constructor - * @param {Buffer} fileContents Binary MO object - * @param {String} [defaultCharset] Default charset to use + * @param {Buffer|null} fileContents Binary MO object + * @param {string} [defaultCharset] Default charset to use */ function Parser (fileContents, defaultCharset = 'iso-8859-1') { this._fileContents = fileContents; + this._charset = defaultCharset; + /** - * Method name for writing int32 values, default littleendian - */ + * @type {WriteFunc} + */ this._writeFunc = 'writeUInt32LE'; /** - * Method name for reading int32 values, default littleendian - */ + * @type {ReadFunc} + */ this._readFunc = 'readUInt32LE'; - this._charset = defaultCharset; - + /** + * Translation table + * + * @type {GetTextTranslations} table Translation object + */ this._table = { charset: this._charset, - headers: undefined, + headers: {}, translations: {} }; -} -/** - * Magic constant to check the endianness of the input file - */ -Parser.prototype.MAGIC = 0x950412de; + /** + * Magic constant to check the endianness of the input file + */ + this.MAGIC = 0x950412de; +} /** - * Checks if number values in the input file are in big- or littleendian format. + * Checks if number values in the input file are in big- or little endian format. 
* - * @return {Boolean} Return true if magic was detected + * @return {boolean} Return true if magic was detected */ Parser.prototype._checkMagick = function () { - if (this._fileContents.readUInt32LE(0) === this.MAGIC) { + if (this._fileContents?.readUInt32LE(0) === this.MAGIC) { this._readFunc = 'readUInt32LE'; this._writeFunc = 'writeUInt32LE'; return true; - } else if (this._fileContents.readUInt32BE(0) === this.MAGIC) { + } else if (this._fileContents?.readUInt32BE(0) === this.MAGIC) { this._readFunc = 'readUInt32BE'; this._writeFunc = 'writeUInt32BE'; @@ -70,31 +82,42 @@ Parser.prototype._checkMagick = function () { }; /** - * Read the original strings and translations from the input MO file. Use the - * first translation string in the file as the header. + * Read the original strings and translations from the input MO file. + * Use the first translation string in the file as the header. */ Parser.prototype._loadTranslationTable = function () { - let offsetOriginals = this._offsetOriginals; - let offsetTranslations = this._offsetTranslations; + let offsetOriginals = this._offsetOriginals || 0; + let offsetTranslations = this._offsetTranslations || 0; let position; let length; let msgid; let msgstr; + // Return if there are no translations + if (!this._total) { this._fileContents = null; return; } + + // Loop through all strings in the MO file for (let i = 0; i < this._total; i++) { + if (this._fileContents === null) continue; // msgid string length = this._fileContents[this._readFunc](offsetOriginals); offsetOriginals += 4; position = this._fileContents[this._readFunc](offsetOriginals); offsetOriginals += 4; - msgid = this._fileContents.slice(position, position + length); + msgid = this._fileContents.subarray( + position, + position + length + ); // matching msgstr length = this._fileContents[this._readFunc](offsetTranslations); offsetTranslations += 4; position = this._fileContents[this._readFunc](offsetTranslations); offsetTranslations += 4; - msgstr = 
this._fileContents.slice(position, position + length); + msgstr = this._fileContents.subarray( + position, + position + length + ); if (!i && !msgid.toString()) { this._handleCharset(msgstr); @@ -125,34 +148,31 @@ Parser.prototype._handleCharset = function (headers) { this._charset = this._table.charset = formatCharset(match[1], this._charset); } - headers = encoding.convert(headers, 'utf-8', this._charset) - .toString('utf8'); + headers = encoding.convert(headers, 'utf-8', this._charset); - this._table.headers = parseHeader(headers); + this._table.headers = parseHeader(headers.toString('utf8')); }; /** * Adds a translation to the translation object * - * @param {String} msgid Original string - * @params {String} msgstr Translation for the original string + * @param {string} msgidRaw Original string + * @param {string} msgstrRaw Translation for the original string */ -Parser.prototype._addString = function (msgid, msgstr) { +Parser.prototype._addString = function (msgidRaw, msgstrRaw) { const translation = {}; - let msgctxt; + let msgctxt = ''; let msgidPlural; - msgid = msgid.split('\u0004'); - if (msgid.length > 1) { - msgctxt = msgid.shift(); + const msgidArray = msgidRaw.split('\u0004'); + if (msgidArray.length > 1) { + msgctxt = msgidArray.shift() || ''; translation.msgctxt = msgctxt; - } else { - msgctxt = ''; } - msgid = msgid.join('\u0004'); + msgidRaw = msgidArray.join('\u0004'); - const parts = msgid.split('\u0000'); - msgid = parts.shift(); + const parts = msgidRaw.split('\u0000'); + const msgid = parts.shift() || ''; translation.msgid = msgid; @@ -160,8 +180,8 @@ Parser.prototype._addString = function (msgid, msgstr) { translation.msgid_plural = msgidPlural; } - msgstr = msgstr.split('\u0000'); - translation.msgstr = [].concat(msgstr || []); + const msgstr = msgstrRaw.split('\u0000'); + translation.msgstr = [...msgstr]; if (!this._table.translations[msgctxt]) { this._table.translations[msgctxt] = {}; @@ -173,31 +193,31 @@ Parser.prototype._addString = 
function (msgid, msgstr) { /** * Parses the MO object and returns translation table * - * @return {Object} Translation table + * @return {GetTextTranslations | false} Translation table */ Parser.prototype.parse = function () { - if (!this._checkMagick()) { + if (!this._checkMagick() || this._fileContents === null) { return false; } /** - * GetText revision nr, usually 0 - */ + * GetText revision nr, usually 0 + */ this._revision = this._fileContents[this._readFunc](4); /** - * Total count of translated strings - */ - this._total = this._fileContents[this._readFunc](8); + * @type {number} Total count of translated strings + */ + this._total = this._fileContents[this._readFunc](8) ?? 0; /** - * Offset position for original strings table - */ + * @type {number} Offset position for original strings table + */ this._offsetOriginals = this._fileContents[this._readFunc](12); /** - * Offset position for translation strings table - */ + * @type {number} Offset position for translation strings table + */ this._offsetTranslations = this._fileContents[this._readFunc](16); // Load translations into this._translationTable diff --git a/src/pocompiler.js b/src/pocompiler.js new file mode 100644 index 0000000..b107e88 --- /dev/null +++ b/src/pocompiler.js @@ -0,0 +1,324 @@ +import { HEADERS, foldLine, compareMsgid, formatCharset, generateHeader } from './shared.js'; +import contentType from 'content-type'; + +import encoding from 'encoding'; + +/** + * @typedef {import('./types.js').GetTextTranslations} GetTextTranslations + * @typedef {import('./types.js').GetTextTranslation} GetTextTranslation + * @typedef {import('./types.js').GetTextComment} GetTextComment + * @typedef {import('./types.js').Translations} Translations + * @typedef {import('./types.js').ParserOptions} ParserOptions + */ + +/** + * @typedef {Partial> & { msgstr?: string | string[] }} PreOutputTranslation + */ + +/** + * Exposes general compiler function. 
Takes a translation + * object as a parameter and returns PO object + * + * @param {GetTextTranslations} table Translation object + * @param {ParserOptions} [options] Options + * @return {Buffer} The compiled PO object + */ +export default function (table, options) { + const compiler = new Compiler(table, options); + + return compiler.compile(); +} + +/** + * Takes the header object and converts all headers into the lowercase format + * + * @param {Record} headersRaw the headers to prepare + * @returns {Record} the headers in the lowercase format + */ +export function preparePoHeaders (headersRaw) { + return Object.keys(headersRaw).reduce((result, key) => { + const lowerKey = key.toLowerCase(); + const value = HEADERS.get(lowerKey); + + if (typeof value === 'string') { + result[value] = headersRaw[key]; + } else { + result[key] = headersRaw[key]; + } + + return result; + }, /** @type {Record} */ ({})); +} + +/** + * Creates a PO compiler object. + * + * @constructor + * @param {GetTextTranslations} [table] Translation table to be compiled + * @param {ParserOptions} [options] Options + */ +function Compiler (table, options) { + this._table = table ?? { + headers: {}, + charset: undefined, + translations: {} + }; + this._table.translations = { ...this._table.translations }; + + /** @type {ParserOptions} _options The Options object */ + this._options = { + foldLength: 76, + escapeCharacters: true, + sort: false, + eol: '\n', + ...options + }; + + /** @type {Record}} the translation table */ + this._table.headers = preparePoHeaders(this._table.headers ?? {}); + + this._translations = []; + + this._handleCharset(); +} + +/** + * Converts a comment object to a comment string. 
The comment object is + * in the form of {translator: '', reference: '', extracted: '', flag: '', previous: ''} + * + * @param {Record} comments A comments object + * @return {string} A comment string for the PO file + */ +Compiler.prototype._drawComments = function (comments) { + /** @var {Record[]} lines The comment lines to be returned */ + const lines = []; + /** @var {{key: GetTextComment, prefix: string}} type The comment type */ + const types = [{ + key: 'translator', + prefix: '# ' + }, { + key: 'reference', + prefix: '#: ' + }, { + key: 'extracted', + prefix: '#. ' + }, { + key: 'flag', + prefix: '#, ' + }, { + key: 'previous', + prefix: '#| ' + }]; + + for (const type of types) { + /** @var {string} value The comment type */ + const value = type.key; + + // ignore missing or empty comments (a key that is present but undefined or '' must not reach split) + if (!comments[value]) { continue; } + + const commentLines = comments[value].split(/\r?\n|\r/); + + // add comment lines to comments Array + for (const line of commentLines) { + lines.push(`${type.prefix}${line}`); + } + } + + return lines.length ? lines.join(this._options.eol) : ''; +}; + +/** + * Builds a PO string for a single translation object + * + * @param {PreOutputTranslation} block Translation object + * @param {Partial} [override] Properties of this object will override `block` properties + * @param {boolean} [obsolete] Block is obsolete and must be commented out + * @return {string} Translation string for a single object + */ +Compiler.prototype._drawBlock = function (block, override = {}, obsolete = false) { + const response = []; + const msgctxt = override.msgctxt || block.msgctxt; + const msgid = override.msgid || block.msgid; + const msgidPlural = override.msgid_plural || block.msgid_plural; + const msgstrData = override.msgstr || block.msgstr; + const msgstr = Array.isArray(msgstrData) ? 
[...msgstrData] : [msgstrData]; + + /** @type {GetTextComment|undefined} */ + const comments = override.comments || block.comments; + if (comments) { + const drawnComments = this._drawComments(comments); + if (drawnComments) { + response.push(drawnComments); + } + } + + if (msgctxt) { + response.push(this._addPOString('msgctxt', msgctxt, obsolete)); + } + + response.push(this._addPOString('msgid', msgid || '', obsolete)); + + if (msgidPlural) { + response.push(this._addPOString('msgid_plural', msgidPlural, obsolete)); + + msgstr.forEach((msgstr, i) => { + response.push(this._addPOString(`msgstr[${i}]`, msgstr || '', obsolete)); + }); + } else { + response.push(this._addPOString('msgstr', msgstr[0] || '', obsolete)); + } + + return response.join(this._options.eol); +}; + +/** + * Escapes and joins a key and a value for the PO string + * + * @param {string} key Key name + * @param {string} value Key value + * @param {boolean} [obsolete] PO string is obsolete and must be commented out + * @return {string} Joined and escaped key-value pair + */ +Compiler.prototype._addPOString = function (key = '', value = '', obsolete = false) { + key = key.toString(); + if (obsolete) { + key = '#~ ' + key; + } + + let { foldLength, eol, escapeCharacters } = this._options; + + // escape newlines and quotes + if (escapeCharacters) { + value = value.toString() + .replace(/\\/g, '\\\\') + .replace(/"/g, '\\"') + .replace(/\t/g, '\\t') + .replace(/\r/g, '\\r'); + } + + value = value.replace(/\n/g, '\\n'); // need to escape new line characters regardless + + let lines = [value]; + + if (obsolete) { + eol = eol + '#~ '; + } + + if (foldLength && foldLength > 0) { + lines = foldLine(value, foldLength); + } else { + // split only on new lines + if (escapeCharacters) { + lines = value.split(/\\n/g); + for (let i = 0; i < lines.length - 1; i++) { + lines[i] = `${lines[i]}\\n`; + } + if (lines.length && lines[lines.length - 1] === '') { + lines.splice(-1, 1); + } + } + } + + if (lines.length < 
2) { + return `${key} "${lines.shift() || ''}"`; + } + + return `${key} ""${eol}"${lines.join(`"${eol}"`)}"`; +}; + +/** + * Handles header values, replaces or adds (if needed) a charset property + */ +Compiler.prototype._handleCharset = function () { + if (this._table.headers) { + const ct = contentType.parse(this._table.headers['Content-Type'] || 'text/plain'); + + const charset = formatCharset(this._table.charset || ct.parameters.charset || 'utf-8'); + + // clean up content-type charset independently using fallback if missing + if (ct.parameters.charset) { + ct.parameters.charset = formatCharset(ct.parameters.charset); + } + + this._table.charset = charset; + this._table.headers['Content-Type'] = contentType.format(ct); + } +}; + +/** + * Flatten and sort translations object + * + * @param {Translations} section Object to be prepared (translations or obsolete) + * @returns {PreOutputTranslation[]|undefined} Prepared array + */ +Compiler.prototype._prepareSection = function (section) { + /** @type {GetTextTranslation[]} response Prepared array */ + let response = []; + + for (const msgctxt in section) { + if (typeof section[msgctxt] !== 'object') { + return; + } + + for (const msgid of Object.keys(section[msgctxt])) { + if (typeof section[msgctxt][msgid] !== 'object') { + continue; + } + + if (msgctxt === '' && msgid === '') { + continue; + } + + response.push(section[msgctxt][msgid]); + } + } + + const { sort } = this._options; + + if (sort) { + if (typeof sort === 'function') { + response = response.sort(sort); + } else { + response = response.sort(compareMsgid); + } + } + + return response; +}; + +/** + * Compiles a translation object into a PO object + * + * @interface + * @return {Buffer} Compiled a PO object + */ +Compiler.prototype.compile = function () { + if (!this._table.translations) { + throw new Error('No translations found'); + } + /** @type {PreOutputTranslation} headerBlock */ + const headerBlock = (this._table.translations[''] && 
this._table.translations['']['']) || {}; + + const translations = this._prepareSection(this._table.translations); + let response = /** @type {(PreOutputTranslation|string)[]} */ (/** @type {unknown[]} */ (translations?.map(t => this._drawBlock(t)))); + + if (typeof this._table.obsolete === 'object') { + const obsolete = this._prepareSection(this._table.obsolete); + if (obsolete && obsolete.length) { + response = response?.concat(obsolete.map(r => this._drawBlock(r, {}, true))); + } + } + + const eol = this._options.eol ?? '\n'; + + response?.unshift(this._drawBlock(headerBlock, { + msgstr: generateHeader(this._table.headers) + })); + + if (this._table.charset === 'utf-8' || this._table.charset === 'ascii') { + return Buffer.from(response?.join(eol + eol) + eol, 'utf-8'); + } + + return encoding.convert(response?.join(eol + eol) + eol, this._table.charset); +}; diff --git a/lib/poparser.js b/src/poparser.js similarity index 65% rename from lib/poparser.js rename to src/poparser.js index 77b8b12..3d209b7 100644 --- a/lib/poparser.js +++ b/src/poparser.js @@ -1,38 +1,62 @@ import encoding from 'encoding'; -import { formatCharset, parseNPluralFromHeadersSafely, parseHeader } from './shared.js'; +import { formatCharset, parseHeader, parseNPluralFromHeadersSafely, ParserError } from './shared.js'; import { Transform } from 'readable-stream'; import util from 'util'; +/** + * @typedef {import('stream').Stream.Writable} WritableState + * @typedef {import('readable-stream').TransformOptions} TransformOptions + * @typedef {import('./types.js').GetTextTranslations} GetTextTranslations + * @typedef {import('./types.js').GetTextTranslation} GetTextTranslation + * @typedef {import('./types.js').GetTextComment} GetTextComment + * @typedef {import('./types.js').Translations} Translations + * @typedef {import('./types.js').ParserOptions} ParserOptions + */ + +/** + * @typedef {{ defaultCharset?: string, validation?: boolean }} Options Po parser options + */ + +/** + * @typedef 
{(...args: any[]) => void} DoneCallback + */ + +/** + * @typedef {Object} Node A single Node object in the PO file + * @property {string} [key] + * @property {number} [type] + * @property {string} value + * @property {string} [quote] + * @property {boolean} [obsolete] + * @property {GetTextComment | undefined} [comments] + */ + /** * Parses a PO object into translation table * - * @typedef {{ defaultCharset?: string, validation?: boolean }} Options * @param {string | Buffer} input PO object * @param {Options} [options] Optional options with defaultCharset and validation */ -export function parse (input, options = {}) { +export function poParse (input, options = {}) { const parser = new Parser(input, options); return parser.parse(); -}; +} /** * Parses a PO stream, emits translation table in object mode * - * @typedef {{ defaultCharset: strubg, validation: boolean }} Options * @param {Options} [options] Optional options with defaultCharset and validation - * @param {import('readable-stream').TransformOptions} [transformOptions] Optional stream options + * @param {TransformOptions} [transformOptions] Optional stream options */ -export function stream (options = {}, transformOptions = {}) { +export function poStream (options = {}, transformOptions = {}) { return new PoParserTransform(options, transformOptions); -}; +} /** - * Creates a PO parser object. If PO object is a string, - * UTF-8 will be used as the charset + * Creates a PO parser object. 
+ * If a PO object is a string, UTF-8 will be used as the charset * - * @typedef {{ defaultCharset?: string, validation?: boolean }} Options - * @constructor * @param {string | Buffer} fileContents PO object * @param {Options} options Options with defaultCharset and validation */ @@ -40,8 +64,10 @@ function Parser (fileContents, { defaultCharset = 'iso-8859-1', validation = fal this._validation = validation; this._charset = defaultCharset; + /** @type {Node[]} Lexed tokens */ this._lex = []; this._escaped = false; + /** @type {Partial} */ this._node = {}; this._state = this.states.none; this._lineNumber = 1; @@ -68,17 +94,18 @@ Parser.prototype.parse = function () { /** * Detects charset for PO strings from the header * - * @param {Buffer} headers Header value + * @param {string | Buffer} buf Header value */ Parser.prototype._handleCharset = function (buf = '') { + /** @type {string} */ const str = buf.toString(); let pos; let headers = ''; let match; if ((pos = str.search(/^\s*msgid/im)) >= 0) { - pos = pos + str.substr(pos + 5).search(/^\s*(msgid|msgctxt)/im); - headers = str.substr(0, pos >= 0 ? pos + 5 : str.length); + pos = pos + str.substring(pos + 5).search(/^\s*(msgid|msgctxt)/im); + headers = str.substring(0, pos >= 0 ? pos + 5 : str.length); } if ((match = headers.match(/[; ]charset\s*=\s*([\w-]+)(?:[\s;]|\\n)*"\s*$/mi))) { @@ -92,6 +119,11 @@ Parser.prototype._handleCharset = function (buf = '') { return this._toString(buf); }; +/** + * Converts buffer to string + * @param {string | Buffer} buf Buffer to convert + * @return {string} Converted string + */ Parser.prototype._toString = function (buf) { return encoding.convert(buf, 'utf-8', this._charset).toString('utf-8'); }; @@ -121,17 +153,15 @@ Parser.prototype.types = { * String matches for lexer */ Parser.prototype.symbols = { - quotes: /["']/, - comments: /#/, whitespace: /\s/, key: /[\w\-[\]]/, keyNames: /^(?:msgctxt|msgid(?:_plural)?|msgstr(?:\[\d+])?)$/ }; - /** * Token parser. 
Parsed state can be found from this._lex * - * @param {String} chunk String + * @param {string} chunk String + * @throws {ParserError} Throws a SyntaxError if the value doesn't match the key names. */ Parser.prototype._lexer = function (chunk) { let chr; @@ -146,20 +176,20 @@ Parser.prototype._lexer = function (chunk) { switch (this._state) { case this.states.none: case this.states.obsolete: - if (chr.match(this.symbols.quotes)) { + if (chr === '"' || chr === "'") { this._node = { type: this.types.string, value: '', quote: chr }; - this._lex.push(this._node); + this._lex.push(/** @type {Node} */ (this._node)); this._state = this.states.string; - } else if (chr.match(this.symbols.comments)) { + } else if (chr === '#') { this._node = { type: this.types.comments, value: '' }; - this._lex.push(this._node); + this._lex.push(/** @type {Node} */ (this._node)); this._state = this.states.comments; } else if (!chr.match(this.symbols.whitespace)) { this._node = { @@ -169,7 +199,7 @@ Parser.prototype._lexer = function (chunk) { if (this._state === this.states.obsolete) { this._node.obsolete = true; } - this._lex.push(this._node); + this._lex.push(/** @type {Node} */ (this._node)); this._state = this.states.key; } break; @@ -213,12 +243,8 @@ Parser.prototype._lexer = function (chunk) { break; case this.states.key: if (!chr.match(this.symbols.key)) { - if (!this._node.value.match(this.symbols.keyNames)) { - const err = new SyntaxError(`Error parsing PO data: Invalid key name "${this._node.value}" at line ${this._lineNumber}. This can be caused by an unescaped quote character in a msgid or msgstr value.`); - - err.lineNumber = this._lineNumber; - - throw err; + if (!this._node.value?.match(this.symbols.keyNames)) { + throw new ParserError(`Error parsing PO data: Invalid key name "${this._node.value}" at line ${this._lineNumber}. 
This can be caused by an unescaped quote character in a msgid or msgstr value.`, this._lineNumber); } this._state = this.states.none; i--; @@ -233,16 +259,17 @@ Parser.prototype._lexer = function (chunk) { /** * Join multi line strings * - * @param {Object} tokens Parsed tokens - * @return {Object} Parsed tokens, with multi line strings joined into one + * @param {Node[]} tokens Parsed tokens + * @return {Node[]} Parsed tokens, with multi line strings joined into one */ Parser.prototype._joinStringValues = function (tokens) { + /** @type {Node[]} */ const response = []; let lastNode; for (let i = 0, len = tokens.length; i < len; i++) { if (lastNode && tokens[i].type === this.types.string && lastNode.type === this.types.string) { - lastNode.value += tokens[i].value; + lastNode.value += tokens[i].value ?? ''; } else if (lastNode && tokens[i].type === this.types.comments && lastNode.type === this.types.comments) { lastNode.value += '\n' + tokens[i].value; } else { @@ -257,15 +284,17 @@ Parser.prototype._joinStringValues = function (tokens) { /** * Parse comments into separate comment blocks * - * @param {Object} tokens Parsed tokens + * @param {Node[]} tokens Parsed tokens */ Parser.prototype._parseComments = function (tokens) { - // parse comments - tokens.forEach(node => { + for (const node of tokens) { if (!node || node.type !== this.types.comments) { - return; + continue; } + /** @type {{ + [key: string]: string[]; + }} */ const comment = { translator: [], extracted: [], @@ -274,48 +303,53 @@ Parser.prototype._parseComments = function (tokens) { previous: [] }; + /** @type {string[]} */ const lines = (node.value || '').split(/\n/); - lines.forEach(line => { + for (const line of lines) { switch (line.charAt(0) || '') { case ':': - comment.reference.push(line.substr(1).trim()); + comment.reference.push(line.substring(1).trim()); break; case '.': - comment.extracted.push(line.substr(1).replace(/^\s+/, '')); + comment.extracted.push(line.substring(1).replace(/^\s+/, 
'')); break; case ',': - comment.flag.push(line.substr(1).replace(/^\s+/, '')); + comment.flag.push(line.substring(1).replace(/^\s+/, '')); break; case '|': - comment.previous.push(line.substr(1).replace(/^\s+/, '')); + comment.previous.push(line.substring(1).replace(/^\s+/, '')); break; case '~': break; default: comment.translator.push(line.replace(/^\s+/, '')); } - }); + } + + const finalToken = /** @type {Omit & { value: Record}} */ (/** @type {unknown} */ (node)); - node.value = {}; + finalToken.value = {}; - Object.keys(comment).forEach(key => { - if (comment[key] && comment[key].length) { - node.value[key] = comment[key].join('\n'); + for (const key of Object.keys(comment)) { + if (key && comment[key]?.length) { + finalToken.value[key] = comment[key].join('\n'); } - }); - }); + } + } }; /** * Join gettext keys with values * - * @param {Object} tokens Parsed tokens - * @return {Object} Tokens + * @param {(Node & { value?: string })[]} tokens - Parsed tokens containing key-value pairs + * @return {Node[]} - An array of Nodes representing joined tokens */ Parser.prototype._handleKeys = function (tokens) { + /** @type {Node[]} */ const response = []; - let lastNode; + /** @type {Partial & { comments?: string }} */ + let lastNode = {}; for (let i = 0, len = tokens.length; i < len; i++) { if (tokens[i].type === this.types.key) { @@ -329,7 +363,7 @@ Parser.prototype._handleKeys = function (tokens) { lastNode.comments = tokens[i - 1].value; } lastNode.value = ''; - response.push(lastNode); + response.push(/** @type {Node} */ (lastNode)); } else if (tokens[i].type === this.types.string && lastNode) { lastNode.value += tokens[i].value; } @@ -341,22 +375,28 @@ Parser.prototype._handleKeys = function (tokens) { /** * Separate different values into individual translation objects * - * @param {Object} tokens Parsed tokens - * @return {Object} Tokens + * @param {Node[]} tokens Parsed tokens + * @return {GetTextTranslation[]} Tokens */ Parser.prototype._handleValues = 
function (tokens) { const response = []; - let lastNode; + /** @type {GetTextTranslation} Translation object */ + let lastNode = {}; + /** @type {string | undefined} */ let curContext; + /** @type {GetTextComment | undefined} */ let curComments; for (let i = 0, len = tokens.length; i < len; i++) { - if (tokens[i].key.toLowerCase() === 'msgctxt') { + const tokenKey = tokens[i].key; + if (!tokenKey) continue; + if (tokenKey.toLowerCase() === 'msgctxt') { curContext = tokens[i].value; curComments = tokens[i].comments; - } else if (tokens[i].key.toLowerCase() === 'msgid') { + } else if (tokenKey.toLowerCase() === 'msgid') { lastNode = { - msgid: tokens[i].value + msgid: tokens[i].value, + msgstr: [] }; if (tokens[i].obsolete) { lastNode.obsolete = true; @@ -374,10 +414,10 @@ Parser.prototype._handleValues = function (tokens) { lastNode.comments = tokens[i].comments; } - curContext = false; - curComments = false; + curContext = undefined; + curComments = undefined; response.push(lastNode); - } else if (tokens[i].key.toLowerCase() === 'msgid_plural') { + } else if (tokenKey.toLowerCase() === 'msgid_plural') { if (lastNode) { if (this._validation && 'msgid_plural' in lastNode) { throw new SyntaxError(`Multiple msgid_plural error: entry "${lastNode.msgid}" in "${lastNode.msgctxt || ''}" context has multiple msgid_plural declarations.`); @@ -390,19 +430,21 @@ Parser.prototype._handleValues = function (tokens) { lastNode.comments = tokens[i].comments; } - curContext = false; - curComments = false; - } else if (tokens[i].key.substr(0, 6).toLowerCase() === 'msgstr') { + curContext = undefined; + curComments = undefined; + } else if (tokenKey.substring(0, 6).toLowerCase() === 'msgstr') { if (lastNode) { - lastNode.msgstr = (lastNode.msgstr || []).concat(tokens[i].value); + const strData = lastNode.msgstr || []; + const tokenValue = tokens[i].value; + lastNode.msgstr = (strData).concat(tokenValue); } if (tokens[i].comments && !lastNode.comments) { lastNode.comments = 
tokens[i].comments; } - curContext = false; - curComments = false; + curContext = undefined; + curComments = undefined; } } @@ -412,11 +454,11 @@ Parser.prototype._handleValues = function (tokens) { /** * Validate token * - * @param {Object} token Parsed token - * @param {Object} translations Translation table + * @param {GetTextTranslation} token Parsed token + * @param {Translations} translations Translation table * @param {string} msgctxt Message entry context - * @param {number} nplurals Number of epected plural forms - * @throws Will throw an error if token validation fails + * @param {number} nplurals Number of expected plural forms + * @throws {Error} Will throw an error if token validation fails */ Parser.prototype._validateToken = function ( { @@ -428,10 +470,6 @@ Parser.prototype._validateToken = function ( msgctxt, nplurals ) { - if (!this._validation) { - return; - } - if (msgid in translations[msgctxt]) { throw new SyntaxError(`Duplicate msgid error: entry "${msgid}" in "${msgctxt}" context has already been declared.`); // eslint-disable-next-line camelcase @@ -447,20 +485,24 @@ Parser.prototype._validateToken = function ( /** * Compose a translation table from tokens object * - * @param {Object} tokens Parsed tokens - * @return {Object} Translation table + * @param {GetTextTranslation[]} tokens Parsed tokens + * @return {GetTextTranslations} Translation table */ Parser.prototype._normalize = function (tokens) { + /** + * Translation table to be returned + * @type {Omit & Partial> } table + */ const table = { charset: this._charset, headers: undefined, translations: {} }; let nplurals = 1; - let msgctxt; for (let i = 0, len = tokens.length; i < len; i++) { - msgctxt = tokens[i].msgctxt || ''; + /** @type {string} */ + const msgctxt = tokens[i].msgctxt || ''; if (tokens[i].obsolete) { if (!table.obsolete) { @@ -487,58 +529,73 @@ Parser.prototype._normalize = function (tokens) { nplurals = parseNPluralFromHeadersSafely(table.headers, nplurals); } - 
this._validateToken(tokens[i], table.translations, msgctxt, nplurals); + if (this._validation) { + this._validateToken(tokens[i], table.translations, msgctxt, nplurals); + } - table.translations[msgctxt][tokens[i].msgid] = tokens[i]; + const token = tokens[i]; + table.translations[msgctxt][token.msgid] = token; } - return table; + return /** @type {GetTextTranslations} */ (table); }; /** * Converts parsed tokens to a translation table * - * @param {Object} tokens Parsed tokens - * @returns {Object} Translation table + * @param {Node[]} tokens Parsed tokens + * @returns {GetTextTranslations} Translation table */ Parser.prototype._finalize = function (tokens) { + /** + * Translation table + */ let data = this._joinStringValues(tokens); this._parseComments(data); + // The PO parser gettext keys with values data = this._handleKeys(data); - data = this._handleValues(data); - return this._normalize(data); + // The PO parser individual translation objects + const dataset = this._handleValues(data); + return this._normalize(dataset); }; /** * Creates a transform stream for parsing PO input - * - * @typedef {{ defaultCharset: strubg, validation: boolean }} Options * @constructor - * @param {Options} options Optional options with defaultCharset and validation - * @param {import('readable-stream').TransformOptions} transformOptions Optional stream options + * @this {PoParserTransform & Transform} + * + * @param {ParserOptions} options Optional options with defaultCharset and validation + * @param {TransformOptions & {initialTreshold?: number;}} transformOptions Optional stream options */ function PoParserTransform (options, transformOptions) { + const { initialTreshold, ..._transformOptions } = transformOptions; this.options = options; + /** @type {Parser|false} */ this._parser = false; this._tokens = {}; + /** @type {Buffer[]} */ this._cache = []; this._cacheSize = 0; this.initialTreshold = transformOptions.initialTreshold || 2 * 1024; - Transform.call(this, 
transformOptions); + Transform.call(this, _transformOptions); + this._writableState.objectMode = false; this._readableState.objectMode = true; } util.inherits(PoParserTransform, Transform); /** - * Processes a chunk of the input stream - */ + * Processes a chunk of the input stream + * @param {Buffer} chunk Chunk of the input stream + * @param {string} encoding Encoding of the chunk + * @param {DoneCallback} done Callback to call when the chunk is processed + */ PoParserTransform.prototype._transform = function (chunk, encoding, done) { let i; let len = 0; @@ -581,16 +638,16 @@ PoParserTransform.prototype._transform = function (chunk, encoding, done) { } // it seems we found some 8bit bytes from the end of the string, so let's cache these if (len) { - this._cache = [chunk.slice(chunk.length - len)]; + this._cache = [chunk.subarray(chunk.length - len)]; this._cacheSize = this._cache[0].length; - chunk = chunk.slice(0, chunk.length - len); + chunk = chunk.subarray(0, chunk.length - len); } // chunk might be empty if it only continued of 8bit bytes and these were all cached if (chunk.length) { try { this._parser._lexer(this._parser._toString(chunk)); - } catch (error) { + } catch (/** @type {any} error */error) { setImmediate(() => { done(error); }); @@ -603,8 +660,10 @@ PoParserTransform.prototype._transform = function (chunk, encoding, done) { }; /** - * Once all input has been processed emit the parsed translation table as an object - */ + * Once all inputs have been processed, emit the parsed translation table as an object + * + * @param {DoneCallback} done Callback to call when the chunk is processed + */ PoParserTransform.prototype._flush = function (done) { let chunk; @@ -616,7 +675,7 @@ PoParserTransform.prototype._flush = function (done) { this._parser = new Parser(chunk, this.options); } - if (chunk) { + if (chunk && this._parser) { try { this._parser._lexer(this._parser._toString(chunk)); } catch (error) { @@ -629,7 +688,7 @@ 
PoParserTransform.prototype._flush = function (done) { } if (this._parser) { - this.push(this._parser._finalize(this._parser._lex)); + /** @type {any} */ (this).push(this._parser._finalize(this._parser._lex)); } setImmediate(done); diff --git a/lib/shared.js b/src/shared.js similarity index 66% rename from lib/shared.js rename to src/shared.js index 8cf706c..ae005b7 100644 --- a/lib/shared.js +++ b/src/shared.js @@ -1,5 +1,7 @@ // see https://www.gnu.org/software/gettext/manual/html_node/Header-Entry.html +/** @type {string} Header name for "Plural-Forms" */ const PLURAL_FORMS = 'Plural-Forms'; +/** @typedef {Map} Headers Map of header keys to header names */ export const HEADERS = new Map([ ['project-id-version', 'Project-Id-Version'], ['report-msgid-bugs-to', 'Report-Msgid-Bugs-To'], @@ -18,12 +20,14 @@ const PLURAL_FORM_HEADER_NPLURALS_REGEX = /nplurals\s*=\s*(?\d+)/; /** * Parses a header string into an object of key-value pairs * - * @param {String} str Header string - * @return {Object} An object of key-value pairs + * @param {string} str Header string + * @return {Record} An object of key-value pairs */ export function parseHeader (str = '') { - return str.split('\n') - .reduce((headers, line) => { + /** @type {string} Header string */ + return str + .split('\n') + .reduce((/** @type {Record} */ headers, line) => { const parts = line.split(':'); let key = (parts.shift() || '').trim(); @@ -42,11 +46,12 @@ export function parseHeader (str = '') { /** * Attempts to safely parse 'nplurals" value from "Plural-Forms" header * - * @param {Object} [headers = {}] An object with parsed headers + * @param {Record} [headers] An object with parsed headers + * @param {number} fallback Fallback value if "Plural-Forms" header is absent * @returns {number} Parsed result */ -export function parseNPluralFromHeadersSafely (headers = {}, fallback = 1) { - const pluralForms = headers[PLURAL_FORMS]; +export function parseNPluralFromHeadersSafely (headers, fallback = 1) { + const 
pluralForms = headers ? headers[PLURAL_FORMS] : false; if (!pluralForms) { return fallback; @@ -62,8 +67,8 @@ export function parseNPluralFromHeadersSafely (headers = {}, fallback = 1) { /** * Joins a header object of key value pairs into a header string * - * @param {Object} header Object of key value pairs - * @return {String} Header string + * @param {Record} header Object of key value pairs + * @return {string} An object of key-value pairs */ export function generateHeader (header = {}) { const keys = Object.keys(header) @@ -82,8 +87,9 @@ export function generateHeader (header = {}) { /** * Normalizes charset name. Converts utf8 to utf-8, WIN1257 to windows-1257 etc. * - * @param {String} charset Charset name - * @return {String} Normalized charset name + * @param {string} charset Charset name + * @param {string} defaultCharset Default charset name, defaults to 'iso-8859-1' + * @return {string} Normalized charset name */ export function formatCharset (charset = 'iso-8859-1', defaultCharset = 'iso-8859-1') { return charset.toString() @@ -99,9 +105,9 @@ export function formatCharset (charset = 'iso-8859-1', defaultCharset = 'iso-885 /** * Folds long lines according to PO format * - * @param {String} str PO formatted string to be folded - * @param {Number} [maxLen=76] Maximum allowed length for folded lines - * @return {Array} An array of lines + * @param {string} str PO formatted string to be folded + * @param {number} [maxLen=76] Maximum allowed length for folded lines + * @return {string[]} An array of lines */ export function foldLine (str, maxLen = 76) { const lines = []; @@ -111,11 +117,11 @@ export function foldLine (str, maxLen = 76) { let match; while (pos < len) { - curLine = str.substr(pos, maxLen); + curLine = str.substring(pos, pos + maxLen); // ensure that the line never ends with a partial escaping // make longer lines if needed - while (curLine.substr(-1) === '\\' && pos + curLine.length < len) { + while (curLine.substring(-1) === '\\' && pos + 
curLine.length < len) { curLine += str.charAt(pos + curLine.length); } @@ -125,7 +131,7 @@ export function foldLine (str, maxLen = 76) { curLine = match[0]; } else if (pos + curLine.length < len) { // if we're not at the end - if ((match = /.*\s+/.exec(curLine)) && /[^\s]/.test(match[0])) { + if ((match = /.*\s+/.exec(curLine)) && /\S/.test(match[0])) { // use everything before and including the last white space character (if anything) curLine = match[0]; } else if ((match = /.*[\x21-\x2f0-9\x5b-\x60\x7b-\x7e]+/.exec(curLine)) && /[^\x21-\x2f0-9\x5b-\x60\x7b-\x7e]/.test(match[0])) { @@ -144,8 +150,9 @@ export function foldLine (str, maxLen = 76) { /** * Comparator function for comparing msgid * - * @param {Object} object with msgid prev - * @param {Object} object with msgid next + * @template {Buffer|string} T + * @param {{msgid: T}} left with msgid prev + * @param {{msgid: T}} right with msgid next * @returns {number} comparator index */ export function compareMsgid ({ msgid: left }, { msgid: right }) { @@ -159,3 +166,17 @@ export function compareMsgid ({ msgid: left }, { msgid: right }) { return 0; } + +/** + * Custom SyntaxError subclass that includes the lineNumber property. + */ +export class ParserError extends SyntaxError { + /** + * @param {string} message - Error message. + * @param {number} lineNumber - Line number where the error occurred. + */ + constructor (message, lineNumber) { + super(message); + this.lineNumber = lineNumber; + } +} diff --git a/src/types.js b/src/types.js new file mode 100644 index 0000000..450d778 --- /dev/null +++ b/src/types.js @@ -0,0 +1,52 @@ +/** + * Represents a GetText comment. + * @typedef {Object} GetTextComment + * @property {string} [translator] Translator information. + * @property {string} [reference] Reference information. + * @property {string} [extracted] Extracted comments. + * @property {string} [flag] Flags. + * @property {string} [previous] Previous string. + */ + +/** + * Represents a GetText translation. 
+ * @typedef {Object} GetTextTranslation + * @property {string} [msgctxt] Context of the message. + * @property {string} msgid The singular message ID. + * @property {string} [msgid_plural] The plural message ID. + * @property {string[]} msgstr Array of translated strings. + * @property {GetTextComment} [comments] Comments associated with the translation. + * @property {boolean} [obsolete] Whether the translation is obsolete. + */ + +/** + * @typedef {Record>} Translations The translations index. + */ + +/** + * Represents GetText translations. + * @typedef {Object} GetTextTranslations + * @property {string|undefined} charset Character set. + * @property {Record} headers Headers. + * @property {Translations} [obsolete] Obsolete messages. + * @property {Translations} translations Translations. + */ + +/** + * Options for the parser. + * @typedef {Object} ParserOptions + * @property {string} [defaultCharset] Default character set. + * @property {boolean} [validation] Whether to perform validation. + * @property {number} [foldLength] the fold length. + * @property {boolean} [escapeCharacters] Whether to escape characters. + * @property {boolean} [sort] Whether to sort messages. + * @property {string} [eol] End of line character. + */ + +/** + * @typedef {('writeUInt32LE'|'writeUInt32BE')} WriteFunc Type definition for write functions. + */ + +/** + * @typedef {('readUInt32LE'|'readUInt32BE')} ReadFunc Type definition for read functions. 
+ */ diff --git a/test/mo-compiler-test.js b/test/mo-compiler-test.js index 4796425..3b0a8b6 100644 --- a/test/mo-compiler-test.js +++ b/test/mo-compiler-test.js @@ -1,9 +1,9 @@ +import { promisify } from 'node:util'; +import path from 'node:path'; +import { mo } from '../src/index.js'; +import { readFile as fsReadFile } from 'node:fs'; +import { fileURLToPath } from 'node:url'; import * as chai from 'chai'; -import { promisify } from 'util'; -import path from 'path'; -import { mo } from '../index.js'; -import { readFile as fsReadFile } from 'fs'; -import { fileURLToPath } from 'url'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); diff --git a/test/mo-parser-test.js b/test/mo-parser-test.js index 723368a..4391471 100644 --- a/test/mo-parser-test.js +++ b/test/mo-parser-test.js @@ -1,9 +1,9 @@ +import { promisify } from 'node:util'; +import path from 'node:path'; +import { readFile as fsReadFile } from 'node:fs'; +import { fileURLToPath } from 'node:url'; import * as chai from 'chai'; -import { promisify } from 'util'; -import path from 'path'; -import { mo } from '../index.js'; -import { readFile as fsReadFile } from 'fs'; -import { fileURLToPath } from 'url'; +import { mo } from '../src/index.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); diff --git a/test/module.mjs b/test/module.mjs index 1796fd6..b1b48b0 100644 --- a/test/module.mjs +++ b/test/module.mjs @@ -1,5 +1,5 @@ import { expect } from 'chai'; -import { po, mo } from '../index.js'; +import { po, mo } from '../src/index.js'; describe('esm module', () => { it('should allow named imports', () => { diff --git a/test/po-compiler-test.js b/test/po-compiler-test.js index 18ad149..0e969ce 100644 --- a/test/po-compiler-test.js +++ b/test/po-compiler-test.js @@ -1,10 +1,10 @@ -import { EOL } from 'os'; -import { promisify } from 'util'; -import path from 'path'; -import { readFile as fsReadFile } from 'fs'; +import 
{ readFile as fsReadFile } from 'node:fs'; +import { promisify } from 'node:util'; +import path from 'node:path'; +import { EOL } from 'node:os'; +import { fileURLToPath } from 'node:url'; +import { po } from '../src/index.js'; import * as chai from 'chai'; -import { po } from '../index.js'; -import { fileURLToPath } from 'url'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); diff --git a/test/po-obsolete-test.js b/test/po-obsolete-test.js index f0e696f..d8a4a18 100644 --- a/test/po-obsolete-test.js +++ b/test/po-obsolete-test.js @@ -1,10 +1,10 @@ -import { EOL } from 'os'; +import { EOL } from 'node:os'; +import path from 'node:path'; +import fs from 'node:fs'; +import { promisify } from 'node:util'; import * as chai from 'chai'; -import { promisify } from 'util'; -import path from 'path'; -import fs from 'fs'; -import * as gettextParser from '../index.js'; -import { fileURLToPath } from 'url'; +import * as gettextParser from '../src/index.js'; +import { fileURLToPath } from 'node:url'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); diff --git a/test/po-parser-test.js b/test/po-parser-test.js index 3a006c1..644e222 100644 --- a/test/po-parser-test.js +++ b/test/po-parser-test.js @@ -1,9 +1,9 @@ import * as chai from 'chai'; -import { promisify } from 'util'; -import path from 'path'; -import fs from 'fs'; -import * as gettextParser from '../index.js'; -import { fileURLToPath } from 'url'; +import { promisify } from 'node:util'; +import path from 'node:path'; +import fs from 'node:fs'; +import * as gettextParser from '../src/index.js'; +import { fileURLToPath } from 'node:url'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); diff --git a/test/shared.js b/test/shared.js index c40dfd6..0263c02 100644 --- a/test/shared.js +++ b/test/shared.js @@ -1,11 +1,9 @@ -'use strict'; - +import { promisify } from 'node:util'; +import path 
from 'node:path'; +import { readFile as fsReadFile } from 'node:fs'; +import { fileURLToPath } from 'node:url'; import * as chai from 'chai'; -import { promisify } from 'util'; -import path from 'path'; -import { formatCharset, parseHeader, generateHeader, foldLine, parseNPluralFromHeadersSafely } from '../lib/shared.js'; -import { readFile as fsReadFile } from 'fs'; -import { fileURLToPath } from 'url'; +import { formatCharset, parseHeader, generateHeader, foldLine, parseNPluralFromHeadersSafely } from '../src/shared.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..b82537e --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,37 @@ +{ + "compilerOptions": { + "noImplicitAny": true, + "removeComments": false, + "module": "Node16", + "moduleResolution": "Node16", + "target": "ES2018", + "lib": [ + "ES2018" + ], + // Strict mode + "strict": true, + // Allow javascript files + "allowJs": true, + // Check js files for errors + "checkJs": true, + // the directory sources are in + "rootDir": "src", + // Output d.ts files to @types + "outDir": "lib", + // Generate d.ts files + "declaration": true, + // Minify + "pretty": false, + // Skip lib check when compiling + "skipLibCheck": true, + // For providing missing package types + "typeRoots": [ + "./types", + "./node_modules/@types" + ], + }, + "include": [ + "src/**/*", + "types/**/*" + ] +} diff --git a/types/encoding/index.d.ts b/types/encoding/index.d.ts new file mode 100644 index 0000000..3150d35 --- /dev/null +++ b/types/encoding/index.d.ts @@ -0,0 +1,3 @@ +declare module 'encoding' { + function convert(buffer: Buffer | string, charset?: string, fromCharset?: string): Buffer; +}