From 473bec88be8ac63ee9cb0520135fcf93c3423f32 Mon Sep 17 00:00:00 2001 From: wseymour15 Date: Wed, 11 Oct 2023 15:40:28 -0500 Subject: [PATCH 1/3] fix: 708 captions multi-byte char fix --- lib/m2ts/caption-stream.js | 28 +++++++++++++++++++++------- test/caption-stream.test.js | 27 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/lib/m2ts/caption-stream.js b/lib/m2ts/caption-stream.js index ef633349..4b2059b5 100644 --- a/lib/m2ts/caption-stream.js +++ b/lib/m2ts/caption-stream.js @@ -688,18 +688,32 @@ Cea708Stream.prototype.handleText = function(i, service, options) { var char; var charCodeArray; + // Converts an array of bytes to a unicode hex string. + function toHexString(byteArray) { + return Array.from(byteArray, function(byte) { + return ('0' + (byte & 0xFF).toString(16)).slice(-2); + }).join(''); + }; + + if (isMultiByte) { + charCodeArray = [currentByte, nextByte]; + i++; + } else { + charCodeArray = [currentByte]; + } + // Use the TextDecoder if one was created for this service if (service.textDecoder_ && !isExtended) { + char = service.textDecoder_.decode(new Uint8Array(charCodeArray)); + } else { + // We assume any multi-byte char without a decoder is unicode. if (isMultiByte) { - charCodeArray = [currentByte, nextByte]; - i++; + const unicode = toHexString(charCodeArray); + // Takes a unicode hex string and creates a single character. 
+ char = String.fromCharCode(parseInt(unicode, 16)); } else { - charCodeArray = [currentByte]; + char = get708CharFromCode(extended | currentByte); } - - char = service.textDecoder_.decode(new Uint8Array(charCodeArray)); - } else { - char = get708CharFromCode(extended | currentByte); } if (win.pendingNewLine && !win.isEmpty()) { diff --git a/test/caption-stream.test.js b/test/caption-stream.test.js index b60041a8..f0821a82 100644 --- a/test/caption-stream.test.js +++ b/test/caption-stream.test.js @@ -3051,6 +3051,33 @@ QUnit.test('Decodes multibyte characters if valid encoding option is provided an } }); +QUnit.test('Decodes multi-byte characters as unicode if no valid encoding option is provided', function(assert) { + var captions = []; + + cea708Stream = new m2ts.Cea708Stream({ + captionServices: { + SERVICE1: {} + } + }); + + cea708Stream.on('data', function(caption) { + captions.push(caption); + }); + + cc708Korean.forEach(cea708Stream.push, cea708Stream); + + cea708Stream.flushDisplayed(4721138662, cea708Stream.services[1]); + + assert.equal(captions.length, 1, 'parsed single caption correctly'); + + assert.notOk(cea708Stream.services[1].textDecoder_, 'TextDecoder was not created'); + assert.equal( + captions[0].text, + '듏낡 ', + 'parsed multibyte characters correctly' + ); +}); + QUnit.test('Creates TextDecoder only if valid encoding value is provided', function(assert) { var secondCea708Stream; From 28d03178503763cb6626b9c2f4f4aec9905e21b3 Mon Sep 17 00:00:00 2001 From: wseymour15 Date: Wed, 11 Oct 2023 15:56:20 -0500 Subject: [PATCH 2/3] dont use Array.from --- lib/m2ts/caption-stream.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/m2ts/caption-stream.js b/lib/m2ts/caption-stream.js index 4b2059b5..a2d3d9b0 100644 --- a/lib/m2ts/caption-stream.js +++ b/lib/m2ts/caption-stream.js @@ -690,9 +690,12 @@ Cea708Stream.prototype.handleText = function(i, service, options) { // Converts an array of bytes to a unicode hex string. 
function toHexString(byteArray) { - return Array.from(byteArray, function(byte) { - return ('0' + (byte & 0xFF).toString(16)).slice(-2); - }).join(''); + const newArr = []; + byteArray.forEach((byte) => { + newArr.push(('0' + (byte & 0xFF).toString(16)).slice(-2)); + }); + + return newArr.join(''); }; if (isMultiByte) { From 56a3d9d4beec260c55259fac3b49687b94c29ac7 Mon Sep 17 00:00:00 2001 From: wseymour15 Date: Wed, 11 Oct 2023 16:03:36 -0500 Subject: [PATCH 3/3] improve toHexString --- lib/m2ts/caption-stream.js | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/m2ts/caption-stream.js b/lib/m2ts/caption-stream.js index a2d3d9b0..286c8ad1 100644 --- a/lib/m2ts/caption-stream.js +++ b/lib/m2ts/caption-stream.js @@ -690,12 +690,9 @@ Cea708Stream.prototype.handleText = function(i, service, options) { // Converts an array of bytes to a unicode hex string. function toHexString(byteArray) { - const newArr = []; - byteArray.forEach((byte) => { - newArr.push(('0' + (byte & 0xFF).toString(16)).slice(-2)); - }); - - return newArr.join(''); + return byteArray.map((byte) => { + return ('0' + (byte & 0xFF).toString(16)).slice(-2); + }).join(''); }; if (isMultiByte) {