Skip to content

Commit

Permalink
Merge pull request #3435 from mikiher/comic-book-extractors
Browse files Browse the repository at this point in the history
Move to node-unrar-js for cbr and node-stream-zip for cbz
  • Loading branch information
advplyr authored Sep 18, 2024
2 parents 22ad16e + 1a8811b commit 8f96d20
Show file tree
Hide file tree
Showing 4 changed files with 238 additions and 53 deletions.
9 changes: 9 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"graceful-fs": "^4.2.10",
"htmlparser2": "^8.0.1",
"lru-cache": "^10.0.3",
"node-unrar-js": "^2.0.2",
"nodemailer": "^6.9.13",
"openid-client": "^5.6.1",
"p-throttle": "^4.1.1",
Expand Down
207 changes: 207 additions & 0 deletions server/utils/comicBookExtractors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
const Path = require('path')
const os = require('os')
const unrar = require('node-unrar-js')
const Logger = require('../Logger')
const fs = require('../libs/fsExtra')
const StreamZip = require('../libs/nodeStreamZip')
const Archive = require('../libs/libarchive/archive')
const { isWritable } = require('./fileUtils')

class AbstractComicBookExtractor {
constructor(comicPath) {
this.comicPath = comicPath
}

async getBuffer() {
if (!(await fs.pathExists(this.comicPath))) {
Logger.error(`[parseComicMetadata] Comic path does not exist "${this.comicPath}"`)
return null
}
try {
return fs.readFile(this.comicPath)
} catch (error) {
Logger.error(`[parseComicMetadata] Failed to read comic at "${this.comicPath}"`, error)
return null
}
}

async open() {
throw new Error('Not implemented')
}

async getFilePaths() {
throw new Error('Not implemented')
}

async extractToFile(filePath, outputFilePath) {
throw new Error('Not implemented')
}

async extractToBuffer(filePath) {
throw new Error('Not implemented')
}

close() {
throw new Error('Not implemented')
}
}

class CbrComicBookExtractor extends AbstractComicBookExtractor {
constructor(comicPath) {
super(comicPath)
this.archive = null
this.tmpDir = null
}

async open() {
this.tmpDir = global.MetadataPath ? Path.join(global.MetadataPath, 'tmp') : os.tmpdir()
await fs.ensureDir(this.tmpDir)
if (!(await isWritable(this.tmpDir))) throw new Error(`[CbrComicBookExtractor] Temp directory "${this.tmpDir}" is not writable`)
this.archive = await unrar.createExtractorFromFile({ filepath: this.comicPath, targetPath: this.tmpDir })
Logger.debug(`[CbrComicBookExtractor] Opened comic book "${this.comicPath}". Using temp directory "${this.tmpDir}" for extraction.`)
}

async getFilePaths() {
if (!this.archive) return null
const list = this.archive.getFileList()
const fileHeaders = [...list.fileHeaders]
const filePaths = fileHeaders.filter((fh) => !fh.flags.directory).map((fh) => fh.name)
Logger.debug(`[CbrComicBookExtractor] Found ${filePaths.length} files in comic book "${this.comicPath}"`)
return filePaths
}

async removeEmptyParentDirs(file) {
let dir = Path.dirname(file)
while (dir !== '.') {
const fullDirPath = Path.join(this.tmpDir, dir)
const files = await fs.readdir(fullDirPath)
if (files.length > 0) break
await fs.remove(fullDirPath)
dir = Path.dirname(dir)
}
}

async extractToBuffer(file) {
if (!this.archive) return null
const extracted = this.archive.extract({ files: [file] })
const files = [...extracted.files]
const filePath = Path.join(this.tmpDir, files[0].fileHeader.name)
const fileData = await fs.readFile(filePath)
await fs.remove(filePath)
await this.removeEmptyParentDirs(files[0].fileHeader.name)
Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${fileData.length}`)
return fileData
}

async extractToFile(file, outputFilePath) {
if (!this.archive) return false
const extracted = this.archive.extract({ files: [file] })
const files = [...extracted.files]
const extractedFilePath = Path.join(this.tmpDir, files[0].fileHeader.name)
await fs.move(extractedFilePath, outputFilePath, { overwrite: true })
await this.removeEmptyParentDirs(files[0].fileHeader.name)
Logger.debug(`[CbrComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
return true
}

close() {
Logger.debug(`[CbrComicBookExtractor] Closed comic book "${this.comicPath}"`)
}
}

class CbzComicBookExtractor extends AbstractComicBookExtractor {
constructor(comicPath) {
super(comicPath)
this.archive = null
}

async open() {
const buffer = await this.getBuffer()
this.archive = await Archive.open(buffer)
Logger.debug(`[CbzComicBookExtractor] Opened comic book "${this.comicPath}"`)
}

async getFilePaths() {
if (!this.archive) return null
const list = await this.archive.getFilesArray()
const fileNames = list.map((fo) => fo.file._path)
Logger.debug(`[CbzComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
return fileNames
}

async extractToBuffer(file) {
if (!this.archive) return null
const extracted = await this.archive.extractSingleFile(file)
Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted?.fileData.length}`)
return extracted?.fileData
}

async extractToFile(file, outputFilePath) {
const data = await this.extractToBuffer(file)
if (!data) return false
await fs.writeFile(outputFilePath, data)
Logger.debug(`[CbzComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
return true
}

close() {
this.archive?.close()
Logger.debug(`[CbzComicBookExtractor] Closed comic book "${this.comicPath}"`)
}
}

class CbzStreamZipComicBookExtractor extends AbstractComicBookExtractor {
constructor(comicPath) {
super(comicPath)
this.archive = null
}

async open() {
this.archive = new StreamZip.async({ file: this.comicPath })
Logger.debug(`[CbzStreamZipComicBookExtractor] Opened comic book "${this.comicPath}"`)
}

async getFilePaths() {
if (!this.archive) return null
const entries = await this.archive.entries()
const fileNames = Object.keys(entries).filter((entry) => !entries[entry].isDirectory)
Logger.debug(`[CbzStreamZipComicBookExtractor] Found ${fileNames.length} files in comic book "${this.comicPath}"`)
return fileNames
}

async extractToBuffer(file) {
if (!this.archive) return null
const extracted = await this.archive?.entryData(file)
Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to buffer, size: ${extracted.length}`)
return extracted
}

async extractToFile(file, outputFilePath) {
if (!this.archive) return false
try {
await this.archive.extract(file, outputFilePath)
Logger.debug(`[CbzStreamZipComicBookExtractor] Extracted file "${file}" from comic book "${this.comicPath}" to "${outputFilePath}"`)
return true
} catch (error) {
Logger.error(`[CbzStreamZipComicBookExtractor] Failed to extract file "${file}" to "${outputFilePath}"`, error)
return false
}
}

close() {
this.archive?.close()
Logger.debug(`[CbzStreamZipComicBookExtractor] Closed comic book "${this.comicPath}"`)
}
}

function createComicBookExtractor(comicPath) {
const ext = Path.extname(comicPath).toLowerCase()
if (ext === '.cbr') {
return new CbrComicBookExtractor(comicPath)
} else if (ext === '.cbz') {
return new CbzStreamZipComicBookExtractor(comicPath)
} else {
throw new Error(`Unsupported comic book format "${ext}"`)
}
}
module.exports = { createComicBookExtractor }
74 changes: 21 additions & 53 deletions server/utils/parsers/parseComicMetadata.js
Original file line number Diff line number Diff line change
@@ -1,28 +1,9 @@
const Path = require('path')
const globals = require('../globals')
const fs = require('../../libs/fsExtra')
const Logger = require('../../Logger')
const Archive = require('../../libs/libarchive/archive')
const { xmlToJSON } = require('../index')
const parseComicInfoMetadata = require('./parseComicInfoMetadata')

/**
*
* @param {string} filepath
* @returns {Promise<Buffer>}
*/
async function getComicFileBuffer(filepath) {
if (!(await fs.pathExists(filepath))) {
Logger.error(`[parseComicMetadata] Comic path does not exist "${filepath}"`)
return null
}
try {
return fs.readFile(filepath)
} catch (error) {
Logger.error(`[parseComicMetadata] Failed to read comic at "${filepath}"`, error)
return null
}
}
const globals = require('../globals')
const { xmlToJSON } = require('../index')
const { createComicBookExtractor } = require('../comicBookExtractors.js')

/**
* Extract cover image from comic return true if success
Expand All @@ -33,22 +14,11 @@ async function getComicFileBuffer(filepath) {
* @returns {Promise<boolean>}
*/
async function extractCoverImage(comicPath, comicImageFilepath, outputCoverPath) {
const comicFileBuffer = await getComicFileBuffer(comicPath)
if (!comicFileBuffer) return null

let archive = null
try {
archive = await Archive.open(comicFileBuffer)
const fileEntry = await archive.extractSingleFile(comicImageFilepath)

if (!fileEntry?.fileData) {
Logger.error(`[parseComicMetadata] Invalid file entry data for comicPath "${comicPath}"/${comicImageFilepath}`)
return false
}

await fs.writeFile(outputCoverPath, fileEntry.fileData)

return true
archive = createComicBookExtractor(comicPath)
await archive.open()
return await archive.extractToFile(comicImageFilepath, outputCoverPath)
} catch (error) {
Logger.error(`[parseComicMetadata] Failed to extract image "${comicImageFilepath}" from comicPath "${comicPath}" into "${outputCoverPath}"`, error)
return false
Expand All @@ -67,30 +37,28 @@ module.exports.extractCoverImage = extractCoverImage
*/
async function parse(ebookFile) {
const comicPath = ebookFile.metadata.path
Logger.debug(`Parsing metadata from comic at "${comicPath}"`)

const comicFileBuffer = await getComicFileBuffer(comicPath)
if (!comicFileBuffer) return null

Logger.debug(`[parseComicMetadata] Parsing comic metadata at "${comicPath}"`)
let archive = null
try {
archive = await Archive.open(comicFileBuffer)
archive = createComicBookExtractor(comicPath)
await archive.open()

const fileObjects = await archive.getFilesArray()
const filePaths = await archive.getFilePaths()

fileObjects.sort((a, b) => {
return a.file.name.localeCompare(b.file.name, undefined, {
// Sort the file paths in a natural order to get the first image
filePaths.sort((a, b) => {
return a.localeCompare(b, undefined, {
numeric: true,
sensitivity: 'base'
})
})

let metadata = null
const comicInfo = fileObjects.find((fo) => fo.file.name === 'ComicInfo.xml')
if (comicInfo) {
const comicInfoEntry = await comicInfo.file.extract()
if (comicInfoEntry?.fileData) {
const comicInfoStr = new TextDecoder().decode(comicInfoEntry.fileData)
const comicInfoPath = filePaths.find((filePath) => filePath === 'ComicInfo.xml')
if (comicInfoPath) {
const comicInfoData = await archive.extractToBuffer(comicInfoPath)
if (comicInfoData) {
const comicInfoStr = new TextDecoder().decode(comicInfoData)
const comicInfoJson = await xmlToJSON(comicInfoStr)
if (comicInfoJson) {
metadata = parseComicInfoMetadata.parse(comicInfoJson)
Expand All @@ -104,9 +72,9 @@ async function parse(ebookFile) {
metadata
}

const firstImage = fileObjects.find((fo) => globals.SupportedImageTypes.includes(Path.extname(fo.file.name).toLowerCase().slice(1)))
if (firstImage?.file?._path) {
payload.ebookCoverPath = firstImage.file._path
const firstImagePath = filePaths.find((filePath) => globals.SupportedImageTypes.includes(Path.extname(filePath).toLowerCase().slice(1)))
if (firstImagePath) {
payload.ebookCoverPath = firstImagePath
} else {
Logger.warn(`[parseComicMetadata] Cover image not found in comic at "${comicPath}"`)
}
Expand Down

0 comments on commit 8f96d20

Please sign in to comment.