Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
boxsnake committed May 30, 2022
2 parents ee2e416 + 2cedcb8 commit 7d46f32
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 16 deletions.
13 changes: 9 additions & 4 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ const argv = UtilArgs.getArgv();
const lhsQueue = UtilQueue.createQueue({ concurrency: argv.n })
const lhsFilePattern = UtilPath.resolve(argv.l, '**/*')
const lhsFilePaths = await UtilPath.glob(lhsFilePattern)
const lhsFileSummary = UtilCompare.getFileSummary(argv.l, lhsFilePaths, lhsQueue)
const lhsFileSummary = await UtilCompare.getFileSummary(argv.l, lhsFilePaths, lhsQueue)
const rhsQueue = UtilQueue.createQueue({ concurrency: argv.m })
const rhsFilePattern = UtilPath.resolve(argv.r, '**/*')
const rhsFilePaths = await UtilPath.glob(rhsFilePattern)
const rhsFileSummary = UtilCompare.getFileSummary(argv.r, rhsFilePaths, rhsQueue)
const rhsFileSummary = await UtilCompare.getFileSummary(argv.r, rhsFilePaths, rhsQueue)

// save file summary
const lhsFileSummaryOutputPath = UtilPath.resolve(argv.o, './file-summary-lhs.json')
Expand All @@ -26,12 +26,17 @@ const argv = UtilArgs.getArgv();
// compare summary
const compareSummary = UtilCompare.getCompareSummary(lhsFileSummary, rhsFileSummary)

// revalidate compare summary
const compareRevalidateQueueLhs = UtilQueue.createQueue({ concurrency: argv.n })
const compareRevalidateQueueRhs = UtilQueue.createQueue({ concurrency: argv.m })
const compareRevalidateSummary = await UtilCompare.revalidateCompareSummary(compareSummary, compareRevalidateQueueLhs, compareRevalidateQueueRhs)

// save compare summary
const compareSummaryOutputPath = UtilPath.resolve(argv.o, './compare-summary.json')
UtilFs.writeJson(compareSummaryOutputPath, compareSummary)
UtilFs.writeJson(compareSummaryOutputPath, compareRevalidateSummary)

// compare report
const compareReport = UtilCompare.getCompareReport(compareSummary)
const compareReport = UtilCompare.getCompareReport(compareRevalidateSummary)

// save compare report
const compareReportOutputPath = UtilPath.resolve(argv.o, './compare-report.html')
Expand Down
62 changes: 60 additions & 2 deletions util/compare.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@ import UtilPath from './path.js'
import UtilHash from './hash.js'
import UtilProgress from './progress.js'

function getFileSummary (base = '', paths = [], queue = null) {
async function getFileSummary (base = '', paths = [], queue = null) {
const countTotal = paths.length
const progress = UtilProgress.createProgressbar({ total: countTotal })
const summary = []
const hashChunkConfig = UtilHash.getHashChunkPlots({ name: 'single plot', offset: 0, limit: 100 * 1024 })

_.each(paths, async path => {
await queue.add(() => {
const filename = UtilPath.relative(base, path)
const md5 = UtilHash.getHash(path, { offset: 0, limit: 100 * 1024 })
const md5 = UtilHash.getHashByChunks(path, {
chunks: hashChunkConfig
})

const pack = {
path,
Expand All @@ -24,6 +27,44 @@ function getFileSummary (base = '', paths = [], queue = null) {
})
})

await queue.onIdle()

return summary
}

/**
 * Re-hash a list of already summarised files using samples taken across each file.
 *
 * For every entry a "distribution plots" chunk layout is requested from
 * `UtilHash.getHashChunkPlots` (10 chunks, up to 10 * 1024 bytes each) and the
 * resulting ranges are hashed with `UtilHash.getHashByChunks`.
 *
 * @param {Array<{path: string, filename: string}>} files - entries to re-hash; only
 *   `path` and `filename` are read.
 * @param {?{add: Function, onIdle: Function}} queue - task queue used to schedule the
 *   hashing work. When omitted, each task is run directly.
 * @returns {Promise<Array<{path: string, filename: string, md5: string}>>} one record
 *   per input file, in the order the tasks completed.
 * @throws rejects with the first error raised by a hashing task (for example an
 *   unreadable file) instead of silently returning a partial summary.
 */
async function rehashFileSummary (files = [], queue = null) {
  const countTotal = files.length
  const progress = UtilProgress.createProgressbar({ total: countTotal })
  const summary = []

  // Fall back to running the task in place when no queue was supplied; the async
  // wrapper turns a synchronous throw into a rejected promise, like a queue would.
  const schedule = queue ? task => queue.add(task) : async task => task()

  // Keep every scheduled promise so that a failing task rejects this function
  // rather than surfacing as an unhandled rejection.
  const pending = _.map(files, file => schedule(() => {
    const path = file.path
    const filename = file.filename
    const chunkConfig = UtilHash.getHashChunkPlots({
      name: 'distribution plots',
      file: path,
      chunks: 10,
      offset: 0,
      limit: 10 * 1024
    })
    const md5 = UtilHash.getHashByChunks(path, {
      chunks: chunkConfig
    })

    summary.push({ path, filename, md5 })
    progress.tick()
  }))

  await Promise.all(pending)

  if (queue) {
    // Preserve the original contract of returning only once the queue has drained.
    await queue.onIdle()
  }

  return summary
}

Expand Down Expand Up @@ -62,6 +103,21 @@ function getCompareSummary (lhs = [], rhs = []) {
return summary
}

/**
 * Re-check the compare-summary entries that were classified as `same`.
 *
 * Entries whose `type` is not 'same' are passed through unchanged. The `lhs` and
 * `rhs` file records of the 'same' entries are flattened, re-hashed through
 * `rehashFileSummary` (left side first, then right side) and compared again with
 * `getCompareSummary`.
 *
 * @param {Array<Object>} summary - compare summary entries; `type`, `lhs` and `rhs` are read.
 * @param {?Object} lhsQueue - queue handed to `rehashFileSummary` for the left-hand files.
 * @param {?Object} rhsQueue - queue handed to `rehashFileSummary` for the right-hand files.
 * @returns {Promise<Array<Object>>} the untouched entries followed by the re-compared ones.
 */
async function revalidateCompareSummary (summary = [], lhsQueue = null, rhsQueue = null) {
  // Split once: 'same' entries get re-examined, everything else is kept as is.
  const [matched, unmatched] = _.partition(summary, entry => entry.type === 'same')

  // Collect every file record on one side of the matched entries into a flat list.
  const gatherSide = side => _.flattenDeep(_.map(matched, entry => entry[side] || []))

  const lhsRehashed = await rehashFileSummary(gatherSide('lhs'), lhsQueue)
  const rhsRehashed = await rehashFileSummary(gatherSide('rhs'), rhsQueue)

  return _.concat([], unmatched, getCompareSummary(lhsRehashed, rhsRehashed))
}

function getCompareReport (summary = []) {
const template = `<!DOCTYPE html>
<html>
Expand Down Expand Up @@ -190,6 +246,8 @@ function getCompareReport (summary = []) {

// Functions exposed by this module through its default export.
export default {
getFileSummary,
rehashFileSummary,
getCompareSummary,
revalidateCompareSummary,
getCompareReport
}
58 changes: 48 additions & 10 deletions util/hash.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,60 @@ import _ from 'lodash'
import MD5 from 'md5'
import FsExtra from 'fs-extra'

function getHash (path = '', opts = {}) {
const offset = +opts.offset
const offsetVal = _.isFinite(offset) ? offset : 0
const limit = +opts.limit
const limitVal = _.isFinite(limit) ? limit : 102400
/**
 * Compute an MD5 digest over selected byte ranges of a file.
 *
 * The file is opened read-only, each range listed in `opts.chunks` is read into
 * its own buffer, the buffers are concatenated in order and the result is hashed.
 *
 * @param {string} path - file to read.
 * @param {Object} opts - options; only `opts.chunks` is read, an array of
 *   `{ offset, limit }` ranges.
 * @returns the value returned by `MD5(...)` for the concatenated ranges.
 */
function getHashByChunks (path = '', opts = {}) {
const fd = FsExtra.openSync(path, 'r')
const fstat = FsExtra.fstatSync(fd)
// NOTE(review): the next three lines use `offsetVal` / `limitVal`, which are not
// declared inside this function; they look like the removed side of this diff
// (the old single-range read) — confirm they are absent from the final file.
const bufSize = fstat.size - offsetVal > limitVal ? limitVal : fstat.size - offsetVal
const buf = Buffer.alloc(bufSize)
FsExtra.readSync(fd, buf, 0, bufSize, offsetVal)

// With no `opts.chunks` nothing is read and the digest covers an empty buffer.
const chunks = opts.chunks || []
const chunksBuf = _
.chain(chunks)
.map(chunk => {
// A missing, NaN or zero offset/limit falls back to 0 / 40960 respectively.
const bufOffset = +chunk.offset || 0
const bufLimit = +chunk.limit || 40960
// Clamp to the bytes left after the offset; never negative when the offset is past EOF.
const bufSize = Math.max(Math.min(bufLimit, fstat.size - bufOffset), 0)
const buf = Buffer.alloc(bufSize)
FsExtra.readSync(fd, buf, 0, bufSize, bufOffset)

return buf
})
.value()
const mergedBuf = Buffer.concat(chunksBuf)
FsExtra.closeSync(fd)
// NOTE(review): `md5` is declared twice below; the `MD5(buf)` line appears to be
// the removed side of this diff — confirm only `MD5(mergedBuf)` remains.
const md5 = MD5(buf)
const md5 = MD5(mergedBuf)

return md5
}

/**
 * Build the list of `{ offset, limit }` byte ranges ("plots") to hash for a file.
 *
 * Supported layouts, selected by `opts.name`:
 * - 'single plot': one range made of `opts.offset` and `opts.limit`, passed through as given.
 * - 'distribution plots': `opts.chunks` ranges whose start positions are spread evenly
 *   over the size of `opts.file`, each shifted by `opts.offset` and sharing `opts.limit`.
 * Any other name yields an empty list.
 *
 * @param {Object} opts - layout options.
 * @param {string} [opts.name] - layout name, see above.
 * @param {string} [opts.file] - file whose size is used ('distribution plots' only).
 * @param {number} [opts.chunks=10] - number of ranges ('distribution plots' only); values
 *   that are not a positive finite number fall back to 10.
 * @param {number} [opts.offset] - range offset; for 'distribution plots' a missing or
 *   non-numeric value counts as 0.
 * @param {number} [opts.limit] - maximum length of each range.
 * @returns {Array<{offset: number, limit: number}>} the ranges to read.
 * @throws whatever `FsExtra.openSync` / `FsExtra.fstatSync` throw for an unreadable file.
 */
function getHashChunkPlots (opts = {}) {
  const name = opts.name

  if (name === 'single plot') {
    return [{ offset: opts.offset, limit: opts.limit }]
  }

  if (name !== 'distribution plots') {
    return []
  }

  // Honour the requested chunk count instead of a hard-coded 10, which produced the
  // wrong number of ranges (some starting past EOF) whenever `chunks` was not 10.
  const requested = Math.floor(+opts.chunks)
  const chunks = Number.isFinite(requested) && requested > 0 ? requested : 10
  // Coerce so a string offset is added numerically and a missing one does not yield NaN.
  const offset = +opts.offset || 0
  const limit = opts.limit

  const fd = FsExtra.openSync(opts.file, 'r')
  let fsize
  try {
    fsize = FsExtra.fstatSync(fd).size
  } finally {
    // Release the descriptor even when the stat call throws.
    FsExtra.closeSync(fd)
  }

  return _.times(chunks, index => ({
    offset: Math.floor(index * fsize / chunks) + offset,
    limit
  }))
}

// Functions exposed by the hash utilities through the module's default export.
// NOTE(review): the `getHash` entry has no separating comma and no definition is
// visible here; it looks like the removed side of this diff — confirm it is
// absent from the final file.
export default {
getHash
getHashByChunks,
getHashChunkPlots
}

0 comments on commit 7d46f32

Please sign in to comment.