diff --git a/declarations/reservoir.d.ts b/declarations/reservoir.d.ts
new file mode 100644
index 0000000..05fd288
--- /dev/null
+++ b/declarations/reservoir.d.ts
@@ -0,0 +1,25 @@
+declare module 'reservoir' {
+
+  interface ReservoirArray<T> extends Array<T> {
+    /**
+     * datum: one or more elements to consider for inclusion into the reservoir.
+     * Returns the current length of the reservoir.
+     */
+    pushSome(...datum: T[]): number;
+  }
+
+  /**
+   * Create a new reservoir sampler.
+   *
+   * @param reservoirSize is the maximum size of the reservoir. This is the number of elements
+   *     to be randomly chosen from the input provided to it using pushSome. Default is 1.
+   * @param randomNumberGenerator is an optional random number generating function to use in
+   *     place of the default Math.random.
+   */
+  function Reservoir<T>(
+    reservoirSize?: number,
+    randomNumberGenerator?: () => number
+  ): ReservoirArray<T>;
+
+  export = Reservoir;
+}
diff --git a/localturk.ts b/localturk.ts
index 07dc587..f082672 100644
--- a/localturk.ts
+++ b/localturk.ts
@@ -15,6 +15,7 @@ import * as fs from 'fs-extra';
 import * as path from 'path';
 import * as program from 'commander';
 import open = require('open');
+import Reservoir = require('reservoir');
 import * as _ from 'lodash';
 
 import * as csv from './csv';
@@ -28,10 +29,13 @@ program
   .option('-p, --port <n>', 'Run on this port (default 4321)', parseInt)
   .option('-s, --static-dir <dir>',
           'Serve static content from this directory. Default is same directory as template file.')
+  .option('-r, --random-order',
+          'Serve images in random order, rather than sequentially. This is useful for ' +
+          'generating valid subsamples or for minimizing collisions during group localturking.')
   .option('-w, --write-template', 'Generate a stub template file based on the input CSV.')
   .parse(process.argv);
 
-const {args, writeTemplate} = program;
+const {args, randomOrder, writeTemplate} = program;
 
 if (!((3 === args.length && !writeTemplate) || (1 === args.length && writeTemplate))) {
   program.help();
@@ -128,17 +132,34 @@ interface TaskStats {
 
+/**
+ * Scan the tasks file and pick the next task that has not been completed yet.
+ * By default this is the first uncompleted row; with --random-order it is a
+ * randomly sampled uncompleted row. Also reports completed/total task counts.
+ */
 async function getNextTask(): Promise<TaskStats> {
   const completedTasks = (await readCompletedTasks()).map(utils.normalizeValues);
+  // A size-1 reservoir (the default) holds one randomly chosen candidate, so
+  // random selection still needs only a single pass over the file.
+  const sampler = randomOrder ? Reservoir<Task>() : null;
   let nextTask: Task;
   let numTotal = 0;
   for await (const task of csv.readRowObjects(tasksFile)) {
     numTotal++;
-    if (!nextTask && !isTaskCompleted(utils.normalizeValues(task), completedTasks)) {
+    if (!sampler && nextTask) {
+      continue; // we're only counting at this point.
+    }
+    if (isTaskCompleted(utils.normalizeValues(task), completedTasks)) {
+      continue;
+    }
+
+    if (sampler) {
+      sampler.pushSome(task);
+    } else {
       nextTask = task;
     }
   }
 
   return {
-    task: nextTask,
+    task: sampler ? sampler[0] : nextTask,
     numCompleted: _.size(completedTasks),
     numTotal,
   }
diff --git a/package.json b/package.json
index 8db7e54..3b47b1c 100644
--- a/package.json
+++ b/package.json
@@ -21,6 +21,7 @@
     "fs-extra": "^4.0.2",
     "lodash": "^4.17.4",
     "open": "^0.0.5",
+    "reservoir": "^0.1.2",
     "temp": "^0.8.3"
   },
   "devDependencies": {
diff --git a/yarn.lock b/yarn.lock
index beb0047..2d63dde 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -560,6 +560,10 @@ raw-body@2.3.2:
     iconv-lite "0.4.19"
     unpipe "1.0.0"
 
+reservoir@^0.1.2:
+  version "0.1.2"
+  resolved "https://registry.yarnpkg.com/reservoir/-/reservoir-0.1.2.tgz#f08eac156495123039cbc5ee06f3f58979e45605"
+
 rimraf@~2.2.6:
   version "2.2.8"
   resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.2.8.tgz#e439be2aaee327321952730f99a8929e4fc50582"