Skip to content

Commit

Permalink
Integrate line detection model into Transkribus engine
Browse files Browse the repository at this point in the history
Add API routes, helper methods and UI components
to integrate line detection models into the
Transkribus OCR engine

Bug: T340837
  • Loading branch information
Parthiv-M authored Aug 2, 2023
1 parent 79dc7d0 commit 080d366
Show file tree
Hide file tree
Showing 14 changed files with 316 additions and 39 deletions.
43 changes: 43 additions & 0 deletions assets/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import 'select2';
const $ = require('jquery');
const $select2 = $('#lang');
var selectedLanguages = [];
const $lineDetectionSelect = $('#line-id');
var lineModels = null;

const Cropper = require('cropperjs');
import 'cropperjs/dist/cropper.css';
Expand Down Expand Up @@ -43,7 +45,46 @@ function updateSelect2Options(engine)
});
}

/**
 * Fetch the line detection models available for the Transkribus engine and
 * cache them in the module-level `lineModels` variable.
 *
 * If the request fails, or the response carries no `available_line_ids`
 * entry, `lineModels` is set to an empty object instead of being left as
 * null, so that code iterating over it with Object.keys() does not throw.
 *
 * @return {Object} the chained promise returned by jQuery, so callers may
 * wait for `lineModels` to be populated
 */
function fetchLineModelsJSON () {
    return $.getJSON('/api/transkribus/available_line_ids').then(
        response => {
            lineModels = (response && response.available_line_ids) || {};
        },
        () => {
            // Request failed: fall back to "no additional models".
            lineModels = {};
        }
    );
}

/**
 * Populate the select input field for line detection model IDs with
 * the line detection model IDs available for the Transkribus engine.
 *
 * The first two options already present in the select are kept as static
 * entries; every option after them is rebuilt from the module-level
 * `lineModels` map, using each key as the option value and the mapped
 * entry as the option text.
 */
function updateLineModelOptions () {
    const selectElement = $lineDetectionSelect[0];
    if (!selectElement) {
        // The line detection select is not present on this page.
        return;
    }

    // `options` is an HTMLOptionsCollection, which has no forEach(); copy the
    // data into a real array before the select is emptied below.
    const staticOptionData = Array.from(selectElement.options, option => ({
        text: option.text,
        value: option.value
    }));

    // clear existing selections and options
    $lineDetectionSelect.val(null).empty().trigger('change');

    // append static options
    staticOptionData.slice(0, 2).forEach(staticOption => {
        $lineDetectionSelect.append(new Option(staticOption.text, staticOption.value, staticOption.value === null, false));
    });

    // `lineModels` is null until the fetch has completed; treat that as
    // "no additional models" rather than throwing on Object.keys(null).
    const availableModels = lineModels || {};

    // append all other line detection models as options
    Object.keys(availableModels).forEach(model => {
        const option = new Option(availableModels[model], model, false, false);
        $lineDetectionSelect.append(option);
    });
}

$(function () {

// fetch line detection model data
fetchLineModelsJSON();

// Remove nojs class, for styling non-Javascript users.
$('html').removeClass('nojs');

Expand Down Expand Up @@ -84,6 +125,8 @@ $(function () {
$('#transkribus-lang-label').addClass('hidden');
$('#optional-lang-label').removeClass('hidden');
} else {
updateLineModelOptions();
$('#transkribus-help').removeClass('hidden');
$select2.prop('required', true);
$select2.attr('data-placeholder', '');
$select2.data('select2').selection.placeholder.text = '';
Expand Down
1 change: 1 addition & 0 deletions config/packages/nelmio_api_doc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ nelmio_api_doc:
path_patterns:
- ^/api$
- ^/api/available_langs$
- ^/api/transkribus/available_line_ids$
7 changes: 6 additions & 1 deletion i18n/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,10 @@
"transkribus-no-lang-error": "No language was selected",
"transkribus-multiple-lang-error": "Multiple languages are not allowed, specify one language",
"transkribus-browse-public-models": "Browse all public language models for Transkribus",
"transkribus-request-for-model": "Make a request to add a model from Transkribus to the OCR tool"
"transkribus-request-for-model": "Make a request to add a model from Transkribus to the OCR tool",
"transkribus-options": "Transkribus Options",
"transkribus-line-label": "Line Detection Model",
"transkribus-line-id-none-option": "None",
"transkribus-mixed-line-option": "Mixed Line Orientation",
"transkribus-line-help": "Leave empty if you are not sure of which line detection model to use"
}
7 changes: 6 additions & 1 deletion i18n/qqq.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,10 @@
"transkribus-no-lang-error": "Error message displayed when no language is selected from the Languages drop down menu",
"transkribus-multiple-lang-error": "Error message displayed when multiple languages are chosen from the Languages drop down menu",
"transkribus-browse-public-models": "Link text for the list of all public models for Transkribus",
"transkribus-request-for-model": "Link text for the form to make a request for a model to be added to the OCR tool"
"transkribus-request-for-model": "Link text for the form to make a request for a model to be added to the OCR tool",
"transkribus-options": "Heading for Transkribus-specific options",
"transkribus-line-id-none-option": "Form option for Transkribus line detection model",
"transkribus-mixed-line-option": "Form option for Transkribus line detection model",
"transkribus-line-label": "Form label for the Transkribus line detection model selection.",
"transkribus-line-help": "Help text for the Transkribus line detection model selection."
}
89 changes: 67 additions & 22 deletions public/langs.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,19 @@
"google": "az-Cyrl"
},
"bali": {
"transkribus": 48252
"transkribus": {
"htr": 48252,
"line": 45612
}
},
"be": {
"tesseract": "bel",
"google": "be"
},
"ben-print": {
"transkribus": 46239
"transkribus": {
"htr": 46239
}
},
"bg": {
"tesseract": "bul",
Expand Down Expand Up @@ -85,13 +90,19 @@
"google": "de"
},
"de-17": {
"transkribus": 38871
"transkribus": {
"htr": 38871
}
},
"de-hd-m1": {
"transkribus": 38291
"transkribus": {
"htr": 38291
}
},
"dev": {
"transkribus": 45909
"transkribus": {
"htr": 45909
}
},
"de-frk": {
"tesseract": "frk"
Expand All @@ -112,16 +123,24 @@
"google": "en"
},
"en-b2022": {
"transkribus": 48327
"transkribus": {
"htr": 48327
}
},
"en-handwritten-m3": {
"transkribus": 37646
"transkribus": {
"htr": 37646
}
},
"en-print-m1": {
"transkribus": 39995
"transkribus": {
"htr": 39995
}
},
"en-typewriter": {
"transkribus": 37545
"transkribus": {
"htr": 37545
}
},
"enm": {
"tesseract": "enm",
Expand All @@ -136,13 +155,17 @@
"google": "es"
},
"es-md": {
"transkribus": 48440
"transkribus": {
"htr": 48440
}
},
"es-old": {
"tesseract": "spa_old"
},
"es-redonda-extended-v1_2": {
"transkribus": 48394
"transkribus": {
"htr": 48394
}
},
"et": {
"tesseract": "est",
Expand All @@ -161,7 +184,9 @@
"google": "fi"
},
"fin": {
"transkribus": 37748
"transkribus": {
"htr": 37748
}
},
"fo": {
"tesseract": "fao",
Expand All @@ -172,7 +197,9 @@
"google": "fr"
},
"fr-m1": {
"transkribus": 37758
"transkribus": {
"htr": 37758
}
},
"fro": {
"google": "fro"
Expand All @@ -189,10 +216,14 @@
"google": "ga"
},
"ger-hd-m1": {
"transkribus": 35909
"transkribus": {
"htr": 35909
}
},
"ger-15": {
"transkribus": 45902
"transkribus": {
"htr": 45902
}
},
"gd": {
"tesseract": "gla",
Expand Down Expand Up @@ -247,7 +278,9 @@
"google": "it"
},
"it-hd-m1": {
"transkribus": 38440
"transkribus": {
"htr": 38440
}
},
"it-old": {
"tesseract": "ita_old"
Expand Down Expand Up @@ -382,7 +415,9 @@
"google": "pl"
},
"pl-m2": {
"transkribus": 44976
"transkribus": {
"htr": 44976
}
},
"ps": {
"tesseract": "pus",
Expand Down Expand Up @@ -412,17 +447,23 @@
"google": "ru-PETR1708"
},
"rus-hd-2": {
"transkribus" : 45595
"transkribus" : {
"htr": 45595
}
},
"rus-print": {
"transkribus" : 44358
"transkribus" : {
"htr": 44358
}
},
"sa": {
"tesseract": "san",
"google": "sa"
},
"san" : {
"transkribus" : 45909
"transkribus" : {
"htr": 45909
}
},
"sd": {
"tesseract": "snd"
Expand Down Expand Up @@ -464,7 +505,9 @@
"google": "sw"
},
"swe-2.1": {
"transkribus": 45736
"transkribus": {
"htr": 45736
}
},
"syr": {
"tesseract": "syr",
Expand Down Expand Up @@ -530,7 +573,9 @@
"google": "yi"
},
"yi-hd": {
"transkribus": 46159
"transkribus": {
"htr": 46159
}
},
"yo": {
"tesseract": "yor",
Expand Down
45 changes: 45 additions & 0 deletions src/Controller/OcrController.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class OcrController extends AbstractController {
'langs' => [],
'psm' => TesseractEngine::DEFAULT_PSM,
'crop' => [],
'line_id' => TranskribusEngine::DEFAULT_LINEID,
];

/**
Expand Down Expand Up @@ -126,11 +127,20 @@ private function setEngineOptions(): void {
// because we want the default set if the user changes the engine to Tesseract.
static::$params['psm'] = (int)$this->request->query->get( 'psm', (string)static::$params['psm'] );

// This is always set, even if Transkribus isn't initially chosen as the engine
// because we want the default set if the user changes the engine to Transkribus.
static::$params['line_id'] = (int)$this->request->query->get( 'line_id', (string)static::$params['line_id'] );

// Apply the tesseract-specific settings
// NOTE: Intentionally excluding `oem`, see T285262
if ( TesseractEngine::getId() === static::$params['engine'] ) {
$this->engine->setPsm( static::$params['psm'] );
}

// Apply Transkribus specific settings
if ( TranskribusEngine::getId() === static::$params['engine'] ) {
$this->engine->setLineId( static::$params['line_id'] );
}
}

/**
Expand Down Expand Up @@ -165,6 +175,18 @@ public function homeAction(): Response {
static::$params['available_langs'] = $this->engine->getValidLangs();
sort( static::$params['available_langs'] );

// set empty array to avoid errors while rendering template on non-transkribus engines
static::$params['available_line_ids'] = [];

if ( static::$params['engine'] === 'transkribus' ) {
// Pre-supply the available line ids for autocompletion in the form.
static::$params['available_line_ids'] = $this->engine->getValidLineIds( true, false );
sort( static::$params['available_line_ids'] );

static::$params['available_line_id_langs'] = $this->engine->getValidLineIds( false, true );
sort( static::$params['available_line_id_langs'] );
}

// Intuition::listToText() isn't available via Twig, and we only want to do this for the view and not the API.
static::$params['image_hosts'] = $this->intuition->listToText( static::$params['image_hosts'] );

Expand Down Expand Up @@ -212,6 +234,12 @@ public function homeAction(): Response {
* @OA\Schema(type="int")
* )
* @OA\Parameter(
* name="line_id",
* in="query",
* description="The line detection model ID to be used for Transkribus.",
* @OA\Schema(type="int")
* )
* @OA\Parameter(
* name="crop",
* in="query",
* description="Crop parameters: an array with `x`, `y`, `width`, and `height` integer keys.",
Expand Down Expand Up @@ -262,6 +290,22 @@ public function apiAvailableLangsAction(): JsonResponse {
] );
}

/**
 * API endpoint that returns the line detection model IDs available for
 * the Transkribus engine. The engine is forced to Transkribus before setup
 * so that the line ID lookup is performed on that engine regardless of
 * what the request asked for.
 *
 * phpcs:disable MediaWiki.Commenting.FunctionAnnotations.UnrecognizedAnnotation
 * @Route("/api/transkribus/available_line_ids", name="apiLineIds", methods={"GET"})
 * @OA\Response(response=200, description="List of available line detection model IDs, in JSON format")
 * phpcs:enable
 * @return JsonResponse
 */
public function apiAvailableLineDetectionModelIds(): JsonResponse {
	// Override both the request parameter and the stored default before setup() runs.
	$this->request->query->set( 'engine', 'transkribus' );
	static::$params['engine'] = 'transkribus';
	$this->setup();
	return $this->getApiResponse( [
		'available_line_ids' => $this->engine->getValidLineIds( false, false ),
	] );
}

/**
* Return a new JsonResponse with the given $params merged into static::$params.
* @param mixed[] $params
Expand Down Expand Up @@ -302,6 +346,7 @@ private function getResult( string $invalidLangsMode ): EngineResult {
implode( '|', static::$params['langs'] ),
implode( '|', array_map( 'strval', static::$params['crop'] ) ),
static::$params['psm'],
static::$params['line_id'],
// Warning messages are localized
$this->intuition->getLang(),
]
Expand Down
Loading

0 comments on commit 080d366

Please sign in to comment.