/**
 * Normalise an image URL so that near-identical requests share one cache key.
 *
 * Two transformations are applied, in this order:
 *  - a leading "http:" or "https:" (case-insensitive) is removed, so both
 *    schemes produce the same key;
 *  - every "page<digits>-<width>px" segment has its width rounded to the
 *    nearest multiple of 100.
 *
 * The preg_* functions return null when PCRE reports an error. In that case
 * the result of the last successful step is returned, so the declared string
 * return type always holds and the caller still gets a usable key.
 *
 * @param string $url Image URL to normalise.
 * @return string Normalised URL, intended only as input to the cache key.
 */
private static function transformImageURLForCacheKey(string $url): string
{
    // preg_replace() yields null on a PCRE error; fall back to the untouched URL.
    $withoutScheme = preg_replace('/^https?:/i', '', $url) ?? $url;

    $withRoundedWidth = preg_replace_callback(
        '/(page\d+-)(\d+)px/',
        static function (array $matches): string {
            // Tolerate ±50px, see T286356.
            return $matches[1].( round($matches[2] / 100) * 100 ).'px';
        },
        $withoutScheme
    );

    // Same null-on-error contract as above: keep the scheme-less URL if rounding failed.
    return $withRoundedWidth ?? $withoutScheme;
}