Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
christianbltr committed Jun 14, 2024
2 parents ce17b90 + 4054434 commit 06ac0a8
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 113 deletions.
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Upcoming version

Version 5.5.0, 17 May 2024
[FEATURE] Add pagination in the backend module "Indexed content" function and avoid out of memory error. https://github.com/tpwd/ke_search/issues/100
[FEATURE] Remove obsolete records during incremental indexing. Records and files which should not be indexed (e.g. pages with "no_index" flag) are now removed from the index during incremental indexing. Previously only hidden and deleted pages, records and files were removed.
[BUGFIX] Render file preview in result list only for files which are in the "imagefile_ext" list (images and PDF files). https://github.com/tpwd/ke_search/issues/60
[BUGFIX] Don't stop indexing if a folder does not exist. Thanks to Philip Hartmann. https://github.com/tpwd/ke_search/issues/225
[TASK] Add BEGIN and COMMIT statements needed to run ke_search in a Percona-Database-Cluster with strict-mode. Thanks to rentz-skygate. https://github.com/tpwd/ke_search/issues/222
Expand Down
6 changes: 4 additions & 2 deletions Classes/Domain/Repository/IndexRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -206,13 +206,15 @@ public function deleteCorrespondingIndexRecords(string $type, array $records, ar
} else {
$origUid = $record['uid'];
}
$this->deleteByUniqueProperties(
$numberOfAffectedRows = $this->deleteByUniqueProperties(
$origUid,
$indexerConfig['storagepid'],
$type,
$record['sys_language_uid']
);
$count++;
if ($numberOfAffectedRows) {
$count += $numberOfAffectedRows;
}
}
}
return $count;
Expand Down
103 changes: 102 additions & 1 deletion Classes/Indexer/IndexerBase.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
***************************************************************/

use PDO;
use Tpwd\KeSearch\Domain\Repository\IndexRepository;
use Tpwd\KeSearch\Indexer\Types\File;
use Tpwd\KeSearch\Lib\Db;
use Tpwd\KeSearch\Lib\SearchHelper;
use Tpwd\KeSearch\Service\IndexerStatusService;
use Tpwd\KeSearch\Utility\FileUtility;
use TYPO3\CMS\Core\Database\ConnectionPool;
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
Expand All @@ -32,6 +34,8 @@
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
use TYPO3\CMS\Core\Resource\FileReference;
use TYPO3\CMS\Core\Resource\FileRepository;
use TYPO3\CMS\Core\Site\Entity\Site;
use TYPO3\CMS\Core\Site\SiteFinder;
use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
Expand All @@ -52,9 +56,17 @@ class IndexerBase
// string which separates metadata from file content in the index record
public const METADATASEPARATOR = "\n";

/** @var int $fileCounter */
/**
* counter for how many files we have indexed
* @var int
*/
protected $fileCounter = 0;

/**
* counter for how many records have been removed in incremental mode
*/
protected int $counterRemoved = 0;

/**
* @var IndexerRunner
*/
Expand All @@ -80,6 +92,9 @@ class IndexerBase
*/
protected $indexingMode = self::INDEXING_MODE_FULL;

protected IndexerStatusService $indexerStatusService;
protected IndexRepository $indexRepository;

/**
* Constructor of this object
* @param IndexerRunner $pObj
Expand All @@ -90,6 +105,8 @@ public function __construct(IndexerRunner $pObj)
$this->pObj = $pObj;
$this->indexerConfig = $this->pObj->indexerConfig;
$this->lastRunStartTime = SearchHelper::getIndexerLastRunTime();
$this->indexerStatusService = GeneralUtility::makeInstance(IndexerStatusService::class);
$this->indexRepository = GeneralUtility::makeInstance(IndexRepository::class);
}

/**
Expand Down Expand Up @@ -731,4 +748,88 @@ public function getTreeList($id, $depth, $begin = 0, $permClause = '', $includeD
}
return $theList;
}

/**
 * Deletes the index rows that correspond to one source record and adds the
 * number of deleted rows to the "removed" counter.
 *
 * The record is passed on to IndexRepository::deleteCorrespondingIndexRecords()
 * together with the current indexer configuration. It must contain at least
 * the fields 'uid', 'pid' and 'sys_language_uid'.
 *
 * @param string $type type value handed through to the index repository
 * @param array $record source record whose index rows should be removed
 */
public function removeRecordFromIndex(string $type, array $record)
{
    $removedRows = $this->indexRepository->deleteCorrespondingIndexRecords(
        $type,
        [$record],
        $this->indexerConfig
    );

    // Nothing was deleted: keep the counter untouched and skip the log entry.
    if (!($removedRows > 0)) {
        return;
    }

    $this->counterRemoved += $removedRows;
    $this->pObj->logger->debug(
        'Removed ' . $removedRows . ' corresponding index records',
        $record
    );
}

/**
 * Deletes the index rows of a single file and adds the number of deleted
 * rows to the "removed" counter.
 *
 * The rows are addressed by the file uid, the storage pid from the indexer
 * configuration, the type "file:<extension>" and the language detected from
 * the file properties.
 *
 * @param \TYPO3\CMS\Core\Resource\File $file file whose index rows should be removed
 */
public function removeFileFromIndex(\TYPO3\CMS\Core\Resource\File $file)
{
    $fileUid = $file->getUid();
    $storagePid = $this->indexerConfig['storagepid'];
    $languageUid = $this->detectFileLanguage($file->getProperties());
    $indexType = 'file:' . $file->getExtension();

    // The repository result is normalised to an integer before it is evaluated.
    $removedRows = (int)$this->indexRepository->deleteByUniqueProperties(
        $fileUid,
        $storagePid,
        $indexType,
        $languageUid
    );

    // Nothing was deleted: keep the counter untouched and skip the log entry.
    if ($removedRows < 1) {
        return;
    }

    $this->counterRemoved += $removedRows;

    $logMessage = 'Removed ' . $removedRows . ' index records for file "'
        . $file->getCombinedIdentifier() . '"';
    $logContext = [
        'orig_uid' => $fileUid,
        'pid' => $storagePid,
        'type' => $indexType,
        'language' => $languageUid,
    ];
    $this->pObj->logger->debug($logMessage, $logContext);
}

/**
 * Tries to detect the language of a file from the metadata field 'language' and returns the language uid.
 * The field 'language' comes with the optional extension 'filemetadata'.
 *
 * The metadata value is compared case-insensitively against the locale, title, hreflang and
 * two-letter ISO code of every language of every configured site.
 * Returns -1 ("all languages") if the language could not be determined.
 *
 * @param array $fileProperties file properties, optionally containing the key 'language'
 * @return int language uid of the matching site language, or -1 if there is no match
 */
protected function detectFileLanguage(array $fileProperties): int
{
    $language = $fileProperties['language'] ?? null;

    // No (usable) language metadata: nothing can match, so the site lookup is skipped.
    if (!is_scalar($language)) {
        return -1;
    }

    // The lookup keys below are lowercased, so the metadata value has to be lowercased as well,
    // otherwise values like "EN" or "de-DE" would never be found.
    $needle = strtolower((string)$language);

    $languages = [];
    /** @var Site $site */
    foreach (GeneralUtility::makeInstance(SiteFinder::class)->getAllSites() as $site) {
        foreach ($site->getLanguages() as $siteLanguageId => $siteLanguage) {
            // The locale is always registered; the string cast keeps this working when
            // getLocale() returns a stringable object instead of a plain string.
            $languages[strtolower((string)$siteLanguage->getLocale())] = $siteLanguageId;

            // The remaining identifiers are optional and only registered when they are set.
            // Later entries overwrite earlier ones that share the same key.
            $optionalIdentifiers = [
                $siteLanguage->getTitle(),
                $siteLanguage->getHreflang(),
                $siteLanguage->getTwoLetterIsoCode(),
            ];
            foreach ($optionalIdentifiers as $identifier) {
                if ($identifier) {
                    $languages[strtolower((string)$identifier)] = $siteLanguageId;
                }
            }
        }
    }

    return $languages[$needle] ?? -1;
}
}
66 changes: 12 additions & 54 deletions Classes/Indexer/Types/File.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,17 @@
* This copyright notice MUST APPEAR in all copies of the script!
* ************************************************************* */

use Tpwd\KeSearch\Domain\Repository\IndexRepository;
use Tpwd\KeSearch\Indexer\Filetypes\FileIndexerInterface;
use Tpwd\KeSearch\Indexer\IndexerBase;
use Tpwd\KeSearch\Indexer\IndexerRunner;
use Tpwd\KeSearch\Lib\Fileinfo;
use Tpwd\KeSearch\Lib\SearchHelper;
use Tpwd\KeSearch\Service\IndexerStatusService;
use Tpwd\KeSearch\Utility\FileUtility;
use TYPO3\CMS\Core\Core\Environment;
use TYPO3\CMS\Core\Resource\Folder;
use TYPO3\CMS\Core\Resource\Index\MetaDataRepository;
use TYPO3\CMS\Core\Resource\ResourceStorage;
use TYPO3\CMS\Core\Resource\StorageRepository;
use TYPO3\CMS\Core\Site\Entity\Site;
use TYPO3\CMS\Core\Site\SiteFinder;
use TYPO3\CMS\Core\Utility\GeneralUtility;

/**
Expand Down Expand Up @@ -78,13 +74,6 @@ class File extends IndexerBase
*/
public ResourceStorage $storage;

/**
* @var IndexRepository
*/
private $indexRepository;

protected IndexerStatusService $indexerStatusService;

/**
* Initializes indexer for files
*
Expand All @@ -96,8 +85,6 @@ public function __construct(IndexerRunner $pObj)
$this->pObj = $pObj;
$this->extConf = SearchHelper::getExtConf();
$this->fileInfo = GeneralUtility::makeInstance(Fileinfo::class);
$this->indexRepository = GeneralUtility::makeInstance(IndexRepository::class);
$this->indexerStatusService = GeneralUtility::makeInstance(IndexerStatusService::class);
}

/**
Expand Down Expand Up @@ -125,6 +112,9 @@ public function startIndexing(): string
if ($this->indexingMode === self::INDEXING_MODE_INCREMENTAL) {
$resultMessage = count($files) . ' files have been found for indexing.' . chr(10)
. $counter . ' new or updated files have been indexed.';
if ($this->counterRemoved) {
$resultMessage .= chr(10) . $this->counterRemoved . ' outdated file index record(s) have been removed.';
}
} else {
$resultMessage = count($files) . ' files have been found for indexing.' . chr(10)
. $counter . ' files have been indexed.';
Expand Down Expand Up @@ -190,9 +180,14 @@ public function getFilesFromFal(array &$files, array $directoryArray)
$filesInFolder = $folder->getFiles();
if (count($filesInFolder)) {
foreach ($filesInFolder as $file) {
if ($file instanceof \TYPO3\CMS\Core\Resource\File
&& FileUtility::isFileIndexable($file, $this->indexerConfig)) {
$files[] = $file;
if ($file instanceof \TYPO3\CMS\Core\Resource\File) {
if (FileUtility::isFileIndexable($file, $this->indexerConfig)) {
$files[] = $file;
} else {
if ($this->indexingMode == self::INDEXING_MODE_INCREMENTAL) {
$this->removeFileFromIndex($file);
}
}
}
}
}
Expand Down Expand Up @@ -402,7 +397,7 @@ public function storeToIndex($file, string $content)
// get file properties for this file, this information is merged from file record and meta information
$fileProperties = $file->getProperties();
$orig_uid = $file->getUid();
$language_uid = $this->detectLanguage($fileProperties);
$language_uid = $this->detectFileLanguage($fileProperties);

// get raw metadata for this file
/** @var MetaDataRepository $metaDataRepository */
Expand Down Expand Up @@ -499,41 +494,4 @@ public function storeToIndex($file, string $content)
$additionalFields // additional fields added by hooks
);
}

/**
 * Tries to detect the language of a file from the metadata field 'language' and returns the language uid.
 * The field 'language' comes with the optional extension 'filemetadata'.
 *
 * The metadata value is compared case-insensitively against the locale, title, hreflang and
 * two-letter ISO code of every language of every configured site.
 * Returns -1 ("all languages") if the language could not be determined.
 *
 * @param array $fileProperties file properties, optionally containing the key 'language'
 * @return int language uid of the matching site language, or -1 if there is no match
 */
protected function detectLanguage(array $fileProperties): int
{
    $language = $fileProperties['language'] ?? null;

    // No (usable) language metadata: nothing can match, so the site lookup is skipped.
    if (!is_scalar($language)) {
        return -1;
    }

    // The lookup keys below are lowercased, so the metadata value has to be lowercased as well,
    // otherwise values like "EN" or "de-DE" would never be found.
    $needle = strtolower((string)$language);

    $languages = [];
    /** @var Site $site */
    foreach (GeneralUtility::makeInstance(SiteFinder::class)->getAllSites() as $site) {
        foreach ($site->getLanguages() as $siteLanguageId => $siteLanguage) {
            // The locale is always registered; the string cast keeps this working when
            // getLocale() returns a stringable object instead of a plain string.
            $languages[strtolower((string)$siteLanguage->getLocale())] = $siteLanguageId;

            // The remaining identifiers are optional and only registered when they are set.
            // Later entries overwrite earlier ones that share the same key.
            $optionalIdentifiers = [
                $siteLanguage->getTitle(),
                $siteLanguage->getHreflang(),
                $siteLanguage->getTwoLetterIsoCode(),
            ];
            foreach ($optionalIdentifiers as $identifier) {
                if ($identifier) {
                    $languages[strtolower((string)$identifier)] = $siteLanguageId;
                }
            }
        }
    }

    return $languages[$needle] ?? -1;
}
}
Loading

0 comments on commit 06ac0a8

Please sign in to comment.