From a1d420a21cee8dfc73ef98b12ab4f2cc6e8194ae Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 21:26:48 +0000 Subject: [PATCH 01/11] Bump ch.qos.logback:logback-classic from 1.5.3 to 1.5.4 Bumps [ch.qos.logback:logback-classic](https://github.com/qos-ch/logback) from 1.5.3 to 1.5.4. - [Commits](https://github.com/qos-ch/logback/compare/v_1.5.3...v_1.5.4) --- updated-dependencies: - dependency-name: ch.qos.logback:logback-classic dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- boudicca.base/eventcollector-client/build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boudicca.base/eventcollector-client/build.gradle.kts b/boudicca.base/eventcollector-client/build.gradle.kts index b5c38e49..5786da09 100644 --- a/boudicca.base/eventcollector-client/build.gradle.kts +++ b/boudicca.base/eventcollector-client/build.gradle.kts @@ -11,7 +11,7 @@ dependencies { implementation(project(":boudicca.base:remote-collector:remote-collector-client")) implementation("org.apache.velocity:velocity-engine-core:2.3") implementation("org.apache.velocity.tools:velocity-tools-generic:3.1") - implementation("ch.qos.logback:logback-classic:1.5.3") + implementation("ch.qos.logback:logback-classic:1.5.4") implementation("org.slf4j:slf4j-api:2.0.12") testImplementation("org.junit.jupiter:junit-jupiter:5.10.2") } From f326b9db244d7829a54a21c223cff144640c4474 Mon Sep 17 00:00:00 2001 From: abl Date: Tue, 9 Apr 2024 23:48:07 +0200 Subject: [PATCH 02/11] fix ical service paging and allow empty and null queries which return everything --- .../boudicca/publisher/event/ical/CalendarService.kt | 2 +- .../boudicca/publisher/event/ical/IcalResource.kt | 4 ++-- .../base/boudicca/search/service/QueryService.kt | 11 ++++++----- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/boudicca.base/publisher-event-ical/src/main/kotlin/base/boudicca/publisher/event/ical/CalendarService.kt b/boudicca.base/publisher-event-ical/src/main/kotlin/base/boudicca/publisher/event/ical/CalendarService.kt index 8c681223..f0e7e2be 100644 --- a/boudicca.base/publisher-event-ical/src/main/kotlin/base/boudicca/publisher/event/ical/CalendarService.kt +++ b/boudicca.base/publisher-event-ical/src/main/kotlin/base/boudicca/publisher/event/ical/CalendarService.kt @@ -82,7 +82,7 @@ class CalendarService @Autowired constructor(@Value("\${boudicca.search.url}") p } fun getEvents(query: String): ByteArray { - val events = searchClient.queryEvents(QueryDTO(query, 100)) + val events = searchClient.queryEvents(QueryDTO(query, 0, Int.MAX_VALUE)) return createCalendar(events.result) } } \ No newline at end of file diff --git a/boudicca.base/publisher-event-ical/src/main/kotlin/base/boudicca/publisher/event/ical/IcalResource.kt b/boudicca.base/publisher-event-ical/src/main/kotlin/base/boudicca/publisher/event/ical/IcalResource.kt index f42b1a05..2c82e592 100644 --- a/boudicca.base/publisher-event-ical/src/main/kotlin/base/boudicca/publisher/event/ical/IcalResource.kt +++ b/boudicca.base/publisher-event-ical/src/main/kotlin/base/boudicca/publisher/event/ical/IcalResource.kt @@ -14,8 +14,8 @@ class IcalResource @Autowired constructor(private val calendarService: CalendarS "/calendar.ics", produces = [MediaType.APPLICATION_OCTET_STREAM_VALUE], ) - fun getAllEvents(@RequestParam query: String): ResponseEntity { - val calendarFile = calendarService.getEvents(query) + fun getAllEvents(@RequestParam(required = false) query: String?): ResponseEntity { + val calendarFile = calendarService.getEvents(query ?: "") return ResponseEntity.ok() .header("Content-Disposition", "attachment;filename=calendar.ics") .body(calendarFile) diff --git a/boudicca.base/search/src/main/kotlin/base/boudicca/search/service/QueryService.kt b/boudicca.base/search/src/main/kotlin/base/boudicca/search/service/QueryService.kt index 584dde63..c27cfa27 100644 --- a/boudicca.base/search/src/main/kotlin/base/boudicca/search/service/QueryService.kt +++ b/boudicca.base/search/src/main/kotlin/base/boudicca/search/service/QueryService.kt @@ -3,8 +3,8 @@ package base.boudicca.search.service import base.boudicca.api.search.model.QueryDTO import base.boudicca.api.search.model.ResultDTO import base.boudicca.model.Entry -import base.boudicca.query.QueryException import base.boudicca.query.BoudiccaQueryRunner +import base.boudicca.query.QueryException import base.boudicca.query.Utils import base.boudicca.query.evaluator.Evaluator import base.boudicca.query.evaluator.NoopEvaluator @@ -24,10 +24,10 @@ class QueryService { @Throws(QueryException::class) fun query(queryDTO: QueryDTO): ResultDTO { - val query = - queryDTO.query ?: return ResultDTO(Utils.offset(entries, queryDTO.offset, queryDTO.size), entries.size) - - return evaluateQuery(query, Page(queryDTO.offset ?: 0, queryDTO.size ?: 30)) + if (queryDTO.query.isNullOrEmpty()) { + return ResultDTO(Utils.offset(entries, queryDTO.offset, queryDTO.size), entries.size) + } + return evaluateQuery(queryDTO.query!!, Page(queryDTO.offset ?: 0, queryDTO.size ?: 30)) } @EventListener @@ -42,6 +42,7 @@ class QueryService { val queryResult = evaluator.evaluate(expression, page) ResultDTO(queryResult.result, queryResult.totalResults, queryResult.error) } catch (e: QueryException) { + //TODO this should return a 400 error or something, not a 200 message with an error message... ResultDTO(emptyList(), 0, e.message) } } From b621fb5c14329d18bf6243bc324137a69ee5406b Mon Sep 17 00:00:00 2001 From: Mahdi Khashan <58775404+mahdikhashan@users.noreply.github.com> Date: Wed, 10 Apr 2024 19:44:32 +0200 Subject: [PATCH 03/11] fix: when there is only one event, it is filling the whole parent container width (#379) --- .../src/main/resources/templates/events/events.hbs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boudicca.base/publisher-event-html/src/main/resources/templates/events/events.hbs b/boudicca.base/publisher-event-html/src/main/resources/templates/events/events.hbs index 6730bfeb..639a94be 100644 --- a/boudicca.base/publisher-event-html/src/main/resources/templates/events/events.hbs +++ b/boudicca.base/publisher-event-html/src/main/resources/templates/events/events.hbs @@ -2,7 +2,7 @@ .events-grid { gap: 18px; display: grid; - grid-template-columns: repeat(auto-fit, minmax(min(250px, 100%),1fr)); + grid-template-columns: repeat(auto-fill, minmax(min(250px, 100%),1fr)); } .event { From e45827794deb98b5af9be08c2fdd3c6d211425b0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 21:43:02 +0000 Subject: [PATCH 04/11] Bump gradle/wrapper-validation-action from 2.1.2 to 2.1.3 Bumps [gradle/wrapper-validation-action](https://github.com/gradle/wrapper-validation-action) from 2.1.2 to 2.1.3. - [Release notes](https://github.com/gradle/wrapper-validation-action/releases) - [Commits](https://github.com/gradle/wrapper-validation-action/compare/v2.1.2...v2.1.3) --- updated-dependencies: - dependency-name: gradle/wrapper-validation-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/deploy.yml | 2 +- .github/workflows/gradle.yml | 2 +- .github/workflows/publish.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 59a2f7e9..e0dc57a7 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -21,7 +21,7 @@ jobs: java-version: '21' distribution: 'temurin' - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v2.1.2 + uses: gradle/wrapper-validation-action@v2.1.3 - name: Setup Gradle uses: gradle/actions/setup-gradle@v3 - name: Execute Gradle build with tests diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index d389e5f2..befac347 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -23,7 +23,7 @@ jobs: java-version: '21' distribution: 'temurin' - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v2.1.2 + uses: gradle/wrapper-validation-action@v2.1.3 - name: Setup Gradle uses: gradle/actions/setup-gradle@v3 - name: Execute Gradle build diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 9159c2ce..7a386437 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -20,7 +20,7 @@ jobs: java-version: '21' distribution: 'temurin' - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v2.1.2 + uses: gradle/wrapper-validation-action@v2.1.3 - name: Setup Gradle uses: gradle/actions/setup-gradle@v3 - name: Execute Gradle build with tests From c9984c91b6d53fc071af589898ad41736c21389f Mon Sep 17 00:00:00 2001 From: abl Date: Fri, 12 Apr 2024 20:36:31 +0200 Subject: [PATCH 05/11] OptimizingEvaluator --- boudicca.base/query-lib/build.gradle.kts | 7 + .../base/boudicca/query/EvaluatorTest.kt | 140 ++++++++ .../query/LoadTestDataIntoLocalEventDb.kt | 24 ++ .../query/OrderAfterPerformanceTest.kt | 81 +++++ .../query/OrderBeforePerformanceTest.kt | 82 +++++ .../base/boudicca/query/TestDataGenerator.kt | 115 +++++++ .../main/kotlin/base/boudicca/query/Utils.kt | 22 +- .../boudicca/query/evaluator/EvaluatorUtil.kt | 24 -- .../query/evaluator/OptimizingEvaluator.kt | 319 ++++++++++++++++++ .../query/evaluator/SimpleEvaluator.kt | 18 +- .../query/evaluator/util/EvaluatorUtil.kt | 134 ++++++++ .../query/evaluator/util/FullTextIndex.kt | 196 +++++++++++ .../query/evaluator/util/SimpleIndex.kt | 47 +++ .../query/evaluator/AbstractEvaluatorTest.kt | 318 +++++++++++++++++ .../evaluator/OptimizingEvaluatorTest.kt | 8 + .../query/evaluator/SimpleEvaluatorTest.kt | 281 +-------------- .../util/EvaluatorUtilBinarySearchTest.kt | 28 ++ .../EvaluatorUtilDurationTest.kt} | 21 +- .../evaluator/util/EvaluatorUtilSortTest.kt | 72 ++++ .../query/evaluator/util/FullTextIndexTest.kt | 209 ++++++++++++ .../query/evaluator/util/SimpleIndexTest.kt | 44 +++ .../boudicca/search/service/QueryService.kt | 36 +- docs/tech/SEARCH.md | 2 +- 23 files changed, 1900 insertions(+), 328 deletions(-) create mode 100644 boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/EvaluatorTest.kt create mode 100644 boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/LoadTestDataIntoLocalEventDb.kt create mode 100644 boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/OrderAfterPerformanceTest.kt create mode 100644 boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/OrderBeforePerformanceTest.kt create mode 100644 boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/TestDataGenerator.kt delete mode 100644 boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/EvaluatorUtil.kt create mode 100644 boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/OptimizingEvaluator.kt create mode 100644 boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtil.kt create mode 100644 boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/FullTextIndex.kt create mode 100644 boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/SimpleIndex.kt create mode 100644 boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/AbstractEvaluatorTest.kt create mode 100644 boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/OptimizingEvaluatorTest.kt create mode 100644 boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilBinarySearchTest.kt rename boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/{EvaluatorUtilTest.kt => util/EvaluatorUtilDurationTest.kt} (79%) create mode 100644 boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilSortTest.kt create mode 100644 boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/FullTextIndexTest.kt create mode 100644 boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/SimpleIndexTest.kt diff --git a/boudicca.base/query-lib/build.gradle.kts b/boudicca.base/query-lib/build.gradle.kts index 2b210d65..8bf98504 100644 --- a/boudicca.base/query-lib/build.gradle.kts +++ b/boudicca.base/query-lib/build.gradle.kts @@ -1,9 +1,16 @@ plugins { id("boudicca-kotlin") id("boudicca-publish") + id ("me.champeau.jmh") version "0.7.2" } dependencies { api(project(":boudicca.base:semantic-conventions")) testImplementation("org.junit.jupiter:junit-jupiter:5.10.2") + jmh(project(":boudicca.base:publisher-client")) + jmh(project(":boudicca.base:ingest-client")) + jmh("com.fasterxml.jackson.core:jackson-core:2.17.0") + jmh("com.fasterxml.jackson.module:jackson-module-kotlin:2.17.0") + jmh("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.17.0") + jmh("com.fasterxml.jackson.core:jackson-databind:2.17.0") } \ No newline at end of file diff --git a/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/EvaluatorTest.kt b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/EvaluatorTest.kt new file mode 100644 index 00000000..610aa100 --- /dev/null +++ b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/EvaluatorTest.kt @@ -0,0 +1,140 @@ +package base.boudicca.query + +import base.boudicca.model.Entry +import base.boudicca.query.evaluator.* +import com.fasterxml.jackson.core.type.TypeReference +import com.fasterxml.jackson.databind.json.JsonMapper +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule +import com.fasterxml.jackson.module.kotlin.KotlinModule +import org.openjdk.jmh.annotations.* +import java.nio.file.Path +import java.util.concurrent.TimeUnit +import kotlin.io.path.exists +import kotlin.io.path.readBytes +import kotlin.time.measureTime + + +@State(Scope.Benchmark) +open class EvaluatorTest { + +// @Param( +// """ "name" contains "rock" """, +// """ "description" contains "rock" """, +// """ "whatever" contains "rock" """, +// ) +// var query: String? = null + +// @Param( +// """ "category" equals "music" """, +// """ "name" equals "music" """, +// """ "whatever" equals "music" """, +// ) +// var query: String? = null + + @Param( + """ "category" equals "music" """, + """ "name" contains "rock" """, + """ "description" contains "rock" """, + ) + var query: String? = null + + var expression: Expression? = null + + @Param(/*"noop", */"simple", "optimizing") + var mode: String? = null + + @Param("5000", "20000", "100000") + var testDataSize: Int? = null + + var evaluator: Evaluator? = null + + @Setup + fun setup() { + expression = BoudiccaQueryRunner.parseQuery(query!!) + evaluator = when (mode) { + "noop" -> NoopEvaluator() + "simple" -> SimpleEvaluator(loadTestData(testDataSize)) + "optimizing" -> OptimizingEvaluator(loadTestData(testDataSize)) + else -> throw IllegalArgumentException("illegal mode $mode") + } + } + + @Benchmark + @Fork(3) +// @BenchmarkMode(Mode.AverageTime) +// @OutputTimeUnit(TimeUnit.MILLISECONDS) + @Warmup(iterations = 2, time = 5000, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 2, time = 5000, timeUnit = TimeUnit.MILLISECONDS) + fun testEvaluator(): QueryResult { + return evaluator!!.evaluate(expression!!, PAGE_ALL) + } +} + +fun main() { + +// Thread.sleep(20000) + val testData: List> = loadTestData(100_000) + + + val evaluator = OptimizingEvaluator(testData.toList()) +// val evaluator = SimpleEvaluator(testData.toList()) + + +// val expression = BoudiccaQueryRunner.parseQuery(""" "name" contains "rock" """) +// val expression = BoudiccaQueryRunner.parseQuery(""" "description" contains "rock" """) +// val expression = BoudiccaQueryRunner.parseQuery(""" "whatever" contains "rock" """) + +// val expression = BoudiccaQueryRunner.parseQuery(""" "category" equals "music" """) +// val expression = BoudiccaQueryRunner.parseQuery(""" "name" equals "music" """) +// val expression = BoudiccaQueryRunner.parseQuery(""" "whatever" equals "music" """) + + val expression = BoudiccaQueryRunner.parseQuery(""" ("startDate" after "2024-04-12") and (duration "startDate" "endDate" shorter 720.0) and ((not (hasField "recurrence.type")) or ("recurrence.type" equals "ONCE")) and "*" contains "rock" """) + + println("search took:" + measureTime { + val queryResult = evaluator.evaluate(expression, PAGE_ALL) + println(queryResult.totalResults) + }) + println("second search took:" + measureTime { + val queryResult = evaluator.evaluate(expression, PAGE_ALL) + println(queryResult.totalResults) + }) + var sum = 0 + while (sum != 1) { + val result = evaluator.evaluate(expression, PAGE_ALL) + sum += result.result.hashCode() + } + + println(sum) + + +} + + +private fun loadTestData(testDataSize: Int? = null): List> { +// return listOf( +// mapOf("name" to "what","description" to "what","test" to "what"), +// mapOf("name" to "rock","description" to "what","test" to "what"), +// mapOf("name" to "asd","description" to "rock","test" to "what"), +// mapOf("name" to "what","description" to "what","test" to "what"), +// ) + + val objectMapper = JsonMapper.builder().addModule(JavaTimeModule()) + .addModule(KotlinModule.Builder().build()).build() + + var path = Path.of("testdata.dump") + if (!path.exists()) { + path = Path.of("../../testdata.dump") + } + val testData = objectMapper.readValue( + path.readBytes(), + object : TypeReference>() {}) + + +// val testData = EventDbPublisherClient("https://eventdb.boudicca.events").getAllEntries() + + if(testDataSize != null){ + return testData.take(testDataSize) + }else{ + return testData + } +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/LoadTestDataIntoLocalEventDb.kt b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/LoadTestDataIntoLocalEventDb.kt new file mode 100644 index 00000000..81662afa --- /dev/null +++ b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/LoadTestDataIntoLocalEventDb.kt @@ -0,0 +1,24 @@ +package base.boudicca.query + +import base.boudicca.api.eventdb.ingest.EventDbIngestClient +import base.boudicca.model.Entry +import com.fasterxml.jackson.core.type.TypeReference +import com.fasterxml.jackson.databind.json.JsonMapper +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule +import com.fasterxml.jackson.module.kotlin.KotlinModule +import java.io.FileInputStream +import java.io.ObjectInputStream +import java.nio.file.Path +import kotlin.io.path.readBytes + +fun main() { + val objectMapper = JsonMapper.builder().addModule(JavaTimeModule()) + .addModule(KotlinModule.Builder().build()).build() + + val storeRead = objectMapper.readValue( + Path.of("testdata.dump").readBytes(), + object : TypeReference>() {}) + + val ingestClient = EventDbIngestClient("http://localhost:8081", "ingest", "ingest") + ingestClient.ingestEntries(storeRead) +} diff --git a/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/OrderAfterPerformanceTest.kt b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/OrderAfterPerformanceTest.kt new file mode 100644 index 00000000..368427b3 --- /dev/null +++ b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/OrderAfterPerformanceTest.kt @@ -0,0 +1,81 @@ +package base.boudicca.query + +import base.boudicca.model.Entry +import base.boudicca.query.evaluator.OptimizingEvaluator +import base.boudicca.query.evaluator.PAGE_ALL +import com.fasterxml.jackson.core.type.TypeReference +import com.fasterxml.jackson.databind.json.JsonMapper +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule +import com.fasterxml.jackson.module.kotlin.KotlinModule +import org.openjdk.jmh.annotations.* +import java.nio.file.Path +import java.time.OffsetDateTime +import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.TimeUnit +import kotlin.io.path.exists +import kotlin.io.path.readBytes +import kotlin.time.measureTime + +@State(Scope.Benchmark) +open class OrderAfterPerformanceTest { + + @Param("5000", "20000", "100000") + var testDataSize: Int? = null + + @Param("20", "30", "50", "70", "90") + var resultIsEveryXItem: Int? = null + + var testData: List? = null + var startDateCache: ConcurrentHashMap? = null + + @Setup + fun setup() { + startDateCache = ConcurrentHashMap() +// testData = Utils.order(loadTestData(testDataSize), startDateCache!!) + testData = loadTestData(testDataSize) + } + +// @Benchmark + @Fork(3) + @Warmup(iterations = 2, time = 5000, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 2, time = 5000, timeUnit = TimeUnit.MILLISECONDS) + fun test(): List { + val result = mutableSetOf() + var i = 0 + while (i < testData!!.size) { + result.add(i) + i += resultIsEveryXItem!! + } + + return Utils.order(result.map { testData!![it] }, startDateCache!!) + } +} + +private fun loadTestData(testDataSize: Int? = null): List> { +// return listOf( +// mapOf("name" to "what","description" to "what","test" to "what"), +// mapOf("name" to "rock","description" to "what","test" to "what"), +// mapOf("name" to "asd","description" to "rock","test" to "what"), +// mapOf("name" to "what","description" to "what","test" to "what"), +// ) + + val objectMapper = JsonMapper.builder().addModule(JavaTimeModule()) + .addModule(KotlinModule.Builder().build()).build() + + var path = Path.of("testdata.dump") + if (!path.exists()) { + path = Path.of("../../testdata.dump") + } + val testData = objectMapper.readValue( + path.readBytes(), + object : TypeReference>() {}) + + +// val testData = EventDbPublisherClient("https://eventdb.boudicca.events").getAllEntries() + + if (testDataSize != null) { + return testData.take(testDataSize) + } else { + return testData + } +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/OrderBeforePerformanceTest.kt b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/OrderBeforePerformanceTest.kt new file mode 100644 index 00000000..17be19f4 --- /dev/null +++ b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/OrderBeforePerformanceTest.kt @@ -0,0 +1,82 @@ +package base.boudicca.query + +import base.boudicca.model.Entry +import base.boudicca.query.evaluator.OptimizingEvaluator +import base.boudicca.query.evaluator.PAGE_ALL +import com.fasterxml.jackson.core.type.TypeReference +import com.fasterxml.jackson.databind.json.JsonMapper +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule +import com.fasterxml.jackson.module.kotlin.KotlinModule +import org.openjdk.jmh.annotations.* +import java.nio.file.Path +import java.time.OffsetDateTime +import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.TimeUnit +import kotlin.io.path.exists +import kotlin.io.path.readBytes +import kotlin.time.measureTime + + +@State(Scope.Benchmark) +open class OrderBeforePerformanceTest { + + @Param("5000", "20000", "100000") + var testDataSize: Int? = null + + @Param("20", "30", "50", "70", "90") + var resultIsEveryXItem: Int? = null + + var testData: List? = null + var startDateCache: ConcurrentHashMap? = null + + @Setup + fun setup() { + startDateCache = ConcurrentHashMap() + testData = Utils.order(loadTestData(testDataSize), startDateCache!!) +// testData = loadTestData(testDataSize) + } + +// @Benchmark + @Fork(3) + @Warmup(iterations = 2, time = 5000, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 2, time = 5000, timeUnit = TimeUnit.MILLISECONDS) + fun test(): List { + val result = mutableSetOf() + var i = 0 + while (i < testData!!.size) { + result.add(i) + i += resultIsEveryXItem!! + } + + return testData!!.filterIndexed { index, _ -> result.contains(index) } + } +} + +private fun loadTestData(testDataSize: Int? = null): List> { +// return listOf( +// mapOf("name" to "what","description" to "what","test" to "what"), +// mapOf("name" to "rock","description" to "what","test" to "what"), +// mapOf("name" to "asd","description" to "rock","test" to "what"), +// mapOf("name" to "what","description" to "what","test" to "what"), +// ) + + val objectMapper = JsonMapper.builder().addModule(JavaTimeModule()) + .addModule(KotlinModule.Builder().build()).build() + + var path = Path.of("testdata.dump") + if (!path.exists()) { + path = Path.of("../../testdata.dump") + } + val testData = objectMapper.readValue( + path.readBytes(), + object : TypeReference>() {}) + + +// val testData = EventDbPublisherClient("https://eventdb.boudicca.events").getAllEntries() + + if (testDataSize != null) { + return testData.take(testDataSize) + } else { + return testData + } +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/TestDataGenerator.kt b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/TestDataGenerator.kt new file mode 100644 index 00000000..b57bbbe5 --- /dev/null +++ b/boudicca.base/query-lib/src/jmh/kotlin/base/boudicca/query/TestDataGenerator.kt @@ -0,0 +1,115 @@ +package base.boudicca.query + +import base.boudicca.SemanticKeys +import base.boudicca.api.eventdb.publisher.EventDbPublisherClient +import base.boudicca.model.Entry +import com.fasterxml.jackson.databind.json.JsonMapper +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule +import com.fasterxml.jackson.module.kotlin.KotlinModule +import java.io.Serializable +import java.nio.file.Path +import java.nio.file.StandardOpenOption +import kotlin.io.path.writeBytes +import kotlin.math.min +import kotlin.random.Random + +const val WANTED_EVENTS = 100_000 +fun main() { + val startTime = System.currentTimeMillis() + val testData = TestDataGenerator.getTestData() + + writeTestData(testData) + + println("generating and saving test data took ${System.currentTimeMillis() - startTime}ms") +} + +private fun writeTestData(testData: Pair>, Map>) { + val objectMapper = JsonMapper.builder().addModule(JavaTimeModule()) + .addModule(KotlinModule.Builder().build()).build() + + val bytes = objectMapper.writeValueAsBytes(testData.first) + Path.of("C:\\projects\\boudicca\\testdata.dump") + .writeBytes(bytes, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.CREATE) +} + +object TestDataGenerator { + + fun getTestData(): Pair>, Map> { + val publisherClient = EventDbPublisherClient("https://eventdb.boudicca.events") + + val originalEvents = publisherClient.getAllEntries() + + return remixEvents(originalEvents.toList()) + } + + /** + * takes events + remixes them to generate a big amount of test data + */ + private fun remixEvents(events: List): Pair>, Map> { + val metadata = generateMetaData(events) + + val remixes = mutableListOf>() + for (i in 1..WANTED_EVENTS) { + remixes.add(generateRemix(metadata)) + } + return Pair(remixes, metadata) + } + + private fun generateRemix(metadata: Map): Map { + val remix = mutableMapOf() + for (field in metadata.entries) { + if (field.value.percentage > Math.random()) { + val value = StringBuilder() + val maxDistance = field.value.median - field.value.min + for (i in 1..(field.value.median + maxDistance - Random.Default.nextInt(0, maxDistance * 2 + 1))) { + value.append(field.value.words.random()) + value.append(" ") + } + remix[field.key] = value.toString().trim() + } + } + return remix + } + + /** + * metadata is for each field chance of existing, all possible words, min and max amount of words + */ + private fun generateMetaData(events: List>): Map { + val allFields = events.flatMap { it.keys }.toSet() + return allFields.associateWith { generateMetaData(events, it) } + } + + private fun generateMetaData(events: List>, field: String): Metadata { + var min = Int.MAX_VALUE + var count = 0 + var words = mutableSetOf() + val wordCounts = mutableListOf() + + events + .mapNotNull { it[field] } + .forEach { + val fieldWords = it + .split(" ", "\t", "\n", "\r\n") + .filter(String::isNotBlank) //meh, good enough for this + + count++ + words.addAll(fieldWords) + min = min(min, fieldWords.size) + wordCounts.add(fieldWords.size) + } + if (field == SemanticKeys.LOCATION_NAME) { + words = words.take(5).toMutableSet() + } + return Metadata( + count.toFloat() / events.size.toFloat(), words.toList(), min, + wordCounts.sorted()[wordCounts.size / 2] + ) + } + + data class Metadata( + val percentage: Float, + val words: List, + val min: Int, + val median: Int, + ) : Serializable +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/Utils.kt b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/Utils.kt index 04519e8e..daad3d0b 100644 --- a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/Utils.kt +++ b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/Utils.kt @@ -1,12 +1,13 @@ package base.boudicca.query -import base.boudicca.model.Entry import base.boudicca.SemanticKeys +import base.boudicca.model.Entry import java.time.Instant import java.time.OffsetDateTime import java.time.ZoneId import java.time.ZoneOffset import java.time.format.DateTimeFormatter +import java.util.concurrent.ConcurrentHashMap import java.util.function.Function object Utils { @@ -16,9 +17,9 @@ object Utils { return events.drop(offset ?: 0).take(size ?: DEFAULT_SIZE) } - fun order(entries: Collection): List { + fun order(entries: Collection, dateCache: ConcurrentHashMap): List { return entries.toList() - .map { Pair(it, getStartDate(it[SemanticKeys.STARTDATE])) } + .map { Pair(it, getStartDate(it[SemanticKeys.STARTDATE], dateCache)) } .sortedWith( Comparator .comparing, OffsetDateTime> { it.second } @@ -27,11 +28,22 @@ object Utils { .map { it.first } } - private fun getStartDate(dateText: String?): OffsetDateTime { + private fun getStartDate( + dateText: String?, + startDateCache: ConcurrentHashMap + ): OffsetDateTime { + if (dateText == null) { + return Instant.ofEpochMilli(0).atOffset(ZoneOffset.MIN) + } + if (startDateCache.containsKey(dateText)) { + return startDateCache[dateText]!! + } return try { - OffsetDateTime.parse(dateText, DateTimeFormatter.ISO_DATE_TIME) + val offsetDateTime = OffsetDateTime.parse(dateText, DateTimeFormatter.ISO_DATE_TIME) .atZoneSameInstant(ZoneId.of("Europe/Vienna")) .toOffsetDateTime() + startDateCache[dateText] = offsetDateTime + offsetDateTime } catch (e: Exception) { Instant.ofEpochMilli(0).atOffset(ZoneOffset.MIN) } diff --git a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/EvaluatorUtil.kt b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/EvaluatorUtil.kt deleted file mode 100644 index 3faff506..00000000 --- a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/EvaluatorUtil.kt +++ /dev/null @@ -1,24 +0,0 @@ -package base.boudicca.query.evaluator - -import java.time.Duration -import java.time.OffsetDateTime -import java.time.format.DateTimeFormatter -import java.time.format.DateTimeParseException -import java.time.temporal.ChronoUnit - -object EvaluatorUtil { - fun getDuration(startDateField: String, endDateField: String, event: Map): Double { - if (!event.containsKey(startDateField) || !event.containsKey(endDateField)) { - return 0.0 - } - return try { - val startDate = OffsetDateTime.parse(event[startDateField]!!, DateTimeFormatter.ISO_DATE_TIME) - val endDate = OffsetDateTime.parse(event[endDateField]!!, DateTimeFormatter.ISO_DATE_TIME) - Duration.of(endDate.toEpochSecond() - startDate.toEpochSecond(), ChronoUnit.SECONDS) - .toMillis() - .toDouble() / 1000.0 / 60.0 / 60.0 - } catch (e: DateTimeParseException) { - 0.0 - } - } -} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/OptimizingEvaluator.kt b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/OptimizingEvaluator.kt new file mode 100644 index 00000000..93d044e6 --- /dev/null +++ b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/OptimizingEvaluator.kt @@ -0,0 +1,319 @@ +package base.boudicca.query.evaluator + +import base.boudicca.model.Entry +import base.boudicca.query.* +import base.boudicca.query.evaluator.util.EvaluatorUtil +import base.boudicca.query.evaluator.util.FullTextIndex +import base.boudicca.query.evaluator.util.SimpleIndex +import java.time.LocalDate +import java.time.OffsetDateTime +import java.time.format.DateTimeParseException +import java.util.concurrent.ConcurrentHashMap + +class OptimizingEvaluator(rawEntries: Collection) : Evaluator { + + private val dateCache = ConcurrentHashMap() + private val entries = Utils.order(rawEntries, dateCache) + private val fullTextIndexCache = mutableMapOf() + private val simpleIndexCache = mutableMapOf>>() + private val allFields = getAllFields(entries) + + init { + // init contains searches + for(field in allFields){ + getOrCreateFullTextIndex(field) + } + } + + override fun evaluate(expression: Expression, page: Page): QueryResult { + val resultSet = evaluateExpression(expression) + val orderedResult = if (resultSet.size > entries.size / 3) { + // roughly, if the resultset is bigger then a third of the total entries, + // it is faster to iterate over all entries then to sort the resultset + entries.filterIndexed { i, _ -> resultSet.contains(i) } + } else { + Utils.order(resultSet.map { entries[it] }, dateCache) + } + return QueryResult( + orderedResult + .drop(page.offset) + .take(page.size) + .toList(), + resultSet.size + ) + } + + private fun evaluateExpression(expression: Expression): Set { + when (expression) { + is EqualsExpression -> { + return equalsExpression(expression) + } + + is ContainsExpression -> { + return containsExpression(expression) + } + + is NotExpression -> { + return notExpression(expression) + } + + is AndExpression -> { + return andExpression(expression) + } + + is OrExpression -> { + return orExpression(expression) + } + + is BeforeExpression -> { + return beforeExpression(expression) + } + + is AfterExpression -> { + return afterExpression(expression) + } + + is DurationLongerExpression -> { + return durationLongerExpression(expression) + } + + is DurationShorterExpression -> { + return durationShorterExpression(expression) + } + + is HasFieldExpression -> { + return hasFieldExpression(expression) + } + + else -> { + throw QueryException("unknown expression kind $expression") + } + } + } + + private fun hasFieldExpression(expression: HasFieldExpression): Set { + return entries.mapIndexed { i, event -> + Pair(i, event) + }.filter { (_, event) -> + event.containsKey(expression.getFieldName()) && event[expression.getFieldName()]!!.isNotEmpty() + }.map { (i, _) -> i }.toSet() + } + + private fun notExpression(expression: NotExpression): Set { + val subEvents = evaluateExpression(expression.getChild()) + return entries.indices.filter { i -> + !subEvents.contains(i) + }.toSet() + } + + private fun orExpression(expression: OrExpression): Set { + val leftSubEvents = evaluateExpression(expression.getLeftChild()) + val rightSubEvents = evaluateExpression(expression.getRightChild()) + return entries.indices.filter { i -> + leftSubEvents.contains(i) || rightSubEvents.contains(i) + }.toSet() + //TODO check threshold for the below? + // return leftSubEvents.plus(rightSubEvents) + } + + private fun andExpression(expression: AndExpression): Set { + val leftSubEvents = evaluateExpression(expression.getLeftChild()) + val rightSubEvents = evaluateExpression(expression.getRightChild()) + return entries.indices.filter { i -> + leftSubEvents.contains(i) && rightSubEvents.contains(i) + }.toSet() + //TODO check threshold for the below? + // return if (leftSubEvents.size < rightSubEvents.size) { + // leftSubEvents.intersect(rightSubEvents) + // } else { + // rightSubEvents.intersect(leftSubEvents) + // } + } + + private fun equalsExpression(expression: EqualsExpression): Set { + val lowerCase = expression.getText().lowercase() + return starFieldSearch(expression.getFieldName()) { field -> + val index = getOrCreateSimpleIndex("equals", field) { + SimpleIndex(entries.map { it[field]?.lowercase() }, Comparator.naturalOrder()) + } + index.search { it?.compareTo(lowerCase) ?: -1 } + } + } + + private fun containsExpression(expression: ContainsExpression): Set { + return starFieldSearch(expression.getFieldName()) { field -> + val index = getOrCreateFullTextIndex(field) + index.containsSearch(expression.getText()) + } + } + + private fun beforeExpression(expression: BeforeExpression): Set { + val index = getOrCreateLocalDateIndex(expression.getFieldName()) + return index.search { + if (it != null) { + if (it.isEqual(expression.getDate()) || it.isBefore(expression.getDate())) { + 0 + } else { + 1 + } + } else { + -1 + } + } + } + + private fun afterExpression(expression: AfterExpression): Set { + val index = getOrCreateLocalDateIndex(expression.getFieldName()) + return index.search { + if (it != null) { + if (it.isEqual(expression.getDate()) || it.isAfter(expression.getDate())) { + 0 + } else { + -1 + } + } else { + -1 + } + } + } + + private fun durationLongerExpression(expression: DurationLongerExpression): Set { + val index = getDurationIndex(expression) + val duration = expression.getDuration().toDouble() + return index.search { + if (it != null) { + if (it >= duration) { + 0 + } else { + -1 + } + } else { + -1 + } + } + } + + private fun durationShorterExpression(expression: DurationShorterExpression): Set { + val index = getDurationIndex(expression) + val duration = expression.getDuration().toDouble() + return index.search { + if (it != null) { + if (it <= duration) { + 0 + } else { + 1 + } + } else { + -1 + } + } + } + + private fun getDurationIndex(expression: AbstractDurationExpression): SimpleIndex { + val index = + getOrCreateSimpleIndex("duration", expression.getStartDateField() + "&" + expression.getEndDateField()) { + SimpleIndex(entries.map { + EvaluatorUtil.getDuration( + expression.getStartDateField(), + expression.getEndDateField(), + it, dateCache + ) + }, Comparator.naturalOrder()) + } + return index + } + + private fun getOrCreateLocalDateIndex(fieldName: String): SimpleIndex { + val index = getOrCreateSimpleIndex("localDate", fieldName) { + SimpleIndex(entries.map { safeGetLocalDate(it[fieldName], dateCache) }, Comparator.naturalOrder()) + } + return index + } + + private fun starFieldSearch(fieldName: String, search: (String) -> Set): Set { + val allFieldsToCheck = if (fieldName == "*") { + allFields + } else { + setOf(fieldName) + } + val result = mutableSetOf() + for (field in allFieldsToCheck) { + result.addAll(search(field)) + } + return result + } + + @Suppress("UNCHECKED_CAST") + private fun getOrCreateSimpleIndex( + operation: String, + fieldName: String, + indexCreator: () -> SimpleIndex + ): SimpleIndex { + //TODO this lock could lead to contention + synchronized(simpleIndexCache) { + val operationCache = if (!simpleIndexCache.containsKey(operation)) { + val newCache = mutableMapOf>() + simpleIndexCache[operation] = newCache + newCache + } else { + simpleIndexCache[operation]!! + } + + val index = if (!operationCache.containsKey(fieldName)) { + val index = indexCreator() + operationCache[fieldName] = index + index + } else { + operationCache[fieldName]!! + } + + return index as SimpleIndex + } + } + + private fun getOrCreateFullTextIndex(fieldName: String): FullTextIndex { + synchronized(fullTextIndexCache) { + if (fullTextIndexCache.containsKey(fieldName)) { + return fullTextIndexCache[fieldName]!! + } else { + val index = FullTextIndex(entries, fieldName) + fullTextIndexCache[fieldName] = index + return index + } + } + } + + private fun safeGetLocalDate(dateText: String?, dateCache: ConcurrentHashMap): LocalDate? { + if (dateText == null) { + return null + } + try {//TODO cache null + return getLocalDate(dateText, dateCache) + } catch (e: DateTimeParseException) { + return null + } + } + + private fun getLocalDate(dateText: String, dataCache: ConcurrentHashMap): LocalDate { + val offsetDateTime = if (dataCache.containsKey(dateText)) { + dataCache[dateText]!! + } else { + EvaluatorUtil.parseDate(dateText, dataCache) + } + return offsetDateTime.toLocalDate() + } + + + private fun getAllFields(entries: List>): Set { + val allFields = mutableSetOf() + + for (entry in entries) { + for (key in entry.keys) { + allFields.add(key) + } + } + + return allFields + } + +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/SimpleEvaluator.kt b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/SimpleEvaluator.kt index 968b4d82..791fb88d 100644 --- a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/SimpleEvaluator.kt +++ b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/SimpleEvaluator.kt @@ -1,17 +1,19 @@ package base.boudicca.query.evaluator import base.boudicca.model.Entry -import base.boudicca.query.Utils import base.boudicca.query.* +import base.boudicca.query.evaluator.util.EvaluatorUtil import java.time.LocalDate import java.time.OffsetDateTime import java.time.ZoneId import java.time.format.DateTimeFormatter import java.time.format.DateTimeParseException +import java.util.concurrent.ConcurrentHashMap class SimpleEvaluator(rawEntries: Collection) : Evaluator { - private val events = Utils.order(rawEntries) + private val dateCache = ConcurrentHashMap() + private val events = Utils.order(rawEntries, dateCache) override fun evaluate(expression: Expression, page: Page): QueryResult { val results = events.filter { matchesExpression(expression, it) } @@ -84,13 +86,21 @@ class SimpleEvaluator(rawEntries: Collection) : Evaluator { is DurationLongerExpression -> { val duration = - EvaluatorUtil.getDuration(expression.getStartDateField(), expression.getEndDateField(), event) + EvaluatorUtil.getDuration( + expression.getStartDateField(), + expression.getEndDateField(), + event, dateCache + ) return duration >= expression.getDuration().toDouble() } is DurationShorterExpression -> { val duration = - EvaluatorUtil.getDuration(expression.getStartDateField(), expression.getEndDateField(), event) + EvaluatorUtil.getDuration( + expression.getStartDateField(), + expression.getEndDateField(), + event, dateCache + ) return duration <= expression.getDuration().toDouble() } diff --git a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtil.kt b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtil.kt new file mode 100644 index 00000000..79ad2cf4 --- /dev/null +++ b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtil.kt @@ -0,0 +1,134 @@ +package base.boudicca.query.evaluator.util + +import java.time.Duration +import java.time.OffsetDateTime +import java.time.format.DateTimeFormatter +import java.time.format.DateTimeParseException +import java.time.temporal.ChronoUnit +import java.util.concurrent.ConcurrentHashMap +import kotlin.time.measureTime + +object EvaluatorUtil { + fun getDuration( + startDateField: String, + endDateField: String, + event: Map, + dataCache: ConcurrentHashMap + ): Double { + if (!event.containsKey(startDateField) || !event.containsKey(endDateField)) { + return 0.0 + } + return try { + val startDate = parseDate(event[startDateField]!!, dataCache) + val endDate = parseDate(event[endDateField]!!, dataCache) + Duration.of(endDate.toEpochSecond() - startDate.toEpochSecond(), ChronoUnit.SECONDS) + .toMillis() + .toDouble() / 1000.0 / 60.0 / 60.0 + } catch (e: DateTimeParseException) { + 0.0 + } + } + + fun parseDate( + dateText: String, + dataCache: ConcurrentHashMap + ): OffsetDateTime { + return if (dataCache.containsKey(dateText)) { + dataCache[dateText]!! + } else { +// try { + val parsedDate = OffsetDateTime.parse(dateText, DateTimeFormatter.ISO_DATE_TIME) + dataCache[dateText] = parsedDate + parsedDate +// } catch (e: DateTimeParseException) { +// dataCache[dateText] = null //TODO make nullable cache +// null +// } + } + } + + fun binarySearch(start: Int, length: Int, comparator: (Int) -> Int): Int { + var lower = start + var upper = start + length - 1 + + while (lower <= upper) { + val i = (lower + upper) / 2 + val result = comparator.invoke(i) + if (result == 0) { + return i + } else if (result < 0) { + lower = i + 1 + } else { + upper = i - 1 + } + } + return -1 + } + + //impl copied from Arrays.mergeSort.... + fun sort(start: Int, length: Int, sortable: Sortable) { + val aux = sortable.copy() + mergeSort(aux, sortable, start, start + length, -start) + } + + interface Sortable { + fun get(): T + fun copy(): Sortable + fun compare(i: Int, j: Int): Int + fun swap(i: Int, j: Int) + fun setFrom(i: Int, src: Sortable, j: Int) + } + + private const val INSERTIONSORT_THRESHOLD = 7 + + private fun mergeSort( + src: Sortable, + dest: Sortable, + low: Int, + high: Int, + off: Int + ) { + var low = low + var high = high + val length = high - low + + // Insertion sort on smallest arrays + if (length < INSERTIONSORT_THRESHOLD) { + for (i in low until high) { + var j = i + while (j > low && dest.compare(j - 1, j) > 0) { + dest.swap(j, j - 1) + j-- + } + } + return + } + + // Recursively sort halves of dest into src + val destLow = low + val destHigh = high + low += off + high += off + val mid = (low + high) ushr 1 + mergeSort(dest, src, low, mid, -off) + mergeSort(dest, src, mid, high, -off) + + //TODO maybe? + // If list is already sorted, just copy from src to dest. This is an + // optimization that results in faster sorts for nearly ordered lists. +// if ((src[mid - 1] as Comparable<*>).compareTo(src[mid]) <= 0) { +// System.arraycopy(src, low, dest, destLow, length) +// return +// } + + // Merge sorted halves (now in src) into dest + var i = destLow + var p = low + var q = mid + while (i < destHigh) { + if (q >= high || p < mid && (src.compare(p, q) <= 0)) dest.setFrom(i, src, p++) + else dest.setFrom(i, src, q++) + i++ + } + } +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/FullTextIndex.kt b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/FullTextIndex.kt new file mode 100644 index 00000000..c578dcd2 --- /dev/null +++ b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/FullTextIndex.kt @@ -0,0 +1,196 @@ +package base.boudicca.query.evaluator.util + +import base.boudicca.model.Entry +import java.nio.ByteBuffer +import java.nio.CharBuffer +import java.text.BreakIterator +import java.util.* + +class FullTextIndex(entries: List, field: String) { + + private val words = getWords(entries, field) + private val index = createIndex() + + fun get(i: Int): Pair { + return Pair(index.getInt(i * 8), index.getInt(i * 8 + 4)) + } + + fun size(): Int { + return index.capacity() / 8 + } + + fun getEntriesForWord(i: Int): Set { + return words[i].second + } + + fun containsSearch(text: String): Set { + val searchWords = breakText(text.lowercase()) + val subResults = searchWords.map { word -> + val (lower, upper) = containsSearchIndices(word) + + val result = mutableSetOf() + for (i in lower until upper) { + result.addAll(words[get(i).first].second) + } + result + } + + if (subResults.isEmpty()) { + return emptySet() + } + + var result: Set = subResults.first() + for (subResult in subResults.drop(1)) { + result = result.intersect(subResult) + } + + return result + } + + private fun containsSearchIndices(lowerText: CharBuffer): Pair { + val lower = binarySearch { i -> + val matches = startsWith(i, lowerText) + if (matches) { + if (i - 1 < 0 || !startsWith(i - 1, lowerText)) { + 0 + } else { + 1 + } + } else { + val (vI, sI) = get(i) + val word = words[vI].first + word.subSequence(sI, word.capacity()).compareTo(lowerText) + } + } + if (lower == -1) { + //nothing found + return Pair(-1, -1) + } + val upper = binarySearch { i -> + val matches = startsWith(i, lowerText) + if (matches) { + if (i + 1 >= size() || !startsWith(i + 1, lowerText)) { + 0 + } else { + -1 + } + } else { + val (vI, sI) = get(i) + val word = words[vI].first + word.subSequence(sI, word.capacity()).compareTo(lowerText) + } + } + return Pair(lower, upper + 1) + } + + private fun startsWith(i: Int, lowerPrefix: CharBuffer): Boolean { + val (vI, sI) = get(i) + return words[vI].first.startsWith(lowerPrefix, sI, false) //ignore case already done by lowering everything + } + + private fun binarySearch(comparator: (Int) -> Int): Int { + return EvaluatorUtil.binarySearch(0, size(), comparator) + } + + private fun getWords(entries: List, field: String): List>> { + val words = mutableMapOf>() + entries.forEachIndexed { entryIndex, entry -> + if (!entry[field].isNullOrEmpty()) { + val lowercase = entry[field]!!.lowercase() + val newWords = breakText(lowercase) + + newWords.forEach { newWord -> + if (words.containsKey(newWord)) { + words[newWord]!!.add(entryIndex) + } else { + words[newWord] = mutableSetOf(entryIndex) + } + } + } + } + + return words.toList() + } + + private fun breakText(lowercase: String): MutableList { + val iter = BreakIterator.getWordInstance(Locale.GERMAN) + iter.setText(lowercase) + var breakI = 0 + var newBreakI = iter.next() + val newWords = mutableListOf() + while (newBreakI != BreakIterator.DONE) { + val newWord = CharBuffer.wrap(lowercase.substring(breakI, newBreakI).trim()) + newWords.add(newWord) + breakI = newBreakI + newBreakI = iter.next() + } + return newWords + } + + private fun createIndex(): ByteBuffer { + var count = 0 + words.forEach { (word, _) -> + count += word.capacity() + } + + val index = ByteBuffer.allocate(count * 8) + + words.forEachIndexed { wordI, word -> + for (stringI in word.first.indices) { + val i = --count + index.putInt(i * 8, wordI) + index.putInt(i * 8 + 4, stringI) + } + } + + return sort(index) + } + + private fun sort(index: ByteBuffer): ByteBuffer { + EvaluatorUtil.sort( + 0, index.capacity() / 8, + SortableByteBuffer(index, words) + ) + return index + } + + class SortableByteBuffer(private val byteBuffer: ByteBuffer, private val values: List>>) : + EvaluatorUtil.Sortable { + override fun get(): ByteBuffer { + return byteBuffer + } + + override fun copy(): EvaluatorUtil.Sortable { + return SortableByteBuffer(ByteBuffer.wrap(byteBuffer.array().copyOf()), values) + } + + override fun compare(i: Int, j: Int): Int { + val vI1 = byteBuffer.getInt(i * 8) + val sI1 = byteBuffer.getInt(i * 8 + 4) + val vI2 = byteBuffer.getInt(j * 8) + val sI2 = byteBuffer.getInt(j * 8 + 4) + val word1 = values[vI1].first + val subValue1 = word1.subSequence(sI1, word1.capacity()) + val word2 = values[vI2].first + val subValue2 = word2.subSequence(sI2, word2.capacity()) + return subValue1.compareTo(subValue2) + } + + override fun swap(i: Int, j: Int) { + val vI = byteBuffer.getInt(i * 8) + val sI = byteBuffer.getInt(i * 8 + 4) + byteBuffer.putInt(i * 8, byteBuffer.getInt(j * 8)) + byteBuffer.putInt(i * 8 + 4, byteBuffer.getInt(j * 8 + 4)) + byteBuffer.putInt(j * 8, vI) + byteBuffer.putInt(j * 8 + 4, sI) + } + + override fun setFrom(i: Int, src: EvaluatorUtil.Sortable, j: Int) { + val otherByteBuffer = src.get() + byteBuffer.putInt(i * 8, otherByteBuffer.getInt(j * 8)) + byteBuffer.putInt(i * 8 + 4, otherByteBuffer.getInt(j * 8 + 4)) + } + + } +} + diff --git a/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/SimpleIndex.kt b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/SimpleIndex.kt new file mode 100644 index 00000000..acffd962 --- /dev/null +++ b/boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/util/SimpleIndex.kt @@ -0,0 +1,47 @@ +package base.boudicca.query.evaluator.util + +class SimpleIndex(values: List, comparator: Comparator) { + private val index = values + .mapIndexed { index, t -> Pair(index, t) } + .filter { it.second != null } + .sortedWith(Comparator.comparing({ pair -> pair.second }, comparator)) + + fun search(comparator: (T) -> Int): Set { + val lower = EvaluatorUtil.binarySearch(0, index.size) { i -> + val result = comparator.invoke(index[i].second) + if (result == 0) { + if (i - 1 < 0 || comparator.invoke(index[i - 1].second) != 0) { + 0 + } else { + 1 + } + } else { + result + } + } + if (lower == -1) { + return emptySet() + } + + val upper = EvaluatorUtil.binarySearch(0, index.size) { i -> + val result = comparator.invoke(index[i].second) + if (result == 0) { + if (i + 1 >= index.size || comparator.invoke(index[i + 1].second) != 0) { + 0 + } else { + -1 + } + } else { + result + } + } + + val result = mutableSetOf() + for (i in lower..upper) { + result.add(index[i].first) + } + return result + } + +} + diff --git a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/AbstractEvaluatorTest.kt b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/AbstractEvaluatorTest.kt new file mode 100644 index 00000000..71d89388 --- /dev/null +++ b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/AbstractEvaluatorTest.kt @@ -0,0 +1,318 @@ +package base.boudicca.query.evaluator + +import base.boudicca.SemanticKeys +import base.boudicca.model.Entry +import base.boudicca.query.* +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test +import java.time.LocalDateTime +import java.time.OffsetDateTime +import java.time.ZoneId +import java.time.format.DateTimeFormatter + +abstract class AbstractEvaluatorTest { + + @Test + fun simpleEquals() { + val events = callEvaluator(EqualsExpression("name", "event1")) + assertEquals(1, events.size) + assertEquals("event1", events.first()["name"]) + } + + @Test + fun simpleContains() { + val events = callEvaluator(ContainsExpression("name", "event")) + assertEquals(2, events.size) + } + + @Test + fun simpleOr() { + val events = callEvaluator(OrExpression(EqualsExpression("name", "event1"), EqualsExpression("name", "event2"))) + assertEquals(2, events.size) + } + + @Test + fun simpleAnd() { + val events = + callEvaluator(AndExpression(EqualsExpression("name", "somethingelse"), EqualsExpression("field", "wuuut"))) + assertEquals(1, events.size) + } + + @Test + fun simpleNot() { + val events = callEvaluator(NotExpression(EqualsExpression("name", "event1"))) + assertEquals(3, events.size) + } + + @Test + fun orAndNot() { + val events = callEvaluator( + OrExpression( + NotExpression( + ContainsExpression("name", "event") + ), + AndExpression( + EqualsExpression("name", "event1"), + EqualsExpression("field", "value1") + ) + ) + ) + assertEquals(3, events.size) + } + + @Test + fun testCaseInsensitiveMatching() { + var events = callEvaluator( + EqualsExpression("field", "value"), + listOf( + mapOf("field" to "value"), + mapOf("field" to "VAlue"), + ) + ) + assertEquals(2, events.size) + + events = callEvaluator( + ContainsExpression("field", "value"), + listOf( + mapOf("field" to "1value2"), + mapOf("field" to "1VAlue2"), + ) + ) + assertEquals(2, events.size) + } + + @Test + fun testCaseSensitiveFieldNameMatching() { + val events = callEvaluator( + EqualsExpression("field", "value"), + listOf( + mapOf("field" to "value"), + mapOf("FIELD" to "value"), + ) + ) + assertEquals(1, events.size) + } + + @Test + fun testStarFieldName() { + var events = callEvaluator( + EqualsExpression("*", "value"), + listOf( + mapOf("field" to "value"), + mapOf("otherfield" to "value"), + ) + ) + assertEquals(2, events.size) + + events = callEvaluator( + ContainsExpression("*", "value"), + listOf( + mapOf("field" to "1value2"), + mapOf("otherfield" to "1value2"), + ) + ) + assertEquals(2, events.size) + } + + @Test + fun simpleBefore() { + val events = + callEvaluator( + BeforeExpression("startDate", "2023-05-27"), + listOf( + entry("event1", "2023-05-25T00:00:00"), + entry("event2", "2023-05-28T00:00:00"), + entry("event3", "2023-05-29T00:00:00"), + ) + ) + assertEquals(1, events.size) + assertEquals("2023-05-25T00:00:00+02:00", events.first()["startDate"]) + } + + @Test + fun simpleAfter() { + val events = + callEvaluator( + AfterExpression("startDate", "2023-05-27"), + listOf( + entry("event1", "2023-05-25T00:00:00"), + entry("event2", "2023-05-26T00:00:00"), + entry("event3", "2023-05-29T00:00:00"), + ) + ) + assertEquals(1, events.size) + assertEquals("2023-05-29T00:00:00+02:00", events.first()["startDate"]) + } + + @Test + fun simpleAfterInclusiveToday() { + val events = + callEvaluator( + AfterExpression("startDate", "2023-05-25"), + listOf( + entry("event1", "2023-05-25T00:00:00"), + entry("event2", "2023-05-29T00:00:00"), + ) + ) + assertEquals(2, events.size) + } + + @Test + fun simpleBeforeInclusiveToday() { + val events = + callEvaluator( + BeforeExpression("startDate", "2023-05-29"), + listOf( + entry("event1", "2023-05-25T00:00:00"), + entry("event2", "2023-05-29T00:00:00"), + ) + ) + assertEquals(2, events.size) + } + + @Test + fun durationLonger() { + val events = + callEvaluator( + DurationLongerExpression("startDate", "endDate", 2.0), + listOf( + mapOf( + SemanticKeys.NAME to "event1", + SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", + SemanticKeys.ENDDATE to "2024-05-31T03:00:00Z", + ), + mapOf( + SemanticKeys.NAME to "event2", + SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", + SemanticKeys.ENDDATE to "2024-05-31T00:00:00Z", + ), + ) + ) + assertEquals(1, events.size) + assertEquals("event1", events.first()["name"]) + } + + @Test + fun durationShorter() { + val events = + callEvaluator( + DurationShorterExpression("startDate", "endDate", 2.0), + listOf( + mapOf( + SemanticKeys.NAME to "event1", + SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", + SemanticKeys.ENDDATE to "2024-05-31T03:00:00Z", + ), + mapOf( + SemanticKeys.NAME to "event2", + SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", + SemanticKeys.ENDDATE to "2024-05-31T00:00:00Z", + ), + ) + ) + assertEquals(1, events.size) + assertEquals("event2", events.first()["name"]) + } + + @Test + fun durationZero() { + val events = + callEvaluator( + DurationLongerExpression("startDate", "endDate", 0.0), + listOf( + mapOf( + SemanticKeys.NAME to "event1", + ), + ) + ) + assertEquals(1, events.size) + assertEquals("event1", events.first()["name"]) + } + + @Test + fun hasField() { + val events = + callEvaluator( + HasFieldExpression("recurrence.type"), + listOf( + mapOf( + SemanticKeys.NAME to "event1", + ), + mapOf( + SemanticKeys.NAME to "event2", + SemanticKeys.RECURRENCE_TYPE to "REGULARLY", + ), + ) + ) + assertEquals(1, events.size) + assertEquals("event2", events.first()["name"]) + } + + @Test + fun resultsAreSorted() { + val events = + callEvaluator( + ContainsExpression("name", "event"), + listOf( + mapOf( + SemanticKeys.NAME to "event3", + SemanticKeys.STARTDATE to OffsetDateTime.now().minusDays(1) + .format(DateTimeFormatter.ISO_DATE_TIME) + ), + mapOf( + SemanticKeys.NAME to "event1", + SemanticKeys.STARTDATE to OffsetDateTime.now().plusDays(1) + .format(DateTimeFormatter.ISO_DATE_TIME) + ), + mapOf( + SemanticKeys.NAME to "event2", + SemanticKeys.STARTDATE to OffsetDateTime.now() + .format(DateTimeFormatter.ISO_DATE_TIME) + ), + ) + ) + assertEquals(3, events.size) + assertEquals("event3", events[0]["name"]) + assertEquals("event2", events[1]["name"]) + assertEquals("event1", events[2]["name"]) + } + + + private fun callEvaluator(expression: Expression): Collection { + return callEvaluator(expression, testData()) + } + + private fun callEvaluator( + expression: Expression, + entries: Collection + ): List { + return createEvaluator(entries) + .evaluate(expression, PAGE_ALL) + .result + } + + abstract fun createEvaluator(entries: Collection>): Evaluator + + private fun testData(): Collection { + return listOf( + entry("name" to "event1", "field" to "value1"), + entry("name" to "event2", "field" to "value2"), + entry("name" to "somethingelse", "field" to "wuuut"), + entry("name" to "somethingelse2", "field" to "wuuut"), + ) + } + + private fun entry(name: String, startDate: String): Entry { + return entry("name" to name, "startDate" to DateTimeFormatter.ISO_DATE_TIME.format(parseLocalDate(startDate))) + } + + private fun entry(vararg data: Pair): Entry { + return data.toMap() + } + + private fun parseLocalDate(startDateAsString: String): OffsetDateTime { + return LocalDateTime.parse(startDateAsString, DateTimeFormatter.ISO_LOCAL_DATE_TIME).atZone(ZoneId.of("CET")) + .toOffsetDateTime() + } +} + diff --git a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/OptimizingEvaluatorTest.kt b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/OptimizingEvaluatorTest.kt new file mode 100644 index 00000000..f9ebd507 --- /dev/null +++ b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/OptimizingEvaluatorTest.kt @@ -0,0 +1,8 @@ +package base.boudicca.query.evaluator + +class OptimizingEvaluatorTest : AbstractEvaluatorTest() { + override fun createEvaluator(entries: Collection>): Evaluator { + return OptimizingEvaluator(entries) + } +} + diff --git a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/SimpleEvaluatorTest.kt b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/SimpleEvaluatorTest.kt index 06f0b72d..94f3822e 100644 --- a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/SimpleEvaluatorTest.kt +++ b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/SimpleEvaluatorTest.kt @@ -1,285 +1,8 @@ package base.boudicca.query.evaluator -import base.boudicca.model.Entry -import base.boudicca.SemanticKeys -import base.boudicca.query.* -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test -import java.time.LocalDateTime -import java.time.OffsetDateTime -import java.time.ZoneId -import java.time.format.DateTimeFormatter - -class SimpleEvaluatorTest { - - @Test - fun simpleEquals() { - val events = callEvaluator(EqualsExpression("name", "event1")) - assertEquals(1, events.size) - assertEquals("event1", events.first()["name"]) - } - - @Test - fun simpleContains() { - val events = callEvaluator(ContainsExpression("name", "event")) - assertEquals(2, events.size) - } - - @Test - fun simpleOr() { - val events = callEvaluator(OrExpression(EqualsExpression("name", "event1"), EqualsExpression("name", "event2"))) - assertEquals(2, events.size) - } - - @Test - fun simpleAnd() { - val events = - callEvaluator(AndExpression(EqualsExpression("name", "somethingelse"), EqualsExpression("field", "wuuut"))) - assertEquals(1, events.size) - } - - @Test - fun simpleNot() { - val events = callEvaluator(NotExpression(EqualsExpression("name", "event1"))) - assertEquals(3, events.size) - } - - @Test - fun orAndNot() { - val events = callEvaluator( - OrExpression( - NotExpression( - ContainsExpression("name", "event") - ), - AndExpression( - EqualsExpression("name", "event1"), - EqualsExpression("field", "value1") - ) - ) - ) - assertEquals(3, events.size) - } - - @Test - fun testCaseInsensitiveMatching() { - var events = callEvaluator( - EqualsExpression("field", "value"), - listOf( - mapOf("field" to "value"), - mapOf("field" to "VAlue"), - ) - ) - assertEquals(2, events.size) - - events = callEvaluator( - ContainsExpression("field", "value"), - listOf( - mapOf("field" to "1value2"), - mapOf("field" to "1VAlue2"), - ) - ) - assertEquals(2, events.size) - } - - @Test - fun testCaseSensitiveFieldNameMatching() { - val events = callEvaluator( - EqualsExpression("field", "value"), - listOf( - mapOf("field" to "value"), - mapOf("FIELD" to "value"), - ) - ) - assertEquals(1, events.size) - } - - @Test - fun testStarFieldName() { - var events = callEvaluator( - EqualsExpression("*", "value"), - listOf( - mapOf("field" to "value"), - mapOf("otherfield" to "value"), - ) - ) - assertEquals(2, events.size) - - events = callEvaluator( - ContainsExpression("*", "value"), - listOf( - mapOf("field" to "1value2"), - mapOf("otherfield" to "1value2"), - ) - ) - assertEquals(2, events.size) - } - - @Test - fun simpleBefore() { - val events = - callEvaluator( - BeforeExpression("startDate", "2023-05-27"), - listOf( - entry("event1", "2023-05-25T00:00:00"), - entry("event2", "2023-05-29T00:00:00"), - ) - ) - assertEquals(1, events.size) - assertEquals("2023-05-25T00:00:00+02:00", events.first()["startDate"]) - } - - @Test - fun simpleAfter() { - val events = - callEvaluator( - AfterExpression("startDate", "2023-05-27"), - listOf( - entry("event1", "2023-05-25T00:00:00"), - entry("event2", "2023-05-29T00:00:00"), - ) - ) - assertEquals(1, events.size) - assertEquals("2023-05-29T00:00:00+02:00", events.first()["startDate"]) - } - - @Test - fun simpleAfterInclusiveToday() { - val events = - callEvaluator( - AfterExpression("startDate", "2023-05-25"), - listOf( - entry("event1", "2023-05-25T00:00:00"), - entry("event2", "2023-05-29T00:00:00"), - ) - ) - assertEquals(2, events.size) - } - - @Test - fun simpleBeforeInclusiveToday() { - val events = - callEvaluator( - BeforeExpression("startDate", "2023-05-29"), - listOf( - entry("event1", "2023-05-25T00:00:00"), - entry("event2", "2023-05-29T00:00:00"), - ) - ) - assertEquals(2, events.size) - } - - @Test - fun durationLonger() { - val events = - callEvaluator( - DurationLongerExpression("startDate", "endDate", 2.0), - listOf( - mapOf( - SemanticKeys.NAME to "event1", - SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", - SemanticKeys.ENDDATE to "2024-05-31T03:00:00Z", - ), - mapOf( - SemanticKeys.NAME to "event2", - SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", - SemanticKeys.ENDDATE to "2024-05-31T00:00:00Z", - ), - ) - ) - assertEquals(1, events.size) - assertEquals("event1", events.first()["name"]) - } - - @Test - fun durationShorter() { - val events = - callEvaluator( - DurationShorterExpression("startDate", "endDate", 2.0), - listOf( - mapOf( - SemanticKeys.NAME to "event1", - SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", - SemanticKeys.ENDDATE to "2024-05-31T03:00:00Z", - ), - mapOf( - SemanticKeys.NAME to "event2", - SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", - SemanticKeys.ENDDATE to "2024-05-31T00:00:00Z", - ), - ) - ) - assertEquals(1, events.size) - assertEquals("event2", events.first()["name"]) - } - - @Test - fun durationZero() { - val events = - callEvaluator( - DurationLongerExpression("startDate", "endDate", 0.0), - listOf( - mapOf( - SemanticKeys.NAME to "event1", - ), - ) - ) - assertEquals(1, events.size) - assertEquals("event1", events.first()["name"]) - } - - @Test - fun hasField() { - val events = - callEvaluator( - HasFieldExpression("recurrence.type"), - listOf( - mapOf( - SemanticKeys.NAME to "event1", - ), - mapOf( - SemanticKeys.NAME to "event2", - SemanticKeys.RECURRENCE_TYPE to "REGULARLY", - ), - ) - ) - assertEquals(1, events.size) - assertEquals("event2", events.first()["name"]) - } - - - private fun callEvaluator(expression: Expression): Collection { - return callEvaluator(expression, testData()) - } - - private fun callEvaluator( - expression: Expression, - entries: Collection - ): List { +class SimpleEvaluatorTest : AbstractEvaluatorTest() { + override fun createEvaluator(entries: Collection>): Evaluator { return SimpleEvaluator(entries) - .evaluate(expression, PAGE_ALL) - .result - } - - private fun testData(): Collection { - return listOf( - entry("name" to "event1", "field" to "value1"), - entry("name" to "event2", "field" to "value2"), - entry("name" to "somethingelse", "field" to "wuuut"), - entry("name" to "somethingelse2", "field" to "wuuut"), - ) - } - - private fun entry(name: String, startDate: String): Entry { - return entry("name" to name, "startDate" to DateTimeFormatter.ISO_DATE_TIME.format(parseLocalDate(startDate))) - } - - private fun entry(vararg data: Pair): Entry { - return data.toMap() - } - - private fun parseLocalDate(startDateAsString: String): OffsetDateTime { - return LocalDateTime.parse(startDateAsString, DateTimeFormatter.ISO_LOCAL_DATE_TIME).atZone(ZoneId.of("CET")) - .toOffsetDateTime() } } diff --git a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilBinarySearchTest.kt b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilBinarySearchTest.kt new file mode 100644 index 00000000..a122ed6b --- /dev/null +++ b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilBinarySearchTest.kt @@ -0,0 +1,28 @@ +package base.boudicca.query.evaluator.util + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class EvaluatorUtilBinarySearchTest { + + @Test + fun testEmptyList() { + assertEquals(-1, testWithList("a", listOf())) + } + + @Test + fun testSimpleSearch() { + assertEquals(0, testWithList("a", listOf("a"))) + } + + @Test + fun testSearch() { + assertEquals(1, testWithList("b", listOf("a", "b", "c", "d", "e", "f"))) + } + + private fun testWithList(item: String, list: List): Int { + return EvaluatorUtil.binarySearch(0, list.size) { list[it].compareTo(item) } + } + + +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/EvaluatorUtilTest.kt b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilDurationTest.kt similarity index 79% rename from boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/EvaluatorUtilTest.kt rename to boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilDurationTest.kt index 7e1b553d..69b03caf 100644 --- a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/EvaluatorUtilTest.kt +++ b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilDurationTest.kt @@ -1,16 +1,17 @@ -package base.boudicca.query.evaluator +package base.boudicca.query.evaluator.util import base.boudicca.SemanticKeys import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Test +import java.util.concurrent.ConcurrentHashMap -class EvaluatorUtilTest { +class EvaluatorUtilDurationTest { @Test fun testEmpty() { Assertions.assertEquals( 0.0, - EvaluatorUtil.getDuration( + getDuration( "startDate", "endDate", mapOf( ) @@ -22,7 +23,7 @@ class EvaluatorUtilTest { fun testNoStart() { Assertions.assertEquals( 0.0, - EvaluatorUtil.getDuration( + getDuration( "startDate", "endDate", mapOf( SemanticKeys.ENDDATE to "2024-05-31T01:00:00Z", @@ -35,7 +36,7 @@ class EvaluatorUtilTest { fun testNoEnd() { Assertions.assertEquals( 0.0, - EvaluatorUtil.getDuration( + getDuration( "startDate", "endDate", mapOf( SemanticKeys.STARTDATE to "2024-05-31T01:00:00Z", @@ -48,7 +49,7 @@ class EvaluatorUtilTest { fun testSimple() { Assertions.assertEquals( 1.0, - EvaluatorUtil.getDuration( + getDuration( "startDate", "endDate", mapOf( SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", @@ -62,7 +63,7 @@ class EvaluatorUtilTest { fun testNegative() { Assertions.assertEquals( -1.0, - EvaluatorUtil.getDuration( + getDuration( "startDate", "endDate", mapOf( SemanticKeys.STARTDATE to "2024-05-31T01:00:00Z", @@ -76,7 +77,7 @@ class EvaluatorUtilTest { fun testFraction() { Assertions.assertEquals( 0.5, - EvaluatorUtil.getDuration( + getDuration( "startDate", "endDate", mapOf( SemanticKeys.STARTDATE to "2024-05-31T00:00:00Z", @@ -86,4 +87,8 @@ class EvaluatorUtilTest { ) } + fun getDuration(startDateField: String, endDateField: String, event: Map): Double { + return EvaluatorUtil.getDuration(startDateField, endDateField, event, ConcurrentHashMap()) + } + } \ No newline at end of file diff --git a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilSortTest.kt b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilSortTest.kt new file mode 100644 index 00000000..98e27b86 --- /dev/null +++ b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/EvaluatorUtilSortTest.kt @@ -0,0 +1,72 @@ +package base.boudicca.query.evaluator.util + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class EvaluatorUtilSortTest { + + @Test + fun testEmptyList() { + testWithList( + listOf() + ) + } + + @Test + fun testSingleList() { + testWithList( + listOf(1) + ) + } + + @Test + fun testSmallList() { + testWithList( + listOf(6, 1, 4) + ) + } + + @Test + fun testBiggerList() { + testWithList( + listOf(6, 1, 4, 5, 7, 1, 65, 9, 0, 3, 7, 2, 5, 57, 45, 23, 53, 3, 345, 1, 52, 2) + ) + } + + private fun testWithList(list: List) { + val solution = list.sorted() + + val toSortList = list.toMutableList() + + EvaluatorUtil.sort(0, list.size, SortableList(toSortList)) + + assertEquals(solution, toSortList) + } + + class SortableList(private val list: MutableList) : EvaluatorUtil.Sortable> { + override fun get(): MutableList { + return list + } + + override fun copy(): EvaluatorUtil.Sortable> { + return SortableList(list.toMutableList()) + } + + override fun compare(i: Int, j: Int): Int { + return list[i].compareTo(list[j]) + } + + override fun swap(i: Int, j: Int) { + val v1 = list[i] + list[i] = list[j] + list[j] = v1 + } + + override fun setFrom(i: Int, src: EvaluatorUtil.Sortable>, j: Int) { + val otherList = src.get() + list[i] = otherList[j] + } + + } + +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/FullTextIndexTest.kt b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/FullTextIndexTest.kt new file mode 100644 index 00000000..e6572cc3 --- /dev/null +++ b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/FullTextIndexTest.kt @@ -0,0 +1,209 @@ +package base.boudicca.query.evaluator.util + +import base.boudicca.model.Entry +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.Test + +class FullTextIndexTest { + + @Test + fun emptyCreate() { + val index = create(listOf(), "name") + + assertEquals(0, index.size()) + } + + @Test + fun createWithNonValueField() { + val index = create( + listOf( + mapOf("key" to "value1"), + mapOf("key" to "value2"), + mapOf("key" to "value3"), + ), "name" + ) + + assertEquals(0, index.size()) + } + + @Test + fun createSimple() { + val index = create( + listOf( + mapOf("name" to "value", "description" to "ignored"), + ), "name" + ) + + assertEquals(5, index.size()) + assertEquals(Pair(0, 1), index.get(0)) + assertEquals(Pair(0, 4), index.get(1)) + assertEquals(Pair(0, 2), index.get(2)) + assertEquals(Pair(0, 3), index.get(3)) + assertEquals(Pair(0, 0), index.get(4)) + } + + @Test + fun createSimpleWithTwoEntries() { + val index = create( + listOf( + mapOf("name" to "2"), + mapOf("name" to "1"), + ), "name" + ) + + assertEquals(2, index.size()) + assertEquals(Pair(1, 0), index.get(0)) + assertEquals(Pair(0, 0), index.get(1)) + } + + @Test + fun createTwoSameValues() { + val index = create( + listOf( + mapOf("name" to "1"), + mapOf("name" to "1"), + ), "name" + ) + + assertEquals(1, index.size()) + assertEquals(0, index.get(0).second) + assertEquals(setOf(0, 1), index.getEntriesForWord(index.get(0).first)) + } + + @Test + fun createTwoOneEmptyValues() { + val index = create( + listOf( + mapOf("name" to ""), + mapOf("name" to "1"), + ), "name" + ) + + assertEquals(1, index.size()) + assertEquals(Pair(0, 0), index.get(0)) + assertEquals(setOf(1), index.getEntriesForWord(0)) + } + + @Test + fun caseInsensitiveOrder() { + val index = create( + listOf( + mapOf("name" to "a"), + mapOf("name" to "B"), + ), "name" + ) + + assertEquals(2, index.size()) + assertEquals(Pair(0, 0), index.get(0)) + assertEquals(Pair(1, 0), index.get(1)) + } + + @Test + fun simpleContainsSearch() { + val index = create( + listOf( + mapOf("name" to "a"), + mapOf("name" to "B"), + ), "name" + ) + val result = index.containsSearch("a") + + assertEquals(1, result.size) + assertTrue(result.contains(0)) + } + + @Test + fun twoValueBeginningContainsSearch() { + val index = create( + listOf( + mapOf("name" to "this is a name"), + mapOf("name" to "this is another name"), + mapOf("name" to "whatever"), + ), "name" + ) + val result = index.containsSearch("this is") + + assertEquals(2, result.size) + assertTrue(result.contains(0)) + assertTrue(result.contains(1)) + } + + @Test + fun twoValueEndContainsSearch() { + val index = create( + listOf( + mapOf("name" to "this is a name"), + mapOf("name" to "this is another name"), + mapOf("name" to "whatever"), + ), "name" + ) + val result = index.containsSearch("name") + + assertEquals(2, result.size) + assertTrue(result.contains(0)) + assertTrue(result.contains(1)) + } + + @Test + fun twoValueMiddleContainsSearch() { + val index = create( + listOf( + mapOf("name" to "this is a name"), + mapOf("name" to "this is another name"), + mapOf("name" to "whatever"), + ), "name" + ) + val result = index.containsSearch("is a") + + assertEquals(2, result.size) + assertTrue(result.contains(0)) + assertTrue(result.contains(1)) + } + + @Test + fun weirdValuesContainsSearch() { + val index = create( + listOf( + mapOf("name" to ""), + mapOf("name" to "TEST"), + mapOf("name" to "test 😘"), + ), "name" + ) + val result = index.containsSearch("test") + + assertEquals(2, result.size) + assertTrue(result.contains(1)) + assertTrue(result.contains(2)) + } + + @Test + fun caseInsensitiveContainsSearch() { + val index = create( + listOf( + mapOf("name" to "teST"), + ), "name" + ) + val result = index.containsSearch("TESt") + + assertEquals(1, result.size) + assertTrue(result.contains(0)) + } + + @Test + fun nothingFoundContainsSearch() { + val index = create( + listOf( + mapOf("name" to "value"), + mapOf("name" to "value2"), + ), "name" + ) + val result = index.containsSearch("other") + + assertEquals(0, result.size) + } + + private fun create(entries: List, field: String): FullTextIndex { + return FullTextIndex(entries, field) + } +} \ No newline at end of file diff --git a/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/SimpleIndexTest.kt b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/SimpleIndexTest.kt new file mode 100644 index 00000000..5a70e49f --- /dev/null +++ b/boudicca.base/query-lib/src/test/kotlin/base/boudicca/query/evaluator/util/SimpleIndexTest.kt @@ -0,0 +1,44 @@ +package base.boudicca.query.evaluator.util + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test + +class SimpleIndexTest { + @Test + fun createEmptyIndex() { + createIndex(listOf()) + } + + @Test + fun searchOneElement() { + val index = createIndex(listOf("a")) + + assertEquals(setOf(0), index.search { it.compareTo("a") }) + } + + @Test + fun searchOneElementInTwo() { + val index = createIndex(listOf("b", "a")) + + assertEquals(setOf(1), index.search { it.compareTo("a") }) + } + + @Test + fun searchTwoElements() { + val index = createIndex(listOf("b", "a", "aa", "a", "asd", "c")) + + assertEquals(setOf(1, 3), index.search { it.compareTo("a") }) + } + + @Test + fun searchNullableIndex() { + val index = + SimpleIndex(listOf("c", null, "b", "a", null, null, null), Comparator.naturalOrder()) + + assertEquals(setOf(3), index.search { it?.compareTo("a") ?: -1 }) + } + + private fun createIndex(list: List): SimpleIndex { + return SimpleIndex(list, Comparator.naturalOrder()) + } +} \ No newline at end of file diff --git a/boudicca.base/search/src/main/kotlin/base/boudicca/search/service/QueryService.kt b/boudicca.base/search/src/main/kotlin/base/boudicca/search/service/QueryService.kt index c27cfa27..b2858a78 100644 --- a/boudicca.base/search/src/main/kotlin/base/boudicca/search/service/QueryService.kt +++ b/boudicca.base/search/src/main/kotlin/base/boudicca/search/service/QueryService.kt @@ -4,17 +4,23 @@ import base.boudicca.api.search.model.QueryDTO import base.boudicca.api.search.model.ResultDTO import base.boudicca.model.Entry import base.boudicca.query.BoudiccaQueryRunner +import base.boudicca.query.Expression import base.boudicca.query.QueryException import base.boudicca.query.Utils -import base.boudicca.query.evaluator.Evaluator -import base.boudicca.query.evaluator.NoopEvaluator -import base.boudicca.query.evaluator.Page -import base.boudicca.query.evaluator.SimpleEvaluator +import base.boudicca.query.evaluator.* +import base.boudicca.search.BoudiccaSearchProperties +import org.slf4j.LoggerFactory +import org.springframework.beans.factory.annotation.Autowired import org.springframework.context.event.EventListener import org.springframework.stereotype.Service +import java.util.concurrent.ConcurrentHashMap @Service -class QueryService { +class QueryService @Autowired constructor( + private val boudiccaSearchProperties: BoudiccaSearchProperties +) { + + private val LOG = LoggerFactory.getLogger(this::class.java) @Volatile private var entries = emptyList() @@ -32,8 +38,24 @@ class QueryService { @EventListener fun onEventsUpdate(event: EntriesUpdatedEvent) { - this.entries = Utils.order(event.entries) - this.evaluator = SimpleEvaluator(event.entries) + this.entries = Utils.order(event.entries, ConcurrentHashMap()) + if (boudiccaSearchProperties.localMode) { + //for local mode we only want the simple, the optimizing has quite some startup + this.evaluator = SimpleEvaluator(event.entries) + } else { + val optimizingEvaluator = OptimizingEvaluator(event.entries) + val fallbackEvaluator = SimpleEvaluator(event.entries) + this.evaluator = object : Evaluator { + override fun evaluate(expression: Expression, page: Page): QueryResult { + return try { + optimizingEvaluator.evaluate(expression, page) + } catch (e: Exception) { + LOG.error("optimizing evaluator threw exception", e) + fallbackEvaluator.evaluate(expression, page) + } + } + } + } } private fun evaluateQuery(query: String, page: Page): ResultDTO { diff --git a/docs/tech/SEARCH.md b/docs/tech/SEARCH.md index 341c41ce..1e723b8a 100644 --- a/docs/tech/SEARCH.md +++ b/docs/tech/SEARCH.md @@ -28,6 +28,6 @@ The query service handles the evaluation of the actual search queries. For more This consists of following steps: 1. Parsing the Query with the [QueryParser](../../boudicca.base/search/src/main/kotlin/base/boudicca/search/service/query/QueryParser.kt) which results in an [Expression](../../boudicca.base/search/src/main/kotlin/base/boudicca/search/service/query/Expression.kt) object. This is an AST representation of the parsed Query. -2. Using an [Evaluator](../../boudicca.base/search/src/main/kotlin/base/boudicca/search/service/query/Evaluator.kt) to run the AST expression from step 1. +2. Using an [Evaluator](../../boudicca.base/query-lib/src/main/kotlin/base/boudicca/query/evaluator/Evaluator.kt) to run the AST expression from step 1. 3. Return the correct page of the data, specified by the offset and size parameters of the REST endpoint From 7c05e438d882da85077a62ebb8490432caec01ff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 21:33:13 +0000 Subject: [PATCH 06/11] Bump org.slf4j:slf4j-api from 2.0.12 to 2.0.13 Bumps org.slf4j:slf4j-api from 2.0.12 to 2.0.13. --- updated-dependencies: - dependency-name: org.slf4j:slf4j-api dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- boudicca.base/eventcollector-client/build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boudicca.base/eventcollector-client/build.gradle.kts b/boudicca.base/eventcollector-client/build.gradle.kts index 5786da09..b0a9c8b2 100644 --- a/boudicca.base/eventcollector-client/build.gradle.kts +++ b/boudicca.base/eventcollector-client/build.gradle.kts @@ -12,6 +12,6 @@ dependencies { implementation("org.apache.velocity:velocity-engine-core:2.3") implementation("org.apache.velocity.tools:velocity-tools-generic:3.1") implementation("ch.qos.logback:logback-classic:1.5.4") - implementation("org.slf4j:slf4j-api:2.0.12") + implementation("org.slf4j:slf4j-api:2.0.13") testImplementation("org.junit.jupiter:junit-jupiter:5.10.2") } From d52769c10128de400cbd5dde5720a6f35cfb4a13 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 13 Apr 2024 15:10:57 +0000 Subject: [PATCH 07/11] Bump ch.qos.logback:logback-classic from 1.5.4 to 1.5.5 Bumps [ch.qos.logback:logback-classic](https://github.com/qos-ch/logback) from 1.5.4 to 1.5.5. - [Commits](https://github.com/qos-ch/logback/compare/v_1.5.4...v_1.5.5) --- updated-dependencies: - dependency-name: ch.qos.logback:logback-classic dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- boudicca.base/eventcollector-client/build.gradle.kts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boudicca.base/eventcollector-client/build.gradle.kts b/boudicca.base/eventcollector-client/build.gradle.kts index b0a9c8b2..d4c8a95d 100644 --- a/boudicca.base/eventcollector-client/build.gradle.kts +++ b/boudicca.base/eventcollector-client/build.gradle.kts @@ -11,7 +11,7 @@ dependencies { implementation(project(":boudicca.base:remote-collector:remote-collector-client")) implementation("org.apache.velocity:velocity-engine-core:2.3") implementation("org.apache.velocity.tools:velocity-tools-generic:3.1") - implementation("ch.qos.logback:logback-classic:1.5.4") + implementation("ch.qos.logback:logback-classic:1.5.5") implementation("org.slf4j:slf4j-api:2.0.13") testImplementation("org.junit.jupiter:junit-jupiter:5.10.2") } From b415032d161ed5f1f63381148d52da6d54fe53fd Mon Sep 17 00:00:00 2001 From: abl Date: Sun, 14 Apr 2024 18:38:18 +0200 Subject: [PATCH 08/11] generate and use jakarta annotations in openapi clients --- .../src/main/kotlin/boudicca-openapi-generate-client.gradle.kts | 1 + gradle/libs.versions.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/buildSrc/src/main/kotlin/boudicca-openapi-generate-client.gradle.kts b/buildSrc/src/main/kotlin/boudicca-openapi-generate-client.gradle.kts index e3662cae..b1de2995 100644 --- a/buildSrc/src/main/kotlin/boudicca-openapi-generate-client.gradle.kts +++ b/buildSrc/src/main/kotlin/boudicca-openapi-generate-client.gradle.kts @@ -38,6 +38,7 @@ tasks.register("generateJavaClient") { generatorName.set("java") library.set("native") additionalProperties.put("supportUrlQuery", "false") + additionalProperties.put("useJakartaEe", "true") generateApiTests.set(false) generateModelTests.set(false) invokerPackage.set("base.boudicca.openapi") diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index e95b14a6..37104ea7 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -1,7 +1,7 @@ [versions] kotlin = "1.9.20" jackson = "2.17.0" -jakarta-annotation = "1.3.5" +jakarta-annotation = "3.0.0" javax-jaxrs = "2.1.1" findbugs = "3.0.2" jackson-databind-nullable = "0.2.6" From 13c6a0aea5de84dadc525aabbb76216f9de5cb39 Mon Sep 17 00:00:00 2001 From: abl Date: Sun, 14 Apr 2024 18:40:45 +0200 Subject: [PATCH 09/11] update wrapper-validation action --- .github/workflows/deploy.yml | 2 +- .github/workflows/gradle.yml | 2 +- .github/workflows/publish.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index e0dc57a7..bc4987a5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -21,7 +21,7 @@ jobs: java-version: '21' distribution: 'temurin' - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v2.1.3 + uses: gradle/actions/wrapper-validation@v3 - name: Setup Gradle uses: gradle/actions/setup-gradle@v3 - name: Execute Gradle build with tests diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index befac347..7766ffa7 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -23,7 +23,7 @@ jobs: java-version: '21' distribution: 'temurin' - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v2.1.3 + uses: gradle/actions/wrapper-validation@v3 - name: Setup Gradle uses: gradle/actions/setup-gradle@v3 - name: Execute Gradle build diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7a386437..8f325fb5 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -20,7 +20,7 @@ jobs: java-version: '21' distribution: 'temurin' - name: Validate Gradle wrapper - uses: gradle/wrapper-validation-action@v2.1.3 + uses: gradle/actions/wrapper-validation@v3 - name: Setup Gradle uses: gradle/actions/setup-gradle@v3 - name: Execute Gradle build with tests From e20dd2198471378d9faa2bf98b587024fb2d75ec Mon Sep 17 00:00:00 2001 From: abl Date: Sun, 14 Apr 2024 18:51:58 +0200 Subject: [PATCH 10/11] fix ZuckerfabrikCollector --- .../eventcollector/collectors/ZuckerfabrikCollector.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boudicca.events/eventcollectors/src/main/kotlin/events/boudicca/eventcollector/collectors/ZuckerfabrikCollector.kt b/boudicca.events/eventcollectors/src/main/kotlin/events/boudicca/eventcollector/collectors/ZuckerfabrikCollector.kt index fa7b8d66..bb776964 100644 --- a/boudicca.events/eventcollectors/src/main/kotlin/events/boudicca/eventcollector/collectors/ZuckerfabrikCollector.kt +++ b/boudicca.events/eventcollectors/src/main/kotlin/events/boudicca/eventcollector/collectors/ZuckerfabrikCollector.kt @@ -86,12 +86,12 @@ class ZuckerfabrikCollector : TwoStepEventCollector>("zuc if (dateSplit[2].contains(" - ")) { val timeSplit = startTimeString.split(" - ") startTimeString = timeSplit[0] - endTime = LocalTime.parse(timeSplit[1], timeFormatter) + endTime = LocalTime.parse(timeSplit[1].replace('.', ':'), timeFormatter) } if (startTimeString.endsWith(" Uhr")) { startTimeString = startTimeString.substring(0, startTimeString.length - 4) } - startTime = LocalTime.parse(startTimeString, timeFormatter) + startTime = LocalTime.parse(startTimeString.replace('.', ':'), timeFormatter) val startDate = date.atTime(startTime).atZone(ZoneId.of("Europe/Vienna")).toOffsetDateTime() if (endTime != null) { data[SemanticKeys.ENDDATE] = From 97d64a99eef5557d8dd29b5a4934d586dbdfae24 Mon Sep 17 00:00:00 2001 From: abl Date: Sun, 14 Apr 2024 19:15:27 +0200 Subject: [PATCH 11/11] fix BrucknerhausCollector --- .../collectors/BrucknerhausCollector.kt | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/boudicca.events/eventcollectors/src/main/kotlin/events/boudicca/eventcollector/collectors/BrucknerhausCollector.kt b/boudicca.events/eventcollectors/src/main/kotlin/events/boudicca/eventcollector/collectors/BrucknerhausCollector.kt index 9540b4b4..cc00fedb 100644 --- a/boudicca.events/eventcollectors/src/main/kotlin/events/boudicca/eventcollector/collectors/BrucknerhausCollector.kt +++ b/boudicca.events/eventcollectors/src/main/kotlin/events/boudicca/eventcollector/collectors/BrucknerhausCollector.kt @@ -40,8 +40,8 @@ class BrucknerhausCollector : TwoStepEventCollector("brucknerhaus") { return doc.select("div.event div.event__element") } - override fun parseEvent(event: Element): Event { - val startDate: OffsetDateTime = parseDate(event) + override fun parseMultipleEvents(event: Element): List { + val startDates: List = parseDate(event) val data = mutableMapOf() val name = event.select("div.event__name").text() @@ -61,25 +61,41 @@ class BrucknerhausCollector : TwoStepEventCollector("brucknerhaus") { data[SemanticKeys.SOURCES] = data[SemanticKeys.URL]!! - return Event(name, startDate, data) + return startDates.map { startDate -> Event(name, startDate, data) } } - private fun parseDate(event: Element): OffsetDateTime { + private fun parseDate(event: Element): List { val dateElement = event.select("div.event__date").first()!! - val datePart1 = dateElement.children()[1].children()[0].text() - val datePart2 = dateElement.children()[1].children()[1].text() - val datePart3 = dateElement.children()[2].children()[0].text() + + val dateText = dateElement.text().trim() + val localDates = if (dateText.contains("-")) { + //Sa 23 März - Do 16 Mai 24 + val split = dateText.split("-") + val year = dateText.substring(dateText.lastIndexOf(' ') + 1) + val startDate = parseSingleLocalDate(split[0].trim() + " " + year) + val endDate = parseSingleLocalDate(split[1].trim()) + startDate.datesUntil(endDate.plusDays(1)).toList() + } else { + //So 14 Apr 24 + listOf(parseSingleLocalDate(dateText)) + } val timeElement = event.select("div.event__location").first()!! val time = timeElement.children()[0].children()[0].text() - val localDate = LocalDate.parse( - datePart1 + " " + mapMonth(datePart2) + " " + datePart3, - DateTimeFormatter.ofPattern("d M uu").withLocale(Locale.GERMAN) - ) val localTime = LocalTime.parse(time, DateTimeFormatter.ofPattern("kk:mm")) - return localDate.atTime(localTime).atZone(ZoneId.of("Europe/Vienna")).toOffsetDateTime() + return localDates.map { localDate -> + localDate.atTime(localTime).atZone(ZoneId.of("Europe/Vienna")).toOffsetDateTime() + } + } + + private fun parseSingleLocalDate(dateText: String): LocalDate { + val split = dateText.split(" ") + return LocalDate.parse( + split[1] + " " + mapMonth(split[2]) + " " + split[3], + DateTimeFormatter.ofPattern("d M uu").withLocale(Locale.GERMAN) + ) } private fun mapMonth(month: String): String {