diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b2dc353..65dc7ed 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -7,18 +7,16 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macOS-latest, windows-latest]
-        jdk: [ 11, 17, 21 ]
+        jdk: [ 21 ]
        experimental: [false]
    steps:
      - uses: actions/checkout@v4
+        with:
+          lfs: true
      - name: Set up JDK ${{ matrix.jdk }}
        uses: actions/setup-java@v4
        with:
-          distribution: temurin
+          distribution: adopt
          java-version: ${{ matrix.jdk }}
-          cache: gradle
-      - name: Assemble target plugin
-        uses: gradle/gradle-build-action@v3
-        with:
-          cache-disabled: true
-          arguments: -Dtests.security.manager=false assemble
+      - name: Build
+        run: mvn --batch-mode test
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 9beb788..488f99c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,17 @@ gradle-app.setting
 .project
 .classpath
+# Ignore Gradle project-specific cache directory
+.gradle
+
+# Ignore Gradle build output directory
+build
+
+# intellij files
+.idea/
+*.iml
+*.ipr
+*.iws
+build-idea/
+out/
+
diff --git a/opensearch-search-quality-evaluation-plugin/NOTICE.txt b/NOTICE.txt
similarity index 100%
rename from opensearch-search-quality-evaluation-plugin/NOTICE.txt
rename to NOTICE.txt
diff --git a/README.md b/README.md
index 3c061fd..9fdc516 100644
--- a/README.md
+++ b/README.md
@@ -1,64 +1,11 @@
-# Search Evaluation Framework
+# OpenSearch Evaluation Framework
-This repository contains the search quality evaluation framework as described in the [RFC](https://github.com/opensearch-project/OpenSearch/issues/15354).
+This is an application to perform search evaluation.
-Note: Some of the data files in this repository are tracked by `git lfs`.
+## Building
-## Repository Contents
-
-* `data` - The data directory contains scripts for creating random UBI queries and events for purposes of development and testing.
-* `opensearch-search-quality-evaluation-plugin` - An OpenSearch plugin that extends the OpenSearch Scheduler plugin that provides the ability to generate scheduled (and on-demand) implicit judgments from UBI data.
-* `opensearch-search-quality-implicit-judgments` - A standalone Java application to generate implicit judgments from indexed UBI data.
-
-## OpenSearch Search Quality Evaluation Plugin
-
-This is an OpenSearch plugin that extends the OpenSearch Scheduler plugin that provides the ability to generate scheduled (and on-demand) implicit judgments from UBI data.
-
-To use the plugin:
-
-```
-./gradlew build
-cd opensearch-search-quality-evaluation-plugin
-docker compose build
-docker compose up
-```
-
-To create a schedule to generate implicit judgments:
+Build the project from the top-level directory to build all projects.
 ```
-curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/schedule?id=1&click_model=coec&job_name=test&interval=60" | jq
-```
-
-See the created job:
-
-```
-curl -s http://localhost:9200/search_quality_eval_scheduled_jobs/_search | jq
-```
-
-To run an on-demand job without scheduling:
-
+mvn clean install
 ```
-curl -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=20" | jq
-```
-
-To see the job runs:
-
-```
-curl -X POST "http://localhost:9200/search_quality_eval_completed_jobs/_search" | jq
-```
-
-See the first 10 judgments:
-
-```
-curl -s http://localhost:9200/judgments/_search | jq
-```
-
-## OpenSearch Search Quality Implicit Judgments
-
-This is a standalone Java application to generate implicit judgments from indexed UBI data. It runs outside OpenSearch and queries the UBI indexes to get the data for calculating the implicit judgments.
-
-To run it, run the `org.opensearch.eval.App` class. This will connect to OpenSearch running on `localhost:9200`. It expects the `ubi_events` and `ubi_queries` indexes to exist and be populated.
-
-## License
-
-This code is licensed under the Apache 2.0 License. See [LICENSE.txt](LICENSE.txt).
diff --git a/build.gradle b/build.gradle
deleted file mode 100644
index 4cf5a69..0000000
--- a/build.gradle
+++ /dev/null
@@ -1,4 +0,0 @@
-allprojects {
-    group 'org.opensearch'
-    version '1.0.0-SNAPSHOT'
-}
\ No newline at end of file
diff --git a/create-index.sh b/create-index.sh
deleted file mode 100755
index 2566073..0000000
--- a/create-index.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash -e
-
-echo "Creating ubi_events index"
-
-curl -X DELETE http://localhost:9200/ubi_events
-curl -X PUT http://localhost:9200/ubi_events -H "Content-Type: application/json"
-curl -X PUT http://localhost:9200/ubi_events/_mapping -H "Content-Type: application/json" -d @./events-mapping.json
diff --git a/data/esci/index.sh b/data/esci/index.sh
index d1ebd45..95f915f 100755
--- a/data/esci/index.sh
+++ b/data/esci/index.sh
@@ -7,4 +7,4 @@ echo "Initializing UBI..."
 curl -s -X POST "http://localhost:9200/_plugins/ubi/initialize"
 echo "Indexing queries and events..."
-curl -s -T "http://localhost:9200/_bulk?pretty" -H "Content-Type: application/x-ndjson" --data-binary @ubi_queries_events.ndjson +curl -X POST 'http://localhost:9200/index-name/_bulk?pretty' --data-binary @ubi_queries_events.ndjson -H "Content-Type: application/x-ndjson" \ No newline at end of file diff --git a/events-mapping.json b/events-mapping.json deleted file mode 100644 index cdb1393..0000000 --- a/events-mapping.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "properties": { - "application": { "type": "keyword", "ignore_above": 256 }, - "action_name": { "type": "keyword", "ignore_above": 100 }, - "client_id": { "type": "keyword", "ignore_above": 100 }, - "message": { "type": "keyword", "ignore_above": 1024 }, - "message_type": { "type": "keyword", "ignore_above": 100 }, - "timestamp": { - "type": "date", - "format":"strict_date_time", - "ignore_malformed": true, - "doc_values": true - }, - "event_attributes": { - "dynamic": true, - "properties": { - "position": { - "properties": { - "ordinal": { "type": "integer" }, - "x": { "type": "integer" }, - "y": { "type": "integer" }, - "page_depth": { "type": "integer" }, - "scroll_depth": { "type": "integer" }, - "trail": { "type": "text", - "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } - } - } - } - }, - "object": { - "properties": { - "internal_id": { "type": "keyword" }, - "object_id": { "type": "keyword", "ignore_above": 256 }, - "object_id_field": { "type": "keyword", "ignore_above": 100 }, - "name": { "type": "keyword", "ignore_above": 256 }, - "description": { "type": "text", - "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } - }, - "object_detail": { "type": "object" } - } - } - } - } - } -} \ No newline at end of file diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml deleted file mode 100644 index 1aab14a..0000000 --- a/gradle/libs.versions.toml +++ /dev/null @@ -1,12 +0,0 @@ -# This file was generated by the Gradle 'init' task. -# https://docs.gradle.org/current/userguide/platforms.html#sub::toml-dependencies-format - -[versions] -commons-math3 = "3.6.1" -guava = "33.2.1-jre" -junit = "4.13.2" - -[libraries] -commons-math3 = { module = "org.apache.commons:commons-math3", version.ref = "commons-math3" } -guava = { module = "com.google.guava:guava", version.ref = "guava" } -junit = { module = "junit:junit", version.ref = "junit" } diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index a4b76b9..0000000 Binary files a/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 9355b41..0000000 --- a/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,7 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip -networkTimeout=10000 -validateDistributionUrl=true -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew deleted file mode 100755 index f5feea6..0000000 --- a/gradlew +++ /dev/null @@ -1,252 +0,0 @@ -#!/bin/sh - -# -# Copyright © 2015-2021 the original authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: Apache-2.0 -# - -############################################################################## -# -# Gradle start up script for POSIX generated by Gradle. -# -# Important for running: -# -# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is -# noncompliant, but you have some other compliant shell such as ksh or -# bash, then to run this script, type that shell name before the whole -# command line, like: -# -# ksh Gradle -# -# Busybox and similar reduced shells will NOT work, because this script -# requires all of these POSIX shell features: -# * functions; -# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», -# «${var#prefix}», «${var%suffix}», and «$( cmd )»; -# * compound commands having a testable exit status, especially «case»; -# * various built-in commands including «command», «set», and «ulimit». -# -# Important for patching: -# -# (2) This script targets any POSIX shell, so it avoids extensions provided -# by Bash, Ksh, etc; in particular arrays are avoided. -# -# The "traditional" practice of packing multiple parameters into a -# space-separated string is a well documented source of bugs and security -# problems, so this is (mostly) avoided, by progressively accumulating -# options in "$@", and eventually passing that to Java. -# -# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, -# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; -# see the in-line comments for details. -# -# There are tweaks for specific operating systems such as AIX, CygWin, -# Darwin, MinGW, and NonStop. -# -# (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt -# within the Gradle project. -# -# You can find Gradle at https://github.com/gradle/gradle/. -# -############################################################################## - -# Attempt to set APP_HOME - -# Resolve links: $0 may be a link -app_path=$0 - -# Need this for daisy-chained symlinks. -while - APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path - [ -h "$app_path" ] -do - ls=$( ls -ld "$app_path" ) - link=${ls#*' -> '} - case $link in #( - /*) app_path=$link ;; #( - *) app_path=$APP_HOME$link ;; - esac -done - -# This is normally unused -# shellcheck disable=SC2034 -APP_BASE_NAME=${0##*/} -# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s -' "$PWD" ) || exit - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD=maximum - -warn () { - echo "$*" -} >&2 - -die () { - echo - echo "$*" - echo - exit 1 -} >&2 - -# OS specific support (must be 'true' or 'false'). 
-cygwin=false -msys=false -darwin=false -nonstop=false -case "$( uname )" in #( - CYGWIN* ) cygwin=true ;; #( - Darwin* ) darwin=true ;; #( - MSYS* | MINGW* ) msys=true ;; #( - NONSTOP* ) nonstop=true ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD=$JAVA_HOME/jre/sh/java - else - JAVACMD=$JAVA_HOME/bin/java - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD=java - if ! command -v java >/dev/null 2>&1 - then - die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -fi - -# Increase the maximum file descriptors if we can. -if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then - case $MAX_FD in #( - max*) - # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - MAX_FD=$( ulimit -H -n ) || - warn "Could not query maximum file descriptor limit" - esac - case $MAX_FD in #( - '' | soft) :;; #( - *) - # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - ulimit -n "$MAX_FD" || - warn "Could not set maximum file descriptor limit to $MAX_FD" - esac -fi - -# Collect all arguments for the java command, stacking in reverse order: -# * args from the command line -# * the main class name -# * -classpath -# * -D...appname settings -# * --module-path (only if needed) -# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. - -# For Cygwin or MSYS, switch paths to Windows format before running java -if "$cygwin" || "$msys" ; then - APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) - CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) - - JAVACMD=$( cygpath --unix "$JAVACMD" ) - - # Now convert the arguments - kludge to limit ourselves to /bin/sh - for arg do - if - case $arg in #( - -*) false ;; # don't mess with options #( - /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath - [ -e "$t" ] ;; #( - *) false ;; - esac - then - arg=$( cygpath --path --ignore --mixed "$arg" ) - fi - # Roll the args list around exactly as many times as the number of - # args, so each arg winds up back in the position where it started, but - # possibly modified. - # - # NB: a `for` loop captures its iteration list before it begins, so - # changing the positional parameters here affects neither the number of - # iterations, nor the values presented in `arg`. - shift # remove old arg - set -- "$@" "$arg" # push replacement arg - done -fi - - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' - -# Collect all arguments for the java command: -# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, -# and any embedded shellness will be escaped. -# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be -# treated as '${Hostname}' itself on the command line. 
- -set -- \ - "-Dorg.gradle.appname=$APP_BASE_NAME" \ - -classpath "$CLASSPATH" \ - org.gradle.wrapper.GradleWrapperMain \ - "$@" - -# Stop when "xargs" is not available. -if ! command -v xargs >/dev/null 2>&1 -then - die "xargs is not available" -fi - -# Use "xargs" to parse quoted args. -# -# With -n1 it outputs one arg per line, with the quotes and backslashes removed. -# -# In Bash we could simply go: -# -# readarray ARGS < <( xargs -n1 <<<"$var" ) && -# set -- "${ARGS[@]}" "$@" -# -# but POSIX shell has neither arrays nor command substitution, so instead we -# post-process each arg (as a line of input to sed) to backslash-escape any -# character that might be a shell metacharacter, then use eval to reverse -# that process (while maintaining the separation between arguments), and wrap -# the whole thing up as a single "set" statement. -# -# This will of course break if any of these variables contains a newline or -# an unmatched quote. -# - -eval "set -- $( - printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | - xargs -n1 | - sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | - tr '\n' ' ' - )" '"$@"' - -exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat deleted file mode 100644 index 9d21a21..0000000 --- a/gradlew.bat +++ /dev/null @@ -1,94 +0,0 @@ -@rem -@rem Copyright 2015 the original author or authors. -@rem -@rem Licensed under the Apache License, Version 2.0 (the "License"); -@rem you may not use this file except in compliance with the License. -@rem You may obtain a copy of the License at -@rem -@rem https://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, software -@rem distributed under the License is distributed on an "AS IS" BASIS, -@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -@rem See the License for the specific language governing permissions and -@rem limitations under the License. -@rem -@rem SPDX-License-Identifier: Apache-2.0 -@rem - -@if "%DEBUG%"=="" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%"=="" set DIRNAME=. -@rem This is normally unused -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Resolve any "." and ".." in APP_HOME to make it shorter. -for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if %ERRORLEVEL% equ 0 goto execute - -echo. 1>&2 -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 -echo. 1>&2 -echo Please set the JAVA_HOME variable in your environment to match the 1>&2 -echo location of your Java installation. 1>&2 - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto execute - -echo. 1>&2 -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 -echo. 1>&2 -echo Please set the JAVA_HOME variable in your environment to match the 1>&2 -echo location of your Java installation. 
1>&2 - -goto fail - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* - -:end -@rem End local scope for the variables with windows NT shell -if %ERRORLEVEL% equ 0 goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -set EXIT_CODE=%ERRORLEVEL% -if %EXIT_CODE% equ 0 set EXIT_CODE=1 -if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% -exit /b %EXIT_CODE% - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/opensearch-search-quality-evaluation-plugin/.gitignore b/opensearch-search-quality-evaluation-plugin/.gitignore deleted file mode 100644 index 6c884e1..0000000 --- a/opensearch-search-quality-evaluation-plugin/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -# Ignore Gradle project-specific cache directory -.gradle - -# Ignore Gradle build output directory -build - -# intellij files -.idea/ -*.iml -*.ipr -*.iws -build-idea/ -out/ - diff --git a/opensearch-search-quality-evaluation-plugin/Dockerfile b/opensearch-search-quality-evaluation-plugin/Dockerfile deleted file mode 100644 index 02f56c8..0000000 --- a/opensearch-search-quality-evaluation-plugin/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM opensearchproject/opensearch:2.18.0 - -RUN /usr/share/opensearch/bin/opensearch-plugin install --batch https://github.com/opensearch-project/user-behavior-insights/releases/download/2.18.0.2/opensearch-ubi-2.18.0.2.zip - -ADD ./build/distributions/search-quality-evaluation-plugin-0.0.1.zip /tmp/search-quality-evaluation-plugin.zip -RUN /usr/share/opensearch/bin/opensearch-plugin install --batch file:/tmp/search-quality-evaluation-plugin.zip diff --git a/opensearch-search-quality-evaluation-plugin/LICENSE.txt b/opensearch-search-quality-evaluation-plugin/LICENSE.txt deleted file mode 100644 index 67db858..0000000 --- a/opensearch-search-quality-evaluation-plugin/LICENSE.txt +++ /dev/null @@ -1,175 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. diff --git a/opensearch-search-quality-evaluation-plugin/README.md b/opensearch-search-quality-evaluation-plugin/README.md deleted file mode 100644 index 215ccce..0000000 --- a/opensearch-search-quality-evaluation-plugin/README.md +++ /dev/null @@ -1,72 +0,0 @@ -# OpenSearch Evaluation Framework - -This is an OpenSearch plugin built on the OpenSearch job scheduler plugin. - -## API Endpoints - -| Method | Endpoint | Description | -|--------|-----------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------| -| `POST` | `/_plugins/search_quality_eval/queryset` | Create a query set by sampling from the `ubi_queries` index. The `name`, `description`, and `sampling` method parameters are required. | -| `POST` | `/_plugins/search_quality_eval/run` | Initiate a run of a query set. The `name` of the query set is a required parameter. | -| `POST` | `/_plugins/search_quality_eval/judgments` | Generate implicit judgments from UBI events and queries now. | -| `POST` | `/_plugins/search_quality_eval/schedule` | Create a scheduled job to generate implicit judgments. | - - -## Building - -Build the project from the top-level directory to build all projects. - -``` -cd .. 
-./gradlew build -``` - -## Running in Docker - -From this directory: - -``` -docker compose build && docker compose up -``` - -Verify the plugin is installed: - -``` -curl http://localhost:9200/_cat/plugins -``` - -In the list returned you should see: - -``` -opensearch search-quality-evaluation-plugin 2.17.1.0-SNAPSHOT -``` - -To create a schedule to generate implicit judgments: - -``` -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/schedule?id=1&click_model=coec&job_name=test&interval=60" | jq -``` - -See the created job: - -``` -curl -s http://localhost:9200/search_quality_eval_scheduled_jobs/_search | jq -``` - -To run an on-demand job without scheduling: - -``` -curl -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=20" | jq -``` - -To see the job runs: - -``` -curl -X POST "http://localhost:9200/search_quality_eval_completed_jobs/_search" | jq -``` - -See the first 10 judgments: - -``` -curl -s http://localhost:9200/judgments/_search | jq -``` \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/aggs.sh b/opensearch-search-quality-evaluation-plugin/aggs.sh deleted file mode 100755 index 5cf0e24..0000000 --- a/opensearch-search-quality-evaluation-plugin/aggs.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -e - -curl -X GET http://localhost:9200/ubi_events/_search -H "Content-Type: application/json" -d' -{ - "size": 0, - "aggs": { - "By_Action": { - "terms": { - "field": "action_name" - }, - "aggs": { - "By_Position": { - "terms": { - "field": "event_attributes.position.ordinal" - } - } - } - } - } -}' | jq \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/build.gradle b/opensearch-search-quality-evaluation-plugin/build.gradle deleted file mode 100644 index dcfa2da..0000000 --- a/opensearch-search-quality-evaluation-plugin/build.gradle +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -apply plugin: 'java' -apply plugin: 'idea' -apply plugin: 'opensearch.opensearchplugin' -apply plugin: 'opensearch.yaml-rest-test' - -opensearchplugin { - name 'search-quality-evaluation-plugin' - description 'OpenSearch Search Quality Evaluation' - classname 'org.opensearch.eval.SearchQualityEvaluationPlugin' - extendedPlugins = ['opensearch-job-scheduler'] -} - -ext { - projectSubstitutions = [:] - licenseFile = rootProject.file('LICENSE.txt') - noticeFile = rootProject.file('NOTICE.txt') -} - -test { - include "**/Test*.class" - include "**/*Test.class" - include "**/*Test.class" - include "**/*TestCase.class" -} - -group = 'org.opensearch' -version = "${evalVersion}" - -// disabling some unnecessary validations for this plugin -testingConventions.enabled = false -forbiddenApis.ignoreFailures = true -loggerUsageCheck.enabled = false -validateNebulaPom.enabled = false -thirdPartyAudit.enabled = false - -buildscript { - repositories { - mavenLocal() - maven { url "https://aws.oss.sonatype.org/content/repositories/snapshots" } - mavenCentral() - maven { url "https://plugins.gradle.org/m2/" } - } - - dependencies { - classpath "org.opensearch.gradle:build-tools:${opensearchVersion}" - } -} - -repositories { - mavenLocal() - mavenCentral() - maven { url "https://aws.oss.sonatype.org/content/repositories/snapshots" } -} - -dependencies { - runtimeOnly "org.apache.logging.log4j:log4j-core:${versions.log4j}" - compileOnly "org.opensearch:opensearch-job-scheduler:${opensearchVersion}.0" - compileOnly 
"org.opensearch:opensearch-job-scheduler-spi:${opensearchVersion}.0" - compileOnly "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" - compileOnly "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" - compileOnly "org.apache.httpcomponents:httpcore:${versions.httpcore}" - compileOnly "org.apache.httpcomponents:httpclient:${versions.httpclient}" - compileOnly "commons-logging:commons-logging:${versions.commonslogging}" - implementation "com.google.code.gson:gson:2.11.0" - yamlRestTestImplementation "org.apache.logging.log4j:log4j-core:${versions.log4j}" -} - -publishing { - repositories { - maven { - name = "Snapshots" - url = "https://aws.oss.sonatype.org/content/repositories/snapshots" - credentials { - username "$System.env.SONATYPE_USERNAME" - password "$System.env.SONATYPE_PASSWORD" - } - } - } - publications { - pluginZip(MavenPublication) { publication -> - pom { - name = "opensearch-eval" - description = "OpenSearch User Behavior Insights plugin" - groupId = "org.opensearch.plugin" - licenses { - license { - name = "The Apache License, Version 2.0" - url = "http://www.apache.org/licenses/LICENSE-2.0.txt" - } - } - developers { - developer { - name = "OpenSearch" - url = "https://github.com/opensearch-project/eval" - } - } - } - } - } -} \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/coec.png b/opensearch-search-quality-evaluation-plugin/coec.png deleted file mode 100644 index 65e297a..0000000 Binary files a/opensearch-search-quality-evaluation-plugin/coec.png and /dev/null differ diff --git a/opensearch-search-quality-evaluation-plugin/coec_definition.png b/opensearch-search-quality-evaluation-plugin/coec_definition.png deleted file mode 100644 index e5ed3f9..0000000 Binary files a/opensearch-search-quality-evaluation-plugin/coec_definition.png and /dev/null differ diff --git a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml b/opensearch-search-quality-evaluation-plugin/docker-compose.yaml deleted file mode 100644 index c2ab3b7..0000000 --- a/opensearch-search-quality-evaluation-plugin/docker-compose.yaml +++ /dev/null @@ -1,47 +0,0 @@ -services: - - opensearch_sef: - build: . 
- container_name: opensearch_sef - environment: - discovery.type: single-node - node.name: opensearch - plugins.security.disabled: "true" - logger.level: info - OPENSEARCH_INITIAL_ADMIN_PASSWORD: SuperSecretPassword_123 - http.max_content_length: 500mb - OPENSEARCH_JAVA_OPTS: "-Xms16g -Xmx16g" - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 65536 - ports: - - "9200:9200" - - "9600:9600" - networks: - - opensearch-net - volumes: - - opensearch-data1:/usr/share/opensearch/data - - opensearch_sef_dashboards: - image: opensearchproject/opensearch-dashboards:2.18.0 - container_name: opensearch_sef_dashboards - ports: - - "5601:5601" - environment: - OPENSEARCH_HOSTS: '["http://opensearch_sef:9200"]' - DISABLE_SECURITY_DASHBOARDS_PLUGIN: "true" - depends_on: - - opensearch_sef - networks: - - opensearch-net - -volumes: - opensearch-data1: - -networks: - opensearch-net: - driver: bridge diff --git a/opensearch-search-quality-evaluation-plugin/gradle.properties b/opensearch-search-quality-evaluation-plugin/gradle.properties deleted file mode 100644 index 2659a68..0000000 --- a/opensearch-search-quality-evaluation-plugin/gradle.properties +++ /dev/null @@ -1,2 +0,0 @@ -opensearchVersion = 2.18.0 -evalVersion = 0.0.1 diff --git a/opensearch-search-quality-evaluation-plugin/gradle/wrapper/gradle-wrapper.jar b/opensearch-search-quality-evaluation-plugin/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index e708b1c..0000000 Binary files a/opensearch-search-quality-evaluation-plugin/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/opensearch-search-quality-evaluation-plugin/gradle/wrapper/gradle-wrapper.properties b/opensearch-search-quality-evaluation-plugin/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 2bbac7d..0000000 --- a/opensearch-search-quality-evaluation-plugin/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,5 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/licenses/gson-2.11.0.jar.sha1 b/opensearch-search-quality-evaluation-plugin/licenses/gson-2.11.0.jar.sha1 deleted file mode 100644 index 0414a49..0000000 --- a/opensearch-search-quality-evaluation-plugin/licenses/gson-2.11.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -527175ca6d81050b53bdd4c457a6d6e017626b0e \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/licenses/gson-LICENSE.txt b/opensearch-search-quality-evaluation-plugin/licenses/gson-LICENSE.txt deleted file mode 100644 index 7a4a3ea..0000000 --- a/opensearch-search-quality-evaluation-plugin/licenses/gson-LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
\ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/licenses/gson-NOTICE.txt b/opensearch-search-quality-evaluation-plugin/licenses/gson-NOTICE.txt deleted file mode 100644 index e69de29..0000000 diff --git a/opensearch-search-quality-evaluation-plugin/scripts/cleanup.sh b/opensearch-search-quality-evaluation-plugin/scripts/cleanup.sh deleted file mode 100755 index 77b1fd4..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/cleanup.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -e - -curl -s -X DELETE "http://localhost:9200/judgments,search_quality_eval_completed_jobs,search_quality_eval_query_sets_run_results" | jq -curl -s -X DELETE "http://localhost:9200/search_quality_eval_completed_jobs" | jq -curl -s -X DELETE "http://localhost:9200/search_quality_eval_query_sets_run_results" | jq -curl -s -X DELETE "http://localhost:9200/ubi_queries,ubi_events" | jq diff --git a/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh b/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh deleted file mode 100755 index 8bc505a..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-now.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -e - -echo "Deleting existing judgments index..." -curl -s -X DELETE http://localhost:9200/judgments - -echo "Creating judgments..." -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=50" diff --git a/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-schedule.sh b/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-schedule.sh deleted file mode 100755 index 72a79ed..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/create-judgments-schedule.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -e - -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/schedule?id=1&click_model=coec&max_rank=20&job_name=test2&interval=10" | jq - -echo "Scheduled jobs:" -curl -s "http://localhost:9200/search_quality_eval_scheduler/_search" | jq diff --git a/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-no-sampling.sh b/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-no-sampling.sh deleted file mode 100755 index ace0404..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-no-sampling.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e - -curl -s -X DELETE "http://localhost:9200/search_quality_eval_query_sets" - -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=none&query_set_size=10" diff --git a/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-using-pptss-sampling.sh b/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-using-pptss-sampling.sh deleted file mode 100755 index 572bc8d..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/create-query-set-using-pptss-sampling.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e - -curl -s -X DELETE "http://localhost:9200/search_quality_eval_query_sets" - -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=pptss&query_set_size=20" diff --git a/opensearch-search-quality-evaluation-plugin/scripts/delete-query-sets.sh b/opensearch-search-quality-evaluation-plugin/scripts/delete-query-sets.sh deleted file mode 100755 index f495800..0000000 --- 
a/opensearch-search-quality-evaluation-plugin/scripts/delete-query-sets.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -e - -curl -s -X DELETE "http://localhost:9200/search_quality_eval_query_sets" diff --git a/opensearch-search-quality-evaluation-plugin/scripts/get-models.sh b/opensearch-search-quality-evaluation-plugin/scripts/get-models.sh deleted file mode 100755 index da6c0a5..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/get-models.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -e - -# Get the search pipeline. -curl -s http://localhost:9200/_search/pipeline/hybrid-search-pipeline | jq - -#curl -s "http://localhost:9200/_plugins/_ml/models/_search" -H "Content-Type: application/json" -d'{ -# "query": { -# "match_all": {} -# } -# }' | jq diff --git a/opensearch-search-quality-evaluation-plugin/scripts/get-rank-aggregated-clickthrough.sh b/opensearch-search-quality-evaluation-plugin/scripts/get-rank-aggregated-clickthrough.sh deleted file mode 100755 index 639a012..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/get-rank-aggregated-clickthrough.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -e - -curl -s "http://localhost:9200/rank_aggregated_ctr/_search" | jq diff --git a/opensearch-search-quality-evaluation-plugin/scripts/index-sample-events.sh b/opensearch-search-quality-evaluation-plugin/scripts/index-sample-events.sh deleted file mode 100755 index e50c828..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/index-sample-events.sh +++ /dev/null @@ -1,267 +0,0 @@ -curl -s -X DELETE http://localhost:9200/ubi_events,ubi_queries - -curl -s -X POST http://localhost:9200/_plugins/ubi/initialize - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/1 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "impression", - "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", - "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", - "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", - "timestamp": "2024-12-11T04:56:49.419Z", - "user_query": "tv", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B07JW53H22", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/2 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "impression", - "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", - "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", - "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", - "timestamp": "2024-12-11T04:56:49.419Z", - "user_query": "tv", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B07JW53H22", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/3 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "impression", - "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", - "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", - "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", - "timestamp": "2024-12-11T04:56:49.419Z", - "user_query": "tv", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B07JW53H22", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/4 -H "Content-Type: application/json" 
-d' -{ - "application": "esci_ubi_sample", - "action_name": "impression", - "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", - "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", - "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", - "timestamp": "2024-12-11T04:56:49.419Z", - "user_query": "tv", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B07JW53H22", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/5 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "click", - "query_id": "bd43b377-67ff-4165-8753-58bbdb3392c5", - "session_id": "fdb13692-d42c-4d1d-950b-b8814c963de2", - "client_id": "28ccfb32-fbd7-4514-9051-cea719db42de", - "timestamp": "2024-12-11T04:56:49.419Z", - "user_query": "tv", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B07JW53H22", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/6 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "impression", - "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", - "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", - "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", - "timestamp": "2024-12-11T00:16:42.278Z", - "user_query": "airpods", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B088FVYG44", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/7 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "impression", - "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", - "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", - "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", - "timestamp": "2024-12-11T00:16:42.278Z", - "user_query": "airpods", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B088FVYG44", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/8 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "impression", - "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", - "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", - "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", - "timestamp": "2024-12-11T00:16:42.278Z", - "user_query": "airpods", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B088FVYG44", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/9 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "impression", - "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", - "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", - "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", - "timestamp": "2024-12-11T00:16:42.278Z", - "user_query": "airpods", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B088FVYG44", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/10 -H 
"Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "click", - "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", - "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", - "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", - "timestamp": "2024-12-11T00:16:42.278Z", - "user_query": "airpods", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B088FVYG44", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/11 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "click", - "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", - "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", - "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", - "timestamp": "2024-12-11T00:16:42.278Z", - "user_query": "airpods", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B088FVYG44", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq - -curl -s -X PUT http://localhost:9200/ubi_events/_doc/12 -H "Content-Type: application/json" -d' -{ - "application": "esci_ubi_sample", - "action_name": "click", - "query_id": "dc6872a3-1f4c-46b2-ad84-7add603b4c73", - "session_id": "a8f7d668-12b9-4cf3-a56f-22700b9e9b89", - "client_id": "a654b87b-a8cd-423b-996f-a169de13d4fb", - "timestamp": "2024-12-11T00:16:42.278Z", - "user_query": "airpods", - "message_type": null, - "message": null, - "event_attributes": { - "object": { - "object_id": "B088FVYG44", - "object_id_field": "product_id" - }, - "position": { - "ordinal": 1 - } - } -}' | jq \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/scripts/initialize-ubi.sh b/opensearch-search-quality-evaluation-plugin/scripts/initialize-ubi.sh deleted file mode 100755 index 37883fa..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/initialize-ubi.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e - -curl -s -X DELETE http://localhost:9200/ubi_queries,ubi_events - -curl -s -X POST http://localhost:9200/_plugins/ubi/initialize diff --git a/opensearch-search-quality-evaluation-plugin/scripts/run-query-set.sh b/opensearch-search-quality-evaluation-plugin/scripts/run-query-set.sh deleted file mode 100755 index f76e26c..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/run-query-set.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -e - -QUERY_SET_ID="09fc2a69-9bc0-49ea-9747-aefd66528858" -JUDGMENTS_ID="cd7b72c9-21fa-4500-abf3-722438ab3ad4" -INDEX="ecommerce" -ID_FIELD="asin" -K="50" -THRESHOLD="1.0" # Default value - -curl -s -X DELETE "http://localhost:9200/sqe_metrics_sample_data" - -# Keyword search -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/run?id=${QUERY_SET_ID}&judgments_id=${JUDGMENTS_ID}&index=${INDEX}&id_field=${ID_FIELD}&k=${K}" \ - -H "Content-Type: application/json" \ - --data-binary '{ - "multi_match": { - "query": "#$query##", - "fields": ["id", "title", "category", "bullets", "description", "attrs.Brand", "attrs.Color"] - } - }' - -## Neural search -#curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/run?id=${QUERY_SET_ID}&judgments_id=${JUDGMENTS_ID}&index=${INDEX}&id_field=${ID_FIELD}&k=${K}&search_pipeline=neural-search-pipeline" \ -# -H "Content-Type: application/json" \ -# --data-binary '{ -# "neural": { -# "title_embedding": { -# "query_text": ""#$query##", -# "k": "50" -# 
} -# } -# }' - -# Hybrid search -#curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/run?id=${QUERY_SET_ID}&judgments_id=${JUDGMENTS_ID}&index=${INDEX}&id_field=${ID_FIELD}&k=${K}&search_pipeline=hybrid-search-pipeline" \ -# -H "Content-Type: application/json" \ -# --data-binary '{ -# "hybrid": { -# "queries": [ -# { -# "match": { -# "title": { -# "query": "#$query##" -# } -# } -# }, -# { -# "neural": { -# "title_embedding": { -# "query_text": "#$query##", -# "k": "50" -# } -# } -# } -# ] -# } -# }' diff --git a/opensearch-search-quality-evaluation-plugin/scripts/test-neural-query.sh b/opensearch-search-quality-evaluation-plugin/scripts/test-neural-query.sh deleted file mode 100755 index c3d518f..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/test-neural-query.sh +++ /dev/null @@ -1,24 +0,0 @@ -curl -s "http://localhost:9200/ecommerce/_search?search_pipeline=hybrid-search-pipeline" -H "Content-Type: application/json" -d' -{ - "query": { - "hybrid": { - "queries": [ - { - "match": { - "title": { - "query": "shoes" - } - } - }, - { - "neural": { - "title_embedding": { - "query_text": "shoes", - "k": "50" - } - } - } - ] - } - } -}' | jq \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/scripts/walkthrough.sh b/opensearch-search-quality-evaluation-plugin/scripts/walkthrough.sh deleted file mode 100755 index faf9f75..0000000 --- a/opensearch-search-quality-evaluation-plugin/scripts/walkthrough.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -e - -# Example walkthrough end-to-end for the plugin. - -# Delete existing UBI indexes and create new ones. -curl -s -X DELETE "http://localhost:9200/ubi_queries,ubi_events" -curl -s -X POST "http://localhost:9200/_plugins/ubi/initialize" - -# IMPORTANT: Now index data (UBI and ESCI). - -# Create judgments. -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=20" - -# Create a query set. -curl -s -X POST "http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=pptss&query_set_size=100" - -# Run the query set. -./run-query-set.sh ${QUERY_SET_ID} - -# Look at the results. -curl -s "http://localhost:9200/search_quality_eval_query_sets_run_results/_search" | jq diff --git a/opensearch-search-quality-evaluation-plugin/settings.gradle b/opensearch-search-quality-evaluation-plugin/settings.gradle deleted file mode 100644 index ef059e1..0000000 --- a/opensearch-search-quality-evaluation-plugin/settings.gradle +++ /dev/null @@ -1 +0,0 @@ -rootProject.name = 'search-evaluation-framework' diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobParameter.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobParameter.java deleted file mode 100644 index 2ea5379..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobParameter.java +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ -package org.opensearch.eval; - -import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.jobscheduler.spi.ScheduledJobParameter; -import org.opensearch.jobscheduler.spi.schedule.Schedule; - -import java.io.IOException; -import java.time.Instant; - -public class SearchQualityEvaluationJobParameter implements ScheduledJobParameter { - - /** - * The name of the parameter for providing a name for the scheduled job. - */ - public static final String NAME_FIELD = "name"; - - /** - * The name of the parameter for creating a job as enabled or disabled. - */ - public static final String ENABLED_FILED = "enabled"; - - /** - * The name of the parameter for specifying when the job was last updated. - */ - public static final String LAST_UPDATE_TIME_FIELD = "last_update_time"; - - /** - * The name of the parameter for specifying a readable time for when the job was last updated. - */ - public static final String LAST_UPDATE_TIME_FIELD_READABLE = "last_update_time_field"; - public static final String SCHEDULE_FIELD = "schedule"; - public static final String ENABLED_TIME_FILED = "enabled_time"; - public static final String ENABLED_TIME_FILED_READABLE = "enabled_time_field"; - public static final String LOCK_DURATION_SECONDS = "lock_duration_seconds"; - public static final String JITTER = "jitter"; - - /** - * The name of the parameter that allows for specifying the type of click model to use. - */ - public static final String CLICK_MODEL = "click_model"; - - /** - * The name of the parameter that allows for setting a max rank value to use during judgment generation. - */ - public static final String MAX_RANK = "max_rank"; - - // Properties from ScheduledJobParameter. - private String jobName; - private Instant lastUpdateTime; - private Instant enabledTime; - private boolean enabled; - private Schedule schedule; - private Long lockDurationSeconds; - private Double jitter; - - // Custom properties. - private String clickModel; - private int maxRank; - - public SearchQualityEvaluationJobParameter() { - - } - - public SearchQualityEvaluationJobParameter(final String name, final Schedule schedule, - final Long lockDurationSeconds, final Double jitter, - final String clickModel, final int maxRank) { - this.jobName = name; - this.schedule = schedule; - this.enabled = true; - this.lockDurationSeconds = lockDurationSeconds; - this.jitter = jitter; - - final Instant now = Instant.now(); - this.enabledTime = now; - this.lastUpdateTime = now; - - // Custom properties. 
- this.clickModel = clickModel; - this.maxRank = maxRank; - - } - - @Override - public XContentBuilder toXContent(final XContentBuilder builder, final Params params) throws IOException { - - builder.startObject(); - - builder - .field(NAME_FIELD, this.jobName) - .field(ENABLED_FILED, this.enabled) - .field(SCHEDULE_FIELD, this.schedule) - .field(CLICK_MODEL, this.clickModel) - .field(MAX_RANK, this.maxRank); - - if (this.enabledTime != null) { - builder.timeField(ENABLED_TIME_FILED, ENABLED_TIME_FILED_READABLE, this.enabledTime.toEpochMilli()); - } - - if (this.lastUpdateTime != null) { - builder.timeField(LAST_UPDATE_TIME_FIELD, LAST_UPDATE_TIME_FIELD_READABLE, this.lastUpdateTime.toEpochMilli()); - } - - if (this.lockDurationSeconds != null) { - builder.field(LOCK_DURATION_SECONDS, this.lockDurationSeconds); - } - - if (this.jitter != null) { - builder.field(JITTER, this.jitter); - } - - builder.endObject(); - - return builder; - - } - - @Override - public String getName() { - return this.jobName; - } - - @Override - public Instant getLastUpdateTime() { - return this.lastUpdateTime; - } - - @Override - public Instant getEnabledTime() { - return this.enabledTime; - } - - @Override - public Schedule getSchedule() { - return this.schedule; - } - - @Override - public boolean isEnabled() { - return this.enabled; - } - - @Override - public Long getLockDurationSeconds() { - return this.lockDurationSeconds; - } - - @Override - public Double getJitter() { - return jitter; - } - - /** - * Sets the name of the job. - * @param jobName The name of the job. - */ - public void setJobName(String jobName) { - this.jobName = jobName; - } - - /** - * Sets when the job was last updated. - * @param lastUpdateTime An {@link Instant} of when the job was last updated. - */ - public void setLastUpdateTime(Instant lastUpdateTime) { - this.lastUpdateTime = lastUpdateTime; - } - - /** - * Sets when the job was enabled. - * @param enabledTime An {@link Instant} of when the job was enabled. - */ - public void setEnabledTime(Instant enabledTime) { - this.enabledTime = enabledTime; - } - - /** - * Sets whether the job is enabled. - * @param enabled A boolean representing whether the job is enabled. - */ - public void setEnabled(boolean enabled) { - this.enabled = enabled; - } - - /** - * Sets the schedule for the job. - * @param schedule A {@link Schedule} for the job. - */ - public void setSchedule(Schedule schedule) { - this.schedule = schedule; - } - - /** - * Sets the lock duration for the cluster when running the job. - * @param lockDurationSeconds The lock duration in seconds. - */ - public void setLockDurationSeconds(Long lockDurationSeconds) { - this.lockDurationSeconds = lockDurationSeconds; - } - - /** - * Sets the jitter for the job. - * @param jitter The jitter for the job. - */ - public void setJitter(Double jitter) { - this.jitter = jitter; - } - - /** - * Gets the type of click model to use for implicit judgment generation. - * @return The type of click model to use for implicit judgment generation. - */ - public String getClickModel() { - return clickModel; - } - - /** - * Sets the click model type to use for implicit judgment generation. - * @param clickModel The click model type to use for implicit judgment generation. - */ - public void setClickModel(String clickModel) { - this.clickModel = clickModel; - } - - /** - * Gets the max rank to use when generating implicit judgments. - * @return The max rank to use when generating implicit judgments. 
- */ - public int getMaxRank() { - return maxRank; - } - - /** - * Sets the max rank to use when generating implicit judgments. - * @param maxRank The max rank to use when generating implicit judgments. - */ - public void setMaxRank(int maxRank) { - this.maxRank = maxRank; - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobRunner.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobRunner.java deleted file mode 100644 index 442ae4c..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationJobRunner.java +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.action.index.IndexRequest; -import org.opensearch.action.index.IndexResponse; -import org.opensearch.action.support.WriteRequest; -import org.opensearch.client.Client; -import org.opensearch.cluster.service.ClusterService; -import org.opensearch.core.action.ActionListener; -import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel; -import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModelParameters; -import org.opensearch.jobscheduler.spi.JobExecutionContext; -import org.opensearch.jobscheduler.spi.ScheduledJobParameter; -import org.opensearch.jobscheduler.spi.ScheduledJobRunner; -import org.opensearch.jobscheduler.spi.utils.LockService; -import org.opensearch.threadpool.ThreadPool; - -import java.util.HashMap; -import java.util.Map; - -/** - * Job runner for scheduled implicit judgments jobs. - */ -public class SearchQualityEvaluationJobRunner implements ScheduledJobRunner { - - private static final Logger LOGGER = LogManager.getLogger(SearchQualityEvaluationJobRunner.class); - - private static SearchQualityEvaluationJobRunner INSTANCE; - - /** - * Gets a singleton instance of this class. - * @return A {@link SearchQualityEvaluationJobRunner}. 
- */ - public static SearchQualityEvaluationJobRunner getJobRunnerInstance() { - - LOGGER.info("Getting job runner instance"); - - if (INSTANCE != null) { - return INSTANCE; - } - - synchronized (SearchQualityEvaluationJobRunner.class) { - if (INSTANCE == null) { - INSTANCE = new SearchQualityEvaluationJobRunner(); - } - return INSTANCE; - } - - } - - private ClusterService clusterService; - private ThreadPool threadPool; - private Client client; - - private SearchQualityEvaluationJobRunner() { - - } - - public void setClusterService(ClusterService clusterService) { - this.clusterService = clusterService; - } - - public void setThreadPool(ThreadPool threadPool) { - this.threadPool = threadPool; - } - - public void setClient(Client client) { - this.client = client; - } - - @Override - public void runJob(final ScheduledJobParameter jobParameter, final JobExecutionContext context) { - - if(!(jobParameter instanceof SearchQualityEvaluationJobParameter)) { - throw new IllegalStateException( - "Job parameter is not an instance of SearchQualityEvaluationJobParameter, type: " + jobParameter.getClass().getCanonicalName() - ); - } - - if(this.clusterService == null) { - throw new IllegalStateException("ClusterService is not initialized."); - } - - if(this.threadPool == null) { - throw new IllegalStateException("ThreadPool is not initialized."); - } - - final LockService lockService = context.getLockService(); - - final Runnable runnable = () -> { - - if (jobParameter.getLockDurationSeconds() != null) { - - lockService.acquireLock(jobParameter, context, ActionListener.wrap(lock -> { - - if (lock == null) { - return; - } - - final SearchQualityEvaluationJobParameter searchQualityEvaluationJobParameter = (SearchQualityEvaluationJobParameter) jobParameter; - - final long startTime = System.currentTimeMillis(); - final String judgmentsId; - - if("coec".equalsIgnoreCase(searchQualityEvaluationJobParameter.getClickModel())) { - - LOGGER.info("Beginning implicit judgment generation using clicks-over-expected-clicks."); - final CoecClickModelParameters coecClickModelParameters = new CoecClickModelParameters(searchQualityEvaluationJobParameter.getMaxRank()); - final CoecClickModel coecClickModel = new CoecClickModel(client, coecClickModelParameters); - - judgmentsId = coecClickModel.calculateJudgments(); - - } else { - - // Invalid click model. 
- throw new IllegalArgumentException("Invalid click model: " + searchQualityEvaluationJobParameter.getClickModel()); - - } - - final long elapsedTime = System.currentTimeMillis() - startTime; - LOGGER.info("Implicit judgment generation completed in {} ms", elapsedTime); - - final Map job = new HashMap<>(); - job.put("name", searchQualityEvaluationJobParameter.getName()); - job.put("click_model", searchQualityEvaluationJobParameter.getClickModel()); - job.put("started", startTime); - job.put("duration", elapsedTime); - job.put("judgments", judgmentsId); - job.put("invocation", "scheduled"); - job.put("max_rank", searchQualityEvaluationJobParameter.getMaxRank()); - - final IndexRequest indexRequest = new IndexRequest() - .index(SearchQualityEvaluationPlugin.COMPLETED_JOBS_INDEX_NAME) - .id(judgmentsId) - .source(job) - .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); - - client.index(indexRequest, new ActionListener<>() { - @Override - public void onResponse(IndexResponse indexResponse) { - LOGGER.info("Successfully indexed implicit judgments {}", judgmentsId); - } - - @Override - public void onFailure(Exception ex) { - LOGGER.error("Unable to index implicit judgments", ex); - } - }); - - }, exception -> { throw new IllegalStateException("Failed to acquire lock."); })); - } - - }; - - threadPool.generic().submit(runnable); - - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationPlugin.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationPlugin.java deleted file mode 100644 index 6a7b581..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationPlugin.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ -package org.opensearch.eval; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.client.Client; -import org.opensearch.cluster.metadata.IndexNameExpressionResolver; -import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.settings.ClusterSettings; -import org.opensearch.common.settings.IndexScopedSettings; -import org.opensearch.common.settings.Settings; -import org.opensearch.common.settings.SettingsFilter; -import org.opensearch.core.common.io.stream.NamedWriteableRegistry; -import org.opensearch.core.xcontent.NamedXContentRegistry; -import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.core.xcontent.XContentParserUtils; -import org.opensearch.env.Environment; -import org.opensearch.env.NodeEnvironment; -import org.opensearch.jobscheduler.spi.JobSchedulerExtension; -import org.opensearch.jobscheduler.spi.ScheduledJobParser; -import org.opensearch.jobscheduler.spi.ScheduledJobRunner; -import org.opensearch.jobscheduler.spi.schedule.ScheduleParser; -import org.opensearch.plugins.ActionPlugin; -import org.opensearch.plugins.Plugin; -import org.opensearch.repositories.RepositoriesService; -import org.opensearch.rest.RestController; -import org.opensearch.rest.RestHandler; -import org.opensearch.script.ScriptService; -import org.opensearch.threadpool.ThreadPool; -import org.opensearch.watcher.ResourceWatcherService; - -import java.io.IOException; -import java.time.Instant; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.function.Supplier; - -/** - * Main class for the Search Quality Evaluation plugin. - */ -public class SearchQualityEvaluationPlugin extends Plugin implements ActionPlugin, JobSchedulerExtension { - - private static final Logger LOGGER = LogManager.getLogger(SearchQualityEvaluationPlugin.class); - - /** - * The name of the UBI index containing the queries. This should not be changed. - */ - public static final String UBI_QUERIES_INDEX_NAME = "ubi_queries"; - - /** - * The name of the UBI index containing the events. This should not be changed. - */ - public static final String UBI_EVENTS_INDEX_NAME = "ubi_events"; - - /** - * The name of the index to store the scheduled jobs to create implicit judgments. - */ - public static final String SCHEDULED_JOBS_INDEX_NAME = "search_quality_eval_scheduled_jobs"; - - /** - * The name of the index to store the completed jobs to create implicit judgments. - */ - public static final String COMPLETED_JOBS_INDEX_NAME = "search_quality_eval_completed_jobs"; - - /** - * The name of the index that stores the query sets. - */ - public static final String QUERY_SETS_INDEX_NAME = "search_quality_eval_query_sets"; - - /** - * The name of the index that stores the metrics for the dashboard. - */ - public static final String DASHBOARD_METRICS_INDEX_NAME = "sqe_metrics_sample_data"; - - /** - * The name of the index that stores the implicit judgments. 
- */ - public static final String JUDGMENTS_INDEX_NAME = "judgments"; - - @Override - public Collection createComponents( - final Client client, - final ClusterService clusterService, - final ThreadPool threadPool, - final ResourceWatcherService resourceWatcherService, - final ScriptService scriptService, - final NamedXContentRegistry xContentRegistry, - final Environment environment, - final NodeEnvironment nodeEnvironment, - final NamedWriteableRegistry namedWriteableRegistry, - final IndexNameExpressionResolver indexNameExpressionResolver, - final Supplier repositoriesServiceSupplier - ) { - - LOGGER.info("Creating search evaluation framework components"); - final SearchQualityEvaluationJobRunner jobRunner = SearchQualityEvaluationJobRunner.getJobRunnerInstance(); - jobRunner.setClusterService(clusterService); - jobRunner.setThreadPool(threadPool); - jobRunner.setClient(client); - - return Collections.emptyList(); - - } - - @Override - public String getJobType() { - return "scheduler_search_quality_eval"; - } - - @Override - public String getJobIndex() { - LOGGER.info("Getting job index name"); - return SCHEDULED_JOBS_INDEX_NAME; - } - - @Override - public ScheduledJobRunner getJobRunner() { - LOGGER.info("Creating job runner"); - return SearchQualityEvaluationJobRunner.getJobRunnerInstance(); - } - - @Override - public ScheduledJobParser getJobParser() { - - return (parser, id, jobDocVersion) -> { - - final SearchQualityEvaluationJobParameter jobParameter = new SearchQualityEvaluationJobParameter(); - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); - - while (!parser.nextToken().equals(XContentParser.Token.END_OBJECT)) { - - final String fieldName = parser.currentName(); - - parser.nextToken(); - - switch (fieldName) { - case SearchQualityEvaluationJobParameter.NAME_FIELD: - jobParameter.setJobName(parser.text()); - break; - case SearchQualityEvaluationJobParameter.ENABLED_FILED: - jobParameter.setEnabled(parser.booleanValue()); - break; - case SearchQualityEvaluationJobParameter.ENABLED_TIME_FILED: - jobParameter.setEnabledTime(parseInstantValue(parser)); - break; - case SearchQualityEvaluationJobParameter.LAST_UPDATE_TIME_FIELD: - jobParameter.setLastUpdateTime(parseInstantValue(parser)); - break; - case SearchQualityEvaluationJobParameter.SCHEDULE_FIELD: - jobParameter.setSchedule(ScheduleParser.parse(parser)); - break; - case SearchQualityEvaluationJobParameter.LOCK_DURATION_SECONDS: - jobParameter.setLockDurationSeconds(parser.longValue()); - break; - case SearchQualityEvaluationJobParameter.JITTER: - jobParameter.setJitter(parser.doubleValue()); - break; - case SearchQualityEvaluationJobParameter.CLICK_MODEL: - jobParameter.setClickModel(parser.text()); - break; - case SearchQualityEvaluationJobParameter.MAX_RANK: - jobParameter.setMaxRank(parser.intValue()); - break; - default: - XContentParserUtils.throwUnknownToken(parser.currentToken(), parser.getTokenLocation()); - } - - } - - return jobParameter; - - }; - - } - - private Instant parseInstantValue(final XContentParser parser) throws IOException { - - if (XContentParser.Token.VALUE_NULL.equals(parser.currentToken())) { - return null; - } - - if (parser.currentToken().isValue()) { - return Instant.ofEpochMilli(parser.longValue()); - } - - XContentParserUtils.throwUnknownToken(parser.currentToken(), parser.getTokenLocation()); - return null; - - } - - @Override - public List getRestHandlers( - final Settings settings, - final RestController restController, - final 
ClusterSettings clusterSettings, - final IndexScopedSettings indexScopedSettings, - final SettingsFilter settingsFilter, - final IndexNameExpressionResolver indexNameExpressionResolver, - final Supplier nodesInCluster - ) { - return Collections.singletonList(new SearchQualityEvaluationRestHandler()); - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java deleted file mode 100644 index ba56f04..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/SearchQualityEvaluationRestHandler.java +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.action.admin.indices.create.CreateIndexRequest; -import org.opensearch.action.admin.indices.create.CreateIndexResponse; -import org.opensearch.action.admin.indices.exists.indices.IndicesExistsRequest; -import org.opensearch.action.admin.indices.exists.indices.IndicesExistsResponse; -import org.opensearch.action.delete.DeleteRequest; -import org.opensearch.action.delete.DeleteResponse; -import org.opensearch.action.index.IndexRequest; -import org.opensearch.action.index.IndexResponse; -import org.opensearch.action.support.WriteRequest; -import org.opensearch.client.node.NodeClient; -import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.core.action.ActionListener; -import org.opensearch.core.common.bytes.BytesReference; -import org.opensearch.core.rest.RestStatus; -import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel; -import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModelParameters; -import org.opensearch.eval.runners.OpenSearchQuerySetRunner; -import org.opensearch.eval.runners.QuerySetRunResult; -import org.opensearch.eval.samplers.AllQueriesQuerySampler; -import org.opensearch.eval.samplers.AllQueriesQuerySamplerParameters; -import org.opensearch.eval.samplers.ProbabilityProportionalToSizeAbstractQuerySampler; -import org.opensearch.eval.samplers.ProbabilityProportionalToSizeParameters; -import org.opensearch.jobscheduler.spi.schedule.IntervalSchedule; -import org.opensearch.rest.BaseRestHandler; -import org.opensearch.rest.BytesRestResponse; -import org.opensearch.rest.RestRequest; -import org.opensearch.rest.RestResponse; - -import java.io.IOException; -import java.nio.charset.Charset; -import java.time.Instant; -import java.time.temporal.ChronoUnit; -import java.util.List; -import java.util.UUID; -import java.util.concurrent.ExecutionException; - -import static org.opensearch.eval.SearchQualityEvaluationPlugin.JUDGMENTS_INDEX_NAME; - -public class SearchQualityEvaluationRestHandler extends BaseRestHandler { - - private static final Logger LOGGER = LogManager.getLogger(SearchQualityEvaluationRestHandler.class); - - /** - * URL for the implicit judgment scheduling. - */ - public static final String SCHEDULING_URL = "/_plugins/search_quality_eval/schedule"; - - /** - * URL for on-demand implicit judgment generation. 
- */ - public static final String IMPLICIT_JUDGMENTS_URL = "/_plugins/search_quality_eval/judgments"; - - /** - * URL for managing query sets. - */ - public static final String QUERYSET_MANAGEMENT_URL = "/_plugins/search_quality_eval/queryset"; - - /** - * URL for initiating query sets to run on-demand. - */ - public static final String QUERYSET_RUN_URL = "/_plugins/search_quality_eval/run"; - - /** - * The placeholder in the query that gets replaced by the query term when running a query set. - */ - public static final String QUERY_PLACEHOLDER = "#$query##"; - - @Override - public String getName() { - return "Search Quality Evaluation Framework"; - } - - @Override - public List routes() { - return List.of( - new Route(RestRequest.Method.POST, IMPLICIT_JUDGMENTS_URL), - new Route(RestRequest.Method.POST, SCHEDULING_URL), - new Route(RestRequest.Method.DELETE, SCHEDULING_URL), - new Route(RestRequest.Method.POST, QUERYSET_MANAGEMENT_URL), - new Route(RestRequest.Method.POST, QUERYSET_RUN_URL)); - } - - @Override - protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { - - // Handle managing query sets. - if(QUERYSET_MANAGEMENT_URL.equalsIgnoreCase(request.path())) { - - // Creating a new query set by sampling the UBI queries. - if (request.method().equals(RestRequest.Method.POST)) { - - final String name = request.param("name"); - final String description = request.param("description"); - final String sampling = request.param("sampling", "pptss"); - final int querySetSize = Integer.parseInt(request.param("query_set_size", "1000")); - - // Create a query set by finding all the unique user_query terms. - if (AllQueriesQuerySampler.NAME.equalsIgnoreCase(sampling)) { - - // If we are not sampling queries, the query sets should just be directly - // indexed into OpenSearch using the `ubi_queries` index directly. - - try { - - final AllQueriesQuerySamplerParameters parameters = new AllQueriesQuerySamplerParameters(name, description, sampling, querySetSize); - final AllQueriesQuerySampler sampler = new AllQueriesQuerySampler(client, parameters); - - // Sample and index the queries. - final String querySetId = sampler.sample(); - - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"query_set\": \"" + querySetId + "\"}")); - - } catch(Exception ex) { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, "{\"error\": \"" + ex.getMessage() + "\"}")); - } - - - // Create a query set by using PPTSS sampling. - } else if (ProbabilityProportionalToSizeAbstractQuerySampler.NAME.equalsIgnoreCase(sampling)) { - - LOGGER.info("Creating query set using PPTSS"); - - final ProbabilityProportionalToSizeParameters parameters = new ProbabilityProportionalToSizeParameters(name, description, sampling, querySetSize); - final ProbabilityProportionalToSizeAbstractQuerySampler sampler = new ProbabilityProportionalToSizeAbstractQuerySampler(client, parameters); - - try { - - // Sample and index the queries. - final String querySetId = sampler.sample(); - - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"query_set\": \"" + querySetId + "\"}")); - - } catch(Exception ex) { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, "{\"error\": \"" + ex.getMessage() + "\"}")); - } - - } else { - // An Invalid sampling method was provided in the request. 
- return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"Invalid sampling method: " + sampling + "\"}")); - } - - } else { - // Invalid HTTP method for this endpoint. - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.METHOD_NOT_ALLOWED, "{\"error\": \"" + request.method() + " is not allowed.\"}")); - } - - // Handle running query sets. - } else if(QUERYSET_RUN_URL.equalsIgnoreCase(request.path())) { - - final String querySetId = request.param("id"); - final String judgmentsId = request.param("judgments_id"); - final String index = request.param("index"); - final String searchPipeline = request.param("search_pipeline", null); - final String idField = request.param("id_field", "_id"); - final int k = Integer.parseInt(request.param("k", "10")); - final double threshold = Double.parseDouble(request.param("threshold", "1.0")); - - if(querySetId == null || querySetId.isEmpty() || judgmentsId == null || judgmentsId.isEmpty() || index == null || index.isEmpty()) { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"Missing required parameters.\"}")); - } - - if(k < 1) { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"k must be a positive integer.\"}")); - } - - if(!request.hasContent()) { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"Missing query in body.\"}")); - } - - // Get the query JSON from the content. - final String query = new String(BytesReference.toBytes(request.content()), Charset.defaultCharset()); - - // Validate the query has a QUERY_PLACEHOLDER. - if(!query.contains(QUERY_PLACEHOLDER)) { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"Missing query placeholder in query.\"}")); - } - - try { - - final OpenSearchQuerySetRunner openSearchQuerySetRunner = new OpenSearchQuerySetRunner(client); - final QuerySetRunResult querySetRunResult = openSearchQuerySetRunner.run(querySetId, judgmentsId, index, searchPipeline, idField, query, k, threshold); - openSearchQuerySetRunner.save(querySetRunResult); - - } catch (Exception ex) { - LOGGER.error("Unable to run query set. Verify query set and judgments exist.", ex); - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, ex.getMessage())); - } - - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"message\": \"Run initiated for query set " + querySetId + "\"}")); - - // Handle the on-demand creation of implicit judgments. - } else if(IMPLICIT_JUDGMENTS_URL.equalsIgnoreCase(request.path())) { - - if (request.method().equals(RestRequest.Method.POST)) { - - //final long startTime = System.currentTimeMillis(); - final String clickModel = request.param("click_model", "coec"); - final int maxRank = Integer.parseInt(request.param("max_rank", "20")); - - if (CoecClickModel.CLICK_MODEL_NAME.equalsIgnoreCase(clickModel)) { - - final CoecClickModelParameters coecClickModelParameters = new CoecClickModelParameters(maxRank); - final CoecClickModel coecClickModel = new CoecClickModel(client, coecClickModelParameters); - - final String judgmentsId; - - // TODO: Run this in a separate thread. - try { - - // Create the judgments index. 
- createJudgmentsIndex(client); - - judgmentsId = coecClickModel.calculateJudgments(); - - // judgmentsId will be null if no judgments were created (and indexed). - if(judgmentsId == null) { - // TODO: Is Bad Request the appropriate error? Perhaps Conflict is more appropriate? - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"No judgments were created. Check the queries and events data.\"}")); - } - -// final long elapsedTime = System.currentTimeMillis() - startTime; -// -// final Map job = new HashMap<>(); -// job.put("name", "manual_generation"); -// job.put("click_model", clickModel); -// job.put("started", startTime); -// job.put("duration", elapsedTime); -// job.put("invocation", "on_demand"); -// job.put("judgments_id", judgmentsId); -// job.put("max_rank", maxRank); -// -// final String jobId = UUID.randomUUID().toString(); -// -// final IndexRequest indexRequest = new IndexRequest() -// .index(SearchQualityEvaluationPlugin.COMPLETED_JOBS_INDEX_NAME) -// .id(jobId) -// .source(job) -// .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); -// -// client.index(indexRequest, new ActionListener<>() { -// @Override -// public void onResponse(final IndexResponse indexResponse) { -// LOGGER.debug("Click model job completed successfully: {}", jobId); -// } -// -// @Override -// public void onFailure(final Exception ex) { -// LOGGER.error("Unable to run job with ID {}", jobId, ex); -// throw new RuntimeException("Unable to run job", ex); -// } -// }); - - } catch (Exception ex) { - throw new RuntimeException("Unable to generate judgments.", ex); - } - - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"judgments_id\": \"" + judgmentsId + "\"}")); - - } else { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"Invalid click model.\"}")); - } - - } else { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.METHOD_NOT_ALLOWED, "{\"error\": \"" + request.method() + " is not allowed.\"}")); - } - - // Handle the scheduling of creating implicit judgments. - } else if(SCHEDULING_URL.equalsIgnoreCase(request.path())) { - - if (request.method().equals(RestRequest.Method.POST)) { - - // Get the job parameters from the request. - final String id = request.param("id"); - final String jobName = request.param("job_name", UUID.randomUUID().toString()); - final String lockDurationSecondsString = request.param("lock_duration_seconds", "600"); - final Long lockDurationSeconds = lockDurationSecondsString != null ? Long.parseLong(lockDurationSecondsString) : null; - final String jitterString = request.param("jitter"); - final Double jitter = jitterString != null ? Double.parseDouble(jitterString) : null; - final String clickModel = request.param("click_model"); - final int maxRank = Integer.parseInt(request.param("max_rank", "20")); - - // Validate the request parameters. - if (id == null || clickModel == null) { - throw new IllegalArgumentException("The id and click_model parameters must be provided."); - } - - // Read the start_time. - final Instant startTime; - if (request.param("start_time") == null) { - startTime = Instant.now(); - } else { - startTime = Instant.ofEpochMilli(Long.parseLong(request.param("start_time"))); - } - - // Read the interval. - final int interval; - if (request.param("interval") == null) { - // Default to every 24 hours. 
- interval = 1440; - } else { - interval = Integer.parseInt(request.param("interval")); - } - - final SearchQualityEvaluationJobParameter jobParameter = new SearchQualityEvaluationJobParameter( - jobName, new IntervalSchedule(startTime, interval, ChronoUnit.MINUTES), lockDurationSeconds, - jitter, clickModel, maxRank - ); - - final IndexRequest indexRequest = new IndexRequest().index(SearchQualityEvaluationPlugin.SCHEDULED_JOBS_INDEX_NAME) - .id(id) - .source(jobParameter.toXContent(JsonXContent.contentBuilder(), null)) - .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); - - return restChannel -> { - - // index the job parameter - client.index(indexRequest, new ActionListener<>() { - - @Override - public void onResponse(final IndexResponse indexResponse) { - - try { - - final RestResponse restResponse = new BytesRestResponse( - RestStatus.OK, - indexResponse.toXContent(JsonXContent.contentBuilder(), null) - ); - LOGGER.info("Created implicit judgments schedule for click-model {}: Job name {}, running every {} minutes starting {}", clickModel, jobName, interval, startTime); - - restChannel.sendResponse(restResponse); - - } catch (IOException e) { - restChannel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, e.getMessage())); - } - - } - - @Override - public void onFailure(Exception e) { - restChannel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, e.getMessage())); - } - }); - - }; - - // Delete a scheduled job to make implicit judgments. - } else if (request.method().equals(RestRequest.Method.DELETE)) { - - final String id = request.param("id"); - final DeleteRequest deleteRequest = new DeleteRequest().index(SearchQualityEvaluationPlugin.SCHEDULED_JOBS_INDEX_NAME).id(id); - - return restChannel -> client.delete(deleteRequest, new ActionListener<>() { - @Override - public void onResponse(final DeleteResponse deleteResponse) { - restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"message\": \"Scheduled job deleted.\"}")); - } - - @Override - public void onFailure(Exception e) { - restChannel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, e.getMessage())); - } - }); - - } else { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.METHOD_NOT_ALLOWED, "{\"error\": \"" + request.method() + " is not allowed.\"}")); - } - - } else { - return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.NOT_FOUND, "{\"error\": \"" + request.path() + " was not found.\"}")); - } - - } - - private void createJudgmentsIndex(final NodeClient client) throws Exception { - - // If the judgments index does not exist we need to create it. - final IndicesExistsRequest indicesExistsRequest = new IndicesExistsRequest(JUDGMENTS_INDEX_NAME); - - final IndicesExistsResponse indicesExistsResponse = client.admin().indices().exists(indicesExistsRequest).get(); - - if(!indicesExistsResponse.isExists()) { - - // TODO: Read this mapping from a resource file instead. - final String mapping = "{\n" + - " \"properties\": {\n" + - " \"judgments_id\": { \"type\": \"keyword\" },\n" + - " \"query_id\": { \"type\": \"keyword\" },\n" + - " \"query\": { \"type\": \"keyword\" },\n" + - " \"document_id\": { \"type\": \"keyword\" },\n" + - " \"judgment\": { \"type\": \"double\" },\n" + - " \"timestamp\": { \"type\": \"date\", \"format\": \"strict_date_time\" }\n" + - " }\n" + - " }"; - - // Create the judgments index. 
- final CreateIndexRequest createIndexRequest = new CreateIndexRequest(JUDGMENTS_INDEX_NAME).mapping(mapping); - - // TODO: Don't use .get() - client.admin().indices().create(createIndexRequest).get(); - - } - - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java deleted file mode 100644 index f2e8aa8..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval.judgments.clickmodel.coec; - -import com.google.gson.Gson; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.action.search.SearchScrollRequest; -import org.opensearch.client.Client; -import org.opensearch.client.Requests; -import org.opensearch.common.unit.TimeValue; -import org.opensearch.eval.SearchQualityEvaluationPlugin; -import org.opensearch.eval.judgments.clickmodel.ClickModel; -import org.opensearch.eval.judgments.model.ClickthroughRate; -import org.opensearch.eval.judgments.model.Judgment; -import org.opensearch.eval.judgments.model.ubi.event.UbiEvent; -import org.opensearch.eval.judgments.opensearch.OpenSearchHelper; -import org.opensearch.eval.judgments.queryhash.IncrementalUserQueryHash; -import org.opensearch.eval.utils.MathUtils; -import org.opensearch.index.query.BoolQueryBuilder; -import org.opensearch.index.query.QueryBuilder; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.index.query.WrapperQueryBuilder; -import org.opensearch.search.Scroll; -import org.opensearch.search.SearchHit; -import org.opensearch.search.aggregations.AggregationBuilders; -import org.opensearch.search.aggregations.BucketOrder; -import org.opensearch.search.aggregations.bucket.terms.Terms; -import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; -import org.opensearch.search.builder.SearchSourceBuilder; - -import java.io.IOException; -import java.security.AccessController; -import java.security.PrivilegedAction; -import java.util.Collection; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.Map; -import java.util.Set; - -public class CoecClickModel extends ClickModel { - - public static final String CLICK_MODEL_NAME = "coec"; - - // OpenSearch indexes for COEC data. - public static final String INDEX_RANK_AGGREGATED_CTR = "rank_aggregated_ctr"; - public static final String INDEX_QUERY_DOC_CTR = "click_through_rates"; - - // UBI event names. 
- public static final String EVENT_CLICK = "click"; - public static final String EVENT_IMPRESSION = "impression"; - - private final CoecClickModelParameters parameters; - - private final OpenSearchHelper openSearchHelper; - - private final IncrementalUserQueryHash incrementalUserQueryHash = new IncrementalUserQueryHash(); - private final Gson gson = new Gson(); - private final Client client; - - private static final Logger LOGGER = LogManager.getLogger(CoecClickModel.class.getName()); - - public CoecClickModel(final Client client, final CoecClickModelParameters parameters) { - - this.parameters = parameters; - this.openSearchHelper = new OpenSearchHelper(client); - this.client = client; - - } - - @Override - public String calculateJudgments() throws Exception { - - final int maxRank = parameters.getMaxRank(); - - // Calculate and index the rank-aggregated click-through. - LOGGER.info("Beginning calculation of rank-aggregated click-through."); - final Map rankAggregatedClickThrough = getRankAggregatedClickThrough(); - LOGGER.info("Rank-aggregated clickthrough positions: {}", rankAggregatedClickThrough.size()); - showRankAggregatedClickThrough(rankAggregatedClickThrough); - - // Calculate and index the click-through rate for query/doc pairs. - LOGGER.info("Beginning calculation of clickthrough rates."); - final Map> clickthroughRates = getClickthroughRate(); - LOGGER.info("Clickthrough rates for number of queries: {}", clickthroughRates.size()); - showClickthroughRates(clickthroughRates); - - // Generate and index the implicit judgments. - LOGGER.info("Beginning calculation of implicit judgments."); - return calculateCoec(rankAggregatedClickThrough, clickthroughRates); - - } - - public String calculateCoec(final Map rankAggregatedClickThrough, - final Map> clickthroughRates) throws Exception { - - // Calculate the COEC. - // Numerator is the total number of clicks received by a query/result pair. - // Denominator is the expected clicks (EC) that an average result would receive after being impressed i times at rank r, - // and CTR is the average CTR for each position in the results page (up to R) computed over all queries and results. - - // Format: query_id, query, document, judgment - final Collection judgments = new LinkedList<>(); - - LOGGER.info("Count of queries: {}", clickthroughRates.size()); - - for(final String userQuery : clickthroughRates.keySet()) { - - // The clickthrough rates for this one query. - // A ClickthroughRate is a document with counts of impressions and clicks. - final Collection ctrs = clickthroughRates.get(userQuery); - - // Go through each clickthrough rate for this query. - for(final ClickthroughRate ctr : ctrs) { - - double denominatorSum = 0; - - for(int rank = 0; rank < parameters.getMaxRank(); rank++) { - - // The document's mean CTR at the rank. - final double meanCtrAtRank = rankAggregatedClickThrough.getOrDefault(rank, 0.0); - - // The number of times this document was shown as this rank. - final long countOfTimesShownAtRank = openSearchHelper.getCountOfQueriesForUserQueryHavingResultInRankR(userQuery, ctr.getObjectId(), rank); - - denominatorSum += (meanCtrAtRank * countOfTimesShownAtRank); - - } - - // Numerator is sum of clicks at all ranks up to the maxRank. - final int totalNumberClicksForQueryResult = ctr.getClicks(); - - // Divide the numerator by the denominator (value). 
- final double judgmentValue; - - if(denominatorSum == 0) { - judgmentValue = 0.0; - } else { - judgmentValue = totalNumberClicksForQueryResult / denominatorSum; - } - - // Hash the user query to get a query ID. - final int queryId = incrementalUserQueryHash.getHash(userQuery); - - // Add the judgment to the list. - // TODO: What to do for query ID when the values are per user_query instead? - final Judgment judgment = new Judgment(String.valueOf(queryId), userQuery, ctr.getObjectId(), judgmentValue); - judgments.add(judgment); - - } - - } - - LOGGER.info("Count of user queries: {}", clickthroughRates.size()); - LOGGER.info("Count of judgments: {}", judgments.size()); - - showJudgments(judgments); - - if(!(judgments.isEmpty())) { - return openSearchHelper.indexJudgments(judgments); - } else { - return null; - } - - } - - /** - * Gets the clickthrough rates for each query and its results. - * @return A map of user_query to the clickthrough rate for each query result. - * @throws IOException Thrown when a problem accessing OpenSearch. - */ - private Map> getClickthroughRate() throws Exception { - - // For each query: - // - Get each document returned in that query (in the QueryResponse object). - // - Calculate the click-through rate for the document. (clicks/impressions) - - // TODO: Allow for a time period and for a specific application. - - final String query = "{\n" + - " \"bool\": {\n" + - " \"should\": [\n" + - " {\n" + - " \"term\": {\n" + - " \"action_name\": \"click\"\n" + - " }\n" + - " },\n" + - " {\n" + - " \"term\": {\n" + - " \"action_name\": \"impression\"\n" + - " }\n" + - " }\n" + - " ],\n" + - " \"must\": [\n" + - " {\n" + - " \"range\": {\n" + - " \"event_attributes.position.ordinal\": {\n" + - " \"lte\": " + parameters.getMaxRank() + "\n" + - " }\n" + - " }\n" + - " }\n" + - " ]\n" + - " }\n" + - " }"; - - final BoolQueryBuilder queryBuilder = new BoolQueryBuilder().must(new WrapperQueryBuilder(query)); - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(queryBuilder).size(1000); - final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(10L)); - - final SearchRequest searchRequest = Requests - .searchRequest(SearchQualityEvaluationPlugin.UBI_EVENTS_INDEX_NAME) - .source(searchSourceBuilder) - .scroll(scroll); - - // TODO Don't use .get() - SearchResponse searchResponse = client.search(searchRequest).get(); - - String scrollId = searchResponse.getScrollId(); - SearchHit[] searchHits = searchResponse.getHits().getHits(); - - final Map> queriesToClickthroughRates = new HashMap<>(); - - while (searchHits != null && searchHits.length > 0) { - - for (final SearchHit hit : searchHits) { - - final UbiEvent ubiEvent = AccessController.doPrivileged((PrivilegedAction) () -> gson.fromJson(hit.getSourceAsString(), UbiEvent.class)); - - // We need to the hash of the query_id because two users can both search - // for "computer" and those searches will have different query IDs, but they are the same search. - final String userQuery = openSearchHelper.getUserQuery(ubiEvent.getQueryId()); - - // userQuery will be null if there is not a query for this event in ubi_queries. - if(userQuery != null) { - - // Get the clicks for this queryId from the map, or an empty list if this is a new query. - final Set clickthroughRates = queriesToClickthroughRates.getOrDefault(userQuery, new LinkedHashSet<>()); - - // Get the ClickthroughRate object for the object that was interacted with. 
- final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId())); - - if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) { - //LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId()); - clickthroughRate.logClick(); - } else if (EVENT_IMPRESSION.equalsIgnoreCase(ubiEvent.getActionName())) { - //LOGGER.info("Logging an IMPRESSION on " + ubiEvent.getEventAttributes().getObject().getObjectId()); - clickthroughRate.logImpression(); - } else { - LOGGER.warn("Invalid event action name: {}", ubiEvent.getActionName()); - } - - clickthroughRates.add(clickthroughRate); - queriesToClickthroughRates.put(userQuery, clickthroughRates); - // LOGGER.debug("clickthroughRate = {}", queriesToClickthroughRates.size()); - - } - - } - - final SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId); - scrollRequest.scroll(scroll); - - //LOGGER.info("Doing scroll to next results"); - // TODO: Getting a warning in the log that "QueryGroup _id can't be null, It should be set before accessing it. This is abnormal behaviour" - // I don't remember seeing this prior to 2.18.0 but it's possible I just didn't see it. - // https://github.com/opensearch-project/OpenSearch/blob/f105e4eb2ede1556b5dd3c743bea1ab9686ebccf/server/src/main/java/org/opensearch/wlm/QueryGroupTask.java#L73 - searchResponse = client.searchScroll(scrollRequest).get(); - //LOGGER.info("Scroll complete."); - - scrollId = searchResponse.getScrollId(); - - searchHits = searchResponse.getHits().getHits(); - - } - - openSearchHelper.indexClickthroughRates(queriesToClickthroughRates); - - return queriesToClickthroughRates; - - } - - /** - * Calculate the rank-aggregated click through from the UBI events. - * @return A map of positions to clickthrough rates. - * @throws IOException Thrown when a problem accessing OpenSearch. - */ - public Map getRankAggregatedClickThrough() throws Exception { - - final Map rankAggregatedClickThrough = new HashMap<>(); - - // TODO: Allow for a time period and for a specific application. - - final QueryBuilder findRangeNumber = QueryBuilders.rangeQuery("event_attributes.position.ordinal").lte(parameters.getMaxRank()); - final QueryBuilder queryBuilder = new BoolQueryBuilder().must(findRangeNumber); - - // Order the aggregations by key and not by value. 
- final BucketOrder bucketOrder = BucketOrder.key(true); - - final TermsAggregationBuilder positionsAggregator = AggregationBuilders.terms("By_Position").field("event_attributes.position.ordinal").order(bucketOrder).size(parameters.getMaxRank()); - final TermsAggregationBuilder actionNameAggregation = AggregationBuilders.terms("By_Action").field("action_name").subAggregation(positionsAggregator).order(bucketOrder).size(parameters.getMaxRank()); - - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder() - .query(queryBuilder) - .aggregation(actionNameAggregation) - .from(0) - .size(0); - - final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.UBI_EVENTS_INDEX_NAME).source(searchSourceBuilder); - final SearchResponse searchResponse = client.search(searchRequest).get(); - - final Map clickCounts = new HashMap<>(); - final Map impressionCounts = new HashMap<>(); - - final Terms actionTerms = searchResponse.getAggregations().get("By_Action"); - final Collection actionBuckets = actionTerms.getBuckets(); - - LOGGER.debug("Aggregation query: {}", searchSourceBuilder.toString()); - - for(final Terms.Bucket actionBucket : actionBuckets) { - - // Handle the "impression" bucket. - if(EVENT_IMPRESSION.equalsIgnoreCase(actionBucket.getKey().toString())) { - - final Terms positionTerms = actionBucket.getAggregations().get("By_Position"); - final Collection positionBuckets = positionTerms.getBuckets(); - - for(final Terms.Bucket positionBucket : positionBuckets) { - LOGGER.debug("Inserting impression event from position {} with click count {}", positionBucket.getKey(), (double) positionBucket.getDocCount()); - impressionCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount()); - } - - } - - // Handle the "click" bucket. - if(EVENT_CLICK.equalsIgnoreCase(actionBucket.getKey().toString())) { - - final Terms positionTerms = actionBucket.getAggregations().get("By_Position"); - final Collection positionBuckets = positionTerms.getBuckets(); - - for(final Terms.Bucket positionBucket : positionBuckets) { - LOGGER.debug("Inserting client event from position {} with click count {}", positionBucket.getKey(), (double) positionBucket.getDocCount()); - clickCounts.put(Integer.valueOf(positionBucket.getKey().toString()), (double) positionBucket.getDocCount()); - } - - } - - } - - for(int rank = 0; rank < parameters.getMaxRank(); rank++) { - - if(impressionCounts.containsKey(rank)) { - - if(clickCounts.containsKey(rank)) { - - // Calculate the CTR by dividing the number of clicks by the number of impressions. - LOGGER.info("Position = {}, Impression Counts = {}, Click Count = {}", rank, impressionCounts.get(rank), clickCounts.get(rank)); - rankAggregatedClickThrough.put(rank, clickCounts.get(rank) / impressionCounts.get(rank)); - - } else { - - // This document has impressions but no clicks, so it's CTR is zero. - LOGGER.info("Position = {}, Impression Counts = {}, Impressions but no clicks so CTR is 0", rank, clickCounts.get(rank)); - rankAggregatedClickThrough.put(rank, 0.0); - - } - - } else { - - // No impressions so the clickthrough rate is 0. 
- LOGGER.info("No impressions for rank {}, so using CTR of 0", rank); - rankAggregatedClickThrough.put(rank, (double) 0); - - } - - } - - openSearchHelper.indexRankAggregatedClickthrough(rankAggregatedClickThrough); - - return rankAggregatedClickThrough; - - } - - private void showJudgments(final Collection judgments) { - - for(final Judgment judgment : judgments) { - LOGGER.info(judgment.toJudgmentString()); - } - - } - - private void showClickthroughRates(final Map> clickthroughRates) { - - for(final String userQuery : clickthroughRates.keySet()) { - - LOGGER.debug("user_query: {}", userQuery); - - for(final ClickthroughRate clickthroughRate : clickthroughRates.get(userQuery)) { - LOGGER.debug("\t - {}", clickthroughRate.toString()); - } - - } - - } - - private void showRankAggregatedClickThrough(final Map rankAggregatedClickThrough) { - - for(final int position : rankAggregatedClickThrough.keySet()) { - LOGGER.info("Position: {}, # ctr: {}", position, MathUtils.round(rankAggregatedClickThrough.get(position), parameters.getRoundingDigits())); - } - - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/UbiEvent.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/UbiEvent.java deleted file mode 100644 index 61c0f8b..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/UbiEvent.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval.judgments.model.ubi.event; - -import com.google.gson.annotations.SerializedName; - -/** - * Creates a representation of a UBI event. - */ -public class UbiEvent { - - @SerializedName("action_name") - private String actionName; - - @SerializedName("client_id") - private String clientId; - - @SerializedName("query_id") - private String queryId; - - @SerializedName("event_attributes") - private EventAttributes eventAttributes; - - /** - * Creates a new representation of an UBI event. - */ - public UbiEvent() { - - } - - @Override - public String toString() { - return actionName + ", " + clientId + ", " + queryId + ", " + eventAttributes.getObject().toString() + ", " + eventAttributes.getPosition().getOrdinal(); - } - - /** - * Gets the name of the action. - * @return The name of the action. - */ - public String getActionName() { - return actionName; - } - - /** - * Gets the client ID. - * @return The client ID. - */ - public String getClientId() { - return clientId; - } - - /** - * Gets the query ID. - * @return The query ID. - */ - public String getQueryId() { - return queryId; - } - - /** - * Gets the event attributes. - * @return The {@link EventAttributes}. - */ - public EventAttributes getEventAttributes() { - return eventAttributes; - } - - /** - * Sets the event attributes. - * @param eventAttributes The {@link EventAttributes}. 
- */ - public void setEventAttributes(EventAttributes eventAttributes) { - this.eventAttributes = eventAttributes; - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/QueryResponse.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/QueryResponse.java deleted file mode 100644 index 5d45ee0..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/QueryResponse.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval.judgments.model.ubi.query; - -import java.util.List; - -/** - * A query response for a {@link UbiQuery query}. - */ -public class QueryResponse { - - private final String queryId; - private final String queryResponseId; - private final List queryResponseHitIds; - - /** - * Creates a query response. - * @param queryId The ID of the query. - * @param queryResponseId The ID of the query response. - * @param queryResponseHitIds A list of IDs for the hits in the query. - */ - public QueryResponse(final String queryId, final String queryResponseId, final List queryResponseHitIds) { - this.queryId = queryId; - this.queryResponseId = queryResponseId; - this.queryResponseHitIds = queryResponseHitIds; - } - - /** - * Gets the query ID. - * @return The query ID. - */ - public String getQueryId() { - return queryId; - } - - /** - * Gets the query response ID. - * @return The query response ID. - */ - public String getQueryResponseId() { - return queryResponseId; - } - - /** - * Gets the list of query response hit IDs. - * @return A list of query response hit IDs. - */ - public List getQueryResponseHitIds() { - return queryResponseHitIds; - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java deleted file mode 100644 index 3c391b3..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/opensearch/OpenSearchHelper.java +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ -package org.opensearch.eval.judgments.opensearch; - -import com.google.gson.Gson; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.action.bulk.BulkRequest; -import org.opensearch.action.bulk.BulkResponse; -import org.opensearch.action.index.IndexRequest; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.client.Client; -import org.opensearch.core.action.ActionListener; -import org.opensearch.eval.judgments.model.ClickthroughRate; -import org.opensearch.eval.judgments.model.Judgment; -import org.opensearch.eval.judgments.model.ubi.query.UbiQuery; -import org.opensearch.eval.utils.TimeUtils; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.index.query.WrapperQueryBuilder; -import org.opensearch.search.SearchHit; -import org.opensearch.search.builder.SearchSourceBuilder; - -import java.io.IOException; -import java.security.AccessController; -import java.security.PrivilegedAction; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.UUID; - -import static org.opensearch.eval.SearchQualityEvaluationPlugin.JUDGMENTS_INDEX_NAME; -import static org.opensearch.eval.SearchQualityEvaluationPlugin.UBI_EVENTS_INDEX_NAME; -import static org.opensearch.eval.SearchQualityEvaluationPlugin.UBI_QUERIES_INDEX_NAME; -import static org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel.INDEX_QUERY_DOC_CTR; -import static org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel.INDEX_RANK_AGGREGATED_CTR; - -/** - * Functionality for interacting with OpenSearch. - * TODO: Move these functions out of this class. - */ -public class OpenSearchHelper { - - private static final Logger LOGGER = LogManager.getLogger(OpenSearchHelper.class.getName()); - - private final Client client; - private final Gson gson = new Gson(); - - // Used to cache the query ID->user_query to avoid unnecessary lookups to OpenSearch. - private static final Map userQueryCache = new HashMap<>(); - - public OpenSearchHelper(final Client client) { - this.client = client; - } - - /** - * Gets the user query for a given query ID. - * @param queryId The query ID. - * @return The user query. - * @throws IOException Thrown when there is a problem accessing OpenSearch. - */ - public String getUserQuery(final String queryId) throws Exception { - - // If it's in the cache just get it and return it. - if(userQueryCache.containsKey(queryId)) { - return userQueryCache.get(queryId); - } - - // Cache it and return it. - final UbiQuery ubiQuery = getQueryFromQueryId(queryId); - - // ubiQuery will be null if the query does not exist. - if(ubiQuery != null) { - - userQueryCache.put(queryId, ubiQuery.getUserQuery()); - return ubiQuery.getUserQuery(); - - } else { - - return null; - - } - - } - - /** - * Gets the query object for a given query ID. - * @param queryId The query ID. - * @return A {@link UbiQuery} object for the given query ID. - * @throws Exception Thrown if the query cannot be retrieved. - */ - public UbiQuery getQueryFromQueryId(final String queryId) throws Exception { - - LOGGER.debug("Getting query from query ID {}", queryId); - - final String query = "{\"match\": {\"query_id\": \"" + queryId + "\" }}"; - final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query); - - // The query_id should be unique anyway, but we are limiting it to a single result anyway. 
- final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(qb); - searchSourceBuilder.from(0); - searchSourceBuilder.size(1); - - final String[] indexes = {UBI_QUERIES_INDEX_NAME}; - - final SearchRequest searchRequest = new SearchRequest(indexes, searchSourceBuilder); - final SearchResponse response = client.search(searchRequest).get(); - - // If this does not return a query then we cannot calculate the judgments. Each even should have a query associated with it. - if(response.getHits().getHits() != null & response.getHits().getHits().length > 0) { - - final SearchHit hit = response.getHits().getHits()[0]; - return AccessController.doPrivileged((PrivilegedAction) () -> gson.fromJson(hit.getSourceAsString(), UbiQuery.class)); - - } else { - - LOGGER.warn("No query exists for query ID {} to calculate judgments.", queryId); - return null; - - } - - } - - private Collection getQueryIdsHavingUserQuery(final String userQuery) throws Exception { - - final String query = "{\"match\": {\"user_query\": \"" + userQuery + "\" }}"; - final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query); - - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(qb); - - final String[] indexes = {UBI_QUERIES_INDEX_NAME}; - - final SearchRequest searchRequest = new SearchRequest(indexes, searchSourceBuilder); - final SearchResponse response = client.search(searchRequest).get(); - - final Collection queryIds = new ArrayList<>(); - - for(final SearchHit hit : response.getHits().getHits()) { - final String queryId = hit.getSourceAsMap().get("query_id").toString(); - queryIds.add(queryId); - } - - return queryIds; - - } - - public long getCountOfQueriesForUserQueryHavingResultInRankR(final String userQuery, final String objectId, final int rank) throws Exception { - - long countOfTimesShownAtRank = 0; - - // Get all query IDs matching this user query. - final Collection queryIds = getQueryIdsHavingUserQuery(userQuery); - - // For each query ID, get the events with action_name = "impression" having a match on objectId and rank (position). - for(final String queryId : queryIds) { - - final String query = "{\n" + - " \"bool\": {\n" + - " \"must\": [\n" + - " {\n" + - " \"term\": {\n" + - " \"query_id\": \"" + queryId + "\"\n" + - " }\n" + - " },\n" + - " {\n" + - " \"term\": {\n" + - " \"action_name\": \"impression\"\n" + - " }\n" + - " },\n" + - " {\n" + - " \"term\": {\n" + - " \"event_attributes.position.ordinal\": \"" + rank + "\"\n" + - " }\n" + - " },\n" + - " {\n" + - " \"term\": {\n" + - " \"event_attributes.object.object_id\": \"" + objectId + "\"\n" + - " }\n" + - " }\n" + - " ]\n" + - " }\n" + - " }"; - - final WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(query); - - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(qb); - searchSourceBuilder.trackTotalHits(true); - searchSourceBuilder.size(0); - - final String[] indexes = {UBI_EVENTS_INDEX_NAME}; - - final SearchRequest searchRequest = new SearchRequest(indexes, searchSourceBuilder); - final SearchResponse response = client.search(searchRequest).get(); - - // Won't be null as long as trackTotalHits is true. 
- if(response.getHits().getTotalHits() != null) { - countOfTimesShownAtRank += response.getHits().getTotalHits().value; - } - - } - - LOGGER.debug("Count of {} having {} at rank {} = {}", userQuery, objectId, rank, countOfTimesShownAtRank); - - if(countOfTimesShownAtRank > 0) { - LOGGER.debug("Count of {} having {} at rank {} = {}", userQuery, objectId, rank, countOfTimesShownAtRank); - } - - return countOfTimesShownAtRank; - - } - - /** - * Index the rank-aggregated clickthrough values. - * @param rankAggregatedClickThrough A map of position to clickthrough values. - * @throws IOException Thrown when there is a problem accessing OpenSearch. - */ - public void indexRankAggregatedClickthrough(final Map rankAggregatedClickThrough) throws Exception { - - if(!rankAggregatedClickThrough.isEmpty()) { - - // TODO: Split this into multiple bulk insert requests. - - final BulkRequest request = new BulkRequest(); - - for (final int position : rankAggregatedClickThrough.keySet()) { - - final Map jsonMap = new HashMap<>(); - jsonMap.put("position", position); - jsonMap.put("ctr", rankAggregatedClickThrough.get(position)); - - final IndexRequest indexRequest = new IndexRequest(INDEX_RANK_AGGREGATED_CTR).id(UUID.randomUUID().toString()).source(jsonMap); - - request.add(indexRequest); - - } - - client.bulk(request).get(); - - } - - } - - /** - * Index the clickthrough rates. - * @param clickthroughRates A map of query IDs to a collection of {@link ClickthroughRate} objects. - * @throws IOException Thrown when there is a problem accessing OpenSearch. - */ - public void indexClickthroughRates(final Map> clickthroughRates) throws Exception { - - if(!clickthroughRates.isEmpty()) { - - final BulkRequest request = new BulkRequest(); - - for(final String userQuery : clickthroughRates.keySet()) { - - for(final ClickthroughRate clickthroughRate : clickthroughRates.get(userQuery)) { - - final Map jsonMap = new HashMap<>(); - jsonMap.put("user_query", userQuery); - jsonMap.put("clicks", clickthroughRate.getClicks()); - jsonMap.put("events", clickthroughRate.getImpressions()); - jsonMap.put("ctr", clickthroughRate.getClickthroughRate()); - jsonMap.put("object_id", clickthroughRate.getObjectId()); - - final IndexRequest indexRequest = new IndexRequest(INDEX_QUERY_DOC_CTR) - .id(UUID.randomUUID().toString()) - .source(jsonMap); - - request.add(indexRequest); - - } - - } - - client.bulk(request, new ActionListener<>() { - - @Override - public void onResponse(BulkResponse bulkItemResponses) { - if(bulkItemResponses.hasFailures()) { - LOGGER.error("Clickthrough rates were not all successfully indexed: {}", bulkItemResponses.buildFailureMessage()); - } else { - LOGGER.debug("Clickthrough rates has been successfully indexed."); - } - } - - @Override - public void onFailure(Exception ex) { - LOGGER.error("Indexing the clickthrough rates failed.", ex); - } - - }); - - } - - } - - /** - * Index the judgments. - * @param judgments A collection of {@link Judgment judgments}. - * @throws IOException Thrown when there is a problem accessing OpenSearch. - * @return The ID of the indexed judgments. 
- */ - public String indexJudgments(final Collection judgments) throws Exception { - - final String judgmentsId = UUID.randomUUID().toString(); - final String timestamp = TimeUtils.getTimestamp(); - - final BulkRequest bulkRequest = new BulkRequest(); - - for(final Judgment judgment : judgments) { - - final Map j = judgment.getJudgmentAsMap(); - j.put("judgments_id", judgmentsId); - j.put("timestamp", timestamp); - - final IndexRequest indexRequest = new IndexRequest(JUDGMENTS_INDEX_NAME) - .id(UUID.randomUUID().toString()) - .source(j); - - bulkRequest.add(indexRequest); - - } - - // TODO: Don't use .get() - client.bulk(bulkRequest).get(); - - return judgmentsId; - - } - -} \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java deleted file mode 100644 index 7ca0ad6..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval.runners; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.client.Client; -import org.opensearch.eval.SearchQualityEvaluationPlugin; -import org.opensearch.index.query.BoolQueryBuilder; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.search.builder.SearchSourceBuilder; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; - -/** - * Base class for query set runners. Classes that extend this class - * should be specific to a search engine. See the {@link OpenSearchQuerySetRunner} for an example. - */ -public abstract class AbstractQuerySetRunner { - - private static final Logger LOGGER = LogManager.getLogger(AbstractQuerySetRunner.class); - - protected final Client client; - - public AbstractQuerySetRunner(final Client client) { - this.client = client; - } - - /** - * Runs the query set. - * @param querySetId The ID of the query set to run. - * @param judgmentsId The ID of the judgments set to use for search metric calculation. - * @param index The name of the index to run the query sets against. - * @param searchPipeline The name of the search pipeline to use, or null to not use a search pipeline. - * @param idField The field in the index that is used to uniquely identify a document. - * @param query The query that will be used to run the query set. - * @param k The k used for metrics calculation, i.e. DCG@k. - * @param threshold The cutoff for binary judgments. A judgment score greater than or equal - * to this value will be assigned a binary judgment value of 1. A judgment score - * less than this value will be assigned a binary judgment value of 0. - * @return The query set {@link QuerySetRunResult results} and calculated metrics. 
- */ - abstract QuerySetRunResult run(String querySetId, final String judgmentsId, final String index, final String searchPipeline, - final String idField, final String query, final int k, - final double threshold) throws Exception; - - /** - * Saves the query set results to a persistent store, which may be the search engine itself. - * @param result The {@link QuerySetRunResult results}. - */ - abstract void save(QuerySetRunResult result) throws Exception; - - /** - * Gets a query set from the index. - * @param querySetId The ID of the query set to get. - * @return The query set as a collection of maps of query to frequency - * @throws Exception Thrown if the query set cannot be retrieved. - */ - public final Collection> getQuerySet(final String querySetId) throws Exception { - - // Get the query set. - final SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - sourceBuilder.query(QueryBuilders.matchQuery("_id", querySetId)); - - // Will be at most one match. - sourceBuilder.from(0); - sourceBuilder.size(1); - - final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.QUERY_SETS_INDEX_NAME).source(sourceBuilder); - - // TODO: Don't use .get() - final SearchResponse searchResponse = client.search(searchRequest).get(); - - if(searchResponse.getHits().getHits().length > 0) { - - // The queries from the query set that will be run. - return (Collection>) searchResponse.getHits().getAt(0).getSourceAsMap().get("queries"); - - } else { - - LOGGER.error("Unable to get query set with ID {}", querySetId); - - // The query set was not found. - throw new RuntimeException("The query set with ID " + querySetId + " was not found."); - - } - - } - - /** - * Get a judgment from the index. - * @param judgmentsId The ID of the judgments to find. - * @param query The user query. - * @param documentId The document ID. - * @return The value of the judgment, or NaN if the judgment cannot be found. - */ - public Double getJudgmentValue(final String judgmentsId, final String query, final String documentId) throws Exception { - - // Find a judgment that matches the judgments_id, query_id, and document_id fields in the index. - - final BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); - boolQueryBuilder.must(QueryBuilders.termQuery("judgments_id", judgmentsId)); - boolQueryBuilder.must(QueryBuilders.termQuery("query", query)); - boolQueryBuilder.must(QueryBuilders.termQuery("document_id", documentId)); - - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(boolQueryBuilder); - - // Will be a max of 1 result since we are getting the judgments by ID. - searchSourceBuilder.from(0); - searchSourceBuilder.size(1); - - // Only include the judgment field in the response. - final String[] includeFields = new String[] {"judgment"}; - final String[] excludeFields = new String[] {}; - searchSourceBuilder.fetchSource(includeFields, excludeFields); - - final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.JUDGMENTS_INDEX_NAME).source(searchSourceBuilder); - - Double judgment = Double.NaN; - - final SearchResponse searchResponse = client.search(searchRequest).get(); - - if (searchResponse.getHits().getHits().length > 0) { - - final Map j = searchResponse.getHits().getAt(0).getSourceAsMap(); - - // LOGGER.debug("Judgment contains a value: {}", j.get("judgment")); - - // TODO: Why does this not exist in some cases? 
- if(j.containsKey("judgment")) { - judgment = (Double) j.get("judgment"); - } - - } else { - - // No judgment for this query/doc pair exists. - judgment = Double.NaN; - - } - - return judgment; - - } - - /** - * Gets the judgments for a query / document pairs. - * @param judgmentsId The judgments collection for which the judgment to retrieve belongs. - * @param query The user query. - * @param orderedDocumentIds A list of document IDs returned for the user query. - * @param k The k used for metrics calculation, i.e. DCG@k. - * @return An ordered list of relevance scores for the query / document pairs. - * @throws Exception Thrown if a judgment cannot be retrieved. - */ - protected RelevanceScores getRelevanceScores(final String judgmentsId, final String query, final List orderedDocumentIds, final int k) throws Exception { - - // Ordered list of scores. - final List scores = new ArrayList<>(); - - // Count the number of documents without judgments. - int documentsWithoutJudgmentsCount = 0; - - // For each document (up to k), get the judgment for the document. - for (int i = 0; i < k && i < orderedDocumentIds.size(); i++) { - - final String documentId = orderedDocumentIds.get(i); - - // Find the judgment value for this combination of query and documentId from the index. - final Double judgmentValue = getJudgmentValue(judgmentsId, query, documentId); - - // If a judgment for this query/doc pair is not found, Double.NaN will be returned. - if(!Double.isNaN(judgmentValue)) { - LOGGER.info("Score found for document ID {} with judgments {} and query {} = {}", documentId, judgmentsId, query, judgmentValue); - scores.add(judgmentValue); - } else { - //LOGGER.info("No score found for document ID {} with judgments {} and query {}", documentId, judgmentsId, query); - documentsWithoutJudgmentsCount++; - } - - } - - double frogs = ((double) documentsWithoutJudgmentsCount) / orderedDocumentIds.size(); - - if(Double.isNaN(frogs)) { - frogs = 1.0; - } - - // Multiply by 100 to be a percentage. - frogs *= 100; - - LOGGER.info("frogs for query {} = {} ------- {} / {}", query, frogs, documentsWithoutJudgmentsCount, orderedDocumentIds.size()); - - return new RelevanceScores(scores, frogs); - - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java deleted file mode 100644 index a1f0c4f..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ -package org.opensearch.eval.runners; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.action.admin.indices.create.CreateIndexRequest; -import org.opensearch.action.admin.indices.create.CreateIndexResponse; -import org.opensearch.action.admin.indices.exists.indices.IndicesExistsRequest; -import org.opensearch.action.admin.indices.exists.indices.IndicesExistsResponse; -import org.opensearch.action.bulk.BulkRequest; -import org.opensearch.action.bulk.BulkResponse; -import org.opensearch.action.index.IndexRequest; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.client.Client; -import org.opensearch.core.action.ActionListener; -import org.opensearch.eval.SearchQualityEvaluationPlugin; -import org.opensearch.eval.metrics.DcgSearchMetric; -import org.opensearch.eval.metrics.NdcgSearchMetric; -import org.opensearch.eval.metrics.PrecisionSearchMetric; -import org.opensearch.eval.metrics.SearchMetric; -import org.opensearch.eval.utils.TimeUtils; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.search.SearchHit; -import org.opensearch.search.builder.SearchSourceBuilder; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; - -import static org.opensearch.eval.SearchQualityEvaluationRestHandler.QUERY_PLACEHOLDER; - -/** - * A {@link AbstractQuerySetRunner} for Amazon OpenSearch. - */ -public class OpenSearchQuerySetRunner extends AbstractQuerySetRunner { - - private static final Logger LOGGER = LogManager.getLogger(OpenSearchQuerySetRunner.class); - - /** - * Creates a new query set runner - * - * @param client An OpenSearch {@link Client}. - */ - public OpenSearchQuerySetRunner(final Client client) { - super(client); - } - - @Override - public QuerySetRunResult run(final String querySetId, final String judgmentsId, final String index, - final String searchPipeline, final String idField, final String query, - final int k, final double threshold) throws Exception { - - final Collection> querySet = getQuerySet(querySetId); - LOGGER.info("Found {} queries in query set {}", querySet.size(), querySetId); - - try { - - // The results of each query. - final List queryResults = new ArrayList<>(); - - for (Map queryMap : querySet) { - - // Loop over each query in the map and run each one. - for (final String userQuery : queryMap.keySet()) { - - // Replace the query placeholder with the user query. - final String parsedQuery = query.replace(QUERY_PLACEHOLDER, userQuery); - - // Build the query from the one that was passed in. - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - - searchSourceBuilder.query(QueryBuilders.wrapperQuery(parsedQuery)); - searchSourceBuilder.from(0); - searchSourceBuilder.size(k); - - final String[] includeFields = new String[]{idField}; - final String[] excludeFields = new String[]{}; - searchSourceBuilder.fetchSource(includeFields, excludeFields); - - // LOGGER.info(searchSourceBuilder.toString()); - - final SearchRequest searchRequest = new SearchRequest(index); - searchRequest.source(searchSourceBuilder); - - if(searchPipeline != null) { - searchSourceBuilder.pipeline(searchPipeline); - searchRequest.pipeline(searchPipeline); - } - - // This is to keep OpenSearch from rejecting queries. - // TODO: Look at using the Workload Management in 2.18.0. 
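The run() method above treats the supplied query as a template: a placeholder token is replaced with each user query from the query set before the search is executed. A minimal sketch of that substitution follows; the placeholder token and template are illustrative assumptions, since the plugin defines its own value in SearchQualityEvaluationRestHandler.QUERY_PLACEHOLDER.

```
/**
 * Illustrative only: ASSUMED_PLACEHOLDER is not the token the plugin actually uses.
 */
class QueryTemplateSketch {

    private static final String ASSUMED_PLACEHOLDER = "{{user_query}}";

    // Replace the placeholder in the query template with the user query, as run() does above.
    static String substitute(final String queryTemplate, final String userQuery) {
        return queryTemplate.replace(ASSUMED_PLACEHOLDER, userQuery);
    }

    public static void main(final String[] args) {
        final String template = "{\"match\": {\"title\": \"" + ASSUMED_PLACEHOLDER + "\"}}";
        System.out.println(substitute(template, "laptop bag"));
        // Prints: {"match": {"title": "laptop bag"}}
    }

}
```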
- Thread.sleep(50); - - client.search(searchRequest, new ActionListener<>() { - - @Override - public void onResponse(final SearchResponse searchResponse) { - - final List orderedDocumentIds = new ArrayList<>(); - - for (final SearchHit hit : searchResponse.getHits().getHits()) { - - final String documentId; - - if("_id".equals(idField)) { - documentId = hit.getId(); - } else { - // TODO: Need to check this field actually exists. - documentId = hit.getSourceAsMap().get(idField).toString(); - } - - orderedDocumentIds.add(documentId); - - } - - try { - - final RelevanceScores relevanceScores = getRelevanceScores(judgmentsId, userQuery, orderedDocumentIds, k); - - // Calculate the metrics for this query. - final SearchMetric dcgSearchMetric = new DcgSearchMetric(k, relevanceScores.getRelevanceScores()); - final SearchMetric ndcgSearchmetric = new NdcgSearchMetric(k, relevanceScores.getRelevanceScores()); - final SearchMetric precisionSearchMetric = new PrecisionSearchMetric(k, threshold, relevanceScores.getRelevanceScores()); - - final Collection searchMetrics = List.of(dcgSearchMetric, ndcgSearchmetric, precisionSearchMetric); - - queryResults.add(new QueryResult(userQuery, orderedDocumentIds, k, searchMetrics, relevanceScores.getFrogs())); - - } catch (Exception ex) { - LOGGER.error("Unable to get relevance scores for judgments {} and user query {}.", judgmentsId, userQuery, ex); - } - - } - - @Override - public void onFailure(Exception ex) { - LOGGER.error("Unable to search using query: {}", searchSourceBuilder.toString(), ex); - } - }); - - } - - } - - // Calculate the search metrics for the entire query set given the individual query set metrics. - // Sum up the metrics for each query per metric type. - final int querySetSize = queryResults.size(); - final Map sumOfMetrics = new HashMap<>(); - for(final QueryResult queryResult : queryResults) { - for(final SearchMetric searchMetric : queryResult.getSearchMetrics()) { - //LOGGER.info("Summing: {} - {}", searchMetric.getName(), searchMetric.getValue()); - sumOfMetrics.merge(searchMetric.getName(), searchMetric.getValue(), Double::sum); - } - } - - // Now divide by the number of queries. - final Map querySetMetrics = new HashMap<>(); - for(final String metric : sumOfMetrics.keySet()) { - //LOGGER.info("Dividing by the query set size: {} / {}", sumOfMetrics.get(metric), querySetSize); - querySetMetrics.put(metric, sumOfMetrics.get(metric) / querySetSize); - } - - final String querySetRunId = UUID.randomUUID().toString(); - final QuerySetRunResult querySetRunResult = new QuerySetRunResult(querySetRunId, querySetId, queryResults, querySetMetrics); - - LOGGER.info("Query set run complete: {}", querySetRunId); - - return querySetRunResult; - - } catch (Exception ex) { - throw new RuntimeException("Unable to run query set.", ex); - } - - } - - @Override - public void save(final QuerySetRunResult result) throws Exception { - - // Now, index the metrics as expected by the dashboards. 
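After each search returns, the relevance scores for the top k documents are turned into DCG@k, NDCG@k, and Precision@k, and the per-query values are then averaged over the query set. Below is a minimal, self-contained sketch of those three metrics; the plugin's own implementations are DcgSearchMetric, NdcgSearchMetric, and PrecisionSearchMetric and may differ in detail (for example, the exact discount base or the precision denominator).

```
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

class SearchMetricsSketch {

    // DCG@k with the common log2(position + 2) discount over 0-based positions.
    static double dcgAtK(final List<Double> relevanceScores, final int k) {
        double dcg = 0.0;
        for (int i = 0; i < k && i < relevanceScores.size(); i++) {
            dcg += relevanceScores.get(i) / (Math.log(i + 2) / Math.log(2));
        }
        return dcg;
    }

    // NDCG@k: DCG@k of the observed ordering divided by DCG@k of the ideal (descending) ordering.
    static double ndcgAtK(final List<Double> relevanceScores, final int k) {
        final List<Double> ideal = new ArrayList<>(relevanceScores);
        ideal.sort(Comparator.reverseOrder());
        final double idealDcg = dcgAtK(ideal, k);
        return idealDcg == 0.0 ? 0.0 : dcgAtK(relevanceScores, k) / idealDcg;
    }

    // Precision@k using the binary threshold described above: scores >= threshold count as relevant.
    static double precisionAtK(final List<Double> relevanceScores, final int k, final double threshold) {
        final int considered = Math.min(k, relevanceScores.size());
        if (considered == 0) {
            return 0.0;
        }
        int relevant = 0;
        for (int i = 0; i < considered; i++) {
            if (relevanceScores.get(i) >= threshold) {
                relevant++;
            }
        }
        return (double) relevant / considered;
    }

}
```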
- - // See https://github.com/o19s/opensearch-search-quality-evaluation/blob/main/opensearch-dashboard-prototyping/METRICS_SCHEMA.md - // See https://github.com/o19s/opensearch-search-quality-evaluation/blob/main/opensearch-dashboard-prototyping/sample_data.ndjson - - final IndicesExistsRequest indicesExistsRequest = new IndicesExistsRequest(SearchQualityEvaluationPlugin.DASHBOARD_METRICS_INDEX_NAME); - - client.admin().indices().exists(indicesExistsRequest, new ActionListener<>() { - - @Override - public void onResponse(IndicesExistsResponse indicesExistsResponse) { - - if(!indicesExistsResponse.isExists()) { - - // Create the index. - // TODO: Read this mapping from a resource file instead. - final String mapping = "{\n" + - " \"properties\": {\n" + - " \"datetime\": { \"type\": \"date\", \"format\": \"strict_date_time\" },\n" + - " \"search_config\": { \"type\": \"keyword\" },\n" + - " \"query_set_id\": { \"type\": \"keyword\" },\n" + - " \"query\": { \"type\": \"keyword\" },\n" + - " \"metric\": { \"type\": \"keyword\" },\n" + - " \"value\": { \"type\": \"double\" },\n" + - " \"application\": { \"type\": \"keyword\" },\n" + - " \"evaluation_id\": { \"type\": \"keyword\" },\n" + - " \"frogs_percent\": { \"type\": \"double\" }\n" + - " }\n" + - " }"; - - // Create the judgments index. - final CreateIndexRequest createIndexRequest = new CreateIndexRequest(SearchQualityEvaluationPlugin.DASHBOARD_METRICS_INDEX_NAME).mapping(mapping); - - client.admin().indices().create(createIndexRequest, new ActionListener<>() { - - @Override - public void onResponse(CreateIndexResponse createIndexResponse) { - LOGGER.info("{} index created.", SearchQualityEvaluationPlugin.DASHBOARD_METRICS_INDEX_NAME); - } - - @Override - public void onFailure(Exception ex) { - LOGGER.error("Unable to create the {} index.", SearchQualityEvaluationPlugin.DASHBOARD_METRICS_INDEX_NAME, ex); - } - - }); - - } - - } - - @Override - public void onFailure(Exception ex) { - LOGGER.error("Unable to determine if {} index exists.", SearchQualityEvaluationPlugin.DASHBOARD_METRICS_INDEX_NAME, ex); - } - - }); - - final BulkRequest bulkRequest = new BulkRequest(); - final String timestamp = TimeUtils.getTimestamp(); - - for(final QueryResult queryResult : result.getQueryResults()) { - - for(final SearchMetric searchMetric : queryResult.getSearchMetrics()) { - - // TODO: Make sure all of these items have values. - final Map metrics = new HashMap<>(); - metrics.put("datetime", timestamp); - metrics.put("search_config", "research_1"); - metrics.put("query_set_id", result.getQuerySetId()); - metrics.put("query", queryResult.getQuery()); - metrics.put("metric", searchMetric.getName()); - metrics.put("value", searchMetric.getValue()); - metrics.put("application", "sample_data"); - metrics.put("evaluation_id", result.getRunId()); - metrics.put("frogs_percent", queryResult.getFrogs()); - - // TODO: This is using the index name from the sample data. 
- bulkRequest.add(new IndexRequest("sqe_metrics_sample_data").source(metrics)); - - } - - } - - client.bulk(bulkRequest, new ActionListener<>() { - - @Override - public void onResponse(BulkResponse bulkItemResponses) { - LOGGER.info("Successfully indexed {} metrics.", bulkItemResponses.getItems().length); - } - - @Override - public void onFailure(Exception ex) { - LOGGER.error("Unable to bulk index metrics.", ex); - } - - }); - - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java b/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java deleted file mode 100644 index 263d70a..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.eval.samplers; - -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.client.node.NodeClient; -import org.opensearch.eval.SearchQualityEvaluationPlugin; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.search.SearchHit; -import org.opensearch.search.builder.SearchSourceBuilder; - -import java.util.HashMap; -import java.util.Map; - -/** - * An implementation of {@link AbstractQuerySampler} that uses all UBI queries without any sampling. - */ -public class AllQueriesQuerySampler extends AbstractQuerySampler { - - public static final String NAME = "none"; - - private final NodeClient client; - private final AllQueriesQuerySamplerParameters parameters; - - /** - * Creates a new sampler. - * @param client The OpenSearch {@link NodeClient client}. - */ - public AllQueriesQuerySampler(final NodeClient client, final AllQueriesQuerySamplerParameters parameters) { - this.client = client; - this.parameters = parameters; - } - - @Override - public String getName() { - return NAME; - } - - @Override - public String sample() throws Exception { - - // Get queries from the UBI queries index. - // TODO: This needs to use scroll or something else. - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(QueryBuilders.matchAllQuery()); - searchSourceBuilder.from(0); - searchSourceBuilder.size(parameters.getQuerySetSize()); - - final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.UBI_QUERIES_INDEX_NAME).source(searchSourceBuilder); - - // TODO: Don't use .get() - final SearchResponse searchResponse = client.search(searchRequest).get(); - - final Map queries = new HashMap<>(); - - for(final SearchHit hit : searchResponse.getHits().getHits()) { - - final Map fields = hit.getSourceAsMap(); - queries.merge(fields.get("user_query").toString(), 1L, Long::sum); - - // Will be useful for paging once implemented. 
- if(queries.size() > parameters.getQuerySetSize()) { - break; - } - - } - - return indexQuerySet(client, parameters.getName(), parameters.getDescription(), parameters.getSampling(), queries); - - } - -} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/plugin-metadata/plugin-security.policy b/opensearch-search-quality-evaluation-plugin/src/main/plugin-metadata/plugin-security.policy deleted file mode 100644 index eb1558f..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/plugin-metadata/plugin-security.policy +++ /dev/null @@ -1,4 +0,0 @@ -grant { - permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; - permission java.lang.RuntimePermission "accessDeclaredMembers"; -}; diff --git a/opensearch-search-quality-evaluation-plugin/src/main/resources/META-INF/services/org.opensearch.jobscheduler.spi.JobSchedulerExtension b/opensearch-search-quality-evaluation-plugin/src/main/resources/META-INF/services/org.opensearch.jobscheduler.spi.JobSchedulerExtension deleted file mode 100644 index a1f979c..0000000 --- a/opensearch-search-quality-evaluation-plugin/src/main/resources/META-INF/services/org.opensearch.jobscheduler.spi.JobSchedulerExtension +++ /dev/null @@ -1 +0,0 @@ -org.opensearch.eval.SearchQualityEvaluationPlugin \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/useful_queries.txt b/opensearch-search-quality-evaluation-plugin/useful_queries.txt deleted file mode 100644 index 35c8335..0000000 --- a/opensearch-search-quality-evaluation-plugin/useful_queries.txt +++ /dev/null @@ -1,151 +0,0 @@ -DELETE ubi_events -DELETE ubi_queries - -GET ubi_events/_mapping -GET ubi_events/_search - -GET ubi_queries/_mapping -GET ubi_queries/_search - -DELETE judgments -GET judgments/_search - - -PUT ubi_queries -{ - "mappings": { - "properties": { - "timestamp": { "type": "date", "format": "strict_date_time" }, - "query_id": { "type": "keyword", "ignore_above": 100 }, - "query": { "type": "text" }, - "query_response_id": { "type": "keyword", "ignore_above": 100 }, - "query_response_hit_ids": { "type": "keyword" }, - "user_query": { "type": "keyword", "ignore_above": 256 }, - "query_attributes": { "type": "flat_object" }, - "client_id": { "type": "keyword", "ignore_above": 100 }, - "application": { "type": "keyword", "ignore_above": 100 } - } - } -} - -PUT ubi_events -{ -"mappings": { - "properties": { - "application": { "type": "keyword", "ignore_above": 256 }, - "action_name": { "type": "keyword", "ignore_above": 100 }, - "client_id": { "type": "keyword", "ignore_above": 100 }, - "query_id": { "type": "keyword", "ignore_above": 100 }, - "message": { "type": "keyword", "ignore_above": 1024 }, - "message_type": { "type": "keyword", "ignore_above": 100 }, - "timestamp": { - "type": "date", - "format":"strict_date_time", - "ignore_malformed": true, - "doc_values": true - }, - "event_attributes": { - "dynamic": true, - "properties": { - "position": { - "properties": { - "ordinal": { "type": "integer" }, - "x": { "type": "integer" }, - "y": { "type": "integer" }, - "page_depth": { "type": "integer" }, - "scroll_depth": { "type": "integer" }, - "trail": { "type": "text", - "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } - } - } - } - }, - "object": { - "properties": { - "internal_id": { "type": "keyword" }, - "object_id": { "type": "keyword", "ignore_above": 256 }, - "object_id_field": { "type": "keyword", "ignore_above": 100 }, - "name": { "type": "keyword", "ignore_above": 256 }, - "description": { "type": 
"text", - "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } - }, - "object_detail": { "type": "object" } - } - } - } - } - } - } -} - -GET ubi_events/_search -{ - "query": { - "range": { - "event_attributes.position.ordinal": { - "lte": 20 - } - } - } -} - -GET ubi_queries/_search -{ - "query": { - "term": { - "user_query": "batteries" - } - } -} - -GET ubi_events/_search -{ - "query": { - "bool": { - "must": [ - { - "term": { - "query_id": "cdc01f67-0b24-4c96-bb56-a89234f4fb0c" - } - }, - { - "term": { - "action_name": "click" - } - }, - { - "term": { - "event_attributes.position.ordinal": "0" - } - }, - { - "term": { - "event_attributes.object.object_id": "B0797J3DWK" - } - } - ] - } - } - } -} - -GET ubi_events/_search -{ - "size": 0, - "aggs": { - "By_Action": { - "terms": { - "field": "action_name", - "size": 20 - }, - "aggs": { - "By_Position": { - "terms": { - "field": "event_attributes.position.ordinal", - "size": 20 - } - } - } - } - } -} \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..75f41e9 --- /dev/null +++ b/pom.xml @@ -0,0 +1,100 @@ + + + 4.0.0 + org.opensearch + search-evaluation-framework + 1.0.0-SNAPSHOT + search-evaluation-framework + https://www.ubisearch.dev + + UTF-8 + 21 + + + + + maven-assembly-plugin + + false + + + true + org.opensearch.eval.App + + + + jar-with-dependencies + + search-evaluation-framework + + + + make-my-jar-with-dependencies + package + + single + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.13.0 + + 21 + 21 + + + + + + + org.opensearch.client + opensearch-java + 2.19.0 + + + com.fasterxml.jackson.core + jackson-databind + 2.18.2 + + + commons-cli + commons-cli + 1.9.0 + + + org.apache.logging.log4j + log4j-core + 2.24.3 + + + org.apache.httpcomponents.core5 + httpcore5 + 5.3.1 + + + org.apache.httpcomponents.client5 + httpclient5 + 5.4.1 + + + commons-logging + commons-logging + 1.3.4 + + + com.google.code.gson + gson + 2.11.0 + + + org.junit.jupiter + junit-jupiter-api + 5.11.4 + test + + + \ No newline at end of file diff --git a/runbook/requirements.txt b/runbook/requirements.txt deleted file mode 100644 index 934b71e..0000000 --- a/runbook/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -notebook==7.2.2 -bash_kernel==0.9.3 diff --git a/runbook/search-quality-eval.ipynb b/runbook/search-quality-eval.ipynb deleted file mode 100644 index 578fcb9..0000000 --- a/runbook/search-quality-eval.ipynb +++ /dev/null @@ -1,683 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 14, - "id": "7ec95d0b-308a-4863-bc6b-c82e8553551e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"acknowledged\":true}\n" - ] - } - ], - "source": [ - "# Delete all indexes to start fresh.\n", - "# curl -X DELETE http://localhost:9200/ubi_queries,ubi_events,\n", - "# curl -X DELETE http://localhost:9200/judgments\n", - "curl -X DELETE http://localhost:9200/search_quality_eval_query_sets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fc25e25a-7200-4298-858b-4d606b32829a", - "metadata": {}, - "outputs": [], - "source": [ - "# Create the UBI indexes\n", - "curl -X PUT http://localhost:9200/ubi_queries/_mappings -H \"Content-Type: application/json\" -d'\n", - "{\n", - " \"properties\": {\n", - " \"timestamp\": { \"type\": \"date\", \"format\": \"strict_date_time\" },\n", - " \"query_id\": { \"type\": \"keyword\", \"ignore_above\": 100 },\n", - " \"query\": { \"type\": \"text\" },\n", - " 
\"query_response_id\": { \"type\": \"keyword\", \"ignore_above\": 100 },\n", - " \"query_response_hit_ids\": { \"type\": \"keyword\" },\n", - " \"user_query\": { \"type\": \"keyword\", \"ignore_above\": 256 },\n", - " \"query_attributes\": { \"type\": \"flat_object\" },\n", - " \"client_id\": { \"type\": \"keyword\", \"ignore_above\": 100 },\n", - " \"application\": { \"type\": \"keyword\", \"ignore_above\": 100 }\n", - " }\n", - "}'\n", - "\n", - "curl -X PUT http://localhost:9200/ubi_events/_mappings -H \"Content-Type: application/json\" -d'\n", - "{\n", - " \"properties\": {\n", - " \"application\": { \"type\": \"keyword\", \"ignore_above\": 256 },\n", - " \"action_name\": { \"type\": \"keyword\", \"ignore_above\": 100 },\n", - " \"client_id\": { \"type\": \"keyword\", \"ignore_above\": 100 },\n", - " \"query_id\": { \"type\": \"keyword\", \"ignore_above\": 100 },\n", - " \"message\": { \"type\": \"keyword\", \"ignore_above\": 1024 },\n", - " \"message_type\": { \"type\": \"keyword\", \"ignore_above\": 100 },\n", - " \"timestamp\": {\n", - " \"type\": \"date\",\n", - " \"format\":\"strict_date_time\",\n", - " \"ignore_malformed\": true,\n", - " \"doc_values\": true\n", - " },\n", - " \"event_attributes\": {\n", - " \"dynamic\": true,\n", - " \"properties\": {\n", - " \"position\": {\n", - " \"properties\": {\n", - " \"ordinal\": { \"type\": \"integer\" },\n", - " \"x\": { \"type\": \"integer\" },\n", - " \"y\": { \"type\": \"integer\" },\n", - " \"page_depth\": { \"type\": \"integer\" },\n", - " \"scroll_depth\": { \"type\": \"integer\" },\n", - " \"trail\": { \"type\": \"text\",\n", - " \"fields\": { \"keyword\": { \"type\": \"keyword\", \"ignore_above\": 256 }\n", - " }\n", - " }\n", - " }\n", - " },\n", - " \"object\": {\n", - " \"properties\": {\n", - " \"internal_id\": { \"type\": \"keyword\" },\n", - " \"object_id\": { \"type\": \"keyword\", \"ignore_above\": 256 },\n", - " \"object_id_field\": { \"type\": \"keyword\", \"ignore_above\": 100 },\n", - " \"name\": { \"type\": \"keyword\", \"ignore_above\": 256 },\n", - " \"description\": { \"type\": \"text\",\n", - " \"fields\": { \"keyword\": { \"type\": \"keyword\", \"ignore_above\": 256 } }\n", - " },\n", - " \"object_detail\": { \"type\": \"object\" }\n", - " }\n", - " }\n", - " }\n", - " }\n", - " }\n", - "}'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc97c07b-e182-4248-be0d-d3cd6d0c4471", - "metadata": {}, - "outputs": [], - "source": [ - "# Index the ESCI data.\n", - "cd ./../data/esci && ./index.sh 1> /dev/null 2> /dev/null" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "b6793210-e45b-4445-ad1f-c4b56c80cb1a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"count\":100000,\"_shards\":{\"total\":1,\"successful\":1,\"skipped\":0,\"failed\":0}}\n", - "{\"count\":528374,\"_shards\":{\"total\":1,\"successful\":1,\"skipped\":0,\"failed\":0}}\n" - ] - } - ], - "source": [ - "# See the query and events count from the indexed ESCI data.\n", - "curl -s http://localhost:9200/ubi_queries/_count\n", - "curl -s http://localhost:9200/ubi_events/_count" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "21d85f25-97a6-480f-84da-d03b6af682ca", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"message\": \"Implicit judgment generation initiated.\"}\n" - ] - } - ], - "source": [ - "# Create implicit judgments now.\n", - "curl -s -X POST 
\"http://localhost:9200/_plugins/search_quality_eval/judgments?click_model=coec&max_rank=20\"\n", - "\n", - "# Schedule implicit judgments creation.\n", - "# curl -s -X POST \"http://localhost:9200/_plugins/search_quality_eval/schedule?id=1&click_model=coec&max_rank=20&job_name=test2&interval=10\" | jq\n", - "# curl -s \"http://localhost:9200/search_quality_eval_scheduler/_search\" | jq" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "e04e1282-c744-4ead-a65d-66d1b731391a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"took\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m3\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"timed_out\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39mfalse\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_shards\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"total\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"successful\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"skipped\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m0\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"failed\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m0\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"hits\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"total\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"value\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m5000\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"relation\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"eq\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"max_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"hits\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m[\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"6ae380c3-4313-4b43-b145-31fbd27a115c\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"1\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m20.949720670391063\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"k cups\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B07P23H371\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"400c3242-a4f5-4897-85a2-6db1d7aaf0b4\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " 
\u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"1\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m22.609514837494114\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"k cups\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B00K2RY8GI\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"8151d6df-0ab6-428a-ba83-450df26987df\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"1\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m17.46724890829694\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"k cups\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B082D6NC6P\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"36a399cf-eb78-489d-8795-a408b7da4f26\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"1\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m17.557060446450965\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"k cups\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B06XSFHTWM\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"972740a4-8494-4e08-82b4-07bccfa44ec3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"1\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m8.660508083140877\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"k cups\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: 
\u001b[0m\u001b[0;32m\"B081NTR1XX\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"2a5133a2-6716-451e-a605-bdf038bb2312\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"2\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m13.966480446927374\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"chips\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B00BHFELOS\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"7b0d830a-ac6d-483b-abd0-1ddf2a3de36a\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"2\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m6.550218340611353\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"chips\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B0170B0RUY\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"5926100c-9992-40b5-b20b-a2bf9d8b7bb3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"2\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m10.032605969400551\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"chips\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B00ID0OURI\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"80e7be09-a366-424c-9048-40037c168dcd\"\u001b[0m\u001b[1;39m,\n", - " 
\u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"2\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m0\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"chips\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B079RMSR88\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"judgments\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"63db1764-ab04-4c7e-8ddc-95ac0010c3b8\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"query_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"2\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"judgment\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m0\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"query\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"chips\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"document\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"B07XJWNF9R\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m]\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - "\u001b[1;39m}\u001b[0m\n" - ] - } - ], - "source": [ - "# See the judgments.\n", - "curl -s http://localhost:9200/judgments/_search | jq" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "6b6f847d-5142-4d0e-83b1-6127410bfa67", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"query_set\": \"d4857274-3216-4add-afeb-e8c30562270e\"}\n" - ] - } - ], - "source": [ - "# Create a query set using PPTSS sampling from the UBI queries.\n", - "curl -s -X POST \"http://localhost:9200/_plugins/search_quality_eval/queryset?name=test&description=fake&sampling=pptss&query_set_size=50\"" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "2cc4ee64-f3b8-4a6b-8050-1fa9e70fc9ba", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"took\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"timed_out\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39mfalse\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_shards\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"total\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"successful\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"skipped\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m0\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"failed\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m0\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"hits\"\u001b[0m\u001b[1;39m: 
\u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"total\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"value\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m4\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"relation\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"eq\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"max_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"hits\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m[\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"search_quality_eval_query_sets\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"46c14597-b4d2-4872-af9c-c5b4ffdef876\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"sampling\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"pptss\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"name\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"test\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"description\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"fake\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"created_at\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1732544453255\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"queries\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m[\n", - " \u001b[0;32m\"k cups\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"chips\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"bicycle\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shirts long sleeves\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"best bra without underwire\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"lamp base only without shade\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"fitbit charge 3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"laptop\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ipod\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"hot pink nike shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"instant pot\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"halloween costumes for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"hydro flask\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"face mask\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"barbie dolls\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple airpods\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"printer\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"wood graining tool for painting\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"lightning cable\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"iphone\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"toy story\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"paper towels\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"moses sandals for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"rings\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"watches for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"jerzees long sleeve t shirt\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"frozen 2\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"water shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"dog harness for large dogs\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"youth slippers size 7\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gun\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"curious george 
yellow hat\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple pen\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"vacuum cleaner\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple headphones\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"echo dot\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"moncler\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"womens dresses\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"diffusers for essential oils\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"weighted blanket\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"beats wireless headphones\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"rugs\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch series 3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"simple grunge outfits\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"smart watch\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"nike\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"chromebook\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ban idiots not guns\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"3 ring binder\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"womens slippers\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"womens shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"futon frames full size without mattress\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gifts for men\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"mini fridge\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"tv\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"xo hat the weeknd\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"cut up high waisted shorts\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"harry potter\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"airsoft guns\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gaming chair\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"reusable camping trash bag\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"skirt\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ratchet belts for men without buckle\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"macbook\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"iphone xr\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"patio furniture\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"twin mattress\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"jeans american eagle for men\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"carhartt double front pants for men\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"sewing machine\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"compression socks\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"foot massager\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"kerug filter\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"fire stick\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"vintage sewing kit\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"desk\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"iphone 11\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ipad\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"office chair\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"cut out tank tops for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gaming laptop\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"yoga mat\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"laptops\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"dildo\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"compression socks women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple earphones\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"wireless earbuds\"\u001b[0m\u001b[1;39m,\n", - " 
\u001b[0;32m\"monitor\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shoes for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"nxt crossbow\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"batman\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"aquarium\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple earbuds\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"marshmallows without gelatin\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"bluetooth earbuds\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"airpods\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m]\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"search_quality_eval_query_sets\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"78334998-6fc8-4f96-94a9-78ea0b4c694f\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"sampling\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"pptss\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"name\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"test\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"description\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"fake\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"created_at\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1732544477679\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"queries\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m[\n", - " \u001b[0;32m\"yeezy\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"tv\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"k cups\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"headset with microphone\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"fitbit charge 3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"laptop\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"airsoft guns\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"hot pink nike shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"instant pot\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ratchet belts for men without buckle\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"iphone xr\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"patio furniture\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"twin mattress\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"hydro flask\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"face mask\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"carhartt double front pants for men\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"compression socks\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"printer\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"iphone\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"kerug filter\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"plus size womens clothing\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"rings\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"yeti\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"scrunchies\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"frozen 2\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"where the crawdads sing\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"youth slippers size 7\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gun\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"office chair\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"weighted blanket\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"beats wireless 
headphones\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"yoga mat\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"laptops\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"dildo\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch series 3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"simple grunge outfits\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"compression socks women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"iphone headphones\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"chromebook\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"wireless earbuds\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ban idiots not guns\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"puma high tops\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"monitor\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"womens shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"aquarium\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"futon frames full size without mattress\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch 3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gifts for men\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"airpods\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m]\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"search_quality_eval_query_sets\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"b5f3c63c-429d-4870-9e9b-8154ef834924\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"sampling\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"pptss\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"name\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"test\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"description\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"fake\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"created_at\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1732544512268\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"queries\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m[\n", - " \u001b[0;32m\"tv\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"xo hat the weeknd\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"best bra without underwire\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"king platform bed without headboard\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"headset with microphone\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"aloy costume\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"fitbit charge 3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"laptop\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"halo\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gaming chair\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"hot pink nike shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"reusable camping trash bag\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"music\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"patio furniture\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"sunglasses\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"face mask\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"printer\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"paper towels\"\u001b[0m\u001b[1;39m,\n", - " 
\u001b[0;32m\"kerug filter\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"blackout curtains\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"desk\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"iphone 11\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ipad\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"youth slippers size 7\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple pen\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"vacuum cleaner\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"echo dot\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"moncler\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"portable charger\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"yoga mat\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"laptops\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"dildo\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"air freshener\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"boots for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"compression socks women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"nike\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"chromebook\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ban idiots not guns\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"wireless earbuds\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"monitor\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shoes for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"batman\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"futon frames full size without mattress\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple earbuds\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"bluetooth earbuds\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gifts for men\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"toilet paper\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"airpods\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"mini fridge\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m]\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m,\n", - " \u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"_index\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"search_quality_eval_query_sets\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_id\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"d4857274-3216-4add-afeb-e8c30562270e\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_score\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"_source\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m{\n", - " \u001b[0m\u001b[34;1m\"sampling\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"pptss\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"name\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"test\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"description\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;32m\"fake\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"created_at\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[0;39m1732544554130\u001b[0m\u001b[1;39m,\n", - " \u001b[0m\u001b[34;1m\"queries\"\u001b[0m\u001b[1;39m: \u001b[0m\u001b[1;39m[\n", - " \u001b[0;32m\"tv\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"chips\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"bicycle\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shirts long sleeves\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"aloy costume\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"laptop\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"reusable camping trash bag\"\u001b[0m\u001b[1;39m,\n", - " 
\u001b[0;32m\"projector screen\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"iphone xr\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ratchet belts for men without buckle\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"macbook\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"calm and quiet for dogs\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"twin mattress\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"face mask\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"sewing machine\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"printer\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"compression socks\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"computer desk\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"toy story\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"foot massager\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"paper towels\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"fire stick\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"plus size womens clothing\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"blackout curtains\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"yeti\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"ipad\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"scrunchies\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"frozen 2\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"water shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"where the crawdads sing\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"youth slippers size 7\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple headphones\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"echo dot\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"gaming laptop\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"laptops\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"dildo\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch series 3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"simple grunge outfits\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"boots for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"chromebook\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"wireless earbuds\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"3 ring binder\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"puma high tops\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shoes for women\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"shoes\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple earbuds\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"apple watch 3\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"toilet paper\"\u001b[0m\u001b[1;39m,\n", - " \u001b[0;32m\"airpods\"\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m]\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m]\u001b[0m\u001b[1;39m\n", - " \u001b[1;39m}\u001b[0m\u001b[1;39m\n", - "\u001b[1;39m}\u001b[0m\n" - ] - } - ], - "source": [ - "# Look at the query sets.\n", - "curl -s http://localhost:9200/search_quality_eval_query_sets/_search | jq" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "abe28eb7-fe7b-47b9-aec0-f3090cd38ca7", - "metadata": {}, - "outputs": [], - "source": [ - "# Run a query set.\n", - "curl -s http://localhost:9200/search_quality_eval_run?id=${QUERY_SET_ID}" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Bash", - "language": "bash", - "name": "bash" - }, - "language_info": { - "codemirror_mode": "shell", - "file_extension": ".sh", - "mimetype": "text/x-sh", - "name": "bash" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/scripts/create-query-set.json b/scripts/create-query-set.json new file mode 100644 index 0000000..5d711d9 --- /dev/null +++ b/scripts/create-query-set.json @@ -0,0 +1,7 @@ +{ + "sampler": "all", + "name": "", + "description": "", + "sampling": "", + "querySetSize": 500 +} diff --git a/scripts/create-query-set.sh b/scripts/create-query-set.sh new file mode 100755 index 0000000..7a221c6 --- /dev/null +++ b/scripts/create-query-set.sh @@ -0,0 +1,4 @@ +#!/bin/bash -e + +# Create a query set using sampling. +java -jar ../target/search-evaluation-framework.jar -s create-query-set.json diff --git a/scripts/generate-judgments.sh b/scripts/generate-judgments.sh new file mode 100755 index 0000000..e4f343c --- /dev/null +++ b/scripts/generate-judgments.sh @@ -0,0 +1,4 @@ +#!/bin/bash -e + +# Create a click model. +java -jar ../target/search-evaluation-framework.jar -o http://localhost:9200 -c coec diff --git a/scripts/hybrid_query.txt b/scripts/hybrid_query.txt new file mode 100644 index 0000000..752c961 --- /dev/null +++ b/scripts/hybrid_query.txt @@ -0,0 +1,28 @@ +{ + "_source": { + "excludes": [ + "title_embedding" + ] + }, + "query": { + "hybrid": { + "queries": [ + { + "match": { + "title_text": { + "query": "#$query##" + } + } + }, + { + "neural": { + "title_embedding": { + "query_text": "#$query##", + "k": 50 + } + } + } + ] + } + } +} \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/scripts/get-click-through-rates.sh b/scripts/opensearch-scripts/get-click-through-rates.sh similarity index 100% rename from opensearch-search-quality-evaluation-plugin/scripts/get-click-through-rates.sh rename to scripts/opensearch-scripts/get-click-through-rates.sh diff --git a/opensearch-search-quality-evaluation-plugin/scripts/get-judgments.sh b/scripts/opensearch-scripts/get-judgments.sh similarity index 100% rename from opensearch-search-quality-evaluation-plugin/scripts/get-judgments.sh rename to scripts/opensearch-scripts/get-judgments.sh diff --git a/opensearch-search-quality-evaluation-plugin/scripts/get-metrics.sh b/scripts/opensearch-scripts/get-metrics.sh similarity index 100% rename from opensearch-search-quality-evaluation-plugin/scripts/get-metrics.sh rename to scripts/opensearch-scripts/get-metrics.sh diff --git a/scripts/opensearch-scripts/get-pipelines.sh b/scripts/opensearch-scripts/get-pipelines.sh new file mode 100755 index 0000000..876fd9d --- /dev/null +++ b/scripts/opensearch-scripts/get-pipelines.sh @@ -0,0 +1,3 @@ +#!/bin/bash -e + +curl http://localhost:9200/_search/pipeline | jq \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/scripts/get-query-set.sh b/scripts/opensearch-scripts/get-query-set.sh similarity index 100% rename from opensearch-search-quality-evaluation-plugin/scripts/get-query-set.sh rename to scripts/opensearch-scripts/get-query-set.sh diff --git a/opensearch-search-quality-evaluation-plugin/scripts/get-query-sets.sh b/scripts/opensearch-scripts/get-query-sets.sh similarity index 100% rename from opensearch-search-quality-evaluation-plugin/scripts/get-query-sets.sh rename to scripts/opensearch-scripts/get-query-sets.sh diff --git a/scripts/opensearch-scripts/initialize-ubi.sh b/scripts/opensearch-scripts/initialize-ubi.sh new file mode 100755 index 0000000..46f0e2d --- /dev/null +++ b/scripts/opensearch-scripts/initialize-ubi.sh @@ -0,0 +1,5 @@ +#!/bin/bash -e + +curl -s -X DELETE http://localhost:9200/ubi_queries,ubi_events | jq + +curl -s -X POST http://localhost:9200/_plugins/ubi/initialize | jq 
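Note on the query templates above: the `#$query##` token in scripts/hybrid_query.txt (and in the `query` field of scripts/run-query-set.json below) is a placeholder that the framework replaces with each user query from the query set before the search is executed, and the replacement is a plain string substitution. A minimal sketch follows; the class name, file path, and the example query "laptop" are illustrative, and only the `#$query##` token itself comes from the files in this change.

    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;

    public class PlaceholderSubstitutionSketch {
        public static void main(String[] args) throws Exception {
            // Read the hybrid query template and substitute the placeholder with one user query.
            final String template = Files.readString(Path.of("scripts/hybrid_query.txt"), StandardCharsets.UTF_8);
            final String parsedQuery = template.replace("#$query##", "laptop");
            // The parsed query is then sent as the body of a POST to <index>/_search,
            // optionally with a search_pipeline request parameter.
            System.out.println(parsedQuery);
        }
    }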
diff --git a/scripts/run-query-set.json b/scripts/run-query-set.json new file mode 100644 index 0000000..6db1519 --- /dev/null +++ b/scripts/run-query-set.json @@ -0,0 +1,11 @@ +{ + "query_set_id": "6b1ac777-758d-4f33-9bb6-7e3f15e77637", + "judgments_id": "76267535-0591-4e13-9e3d-8de5cb1329a6", + "index": "ecommerce", + "search_pipeline": "hybrid-search-pipeline", + "id_field": "asin", + "k": 10, + "threshold": 1.0, + "query": "{\"_source\": {\"excludes\": [\"title_embedding\"]},\"query\": {\"hybrid\": {\"queries\": [{\"match\": {\"title_text\": {\"query\": \"#$query##\"}}},{\"neural\": {\"title_embedding\": {\"query_text\": \"#$query##\",\"k\": 50}}}]}}}", + "not_used_query": "{\"query\": {\"match\": {\"description\": \"#$query##\"}}}" +} diff --git a/scripts/run-query-set.sh b/scripts/run-query-set.sh new file mode 100755 index 0000000..9e0f720 --- /dev/null +++ b/scripts/run-query-set.sh @@ -0,0 +1,4 @@ +#!/bin/bash -e + +# Run a query set. +java -jar ../target/search-evaluation-framework.jar -o http://localhost:9200 -r run-query-set.json diff --git a/settings.gradle b/settings.gradle deleted file mode 100644 index 7f2d692..0000000 --- a/settings.gradle +++ /dev/null @@ -1,3 +0,0 @@ -rootProject.name = 'search-evaluation-framework' -include 'opensearch-search-quality-evaluation-plugin' -include 'opensearch-search-quality-implicit-judgments' \ No newline at end of file diff --git a/src/main/java/org/opensearch/eval/App.java b/src/main/java/org/opensearch/eval/App.java new file mode 100644 index 0000000..7bcc314 --- /dev/null +++ b/src/main/java/org/opensearch/eval/App.java @@ -0,0 +1,306 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.eval; + +import com.google.gson.Gson; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Options; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.eval.engine.OpenSearchEngine; +import org.opensearch.eval.engine.SearchEngine; +import org.opensearch.eval.judgments.clickmodel.ClickModel; +import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel; +import org.opensearch.eval.judgments.clickmodel.coec.CoecClickModelParameters; +import org.opensearch.eval.runners.OpenSearchQuerySetRunner; +import org.opensearch.eval.runners.RunQuerySetParameters; +import org.opensearch.eval.samplers.AllQueriesQuerySampler; +import org.opensearch.eval.samplers.AllQueriesQuerySamplerParameters; +import org.opensearch.eval.samplers.ProbabilityProportionalToSizeParameters; +import org.opensearch.eval.samplers.ProbabilityProportionalToSizeQuerySampler; + +import java.io.File; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; + +public class App { + + private static final Logger LOGGER = LogManager.getLogger(App.class); + + public static void main(String[] args) throws Exception { + + System.out.println("Search Quality Evaluation Framework"); + + final Gson gson = new Gson(); + + final Options options = new Options(); + options.addOption("c", "create-click-model", true, "create a click model"); + options.addOption("s", "create-query-set", true, "create a query set using sampling"); + options.addOption("r", "run-query-set", true, "run a query set"); + options.addOption("o", "opensearch", true, "OpenSearch URL, e.g. http://localhost:9200"); + + final CommandLineParser parser = new DefaultParser(); + final CommandLine cmd = parser.parse(options, args); + + final URI uri; + if(cmd.hasOption("o")) { + uri = URI.create(cmd.getOptionValue("o")); + } else { + System.out.println("No OpenSearch host given so defaulting to http://localhost:9200"); + uri = URI.create("http://localhost:9200"); + } + + final SearchEngine searchEngine = new OpenSearchEngine(uri); + + if(cmd.hasOption("c")) { + + //final String clickModel = cmd.getOptionValue("c"); + System.out.println("Creating click model..."); + + final String clickModelType = cmd.getOptionValue("c"); + + if(CoecClickModel.CLICK_MODEL_NAME.equalsIgnoreCase(clickModelType)) { + + final CoecClickModelParameters coecClickModelParameters = new CoecClickModelParameters(10); + + final ClickModel cm = new CoecClickModel(searchEngine, coecClickModelParameters); + cm.calculateJudgments(); + + } else { + System.err.println("Invalid click model type. 
Valid models are 'coec'."); + } + + } else if (cmd.hasOption("r")) { + + System.out.println("Running query set..."); + + final String querySetOptionsFile = cmd.getOptionValue("r"); + final File file = new File(querySetOptionsFile); + + if(file.exists()) { + + final RunQuerySetParameters runQuerySetParameters = gson.fromJson(Files.readString(file.toPath(), StandardCharsets.UTF_8), RunQuerySetParameters.class); + + final OpenSearchQuerySetRunner openSearchQuerySetRunner = new OpenSearchQuerySetRunner(searchEngine); + openSearchQuerySetRunner.run(runQuerySetParameters); + + } else { + System.err.println("The query set run parameters file does not exist."); + } + + } else if (cmd.hasOption("s")) { + + final String samplerOptionsFile = cmd.getOptionValue("s"); + final File file = new File(samplerOptionsFile); + + if(file.exists()) { + + final String jsonString = Files.readString(file.toPath(), StandardCharsets.UTF_8); + final JsonElement jsonElement = JsonParser.parseString(jsonString); + final JsonObject jsonObject = jsonElement.getAsJsonObject(); + final String samplerType = jsonObject.get("sampler").getAsString(); + + if(AllQueriesQuerySampler.NAME.equalsIgnoreCase(samplerType)) { + + final AllQueriesQuerySamplerParameters parameters = gson.fromJson(jsonString, AllQueriesQuerySamplerParameters.class); + + final AllQueriesQuerySampler sampler = new AllQueriesQuerySampler(searchEngine, parameters); + final String querySetId = sampler.sample(); + + System.out.println("Query set created: " + querySetId); + + } else if(ProbabilityProportionalToSizeQuerySampler.NAME.equalsIgnoreCase(samplerType)) { + + final ProbabilityProportionalToSizeParameters parameters = gson.fromJson(jsonString, ProbabilityProportionalToSizeParameters.class); + + final ProbabilityProportionalToSizeQuerySampler sampler = new ProbabilityProportionalToSizeQuerySampler(searchEngine, parameters); + final String querySetId = sampler.sample(); + + System.out.println("Query set created: " + querySetId); + + } else { + + System.err.println("Invalid sampler: " + samplerType); + + } + + } else { + System.err.println("The query set run parameters file does not exist."); + } + + + } else { + + System.err.println("Invalid options."); + + } + + } + +// +// /** +// * The placeholder in the query that gets replaced by the query term when running a query set. +// */ +// public static final String QUERY_PLACEHOLDER = "#$query##"; +// +// @Override +// protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { +// +// // Handle managing query sets. +// if(QUERYSET_MANAGEMENT_URL.equalsIgnoreCase(request.path())) { +// +// // Creating a new query set by sampling the UBI queries. +// if (request.method().equals(RestRequest.Method.POST)) { +// +// final String name = request.param("name"); +// final String description = request.param("description"); +// final String sampling = request.param("sampling", "pptss"); +// final int querySetSize = Integer.parseInt(request.param("query_set_size", "1000")); +// +// // Create a query set by finding all the unique user_query terms. +// if (AllQueriesQuerySampler.NAME.equalsIgnoreCase(sampling)) { +// +// // If we are not sampling queries, the query sets should just be directly +// // indexed into OpenSearch using the `ubi_queries` index directly. 
+// +// try { +// +// final AllQueriesQuerySamplerParameters parameters = new AllQueriesQuerySamplerParameters(name, description, sampling, querySetSize); +// final AllQueriesQuerySampler sampler = new AllQueriesQuerySampler(client, parameters); +// +// // Sample and index the queries. +// final String querySetId = sampler.sample(); +// +// return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"query_set\": \"" + querySetId + "\"}")); +// +// } catch(Exception ex) { +// return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.INTERNAL_SERVER_ERROR, "{\"error\": \"" + ex.getMessage() + "\"}")); +// } + + +// // Handle the on-demand creation of implicit judgments. +// } else if(IMPLICIT_JUDGMENTS_URL.equalsIgnoreCase(request.path())) { +// +// if (request.method().equals(RestRequest.Method.POST)) { +// +// //final long startTime = System.currentTimeMillis(); +// final String clickModel = request.param("click_model", "coec"); +// final int maxRank = Integer.parseInt(request.param("max_rank", "20")); +// +// if (CoecClickModel.CLICK_MODEL_NAME.equalsIgnoreCase(clickModel)) { +// +// final CoecClickModelParameters coecClickModelParameters = new CoecClickModelParameters(maxRank); +// final CoecClickModel coecClickModel = new CoecClickModel(client, coecClickModelParameters); +// +// final String judgmentsId; +// +// // TODO: Run this in a separate thread. +// try { +// +// // Create the judgments index. +// createJudgmentsIndex(client); +// +// judgmentsId = coecClickModel.calculateJudgments(); +// +// // judgmentsId will be null if no judgments were created (and indexed). +// if(judgmentsId == null) { +// // TODO: Is Bad Request the appropriate error? Perhaps Conflict is more appropriate? +// return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"No judgments were created. 
Check the queries and events data.\"}")); +// } +// +//// final long elapsedTime = System.currentTimeMillis() - startTime; +//// +//// final Map job = new HashMap<>(); +//// job.put("name", "manual_generation"); +//// job.put("click_model", clickModel); +//// job.put("started", startTime); +//// job.put("duration", elapsedTime); +//// job.put("invocation", "on_demand"); +//// job.put("judgments_id", judgmentsId); +//// job.put("max_rank", maxRank); +//// +//// final String jobId = UUID.randomUUID().toString(); +//// +//// final IndexRequest indexRequest = new IndexRequest() +//// .index(SearchQualityEvaluationPlugin.COMPLETED_JOBS_INDEX_NAME) +//// .id(jobId) +//// .source(job) +//// .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); +//// +//// client.index(indexRequest, new ActionListener<>() { +//// @Override +//// public void onResponse(final IndexResponse indexResponse) { +//// LOGGER.debug("Click model job completed successfully: {}", jobId); +//// } +//// +//// @Override +//// public void onFailure(final Exception ex) { +//// LOGGER.error("Unable to run job with ID {}", jobId, ex); +//// throw new RuntimeException("Unable to run job", ex); +//// } +//// }); +// +// } catch (Exception ex) { +// throw new RuntimeException("Unable to generate judgments.", ex); +// } +// +// return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.OK, "{\"judgments_id\": \"" + judgmentsId + "\"}")); +// +// } else { +// return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.BAD_REQUEST, "{\"error\": \"Invalid click model.\"}")); +// } +// +// } else { +// return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.METHOD_NOT_ALLOWED, "{\"error\": \"" + request.method() + " is not allowed.\"}")); +// } +// +// } else { +// return restChannel -> restChannel.sendResponse(new BytesRestResponse(RestStatus.NOT_FOUND, "{\"error\": \"" + request.path() + " was not found.\"}")); +// } +// +// } +// +// private void createJudgmentsIndex(final NodeClient client) throws Exception { +// +// // If the judgments index does not exist we need to create it. +// final IndicesExistsRequest indicesExistsRequest = new IndicesExistsRequest(Constants.JUDGMENTS_INDEX_NAME); +// +// final IndicesExistsResponse indicesExistsResponse = client.admin().indices().exists(indicesExistsRequest).get(); +// +// if(!indicesExistsResponse.isExists()) { +// +// // TODO: Read this mapping from a resource file instead. +// final String mapping = "{\n" + +// " \"properties\": {\n" + +// " \"judgments_id\": { \"type\": \"keyword\" },\n" + +// " \"query_id\": { \"type\": \"keyword\" },\n" + +// " \"query\": { \"type\": \"keyword\" },\n" + +// " \"document_id\": { \"type\": \"keyword\" },\n" + +// " \"judgment\": { \"type\": \"double\" },\n" + +// " \"timestamp\": { \"type\": \"date\", \"format\": \"strict_date_time\" }\n" + +// " }\n" + +// " }"; +// +// // Create the judgments index. 
+// final CreateIndexRequest createIndexRequest = new CreateIndexRequest(Constants.JUDGMENTS_INDEX_NAME).mapping(mapping); +// +// // TODO: Don't use .get() +// client.admin().indices().create(createIndexRequest).get(); +// +// } +// +// } + +} diff --git a/src/main/java/org/opensearch/eval/Constants.java b/src/main/java/org/opensearch/eval/Constants.java new file mode 100644 index 0000000..6d0eccb --- /dev/null +++ b/src/main/java/org/opensearch/eval/Constants.java @@ -0,0 +1,45 @@ +package org.opensearch.eval; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class Constants { + + private static final Logger LOGGER = LogManager.getLogger(Constants.class); + + /** + * The name of the UBI index containing the queries. This should not be changed. + */ + public static final String UBI_QUERIES_INDEX_NAME = "ubi_queries"; + + /** + * The name of the UBI index containing the events. This should not be changed. + */ + public static final String UBI_EVENTS_INDEX_NAME = "ubi_events"; + + /** + * The name of the index to store the scheduled jobs to create implicit judgments. + */ + public static final String SCHEDULED_JOBS_INDEX_NAME = "search_quality_eval_scheduled_jobs"; + + /** + * The name of the index to store the completed jobs to create implicit judgments. + */ + public static final String COMPLETED_JOBS_INDEX_NAME = "search_quality_eval_completed_jobs"; + + /** + * The name of the index that stores the query sets. + */ + public static final String QUERY_SETS_INDEX_NAME = "search_quality_eval_query_sets"; + + /** + * The name of the index that stores the metrics for the dashboard. + */ + public static final String DASHBOARD_METRICS_INDEX_NAME = "sqe_metrics_sample_data"; + + /** + * The name of the index that stores the implicit judgments. + */ + public static final String JUDGMENTS_INDEX_NAME = "judgments"; + +} diff --git a/src/main/java/org/opensearch/eval/engine/OpenSearchEngine.java b/src/main/java/org/opensearch/eval/engine/OpenSearchEngine.java new file mode 100644 index 0000000..621d5ef --- /dev/null +++ b/src/main/java/org/opensearch/eval/engine/OpenSearchEngine.java @@ -0,0 +1,885 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.eval.engine; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.apache.hc.core5.http.HttpHost; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.client.json.JsonData; +import org.opensearch.client.json.jackson.JacksonJsonpMapper; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch._types.FieldValue; +import org.opensearch.client.opensearch._types.Refresh; +import org.opensearch.client.opensearch._types.SortOrder; +import org.opensearch.client.opensearch._types.Time; +import org.opensearch.client.opensearch._types.aggregations.Aggregate; +import org.opensearch.client.opensearch._types.aggregations.Aggregation; +import org.opensearch.client.opensearch._types.aggregations.LongTermsBucket; +import org.opensearch.client.opensearch._types.aggregations.StringTermsAggregate; +import org.opensearch.client.opensearch._types.aggregations.StringTermsBucket; +import org.opensearch.client.opensearch._types.query_dsl.BoolQuery; +import org.opensearch.client.opensearch._types.query_dsl.MatchQuery; +import org.opensearch.client.opensearch._types.query_dsl.Query; +import org.opensearch.client.opensearch._types.query_dsl.RangeQuery; +import org.opensearch.client.opensearch._types.query_dsl.WrapperQuery; +import org.opensearch.client.opensearch.core.BulkRequest; +import org.opensearch.client.opensearch.core.BulkResponse; +import org.opensearch.client.opensearch.core.IndexRequest; +import org.opensearch.client.opensearch.core.ScrollRequest; +import org.opensearch.client.opensearch.core.ScrollResponse; +import org.opensearch.client.opensearch.core.SearchRequest; +import org.opensearch.client.opensearch.core.SearchResponse; +import org.opensearch.client.opensearch.core.bulk.BulkOperation; +import org.opensearch.client.opensearch.core.bulk.IndexOperation; +import org.opensearch.client.opensearch.core.search.Hit; +import org.opensearch.client.opensearch.core.search.TrackHits; +import org.opensearch.client.opensearch.generic.Bodies; +import org.opensearch.client.opensearch.generic.OpenSearchGenericClient; +import org.opensearch.client.opensearch.generic.Requests; +import org.opensearch.client.opensearch.generic.Response; +import org.opensearch.client.opensearch.indices.CreateIndexRequest; +import org.opensearch.client.opensearch.indices.ExistsRequest; +import org.opensearch.client.transport.OpenSearchTransport; +import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; +import org.opensearch.eval.Constants; +import org.opensearch.eval.model.ClickthroughRate; +import org.opensearch.eval.model.data.ClickThroughRate; +import org.opensearch.eval.model.data.Judgment; +import org.opensearch.eval.model.data.QueryResultMetric; +import org.opensearch.eval.model.data.QuerySet; +import org.opensearch.eval.model.data.RankAggregatedClickThrough; +import org.opensearch.eval.model.ubi.event.UbiEvent; +import org.opensearch.eval.model.ubi.query.UbiQuery; +import org.opensearch.eval.utils.TimeUtils; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; + +import 
static org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel.EVENT_CLICK; +import static org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel.EVENT_IMPRESSION; +import static org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel.INDEX_QUERY_DOC_CTR; +import static org.opensearch.eval.judgments.clickmodel.coec.CoecClickModel.INDEX_RANK_AGGREGATED_CTR; +import static org.opensearch.eval.runners.OpenSearchQuerySetRunner.QUERY_PLACEHOLDER; + +/** + * Functionality for interacting with OpenSearch. + */ +public class OpenSearchEngine extends SearchEngine { + + private static final Logger LOGGER = LogManager.getLogger(OpenSearchEngine.class.getName()); + + private final OpenSearchClient client; + + // Used to cache the query ID->user_query to avoid unnecessary lookups to OpenSearch. + private static final Map userQueryCache = new HashMap<>(); + + public OpenSearchEngine(final URI uri) { + + final HttpHost[] hosts = new HttpHost[] { + HttpHost.create(uri) + }; + + final OpenSearchTransport transport = ApacheHttpClient5TransportBuilder + .builder(hosts) + .setMapper(new JacksonJsonpMapper()) + .build(); + + this.client = new OpenSearchClient(transport); + + } + + @Override + public boolean doesIndexExist(final String index) throws IOException { + + return client.indices().exists(ExistsRequest.of(s -> s.index(index))).value(); + + } + + @Override + public boolean createIndex(final String index, final String mappingJson) throws IOException { + + final InputStream stream = new ByteArrayInputStream(mappingJson.getBytes(StandardCharsets.UTF_8)); + + final CreateIndexRequest createIndexRequest = new CreateIndexRequest.Builder() + .index(index) + .mappings(m -> m.withJson(stream)) + .build(); + + stream.close(); + + return Boolean.TRUE.equals(client.indices().create(createIndexRequest).acknowledged()); + + } + + @Override + public boolean deleteIndex(String index) throws IOException { + + return client.indices().delete(s -> s.index(index)).acknowledged(); + + } + + @Override + public String indexQuerySet(final QuerySet querySet) throws IOException { + + final String index = Constants.QUERY_SETS_INDEX_NAME; + final String id = querySet.getId(); + + final IndexRequest indexRequest = new IndexRequest.Builder().index(index).id(id).document(querySet).build(); + return client.index(indexRequest).id(); + + } + + @Override + public QuerySet getQuerySet(String querySetId) throws IOException { + + final Query query = Query.of(q -> q.term(m -> m.field("_id").value(FieldValue.of(querySetId)))); + + final SearchResponse searchResponse = client.search(s -> s.index(Constants.QUERY_SETS_INDEX_NAME).query(query).size(1), QuerySet.class); + + // TODO: Handle the query set not being found. 
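+        // A possible way to handle the TODO above before dereferencing the first hit (sketch only):
+        //
+        //     if (searchResponse.hits().hits().isEmpty()) {
+        //         throw new IllegalArgumentException("Query set not found: " + querySetId);
+        //     }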
+
+        return searchResponse.hits().hits().get(0).source();
+
+    }
+
+    @Override
+    public Double getJudgmentValue(final String judgmentsId, final String userQuery, final String documentId) throws Exception {
+
+        var boolQuery = BoolQuery.of(bq -> bq
+                .must(
+                        List.of(
+                                MatchQuery.of(mq -> mq.field("judgments_id").query(FieldValue.of(judgmentsId))).toQuery(),
+                                MatchQuery.of(mq -> mq.field("query").query(FieldValue.of(userQuery))).toQuery(),
+                                MatchQuery.of(mq -> mq.field("document_id").query(FieldValue.of(documentId))).toQuery()
+                        )
+                )
+        );
+
+        final Query query = Query.of(q -> q.bool(boolQuery));
+
+        final SearchResponse<Judgment> searchResponse = client.search(s -> s.index(Constants.JUDGMENTS_INDEX_NAME)
+                .query(query)
+                .from(0)
+                .size(1), Judgment.class);
+
+        if(searchResponse.hits().hits().isEmpty()) {
+            return Double.NaN;
+        } else {
+            return searchResponse.hits().hits().get(0).source().getJudgment();
+        }
+
+    }
+
+    @Override
+    public Collection<UbiQuery> getUbiQueries() throws IOException {
+
+        final Collection<UbiQuery> ubiQueries = new ArrayList<>();
+
+        final Time scrollTime = new Time.Builder().time("10m").build();
+
+        final SearchResponse<UbiQuery> searchResponse = client.search(s -> s.index(Constants.UBI_QUERIES_INDEX_NAME).size(1000).scroll(scrollTime), UbiQuery.class);
+
+        String scrollId = searchResponse.scrollId();
+        List<Hit<UbiQuery>> searchHits = searchResponse.hits().hits();
+
+        while (searchHits != null && !searchHits.isEmpty()) {
+
+            for (int i = 0; i < searchHits.size(); i++) {
+                ubiQueries.add(searchHits.get(i).source());
+            }
+
+            final ScrollRequest scrollRequest = new ScrollRequest.Builder().scrollId(scrollId).build();
+            final ScrollResponse<UbiQuery> scrollResponse = client.scroll(scrollRequest, UbiQuery.class);
+
+            scrollId = scrollResponse.scrollId();
+            searchHits = scrollResponse.hits().hits();
+
+        }
+
+        return ubiQueries;
+
+    }
+
+    public Collection<Judgment> getJudgments(final String index) throws IOException {
+
+        final Collection<Judgment> judgments = new ArrayList<>();
+
+        final Time scrollTime = new Time.Builder().time("10m").build();
+
+        final SearchResponse<Judgment> searchResponse = client.search(s -> s.index(index).size(1000).scroll(scrollTime), Judgment.class);
+
+        String scrollId = searchResponse.scrollId();
+        List<Hit<Judgment>> searchHits = searchResponse.hits().hits();
+
+        while (searchHits != null && !searchHits.isEmpty()) {
+
+            for (int i = 0; i < searchHits.size(); i++) {
+                judgments.add(searchHits.get(i).source());
+            }
+
+            final ScrollRequest scrollRequest = new ScrollRequest.Builder().scrollId(scrollId).build();
+            final ScrollResponse<Judgment> scrollResponse = client.scroll(scrollRequest, Judgment.class);
+
+            scrollId = scrollResponse.scrollId();
+            searchHits = scrollResponse.hits().hits();
+
+        }
+
+        return judgments;
+
+    }
+
+    @Override
+    public boolean bulkIndex(String index, Map<String, Object> documents) throws IOException {
+
+        final ArrayList<BulkOperation> bulkOperations = new ArrayList<>();
+
+        for(final String id : documents.keySet()) {
+            final Object document = documents.get(id);
+            bulkOperations.add(new BulkOperation.Builder().index(IndexOperation.of(io -> io.index(index).id(id).document(document))).build());
+        }
+
+        final BulkRequest.Builder bulkReq = new BulkRequest.Builder()
+                .index(index)
+                .operations(bulkOperations)
+                .refresh(Refresh.WaitFor);
+
+        final BulkResponse bulkResponse = client.bulk(bulkReq.build());
+
+        return !bulkResponse.errors();
+
+    }
+
+    /**
+     * Gets the user query for a given query ID.
+     * @param queryId The query ID.
+     * @return The user query.
+ * @throws IOException Thrown when there is a problem accessing OpenSearch. + */ + @Override + public String getUserQuery(final String queryId) throws Exception { + + // If it's in the cache just get it and return it. + if(userQueryCache.containsKey(queryId)) { + return userQueryCache.get(queryId); + } + + // Cache it and return it. + final UbiQuery ubiQuery = getQueryFromQueryId(queryId); + + // ubiQuery will be null if the query does not exist. + if(ubiQuery != null) { + + userQueryCache.put(queryId, ubiQuery.getUserQuery()); + return ubiQuery.getUserQuery(); + + } else { + + return null; + + } + + } + + /** + * Gets the query object for a given query ID. + * @param queryId The query ID. + * @return A {@link UbiQuery} object for the given query ID. + * @throws Exception Thrown if the query cannot be retrieved. + */ + @Override + public UbiQuery getQueryFromQueryId(final String queryId) throws Exception { + + LOGGER.debug("Getting query from query ID {}", queryId); + + final SearchRequest searchRequest = new SearchRequest.Builder().query(q -> q.match(m -> m.field("query_id").query(FieldValue.of(queryId)))) + .index(Constants.UBI_QUERIES_INDEX_NAME) + .from(0) + .size(1) + .build(); + + final SearchResponse searchResponse = client.search(searchRequest, UbiQuery.class); + + // If this does not return a query then we cannot calculate the judgments. Each even should have a query associated with it. + if(searchResponse.hits().hits() != null & !searchResponse.hits().hits().isEmpty()) { + + final UbiQuery ubiQuery = searchResponse.hits().hits().get(0).source(); + + LOGGER.info("Found query: {}", ubiQuery.toString()); + + return ubiQuery; + + } else { + + LOGGER.warn("No query exists for query ID {} to calculate judgments.", queryId); + return null; + + } + + } + + @Override + public List runQuery(final String index, final String query, final int k, final String userQuery, final String idField, final String pipeline) throws IOException { + + // Replace the query placeholder with the user query. + final String parsedQuery = query.replace(QUERY_PLACEHOLDER, userQuery); + + LOGGER.debug("Running query on index {}, k = {}, userQuery = {}, idField = {}, pipeline = {}, query = {}", index, k, userQuery, idField, pipeline, parsedQuery); + + // Use a generic client to get around https://github.com/opensearch-project/OpenSearch/issues/16829 + // Refer to https://code.dblock.org/2023/10/16/making-raw-json-rest-requests-to-opensearch.html + final OpenSearchGenericClient genericClient = client.generic().withClientOptions(OpenSearchGenericClient.ClientOptions.throwOnHttpErrors()); + + final Map params = new HashMap<>(); + + if(!pipeline.isEmpty()) { + params.put("search_pipeline", pipeline); + } + + final Response searchResponse = genericClient.execute( + Requests.builder() + .endpoint(index + "/_search") + .method("POST") + .query(params) + .json(parsedQuery) + .build()); + + final JsonNode json = searchResponse.getBody() + .map(b -> Bodies.json(b, JsonNode.class, client._transport().jsonpMapper())) + .orElse(null); + + final List orderedDocumentIds = new ArrayList<>(); + + final JsonNode hits = json.get("hits").get("hits"); + for (int i = 0; i < hits.size(); i++) { + + if(hits.get(i).get("_source").get(idField) != null) { + orderedDocumentIds.add(hits.get(i).get("_source").get(idField).asText()); + } else { + LOGGER.info("The requested idField {} does not exist.", idField); + } + + } + + // The following commented code uses a wrapper query. 
+// final String encodedQuery = Base64.getEncoder().encodeToString(parsedQuery.getBytes(StandardCharsets.UTF_8)); + +// final WrapperQuery wrapperQuery = new WrapperQuery.Builder() +// .query(encodedQuery) +// .build(); + + // TODO: Only return the idField since that's all we need. + // final SearchRequest searchRequest; + +// if(!pipeline.isEmpty()) { +// +// searchRequest = new SearchRequest.Builder() +// .index(index) +// .query(q -> q.wrapper(wrapperQuery)) +// .from(0) +// .size(k) +// .pipeline(pipeline) +// .build(); +// +// } else { +// +// searchRequest = new SearchRequest.Builder() +// .index(index) +// .query(q -> q.wrapper(wrapperQuery)) +// .from(0) +// .size(k) +// .build(); +// +// } + +// final SearchResponse searchResponse = client.search(searchRequest, ObjectNode.class); + +// final List orderedDocumentIds = new ArrayList<>(); +// +// LOGGER.info("Encoded query: {}", encodedQuery); +// LOGGER.info("Found hits: {}", searchResponse.hits().hits().size()); +// +// for (int i = 0; i < searchResponse.hits().hits().size(); i++) { +// +// final String documentId; +// +// if ("_id".equals(idField)) { +// documentId = searchResponse.hits().hits().get(i).id(); +// } else { +// // TODO: Need to check this field actually exists. +// // TODO: Does this work? +// final Hit hit = searchResponse.hits().hits().get(i); +// documentId = hit.source().get(idField).toString(); +// +// } +// +// orderedDocumentIds.add(documentId); +// +// } + + return orderedDocumentIds; + + } + + @Override + public Map> getClickthroughRate(final int maxRank) throws Exception { + + final Map> queriesToClickthroughRates = new HashMap<>(); + + // For each query: + // - Get each document returned in that query (in the QueryResponse object). + // - Calculate the click-through rate for the document. (clicks/impressions) + + // TODO: Allow for a time period and for a specific application. + + final String query = "{\n" + + " \"bool\": {\n" + + " \"should\": [\n" + + " {\n" + + " \"term\": {\n" + + " \"action_name\": \"click\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\": {\n" + + " \"action_name\": \"impression\"\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"must\": [\n" + + " {\n" + + " \"range\": {\n" + + " \"event_attributes.position.ordinal\": {\n" + + " \"lte\": " + maxRank + "\n" + + " }\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " }"; + + final String encodedQuery = Base64.getEncoder().encodeToString(query.getBytes(StandardCharsets.UTF_8)); + + final WrapperQuery wrapperQuery = new WrapperQuery.Builder() + .query(encodedQuery) + .build(); + + final Time scrollTime = new Time.Builder().time("10m").build(); + + final SearchRequest searchRequest = new SearchRequest.Builder() + .index(Constants.UBI_EVENTS_INDEX_NAME) + .query(q -> q.wrapper(wrapperQuery)) + .from(0) + .size(1000) + .scroll(scrollTime) + .build(); + + // Use the generic client to send the raw json. + // https://code.dblock.org/2023/10/16/making-raw-json-rest-requests-to-opensearch.html#:~:text=build()%3B,Here's%20a%20search%20example.&See%20the%20updated%20documentation%20and%20working%20demo%20for%20more%20information. 
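+        // Illustrative sketch (not part of this change): the same request expressed as raw JSON through the
+        // generic client, mirroring the pattern used in runQuery() above; the active code below sends the
+        // query through the typed client with a wrapper query instead.
+        // final OpenSearchGenericClient genericClient = client.generic()
+        //         .withClientOptions(OpenSearchGenericClient.ClientOptions.throwOnHttpErrors());
+        // final Response rawSearchResponse = genericClient.execute(
+        //         Requests.builder()
+        //                 .endpoint(Constants.UBI_EVENTS_INDEX_NAME + "/_search")
+        //                 .method("POST")
+        //                 .query(Map.of("scroll", "10m"))
+        //                 .json("{\"size\": 1000, \"query\": " + query + "}")
+        //                 .build());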
+
+        final SearchResponse<UbiEvent> searchResponse = client.search(searchRequest, UbiEvent.class);
+
+        String scrollId = searchResponse.scrollId();
+        List<Hit<UbiEvent>> searchHits = searchResponse.hits().hits();
+
+        while (searchHits != null && !searchHits.isEmpty()) {
+
+            for (int i = 0; i < searchHits.size(); i++) {
+
+                final UbiEvent ubiEvent = searchHits.get(i).source();
+
+                // We need the user query rather than the query_id because two users can both search
+                // for "computer" and those searches will have different query IDs, but they are the same search.
+                final String userQuery = getUserQuery(ubiEvent.getQueryId());
+
+                // userQuery will be null if there is not a query for this event in ubi_queries.
+                if (userQuery != null) {
+
+                    // Get the clickthrough rates for this user query from the map, or an empty set if this is a new query.
+                    final Set<ClickthroughRate> clickthroughRates = queriesToClickthroughRates.getOrDefault(userQuery, new LinkedHashSet<>());
+
+                    // Get the ClickthroughRate object for the object that was interacted with.
+                    final ClickthroughRate clickthroughRate = clickthroughRates.stream().filter(p -> p.getObjectId().equals(ubiEvent.getEventAttributes().getObject().getObjectId())).findFirst().orElse(new ClickthroughRate(ubiEvent.getEventAttributes().getObject().getObjectId()));
+
+                    if (EVENT_CLICK.equalsIgnoreCase(ubiEvent.getActionName())) {
+                        //LOGGER.info("Logging a CLICK on " + ubiEvent.getEventAttributes().getObject().getObjectId());
+                        clickthroughRate.logClick();
+                    } else if (EVENT_IMPRESSION.equalsIgnoreCase(ubiEvent.getActionName())) {
+                        //LOGGER.info("Logging an IMPRESSION on " + ubiEvent.getEventAttributes().getObject().getObjectId());
+                        clickthroughRate.logImpression();
+                    } else {
+                        LOGGER.warn("Invalid event action name: {}", ubiEvent.getActionName());
+                    }
+
+                    // Safeguard to avoid having clicks without impressions.
+                    // When clicks > 0 and impressions == 0, set the impressions to the number of clicks.
+                    if(clickthroughRate.getClicks() > 0 && clickthroughRate.getImpressions() == 0) {
+                        clickthroughRate.setImpressions(clickthroughRate.getClicks());
+                    }
+
+                    clickthroughRates.add(clickthroughRate);
+                    queriesToClickthroughRates.put(userQuery, clickthroughRates);
+                    // LOGGER.debug("clickthroughRate = {}", queriesToClickthroughRates.size());
+
+                }
+
+            }
+
+            //LOGGER.info("Doing scroll to next results");
+            // TODO: Getting a warning in the log that "QueryGroup _id can't be null, It should be set before accessing it. This is abnormal behaviour"
+            // I don't remember seeing this prior to 2.18.0 but it's possible I just didn't see it.
+ // https://github.com/opensearch-project/OpenSearch/blob/f105e4eb2ede1556b5dd3c743bea1ab9686ebccf/server/src/main/java/org/opensearch/wlm/QueryGroupTask.java#L73 + + if(scrollId == null) { + break; + } + + final ScrollRequest scrollRequest = new ScrollRequest.Builder().scrollId(scrollId).build(); + final ScrollResponse scrollResponse = client.scroll(scrollRequest, UbiEvent.class); + + scrollId = scrollResponse.scrollId(); + searchHits = scrollResponse.hits().hits(); + + } + + indexClickthroughRates(queriesToClickthroughRates); + + return queriesToClickthroughRates; + + } + + @Override + public Map getRankAggregatedClickThrough(final int maxRank) throws Exception { + + final Map rankAggregatedClickThrough = new HashMap<>(); + + final RangeQuery rangeQuery = RangeQuery.of(r -> r + .field("event_attributes.position.ordinal") + .lte(JsonData.of(maxRank)) + ); + + // TODO: Is this the same as: final BucketOrder bucketOrder = BucketOrder.key(true); + final List> sort = new ArrayList<>(); + sort.add(Map.of("_key", SortOrder.Asc)); + + final Aggregation positionsAggregator = Aggregation.of(a -> a + .terms(t -> t + .field("event_attributes.position.ordinal") + .size(maxRank) + .order(sort) + ) + ); + + final Aggregation actionNameAggregation = Aggregation.of(a -> a + .terms(t -> t + .field("action_name") + .size(maxRank) + .order(sort) + ).aggregations(Map.of("By_Position", positionsAggregator)) + ); + + final Map aggregations = new HashMap<>(); + aggregations.put("By_Action", actionNameAggregation); + + // TODO: Allow for a time period and for a specific application. + final SearchRequest searchRequest = new SearchRequest.Builder() + .index(Constants.UBI_EVENTS_INDEX_NAME) + .aggregations(aggregations) + .query(q -> q.range(rangeQuery)) + .from(0) + .size(0) + .build(); + + System.out.println(searchRequest.toJsonString()); + + final SearchResponse searchResponse = client.search(searchRequest, Void.class); + + final Map aggs = searchResponse.aggregations(); + final StringTermsAggregate byAction = aggs.get("By_Action").sterms(); + final List byActionBuckets = byAction.buckets().array(); + + final Map clickCounts = new HashMap<>(); + final Map impressionCounts = new HashMap<>(); + + for (final StringTermsBucket bucket : byActionBuckets) { + + // Handle the "impression" bucket. + if(EVENT_IMPRESSION.equalsIgnoreCase(bucket.key())) { + + final Aggregate positionTerms = bucket.aggregations().get("By_Position"); + + final List positionBuckets = positionTerms.lterms().buckets().array(); + + for(final LongTermsBucket positionBucket : positionBuckets) { + LOGGER.debug("Inserting impression event from position {} with click count {}", positionBucket.key(), (double) positionBucket.docCount()); + impressionCounts.put(Integer.valueOf(positionBucket.key()), (double) positionBucket.docCount()); + } + + } + + // Handle the "click" bucket. 
+            if(EVENT_CLICK.equalsIgnoreCase(bucket.key())) {
+
+                final Aggregate positionTerms = bucket.aggregations().get("By_Position");
+
+                final List<LongTermsBucket> positionBuckets = positionTerms.lterms().buckets().array();
+
+                for(final LongTermsBucket positionBucket : positionBuckets) {
+                    LOGGER.debug("Inserting click event from position {} with click count {}", positionBucket.key(), (double) positionBucket.docCount());
+                    clickCounts.put(Integer.valueOf(positionBucket.key()), (double) positionBucket.docCount());
+                }
+
+            }
+
+        }
+
+        for(int rank = 0; rank < maxRank; rank++) {
+
+            if(impressionCounts.containsKey(rank)) {
+
+                if(clickCounts.containsKey(rank)) {
+
+                    // Calculate the CTR by dividing the number of clicks by the number of impressions.
+                    LOGGER.info("Position = {}, Impression Counts = {}, Click Count = {}", rank, impressionCounts.get(rank), clickCounts.get(rank));
+                    rankAggregatedClickThrough.put(rank, clickCounts.get(rank) / impressionCounts.get(rank));
+
+                } else {
+
+                    // This position has impressions but no clicks, so its CTR is zero.
+                    LOGGER.info("Position = {}, Impression Counts = {}, Impressions but no clicks so CTR is 0", rank, impressionCounts.get(rank));
+                    rankAggregatedClickThrough.put(rank, 0.0);
+
+                }
+
+            } else {
+
+                // No impressions so the clickthrough rate is 0.
+                LOGGER.info("No impressions for rank {}, so using CTR of 0", rank);
+                rankAggregatedClickThrough.put(rank, (double) 0);
+
+            }
+
+        }
+
+        indexRankAggregatedClickthrough(rankAggregatedClickThrough);
+
+        return rankAggregatedClickThrough;
+
+    }
+
+    private Collection<String> getQueryIdsHavingUserQuery(final String userQuery) throws Exception {
+
+        final SearchRequest searchRequest = new SearchRequest.Builder().query(q -> q.match(m -> m.field("user_query").query(FieldValue.of(userQuery))))
+                .index(Constants.UBI_QUERIES_INDEX_NAME)
+                .build();
+
+        final SearchResponse<UbiQuery> searchResponse = client.search(searchRequest, UbiQuery.class);
+
+        final Collection<String> queryIds = new ArrayList<>();
+
+        for (int i = 0; i < searchResponse.hits().hits().size(); i++) {
+            queryIds.add(searchResponse.hits().hits().get(i).source().getQueryId());
+        }
+
+        return queryIds;
+
+    }
+
+    @Override
+    public long getCountOfQueriesForUserQueryHavingResultInRankR(final String userQuery, final String objectId, final int rank) throws Exception {
+
+        long countOfTimesShownAtRank = 0;
+
+        // Get all query IDs matching this user query.
+        final Collection<String> queryIds = getQueryIdsHavingUserQuery(userQuery);
+
+        // For each query ID, get the events with action_name = "impression" having a match on objectId and rank (position).
+ for(final String queryId : queryIds) { + + final String query = "{\n" + + " \"bool\": {\n" + + " \"must\": [\n" + + " {\n" + + " \"term\": {\n" + + " \"query_id\": \"" + queryId + "\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\": {\n" + + " \"action_name\": \"impression\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\": {\n" + + " \"event_attributes.position.ordinal\": \"" + rank + "\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"term\": {\n" + + " \"event_attributes.object.object_id\": \"" + objectId + "\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " }"; + + final String encodedQuery = Base64.getEncoder().encodeToString(query.getBytes(StandardCharsets.UTF_8)); + + final WrapperQuery wrapperQuery = new WrapperQuery.Builder() + .query(encodedQuery) + .build(); + + final SearchRequest searchRequest = new SearchRequest.Builder() + .index(Constants.UBI_EVENTS_INDEX_NAME) + .query(q -> q.wrapper(wrapperQuery)) + .size(0) + .trackTotalHits(TrackHits.of(t -> t.enabled(true))) + .build(); + + final SearchResponse searchResponse = client.search(searchRequest, UbiEvent.class); + + countOfTimesShownAtRank += searchResponse.hits().total().value(); + + } + + LOGGER.debug("Count of {} having {} at rank {} = {}", userQuery, objectId, rank, countOfTimesShownAtRank); + + if(countOfTimesShownAtRank > 0) { + LOGGER.debug("Count of {} having {} at rank {} = {}", userQuery, objectId, rank, countOfTimesShownAtRank); + } + + return countOfTimesShownAtRank; + + } + + /** + * Index the rank-aggregated clickthrough values. + * @param rankAggregatedClickThrough A map of position to clickthrough values. + * @throws IOException Thrown when there is a problem accessing OpenSearch. + */ + @Override + public void indexRankAggregatedClickthrough(final Map rankAggregatedClickThrough) throws Exception { + + if(!rankAggregatedClickThrough.isEmpty()) { + + // TODO: Use bulk indexing. + + for (final int position : rankAggregatedClickThrough.keySet()) { + + final String id = UUID.randomUUID().toString(); + + final RankAggregatedClickThrough r = new RankAggregatedClickThrough(id); + r.setPosition(position); + r.setCtr(rankAggregatedClickThrough.get(position)); + + final IndexRequest indexRequest = new IndexRequest.Builder().index(INDEX_RANK_AGGREGATED_CTR).id(id).document(r).build(); + client.index(indexRequest); + + } + + } + + } + + /** + * Index the clickthrough rates. + * @param clickthroughRates A map of query IDs to a collection of {@link ClickthroughRate} objects. + * @throws IOException Thrown when there is a problem accessing OpenSearch. + */ + @Override + public void indexClickthroughRates(final Map> clickthroughRates) throws Exception { + + if(!clickthroughRates.isEmpty()) { + + // TODO: Use bulk inserts. + + for (final String userQuery : clickthroughRates.keySet()) { + + for (final ClickthroughRate clickthroughRate : clickthroughRates.get(userQuery)) { + + final String id = UUID.randomUUID().toString(); + + final ClickThroughRate ctr = new ClickThroughRate(id); + ctr.setUserQuery(userQuery); + ctr.setClicks(clickthroughRate.getClicks()); + ctr.setEvents(clickthroughRate.getImpressions()); + ctr.setCtr(clickthroughRate.getClickthroughRate()); + ctr.setObjectId(clickthroughRate.getObjectId()); + + LOGGER.debug("Clickthrough rate: {}", ctr); + + // TODO: This index needs created. 
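+                    // Sketch for the TODO above (illustrative only, not part of this change): create the CTR index
+                    // before the first write, assuming a mapping JSON for it is available (ctrMappingJson is hypothetical).
+                    // if (!doesIndexExist(INDEX_QUERY_DOC_CTR)) {
+                    //     createIndex(INDEX_QUERY_DOC_CTR, ctrMappingJson);
+                    // }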
+ final IndexRequest indexRequest = new IndexRequest.Builder().index(INDEX_QUERY_DOC_CTR).id(id).document(ctr).build(); + client.index(indexRequest); + + } + + } + + } + + } + + @Override + public void indexQueryResultMetric(final QueryResultMetric queryResultMetric) throws Exception { + + // TODO: Use bulk imports. + + final IndexRequest indexRequest = new IndexRequest.Builder() + .index(Constants.DASHBOARD_METRICS_INDEX_NAME) + .id(queryResultMetric.getId()) + .document(queryResultMetric).build(); + + client.index(indexRequest); + + } + + /** + * Index the judgments. + * @param judgments A collection of {@link Judgment judgments}. + * @throws IOException Thrown when there is a problem accessing OpenSearch. + * @return The ID of the indexed judgments. + */ + @Override + public String indexJudgments(final Collection judgments) throws Exception { + + final String judgmentsId = UUID.randomUUID().toString(); + final String timestamp = TimeUtils.getTimestamp(); + + // TODO: Use bulk imports. + + for(final Judgment judgment : judgments) { + + judgment.setJudgmentsId(judgmentsId); + judgment.setTimestamp(timestamp); + + final IndexRequest indexRequest = new IndexRequest.Builder().index(Constants.JUDGMENTS_INDEX_NAME).id(judgment.getId()).document(judgment).build(); + client.index(indexRequest); + + } + + return judgmentsId; + + } + +} \ No newline at end of file diff --git a/src/main/java/org/opensearch/eval/engine/SearchEngine.java b/src/main/java/org/opensearch/eval/engine/SearchEngine.java new file mode 100644 index 0000000..3529a76 --- /dev/null +++ b/src/main/java/org/opensearch/eval/engine/SearchEngine.java @@ -0,0 +1,69 @@ +package org.opensearch.eval.engine; + +import org.opensearch.eval.model.ClickthroughRate; +import org.opensearch.eval.model.data.Judgment; +import org.opensearch.eval.model.data.QueryResultMetric; +import org.opensearch.eval.model.data.QuerySet; +import org.opensearch.eval.model.ubi.query.UbiQuery; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public abstract class SearchEngine { + + public abstract boolean doesIndexExist(String index) throws IOException; + public abstract boolean createIndex(String index, String mapping) throws IOException; + public abstract boolean deleteIndex(String index) throws IOException; + + public abstract String getUserQuery(final String queryId) throws Exception; + public abstract UbiQuery getQueryFromQueryId(final String queryId) throws Exception; + public abstract long getCountOfQueriesForUserQueryHavingResultInRankR(final String userQuery, final String objectId, final int rank) throws Exception; + public abstract void indexRankAggregatedClickthrough(final Map rankAggregatedClickThrough) throws Exception; + public abstract void indexClickthroughRates(final Map> clickthroughRates) throws Exception; + public abstract String indexJudgments(final Collection judgments) throws Exception; + public abstract void indexQueryResultMetric(final QueryResultMetric queryResultMetric) throws Exception; + + public abstract boolean bulkIndex(String index, Map documents) throws IOException; + + public abstract Collection getJudgments(final String index) throws IOException; + + public abstract List runQuery(final String index, final String query, final int k, final String userQuery, final String idField, final String pipeline) throws IOException; + + public abstract String indexQuerySet(QuerySet querySet) throws IOException; + public abstract Collection getUbiQueries() 
throws IOException; + + /** + * Gets a query set from the index. + * @param querySetId The ID of the query set to get. + * @return The query set as a collection of maps of query to frequency + * @throws IOException Thrown if the query set cannot be retrieved. + */ + public abstract QuerySet getQuerySet(String querySetId) throws IOException; + + /** + * Get a judgment from the index. + * @param judgmentsId The ID of the judgments to find. + * @param query The user query. + * @param documentId The document ID. + * @return The value of the judgment, or NaN if the judgment cannot be found. + */ + public abstract Double getJudgmentValue(final String judgmentsId, final String query, final String documentId) throws Exception; + + /** + * Gets the clickthrough rates for each query and its results. + * @return A map of user_query to the clickthrough rate for each query result. + * @throws IOException Thrown when a problem accessing OpenSearch. + */ + public abstract Map> getClickthroughRate(final int maxRank) throws Exception; + + /** + * Calculate the rank-aggregated click through from the UBI events. + * @return A map of positions to clickthrough rates. + * @throws IOException Thrown when a problem accessing OpenSearch. + */ + public abstract Map getRankAggregatedClickThrough(int maxRank) throws Exception; + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/ClickModel.java b/src/main/java/org/opensearch/eval/judgments/clickmodel/ClickModel.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/ClickModel.java rename to src/main/java/org/opensearch/eval/judgments/clickmodel/ClickModel.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/ClickModelParameters.java b/src/main/java/org/opensearch/eval/judgments/clickmodel/ClickModelParameters.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/ClickModelParameters.java rename to src/main/java/org/opensearch/eval/judgments/clickmodel/ClickModelParameters.java diff --git a/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java b/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java new file mode 100644 index 0000000..684fbf7 --- /dev/null +++ b/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModel.java @@ -0,0 +1,179 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.eval.judgments.clickmodel.coec; + +import com.google.gson.Gson; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.eval.engine.SearchEngine; +import org.opensearch.eval.judgments.clickmodel.ClickModel; +import org.opensearch.eval.judgments.queryhash.IncrementalUserQueryHash; +import org.opensearch.eval.model.ClickthroughRate; +import org.opensearch.eval.model.data.Judgment; +import org.opensearch.eval.utils.MathUtils; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.Map; +import java.util.Set; + +public class CoecClickModel extends ClickModel { + + public static final String CLICK_MODEL_NAME = "coec"; + + // OpenSearch indexes for COEC data. + public static final String INDEX_RANK_AGGREGATED_CTR = "rank_aggregated_ctr"; + public static final String INDEX_QUERY_DOC_CTR = "click_through_rates"; + + // UBI event names. + public static final String EVENT_CLICK = "click"; + public static final String EVENT_IMPRESSION = "impression"; + + private final CoecClickModelParameters parameters; + + private final IncrementalUserQueryHash incrementalUserQueryHash = new IncrementalUserQueryHash(); + private final Gson gson = new Gson(); + private final SearchEngine searchEngine; + + private static final Logger LOGGER = LogManager.getLogger(CoecClickModel.class.getName()); + + public CoecClickModel(final SearchEngine searchEngine, final CoecClickModelParameters parameters) { + + this.parameters = parameters; + this.searchEngine = searchEngine; + + } + + @Override + public String calculateJudgments() throws Exception { + + final int maxRank = parameters.getMaxRank(); + + // Calculate and index the rank-aggregated click-through. + LOGGER.info("Beginning calculation of rank-aggregated click-through."); + final Map rankAggregatedClickThrough = searchEngine.getRankAggregatedClickThrough(maxRank); + LOGGER.info("Rank-aggregated clickthrough positions: {}", rankAggregatedClickThrough.size()); + showRankAggregatedClickThrough(rankAggregatedClickThrough); + + // Calculate and index the click-through rate for query/doc pairs. + LOGGER.info("Beginning calculation of clickthrough rates."); + final Map> clickthroughRates = searchEngine.getClickthroughRate(maxRank); + LOGGER.info("Clickthrough rates for number of queries: {}", clickthroughRates.size()); + showClickthroughRates(clickthroughRates); + + // Generate and index the implicit judgments. + LOGGER.info("Beginning calculation of implicit judgments."); + return calculateCoec(rankAggregatedClickThrough, clickthroughRates); + + } + + public String calculateCoec(final Map rankAggregatedClickThrough, + final Map> clickthroughRates) throws Exception { + + // Calculate the COEC. + // Numerator is the total number of clicks received by a query/result pair. + // Denominator is the expected clicks (EC) that an average result would receive after being impressed i times at rank r, + // and CTR is the average CTR for each position in the results page (up to R) computed over all queries and results. + + // Format: query_id, query, document, judgment + final Collection judgments = new LinkedList<>(); + + LOGGER.info("Count of queries: {}", clickthroughRates.size()); + + for(final String userQuery : clickthroughRates.keySet()) { + + // The clickthrough rates for this one query. + // A ClickthroughRate is a document with counts of impressions and clicks. 
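+            // Concretely, for a user query q and a document d, the loop below computes (restating the
+            // description above as a formula):
+            //
+            //     judgment(q, d) = clicks(q, d) / sum over ranks r < maxRank of ( meanCtrAtRank(r) * timesShownAtRank(q, d, r) )
+            //
+            // where meanCtrAtRank comes from rankAggregatedClickThrough and timesShownAtRank comes from
+            // searchEngine.getCountOfQueriesForUserQueryHavingResultInRankR().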
+ final Collection ctrs = clickthroughRates.get(userQuery); + + // Go through each clickthrough rate for this query. + for(final ClickthroughRate ctr : ctrs) { + + double denominatorSum = 0; + + for(int rank = 0; rank < parameters.getMaxRank(); rank++) { + + // The document's mean CTR at the rank. + final double meanCtrAtRank = rankAggregatedClickThrough.getOrDefault(rank, 0.0); + + // The number of times this document was shown as this rank. + final long countOfTimesShownAtRank = searchEngine.getCountOfQueriesForUserQueryHavingResultInRankR(userQuery, ctr.getObjectId(), rank); + + denominatorSum += (meanCtrAtRank * countOfTimesShownAtRank); + + } + + // Numerator is the sum of clicks at all ranks up to the maxRank. + final long totalNumberClicksForQueryResult = ctr.getClicks(); + + // Divide the numerator by the denominator (value). + final double judgmentValue; + + if(denominatorSum == 0) { + judgmentValue = 0.0; + } else { + judgmentValue = totalNumberClicksForQueryResult / denominatorSum; + } + + // Hash the user query to get a query ID. + final int queryId = incrementalUserQueryHash.getHash(userQuery); + + // Add the judgment to the list. + // TODO: What to do for query ID when the values are per user_query instead? + final Judgment judgment = new Judgment(String.valueOf(queryId), userQuery, ctr.getObjectId(), judgmentValue); + judgments.add(judgment); + + } + + } + + LOGGER.info("Count of user queries: {}", clickthroughRates.size()); + LOGGER.info("Count of judgments: {}", judgments.size()); + + showJudgments(judgments); + + if(!(judgments.isEmpty())) { + return searchEngine.indexJudgments(judgments); + } else { + return null; + } + + } + + private void showJudgments(final Collection judgments) { + + for(final Judgment judgment : judgments) { + LOGGER.info(judgment.toJudgmentString()); + } + + } + + private void showClickthroughRates(final Map> clickthroughRates) { + + for(final String userQuery : clickthroughRates.keySet()) { + + LOGGER.debug("user_query: {}", userQuery); + + for(final ClickthroughRate clickthroughRate : clickthroughRates.get(userQuery)) { + LOGGER.debug("\t - {}", clickthroughRate.toString()); + } + + } + + } + + private void showRankAggregatedClickThrough(final Map rankAggregatedClickThrough) { + + for(final int position : rankAggregatedClickThrough.keySet()) { + LOGGER.info("Position: {}, # ctr: {}", position, MathUtils.round(rankAggregatedClickThrough.get(position), parameters.getRoundingDigits())); + } + + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModelParameters.java b/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModelParameters.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModelParameters.java rename to src/main/java/org/opensearch/eval/judgments/clickmodel/coec/CoecClickModelParameters.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/queryhash/IncrementalUserQueryHash.java b/src/main/java/org/opensearch/eval/judgments/queryhash/IncrementalUserQueryHash.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/queryhash/IncrementalUserQueryHash.java rename to src/main/java/org/opensearch/eval/judgments/queryhash/IncrementalUserQueryHash.java diff --git 
a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/queryhash/UserQueryHash.java b/src/main/java/org/opensearch/eval/judgments/queryhash/UserQueryHash.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/queryhash/UserQueryHash.java rename to src/main/java/org/opensearch/eval/judgments/queryhash/UserQueryHash.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/DcgSearchMetric.java b/src/main/java/org/opensearch/eval/metrics/DcgSearchMetric.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/DcgSearchMetric.java rename to src/main/java/org/opensearch/eval/metrics/DcgSearchMetric.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/NdcgSearchMetric.java b/src/main/java/org/opensearch/eval/metrics/NdcgSearchMetric.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/NdcgSearchMetric.java rename to src/main/java/org/opensearch/eval/metrics/NdcgSearchMetric.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/PrecisionSearchMetric.java b/src/main/java/org/opensearch/eval/metrics/PrecisionSearchMetric.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/PrecisionSearchMetric.java rename to src/main/java/org/opensearch/eval/metrics/PrecisionSearchMetric.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/SearchMetric.java b/src/main/java/org/opensearch/eval/metrics/SearchMetric.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/metrics/SearchMetric.java rename to src/main/java/org/opensearch/eval/metrics/SearchMetric.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ClickthroughRate.java b/src/main/java/org/opensearch/eval/model/ClickthroughRate.java similarity index 85% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ClickthroughRate.java rename to src/main/java/org/opensearch/eval/model/ClickthroughRate.java index cef1f1f..7523fd2 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ClickthroughRate.java +++ b/src/main/java/org/opensearch/eval/model/ClickthroughRate.java @@ -6,7 +6,7 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.eval.judgments.model; +package org.opensearch.eval.model; import org.opensearch.eval.utils.MathUtils; @@ -16,8 +16,8 @@ public class ClickthroughRate { private final String objectId; - private int clicks; - private int impressions; + private long clicks; + private long impressions; /** * Creates a new clickthrough rate for an object. @@ -35,7 +35,7 @@ public ClickthroughRate(final String objectId) { * @param clicks The count of clicks. * @param impressions The count of events. 
*/ - public ClickthroughRate(final String objectId, final int clicks, final int impressions) { + public ClickthroughRate(final String objectId, final long clicks, final long impressions) { this.objectId = objectId; this.clicks = clicks; this.impressions = impressions; @@ -61,6 +61,10 @@ public void logImpression() { impressions++; } + public void setImpressions(long impressions) { + this.impressions = impressions; + } + /** * Calculate the clickthrough rate. * @return The clickthrough rate as clicks divided by events. @@ -73,7 +77,7 @@ public double getClickthroughRate() { * Gets the count of clicks. * @return The count of clicks. */ - public int getClicks() { + public long getClicks() { return clicks; } @@ -81,7 +85,7 @@ public int getClicks() { * Gets the count of events. * @return The count of events. */ - public int getImpressions() { + public long getImpressions() { return impressions; } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/QuerySetQuery.java b/src/main/java/org/opensearch/eval/model/QuerySetQuery.java similarity index 93% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/QuerySetQuery.java rename to src/main/java/org/opensearch/eval/model/QuerySetQuery.java index 2244df4..eaa825b 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/QuerySetQuery.java +++ b/src/main/java/org/opensearch/eval/model/QuerySetQuery.java @@ -6,7 +6,7 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.eval.judgments.model; +package org.opensearch.eval.model; public class QuerySetQuery { diff --git a/src/main/java/org/opensearch/eval/model/data/AbstractData.java b/src/main/java/org/opensearch/eval/model/data/AbstractData.java new file mode 100644 index 0000000..bfbd31e --- /dev/null +++ b/src/main/java/org/opensearch/eval/model/data/AbstractData.java @@ -0,0 +1,23 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.model.data; + +public abstract class AbstractData { + + private final String id; + + public AbstractData(final String id) { + this.id = id; + } + + public String getId() { + return id; + } + +} diff --git a/src/main/java/org/opensearch/eval/model/data/ClickThroughRate.java b/src/main/java/org/opensearch/eval/model/data/ClickThroughRate.java new file mode 100644 index 0000000..46271d9 --- /dev/null +++ b/src/main/java/org/opensearch/eval/model/data/ClickThroughRate.java @@ -0,0 +1,81 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.eval.model.data; + +import java.util.UUID; + +public class ClickThroughRate extends AbstractData { + + private String userQuery; + private long clicks; + private long events; + private double ctr; + private String objectId; + + public ClickThroughRate() { + super(UUID.randomUUID().toString()); + } + + public ClickThroughRate(String id) { + super(id); + } + + @Override + public String toString() { + return "ClickThroughRate{" + + "id='" + getId() + '\'' + + ", userQuery='" + userQuery + '\'' + + ", clicks=" + clicks + + ", events=" + events + + ", ctr=" + ctr + + ", objectId='" + objectId + '\'' + + '}'; + } + + public String getUserQuery() { + return userQuery; + } + + public void setUserQuery(String userQuery) { + this.userQuery = userQuery; + } + + public long getClicks() { + return clicks; + } + + public void setClicks(long clicks) { + this.clicks = clicks; + } + + public long getEvents() { + return events; + } + + public void setEvents(long events) { + this.events = events; + } + + public double getCtr() { + return ctr; + } + + public void setCtr(double ctr) { + this.ctr = ctr; + } + + public String getObjectId() { + return objectId; + } + + public void setObjectId(String objectId) { + this.objectId = objectId; + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/Judgment.java b/src/main/java/org/opensearch/eval/model/data/Judgment.java similarity index 67% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/Judgment.java rename to src/main/java/org/opensearch/eval/model/data/Judgment.java index bc9955f..408ce02 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/Judgment.java +++ b/src/main/java/org/opensearch/eval/model/data/Judgment.java @@ -6,7 +6,7 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.eval.judgments.model; +package org.opensearch.eval.model.data; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -14,11 +14,12 @@ import java.util.HashMap; import java.util.Map; +import java.util.UUID; /** * A judgment of a search result's quality for a given query. */ -public class Judgment { +public class Judgment extends AbstractData { private static final Logger LOGGER = LogManager.getLogger(Judgment.class.getName()); @@ -26,15 +27,34 @@ public class Judgment { private final String query; private final String document; private final double judgment; + private String judgmentsId; + private String timestamp; /** * Creates a new judgment. + * @param id The judgment ID. * @param queryId The query ID for the judgment. * @param query The query for the judgment. - * @param document The document in the jdugment. + * @param document The document in the judgment. + * @param judgment The judgment value. + */ + public Judgment(final String id, final String queryId, final String query, final String document, final double judgment) { + super(id); + this.queryId = queryId; + this.query = query; + this.document = document; + this.judgment = judgment; + } + + /** + * Creates a new judgment. + * @param queryId The query ID for the judgment. + * @param query The query for the judgment. + * @param document The document in the judgment. * @param judgment The judgment value. 
*/ public Judgment(final String queryId, final String query, final String document, final double judgment) { + super(UUID.randomUUID().toString()); this.queryId = queryId; this.query = query; this.document = document; @@ -94,4 +114,20 @@ public double getJudgment() { return judgment; } + public String getJudgmentsId() { + return judgmentsId; + } + + public void setJudgmentsId(String judgmentsId) { + this.judgmentsId = judgmentsId; + } + + public String getTimestamp() { + return timestamp; + } + + public void setTimestamp(String timestamp) { + this.timestamp = timestamp; + } + } diff --git a/src/main/java/org/opensearch/eval/model/data/QueryResultMetric.java b/src/main/java/org/opensearch/eval/model/data/QueryResultMetric.java new file mode 100644 index 0000000..c7a14e7 --- /dev/null +++ b/src/main/java/org/opensearch/eval/model/data/QueryResultMetric.java @@ -0,0 +1,97 @@ +package org.opensearch.eval.model.data; + +import java.util.UUID; + +public class QueryResultMetric extends AbstractData { + + private String datetime; + private String searchConfig; + private String querySetId; + private String query; + private String metric; + private double value; + private String application; + private String evaluationId; + private double frogsPercent; + + public QueryResultMetric(String id) { + super(id); + } + + public QueryResultMetric() { + super(UUID.randomUUID().toString()); + } + + public String getDatetime() { + return datetime; + } + + public void setDatetime(String datetime) { + this.datetime = datetime; + } + + public String getSearchConfig() { + return searchConfig; + } + + public void setSearchConfig(String searchConfig) { + this.searchConfig = searchConfig; + } + + public String getQuerySetId() { + return querySetId; + } + + public void setQuerySetId(String querySetId) { + this.querySetId = querySetId; + } + + public String getQuery() { + return query; + } + + public void setQuery(String query) { + this.query = query; + } + + public String getMetric() { + return metric; + } + + public void setMetric(String metric) { + this.metric = metric; + } + + public double getValue() { + return value; + } + + public void setValue(double value) { + this.value = value; + } + + public String getApplication() { + return application; + } + + public void setApplication(String application) { + this.application = application; + } + + public String getEvaluationId() { + return evaluationId; + } + + public void setEvaluationId(String evaluationId) { + this.evaluationId = evaluationId; + } + + public double getFrogsPercent() { + return frogsPercent; + } + + public void setFrogsPercent(double frogsPercent) { + this.frogsPercent = frogsPercent; + } + +} diff --git a/src/main/java/org/opensearch/eval/model/data/QuerySet.java b/src/main/java/org/opensearch/eval/model/data/QuerySet.java new file mode 100644 index 0000000..712005a --- /dev/null +++ b/src/main/java/org/opensearch/eval/model/data/QuerySet.java @@ -0,0 +1,71 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.eval.model.data; + +import java.util.Collection; +import java.util.Map; +import java.util.UUID; + +public class QuerySet extends AbstractData { + + private String name; + private String description; + private String sampling; + private Collection> querySetQueries; + private String timestamp; + + public QuerySet() { + super(UUID.randomUUID().toString()); + } + + public QuerySet(String id) { + super(id); + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getSampling() { + return sampling; + } + + public void setSampling(String sampling) { + this.sampling = sampling; + } + + public Collection> getQuerySetQueries() { + return querySetQueries; + } + + public void setQuerySetQueries(Collection> querySetQueries) { + this.querySetQueries = querySetQueries; + } + + public String getTimestamp() { + return timestamp; + } + + public void setTimestamp(String timestamp) { + this.timestamp = timestamp; + } + +} diff --git a/src/main/java/org/opensearch/eval/model/data/RankAggregatedClickThrough.java b/src/main/java/org/opensearch/eval/model/data/RankAggregatedClickThrough.java new file mode 100644 index 0000000..4844aef --- /dev/null +++ b/src/main/java/org/opensearch/eval/model/data/RankAggregatedClickThrough.java @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.model.data; + +import java.util.UUID; + +public class RankAggregatedClickThrough extends AbstractData { + + private int position; + private double ctr; + + public RankAggregatedClickThrough(String id) { + super(id); + } + + public RankAggregatedClickThrough() { + super(UUID.randomUUID().toString()); + } + + public int getPosition() { + return position; + } + + public void setPosition(int position) { + this.position = position; + } + + public double getCtr() { + return ctr; + } + + public void setCtr(double ctr) { + this.ctr = ctr; + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventAttributes.java b/src/main/java/org/opensearch/eval/model/ubi/event/EventAttributes.java similarity index 93% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventAttributes.java rename to src/main/java/org/opensearch/eval/model/ubi/event/EventAttributes.java index cf09444..7d73781 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventAttributes.java +++ b/src/main/java/org/opensearch/eval/model/ubi/event/EventAttributes.java @@ -6,8 +6,9 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. 
*/ -package org.opensearch.eval.judgments.model.ubi.event; +package org.opensearch.eval.model.ubi.event; +import com.fasterxml.jackson.annotation.JsonProperty; import com.google.gson.annotations.SerializedName; /** @@ -18,6 +19,7 @@ public class EventAttributes { @SerializedName("object") private EventObject object; + @JsonProperty("session_id") @SerializedName("session_id") private String sessionId; diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventObject.java b/src/main/java/org/opensearch/eval/model/ubi/event/EventObject.java similarity index 59% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventObject.java rename to src/main/java/org/opensearch/eval/model/ubi/event/EventObject.java index 55595ba..71cd693 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/EventObject.java +++ b/src/main/java/org/opensearch/eval/model/ubi/event/EventObject.java @@ -6,18 +6,27 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.eval.judgments.model.ubi.event; +package org.opensearch.eval.model.ubi.event; +import com.fasterxml.jackson.annotation.JsonAnySetter; +import com.fasterxml.jackson.annotation.JsonProperty; import com.google.gson.annotations.SerializedName; +import java.util.HashMap; +import java.util.Map; + public class EventObject { + @JsonProperty("object_id_field") @SerializedName("object_id_field") private String objectIdField; + @JsonProperty("object_id") @SerializedName("object_id") private String objectId; + private final Map additionalProperties = new HashMap<>(); + @Override public String toString() { return "[" + objectIdField + ", " + objectId + "]"; @@ -55,4 +64,22 @@ public void setObjectIdField(String objectIdField) { this.objectIdField = objectIdField; } + /** + * Adds an unrecognized property to the additional properties map. + * @param key The property name. + * @param value The property value. + */ + @JsonAnySetter + public void addAdditionalProperty(String key, Object value) { + this.additionalProperties.put(key, value); + } + + /** + * Gets the additional properties. + * @return The additional properties map. + */ + public Map getAdditionalProperties() { + return additionalProperties; + } + } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/Position.java b/src/main/java/org/opensearch/eval/model/ubi/event/Position.java similarity index 90% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/Position.java rename to src/main/java/org/opensearch/eval/model/ubi/event/Position.java index e3ebaad..ec66369 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/event/Position.java +++ b/src/main/java/org/opensearch/eval/model/ubi/event/Position.java @@ -6,8 +6,9 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. 
*/ -package org.opensearch.eval.judgments.model.ubi.event; +package org.opensearch.eval.model.ubi.event; +import com.fasterxml.jackson.annotation.JsonProperty; import com.google.gson.annotations.SerializedName; /** diff --git a/src/main/java/org/opensearch/eval/model/ubi/event/UbiEvent.java b/src/main/java/org/opensearch/eval/model/ubi/event/UbiEvent.java new file mode 100644 index 0000000..34306af --- /dev/null +++ b/src/main/java/org/opensearch/eval/model/ubi/event/UbiEvent.java @@ -0,0 +1,167 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.model.ubi.event; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.gson.annotations.SerializedName; + +/** + * Creates a representation of a UBI event. + */ +public class UbiEvent { + + @JsonProperty("user_id") + @SerializedName("user_id") + private String userId; + + @JsonProperty("action_name") + @SerializedName("action_name") + private String actionName; + + @JsonProperty("client_id") + @SerializedName("client_id") + private String clientId; + + @JsonProperty("query_id") + @SerializedName("query_id") + private String queryId; + + @JsonProperty("session_id") + @SerializedName("session_id") + private String sessionId; + + @SerializedName("application") + private String application; + + @JsonProperty("event_attributes") + @SerializedName("event_attributes") + private EventAttributes eventAttributes; + + @JsonProperty("user_query") + @SerializedName("user_query") + private String userQuery; + + @JsonProperty("message_type") + @SerializedName("message_type") + private String messageType; + + @JsonProperty("message") + @SerializedName("message") + private String message; + + private String timestamp; + + /** + * Creates a new representation of an UBI event. + */ + public UbiEvent() { + + } + + @Override + public String toString() { + return actionName + ", " + clientId + ", " + queryId + ", " + eventAttributes.getObject().toString() + ", " + eventAttributes.getPosition().getOrdinal(); + } + + /** + * Gets the name of the action. + * @return The name of the action. + */ + public String getActionName() { + return actionName; + } + + /** + * Gets the client ID. + * @return The client ID. + */ + public String getClientId() { + return clientId; + } + + /** + * Gets the query ID. + * @return The query ID. + */ + public String getQueryId() { + return queryId; + } + + /** + * Gets the event attributes. + * @return The {@link EventAttributes}. + */ + public EventAttributes getEventAttributes() { + return eventAttributes; + } + + /** + * Sets the event attributes. + * @param eventAttributes The {@link EventAttributes}. 
+ */ + public void setEventAttributes(EventAttributes eventAttributes) { + this.eventAttributes = eventAttributes; + } + + public String getApplication() { + return application; + } + + public void setApplication(String application) { + this.application = application; + } + + public String getSessionId() { + return sessionId; + } + + public void setSessionId(String sessionId) { + this.sessionId = sessionId; + } + + public String getTimestamp() { + return timestamp; + } + + public void setTimestamp(String timestamp) { + this.timestamp = timestamp; + } + + public String getUserQuery() { + return userQuery; + } + + public void setUserQuery(String userQuery) { + this.userQuery = userQuery; + } + + public String getMessage() { + return message; + } + + public void setMessage(String message) { + this.message = message; + } + + public String getMessageType() { + return messageType; + } + + public void setMessageType(String messageType) { + this.messageType = messageType; + } + + public String getUserId() { + return userId; + } + + public void setUserId(String userId) { + this.userId = userId; + } +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/UbiQuery.java b/src/main/java/org/opensearch/eval/model/ubi/query/UbiQuery.java similarity index 71% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/UbiQuery.java rename to src/main/java/org/opensearch/eval/model/ubi/query/UbiQuery.java index 0b7ca0b..30b68be 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/judgments/model/ubi/query/UbiQuery.java +++ b/src/main/java/org/opensearch/eval/model/ubi/query/UbiQuery.java @@ -6,10 +6,12 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.eval.judgments.model.ubi.query; +package org.opensearch.eval.model.ubi.query; +import com.fasterxml.jackson.annotation.JsonProperty; import com.google.gson.annotations.SerializedName; +import java.util.List; import java.util.Map; /** @@ -20,23 +22,35 @@ public class UbiQuery { @SerializedName("timestamp") private String timestamp; + @JsonProperty("query_id") @SerializedName("query_id") private String queryId; + @JsonProperty("client_id") @SerializedName("client_id") private String clientId; + @JsonProperty("user_query") @SerializedName("user_query") private String userQuery; @SerializedName("query") private String query; + @SerializedName("application") + private String application; + + @JsonProperty("query_attributes") @SerializedName("query_attributes") private Map queryAttributes; - @SerializedName("query_response") - private QueryResponse queryResponse; + @JsonProperty("query_response_id") + @SerializedName("query_response_id") + private String queryResponseId; + + @JsonProperty("query_response_hit_ids") + @SerializedName("query_response_hit_ids") + private List queryResponseHitIds; /** * Creates a new UBI query object. @@ -141,20 +155,28 @@ public void setQueryAttributes(Map queryAttributes) { this.queryAttributes = queryAttributes; } - /** - * Gets the query responses. - * @return The query responses. - */ - public QueryResponse getQueryResponse() { - return queryResponse; + public String getApplication() { + return application; } - /** - * Sets the query responses. - * @param queryResponse The query responses. 
- */ - public void setQueryResponse(QueryResponse queryResponse) { - this.queryResponse = queryResponse; + public void setApplication(String application) { + this.application = application; + } + + public List getQueryResponseHitIds() { + return queryResponseHitIds; + } + + public void setQueryResponseHitIds(List queryResponseHitIds) { + this.queryResponseHitIds = queryResponseHitIds; + } + + public String getQueryResponseId() { + return queryResponseId; + } + + public void setQueryResponseId(String queryResponseId) { + this.queryResponseId = queryResponseId; } } diff --git a/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java b/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java new file mode 100644 index 0000000..ad5f5dd --- /dev/null +++ b/src/main/java/org/opensearch/eval/runners/AbstractQuerySetRunner.java @@ -0,0 +1,96 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.runners; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.eval.engine.SearchEngine; + +import java.util.ArrayList; +import java.util.List; + +/** + * Base class for query set runners. Classes that extend this class + * should be specific to a search engine. See the {@link OpenSearchQuerySetRunner} for an example. + */ +public abstract class AbstractQuerySetRunner { + + private static final Logger LOGGER = LogManager.getLogger(AbstractQuerySetRunner.class); + + protected final SearchEngine searchEngine; + + public AbstractQuerySetRunner(final SearchEngine searchEngine) { + this.searchEngine = searchEngine; + } + + /** + * Runs the query set. + * @param querySetParameters A {@link RunQuerySetParameters parameters}. + * @return The query set {@link QuerySetRunResult results} and calculated metrics. + */ + abstract QuerySetRunResult run(RunQuerySetParameters querySetParameters) throws Exception; + + /** + * Saves the query set results to a persistent store, which may be the search engine itself. + * @param result The {@link QuerySetRunResult results}. + */ + abstract void save(QuerySetRunResult result) throws Exception; + + /** + * Gets the judgments for a query / document pairs. + * @param judgmentsId The judgments collection for which the judgment to retrieve belongs. + * @param query The user query. + * @param orderedDocumentIds A list of document IDs returned for the user query. + * @param k The k used for metrics calculation, i.e. DCG@k. + * @return An ordered list of relevance scores for the query / document pairs. + * @throws Exception Thrown if a judgment cannot be retrieved. + */ + protected RelevanceScores getRelevanceScores(final String judgmentsId, final String query, final List orderedDocumentIds, final int k) throws Exception { + + // Ordered list of scores. + final List scores = new ArrayList<>(); + + // Count the number of documents without judgments. + int documentsWithoutJudgmentsCount = 0; + + // For each document (up to k), get the judgment for the document. + for (int i = 0; i < k && i < orderedDocumentIds.size(); i++) { + + final String documentId = orderedDocumentIds.get(i); + + // Find the judgment value for this combination of query and documentId from the index. 
+ final Double judgmentValue = searchEngine.getJudgmentValue(judgmentsId, query, documentId); + + // If a judgment for this query/doc pair is not found, Double.NaN will be returned. + if(!Double.isNaN(judgmentValue)) { + LOGGER.info("Score found for document ID {} with judgments {} and query {} = {}", documentId, judgmentsId, query, judgmentValue); + scores.add(judgmentValue); + } else { + //LOGGER.info("No score found for document ID {} with judgments {} and query {}", documentId, judgmentsId, query); + documentsWithoutJudgmentsCount++; + } + + } + + double frogs = ((double) documentsWithoutJudgmentsCount) / orderedDocumentIds.size(); + + if(Double.isNaN(frogs)) { + frogs = 1.0; + } + + // Multiply by 100 to be a percentage. + frogs *= 100; + + LOGGER.info("frogs for query {} = {} ------- {} / {}", query, frogs, documentsWithoutJudgmentsCount, orderedDocumentIds.size()); + + return new RelevanceScores(scores, frogs); + + } + +} diff --git a/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java b/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java new file mode 100644 index 0000000..3593544 --- /dev/null +++ b/src/main/java/org/opensearch/eval/runners/OpenSearchQuerySetRunner.java @@ -0,0 +1,194 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.runners; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.eval.engine.SearchEngine; +import org.opensearch.eval.metrics.DcgSearchMetric; +import org.opensearch.eval.metrics.NdcgSearchMetric; +import org.opensearch.eval.metrics.PrecisionSearchMetric; +import org.opensearch.eval.metrics.SearchMetric; +import org.opensearch.eval.model.data.QueryResultMetric; +import org.opensearch.eval.model.data.QuerySet; +import org.opensearch.eval.utils.TimeUtils; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static org.opensearch.eval.Constants.DASHBOARD_METRICS_INDEX_NAME; + +/** + * A {@link AbstractQuerySetRunner} for Amazon OpenSearch. + */ +public class OpenSearchQuerySetRunner extends AbstractQuerySetRunner { + + private static final Logger LOGGER = LogManager.getLogger(OpenSearchQuerySetRunner.class); + + public static final String QUERY_PLACEHOLDER = "#$query##"; + + /** + * Creates a new query set runner + * + * @param searchEngine An OpenSearch engine {@link SearchEngine}. + */ + public OpenSearchQuerySetRunner(final SearchEngine searchEngine) { + super(searchEngine); + } + + @Override + public QuerySetRunResult run(final RunQuerySetParameters querySetParameters) throws Exception { + + final QuerySet querySet = searchEngine.getQuerySet(querySetParameters.getQuerySetId()); + LOGGER.info("Found {} queries in query set {}", querySet.getQuerySetQueries().size(), querySetParameters.getQuerySetId()); + + try { + + // The results of each query. + final List queryResults = new ArrayList<>(); + + for (Map queryMap : querySet.getQuerySetQueries()) { + + // Loop over each query in the map and run each one. + for (final String userQuery : queryMap.keySet()) { + + // This is to keep OpenSearch from rejecting queries. + // TODO: Look at using the Workload Management in 2.18.0. 
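As an aside on the `getRelevanceScores` method shown above: the value it reports alongside the relevance scores ("frogs") is the share of returned documents for which no judgment was found, expressed as a percentage, with an empty result list treated as fully unjudged. A short worked example with made-up counts:

```
// Worked example with made-up counts: 3 of 10 returned documents had no judgment.
final int documentsWithoutJudgments = 3;
final int returnedDocuments = 10;

double frogs = ((double) documentsWithoutJudgments) / returnedDocuments; // 0.3
if (Double.isNaN(frogs)) {   // only possible when zero documents were returned (0 / 0)
    frogs = 1.0;             // an empty result list counts as fully unjudged
}
frogs *= 100;                // reported as a percentage: 30.0
```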
+ Thread.sleep(50); + + final List orderedDocumentIds = searchEngine.runQuery( + querySetParameters.getIndex(), + querySetParameters.getQuery(), + querySetParameters.getK(), + userQuery, + querySetParameters.getIdField(), + querySetParameters.getSearchPipeline()); + + try { + + final int k = querySetParameters.getK(); + final RelevanceScores relevanceScores = getRelevanceScores(querySetParameters.getJudgmentsId(), userQuery, orderedDocumentIds, k); + + // Calculate the metrics for this query. + final SearchMetric dcgSearchMetric = new DcgSearchMetric(k, relevanceScores.getRelevanceScores()); + final SearchMetric ndcgSearchmetric = new NdcgSearchMetric(k, relevanceScores.getRelevanceScores()); + final SearchMetric precisionSearchMetric = new PrecisionSearchMetric(k, querySetParameters.getThreshold(), relevanceScores.getRelevanceScores()); + + final Collection searchMetrics = List.of(dcgSearchMetric, ndcgSearchmetric, precisionSearchMetric); + + queryResults.add(new QueryResult(userQuery, orderedDocumentIds, k, searchMetrics, relevanceScores.getFrogs())); + + } catch (Exception ex) { + LOGGER.error("Unable to get relevance scores for judgments {} and user query {}.", querySetParameters.getJudgmentsId(), userQuery, ex); + } + + } + + } + + // Calculate the search metrics for the entire query set given the individual query set metrics. + // Sum up the metrics for each query per metric type. + final int querySetSize = queryResults.size(); + final Map sumOfMetrics = new HashMap<>(); + for (final QueryResult queryResult : queryResults) { + for (final SearchMetric searchMetric : queryResult.getSearchMetrics()) { + //LOGGER.info("Summing: {} - {}", searchMetric.getName(), searchMetric.getValue()); + sumOfMetrics.merge(searchMetric.getName(), searchMetric.getValue(), Double::sum); + } + } + + // Now divide by the number of queries. + final Map querySetMetrics = new HashMap<>(); + for (final String metric : sumOfMetrics.keySet()) { + //LOGGER.info("Dividing by the query set size: {} / {}", sumOfMetrics.get(metric), querySetSize); + querySetMetrics.put(metric, sumOfMetrics.get(metric) / querySetSize); + } + + final String querySetRunId = UUID.randomUUID().toString(); + final QuerySetRunResult querySetRunResult = new QuerySetRunResult(querySetRunId, querySetParameters.getQuerySetId(), queryResults, querySetMetrics); + + save(querySetRunResult); + + LOGGER.info("Query set run complete: {}", querySetRunId); + + return querySetRunResult; + + } catch (Exception ex) { + throw new RuntimeException("Unable to run query set. If using a search_pipeline make sure the pipeline exists.", ex); + } + + } + + @Override + public void save(final QuerySetRunResult result) throws Exception { + + LOGGER.info("Indexing query run results."); + + // Now, index the metrics as expected by the dashboards. + + // See https://github.com/o19s/opensearch-search-quality-evaluation/blob/main/opensearch-dashboard-prototyping/METRICS_SCHEMA.md + // See https://github.com/o19s/opensearch-search-quality-evaluation/blob/main/opensearch-dashboard-prototyping/sample_data.ndjson + + final boolean dashboardMetricsIndexExists = searchEngine.doesIndexExist(DASHBOARD_METRICS_INDEX_NAME); + + if (!dashboardMetricsIndexExists) { + + // Create the index. + // TODO: Read this mapping from a resource file instead. 
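The metric classes used above (`DcgSearchMetric`, `NdcgSearchMetric`, `PrecisionSearchMetric`) are not part of this diff. As a reminder of what such metrics conventionally compute over the ordered relevance scores, here is a minimal sketch of log-discounted DCG and NDCG; the exact gain and discount used by the project's classes may differ, so treat this as orientation only:

```
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

// Minimal sketch of the conventional definitions; the project's
// DcgSearchMetric / NdcgSearchMetric may use a different gain or discount.
final class MetricSketch {

    static double dcg(final List<Double> relevanceScores) {
        double dcg = 0.0;
        for (int i = 0; i < relevanceScores.size(); i++) {
            // rel_i / log2(rank + 1), where rank = i + 1
            dcg += relevanceScores.get(i) / (Math.log(i + 2) / Math.log(2));
        }
        return dcg;
    }

    static double ndcg(final List<Double> relevanceScores) {
        final List<Double> ideal = new ArrayList<>(relevanceScores);
        ideal.sort(Comparator.reverseOrder()); // best possible ordering of the same judgments
        final double idealDcg = dcg(ideal);
        return idealDcg == 0.0 ? 0.0 : dcg(relevanceScores) / idealDcg;
    }
}
```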
+ final String mapping = "{\n" + + " \"properties\": {\n" + + " \"datetime\": { \"type\": \"date\", \"format\": \"strict_date_time\" },\n" + + " \"search_config\": { \"type\": \"keyword\" },\n" + + " \"query_set_id\": { \"type\": \"keyword\" },\n" + + " \"query\": { \"type\": \"keyword\" },\n" + + " \"metric\": { \"type\": \"keyword\" },\n" + + " \"value\": { \"type\": \"double\" },\n" + + " \"application\": { \"type\": \"keyword\" },\n" + + " \"evaluation_id\": { \"type\": \"keyword\" },\n" + + " \"frogs_percent\": { \"type\": \"double\" }\n" + + " }\n" + + " }"; + + // TODO: Make sure the index gets created successfully. + searchEngine.createIndex(DASHBOARD_METRICS_INDEX_NAME, mapping); + + } + + final String timestamp = TimeUtils.getTimestamp(); + + for(final QueryResult queryResult : result.getQueryResults()) { + + for(final SearchMetric searchMetric : queryResult.getSearchMetrics()) { + + final QueryResultMetric queryResultMetric = new QueryResultMetric(); + queryResultMetric.setDatetime(timestamp); + queryResultMetric.setSearchConfig("research_1"); + queryResultMetric.setQuerySetId(result.getQuerySetId()); + queryResultMetric.setQuery(queryResult.getQuery()); + queryResultMetric.setMetric(searchMetric.getName()); + queryResultMetric.setValue(searchMetric.getValue()); + queryResultMetric.setApplication("sample_data"); + queryResultMetric.setEvaluationId(result.getRunId()); + queryResultMetric.setFrogsPercent(queryResult.getFrogs()); + + searchEngine.indexQueryResultMetric(queryResultMetric); + + } + + } + + + + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/QueryResult.java b/src/main/java/org/opensearch/eval/runners/QueryResult.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/QueryResult.java rename to src/main/java/org/opensearch/eval/runners/QueryResult.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/QuerySetRunResult.java b/src/main/java/org/opensearch/eval/runners/QuerySetRunResult.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/QuerySetRunResult.java rename to src/main/java/org/opensearch/eval/runners/QuerySetRunResult.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/RelevanceScores.java b/src/main/java/org/opensearch/eval/runners/RelevanceScores.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/runners/RelevanceScores.java rename to src/main/java/org/opensearch/eval/runners/RelevanceScores.java diff --git a/src/main/java/org/opensearch/eval/runners/RunQuerySetParameters.java b/src/main/java/org/opensearch/eval/runners/RunQuerySetParameters.java new file mode 100644 index 0000000..bf8d523 --- /dev/null +++ b/src/main/java/org/opensearch/eval/runners/RunQuerySetParameters.java @@ -0,0 +1,106 @@ +package org.opensearch.eval.runners; + +import com.google.gson.annotations.SerializedName; + +public class RunQuerySetParameters { + + @SerializedName("query_set_id") + private String querySetId; + + @SerializedName("judgments_id") + private String judgmentsId; + + @SerializedName("index") + private String index; + + @SerializedName("search_pipeline") + private String searchPipeline; + + @SerializedName("id_field") + private String idField; + + @SerializedName("query") + private String query; + + 
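The `@SerializedName` annotations on this class (including the `k` and `threshold` fields declared just below) suggest the run parameters arrive as a JSON document. A hypothetical example of such a document, with made-up values, parsed with Gson; the `#$query##` token is the `QUERY_PLACEHOLDER` defined in `OpenSearchQuerySetRunner`, presumably replaced with each user query at query time:

```
// Hypothetical example only: the field names come from the @SerializedName
// annotations on RunQuerySetParameters; all values are made up.
final String json = "{"
        + "\"query_set_id\": \"0d6a1d67\","
        + "\"judgments_id\": \"c7e0f1a2\","
        + "\"index\": \"ecommerce\","
        + "\"id_field\": \"asin\","
        + "\"query\": \"{\\\"match\\\": {\\\"title\\\": \\\"#$query##\\\"}}\","
        + "\"k\": 10,"
        + "\"threshold\": 1.0"
        + "}";

final RunQuerySetParameters parameters = new com.google.gson.Gson().fromJson(json, RunQuerySetParameters.class);
```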
@SerializedName("k") + private int k; + + @SerializedName("threshold") + private double threshold; + +// * @param querySetId The ID of the query set to run. +// * @param judgmentsId The ID of the judgments set to use for search metric calculation. +// * @param index The name of the index to run the query sets against. +// * @param searchPipeline The name of the search pipeline to use, or null to not use a search pipeline. +// * @param idField The field in the index that is used to uniquely identify a document. +// * @param query The query that will be used to run the query set. +// * @param k The k used for metrics calculation, i.e. DCG@k. +// * @param threshold The cutoff for binary judgments. A judgment score greater than or equal +// * to this value will be assigned a binary judgment value of 1. A judgment score +// * less than this value will be assigned a binary judgment value of 0. + + public String getQuerySetId() { + return querySetId; + } + + public void setQuerySetId(String querySetId) { + this.querySetId = querySetId; + } + + public String getJudgmentsId() { + return judgmentsId; + } + + public void setJudgmentsId(String judgmentsId) { + this.judgmentsId = judgmentsId; + } + + public String getIndex() { + return index; + } + + public void setIndex(String index) { + this.index = index; + } + + public String getSearchPipeline() { + return searchPipeline; + } + + public void setSearchPipeline(String searchPipeline) { + this.searchPipeline = searchPipeline; + } + + public String getIdField() { + return idField; + } + + public void setIdField(String idField) { + this.idField = idField; + } + + public String getQuery() { + return query; + } + + public void setQuery(String query) { + this.query = query; + } + + public int getK() { + return k; + } + + public void setK(int k) { + this.k = k; + } + + public double getThreshold() { + return threshold; + } + + public void setThreshold(double threshold) { + this.threshold = threshold; + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java b/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java similarity index 51% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java rename to src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java index 3c70f0a..e43b37b 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java +++ b/src/main/java/org/opensearch/eval/samplers/AbstractQuerySampler.java @@ -10,19 +10,14 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.action.index.IndexRequest; -import org.opensearch.action.index.IndexResponse; -import org.opensearch.action.support.WriteRequest; -import org.opensearch.client.node.NodeClient; -import org.opensearch.core.action.ActionListener; -import org.opensearch.eval.SearchQualityEvaluationPlugin; +import org.opensearch.eval.engine.SearchEngine; +import org.opensearch.eval.model.data.QuerySet; import org.opensearch.eval.utils.TimeUtils; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Map; -import java.util.UUID; /** * An interface for sampling UBI queries. @@ -46,7 +41,7 @@ public abstract class AbstractQuerySampler { /** * Index the query set. 
*/ - protected String indexQuerySet(final NodeClient client, final String name, final String description, final String sampling, Map queries) throws Exception { + protected String indexQuerySet(final SearchEngine searchEngine, final String name, final String description, final String sampling, Map queries) throws Exception { LOGGER.info("Indexing {} queries for query set {}", queries.size(), name); @@ -63,33 +58,34 @@ protected String indexQuerySet(final NodeClient client, final String name, final } - final Map querySet = new HashMap<>(); - querySet.put("name", name); - querySet.put("description", description); - querySet.put("sampling", sampling); - querySet.put("queries", querySetQueries); - querySet.put("timestamp", TimeUtils.getTimestamp()); + final QuerySet querySet = new QuerySet(); + querySet.setName(name); + querySet.setDescription(description); + querySet.setSampling(sampling); + querySet.setQuerySetQueries(querySetQueries); + querySet.setTimestamp(TimeUtils.getTimestamp()); - final String querySetId = UUID.randomUUID().toString(); + final String querySetId = searchEngine.indexQuerySet(querySet); // TODO: Create a mapping for the query set index. - final IndexRequest indexRequest = new IndexRequest().index(SearchQualityEvaluationPlugin.QUERY_SETS_INDEX_NAME) - .id(querySetId) - .source(querySet) - .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); - - client.index(indexRequest, new ActionListener<>() { - - @Override - public void onResponse(IndexResponse indexResponse) { - LOGGER.info("Indexed query set {} having name {}", querySetId, name); - } - - @Override - public void onFailure(Exception ex) { - LOGGER.error("Unable to index query set {}", querySetId, ex); - } - }); + +// final IndexRequest indexRequest = new IndexRequest().index(Constants.QUERY_SETS_INDEX_NAME) +// .id(querySetId) +// .source(querySet) +// .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); +// +// client.index(indexRequest, new ActionListener<>() { +// +// @Override +// public void onResponse(IndexResponse indexResponse) { +// LOGGER.info("Indexed query set {} having name {}", querySetId, name); +// } +// +// @Override +// public void onFailure(Exception ex) { +// LOGGER.error("Unable to index query set {}", querySetId, ex); +// } +// }); return querySetId; diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java b/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java similarity index 75% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java rename to src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java index c8d731a..4388fcc 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java +++ b/src/main/java/org/opensearch/eval/samplers/AbstractSamplerParameters.java @@ -10,12 +10,14 @@ public class AbstractSamplerParameters { + private final String sampler; private final String name; private final String description; private final String sampling; private final int querySetSize; - public AbstractSamplerParameters(final String name, final String description, final String sampling, final int querySetSize) { + public AbstractSamplerParameters(final String sampler, final String name, final String description, final String sampling, final int querySetSize) { + this.sampler = sampler; this.name = name; this.description = description; this.sampling 
= sampling; @@ -38,4 +40,8 @@ public int getQuerySetSize() { return querySetSize; } + public String getSampler() { + return sampler; + } + } diff --git a/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java b/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java new file mode 100644 index 0000000..54a78b5 --- /dev/null +++ b/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySampler.java @@ -0,0 +1,64 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.eval.samplers; + +import org.opensearch.eval.engine.SearchEngine; +import org.opensearch.eval.model.ubi.query.UbiQuery; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * An implementation of {@link AbstractQuerySampler} that uses all UBI queries without any sampling. + */ +public class AllQueriesQuerySampler extends AbstractQuerySampler { + + public static final String NAME = "all"; + + private final SearchEngine searchEngine; + private final AllQueriesQuerySamplerParameters parameters; + + /** + * Creates a new sampler. + * @param searchEngine The OpenSearch {@link SearchEngine engine}. + */ + public AllQueriesQuerySampler(final SearchEngine searchEngine, final AllQueriesQuerySamplerParameters parameters) { + this.searchEngine = searchEngine; + this.parameters = parameters; + } + + @Override + public String getName() { + return NAME; + } + + @Override + public String sample() throws Exception { + + final Collection ubiQueries = searchEngine.getUbiQueries(); + + final Map queries = new HashMap<>(); + + for(final UbiQuery ubiQuery : ubiQueries) { + + queries.merge(ubiQuery.getUserQuery(), 1L, Long::sum); + + // Will be useful for paging once implemented. 
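The sampler builds the query set as a map from each user query to the number of times it occurred in `ubi_queries`, using `Map.merge`. A small illustration with made-up queries (the generic type of the map is `Map<String, Long>`):

```
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Illustration with made-up queries: each user query maps to its occurrence count.
public class MergeCountSketch {
    public static void main(String[] args) {
        final Map<String, Long> queries = new HashMap<>();
        for (final String userQuery : List.of("laptop", "shoes", "laptop")) {
            queries.merge(userQuery, 1L, Long::sum);
        }
        System.out.println(queries); // {laptop=2, shoes=1} (order may vary)
    }
}
```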
+ if(queries.size() >= parameters.getQuerySetSize()) { + break; + } + + } + + return indexQuerySet(searchEngine, parameters.getName(), parameters.getDescription(), parameters.getSampling(), queries); + + } + +} diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java b/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java similarity index 88% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java rename to src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java index 3149668..dc317e2 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java +++ b/src/main/java/org/opensearch/eval/samplers/AllQueriesQuerySamplerParameters.java @@ -11,7 +11,7 @@ public class AllQueriesQuerySamplerParameters extends AbstractSamplerParameters { public AllQueriesQuerySamplerParameters(final String name, final String description, final String sampling, final int querySetSize) { - super(name, description, sampling, querySetSize); + super("all", name, description, sampling, querySetSize); } } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java b/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java similarity index 88% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java rename to src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java index d5e4311..242941c 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java +++ b/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeParameters.java @@ -11,7 +11,7 @@ public class ProbabilityProportionalToSizeParameters extends AbstractSamplerParameters { public ProbabilityProportionalToSizeParameters(final String name, final String description, final String sampling, final int querySetSize) { - super(name, description, sampling, querySetSize); + super("pptss", name, description, sampling, querySetSize); } } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeAbstractQuerySampler.java b/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeQuerySampler.java similarity index 61% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeAbstractQuerySampler.java rename to src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeQuerySampler.java index 79f2c7c..0546cd8 100644 --- a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeAbstractQuerySampler.java +++ b/src/main/java/org/opensearch/eval/samplers/ProbabilityProportionalToSizeQuerySampler.java @@ -10,21 +10,13 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.action.search.SearchRequest; -import org.opensearch.action.search.SearchResponse; -import org.opensearch.action.search.SearchScrollRequest; -import org.opensearch.client.node.NodeClient; -import 
org.opensearch.common.unit.TimeValue; -import org.opensearch.eval.SearchQualityEvaluationPlugin; -import org.opensearch.index.query.QueryBuilders; -import org.opensearch.search.Scroll; -import org.opensearch.search.SearchHit; -import org.opensearch.search.builder.SearchSourceBuilder; - -import java.util.ArrayList; +import org.opensearch.eval.engine.SearchEngine; +import org.opensearch.eval.model.ubi.query.UbiQuery; + import java.util.Collection; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; @@ -33,22 +25,22 @@ * See https://opensourceconnections.com/blog/2022/10/13/how-to-succeed-with-explicit-relevance-evaluation-using-probability-proportional-to-size-sampling/ * for more information on PPTSS. */ -public class ProbabilityProportionalToSizeAbstractQuerySampler extends AbstractQuerySampler { +public class ProbabilityProportionalToSizeQuerySampler extends AbstractQuerySampler { public static final String NAME = "pptss"; - private static final Logger LOGGER = LogManager.getLogger(ProbabilityProportionalToSizeAbstractQuerySampler.class); + private static final Logger LOGGER = LogManager.getLogger(ProbabilityProportionalToSizeQuerySampler.class); - private final NodeClient client; + private final SearchEngine searchEngine; private final ProbabilityProportionalToSizeParameters parameters; /** * Creates a new PPTSS sampler. - * @param client The OpenSearch {@link NodeClient client}. + * @param searchEngine The OpenSearch {@link SearchEngine engine}. * @param parameters The {@link ProbabilityProportionalToSizeParameters parameters} for the sampling. */ - public ProbabilityProportionalToSizeAbstractQuerySampler(final NodeClient client, final ProbabilityProportionalToSizeParameters parameters) { - this.client = client; + public ProbabilityProportionalToSizeQuerySampler(final SearchEngine searchEngine, final ProbabilityProportionalToSizeParameters parameters) { + this.searchEngine = searchEngine; this.parameters = parameters; } @@ -60,45 +52,11 @@ public String getName() { @Override public String sample() throws Exception { - // TODO: Can this be changed to an aggregation? - // An aggregation is limited (?) to 10,000 which could miss some queries. - - // Get queries from the UBI queries index. 
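The PPTSS weighting itself is unchanged by this diff (only the query retrieval changes), so it does not appear below. As a reminder of the technique the class name refers to, here is a minimal, illustrative sketch of probability-proportional-to-size sampling over user-query frequencies; it is not the exact code in this class:

```
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

// Illustrative PPTSS sketch only -- not the implementation in this class.
// Each user query is drawn with probability proportional to how often it occurred.
public class PptssSketch {

    public static void main(String[] args) {

        final Map<String, Long> frequencies = Map.of("laptop", 80L, "shoes", 15L, "socks", 5L);
        final long total = frequencies.values().stream().mapToLong(Long::longValue).sum();

        final int querySetSize = 10;
        final Random random = new Random();
        final Map<String, Long> sample = new HashMap<>();

        for (int i = 0; i < querySetSize; i++) {
            long target = (long) (random.nextDouble() * total);
            for (final Map.Entry<String, Long> entry : frequencies.entrySet()) {
                target -= entry.getValue();
                if (target < 0) {
                    sample.merge(entry.getKey(), 1L, Long::sum);
                    break;
                }
            }
        }

        System.out.println(sample); // e.g. {laptop=8, shoes=1, socks=1}
    }
}
```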
- final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(QueryBuilders.matchAllQuery()); - searchSourceBuilder.size(10000); - final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(10L)); - - final SearchRequest searchRequest = new SearchRequest(SearchQualityEvaluationPlugin.UBI_QUERIES_INDEX_NAME); - searchRequest.scroll(scroll); - searchRequest.source(searchSourceBuilder); - - // TODO: Don't use .get() - SearchResponse searchResponse = client.search(searchRequest).get(); - - String scrollId = searchResponse.getScrollId(); - SearchHit[] searchHits = searchResponse.getHits().getHits(); - - final Collection userQueries = new ArrayList<>(); - - while (searchHits != null && searchHits.length > 0) { + final Collection ubiQueries = searchEngine.getUbiQueries(); - for(final SearchHit hit : searchHits) { - final Map fields = hit.getSourceAsMap(); - userQueries.add(fields.get("user_query").toString()); - // LOGGER.info("user queries count: {} user query: {}", userQueries.size(), fields.get("user_query").toString()); - } - - final SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId); - scrollRequest.scroll(scroll); - - // TODO: Don't use .get() - searchResponse = client.searchScroll(scrollRequest).get(); - - scrollId = searchResponse.getScrollId(); - searchHits = searchResponse.getHits().getHits(); - - } + final List userQueries = ubiQueries.stream() + .map(UbiQuery::getUserQuery) + .toList(); // LOGGER.info("User queries found: {}", userQueries); @@ -165,7 +123,7 @@ public String sample() throws Exception { } - return indexQuerySet(client, parameters.getName(), parameters.getDescription(), parameters.getSampling(), querySet); + return indexQuerySet(searchEngine, parameters.getName(), parameters.getDescription(), parameters.getSampling(), querySet); } diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/utils/MathUtils.java b/src/main/java/org/opensearch/eval/utils/MathUtils.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/utils/MathUtils.java rename to src/main/java/org/opensearch/eval/utils/MathUtils.java diff --git a/opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/utils/TimeUtils.java b/src/main/java/org/opensearch/eval/utils/TimeUtils.java similarity index 100% rename from opensearch-search-quality-evaluation-plugin/src/main/java/org/opensearch/eval/utils/TimeUtils.java rename to src/main/java/org/opensearch/eval/utils/TimeUtils.java diff --git a/src/main/resources/log4j2.xml b/src/main/resources/log4j2.xml new file mode 100644 index 0000000..a52723f --- /dev/null +++ b/src/main/resources/log4j2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/DcgSearchMetricTest.java b/src/test/java/org/opensearch/eval/metrics/DcgSearchMetricTest.java similarity index 87% rename from opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/DcgSearchMetricTest.java rename to src/test/java/org/opensearch/eval/metrics/DcgSearchMetricTest.java index f3755f3..13da235 100644 --- a/opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/DcgSearchMetricTest.java +++ b/src/test/java/org/opensearch/eval/metrics/DcgSearchMetricTest.java @@ -8,12 +8,15 @@ */ package org.opensearch.eval.metrics; -import 
org.opensearch.test.OpenSearchTestCase; +import org.junit.jupiter.api.Test; import java.util.List; -public class DcgSearchMetricTest extends OpenSearchTestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class DcgSearchMetricTest { + + @Test public void testCalculate() { final int k = 10; @@ -26,6 +29,7 @@ public void testCalculate() { } + @Test public void testCalculateAllZeros() { final int k = 10; diff --git a/opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/NdcgSearchMetricTest.java b/src/test/java/org/opensearch/eval/metrics/NdcgSearchMetricTest.java similarity index 87% rename from opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/NdcgSearchMetricTest.java rename to src/test/java/org/opensearch/eval/metrics/NdcgSearchMetricTest.java index 08795f8..cd7c037 100644 --- a/opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/NdcgSearchMetricTest.java +++ b/src/test/java/org/opensearch/eval/metrics/NdcgSearchMetricTest.java @@ -8,12 +8,15 @@ */ package org.opensearch.eval.metrics; -import org.opensearch.test.OpenSearchTestCase; +import org.junit.jupiter.api.Test; import java.util.List; -public class NdcgSearchMetricTest extends OpenSearchTestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class NdcgSearchMetricTest { + + @Test public void testCalculate() { final int k = 10; @@ -26,6 +29,7 @@ public void testCalculate() { } + @Test public void testCalculateAllZeros() { final int k = 10; diff --git a/opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/PrecisionSearchMetricTest.java b/src/test/java/org/opensearch/eval/metrics/PrecisionSearchMetricTest.java similarity index 83% rename from opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/PrecisionSearchMetricTest.java rename to src/test/java/org/opensearch/eval/metrics/PrecisionSearchMetricTest.java index b6c260f..ed3e5f3 100644 --- a/opensearch-search-quality-evaluation-plugin/src/test/java/org/opensearch/eval/metrics/PrecisionSearchMetricTest.java +++ b/src/test/java/org/opensearch/eval/metrics/PrecisionSearchMetricTest.java @@ -8,12 +8,15 @@ */ package org.opensearch.eval.metrics; -import org.opensearch.test.OpenSearchTestCase; +import org.junit.jupiter.api.Test; import java.util.List; -public class PrecisionSearchMetricTest extends OpenSearchTestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class PrecisionSearchMetricTest { + + @Test public void testCalculate() { final int k = 10;