From 4e6d9a83c3d4df486d58cdab2049703f437f9574 Mon Sep 17 00:00:00 2001 From: Sean Aery Date: Mon, 18 Dec 2023 16:08:50 -0500 Subject: [PATCH] Boost relevance scoring when search terms appear in close proximity. Followup to #532 --- solr/conf/solrconfig.xml | 53 +++++++++++++---------- spec/features/search_query_spec.rb | 18 ++++++++ spec/fixtures/ead/nlm/alphaomegaalpha.xml | 10 +++++ 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/solr/conf/solrconfig.xml b/solr/conf/solrconfig.xml index 80f157d60..098602f73 100644 --- a/solr/conf/solrconfig.xml +++ b/solr/conf/solrconfig.xml @@ -93,6 +93,11 @@ http://wiki.apache.org/solr/LocalParams --> + collection_title_tesim^150 title_tesim^100 @@ -110,27 +115,27 @@ text - collection_title_tesim^150 - title_tesim^100 - normalized_title_teim^100 - unitid_identifier_match^40 - name_teim^10 - place_teim^10 - subject_teim^2 - id - ead_ssi - ref_ssm - unitid_ssm - container_teim - parent_unittitles_tesim - text + collection_title_tesim^300 + title_tesim^200 + normalized_title_teim^200 + unitid_identifier_match^80 + name_teim^20 + place_teim^20 + subject_teim^5 + id^2 + ead_ssi^2 + ref_ssm^2 + unitid_ssm^2 + container_teim^2 + parent_unittitles_tesim^2 + text^2 container_teim - container_teim + container_teim^2 id @@ -139,36 +144,36 @@ unitid_ssm - id - ead_ssi - ref_ssm - unitid_ssm + id^2 + ead_ssi^2 + ref_ssm^2 + unitid_ssm^2 name_teim - name_teim + name_teim^2 place_teim - place_teim + place_teim^2 subject_teim - subject_teim + subject_teim^2 title_tesim normalized_title_teim - title_tesim - normalized_title_teim + title_tesim^2 + normalized_title_teim^2 3 diff --git a/spec/features/search_query_spec.rb b/spec/features/search_query_spec.rb index 3dcb23b42..f9139eec9 100644 --- a/spec/features/search_query_spec.rb +++ b/spec/features/search_query_spec.rb @@ -69,4 +69,22 @@ end end end + + context 'when two terms match two docs but proximity differs (pf test)' do + it 'counts the doc where the terms are in close 
proximity as more relevant' do + visit search_catalog_path q: 'splendiferous escapades', search_field: 'all_fields' + within('.document-position-1') do + expect(page).to have_css '.al-document-abstract-or-scope', + text: /This will test the splendiferous escapades phrase/ + end + end + + it 'counts the doc where the terms are far apart as less relevant' do + visit search_catalog_path q: 'splendiferous escapades', search_field: 'all_fields' + within('.document-position-2') do + expect(page).to have_css '.al-document-abstract-or-scope', + text: /This splendiferous test will help/ + end + end + end end diff --git a/spec/fixtures/ead/nlm/alphaomegaalpha.xml b/spec/fixtures/ead/nlm/alphaomegaalpha.xml index e1a71fb50..ff61c68d3 100644 --- a/spec/fixtures/ead/nlm/alphaomegaalpha.xml +++ b/spec/fixtures/ead/nlm/alphaomegaalpha.xml @@ -680,6 +680,11 @@ 15 + + Scope and Contents Phrase Query (pf) test 1 +

This will test the splendiferous escapades phrase to help ensure that relevance + is impacted by the proximity of search terms.

+
@@ -688,6 +693,11 @@ 1 + + Scope and Contents Phrase Query (pf) test 2 +

This splendiferous test will help to ensure that the relevance + is impacted by the proximity of search term phrase escapades.

+