Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add "latest" and "related" search. #2055

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 2 additions & 25 deletions assets/js/autocomplete/suggestions.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { getSidebarNodes } from '../globals'
import { escapeRegexModifiers, escapeHtmlEntities, isBlank } from '../helpers'
import { isBlank } from '../helpers'
import { highlightMatches } from '../highlighter'

/**
* @typedef Suggestion
Expand Down Expand Up @@ -285,27 +286,3 @@ function startsWith (text, subtext) {
/**
 * Splits a query into tokens on runs of whitespace,
 * ignoring whitespace at either end of the query.
 */
function tokenize (query) {
  const trimmedQuery = query.trim()
  return trimmedQuery.split(/\s+/)
}

/**
 * Builds an HTML string in which every token of the query found in the text is highlighted.
 */
function highlightMatches (text, query) {
  // Longest terms go first so that they are highlighted before any shorter term.
  const byLengthDescending = (left, right) => right.length - left.length
  const terms = tokenize(query)
  terms.sort(byLengthDescending)
  return highlightTerms(text, terms)
}

/**
 * Wraps every case-insensitive occurrence of the given terms in `<em>` tags.
 *
 * Terms are tried in the order given. The matched slice is HTML-escaped;
 * text outside of matches is returned unchanged.
 *
 * @param {string} text - The text to search in.
 * @param {string[]} terms - The terms to highlight, in priority order.
 * @returns {string} The text with the matches wrapped in `<em>`.
 */
function highlightTerms (text, terms) {
  // An empty term matches everywhere with a zero-length hit, leaving `before`
  // equal to `text`, so the recursion below would never terminate.
  const usableTerms = terms.filter(term => term !== '')
  if (usableTerms.length === 0) return text

  const [firstTerm, ...otherTerms] = usableTerms
  const match = text.match(new RegExp(`(.*)(${escapeRegexModifiers(firstTerm)})(.*)`, 'i'))

  if (!match) return highlightTerms(text, otherTerms)

  const [, before, matching, after] = match
  // Note: this has exponential complexity, but we expect just a few terms, so that's fine.
  // `before` and `after` are strictly shorter than `text` because the match is non-empty.
  return `${highlightTerms(before, usableTerms)}<em>${escapeHtmlEntities(matching)}</em>${highlightTerms(after, usableTerms)}`
}
4 changes: 4 additions & 0 deletions assets/js/globals.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,7 @@ export function getSidebarNodes () {
/**
 * Returns `window.versionNodes`, or an empty array when it is not set.
 */
export function getVersionNodes () {
  const { versionNodes } = window
  return versionNodes || []
}

/**
 * Returns `window.searchNodes`, or an empty array when it is not set.
 */
export function getSearchNodes () {
  const { searchNodes } = window
  return searchNodes || []
}
34 changes: 34 additions & 0 deletions assets/js/highlighter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { escapeRegexModifiers, escapeHtmlEntities } from './helpers'

/**
 * Returns an HTML string highlighting the individual tokens from the query.
 *
 * @param {string} text - The text in which matches are highlighted.
 * @param {string|string[]} query - Either a whitespace-separated query string
 *   or an already tokenized list of terms.
 * @param {Object} [opts] - Options forwarded unchanged to `highlightTerms`.
 * @returns {string} The value returned by `highlightTerms`.
 */
export function highlightMatches (text, query, opts = {}) {
  const tokens = typeof query === 'string' ? query.split(/\s+/) : query

  // Leading/trailing whitespace in the query produces empty tokens; they carry
  // no information and must not reach the highlighter. `filter` also returns a
  // new array, so the caller's list is not reordered by the sort below.
  const terms = tokens
    .filter(term => term !== '')
    // Sort terms by length, so that the longest are highlighted first.
    .sort((term1, term2) => term2.length - term1.length)

  return highlightTerms(text, terms, opts)
}

/**
 * Wraps every case-insensitive occurrence of the given terms in `<em>` tags.
 *
 * Terms are tried in the order given. The matched slice is HTML-escaped;
 * text outside of matches is returned unchanged.
 *
 * @param {string} text - The text to search in.
 * @param {string[]} terms - The terms to highlight, in priority order.
 * @param {Object} [opts]
 * @param {boolean} [opts.multiline] - When truthy, the regex uses the `s`
 *   flag so that `.` also matches line breaks.
 * @returns {string} The text with the matches wrapped in `<em>`.
 */
function highlightTerms (text, terms, opts = {}) {
  // An empty term matches everywhere with a zero-length hit, leaving `before`
  // equal to `text`, so the recursion below would never terminate.
  const usableTerms = terms.filter(term => term !== '')
  if (usableTerms.length === 0) return text

  const flags = opts.multiline ? 'is' : 'i'

  const [firstTerm, ...otherTerms] = usableTerms
  const match = text.match(new RegExp(`(.*)(${escapeRegexModifiers(firstTerm)})(.*)`, flags))

  if (!match) return highlightTerms(text, otherTerms, opts)

  const [, before, matching, after] = match
  // Note: this has exponential complexity, but we expect just a few terms, so that's fine.
  // `before` and `after` are strictly shorter than `text` because the match is non-empty.
  return `${highlightTerms(before, usableTerms, opts)}<em>${escapeHtmlEntities(matching)}</em>${highlightTerms(after, usableTerms, opts)}`
}
111 changes: 84 additions & 27 deletions assets/js/search-page.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import lunr from 'lunr'
import { qs, escapeHtmlEntities, isBlank, getQueryParamByName, getProjectNameAndVersion } from './helpers'
import { setSearchInputValue } from './search-bar'
import searchResultsTemplate from './handlebars/templates/search-results.handlebars'
import { getSearchNodes } from './globals'
import { highlightMatches } from './highlighter'

const EXCERPT_RADIUS = 80
const SEARCH_CONTAINER_SELECTOR = '#search'
Expand All @@ -23,41 +25,96 @@ lunr.Pipeline.registerFunction(docTrimmerFunction, 'docTrimmer')
window.addEventListener('swup:page:view', initialize)
initialize()

function initialize () {
/**
 * Runs a search when the current page is the search page.
 *
 * The page is recognised by a pathname ending in `/search.html` or `/search`.
 * The `q` and `type` query parameters are read and passed to `search`.
 */
function initialize() {
  const { pathname } = window.location
  const isSearchPage = pathname.endsWith('/search.html') || pathname.endsWith('/search')
  if (!isSearchPage) return

  const query = getQueryParamByName('q')
  const queryType = getQueryParamByName('type')
  // Search exactly once: an additional `search(query)` call would start a
  // second, untyped search whose render races with this one.
  search(query, queryType)
}

async function search (value) {
async function search(value, queryType) {
if (isBlank(value)) {
renderResults({ value })
} else {
setSearchInputValue(value)

const index = await getIndex()

try {
// We cannot match on atoms :foo because that would be considered
// a filter. So we escape all colons not preceded by a word.
const fixedValue = value.replaceAll(/(\B|\\):/g, '\\:')
const results = searchResultsToDecoratedSearchItems(index.search(fixedValue))
let results = []
const searchNodes = getSearchNodes()

if (['related', 'latest'].includes(queryType) && searchNodes.length > 0) {
results = await remoteSearch(value, queryType, searchNodes)
Copy link

@ruslandoga ruslandoga Jan 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a couple nitpicks :)

Can we have a race condition here, where the previous request returns after the current one and overwrites the items with stale results? I think it's possible with multiple HTTP/1.1 connections, but I'm not sure about multiple streams on the same HTTP/2 connection: are they guaranteed to be ordered? Or maybe the JS runtime resolves it in some way?

Also, do we need to debounce on remote search or check for response.ok and results.length > 0?

For some reason I decided to do these things in ruslandoga#1 but I don't remember if I actually had these problems or was just playing it safe ...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I'm sure you're right. As you can probably tell it's been almost a decade since I wrote any JavaScript so I'm still getting the hang of the new idioms.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So looking at the code more carefully, it appears that the search function is only called on page load, so should only be run once in the page's lifecycle.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, the search result handlebars template takes care of whether any results were actually returned.

} else {
results = await localSearch(value)
}

renderResults({ value, results })
} catch (error) {
renderResults({ value, errorMessage: error.message })
}
}
}

function renderResults ({ value, results, errorMessage }) {
/**
 * Searches the index returned by `getIndex` and returns the decorated search items.
 */
async function localSearch(value) {
  const searchIndex = await getIndex()

  // A leading colon (as in the atom :foo) would be read as a field filter,
  // so every colon that does not follow a word character is escaped.
  const escapedQuery = value.replaceAll(/(\B|\\):/g, '\\:')
  const rawResults = searchIndex.search(escapedQuery)

  return searchResultsToDecoratedSearchItems(rawResults)
}

/**
 * Splits a `<name>-<version>` package identifier at its first hyphen.
 *
 * Splitting at the first hyphen keeps hyphens that belong to the version
 * (for example the pre-release `1.7.0-rc.1`) intact.
 * NOTE(review): this assumes package names never contain a hyphen — confirm.
 */
function splitPackageId(packageId) {
  const separatorIndex = packageId.indexOf('-')
  if (separatorIndex === -1) return [packageId, null]
  return [packageId.slice(0, separatorIndex), packageId.slice(separatorIndex + 1)]
}

/**
 * Queries https://search.hexdocs.pm/ and converts its hits into search items.
 *
 * @param {string} value - The search query.
 * @param {string} queryType - `'latest'` restricts the search to the first
 *   entry of `searchNodes`; any other value searches all of them.
 * @param {Array<{name: string, version: string}>} searchNodes - Packages to
 *   search in, sent as `<name>-<version>` filters.
 * @returns {Promise<Object[]>} Items with `doc`, `excerpts`, `metadata`, `ref`,
 *   `title` and `type`; an empty array when the payload has no `hits` array.
 * @throws {Error} When the service answers with a non-2xx status.
 */
async function remoteSearch(value, queryType, searchNodes) {
  const filterNodes = queryType === 'latest' ? searchNodes.slice(0, 1) : searchNodes
  const filters = filterNodes.map(node => `${node.name}-${node.version}`).join(',')

  const params = new URLSearchParams()
  params.set('q', value)
  params.set('query_by', 'title,doc')
  params.set('filter_by', `package:=[${filters}]`)

  const response = await fetch(`https://search.hexdocs.pm/?${params.toString()}`)

  // Surface HTTP failures as errors instead of presenting them as "no results".
  if (!response.ok) {
    throw new Error(`Search request failed with status ${response.status}`)
  }

  const payload = await response.json()
  if (!Array.isArray(payload.hits)) return []

  return payload.hits.map(hit => {
    const [packageName, packageVersion] = splitPackageId(hit.document.package)
    // Without a version there is nothing meaningful to put in the version
    // segment, so it is left out rather than rendered as "undefined".
    const packagePath = packageVersion === null ? packageName : `${packageName}/${packageVersion}`

    const doc = highlightMatches(hit.document.doc, value, { multiline: true })

    return {
      doc,
      excerpts: [doc],
      metadata: {},
      ref: `https://hexdocs.pm/${packagePath}/${hit.document.ref}`,
      title: hit.document.title,
      type: hit.document.type
    }
  })
}

/**
 * Renders the search results template and places the resulting HTML
 * into the search container element.
 */
function renderResults({ value, results, errorMessage }) {
  qs(SEARCH_CONTAINER_SELECTOR).innerHTML = searchResultsTemplate({ value, results, errorMessage })
}

async function getIndex () {
async function getIndex() {
const cachedIndex = await loadIndex()
if (cachedIndex) { return cachedIndex }

Expand All @@ -66,7 +123,7 @@ async function getIndex () {
return index
}

async function loadIndex () {
async function loadIndex() {
try {
const serializedIndex = sessionStorage.getItem(indexStorageKey())
if (serializedIndex) {
Expand All @@ -81,7 +138,7 @@ async function loadIndex () {
}
}

async function saveIndex (index) {
async function saveIndex(index) {
try {
const serializedIndex = await compress(index)
sessionStorage.setItem(indexStorageKey(), serializedIndex)
Expand All @@ -90,7 +147,7 @@ async function saveIndex (index) {
}
}

async function compress (index) {
async function compress(index) {
const stream = new Blob([JSON.stringify(index)], {
type: 'application/json'
}).stream().pipeThrough(new window.CompressionStream('gzip'))
Expand All @@ -100,7 +157,7 @@ async function compress (index) {
return b64encode(buffer)
}

async function decompress (index) {
async function decompress(index) {
const stream = new Blob([b64decode(index)], {
type: 'application/json'
}).stream().pipeThrough(new window.DecompressionStream('gzip'))
Expand All @@ -109,7 +166,7 @@ async function decompress (index) {
return JSON.parse(blob)
}

function b64encode (buffer) {
function b64encode(buffer) {
let binary = ''
const bytes = new Uint8Array(buffer)
const len = bytes.byteLength
Expand All @@ -119,7 +176,7 @@ function b64encode (buffer) {
return window.btoa(binary)
}

function b64decode (str) {
function b64decode(str) {
const binaryString = window.atob(str)
const len = binaryString.length
const bytes = new Uint8Array(new ArrayBuffer(len))
Expand All @@ -129,11 +186,11 @@ function b64decode (str) {
return bytes
}

function indexStorageKey () {
/**
 * Builds the storage key for the index: the `idv5:` prefix followed by
 * the value of `getProjectNameAndVersion()`.
 */
function indexStorageKey() {
  const projectNameAndVersion = getProjectNameAndVersion()
  return `idv5:${projectNameAndVersion}`
}

function createIndex () {
function createIndex() {
return lunr(function () {
this.ref('ref')
this.field('title', { boost: 3 })
Expand All @@ -151,11 +208,11 @@ function createIndex () {
})
}

function docTokenSplitter (builder) {
/**
 * Registers `docTokenFunction` in the builder's pipeline, ahead of the lunr stemmer.
 */
function docTokenSplitter(builder) {
  const { pipeline } = builder
  pipeline.before(lunr.stemmer, docTokenFunction)
}

function docTokenFunction (token) {
function docTokenFunction(token) {
// If we have something with an arity, we split on : . to make partial
// matches easier. We split only when tokenizing, not when searching.
// Below we use ExDoc.Markdown.to_ast/2 as an example.
Expand Down Expand Up @@ -219,11 +276,11 @@ function docTokenFunction (token) {
return tokens
}

function docTrimmer (builder) {
/**
 * Registers `docTrimmerFunction` in the builder's pipeline, ahead of the lunr stemmer.
 */
function docTrimmer(builder) {
  const { pipeline } = builder
  pipeline.before(lunr.stemmer, docTrimmerFunction)
}

function docTrimmerFunction (token) {
function docTrimmerFunction(token) {
// Preserve @ and : at the beginning of tokens,
// and ? and ! at the end of tokens. It needs to
// be done before stemming, otherwise search and
Expand All @@ -233,7 +290,7 @@ function docTrimmerFunction (token) {
})
}

function searchResultsToDecoratedSearchItems (results) {
function searchResultsToDecoratedSearchItems(results) {
return results
// If the docs are regenerated without changing its version,
// a reference may have been doc'ed false in the code but
Expand All @@ -250,11 +307,11 @@ function searchResultsToDecoratedSearchItems (results) {
})
}

function getSearchItemByRef (ref) {
/**
 * Looks up the item in `searchData.items` whose `ref` equals the given one.
 * Returns `null` when no item matches.
 */
function getSearchItemByRef(ref) {
  const hasSameRef = candidate => candidate.ref === ref
  const found = searchData.items.find(hasSameRef)
  return found || null
}

function getExcerpts (searchItem, metadata) {
function getExcerpts(searchItem, metadata) {
const { doc } = searchItem
const searchTerms = Object.keys(metadata)

Expand All @@ -275,7 +332,7 @@ function getExcerpts (searchItem, metadata) {
return excerpts.slice(0, 1)
}

function excerpt (doc, sliceStart, sliceLength) {
function excerpt(doc, sliceStart, sliceLength) {
const startPos = Math.max(sliceStart - EXCERPT_RADIUS, 0)
const endPos = Math.min(sliceStart + sliceLength + EXCERPT_RADIUS, doc.length)
return [
Expand Down
Loading