From c27a4d7807af87024724f957e12698def4efdef6 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Mon, 9 Dec 2024 09:05:43 +0100
Subject: [PATCH 01/25] First try of turning the index building into a free
 function that can be used in a library.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 CMakeLists.txt                 |   1 +
 src/index/IndexBuilderMain.cpp |   1 -
 src/libqlever/CMakeLists.txt   |   0
 src/libqlever/Qlever.cpp       |   5 ++
 src/libqlever/Qlever.h         | 111 +++++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 src/libqlever/CMakeLists.txt
 create mode 100644 src/libqlever/Qlever.cpp
 create mode 100644 src/libqlever/Qlever.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0503cd210f..a9daa916a3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -405,6 +405,7 @@ target_precompile_headers(engine PRIVATE ${PRECOMPILED_HEADER_FILES_ENGINE})
 add_subdirectory(src/index)
 add_subdirectory(src/util)
 add_subdirectory(benchmark)
+add_subdirectory(src/libqlever)
 
 enable_testing()
 option(SINGLE_TEST_BINARY "Link all unit tests into a single binary. This is useful e.g. for code coverage tools" OFF)
diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp
index 1b500c9dde..5cca43f77d 100644
--- a/src/index/IndexBuilderMain.cpp
+++ b/src/index/IndexBuilderMain.cpp
@@ -165,7 +165,6 @@ int main(int argc, char** argv) {
   bool onlyPsoAndPos = false;
   bool addWordsFromLiterals = false;
   std::optional<ad_utility::MemorySize> stxxlMemory;
-  optind = 1;
 
   Index index{ad_utility::makeUnlimitedAllocator<Id>()};
 
diff --git a/src/libqlever/CMakeLists.txt b/src/libqlever/CMakeLists.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/libqlever/Qlever.cpp b/src/libqlever/Qlever.cpp
new file mode 100644
index 0000000000..2d9c407597
--- /dev/null
+++ b/src/libqlever/Qlever.cpp
@@ -0,0 +1,5 @@
+//
+// Created by kalmbacj on 12/9/24.
+//
+
+#include "Qlever.h"
diff --git a/src/libqlever/Qlever.h b/src/libqlever/Qlever.h
new file mode 100644
index 0000000000..bc183b3706
--- /dev/null
+++ b/src/libqlever/Qlever.h
@@ -0,0 +1,111 @@
+//  Copyright 2024, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#pragma once
+
+#include <util/MemorySize/MemorySize.h>
+
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "index/Index.h"
+#include "index/InputFileSpecification.h"
+#include "util/AllocatorWithLimit.h"
+
+namespace qlever {
+
+struct IndexBuilderConfig {
+  std::string baseName;
+  std::string wordsfile;
+  std::string docsfile;
+  std::string textIndexName;
+  std::string kbIndexName;
+  std::string settingsFile;
+  std::vector<qlever::InputFileSpecification> inputFiles;
+  bool noPatterns = false;
+  bool onlyAddTextIndex = false;
+  bool keepTemporaryFiles = false;
+  bool onlyPsoAndPos = false;
+  bool addWordsFromLiterals = false;
+  std::optional<ad_utility::MemorySize> stxxlMemory;
+};
+
+string getStxxlConfigFileName(const string& location) {
+  return absl::StrCat(location, ".stxxl");
+}
+
+string getStxxlDiskFileName(const string& location, const string& tail) {
+  return absl::StrCat(location, tail, ".stxxl-disk");
+}
+
+// Write a .stxxl config-file.
+// All we want is sufficient space somewhere with enough space.
+// We can use the location of input files and use a constant size for now.
+// The required size can only be estimated anyway, since index size
+// depends on the structure of words files rather than their size only,
+// because of the "multiplications" performed.
+void writeStxxlConfigFile(const string& location, const string& tail) {
+  string stxxlConfigFileName = getStxxlConfigFileName(location);
+  ad_utility::File stxxlConfig(stxxlConfigFileName, "w");
+  auto configFile = ad_utility::makeOfstream(stxxlConfigFileName);
+  // Inform stxxl about .stxxl location
+  setenv("STXXLCFG", stxxlConfigFileName.c_str(), true);
+  configFile << "disk=" << getStxxlDiskFileName(location, tail) << ","
+             << STXXL_DISK_SIZE_INDEX_BUILDER << ",syscall\n";
+}
+
+class Qlever {
+  void buildIndex(IndexBuilderConfig config) {
+    Index index{ad_utility::makeUnlimitedAllocator<Id>()};
+
+    if (config.stxxlMemory.has_value()) {
+      index.memoryLimitIndexBuilding() = config.stxxlMemory.value();
+    }
+    // If no text index name was specified, take the part of the wordsfile after
+    // the last slash.
+    if (config.textIndexName.empty() && !config.wordsfile.empty()) {
+      config.textIndexName =
+          ad_utility::getLastPartOfString(config.wordsfile, '/');
+    }
+    try {
+      LOG(TRACE) << "Configuring STXXL..." << std::endl;
+      size_t posOfLastSlash = config.baseName.rfind('/');
+      string location = config.baseName.substr(0, posOfLastSlash + 1);
+      string tail = config.baseName.substr(posOfLastSlash + 1);
+      writeStxxlConfigFile(location, tail);
+      string stxxlFileName = getStxxlDiskFileName(location, tail);
+      LOG(TRACE) << "done." << std::endl;
+
+      index.setKbName(config.kbIndexName);
+      index.setTextName(config.textIndexName);
+      index.usePatterns() = !config.noPatterns;
+      index.setOnDiskBase(config.baseName);
+      index.setKeepTempFiles(config.keepTemporaryFiles);
+      index.setSettingsFile(config.settingsFile);
+      index.loadAllPermutations() = !config.onlyPsoAndPos;
+
+      if (!config.onlyAddTextIndex) {
+        AD_CONTRACT_CHECK(!config.inputFiles.empty());
+        index.createFromFiles(config.inputFiles);
+      }
+
+      if (!config.wordsfile.empty() || config.addWordsFromLiterals) {
+        index.addTextFromContextFile(config.wordsfile,
+                                     config.addWordsFromLiterals);
+      }
+
+      if (!config.docsfile.empty()) {
+        index.buildDocsDB(config.docsfile);
+      }
+      ad_utility::deleteFile(stxxlFileName, false);
+    } catch (std::exception& e) {
+      LOG(ERROR) << "Creating the index for QLever failed with the following "
+                    "exception: "
+                 << e.what() << std::endl;
+      throw;
+    }
+  }
+};
+}  // namespace qlever

From 237e93cab050b5caa43f5708618904a60f88ee71 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Mon, 9 Dec 2024 11:18:18 +0100
Subject: [PATCH 02/25] Move the index building into a `Qlever` library class,
 add a query interface and an example program.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/libqlever/CMakeLists.txt       |   5 +
 src/libqlever/LibQLeverExample.cpp |  16 +++
 src/libqlever/Qlever.cpp           | 162 ++++++++++++++++++++++++++++-
 src/libqlever/Qlever.h             | 152 +++++++++++++--------------
 4 files changed, 249 insertions(+), 86 deletions(-)
 create mode 100644 src/libqlever/LibQLeverExample.cpp

diff --git a/src/libqlever/CMakeLists.txt b/src/libqlever/CMakeLists.txt
index e69de29bb2..589c150393 100644
--- a/src/libqlever/CMakeLists.txt
+++ b/src/libqlever/CMakeLists.txt
@@ -0,0 +1,5 @@
+
+add_library(qlever Qlever.cpp)
+qlever_target_link_libraries(qlever parser engine util index absl::strings)
+add_executable(LibQLeverExample LibQLeverExample.cpp)
+qlever_target_link_libraries(LibQLeverExample parser engine util index qlever absl::strings)
\ No newline at end of file
diff --git a/src/libqlever/LibQLeverExample.cpp b/src/libqlever/LibQLeverExample.cpp
new file mode 100644
index 0000000000..dbc0ffe2e6
--- /dev/null
+++ b/src/libqlever/LibQLeverExample.cpp
@@ -0,0 +1,16 @@
+//  Copyright 2024, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include <iostream>
+
+#include "libqlever/Qlever.h"
+
+int main() {
+  qlever::QleverConfig config;
+  config.baseName = "exampleIndex";
+  config.inputFiles.emplace_back("/dev/stdin", qlever::Filetype::Turtle);
+  qlever::Qlever::buildIndex(config);
+  qlever::Qlever qlever{config};
+  std::cout << qlever.query("SELECT * {?s ?p ?o}") << std::endl;
+}
diff --git a/src/libqlever/Qlever.cpp b/src/libqlever/Qlever.cpp
index 2d9c407597..cc37562dde 100644
--- a/src/libqlever/Qlever.cpp
+++ b/src/libqlever/Qlever.cpp
@@ -1,5 +1,159 @@
-//
-// Created by kalmbacj on 12/9/24.
-//
+//  Copyright 2024, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
-#include "Qlever.h"
+#include "libqlever/Qlever.h"
+
+namespace qlever {
+static std::string getStxxlConfigFileName(const string& location) {
+  return absl::StrCat(location, ".stxxl");
+}
+
+static std::string getStxxlDiskFileName(const string& location,
+                                        const string& tail) {
+  return absl::StrCat(location, tail, ".stxxl-disk");
+}
+
+// Write a `.stxxl` config file and tell STXXL about it via `STXXLCFG`.
+// The config declares a single disk file `<location><tail>.stxxl-disk` of the
+// constant size `STXXL_DISK_SIZE_INDEX_BUILDER`. A constant is used for now
+// because the required size can only be estimated anyway: the index size
+// depends on the structure of the words files rather than only on their size,
+// because of the "multiplications" performed.
+static void writeStxxlConfigFile(const string& location, const string& tail) {
+  const string stxxlConfigFileName = getStxxlConfigFileName(location);
+  // Open the config file for writing (a single handle is sufficient).
+  auto configFile = ad_utility::makeOfstream(stxxlConfigFileName);
+  // Inform stxxl about the `.stxxl` location (overwriting an existing value).
+  setenv("STXXLCFG", stxxlConfigFileName.c_str(), true);
+  configFile << "disk=" << getStxxlDiskFileName(location, tail) << ","
+             << STXXL_DISK_SIZE_INDEX_BUILDER << ",syscall\n";
+}
+
+// _____________________________________________________________________________
+Qlever::Qlever(const QleverConfig& config)
+    // No memory limit => unlimited allocator (avoids `bad_optional_access`).
+    : allocator_{config.memoryLimit.has_value()
+                     ? ad_utility::AllocatorWithLimit<Id>{
+                           ad_utility::makeAllocationMemoryLeftThreadsafeObject(
+                               config.memoryLimit.value())}
+                     : ad_utility::makeUnlimitedAllocator<Id>()},
+      index_{allocator_} {
+  ad_utility::setGlobalLoggingStream(&ignoreLogStream);
+  // This also directly triggers the update functions and propagates the
+  // values of the parameters to the cache.
+  // NOTE(review): the callbacks capture `this`; confirm they never outlive it.
+  RuntimeParameters().setOnUpdateAction<"cache-max-num-entries">(
+      [this](size_t newValue) { cache_.setMaxNumEntries(newValue); });
+  RuntimeParameters().setOnUpdateAction<"cache-max-size">(
+      [this](ad_utility::MemorySize newValue) { cache_.setMaxSize(newValue); });
+  RuntimeParameters().setOnUpdateAction<"cache-max-size-single-entry">(
+      [this](ad_utility::MemorySize newValue) {
+        cache_.setMaxSizeSingleEntry(newValue);
+      });
+  index_.usePatterns() = !config.noPatterns;
+  enablePatternTrick_ = !config.noPatterns;
+  index_.loadAllPermutations() = !config.onlyPsoAndPos;
+
+  // Init the index.
+  index_.createFromOnDiskIndex(config.baseName);
+  // TODO<joka921> Enable the loading of the text index via the QLever lib
+  // (i.e. call `index_.addTextFromOnDiskIndex()` when text is requested).
+
+  sortPerformanceEstimator_.computeEstimatesExpensively(
+      allocator_, index_.numTriples().normalAndInternal_() *
+                      PERCENTAGE_OF_TRIPLES_FOR_SORT_ESTIMATE / 100);
+}
+
+// _____________________________________________________________________________
+// Build the on-disk index for `config`; errors are logged and then rethrown.
+void Qlever::buildIndex(QleverConfig config) {
+  ad_utility::setGlobalLoggingStream(&ignoreLogStream);
+  Index index{ad_utility::makeUnlimitedAllocator<Id>()};
+  if (config.memoryLimit.has_value()) {
+    index.memoryLimitIndexBuilding() = config.memoryLimit.value();
+  }
+  // If no text index name was specified, take the part of the wordsfile after
+  // the last slash.
+  if (config.textIndexName.empty() && !config.wordsfile.empty()) {
+    config.textIndexName =
+        ad_utility::getLastPartOfString(config.wordsfile, '/');
+  }
+  try {
+    LOG(TRACE) << "Configuring STXXL..." << std::endl;
+    // Without a '/' in `baseName`, `npos + 1` wraps to 0: `location` is empty.
+    size_t posOfLastSlash = config.baseName.rfind('/');
+    string location = config.baseName.substr(0, posOfLastSlash + 1);
+    string tail = config.baseName.substr(posOfLastSlash + 1);
+    writeStxxlConfigFile(location, tail);
+    string stxxlFileName = getStxxlDiskFileName(location, tail);
+    LOG(TRACE) << "done." << std::endl;
+
+    index.setKbName(config.kbIndexName);
+    index.setTextName(config.textIndexName);
+    index.usePatterns() = !config.noPatterns;
+    index.setOnDiskBase(config.baseName);
+    index.setKeepTempFiles(config.keepTemporaryFiles);
+    index.setSettingsFile(config.settingsFile);
+    index.loadAllPermutations() = !config.onlyPsoAndPos;
+
+    if (!config.onlyAddTextIndex) {
+      AD_CONTRACT_CHECK(!config.inputFiles.empty());
+      index.createFromFiles(config.inputFiles);
+    }
+    if (!config.wordsfile.empty() || config.addWordsFromLiterals) {
+      index.addTextFromContextFile(config.wordsfile,
+                                   config.addWordsFromLiterals);
+    }
+    if (!config.docsfile.empty()) {
+      index.buildDocsDB(config.docsfile);
+    }
+    ad_utility::deleteFile(stxxlFileName, false);
+  } catch (const std::exception& e) {
+    LOG(ERROR) << "Creating the index for QLever failed with the following "
+                  "exception: "
+               << e.what() << std::endl;
+    throw;
+  }
+}
+
+// ___________________________________________________________________________
+std::string Qlever::query(std::string query) {
+  QueryExecutionContext qec{index_, &cache_, allocator_,
+                            sortPerformanceEstimator_};
+  auto parsedQuery = SparqlParser::parseQuery(query);
+  auto handle = std::make_shared<ad_utility::CancellationHandle<>>();
+  QueryPlanner qp{&qec, handle};
+  qp.setEnablePatternTrick(enablePatternTrick_);
+  auto qet = qp.createExecutionTree(parsedQuery);
+  qet.isRoot() = true;
+  auto& limitOffset = parsedQuery._limitOffset;
+
+  // TODO<joka921> For cancellation we have to call
+  // `recursivelySetCancellationHandle` (see `Server::parseAndPlan`).
+  // TODO<joka921> The following interface looks fishy and should be
+  // incorporated directly in the query planner or somewhere else
+  // (it is used identically in `Server.cpp`).
+
+  // Make sure that the offset is not applied again when exporting the result
+  // (it is already applied by the root operation in the query execution
+  // tree). Note that we don't need this for the limit because applying a
+  // fixed limit is idempotent.
+  AD_CORRECTNESS_CHECK(limitOffset._offset >=
+                       qet.getRootOperation()->getLimit()._offset);
+  limitOffset._offset -= qet.getRootOperation()->getLimit()._offset;
+
+  ad_utility::Timer timer{ad_utility::Timer::Started};
+  auto responseGenerator = ExportQueryExecutionTrees::computeResult(
+      parsedQuery, qet, ad_utility::MediaType::sparqlJson, timer,
+      std::move(handle));
+  std::string result;
+  // Use the log (not `std::cout`) so that the caller's stdout stays untouched.
+  LOG(DEBUG) << "Writing the result:" << std::endl;
+  for (const auto& batch : responseGenerator) {
+    result += batch;
+  }
+  return result;
+}
+}  // namespace qlever
diff --git a/src/libqlever/Qlever.h b/src/libqlever/Qlever.h
index bc183b3706..b69dcf9588 100644
--- a/src/libqlever/Qlever.h
+++ b/src/libqlever/Qlever.h
@@ -8,104 +8,92 @@
 
 #include <optional>
 #include <string>
+#include <utility>
 #include <vector>
 
+#include "engine/ExportQueryExecutionTrees.h"
+#include "engine/QueryExecutionContext.h"
+#include "engine/QueryPlanner.h"
+#include "global/RuntimeParameters.h"
 #include "index/Index.h"
 #include "index/InputFileSpecification.h"
+#include "parser/SparqlParser.h"
 #include "util/AllocatorWithLimit.h"
+#include "util/http/MediaTypes.h"
 
 namespace qlever {
 
-struct IndexBuilderConfig {
+// A configuration for a QLever instance.
+struct QleverConfig {
+  // A basename for all files that QLever will write as part of the index
+  // building.
   std::string baseName;
+
+  // The specification of the input files (Turtle/NT or NQuad) from which the
+  // index will be built.
+  std::vector<qlever::InputFileSpecification> inputFiles;
+
+  // A memory limit that will be applied during the index building as well as
+  // during the query processing.
+  std::optional<ad_utility::MemorySize> memoryLimit =
+      ad_utility::MemorySize::gigabytes(1);
+
+  // If set to true, then no so-called patterns will be built. Patterns are
+  // useful for autocompletion and for certain statistics queries, but not for
+  // typical SELECT queries.
+  bool noPatterns = false;
+
+  // Only build two permutations. This is sufficient if all queries have a fixed
+  // predicate.
+  // TODO<joka921> We haven't tested this mode in a while, it is currently
+  // probably broken because the UPDATE mechanism doesn't support only two
+  // permutations.
+  bool onlyPsoAndPos = false;
+
+  // Optionally a filename to a .json file with additional settings...
+  // TODO<joka921> Make these settings part of this struct directly
+  // TODO<joka921> Document these additional settings.
+  std::string settingsFile;
+
+  // The following members are only required if QLever's full-text search
+  // extension is to be used, see `IndexBuilderMain.cpp` for additional details.
+  bool addWordsFromLiterals = false;
+  std::string kbIndexName;
   std::string wordsfile;
   std::string docsfile;
   std::string textIndexName;
-  std::string kbIndexName;
-  std::string settingsFile;
-  std::vector<qlever::InputFileSpecification> inputFiles;
-  bool noPatterns = false;
   bool onlyAddTextIndex = false;
+
+  // If set to true, then certain temporary files which are created while
+  // building the index are not deleted. This can be useful for debugging.
   bool keepTemporaryFiles = false;
-  bool onlyPsoAndPos = false;
-  bool addWordsFromLiterals = false;
-  std::optional<ad_utility::MemorySize> stxxlMemory;
 };
 
-string getStxxlConfigFileName(const string& location) {
-  return absl::StrCat(location, ".stxxl");
-}
-
-string getStxxlDiskFileName(const string& location, const string& tail) {
-  return absl::StrCat(location, tail, ".stxxl-disk");
-}
-
-// Write a .stxxl config-file.
-// All we want is sufficient space somewhere with enough space.
-// We can use the location of input files and use a constant size for now.
-// The required size can only be estimated anyway, since index size
-// depends on the structure of words files rather than their size only,
-// because of the "multiplications" performed.
-void writeStxxlConfigFile(const string& location, const string& tail) {
-  string stxxlConfigFileName = getStxxlConfigFileName(location);
-  ad_utility::File stxxlConfig(stxxlConfigFileName, "w");
-  auto configFile = ad_utility::makeOfstream(stxxlConfigFileName);
-  // Inform stxxl about .stxxl location
-  setenv("STXXLCFG", stxxlConfigFileName.c_str(), true);
-  configFile << "disk=" << getStxxlDiskFileName(location, tail) << ","
-             << STXXL_DISK_SIZE_INDEX_BUILDER << ",syscall\n";
-}
-
+// A class that can be used to use QLever without the HTTP server, e.g. as part
+// of another program.
 class Qlever {
-  void buildIndex(IndexBuilderConfig config) {
-    Index index{ad_utility::makeUnlimitedAllocator<Id>()};
-
-    if (config.stxxlMemory.has_value()) {
-      index.memoryLimitIndexBuilding() = config.stxxlMemory.value();
-    }
-    // If no text index name was specified, take the part of the wordsfile after
-    // the last slash.
-    if (config.textIndexName.empty() && !config.wordsfile.empty()) {
-      config.textIndexName =
-          ad_utility::getLastPartOfString(config.wordsfile, '/');
-    }
-    try {
-      LOG(TRACE) << "Configuring STXXL..." << std::endl;
-      size_t posOfLastSlash = config.baseName.rfind('/');
-      string location = config.baseName.substr(0, posOfLastSlash + 1);
-      string tail = config.baseName.substr(posOfLastSlash + 1);
-      writeStxxlConfigFile(location, tail);
-      string stxxlFileName = getStxxlDiskFileName(location, tail);
-      LOG(TRACE) << "done." << std::endl;
-
-      index.setKbName(config.kbIndexName);
-      index.setTextName(config.textIndexName);
-      index.usePatterns() = !config.noPatterns;
-      index.setOnDiskBase(config.baseName);
-      index.setKeepTempFiles(config.keepTemporaryFiles);
-      index.setSettingsFile(config.settingsFile);
-      index.loadAllPermutations() = !config.onlyPsoAndPos;
-
-      if (!config.onlyAddTextIndex) {
-        AD_CONTRACT_CHECK(!config.inputFiles.empty());
-        index.createFromFiles(config.inputFiles);
-      }
-
-      if (!config.wordsfile.empty() || config.addWordsFromLiterals) {
-        index.addTextFromContextFile(config.wordsfile,
-                                     config.addWordsFromLiterals);
-      }
-
-      if (!config.docsfile.empty()) {
-        index.buildDocsDB(config.docsfile);
-      }
-      ad_utility::deleteFile(stxxlFileName, false);
-    } catch (std::exception& e) {
-      LOG(ERROR) << "Creating the index for QLever failed with the following "
-                    "exception: "
-                 << e.what() << std::endl;
-      throw;
-    }
-  }
+ private:
+  QueryResultCache cache_;
+  ad_utility::AllocatorWithLimit<Id> allocator_;
+  SortPerformanceEstimator sortPerformanceEstimator_;
+  Index index_;
+  bool enablePatternTrick_;
+  static inline std::ostringstream ignoreLogStream;
+
+ public:
+  // Build a persistent on disk index using the `config`.
+  static void buildIndex(QleverConfig config);
+
+  // Load the qlever index from file.
+  explicit Qlever(const QleverConfig& config);
+
+  // Run the given query on the index. Currently only SELECT and ASK queries are
+  // supported, and the result will always be in sparql-results+json format.
+  // TODO<joka921> Support other formats + CONSTRUCT queries, support
+  // cancellation, time limits, and observable queries.
+  std::string query(std::string query);
+
+  // TODO<joka921> Give access to the RuntimeParameters() which allow for
+  // further tweaking of the qlever instance.
 };
 }  // namespace qlever

From 10d492df9d8f53cd2b55f319d65674ea59abc7b4 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Mon, 16 Dec 2024 21:35:03 +0100
Subject: [PATCH 03/25] Optimize some includes.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 CMakeLists.txt            | 2 +-
 src/engine/CMakeLists.txt | 4 +++-
 src/engine/Service.cpp    | 1 -
 src/engine/Service.h      | 4 ++++
 test/CMakeLists.txt       | 2 +-
 5 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a9daa916a3..ceb383ecd7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -420,7 +420,7 @@ add_executable(IndexBuilderMain src/index/IndexBuilderMain.cpp)
 qlever_target_link_libraries(IndexBuilderMain index ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)
 
 add_executable(ServerMain src/ServerMain.cpp)
-qlever_target_link_libraries(ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)
+qlever_target_link_libraries(ServerMain engine server ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)
 target_precompile_headers(ServerMain REUSE_FROM engine)
 
 add_executable(VocabularyMergerMain src/VocabularyMergerMain.cpp)
diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index cbfb3344c3..7105bde2da 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -5,7 +5,7 @@ add_library(engine
         Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp
         IndexScan.cpp Join.cpp Sort.cpp
         Distinct.cpp OrderBy.cpp Filter.cpp
-        Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp
+        QueryPlanner.cpp QueryPlanningCostFactors.cpp
         OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp
         Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp
         TransitivePathHashMap.cpp TransitivePathBinSearch.cpp Service.cpp
@@ -14,4 +14,6 @@ add_library(engine
         CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
         TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
         CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp)
+add_library(server Server.cpp)
+qlever_target_link_libraries(server)
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp
index 8c946a2fb3..cf11babfa4 100644
--- a/src/engine/Service.cpp
+++ b/src/engine/Service.cpp
@@ -19,7 +19,6 @@
 #include "util/HashMap.h"
 #include "util/HashSet.h"
 #include "util/StringUtils.h"
-#include "util/http/HttpUtils.h"
 
 // ____________________________________________________________________________
 Service::Service(QueryExecutionContext* qec,
diff --git a/src/engine/Service.h b/src/engine/Service.h
index 8fef6f5d0e..2267928f7f 100644
--- a/src/engine/Service.h
+++ b/src/engine/Service.h
@@ -12,6 +12,10 @@
 #include "util/LazyJsonParser.h"
 #include "util/http/HttpClient.h"
 
+// Forward declarations to reduce dependencies
+struct HttpOrHttpsResponse;
+namespace ad_utility {}
+
 // The SERVICE operation. Sends a query to the remote endpoint specified by the
 // service IRI, gets the result as JSON, parses it, and writes it into a result
 // table.
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 9dd3a733a9..f85aaa8306 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -433,6 +433,6 @@ addLinkAndDiscoverTest(SparqlExpressionGeneratorsTest engine)
 
 addLinkAndDiscoverTest(UrlParserTest)
 
-addLinkAndDiscoverTest(ServerTest engine)
+addLinkAndDiscoverTest(ServerTest engine server)
 
 addLinkAndDiscoverTest(ExecuteUpdateTest engine)

From 0d4fa20aeb9d764ed975de62d76b5d69462286c9 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 30 Jan 2025 10:16:43 +0100
Subject: [PATCH 04/25] We have the named cache compiling, now let's use it.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 benchmark/GroupByHashMapBenchmark.cpp    |  2 +-
 src/engine/CMakeLists.txt                |  5 +++-
 src/engine/Describe.cpp                  |  2 +-
 src/engine/NamedQueryCache.cpp           |  5 ++++
 src/engine/NamedQueryCache.h             | 28 +++++++++++++++++++
 src/engine/QueryExecutionContext.cpp     | 25 +++++++++++++++++
 src/engine/QueryExecutionContext.h       | 25 +++++++++++------
 src/engine/Server.cpp                    |  8 +++---
 src/engine/Server.h                      |  2 ++
 {test => src}/engine/ValuesForTesting.h  |  0
 test/OperationTest.cpp                   | 35 +++++++++++++++++++-----
 test/engine/BindTest.cpp                 |  2 +-
 test/engine/CartesianProductJoinTest.cpp |  2 +-
 test/engine/LazyGroupByTest.cpp          |  2 +-
 test/engine/QueryExecutionTreeTest.cpp   |  2 +-
 test/util/IdTableHelpers.cpp             |  2 +-
 test/util/IdTableHelpers.h               |  2 +-
 test/util/IndexTestHelpers.cpp           |  7 +++--
 18 files changed, 125 insertions(+), 31 deletions(-)
 create mode 100644 src/engine/NamedQueryCache.cpp
 create mode 100644 src/engine/NamedQueryCache.h
 create mode 100644 src/engine/QueryExecutionContext.cpp
 rename {test => src}/engine/ValuesForTesting.h (100%)

diff --git a/benchmark/GroupByHashMapBenchmark.cpp b/benchmark/GroupByHashMapBenchmark.cpp
index 780785e9bc..1335ebc5bd 100644
--- a/benchmark/GroupByHashMapBenchmark.cpp
+++ b/benchmark/GroupByHashMapBenchmark.cpp
@@ -6,12 +6,12 @@
 #include <random>
 
 #include "../benchmark/infrastructure/Benchmark.h"
-#include "../test/engine/ValuesForTesting.h"
 #include "../test/util/IdTableHelpers.h"
 #include "../test/util/IndexTestHelpers.h"
 #include "engine/GroupBy.h"
 #include "engine/Sort.h"
 #include "engine/Values.h"
+#include "engine/ValuesForTesting.h"
 #include "engine/sparqlExpressions/AggregateExpression.h"
 #include "engine/sparqlExpressions/GroupConcatExpression.h"
 #include "engine/sparqlExpressions/LiteralExpression.h"
diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index e81c834303..d517c0f239 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -14,5 +14,8 @@ add_library(engine
         CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
         TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
         CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
-        Describe.cpp GraphStoreProtocol.cpp)
+        Describe.cpp GraphStoreProtocol.cpp
+        NamedQueryCache.cpp
+        NamedQueryCache.h
+        QueryExecutionContext.cpp)
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
diff --git a/src/engine/Describe.cpp b/src/engine/Describe.cpp
index a0c43222d2..61960d90b2 100644
--- a/src/engine/Describe.cpp
+++ b/src/engine/Describe.cpp
@@ -4,9 +4,9 @@
 
 #include "engine/Describe.h"
 
-#include "../../test/engine/ValuesForTesting.h"
 #include "engine/IndexScan.h"
 #include "engine/Join.h"
+#include "engine/ValuesForTesting.h"
 
 // _____________________________________________________________________________
 Describe::Describe(QueryExecutionContext* qec,
diff --git a/src/engine/NamedQueryCache.cpp b/src/engine/NamedQueryCache.cpp
new file mode 100644
index 0000000000..9d4bbb15a3
--- /dev/null
+++ b/src/engine/NamedQueryCache.cpp
@@ -0,0 +1,5 @@
+//  Copyright 2025, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include "NamedQueryCache.h"
diff --git a/src/engine/NamedQueryCache.h b/src/engine/NamedQueryCache.h
new file mode 100644
index 0000000000..f9d702b4e4
--- /dev/null
+++ b/src/engine/NamedQueryCache.h
@@ -0,0 +1,28 @@
+//  Copyright 2025, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+#pragma once
+
+#include "engine/ValuesForTesting.h"
+#include "util/Cache.h"
+#include "util/Synchronized.h"
+
+class NamedQueryCache {
+ public:
+  using Key = std::string;
+  using Value = std::shared_ptr<ValuesForTesting>;
+  using Cache = ad_utility::HashMap<Key, Value>;
+  // Store `value` under `key`; an existing entry for `key` is overwritten.
+  void store(const Key& key, Value value) {
+    (*cache_.wlock())[key] = std::move(value);
+  }
+  // Get the value for `key` (must exist). TODO<joka921> Proper error message.
+  Value get(const Key& key) {
+    auto lock = cache_.wlock();
+    auto it = lock->find(key);
+    AD_CONTRACT_CHECK(it != lock->end());
+    return it->second;
+  }
+ private:
+  ad_utility::Synchronized<Cache> cache_;
+};
diff --git a/src/engine/QueryExecutionContext.cpp b/src/engine/QueryExecutionContext.cpp
new file mode 100644
index 0000000000..d7c4867898
--- /dev/null
+++ b/src/engine/QueryExecutionContext.cpp
@@ -0,0 +1,25 @@
+//  Copyright 2025, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include "engine/QueryExecutionContext.h"
+
+// _____________________________________________________________________________
+QueryExecutionContext::QueryExecutionContext(
+    const Index& index, QueryResultCache* const cache,
+    ad_utility::AllocatorWithLimit<Id> allocator,
+    SortPerformanceEstimator sortPerformanceEstimator,
+    NamedQueryCache* namedCache,
+    std::function<void(std::string)> updateCallback, const bool pinSubtrees,
+    const bool pinResult)
+    : _pinSubtrees(pinSubtrees),
+      _pinResult(pinResult),
+      _index(index),
+      _subtreeCache(cache),
+      _allocator(std::move(allocator)),
+      _sortPerformanceEstimator(sortPerformanceEstimator),
+      updateCallback_(std::move(updateCallback)),
+      namedQueryCache_{namedCache} {}
+
+// _____________________________________________________________________________
+QueryExecutionContext::~QueryExecutionContext() = default;
diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h
index 1e891a398f..cd1c931952 100644
--- a/src/engine/QueryExecutionContext.h
+++ b/src/engine/QueryExecutionContext.h
@@ -65,6 +65,9 @@ class CacheValue {
   };
 };
 
+// Forward declaration because of cyclic dependencies
+class NamedQueryCache;
+
 // The key for the `QueryResultCache` below. It consists of a `string` (the
 // actual cache key of a `QueryExecutionTree` and the index of the
 // `LocatedTriplesSnapshot` that was used to create the corresponding value.
@@ -89,6 +92,9 @@ struct QueryCacheKey {
 using QueryResultCache = ad_utility::ConcurrentCache<
     ad_utility::LRUCache<QueryCacheKey, CacheValue, CacheValue::SizeGetter>>;
 
+// Forward declaration because of cyclic dependency
+class NamedQueryCache;
+
 // Execution context for queries.
 // Holds references to index and engine, implements caching.
 class QueryExecutionContext {
@@ -97,17 +103,11 @@ class QueryExecutionContext {
       const Index& index, QueryResultCache* const cache,
       ad_utility::AllocatorWithLimit<Id> allocator,
       SortPerformanceEstimator sortPerformanceEstimator,
+      NamedQueryCache* namedCache,
       std::function<void(std::string)> updateCallback =
           [](std::string) { /* No-op by default for testing */ },
-      const bool pinSubtrees = false, const bool pinResult = false)
-      : _pinSubtrees(pinSubtrees),
-        _pinResult(pinResult),
-        _index(index),
-        _subtreeCache(cache),
-        _allocator(std::move(allocator)),
-        _costFactors(),
-        _sortPerformanceEstimator(sortPerformanceEstimator),
-        updateCallback_(std::move(updateCallback)) {}
+      bool pinSubtrees = false, bool pinResult = false);
+  ~QueryExecutionContext();
 
   QueryResultCache& getQueryTreeCache() { return *_subtreeCache; }
 
@@ -151,6 +151,11 @@ class QueryExecutionContext {
     return areWebsocketUpdatesEnabled_;
   }
 
+  NamedQueryCache& namedQueryCache() {
+    AD_CORRECTNESS_CHECK(namedQueryCache_ != nullptr);
+    return *namedQueryCache_;
+  }
+
  private:
   const Index& _index;
 
@@ -170,4 +175,6 @@ class QueryExecutionContext {
   // mutex.
   bool areWebsocketUpdatesEnabled_ =
       RuntimeParameters().get<"websocket-updates-enabled">();
+
+  NamedQueryCache* namedQueryCache_ = nullptr;
 };
diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
index 8c1248203c..7c3c7e2c2e 100644
--- a/src/engine/Server.cpp
+++ b/src/engine/Server.cpp
@@ -814,8 +814,8 @@ Awaitable<void> Server::processQuery(
             << (pinSubtrees ? " [pin subresults]" : "") << "\n"
             << query.query_ << std::endl;
   QueryExecutionContext qec(index_, &cache_, allocator_,
-                            sortPerformanceEstimator_, std::ref(messageSender),
-                            pinSubtrees, pinResult);
+                            sortPerformanceEstimator_, &namedQueryCache_,
+                            std::ref(messageSender), pinSubtrees, pinResult);
 
   // The usage of an `optional` here is required because of a limitation in
   // Boost::Asio which forces us to use default-constructible result types with
@@ -957,8 +957,8 @@ json Server::processUpdateImpl(
             << (pinSubtrees ? " [pin subresults]" : "") << "\n"
             << update.update_ << std::endl;
   QueryExecutionContext qec(index_, &cache_, allocator_,
-                            sortPerformanceEstimator_, std::ref(messageSender),
-                            pinSubtrees, pinResult);
+                            sortPerformanceEstimator_, &namedQueryCache_,
+                            std::ref(messageSender), pinSubtrees, pinResult);
   auto plannedQuery =
       setupPlannedQuery(update.datasetClauses_, update.update_, qec,
                         cancellationHandle, timeLimit, requestTimer);
diff --git a/src/engine/Server.h b/src/engine/Server.h
index 4e0889b48a..04f31645fa 100644
--- a/src/engine/Server.h
+++ b/src/engine/Server.h
@@ -12,6 +12,7 @@
 
 #include "ExecuteUpdate.h"
 #include "engine/Engine.h"
+#include "engine/NamedQueryCache.h"
 #include "engine/QueryExecutionContext.h"
 #include "engine/QueryExecutionTree.h"
 #include "engine/SortPerformanceEstimator.h"
@@ -68,6 +69,7 @@ class Server {
   unsigned short port_;
   std::string accessToken_;
   QueryResultCache cache_;
+  NamedQueryCache namedQueryCache_;
   ad_utility::AllocatorWithLimit<Id> allocator_;
   SortPerformanceEstimator sortPerformanceEstimator_;
   Index index_;
diff --git a/test/engine/ValuesForTesting.h b/src/engine/ValuesForTesting.h
similarity index 100%
rename from test/engine/ValuesForTesting.h
rename to src/engine/ValuesForTesting.h
diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp
index 2afbe38a83..5f2c2c377c 100644
--- a/test/OperationTest.cpp
+++ b/test/OperationTest.cpp
@@ -6,6 +6,7 @@
 
 #include <optional>
 
+#include "engine/NamedQueryCache.h"
 #include "engine/NeutralElementOperation.h"
 #include "engine/ValuesForTesting.h"
 #include "global/RuntimeParameters.h"
@@ -123,8 +124,13 @@ class OperationTestFixture : public testing::Test {
   Index index =
       makeTestIndex("OperationTest", std::nullopt, true, true, true, 32_B);
   QueryResultCache cache;
+  NamedQueryCache namedCache;
   QueryExecutionContext qec{
-      index, &cache, makeAllocator(), SortPerformanceEstimator{},
+      index,
+      &cache,
+      makeAllocator(),
+      SortPerformanceEstimator{},
+      &namedCache,
       [&](std::string json) { jsonHistory.emplace_back(std::move(json)); }};
   IdTable table = makeIdTableFromVector({{}, {}, {}});
   ValuesForTesting operation{&qec, std::move(table), {}};
@@ -404,9 +410,14 @@ TEST(Operation, ensureFailedStatusIsSetWhenGeneratorThrowsException) {
       "ensureFailedStatusIsSetWhenGeneratorThrowsException", std::nullopt, true,
       true, true, ad_utility::MemorySize::bytes(16), false);
   QueryResultCache cache{};
+  NamedQueryCache namedCache{};
   QueryExecutionContext context{
-      index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)),
-      SortPerformanceEstimator{}, [&](std::string) { signaledUpdate = true; }};
+      index,
+      &cache,
+      makeAllocator(ad_utility::MemorySize::megabytes(100)),
+      SortPerformanceEstimator{},
+      &namedCache,
+      [&](std::string) { signaledUpdate = true; }};
   AlwaysFailOperation operation{&context};
   ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started};
   auto result =
@@ -431,9 +442,14 @@ TEST(Operation, ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd) {
       "ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd", std::nullopt, true,
       true, true, ad_utility::MemorySize::bytes(16), false);
   QueryResultCache cache{};
+  NamedQueryCache namedCache{};
   QueryExecutionContext context{
-      index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)),
-      SortPerformanceEstimator{}, [&](std::string) { ++updateCallCounter; }};
+      index,
+      &cache,
+      makeAllocator(ad_utility::MemorySize::megabytes(100)),
+      SortPerformanceEstimator{},
+      &namedCache,
+      [&](std::string) { ++updateCallCounter; }};
   CustomGeneratorOperation operation{
       &context, [](const IdTable& idTable) -> Result::Generator {
         std::this_thread::sleep_for(50ms);
@@ -474,9 +490,14 @@ TEST(Operation, ensureSignalUpdateIsCalledAtTheEndOfPartialConsumption) {
       "ensureSignalUpdateIsCalledAtTheEndOfPartialConsumption", std::nullopt,
       true, true, true, ad_utility::MemorySize::bytes(16), false);
   QueryResultCache cache{};
+  NamedQueryCache namedCache{};
   QueryExecutionContext context{
-      index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)),
-      SortPerformanceEstimator{}, [&](std::string) { ++updateCallCounter; }};
+      index,
+      &cache,
+      makeAllocator(ad_utility::MemorySize::megabytes(100)),
+      SortPerformanceEstimator{},
+      &namedCache,
+      [&](std::string) { ++updateCallCounter; }};
   CustomGeneratorOperation operation{
       &context, [](const IdTable& idTable) -> Result::Generator {
         co_yield {idTable.clone(), LocalVocab{}};
diff --git a/test/engine/BindTest.cpp b/test/engine/BindTest.cpp
index 34ef0eb370..43039c47f7 100644
--- a/test/engine/BindTest.cpp
+++ b/test/engine/BindTest.cpp
@@ -6,8 +6,8 @@
 
 #include "../util/IdTableHelpers.h"
 #include "../util/IndexTestHelpers.h"
-#include "./ValuesForTesting.h"
 #include "engine/Bind.h"
+#include "engine/ValuesForTesting.h"
 #include "engine/sparqlExpressions/LiteralExpression.h"
 
 using namespace sparqlExpression;
diff --git a/test/engine/CartesianProductJoinTest.cpp b/test/engine/CartesianProductJoinTest.cpp
index 8727aa223a..3bc01b077a 100644
--- a/test/engine/CartesianProductJoinTest.cpp
+++ b/test/engine/CartesianProductJoinTest.cpp
@@ -4,12 +4,12 @@
 
 #include <gmock/gmock.h>
 
-#include "../engine/ValuesForTesting.h"
 #include "../util/GTestHelpers.h"
 #include "../util/IdTableHelpers.h"
 #include "../util/IndexTestHelpers.h"
 #include "engine/CartesianProductJoin.h"
 #include "engine/QueryExecutionTree.h"
+#include "engine/ValuesForTesting.h"
 
 using namespace ad_utility::testing;
 using ad_utility::source_location;
diff --git a/test/engine/LazyGroupByTest.cpp b/test/engine/LazyGroupByTest.cpp
index 1b952f9f7b..23bc903618 100644
--- a/test/engine/LazyGroupByTest.cpp
+++ b/test/engine/LazyGroupByTest.cpp
@@ -6,9 +6,9 @@
 
 #include "../util/IdTableHelpers.h"
 #include "../util/IndexTestHelpers.h"
-#include "./ValuesForTesting.h"
 #include "engine/GroupBy.h"
 #include "engine/LazyGroupBy.h"
+#include "engine/ValuesForTesting.h"
 #include "engine/sparqlExpressions/AggregateExpression.h"
 #include "engine/sparqlExpressions/GroupConcatExpression.h"
 #include "engine/sparqlExpressions/NaryExpression.h"
diff --git a/test/engine/QueryExecutionTreeTest.cpp b/test/engine/QueryExecutionTreeTest.cpp
index c67e17202f..d464e9b837 100644
--- a/test/engine/QueryExecutionTreeTest.cpp
+++ b/test/engine/QueryExecutionTreeTest.cpp
@@ -6,8 +6,8 @@
 
 #include "../util/IdTableHelpers.h"
 #include "../util/IndexTestHelpers.h"
-#include "./ValuesForTesting.h"
 #include "engine/QueryExecutionTree.h"
+#include "engine/ValuesForTesting.h"
 
 using namespace ad_utility::testing;
 
diff --git a/test/util/IdTableHelpers.cpp b/test/util/IdTableHelpers.cpp
index 34ad9414e7..b4708634aa 100644
--- a/test/util/IdTableHelpers.cpp
+++ b/test/util/IdTableHelpers.cpp
@@ -7,7 +7,7 @@
 #include <algorithm>
 #include <utility>
 
-#include "../engine/ValuesForTesting.h"
+#include "engine/ValuesForTesting.h"
 #include "engine/idTable/IdTable.h"
 #include "global/ValueId.h"
 #include "util/Algorithm.h"
diff --git a/test/util/IdTableHelpers.h b/test/util/IdTableHelpers.h
index 40e2fe8213..928b4e7b99 100644
--- a/test/util/IdTableHelpers.h
+++ b/test/util/IdTableHelpers.h
@@ -13,7 +13,6 @@
 #include <stdexcept>
 #include <tuple>
 
-#include "../engine/ValuesForTesting.h"
 #include "./AllocatorTestHelpers.h"
 #include "./GTestHelpers.h"
 #include "./IdTestHelpers.h"
@@ -22,6 +21,7 @@
 #include "engine/Join.h"
 #include "engine/OptionalJoin.h"
 #include "engine/QueryExecutionTree.h"
+#include "engine/ValuesForTesting.h"
 #include "engine/idTable/IdTable.h"
 #include "global/ValueId.h"
 #include "util/Algorithm.h"
diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp
index 8e1a693209..26c2698be0 100644
--- a/test/util/IndexTestHelpers.cpp
+++ b/test/util/IndexTestHelpers.cpp
@@ -6,6 +6,7 @@
 
 #include "./GTestHelpers.h"
 #include "./TripleComponentTestHelpers.h"
+#include "engine/NamedQueryCache.h"
 #include "global/SpecialIds.h"
 #include "index/IndexImpl.h"
 #include "util/ProgressBar.h"
@@ -277,10 +278,11 @@ QueryExecutionContext* getQec(std::optional<std::string> turtleInput,
     TypeErasedCleanup cleanup_;
     std::unique_ptr<Index> index_;
     std::unique_ptr<QueryResultCache> cache_;
+    std::unique_ptr<NamedQueryCache> namedCache_;
     std::unique_ptr<QueryExecutionContext> qec_ =
         std::make_unique<QueryExecutionContext>(
             *index_, cache_.get(), makeAllocator(MemorySize::megabytes(100)),
-            SortPerformanceEstimator{});
+            SortPerformanceEstimator{}, namedCache_.get());
   };
 
   using Key = std::tuple<std::optional<string>, bool, bool, bool,
@@ -308,7 +310,8 @@ QueryExecutionContext* getQec(std::optional<std::string> turtleInput,
                          usePatterns, usePrefixCompression,
                          blocksizePermutations, createTextIndex,
                          addWordsFromLiterals, contentsOfWordsFileAndDocsFile)),
-                     std::make_unique<QueryResultCache>()});
+                     std::make_unique<QueryResultCache>(),
+                     std::make_unique<NamedQueryCache>()});
   }
   auto* qec = contextMap.at(key).qec_.get();
   qec->getIndex().getImpl().setGlobalIndexAndComparatorOnlyForTesting();

From befc33dcd9aef155415b8c453f5d09b87cdaf1b3 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 30 Jan 2025 13:12:14 +0100
Subject: [PATCH 05/25] This seems to work, but copies IdTables etc.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/CheckUsePatternTrick.cpp           |  8 +-
 src/engine/NamedQueryCache.h                  | 23 ++++-
 src/engine/QueryPlanner.cpp                   | 12 +++
 src/engine/QueryPlanner.h                     |  1 +
 src/engine/Server.cpp                         | 34 ++++++-
 src/engine/ValuesForTesting.h                 | 93 ++++++++++++-------
 src/global/Constants.h                        |  4 +
 src/parser/CMakeLists.txt                     |  2 +
 src/parser/GraphPatternOperation.h            |  5 +-
 src/parser/NamedCachedQuery.cpp               |  5 +
 src/parser/NamedCachedQuery.h                 | 28 ++++++
 .../sparqlParser/SparqlQleverVisitor.cpp      | 38 ++++++++
 src/parser/sparqlParser/SparqlQleverVisitor.h | 51 +++++-----
 13 files changed, 234 insertions(+), 70 deletions(-)
 create mode 100644 src/parser/NamedCachedQuery.cpp
 create mode 100644 src/parser/NamedCachedQuery.h

diff --git a/src/engine/CheckUsePatternTrick.cpp b/src/engine/CheckUsePatternTrick.cpp
index e7da58ea14..583976c829 100644
--- a/src/engine/CheckUsePatternTrick.cpp
+++ b/src/engine/CheckUsePatternTrick.cpp
@@ -72,9 +72,11 @@ bool isVariableContainedInGraphPatternOperation(
     } else if constexpr (std::is_same_v<T, p::Service>) {
       return ad_utility::contains(arg.visibleVariables_, variable);
     } else {
-      static_assert(
-          std::is_same_v<T, p::TransPath> || std::is_same_v<T, p::PathQuery> ||
-          std::is_same_v<T, p::Describe> || std::is_same_v<T, p::SpatialQuery>);
+      static_assert(std::is_same_v<T, p::TransPath> ||
+                    std::is_same_v<T, p::PathQuery> ||
+                    std::is_same_v<T, p::Describe> ||
+                    std::is_same_v<T, p::SpatialQuery> ||
+                    std::is_same_v<T, p::NamedCachedQuery>);
       // The `TransPath` is set up later in the query planning, when this
       // function should not be called anymore.
       AD_FAIL();
diff --git a/src/engine/NamedQueryCache.h b/src/engine/NamedQueryCache.h
index f9d702b4e4..03b9c3ebe8 100644
--- a/src/engine/NamedQueryCache.h
+++ b/src/engine/NamedQueryCache.h
@@ -8,21 +8,42 @@
 #include "util/Synchronized.h"
 
 class NamedQueryCache {
+ public:
+  struct Value {
+    IdTable result_;
+    VariableToColumnMap varToColMap_;
+    std::vector<ColumnIndex> resultSortedOn_;
+  };
   using Key = std::string;
-  using Value = std::shared_ptr<ValuesForTesting>;
-  using Cache =
-      ad_utility::HashMap<std::string, std::shared_ptr<ValuesForTesting>>;
+  using Cache = ad_utility::HashMap<std::string, Value>;
 
+ private:
   ad_utility::Synchronized<Cache> cache_;
 
+ public:
   void store(const Key& key, Value value) {
-    (*cache_.wlock())[key] = std::move(value);
+    (*cache_.wlock()).insert_or_assign(key, std::move(value));
   }
-  Value get(const Key& key) {
+  // NOTE: The lock is released when this function returns, so the returned
+  // reference is not protected against a concurrent `store` for the same key.
+  const Value& get(const Key& key) {
     auto l = cache_.wlock();
     auto it = l->find(key);
     // TODO<joka921> Proper error message.
     AD_CONTRACT_CHECK(it != l->end());
     return it->second;
   }
+
+  // Create an operation that yields a copy of the result stored under `key`,
+  // including the columns it is sorted on. The lookup and the clone both
+  // happen while the lock is held, so `store` cannot modify the entry midway.
+  std::shared_ptr<ValuesForTesting> getOperation(const Key& key,
+                                                 QueryExecutionContext* ctx) {
+    auto l = cache_.wlock();
+    auto it = l->find(key);
+    AD_CONTRACT_CHECK(it != l->end());
+    const auto& [table, map, sortedOn] = it->second;
+    return std::make_shared<ValuesForTesting>(
+        ctx, std::make_shared<IdTable>(table.clone()), map, sortedOn);
+  }
 };
diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp
index 43caa71f02..7a349045d1 100644
--- a/src/engine/QueryPlanner.cpp
+++ b/src/engine/QueryPlanner.cpp
@@ -13,6 +13,7 @@
 #include <type_traits>
 #include <variant>
 
+#include "NamedQueryCache.h"
 #include "backports/algorithm.h"
 #include "engine/Bind.h"
 #include "engine/CartesianProductJoin.h"
@@ -2408,6 +2409,8 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) {
     visitDescribe(arg);
   } else if constexpr (std::is_same_v<T, p::SpatialQuery>) {
     visitSpatialSearch(arg);
+  } else if constexpr (std::is_same_v<T, p::NamedCachedQuery>) {
+    visitNamedCachedQuery(arg);
   } else {
     static_assert(std::is_same_v<T, p::BasicGraphPattern>);
     visitBasicGraphPattern(arg);
@@ -2581,6 +2584,15 @@ void QueryPlanner::GraphPatternPlanner::visitSpatialSearch(
   visitGroupOptionalOrMinus(std::move(candidatesOut));
 }
 
+// _____________________________________________________________________________
+void QueryPlanner::GraphPatternPlanner::visitNamedCachedQuery(
+    parsedQuery::NamedCachedQuery& arg) {
+  auto candidate = SubtreePlan{
+      planner_._qec, planner_._qec->namedQueryCache().getOperation(
+                         arg.validateAndGetIdentifier(), planner_._qec)};
+  visitGroupOptionalOrMinus(std::vector{std::move(candidate)});
+}
+
 // _______________________________________________________________
 void QueryPlanner::GraphPatternPlanner::visitUnion(parsedQuery::Union& arg) {
   // TODO<joka921> here we could keep all the candidates, and create a
diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h
index b51523baed..72fb009716 100644
--- a/src/engine/QueryPlanner.h
+++ b/src/engine/QueryPlanner.h
@@ -543,6 +543,7 @@ class QueryPlanner {
     void visitTransitivePath(parsedQuery::TransPath& transitivePath);
     void visitPathSearch(parsedQuery::PathQuery& config);
     void visitSpatialSearch(parsedQuery::SpatialQuery& config);
+    void visitNamedCachedQuery(parsedQuery::NamedCachedQuery& config);
     void visitUnion(parsedQuery::Union& un);
     void visitSubquery(parsedQuery::Subquery& subquery);
     void visitDescribe(parsedQuery::Describe& describe);
diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
index 7c3c7e2c2e..8f2abaec1c 100644
--- a/src/engine/Server.cpp
+++ b/src/engine/Server.cpp
@@ -361,6 +361,11 @@ Awaitable<void> Server::process(
   const auto parsedHttpRequest = parseHttpRequest(request);
   const auto& parameters = parsedHttpRequest.parameters_;
 
+  LOG(INFO) << "Logging all the parameters" << std::endl;
+  for (const auto& [key, value] : parameters) {
+    LOG(INFO) << key << ":" << value.at(0) << std::endl;
+  }
+
   // We always want to call `Server::checkParameter` with the same first
   // parameter.
   auto checkParameter = std::bind_front(&ad_utility::url_parser::checkParameter,
@@ -809,9 +814,17 @@ Awaitable<void> Server::processQuery(
   // Do the query planning. This creates a `QueryExecutionTree`, which will
   // then be used to process the query.
   auto [pinSubtrees, pinResult] = determineResultPinning(params);
+  for (const auto& [key, value] : params) {
+    LOG(INFO) << "key : " << key << ": " << value.at(0) << std::endl;
+  }
+  std::optional<std::string> pinNamed =
+      ad_utility::url_parser::checkParameter(params, "pin-named-query", {});
   LOG(INFO) << "Processing the following SPARQL query:"
             << (pinResult ? " [pin result]" : "")
             << (pinSubtrees ? " [pin subresults]" : "") << "\n"
+            << (pinNamed ? absl::StrCat(" [pin named as ", *pinNamed, "]")
+                         : "")
+            << "\n"
             << query.query_ << std::endl;
   QueryExecutionContext qec(index_, &cache_, allocator_,
                             sortPerformanceEstimator_, &namedQueryCache_,
@@ -866,10 +879,23 @@ Awaitable<void> Server::processQuery(
                        qet.getRootOperation()->getLimit()._offset);
   limitOffset._offset -= qet.getRootOperation()->getLimit()._offset;
 
-  // This actually processes the query and sends the result in the requested
-  // format.
-  co_await sendStreamableResponse(request, send, mediaType, plannedQuery, qet,
-                                  requestTimer, cancellationHandle);
+  if (pinNamed.has_value()) {
+    auto result = qet.getResult(false);
+    auto t =
+        NamedQueryCache::Value(result->idTable().clone(),
+                               qet.getVariableColumns(), result->sortedBy());
+    qec.namedQueryCache().store(pinNamed.value(), std::move(t));
+
+    auto response = ad_utility::httpUtils::createOkResponse(
+        "successfully pinned the query result", request,
+        ad_utility::MediaType::textPlain);
+    co_await send(response);
+  } else {
+    // This actually processes the query and sends the result in the requested
+    // format.
+    co_await sendStreamableResponse(request, send, mediaType, plannedQuery, qet,
+                                    requestTimer, cancellationHandle);
+  }
 
   // Print the runtime info. This needs to be done after the query
   // was computed.
diff --git a/src/engine/ValuesForTesting.h b/src/engine/ValuesForTesting.h
index 097ccd9c78..c9ad456720 100644
--- a/src/engine/ValuesForTesting.h
+++ b/src/engine/ValuesForTesting.h
@@ -4,19 +4,24 @@
 
 #pragma once
 
+#include <util/TransparentFunctors.h>
+
 #include "engine/Operation.h"
 #include "engine/QueryExecutionContext.h"
 #include "engine/Result.h"
 #include "util/Algorithm.h"
 #include "util/Random.h"
 
+auto tables(auto& tables_) {
+  return ql::views::transform(tables_, ad_utility::dereference);
+}
 // An operation that yields a given `IdTable` as its result. It is used for
 // unit testing purposes when we need to specify the subtrees of another
 // operation.
 class ValuesForTesting : public Operation {
  private:
-  std::vector<IdTable> tables_;
-  std::vector<std::optional<Variable>> variables_;
+  std::vector<std::shared_ptr<const IdTable>> tables_;
+  VariableToColumnMap variables_;
   bool supportsLimit_;
   // Those can be manually overwritten for testing using the respective getters.
   size_t sizeEstimate_;
@@ -27,16 +32,14 @@ class ValuesForTesting : public Operation {
   // Create an operation that has as its result the given `table` and the given
   // `variables`. The number of variables must be equal to the number
   // of columns in the table.
-  explicit ValuesForTesting(QueryExecutionContext* ctx, IdTable table,
-                            std::vector<std::optional<Variable>> variables,
-                            bool supportsLimit = false,
-                            std::vector<ColumnIndex> sortedColumns = {},
-                            LocalVocab localVocab = LocalVocab{},
-                            std::optional<float> multiplicity = std::nullopt,
-                            bool forceFullyMaterialized = false)
+  explicit ValuesForTesting(
+      QueryExecutionContext* ctx, IdTable table,
+      const std::vector<std::optional<Variable>>& variables,
+      bool supportsLimit = false, std::vector<ColumnIndex> sortedColumns = {},
+      LocalVocab localVocab = LocalVocab{},
+      std::optional<float> multiplicity = std::nullopt,
+      bool forceFullyMaterialized = false)
       : Operation{ctx},
-        tables_{},
-        variables_{std::move(variables)},
         supportsLimit_{supportsLimit},
         sizeEstimate_{table.numRows()},
         costEstimate_{table.numRows()},
@@ -45,17 +48,32 @@ class ValuesForTesting : public Operation {
         multiplicity_{multiplicity},
         forceFullyMaterialized_{forceFullyMaterialized} {
     AD_CONTRACT_CHECK(variables_.size() == table.numColumns());
-    tables_.push_back(std::move(table));
+    tables_.push_back(std::make_shared<const IdTable>(std::move(table)));
+    variables_ = computeVarMapFromVector(variables);
   }
+
+  ValuesForTesting(QueryExecutionContext* ctx,
+                   std::shared_ptr<const IdTable> table,
+                   VariableToColumnMap variables,
+                   std::vector<ColumnIndex> sortedColumns = {},
+                   LocalVocab localVocab = LocalVocab{})
+      : Operation{ctx},
+        tables_{std::move(table)},
+        variables_{std::move(variables)},
+        supportsLimit_{false},
+        sizeEstimate_{tables_.at(0)->numRows()},
+        costEstimate_{0},
+        resultSortedColumns_{std::move(sortedColumns)},
+        localVocab_{std::move(localVocab)},
+        multiplicity_{},
+        forceFullyMaterialized_{false} {}
   explicit ValuesForTesting(QueryExecutionContext* ctx,
-                            std::vector<IdTable> tables,
+                            std::vector<IdTable> idTables,
                             std::vector<std::optional<Variable>> variables,
                             bool unlikelyToFitInCache = false,
                             std::vector<ColumnIndex> sortedColumns = {},
                             LocalVocab localVocab = LocalVocab{})
       : Operation{ctx},
-        tables_{std::move(tables)},
-        variables_{std::move(variables)},
         supportsLimit_{false},
         sizeEstimate_{0},
         costEstimate_{0},
@@ -63,15 +81,20 @@ class ValuesForTesting : public Operation {
         resultSortedColumns_{std::move(sortedColumns)},
         localVocab_{std::move(localVocab)},
         multiplicity_{std::nullopt} {
-    AD_CONTRACT_CHECK(ql::ranges::all_of(tables_, [this](const IdTable& table) {
-      return variables_.size() == table.numColumns();
-    }));
+    for (auto& table : idTables) {
+      tables_.push_back(std::make_shared<const IdTable>(std::move(table)));
+    }
+    AD_CONTRACT_CHECK(
+        ql::ranges::all_of(tables(tables_), [&variables](const IdTable& table) {
+          return variables.size() == table.numColumns();
+        }));
     size_t totalRows = 0;
-    for (const IdTable& idTable : tables_) {
+    for (const IdTable& idTable : tables(tables_)) {
       totalRows += idTable.numRows();
     }
     sizeEstimate_ = totalRows;
     costEstimate_ = totalRows;
+    variables_ = computeVarMapFromVector(variables);
   }
 
   // Accessors for the estimates for manual testing.
@@ -85,7 +108,7 @@ class ValuesForTesting : public Operation {
       AD_CORRECTNESS_CHECK(!supportsLimit_);
       std::vector<IdTable> clones;
       clones.reserve(tables_.size());
-      for (const IdTable& idTable : tables_) {
+      for (const IdTable& idTable : tables(tables_)) {
         clones.push_back(idTable.clone());
       }
       auto generator = [](auto idTables,
@@ -98,15 +121,15 @@ class ValuesForTesting : public Operation {
     }
     std::optional<IdTable> optionalTable;
     if (tables_.size() > 1) {
-      IdTable aggregateTable{tables_.at(0).numColumns(),
-                             tables_.at(0).getAllocator()};
-      for (const IdTable& idTable : tables_) {
+      IdTable aggregateTable{tables(tables_)[0].numColumns(),
+                             tables(tables_)[0].getAllocator()};
+      for (const IdTable& idTable : tables(tables_)) {
         aggregateTable.insertAtEnd(idTable);
       }
       optionalTable = std::move(aggregateTable);
     }
     auto table = optionalTable.has_value() ? std::move(optionalTable).value()
-                                           : tables_.at(0).clone();
+                                           : tables(tables_)[0].clone();
     if (supportsLimit_) {
       table.erase(table.begin() + getLimit().upperBound(table.size()),
                   table.end());
@@ -128,13 +151,13 @@ class ValuesForTesting : public Operation {
     std::stringstream str;
     auto numRowsView = tables_ | ql::views::transform(&IdTable::numRows);
     auto totalNumRows = std::reduce(numRowsView.begin(), numRowsView.end(), 0);
-    auto numCols = tables_.empty() ? 0 : tables_.at(0).numColumns();
+    auto numCols = tables_.empty() ? 0 : tables_.at(0)->numColumns();
     str << "Values for testing with " << numCols << " columns and "
         << totalNumRows << " rows. ";
     if (totalNumRows > 1000) {
       str << ad_utility::FastRandomIntGenerator<int64_t>{}();
     } else {
-      for (const IdTable& idTable : tables_) {
+      for (const IdTable& idTable : tables(tables_)) {
         for (size_t i = 0; i < idTable.numColumns(); ++i) {
           for (Id entry : idTable.getColumn(i)) {
             str << entry << ' ';
@@ -154,7 +177,7 @@ class ValuesForTesting : public Operation {
   size_t getResultWidth() const override {
     // Assume a width of 1 if we have no tables and no other information to base
     // it on because 0 would otherwise cause stuff to break.
-    return tables_.empty() ? 1 : tables_.at(0).numColumns();
+    return tables_.empty() ? 1 : tables_.at(0)->numColumns();
   }
 
   vector<ColumnIndex> resultSortedOn() const override {
@@ -179,27 +202,31 @@ class ValuesForTesting : public Operation {
 
   bool knownEmptyResult() override {
     return ql::ranges::all_of(
-        tables_, [](const IdTable& table) { return table.empty(); });
+        tables(tables_), [](const IdTable& table) { return table.empty(); });
   }
 
  private:
-  VariableToColumnMap computeVariableToColumnMap() const override {
+  VariableToColumnMap computeVarMapFromVector(
+      const std::vector<std::optional<Variable>>& vars) const {
     VariableToColumnMap m;
-    for (auto i = ColumnIndex{0}; i < variables_.size(); ++i) {
-      if (!variables_.at(i).has_value()) {
+    for (auto i = ColumnIndex{0}; i < vars.size(); ++i) {
+      if (!vars.at(i).has_value()) {
         continue;
       }
       bool containsUndef =
-          ql::ranges::any_of(tables_, [&i](const IdTable& table) {
+          ql::ranges::any_of(tables(tables_), [&i](const IdTable& table) {
             return ql::ranges::any_of(table.getColumn(i),
                                       [](Id id) { return id.isUndefined(); });
           });
       using enum ColumnIndexAndTypeInfo::UndefStatus;
-      m[variables_.at(i).value()] = ColumnIndexAndTypeInfo{
+      m[vars.at(i).value()] = ColumnIndexAndTypeInfo{
           i, containsUndef ? PossiblyUndefined : AlwaysDefined};
     }
     return m;
   }
+  VariableToColumnMap computeVariableToColumnMap() const override {
+    return variables_;
+  }
 
   std::vector<ColumnIndex> resultSortedColumns_;
   LocalVocab localVocab_;
diff --git a/src/global/Constants.h b/src/global/Constants.h
index 5a79575d82..dde13b65ee 100644
--- a/src/global/Constants.h
+++ b/src/global/Constants.h
@@ -74,6 +74,10 @@ constexpr inline std::string_view DEFAULT_GRAPH_IRI =
 constexpr inline std::string_view QLEVER_INTERNAL_GRAPH_IRI =
     makeQleverInternalIriConst<"internal-graph">();
 
+constexpr inline std::string_view NAMED_CACHED_QUERY_PREFIX =
+    ad_utility::constexprStrCat<"<", QLEVER_INTERNAL_PREFIX_URL,
+                                "named-cached-query-">();
+
 constexpr inline std::pair<std::string_view, std::string_view> GEOF_PREFIX = {
     "geof:", "http://www.opengis.net/def/function/geosparql/"};
 constexpr inline std::pair<std::string_view, std::string_view> MATH_PREFIX = {
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt
index 6fa123a793..57f519f6cf 100644
--- a/src/parser/CMakeLists.txt
+++ b/src/parser/CMakeLists.txt
@@ -31,6 +31,8 @@ add_library(parser
         Literal.cpp
         LiteralOrIri.cpp
         DatasetClauses.cpp
+        NamedCachedQuery.cpp
+        NamedCachedQuery.h
 )
 qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine index)
 
diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h
index 8f7d4a8505..9c454302ec 100644
--- a/src/parser/GraphPatternOperation.h
+++ b/src/parser/GraphPatternOperation.h
@@ -13,6 +13,7 @@
 #include "engine/sparqlExpressions/SparqlExpressionPimpl.h"
 #include "parser/DatasetClauses.h"
 #include "parser/GraphPattern.h"
+#include "parser/NamedCachedQuery.h"
 #include "parser/PathQuery.h"
 #include "parser/SpatialQuery.h"
 #include "parser/TripleComponent.h"
@@ -178,8 +179,8 @@ struct Bind {
 // class actually becomes `using GraphPatternOperation = std::variant<...>`
 using GraphPatternOperationVariant =
     std::variant<Optional, Union, Subquery, TransPath, Bind, BasicGraphPattern,
-                 Values, Service, PathQuery, SpatialQuery, Minus,
-                 GroupGraphPattern, Describe>;
+                 Values, Service, PathQuery, SpatialQuery, NamedCachedQuery,
+                 Minus, GroupGraphPattern, Describe>;
 struct GraphPatternOperation
     : public GraphPatternOperationVariant,
       public VisitMixin<GraphPatternOperation, GraphPatternOperationVariant> {
diff --git a/src/parser/NamedCachedQuery.cpp b/src/parser/NamedCachedQuery.cpp
new file mode 100644
index 0000000000..a1db7e95c3
--- /dev/null
+++ b/src/parser/NamedCachedQuery.cpp
@@ -0,0 +1,5 @@
+//
+// Created by kalmbacj on 1/30/25.
+//
+
+#include "NamedCachedQuery.h"
diff --git a/src/parser/NamedCachedQuery.h b/src/parser/NamedCachedQuery.h
new file mode 100644
index 0000000000..40a9604d12
--- /dev/null
+++ b/src/parser/NamedCachedQuery.h
@@ -0,0 +1,28 @@
+//  Copyright 2025, University of Freiburg,
+//                  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#pragma once
+
+#include "parser/MagicServiceQuery.h"
+
+namespace parsedQuery {
+class NamedCachedQuery : public MagicServiceQuery {
+  std::string identifier_;
+
+ public:
+  NamedCachedQuery(std::string identifier)
+      : identifier_{std::move(identifier)} {}
+
+  void addParameter([[maybe_unused]] const SparqlTriple& triple) override {
+    throw std::runtime_error{
+        "The body of a named cache query request must be empty"};
+  }
+
+  const std::string& validateAndGetIdentifier() const {
+    // TODO<joka921> Better error messages.
+    AD_CORRECTNESS_CHECK(!childGraphPattern_.has_value());
+    return identifier_;
+  }
+};
+}  // namespace parsedQuery
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index e22454cfd7..34cd48a25a 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -30,6 +30,7 @@
 #include "parser/GraphPatternOperation.h"
 #include "parser/MagicServiceIriConstants.h"
 #include "parser/MagicServiceQuery.h"
+#include "parser/NamedCachedQuery.h"
 #include "parser/RdfParser.h"
 #include "parser/SparqlParser.h"
 #include "parser/SpatialQuery.h"
@@ -891,6 +892,40 @@ GraphPatternOperation Visitor::visitPathQuery(
   return pathQuery;
 }
 
+// _____________________________________________________________________________
+GraphPatternOperation Visitor::visitNamedCachedQuery(
+    Parser::ServiceGraphPatternContext* ctx) {
+  auto parseContent = [ctx](parsedQuery::NamedCachedQuery& namedQuery,
+                            const parsedQuery::GraphPatternOperation& op) {
+    if (std::holds_alternative<parsedQuery::BasicGraphPattern>(op)) {
+      namedQuery.addBasicPattern(std::get<parsedQuery::BasicGraphPattern>(op));
+    } else if (std::holds_alternative<parsedQuery::GroupGraphPattern>(op)) {
+      namedQuery.addGraph(op);
+    } else {
+      reportError(ctx,
+                  "Unsupported element in named cached query. "
+                  "A named cached query currently must have an empty body");
+    }
+  };
+
+  auto iri = std::get<Iri>(visit(ctx->varOrIri()));
+  auto s = iri.toSparql();
+  AD_CORRECTNESS_CHECK(s.starts_with(NAMED_CACHED_QUERY_PREFIX));
+  auto view = std::string_view{s};
+  // Remove the prefix and the trailing ">"
+  view.remove_prefix(NAMED_CACHED_QUERY_PREFIX.size());
+  view.remove_suffix(1);
+
+  parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern());
+  parsedQuery::NamedCachedQuery namedQuery{std::string{view}};
+  for (const auto& op : graphPattern._graphPatterns) {
+    parseContent(namedQuery, op);
+  }
+  [[maybe_unused]] const auto& validated =
+      namedQuery.validateAndGetIdentifier();
+  return namedQuery;
+}
+
 GraphPatternOperation Visitor::visitSpatialQuery(
     Parser::ServiceGraphPatternContext* ctx) {
   auto parseSpatialQuery = [ctx](parsedQuery::SpatialQuery& spatialQuery,
@@ -951,6 +986,9 @@ GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) {
     return visitPathQuery(ctx);
   } else if (serviceIri.toStringRepresentation() == SPATIAL_SEARCH_IRI) {
     return visitSpatialQuery(ctx);
+  } else if (serviceIri.toStringRepresentation().starts_with(
+                 NAMED_CACHED_QUERY_PREFIX)) {
+    return visitNamedCachedQuery(ctx);
   }
   // Parse the body of the SERVICE query. Add the visible variables from the
   // SERVICE clause to the visible variables so far, but also remember them
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index fb1cb9c05c..697d353f65 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -264,6 +264,9 @@ class SparqlQleverVisitor {
   GraphPatternOperation visitSpatialQuery(
       Parser::ServiceGraphPatternContext* ctx);
 
+  GraphPatternOperation visitNamedCachedQuery(
+      Parser::ServiceGraphPatternContext* ctx);
+
   parsedQuery::GraphPatternOperation visit(Parser::BindContext* ctx);
 
   parsedQuery::GraphPatternOperation visit(Parser::InlineDataContext* ctx);
@@ -343,10 +346,10 @@ class SparqlQleverVisitor {
 
   PropertyPath visit(Parser::PathEltOrInverseContext* ctx);
 
-  // NOTE: The `visit` overloads marked `[[noreturn]]` always throw an exception
-  // because the corresponding feature is not (yet) supported by QLever. Most
-  // of them have a return type of `void`. Some of the don't, in order to make
-  // the usage of abstractions like `visitAlternative` easier.
+  // NOTE: The `visit` overloads marked `[[noreturn]]` always throw an
+  // exception because the corresponding feature is not (yet) supported by
+  // QLever. Most of them have a return type of `void`. Some of the don't, in
+  // order to make the usage of abstractions like `visitAlternative` easier.
 
   [[noreturn]] static void visit(Parser::PathModContext* ctx);
 
@@ -485,8 +488,8 @@ class SparqlQleverVisitor {
   static std::string currentTimeAsXsdString();
 
   // Member starTime_ is needed for the NOW expression. All calls within
-  // the query execution reference it. The underlying date time format is e.g.:
-  // 2011-01-10T14:45:13.815-05:00
+  // the query execution reference it. The underlying date time format is
+  // e.g.: 2011-01-10T14:45:13.815-05:00
   std::string startTime_ = currentTimeAsXsdString();
 
   template <typename Visitor, typename Ctx>
@@ -503,15 +506,16 @@ class SparqlQleverVisitor {
 
   // Get the part of the original input string that pertains to the given
   // context. This is necessary because ANTLR's `getText()` only provides that
-  // part with *all* whitespace removed. Preserving the whitespace is important
-  // for readability (for example, in an error message), and even more so when
-  // using such parts for further processing (like the body of a SERVICE query,
-  // which is not valid SPARQL anymore when you remove all whitespace).
+  // part with *all* whitespace removed. Preserving the whitespace is
+  // important for readability (for example, in an error message), and even
+  // more so when using such parts for further processing (like the body of a
+  // SERVICE query, which is not valid SPARQL anymore when you remove all
+  // whitespace).
   static std::string getOriginalInputForContext(
       const antlr4::ParserRuleContext* context);
 
-  // Process an IRI function call. This is used in both `visitFunctionCall` and
-  // `visitIriOrFunction`.
+  // Process an IRI function call. This is used in both `visitFunctionCall`
+  // and `visitIriOrFunction`.
   static ExpressionPtr processIriFunctionCall(
       const TripleComponent::Iri& iri, std::vector<ExpressionPtr> argList,
       const antlr4::ParserRuleContext*);
@@ -555,13 +559,14 @@ class SparqlQleverVisitor {
   template <typename Ctx>
   auto visitOptional(Ctx* ctx) -> std::optional<decltype(visit(ctx))>;
 
-  /// If `ctx` is not `nullptr`, visit it, convert the result to `Intermediate`
-  /// and assign it to `*target`. The case where `Intermediate!=Target` is
-  /// useful, when the result of `visit(ctx)` cannot be converted to `Target`,
-  /// but the conversion chain `VisitResult -> Intermediate -> Target` is valid.
-  /// For example when `visit(ctx)` yields `A`, `A` is explicitly convertible to
-  /// `B` and `Target` is `optional<B>`, then `B` has to be specified as
-  /// `Intermediate` (see for example the implementation of `visitAlternative`).
+  /// If `ctx` is not `nullptr`, visit it, convert the result to
+  /// `Intermediate` and assign it to `*target`. The case where
+  /// `Intermediate!=Target` is useful, when the result of `visit(ctx)` cannot
+  /// be converted to `Target`, but the conversion chain `VisitResult ->
+  /// Intermediate -> Target` is valid. For example when `visit(ctx)` yields
+  /// `A`, `A` is explicitly convertible to `B` and `Target` is `optional<B>`,
+  /// then `B` has to be specified as `Intermediate` (see for example the
+  /// implementation of `visitAlternative`).
   template <typename Target, typename Intermediate = Target, typename Ctx>
   void visitIf(Target* target, Ctx* ctx);
 
@@ -581,8 +586,8 @@ class SparqlQleverVisitor {
   template <typename Context>
   Triples parseTriplesConstruction(Context* ctx);
 
-  // If the triple is a special triple for the text index (i.e. its predicate is
-  // either `ql:contains-word` or `ql:contains-entity`, register the magic
+  // If the triple is a special triple for the text index (i.e. its predicate
+  // is either `ql:contains-word` or `ql:contains-entity`, register the magic
   // variables for the matching word and the score that will be created when
   // processing those triples in the query body, s.t. they can be selected as
   // part of the query result.
@@ -593,8 +598,8 @@ class SparqlQleverVisitor {
   static TripleComponent visitGraphTerm(const GraphTerm& graphTerm);
 
   // If any of the variables used in `expression` did not appear previously in
-  // the query, add a warning or throw an exception (depending on the setting of
-  // the corresponding `RuntimeParameter`).
+  // the query, add a warning or throw an exception (depending on the setting
+  // of the corresponding `RuntimeParameter`).
   void warnOrThrowIfUnboundVariables(auto* ctx,
                                      const SparqlExpressionPimpl& expression,
                                      std::string_view clauseName);

From 7f30e170def42200ff1211339d2830173a76c668 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 30 Jan 2025 15:20:37 +0100
Subject: [PATCH 06/25] It still works and is a little bit cleaner.
 TODO<joka921>:

1. Unit tests
2. correct cache-clearing (or updates etc.)

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/CMakeLists.txt                     |  1 -
 src/engine/NamedQueryCache.cpp                | 31 ++++++++++-
 src/engine/NamedQueryCache.h                  | 39 ++++++++------
 src/engine/QueryExecutionContext.cpp          |  3 --
 src/engine/QueryExecutionContext.h            |  1 -
 src/engine/Server.cpp                         | 16 ++----
 src/engine/ValuesForTesting.h                 | 53 ++++++++-----------
 src/global/Constants.h                        |  5 +-
 src/parser/CMakeLists.txt                     |  2 -
 src/parser/NamedCachedQuery.cpp               |  5 --
 src/parser/NamedCachedQuery.h                 |  6 +++
 .../sparqlParser/SparqlQleverVisitor.cpp      | 16 +++---
 src/parser/sparqlParser/SparqlQleverVisitor.h | 49 +++++++++--------
 13 files changed, 121 insertions(+), 106 deletions(-)
 delete mode 100644 src/parser/NamedCachedQuery.cpp

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index d517c0f239..91587521fd 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -16,6 +16,5 @@ add_library(engine
         CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
         Describe.cpp GraphStoreProtocol.cpp
         NamedQueryCache.cpp
-        NamedQueryCache.h
         QueryExecutionContext.cpp)
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
diff --git a/src/engine/NamedQueryCache.cpp b/src/engine/NamedQueryCache.cpp
index 9d4bbb15a3..6c333fcbb0 100644
--- a/src/engine/NamedQueryCache.cpp
+++ b/src/engine/NamedQueryCache.cpp
@@ -2,4 +2,33 @@
 //                  Chair of Algorithms and Data Structures.
 //  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
-#include "NamedQueryCache.h"
+#include "engine/NamedQueryCache.h"
+
+// _____________________________________________________________________________
+std::shared_ptr<ValuesForTesting> NamedQueryCache ::getOperation(
+    const Key& key, QueryExecutionContext* ctx) const {
+  const auto& [table, map, sortedOn] = get(key);
+  // TODO<joka921> We should get rid of the copies of the IdTable (and
+  // probably of the other members), especially for larger results.
+  return std::make_shared<ValuesForTesting>(ctx, table.clone(), map);
+}
+
+// _____________________________________________________________________________
+auto NamedQueryCache::get(const Key& key) const -> const Value& {
+  auto l = cache_.wlock();
+  auto it = l->find(key);
+  if (it == l->end()) {
+    throw std::runtime_error{
+        absl::StrCat("The named query with the name \"", key,
+                     "\" was not pinned to the named query cache")};
+  }
+  return it->second;
+}
+
+// _____________________________________________________________________________
+void NamedQueryCache::store(const Key& key, Value value) {
+  (*cache_.wlock()).insert_or_assign(key, std::move(value));
+}
+
+// _____________________________________________________________________________
+void NamedQueryCache::clear() { cache_.wlock()->clear(); }
diff --git a/src/engine/NamedQueryCache.h b/src/engine/NamedQueryCache.h
index 03b9c3ebe8..4d9775b1c5 100644
--- a/src/engine/NamedQueryCache.h
+++ b/src/engine/NamedQueryCache.h
@@ -4,11 +4,14 @@
 #pragma once
 
 #include "engine/ValuesForTesting.h"
-#include "util/Cache.h"
 #include "util/Synchronized.h"
 
+// A simple threadsafe cache that associates query results with an explicit
+// name.
 class NamedQueryCache {
  public:
+  // The cache value. It stores all the information required to construct a
+  // proper `QueryExecutionTree` later on.
   struct Value {
     IdTable result_;
     VariableToColumnMap varToColMap_;
@@ -21,21 +24,23 @@ class NamedQueryCache {
   ad_utility::Synchronized<Cache> cache_;
 
  public:
-  void store(const Key& key, Value value) {
-    (*cache_.wlock()).insert_or_assign(key, std::move(value));
-  }
-  const Value& get(const Key& key) {
-    auto l = cache_.wlock();
-    auto it = l->find(key);
-    // TODO<joka921> Proper error message.
-    AD_CONTRACT_CHECK(it != l->end());
-    return it->second;
-  }
+  // Store an explicit query result with a given `key`. Previously stored
+  // `value`s with the same `key` are overwritten.
+  void store(const Key& key, Value value);
 
-  std::shared_ptr<ValuesForTesting> getOperation(const Key& key,
-                                                 QueryExecutionContext* ctx) {
-    const auto& [table, map, sortedOn] = get(key);
-    return std::make_shared<ValuesForTesting>(
-        ctx, std::make_shared<IdTable>(table.clone()), map);
-  }
+  // Clear the cache.
+  void clear();
+
+  // Retrieve the query result that is associated with the `key`.
+  // Throw an exception if the `key` doesn't exist.
+  const Value& get(const Key& key) const;
+
+  // Retrieve the query result with the given `key` and convert it into an
+  // explicit `ValuesForTesting` operation that can be used as part of a
+  // `QueryExecutionTree`.
+  // TODO<joka921> This can be done more efficiently if we implement a dedicated
+  // operation for this use case, `ValuesForTesting` currently incurs one
+  // (unneeded) copy per query execution.
+  std::shared_ptr<ValuesForTesting> getOperation(
+      const Key& key, QueryExecutionContext* ctx) const;
 };
diff --git a/src/engine/QueryExecutionContext.cpp b/src/engine/QueryExecutionContext.cpp
index d7c4867898..b1676e834a 100644
--- a/src/engine/QueryExecutionContext.cpp
+++ b/src/engine/QueryExecutionContext.cpp
@@ -20,6 +20,3 @@ QueryExecutionContext::QueryExecutionContext(
       _sortPerformanceEstimator(sortPerformanceEstimator),
       updateCallback_(std::move(updateCallback)),
       namedQueryCache_{namedCache} {}
-
-// _____________________________________________________________________________
-QueryExecutionContext::~QueryExecutionContext() = default;
diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h
index cd1c931952..9eb632b48a 100644
--- a/src/engine/QueryExecutionContext.h
+++ b/src/engine/QueryExecutionContext.h
@@ -107,7 +107,6 @@ class QueryExecutionContext {
       std::function<void(std::string)> updateCallback =
           [](std::string) { /* No-op by default for testing */ },
       bool pinSubtrees = false, bool pinResult = false);
-  ~QueryExecutionContext();
 
   QueryResultCache& getQueryTreeCache() { return *_subtreeCache; }
 
diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
index 8f2abaec1c..d9297ea841 100644
--- a/src/engine/Server.cpp
+++ b/src/engine/Server.cpp
@@ -361,11 +361,6 @@ Awaitable<void> Server::process(
   const auto parsedHttpRequest = parseHttpRequest(request);
   const auto& parameters = parsedHttpRequest.parameters_;
 
-  LOG(INFO) << "Logging all the parameters" << std::endl;
-  for (const auto& [key, value] : parameters) {
-    LOG(INFO) << key << ":" << value.at(0) << std::endl;
-  }
-
   // We always want to call `Server::checkParameter` with the same first
   // parameter.
   auto checkParameter = std::bind_front(&ad_utility::url_parser::checkParameter,
@@ -811,12 +806,9 @@ Awaitable<void> Server::processQuery(
   auto [cancellationHandle, cancelTimeoutOnDestruction] =
       setupCancellationHandle(messageSender.getQueryId(), timeLimit);
 
-  // Do the query planning. This creates a `QueryExecutionTree`, which will
-  // then be used to process the query.
+  // Figure out whether the query is to be pinned in the cache (either
+  // implicitly, or explicitly as a named query).
   auto [pinSubtrees, pinResult] = determineResultPinning(params);
-  for (auto [key, value] : params) {
-    LOG(INFO) << "key : " << key << ": " << value.at(0) << std::endl;
-  }
   std::optional<std::string> pinNamed =
       ad_utility::url_parser::checkParameter(params, "pin-named-query", {});
   LOG(INFO) << "Processing the following SPARQL query:"
@@ -880,6 +872,8 @@ Awaitable<void> Server::processQuery(
   limitOffset._offset -= qet.getRootOperation()->getLimit()._offset;
 
   if (pinNamed.has_value()) {
+    // The query is to be pinned in the named cache. In this case we don't
+    // return the result, but only pin it.
     auto result = qet.getResult(false);
     auto t =
         NamedQueryCache::Value(result->idTable().clone(),
@@ -887,7 +881,7 @@ Awaitable<void> Server::processQuery(
     qec.namedQueryCache().store(pinNamed.value(), std::move(t));
 
     auto response = ad_utility::httpUtils::createOkResponse(
-        "successfully pinned the query result", request,
+        "Successfully pinned the query result", request,
         ad_utility::MediaType::textPlain);
     co_await send(response);
   } else {
diff --git a/src/engine/ValuesForTesting.h b/src/engine/ValuesForTesting.h
index c9ad456720..f7f78945b1 100644
--- a/src/engine/ValuesForTesting.h
+++ b/src/engine/ValuesForTesting.h
@@ -4,23 +4,18 @@
 
 #pragma once
 
-#include <util/TransparentFunctors.h>
-
 #include "engine/Operation.h"
 #include "engine/QueryExecutionContext.h"
 #include "engine/Result.h"
 #include "util/Algorithm.h"
 #include "util/Random.h"
 
-auto tables(auto& tables_) {
-  return ql::views::transform(tables_, ad_utility::dereference);
-}
 // An operation that yields a given `IdTable` as its result. It is used for
 // unit testing purposes when we need to specify the subtrees of another
 // operation.
 class ValuesForTesting : public Operation {
  private:
-  std::vector<std::shared_ptr<const IdTable>> tables_;
+  std::vector<IdTable> tables_;
   VariableToColumnMap variables_;
   bool supportsLimit_;
   // Those can be manually overwritten for testing using the respective getters.
@@ -47,33 +42,34 @@ class ValuesForTesting : public Operation {
         localVocab_{std::move(localVocab)},
         multiplicity_{multiplicity},
         forceFullyMaterialized_{forceFullyMaterialized} {
-    AD_CONTRACT_CHECK(variables_.size() == table.numColumns());
-    tables_.push_back(std::make_shared<const IdTable>(std::move(table)));
+    AD_CONTRACT_CHECK(variables.size() == table.numColumns());
+    tables_.push_back(std::move(table));
     variables_ = computeVarMapFromVector(variables);
   }
 
-  ValuesForTesting(QueryExecutionContext* ctx,
-                   std::shared_ptr<const IdTable> table,
+  ValuesForTesting(QueryExecutionContext* ctx, IdTable table,
                    VariableToColumnMap variables,
                    std::vector<ColumnIndex> sortedColumns = {},
                    LocalVocab localVocab = LocalVocab{})
       : Operation{ctx},
-        tables_{std::move(table)},
         variables_{std::move(variables)},
         supportsLimit_{false},
-        sizeEstimate_{tables_.at(0)->numRows()},
+        sizeEstimate_{table.numRows()},
         costEstimate_{0},
         resultSortedColumns_{std::move(sortedColumns)},
         localVocab_{std::move(localVocab)},
         multiplicity_{},
-        forceFullyMaterialized_{false} {}
+        forceFullyMaterialized_{false} {
+    tables_.push_back(std::move(table));
+  }
   explicit ValuesForTesting(QueryExecutionContext* ctx,
-                            std::vector<IdTable> idTables,
+                            std::vector<IdTable> tables,
                             std::vector<std::optional<Variable>> variables,
                             bool unlikelyToFitInCache = false,
                             std::vector<ColumnIndex> sortedColumns = {},
                             LocalVocab localVocab = LocalVocab{})
       : Operation{ctx},
+        tables_{std::move(tables)},
         supportsLimit_{false},
         sizeEstimate_{0},
         costEstimate_{0},
@@ -81,15 +77,12 @@ class ValuesForTesting : public Operation {
         resultSortedColumns_{std::move(sortedColumns)},
         localVocab_{std::move(localVocab)},
         multiplicity_{std::nullopt} {
-    for (auto& table : idTables) {
-      tables_.push_back(std::make_shared<const IdTable>(std::move(table)));
-    }
     AD_CONTRACT_CHECK(
-        ql::ranges::all_of(tables(tables_), [this](const IdTable& table) {
-          return variables_.size() == table.numColumns();
+        ql::ranges::all_of(tables_, [&variables](const IdTable& table) {
+          return variables.size() == table.numColumns();
         }));
     size_t totalRows = 0;
-    for (const IdTable& idTable : tables(tables_)) {
+    for (const IdTable& idTable : tables_) {
       totalRows += idTable.numRows();
     }
     sizeEstimate_ = totalRows;
@@ -108,7 +101,7 @@ class ValuesForTesting : public Operation {
       AD_CORRECTNESS_CHECK(!supportsLimit_);
       std::vector<IdTable> clones;
       clones.reserve(tables_.size());
-      for (const IdTable& idTable : tables(tables_)) {
+      for (const IdTable& idTable : tables_) {
         clones.push_back(idTable.clone());
       }
       auto generator = [](auto idTables,
@@ -121,15 +114,15 @@ class ValuesForTesting : public Operation {
     }
     std::optional<IdTable> optionalTable;
     if (tables_.size() > 1) {
-      IdTable aggregateTable{tables(tables_)[0].numColumns(),
-                             tables(tables_)[0].getAllocator()};
-      for (const IdTable& idTable : tables(tables_)) {
+      IdTable aggregateTable{tables_.at(0).numColumns(),
+                             tables_.at(0).getAllocator()};
+      for (const IdTable& idTable : tables_) {
         aggregateTable.insertAtEnd(idTable);
       }
       optionalTable = std::move(aggregateTable);
     }
     auto table = optionalTable.has_value() ? std::move(optionalTable).value()
-                                           : tables(tables_)[0].clone();
+                                           : tables_.at(0).clone();
     if (supportsLimit_) {
       table.erase(table.begin() + getLimit().upperBound(table.size()),
                   table.end());
@@ -151,13 +144,13 @@ class ValuesForTesting : public Operation {
     std::stringstream str;
     auto numRowsView = tables_ | ql::views::transform(&IdTable::numRows);
     auto totalNumRows = std::reduce(numRowsView.begin(), numRowsView.end(), 0);
-    auto numCols = tables_.empty() ? 0 : tables_.at(0)->numColumns();
+    auto numCols = tables_.empty() ? 0 : tables_.at(0).numColumns();
     str << "Values for testing with " << numCols << " columns and "
         << totalNumRows << " rows. ";
     if (totalNumRows > 1000) {
       str << ad_utility::FastRandomIntGenerator<int64_t>{}();
     } else {
-      for (const IdTable& idTable : tables(tables_)) {
+      for (const IdTable& idTable : tables_) {
         for (size_t i = 0; i < idTable.numColumns(); ++i) {
           for (Id entry : idTable.getColumn(i)) {
             str << entry << ' ';
@@ -177,7 +170,7 @@ class ValuesForTesting : public Operation {
   size_t getResultWidth() const override {
     // Assume a width of 1 if we have no tables and no other information to base
     // it on because 0 would otherwise cause stuff to break.
-    return tables_.empty() ? 1 : tables_.at(0)->numColumns();
+    return tables_.empty() ? 1 : tables_.at(0).numColumns();
   }
 
   vector<ColumnIndex> resultSortedOn() const override {
@@ -202,7 +195,7 @@ class ValuesForTesting : public Operation {
 
   bool knownEmptyResult() override {
     return ql::ranges::all_of(
-        tables(tables_), [](const IdTable& table) { return table.empty(); });
+        tables_, [](const IdTable& table) { return table.empty(); });
   }
 
  private:
@@ -214,7 +207,7 @@ class ValuesForTesting : public Operation {
         continue;
       }
       bool containsUndef =
-          ql::ranges::any_of(tables(tables_), [&i](const IdTable& table) {
+          ql::ranges::any_of(tables_, [&i](const IdTable& table) {
             return ql::ranges::any_of(table.getColumn(i),
                                       [](Id id) { return id.isUndefined(); });
           });
diff --git a/src/global/Constants.h b/src/global/Constants.h
index dde13b65ee..39f930b951 100644
--- a/src/global/Constants.h
+++ b/src/global/Constants.h
@@ -74,8 +74,11 @@ constexpr inline std::string_view DEFAULT_GRAPH_IRI =
 constexpr inline std::string_view QLEVER_INTERNAL_GRAPH_IRI =
     makeQleverInternalIriConst<"internal-graph">();
 
+// The prefix of a SERVICE IRI that refers to a query that has been pinned with
+// an explicit name. The format currently is `ql:named-cached-query-$query-id$`.
+// NOTE: This constant does not include the leading '<'.
 constexpr inline std::string_view NAMED_CACHED_QUERY_PREFIX =
-    ad_utility::constexprStrCat<"<", QLEVER_INTERNAL_PREFIX_URL,
+    ad_utility::constexprStrCat<QLEVER_INTERNAL_PREFIX_URL,
                                 "named-cached-query-">();
 
 constexpr inline std::pair<std::string_view, std::string_view> GEOF_PREFIX = {
diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt
index 57f519f6cf..6fa123a793 100644
--- a/src/parser/CMakeLists.txt
+++ b/src/parser/CMakeLists.txt
@@ -31,8 +31,6 @@ add_library(parser
         Literal.cpp
         LiteralOrIri.cpp
         DatasetClauses.cpp
-        NamedCachedQuery.cpp
-        NamedCachedQuery.h
 )
 qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine index)
 
diff --git a/src/parser/NamedCachedQuery.cpp b/src/parser/NamedCachedQuery.cpp
deleted file mode 100644
index a1db7e95c3..0000000000
--- a/src/parser/NamedCachedQuery.cpp
+++ /dev/null
@@ -1,5 +0,0 @@
-//
-// Created by kalmbacj on 1/30/25.
-//
-
-#include "NamedCachedQuery.h"
diff --git a/src/parser/NamedCachedQuery.h b/src/parser/NamedCachedQuery.h
index 40a9604d12..72e9adc64b 100644
--- a/src/parser/NamedCachedQuery.h
+++ b/src/parser/NamedCachedQuery.h
@@ -7,18 +7,24 @@
 #include "parser/MagicServiceQuery.h"
 
 namespace parsedQuery {
+// A magic service for queries that are pinned with an explicit query name.
 class NamedCachedQuery : public MagicServiceQuery {
   std::string identifier_;
 
  public:
+  // Construct with the name of the named query.
   NamedCachedQuery(std::string identifier)
       : identifier_{std::move(identifier)} {}
 
+  // Currently the body of the SERVICE clause must be empty.
   void addParameter([[maybe_unused]] const SparqlTriple& triple) override {
     throw std::runtime_error{
         "The body of a named cache query request must be empty"};
   }
 
+  // Return the name of the named query, and check that the configuration is
+  // valid (which currently means that the body of the SERVICE clause was
+  // empty).
   const std::string& validateAndGetIdentifier() const {
     // TODO<joka921> Better error messages.
     AD_CORRECTNESS_CHECK(!childGraphPattern_.has_value());
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
index 34cd48a25a..fd09eeb34a 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -894,6 +894,7 @@ GraphPatternOperation Visitor::visitPathQuery(
 
 // _____________________________________________________________________________
 GraphPatternOperation Visitor::visitNamedCachedQuery(
+    const TripleComponent::Iri& target,
     Parser::ServiceGraphPatternContext* ctx) {
   auto parseContent = [ctx](parsedQuery::NamedCachedQuery& namedQuery,
                             const parsedQuery::GraphPatternOperation& op) {
@@ -908,13 +909,10 @@ GraphPatternOperation Visitor::visitNamedCachedQuery(
     }
   };
 
-  auto iri = std::get<Iri>(visit(ctx->varOrIri()));
-  auto s = iri.toSparql();
-  AD_CORRECTNESS_CHECK(s.starts_with(NAMED_CACHED_QUERY_PREFIX));
-  auto view = std::string_view{s};
-  // Remove the prefix and the trailing ">"
+  auto view = asStringViewUnsafe(target.getContent());
+  AD_CORRECTNESS_CHECK(view.starts_with(NAMED_CACHED_QUERY_PREFIX));
+  // Remove the prefix
   view.remove_prefix(NAMED_CACHED_QUERY_PREFIX.size());
-  view.remove_suffix(1);
 
   parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern());
   parsedQuery::NamedCachedQuery namedQuery{std::string{view}};
@@ -986,9 +984,9 @@ GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) {
     return visitPathQuery(ctx);
   } else if (serviceIri.toStringRepresentation() == SPATIAL_SEARCH_IRI) {
     return visitSpatialQuery(ctx);
-  } else if (serviceIri.toStringRepresentation().starts_with(
-                 NAMED_CACHED_QUERY_PREFIX)) {
-    return visitNamedCachedQuery(ctx);
+  } else if (asStringViewUnsafe(serviceIri.getContent())
+                 .starts_with(NAMED_CACHED_QUERY_PREFIX)) {
+    return visitNamedCachedQuery(serviceIri, ctx);
   }
   // Parse the body of the SERVICE query. Add the visible variables from the
   // SERVICE clause to the visible variables so far, but also remember them
diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h
index 697d353f65..6282994784 100644
--- a/src/parser/sparqlParser/SparqlQleverVisitor.h
+++ b/src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -265,6 +265,7 @@ class SparqlQleverVisitor {
       Parser::ServiceGraphPatternContext* ctx);
 
   GraphPatternOperation visitNamedCachedQuery(
+      const TripleComponent::Iri& target,
       Parser::ServiceGraphPatternContext* ctx);
 
   parsedQuery::GraphPatternOperation visit(Parser::BindContext* ctx);
@@ -346,10 +347,10 @@ class SparqlQleverVisitor {
 
   PropertyPath visit(Parser::PathEltOrInverseContext* ctx);
 
-  // NOTE: The `visit` overloads marked `[[noreturn]]` always throw an
-  // exception because the corresponding feature is not (yet) supported by
-  // QLever. Most of them have a return type of `void`. Some of the don't, in
-  // order to make the usage of abstractions like `visitAlternative` easier.
+  // NOTE: The `visit` overloads marked `[[noreturn]]` always throw an exception
+  // because the corresponding feature is not (yet) supported by QLever. Most
+  // of them have a return type of `void`. Some of them don't, in order to
+  // make the usage of abstractions like `visitAlternative` easier.
 
   [[noreturn]] static void visit(Parser::PathModContext* ctx);
 
@@ -488,8 +489,8 @@ class SparqlQleverVisitor {
   static std::string currentTimeAsXsdString();
 
   // Member starTime_ is needed for the NOW expression. All calls within
-  // the query execution reference it. The underlying date time format is
-  // e.g.: 2011-01-10T14:45:13.815-05:00
+  // the query execution reference it. The underlying date time format is e.g.:
+  // 2011-01-10T14:45:13.815-05:00
   std::string startTime_ = currentTimeAsXsdString();
 
   template <typename Visitor, typename Ctx>
@@ -506,16 +507,15 @@ class SparqlQleverVisitor {
 
   // Get the part of the original input string that pertains to the given
   // context. This is necessary because ANTLR's `getText()` only provides that
-  // part with *all* whitespace removed. Preserving the whitespace is
-  // important for readability (for example, in an error message), and even
-  // more so when using such parts for further processing (like the body of a
-  // SERVICE query, which is not valid SPARQL anymore when you remove all
-  // whitespace).
+  // part with *all* whitespace removed. Preserving the whitespace is important
+  // for readability (for example, in an error message), and even more so when
+  // using such parts for further processing (like the body of a SERVICE query,
+  // which is not valid SPARQL anymore when you remove all whitespace).
   static std::string getOriginalInputForContext(
       const antlr4::ParserRuleContext* context);
 
-  // Process an IRI function call. This is used in both `visitFunctionCall`
-  // and `visitIriOrFunction`.
+  // Process an IRI function call. This is used in both `visitFunctionCall` and
+  // `visitIriOrFunction`.
   static ExpressionPtr processIriFunctionCall(
       const TripleComponent::Iri& iri, std::vector<ExpressionPtr> argList,
       const antlr4::ParserRuleContext*);
@@ -559,14 +559,13 @@ class SparqlQleverVisitor {
   template <typename Ctx>
   auto visitOptional(Ctx* ctx) -> std::optional<decltype(visit(ctx))>;
 
-  /// If `ctx` is not `nullptr`, visit it, convert the result to
-  /// `Intermediate` and assign it to `*target`. The case where
-  /// `Intermediate!=Target` is useful, when the result of `visit(ctx)` cannot
-  /// be converted to `Target`, but the conversion chain `VisitResult ->
-  /// Intermediate -> Target` is valid. For example when `visit(ctx)` yields
-  /// `A`, `A` is explicitly convertible to `B` and `Target` is `optional<B>`,
-  /// then `B` has to be specified as `Intermediate` (see for example the
-  /// implementation of `visitAlternative`).
+  /// If `ctx` is not `nullptr`, visit it, convert the result to `Intermediate`
+  /// and assign it to `*target`. The case where `Intermediate!=Target` is
+  /// useful, when the result of `visit(ctx)` cannot be converted to `Target`,
+  /// but the conversion chain `VisitResult -> Intermediate -> Target` is valid.
+  /// For example when `visit(ctx)` yields `A`, `A` is explicitly convertible to
+  /// `B` and `Target` is `optional<B>`, then `B` has to be specified as
+  /// `Intermediate` (see for example the implementation of `visitAlternative`).
   template <typename Target, typename Intermediate = Target, typename Ctx>
   void visitIf(Target* target, Ctx* ctx);
 
@@ -586,8 +585,8 @@ class SparqlQleverVisitor {
   template <typename Context>
   Triples parseTriplesConstruction(Context* ctx);
 
-  // If the triple is a special triple for the text index (i.e. its predicate
-  // is either `ql:contains-word` or `ql:contains-entity`, register the magic
+  // If the triple is a special triple for the text index (i.e. its predicate is
+  // either `ql:contains-word` or `ql:contains-entity`), register the magic
   // variables for the matching word and the score that will be created when
   // processing those triples in the query body, s.t. they can be selected as
   // part of the query result.
@@ -598,8 +597,8 @@ class SparqlQleverVisitor {
   static TripleComponent visitGraphTerm(const GraphTerm& graphTerm);
 
   // If any of the variables used in `expression` did not appear previously in
-  // the query, add a warning or throw an exception (depending on the setting
-  // of the corresponding `RuntimeParameter`).
+  // the query, add a warning or throw an exception (depending on the setting of
+  // the corresponding `RuntimeParameter`).
   void warnOrThrowIfUnboundVariables(auto* ctx,
                                      const SparqlExpressionPimpl& expression,
                                      std::string_view clauseName);

From 65caf947e38d31d3e64442b724328d3c6f15d25d Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 30 Jan 2025 18:16:21 +0100
Subject: [PATCH 07/25] Fix the compilation of the tests again.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/ValuesForTesting.h | 23 ++++++++++++-----------
 test/OperationTest.cpp        |  5 +++--
 test/ValuesForTestingTest.cpp |  7 ++++---
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/engine/ValuesForTesting.h b/src/engine/ValuesForTesting.h
index f7f78945b1..389eefd493 100644
--- a/src/engine/ValuesForTesting.h
+++ b/src/engine/ValuesForTesting.h
@@ -14,6 +14,9 @@
 // unit testing purposes when we need to specify the subtrees of another
 // operation.
 class ValuesForTesting : public Operation {
+ public:
+  using VarVector = std::vector<std::optional<Variable>>;
+
  private:
   std::vector<IdTable> tables_;
   VariableToColumnMap variables_;
@@ -27,13 +30,13 @@ class ValuesForTesting : public Operation {
   // Create an operation that has as its result the given `table` and the given
   // `variables`. The number of variables must be equal to the number
   // of columns in the table.
-  explicit ValuesForTesting(
-      QueryExecutionContext* ctx, IdTable table,
-      const std::vector<std::optional<Variable>>& variables,
-      bool supportsLimit = false, std::vector<ColumnIndex> sortedColumns = {},
-      LocalVocab localVocab = LocalVocab{},
-      std::optional<float> multiplicity = std::nullopt,
-      bool forceFullyMaterialized = false)
+  explicit ValuesForTesting(QueryExecutionContext* ctx, IdTable table,
+                            const VarVector& variables,
+                            bool supportsLimit = false,
+                            std::vector<ColumnIndex> sortedColumns = {},
+                            LocalVocab localVocab = LocalVocab{},
+                            std::optional<float> multiplicity = std::nullopt,
+                            bool forceFullyMaterialized = false)
       : Operation{ctx},
         supportsLimit_{supportsLimit},
         sizeEstimate_{table.numRows()},
@@ -63,8 +66,7 @@ class ValuesForTesting : public Operation {
     tables_.push_back(std::move(table));
   }
   explicit ValuesForTesting(QueryExecutionContext* ctx,
-                            std::vector<IdTable> tables,
-                            std::vector<std::optional<Variable>> variables,
+                            std::vector<IdTable> tables, VarVector variables,
                             bool unlikelyToFitInCache = false,
                             std::vector<ColumnIndex> sortedColumns = {},
                             LocalVocab localVocab = LocalVocab{})
@@ -199,8 +201,7 @@ class ValuesForTesting : public Operation {
   }
 
  private:
-  VariableToColumnMap computeVarMapFromVector(
-      const std::vector<std::optional<Variable>>& vars) const {
+  VariableToColumnMap computeVarMapFromVector(const VarVector& vars) const {
     VariableToColumnMap m;
     for (auto i = ColumnIndex{0}; i < vars.size(); ++i) {
       if (!vars.at(i).has_value()) {
diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp
index 5f2c2c377c..d0daeaaed2 100644
--- a/test/OperationTest.cpp
+++ b/test/OperationTest.cpp
@@ -133,7 +133,7 @@ class OperationTestFixture : public testing::Test {
       &namedCache,
       [&](std::string json) { jsonHistory.emplace_back(std::move(json)); }};
   IdTable table = makeIdTableFromVector({{}, {}, {}});
-  ValuesForTesting operation{&qec, std::move(table), {}};
+  ValuesForTesting operation{&qec, std::move(table), VariableToColumnMap{}};
 };
 
 // _____________________________________________________________________________
@@ -288,7 +288,8 @@ TEST(Operation, updateRuntimeStatsWorksCorrectly) {
   auto qec = getQec();
   auto idTable = makeIdTableFromVector({{3, 4}, {7, 8}, {9, 123}});
   ValuesForTesting valuesForTesting{
-      qec, std::move(idTable), {Variable{"?x"}, Variable{"?y"}}};
+      qec, std::move(idTable),
+      ValuesForTesting::VarVector{Variable{"?x"}, Variable{"?y"}}};
 
   auto& rti = valuesForTesting.runtimeInfo();
 
diff --git a/test/ValuesForTestingTest.cpp b/test/ValuesForTestingTest.cpp
index 8c4b86d019..c8108f4d98 100644
--- a/test/ValuesForTestingTest.cpp
+++ b/test/ValuesForTestingTest.cpp
@@ -16,7 +16,8 @@ TEST(ValuesForTesting, valuesForTesting) {
       (ValuesForTesting{getQec(), table.clone(), {Variable{"?x"}}}));
 
   ValuesForTesting v{
-      getQec(), table.clone(), {Variable{"?x"}, {Variable{"?y"}}}};
+      getQec(), table.clone(),
+      ValuesForTesting::VarVector{Variable{"?x"}, {Variable{"?y"}}}};
   // The following line has no effect. TODO<joka921> provide default
   // implementations for such boilerplate methods in the `Operation` base class.
   ASSERT_EQ(v.getResultWidth(), 2u);
@@ -42,7 +43,7 @@ TEST(ValuesForTesting, cornerCasesCacheKey) {
   auto empty = makeIdTableFromVector({});
   auto neutral = makeIdTableFromVector({{}});
 
-  ValuesForTesting vEmpty{getQec(), empty.clone(), {}};
-  ValuesForTesting vNeutral{getQec(), neutral.clone(), {}};
+  ValuesForTesting vEmpty{getQec(), empty.clone(), VariableToColumnMap{}};
+  ValuesForTesting vNeutral{getQec(), neutral.clone(), VariableToColumnMap{}};
   EXPECT_NE(vEmpty.getCacheKey(), vNeutral.getCacheKey());
 }

From e9e8dfd18978a96bf5d61926e828200b18d4a2ff Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 31 Jan 2025 09:03:12 +0100
Subject: [PATCH 08/25] Make the In-Memory-Vocabulary compatible with the
 RDFVocabulary

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/index/IndexImpl.Text.cpp              | 2 +-
 src/index/IndexImpl.cpp                   | 7 +++++--
 src/index/IndexImpl.h                     | 2 +-
 src/index/StringSortComparator.h          | 7 +++++++
 src/index/Vocabulary.cpp                  | 2 +-
 src/index/Vocabulary.h                    | 7 ++++++-
 src/index/vocabulary/VocabularyInMemory.h | 5 ++++-
 7 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp
index 3b872eb39c..2f15be7e5c 100644
--- a/src/index/IndexImpl.Text.cpp
+++ b/src/index/IndexImpl.Text.cpp
@@ -48,7 +48,7 @@ cppcoro::generator<WordsFileLine> IndexImpl::wordsInTextRecords(
       if (!isLiteral(text)) {
         continue;
       }
-      WordsFileLine entityLine{text, true, contextId, 1, true};
+      WordsFileLine entityLine{std::string{text}, true, contextId, 1, true};
       co_yield entityLine;
       std::string_view textView = text;
       textView = textView.substr(0, textView.rfind('"'));
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 72efec5307..6205d08f6b 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -974,7 +974,7 @@ size_t IndexImpl::getNumDistinctSubjectPredicatePairs() const {
 }
 
 // _____________________________________________________________________________
-bool IndexImpl::isLiteral(const string& object) const {
+bool IndexImpl::isLiteral(std::string_view object) const {
   return decltype(vocab_)::stringIsLiteral(object);
 }
 
@@ -1522,7 +1522,10 @@ size_t IndexImpl::getCardinality(
 }
 
 // ___________________________________________________________________________
-std::string IndexImpl::indexToString(VocabIndex id) const { return vocab_[id]; }
+// TODO<joka921> Make this the return type of the vocabulary.
+std::string IndexImpl::indexToString(VocabIndex id) const {
+  return std::string{vocab_[id]};
+}
 
 // ___________________________________________________________________________
 std::string_view IndexImpl::indexToString(WordVocabIndex id) const {
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index d284cdb415..a698a96c6f 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -634,7 +634,7 @@ class IndexImpl {
   friend class IndexTest_createFromOnDiskIndexTest_Test;
   friend class CreatePatternsFixture_createPatterns_Test;
 
-  bool isLiteral(const string& object) const;
+  bool isLiteral(std::string_view object) const;
 
  public:
   LangtagAndTriple tripleToInternalRepresentation(TurtleTriple&& triple) const;
diff --git a/src/index/StringSortComparator.h b/src/index/StringSortComparator.h
index 81829f226e..33f1f2077c 100644
--- a/src/index/StringSortComparator.h
+++ b/src/index/StringSortComparator.h
@@ -619,6 +619,13 @@ class TripleComponentComparator {
     return compare(spA, spB, level) < 0;
   }
 
+  // TODO<joka921> Unify these three functions.
+  bool operator()(const SplitVal& spA, std::string_view b,
+                  const Level level) const {
+    auto spB = extractAndTransformComparable(b, level, false);
+    return compare(spA, spB, level) < 0;
+  }
+
   template <typename A, typename B, typename C>
   bool operator()(const SplitValBase<A, B, C>& a,
                   const SplitValBase<A, B, C>& b, const Level level) const {
diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp
index ab2cb52505..cd3b25b490 100644
--- a/src/index/Vocabulary.cpp
+++ b/src/index/Vocabulary.cpp
@@ -43,7 +43,7 @@ void Vocabulary<S, C, I>::readFromFile(const string& fileName) {
             << std::endl;
   vocabulary_.close();
   vocabulary_.open(fileName);
-  if constexpr (isCompressed_) {
+  if constexpr (isCompressed_ && false) {
     const auto& internalExternalVocab =
         vocabulary_.getUnderlyingVocabulary().getUnderlyingVocabulary();
     LOG(INFO) << "Done, number of words: "
diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
index 6775a13217..fc9c118b87 100644
--- a/src/index/Vocabulary.h
+++ b/src/index/Vocabulary.h
@@ -37,9 +37,11 @@ using std::string;
 using std::vector;
 
 template <class StringType>
-using AccessReturnType_t =
+using AccessReturnType_t = std::string_view;
+/*
     std::conditional_t<std::is_same_v<StringType, CompressedString>,
                        std::string, std::string_view>;
+                       */
 
 template <typename IndexT = WordVocabIndex>
 class IdRange {
@@ -114,10 +116,13 @@ class Vocabulary {
   vector<std::string> internalizedLangs_;
   vector<std::string> externalizedPrefixes_{""};
 
+  using UnderlyingVocabulary = VocabularyInMemory;
+  /*
   using UnderlyingVocabulary =
       std::conditional_t<isCompressed_,
                          CompressedVocabulary<VocabularyInternalExternal>,
                          VocabularyInMemory>;
+      */
   using VocabularyWithUnicodeComparator =
       UnicodeVocabulary<UnderlyingVocabulary, ComparatorType>;
 
diff --git a/src/index/vocabulary/VocabularyInMemory.h b/src/index/vocabulary/VocabularyInMemory.h
index 5ce18fe721..efe9a9c7e7 100644
--- a/src/index/vocabulary/VocabularyInMemory.h
+++ b/src/index/vocabulary/VocabularyInMemory.h
@@ -68,11 +68,14 @@ class VocabularyInMemory
   struct WordWriter {
     typename Words::Writer writer_;
     explicit WordWriter(const std::string& filename) : writer_{filename} {}
-    void operator()(std::string_view str) {
+    void operator()(std::string_view str,
+                    [[maybe_unused]] bool isExternalDummy = false) {
       writer_.push(str.data(), str.size());
     }
 
     void finish() { writer_.finish(); }
+    std::string readableNameDummy_;
+    std::string& readableName() { return readableNameDummy_; }
   };
 
   // Return a `WordWriter` that directly writes the words to the given

From 79a11b662ad4a0e93db03b5d32512a689afef90d Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 31 Jan 2025 11:22:20 +0100
Subject: [PATCH 09/25] Refactor things.

TODO:
Make the vocabulary implementation selectable via CMake.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/ExportQueryExecutionTrees.cpp      | 11 +++-
 src/index/Index.cpp                           |  5 +-
 src/index/Index.h                             | 10 ++--
 src/index/IndexImpl.cpp                       |  8 +--
 src/index/IndexImpl.h                         |  4 +-
 src/index/StringSortComparator.h              |  1 -
 src/index/Vocabulary.cpp                      | 42 ++++----------
 src/index/Vocabulary.h                        | 57 ++++++++-----------
 src/index/vocabulary/VocabularyInMemory.cpp   |  3 +
 src/index/vocabulary/VocabularyInMemory.h     |  6 ++
 .../vocabulary/VocabularyInternalExternal.cpp | 12 ++++
 .../vocabulary/VocabularyInternalExternal.h   |  5 +-
 test/engine/TextIndexScanTestHelpers.h        |  8 +--
 13 files changed, 85 insertions(+), 87 deletions(-)

diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp
index 3375e82924..351dc9b28d 100644
--- a/src/engine/ExportQueryExecutionTrees.cpp
+++ b/src/engine/ExportQueryExecutionTrees.cpp
@@ -356,8 +356,15 @@ ExportQueryExecutionTrees::getLiteralOrIriFromVocabIndex(
     case Datatype::LocalVocabIndex:
       return localVocab.getWord(id.getLocalVocabIndex()).asLiteralOrIri();
     case Datatype::VocabIndex: {
-      auto entity = index.indexToString(id.getVocabIndex());
-      return LiteralOrIri::fromStringRepresentation(entity);
+      auto getEntity = [&index, id]() {
+        return index.indexToString(id.getVocabIndex());
+      };
+      // The type of the entity might be `string_view` (if the vocabulary is
+      // stored uncompressed in RAM) or `string` (if it is on disk, compressed,
+      // or both). The following code works and is efficient in all cases. In
+      // particular, the `std::string` constructor is elided (guaranteed copy
+      // elision) if `getEntity()` already returns a `string`.
+      return LiteralOrIri::fromStringRepresentation(std::string(getEntity()));
     }
     default:
       AD_FAIL();
diff --git a/src/index/Index.cpp b/src/index/Index.cpp
index f66914bfca..06350e1e26 100644
--- a/src/index/Index.cpp
+++ b/src/index/Index.cpp
@@ -71,12 +71,13 @@ size_t Index::getCardinality(
 }
 
 // ____________________________________________________________________________
-std::string Index::indexToString(VocabIndex id) const {
+auto Index::indexToString(VocabIndex id) const -> Vocab::AccessReturnType {
   return pimpl_->indexToString(id);
 }
 
 // ____________________________________________________________________________
-std::string_view Index::indexToString(WordVocabIndex id) const {
+auto Index::indexToString(WordVocabIndex id) const
+    -> TextVocabulary::AccessReturnType {
   return pimpl_->indexToString(id);
 }
 
diff --git a/src/index/Index.h b/src/index/Index.h
index 8c6dd1cd40..101908ab7e 100644
--- a/src/index/Index.h
+++ b/src/index/Index.h
@@ -104,13 +104,11 @@ class Index {
   // Read necessary metadata into memory and open file handles.
   void addTextFromOnDiskIndex();
 
-  using Vocab =
-      Vocabulary<CompressedString, TripleComponentComparator, VocabIndex>;
+  using Vocab = RdfsVocabulary;
   [[nodiscard]] const Vocab& getVocab() const;
   Vocab& getNonConstVocabForTesting();
 
-  using TextVocab =
-      Vocabulary<std::string, SimpleStringComparator, WordVocabIndex>;
+  using TextVocab = TextVocabulary;
   [[nodiscard]] const TextVocab& getTextVocab() const;
 
   // Get a (non-owning) pointer to the BlankNodeManager of this Index.
@@ -132,8 +130,8 @@ class Index {
 
   // TODO<joka921> Once we have an overview over the folding this logic should
   // probably not be in the index class.
-  std::string indexToString(VocabIndex id) const;
-  std::string_view indexToString(WordVocabIndex id) const;
+  Vocab::AccessReturnType indexToString(VocabIndex id) const;
+  TextVocab::AccessReturnType indexToString(WordVocabIndex id) const;
 
   [[nodiscard]] Vocab::PrefixRanges prefixRanges(std::string_view prefix) const;
 
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 6205d08f6b..d5781bb297 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -1522,13 +1522,13 @@ size_t IndexImpl::getCardinality(
 }
 
 // ___________________________________________________________________________
-// TODO<joka921> Make this the return type of the vocabulary.
-std::string IndexImpl::indexToString(VocabIndex id) const {
-  return std::string{vocab_[id]};
+RdfsVocabulary::AccessReturnType IndexImpl::indexToString(VocabIndex id) const {
+  return vocab_[id];
 }
 
 // ___________________________________________________________________________
-std::string_view IndexImpl::indexToString(WordVocabIndex id) const {
+TextVocabulary::AccessReturnType IndexImpl::indexToString(
+    WordVocabIndex id) const {
   return textVocab_[id];
 }
 
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index a698a96c6f..8478943c92 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -305,10 +305,10 @@ class IndexImpl {
       const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;
 
   // ___________________________________________________________________________
-  std::string indexToString(VocabIndex id) const;
+  RdfsVocabulary::AccessReturnType indexToString(VocabIndex id) const;
 
   // ___________________________________________________________________________
-  std::string_view indexToString(WordVocabIndex id) const;
+  TextVocabulary::AccessReturnType indexToString(WordVocabIndex id) const;
 
  public:
   // ___________________________________________________________________________
diff --git a/src/index/StringSortComparator.h b/src/index/StringSortComparator.h
index 33f1f2077c..d77e616a76 100644
--- a/src/index/StringSortComparator.h
+++ b/src/index/StringSortComparator.h
@@ -619,7 +619,6 @@ class TripleComponentComparator {
     return compare(spA, spB, level) < 0;
   }
 
-  // TODO<joka921> Unify these three functions.
   bool operator()(const SplitVal& spA, std::string_view b,
                   const Level level) const {
     auto spB = extractAndTransformComparable(b, level, false);
diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp
index cd3b25b490..70e9f0c50e 100644
--- a/src/index/Vocabulary.cpp
+++ b/src/index/Vocabulary.cpp
@@ -39,20 +39,8 @@ bool Vocabulary<StringType, ComparatorType, IndexT>::PrefixRanges::contain(
 // _____________________________________________________________________________
 template <class S, class C, typename I>
 void Vocabulary<S, C, I>::readFromFile(const string& fileName) {
-  LOG(INFO) << "Reading vocabulary from file " << fileName << " ..."
-            << std::endl;
   vocabulary_.close();
   vocabulary_.open(fileName);
-  if constexpr (isCompressed_ && false) {
-    const auto& internalExternalVocab =
-        vocabulary_.getUnderlyingVocabulary().getUnderlyingVocabulary();
-    LOG(INFO) << "Done, number of words: "
-              << internalExternalVocab.internalVocab().size() << std::endl;
-    LOG(INFO) << "Number of words in external vocabulary: "
-              << internalExternalVocab.externalVocab().size() << std::endl;
-  } else {
-    LOG(INFO) << "Done, number of words: " << vocabulary_.size() << std::endl;
-  }
 
   // Precomputing ranges for IRIs, blank nodes, and literals, for faster
   // processing of the `isIrI` and `isLiteral` functions.
@@ -88,19 +76,12 @@ bool Vocabulary<S, C, I>::stringIsLiteral(std::string_view s) {
 // _____________________________________________________________________________
 template <class S, class C, class I>
 bool Vocabulary<S, C, I>::shouldBeExternalized(string_view s) const {
-  // TODO<joka921> Completely refactor the Vocabulary on the different
-  // Types, it is a mess.
-
-  // If the string is not compressed, this means that this is a text vocabulary
-  // and thus doesn't support externalization.
-  if constexpr (std::is_same_v<S, CompressedString>) {
-    if (!stringIsLiteral(s)) {
-      return shouldEntityBeExternalized(s);
-    } else {
-      return shouldLiteralBeExternalized(s);
-    }
+  // TODO<joka921> We should have a completely separate layer that handles the
+  // externalization, not the Vocab.
+  if (!stringIsLiteral(s)) {
+    return shouldEntityBeExternalized(s);
   } else {
-    return false;
+    return shouldLiteralBeExternalized(s);
   }
 }
 
@@ -264,17 +245,18 @@ auto Vocabulary<S, C, I>::prefixRanges(std::string_view prefix) const
 }
 
 // _____________________________________________________________________________
-template <typename S, typename C, typename I>
-auto Vocabulary<S, C, I>::operator[](IndexType idx) const
-    -> AccessReturnType_t<S> {
+template <typename UnderlyingVocabulary, typename C, typename I>
+auto Vocabulary<UnderlyingVocabulary, C, I>::operator[](IndexType idx) const
+    -> AccessReturnType {
   AD_CONTRACT_CHECK(idx.get() < size());
   return vocabulary_[idx.get()];
 }
 
 // Explicit template instantiations
-template class Vocabulary<CompressedString, TripleComponentComparator,
-                          VocabIndex>;
-template class Vocabulary<std::string, SimpleStringComparator, WordVocabIndex>;
+template class Vocabulary<detail::UnderlyingVocabRdfsVocabulary,
+                          TripleComponentComparator, VocabIndex>;
+template class Vocabulary<detail::UnderlyingVocabTextVocabulary,
+                          SimpleStringComparator, WordVocabIndex>;
 
 template void RdfsVocabulary::initializeInternalizedLangs<nlohmann::json>(
     const nlohmann::json&);
diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
index fc9c118b87..c7a8454a4a 100644
--- a/src/index/Vocabulary.h
+++ b/src/index/Vocabulary.h
@@ -36,13 +36,6 @@
 using std::string;
 using std::vector;
 
-template <class StringType>
-using AccessReturnType_t = std::string_view;
-/*
-    std::conditional_t<std::is_same_v<StringType, CompressedString>,
-                       std::string, std::string_view>;
-                       */
-
 template <typename IndexT = WordVocabIndex>
 class IdRange {
  public:
@@ -69,9 +62,15 @@ inline std::ostream& operator<<(std::ostream& stream,
 // retrieval. Template parameters that are supported are:
 // std::string -> no compression is applied
 // CompressedString -> prefix compression is applied
-template <typename StringType, typename ComparatorType, typename IndexT>
+template <typename UnderlyingVocabulary, typename ComparatorType,
+          typename IndexT>
 class Vocabulary {
  public:
+  // The type that is returned by the `operator[]` of this vocabulary. Typically
+  // either `std::string` or `std::string_view`.
+  using AccessReturnType =
+      decltype(std::declval<const UnderlyingVocabulary&>()[0]);
+
   // The index ranges for a prefix + a function to check whether a given index
   // is contained in one of them.
   //
@@ -96,17 +95,6 @@ class Vocabulary {
   // The different type of data that is stored in the vocabulary
   enum class Datatypes { Literal, Iri, Float, Date };
 
-  template <typename T, typename R = void>
-  using enable_if_compressed =
-      std::enable_if_t<std::is_same_v<T, CompressedString>>;
-
-  template <typename T, typename R = void>
-  using enable_if_uncompressed =
-      std::enable_if_t<!std::is_same_v<T, CompressedString>>;
-
-  static constexpr bool isCompressed_ =
-      std::is_same_v<StringType, CompressedString>;
-
   // If a literal uses one of these language tags or starts with one of these
   // prefixes, it will be externalized. By default, everything is externalized.
   // Both of these settings can be overridden using the `settings.json` file.
@@ -116,13 +104,19 @@ class Vocabulary {
   vector<std::string> internalizedLangs_;
   vector<std::string> externalizedPrefixes_{""};
 
-  using UnderlyingVocabulary = VocabularyInMemory;
+  //  using UnderlyingVocabulary = VocabularyInMemory;
   /*
   using UnderlyingVocabulary =
       std::conditional_t<isCompressed_,
                          CompressedVocabulary<VocabularyInternalExternal>,
                          VocabularyInMemory>;
       */
+  /*
+  using UnderlyingVocabulary =
+      std::conditional_t<isCompressed_,
+                         CompressedVocabulary<VocabularyInMemory>,
+                         VocabularyInMemory>;
+                         */
   using VocabularyWithUnicodeComparator =
       UnicodeVocabulary<UnderlyingVocabulary, ComparatorType>;
 
@@ -137,10 +131,7 @@ class Vocabulary {
   using SortLevel = typename ComparatorType::Level;
   using IndexType = IndexT;
 
-  template <
-      typename = std::enable_if_t<std::is_same_v<StringType, string> ||
-                                  std::is_same_v<StringType, CompressedString>>>
-  Vocabulary() {}
+  Vocabulary() = default;
   Vocabulary& operator=(Vocabulary&&) noexcept = default;
   Vocabulary(Vocabulary&&) noexcept = default;
 
@@ -151,10 +142,7 @@ class Vocabulary {
 
   // Get the word with the given `idx`. Throw if the `idx` is not contained
   // in the vocabulary.
-  AccessReturnType_t<StringType> operator[](IndexType idx) const;
-
-  // AccessReturnType_t<StringType> at(IndexType idx) const { return
-  // operator[](id); }
+  AccessReturnType operator[](IndexType idx) const;
 
   //! Get the number of words in the vocabulary.
   [[nodiscard]] size_t size() const { return vocabulary_.size(); }
@@ -247,7 +235,12 @@ class Vocabulary {
   }
 };
 
-using RdfsVocabulary =
-    Vocabulary<CompressedString, TripleComponentComparator, VocabIndex>;
-using TextVocabulary =
-    Vocabulary<std::string, SimpleStringComparator, WordVocabIndex>;
+namespace detail {
+using UnderlyingVocabRdfsVocabulary = VocabularyInMemory;
+using UnderlyingVocabTextVocabulary = VocabularyInMemory;
+}  // namespace detail
+
+using RdfsVocabulary = Vocabulary<detail::UnderlyingVocabRdfsVocabulary,
+                                  TripleComponentComparator, VocabIndex>;
+using TextVocabulary = Vocabulary<detail::UnderlyingVocabTextVocabulary,
+                                  SimpleStringComparator, WordVocabIndex>;
diff --git a/src/index/vocabulary/VocabularyInMemory.cpp b/src/index/vocabulary/VocabularyInMemory.cpp
index f3db258d59..a1c82231d3 100644
--- a/src/index/vocabulary/VocabularyInMemory.cpp
+++ b/src/index/vocabulary/VocabularyInMemory.cpp
@@ -8,9 +8,12 @@ using std::string;
 
 // _____________________________________________________________________________
 void VocabularyInMemory::open(const string& fileName) {
+  LOG(INFO) << "Reading vocabulary from file " << fileName << " ..."
+            << std::endl;
   _words.clear();
   ad_utility::serialization::FileReadSerializer file(fileName);
   file >> _words;
+  LOG(INFO) << "Done, number of words: " << size() << std::endl;
 }
 
 // _____________________________________________________________________________
diff --git a/src/index/vocabulary/VocabularyInMemory.h b/src/index/vocabulary/VocabularyInMemory.h
index efe9a9c7e7..ed498d1702 100644
--- a/src/index/vocabulary/VocabularyInMemory.h
+++ b/src/index/vocabulary/VocabularyInMemory.h
@@ -68,12 +68,18 @@ class VocabularyInMemory
   struct WordWriter {
     typename Words::Writer writer_;
     explicit WordWriter(const std::string& filename) : writer_{filename} {}
+
+    // Write a word. The `isExternalDummy` is only there to have a consistent
+    // interface with the `VocabularyInternalExternal`.
     void operator()(std::string_view str,
                     [[maybe_unused]] bool isExternalDummy = false) {
       writer_.push(str.data(), str.size());
     }
 
     void finish() { writer_.finish(); }
+
+    // The `readableName()` function is only there to have a consistent
+    // interface with the `VocabularyInternalExternal`.
     std::string readableNameDummy_;
     std::string& readableName() { return readableNameDummy_; }
   };
diff --git a/src/index/vocabulary/VocabularyInternalExternal.cpp b/src/index/vocabulary/VocabularyInternalExternal.cpp
index 62c5e29455..3d3d5fffb5 100644
--- a/src/index/vocabulary/VocabularyInternalExternal.cpp
+++ b/src/index/vocabulary/VocabularyInternalExternal.cpp
@@ -37,3 +37,15 @@ void VocabularyInternalExternal::WordWriter::finish() {
   internalWriter_.finish();
   externalWriter_.finish();
 }
+
+// _____________________________________________________________________________
+void VocabularyInternalExternal::open(const string& filename) {
+  LOG(INFO) << "Reading vocabulary from file " << filename << " ..."
+            << std::endl;
+  internalVocab_.open(filename + ".internal");
+  externalVocab_.open(filename + ".external");
+  LOG(INFO) << "Done, number of words: " << size() << std::endl;
+  LOG(INFO) << "Number of words in internal vocabulary (these are also part "
+               "of the external vocabulary): "
+            << internalVocab_.size() << std::endl;
+}
diff --git a/src/index/vocabulary/VocabularyInternalExternal.h b/src/index/vocabulary/VocabularyInternalExternal.h
index f9024369bd..d92510a49f 100644
--- a/src/index/vocabulary/VocabularyInternalExternal.h
+++ b/src/index/vocabulary/VocabularyInternalExternal.h
@@ -40,10 +40,7 @@ class VocabularyInternalExternal {
 
   // Read the vocabulary from a file. The file must have been created using a
   // `WordWriter`.
-  void open(const string& filename) {
-    internalVocab_.open(filename + ".internal");
-    externalVocab_.open(filename + ".external");
-  }
+  void open(const string& filename);
 
   // Return the total number of words
   [[nodiscard]] size_t size() const { return externalVocab_.size(); }
diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h
index 6ba1b8c6de..0cc6ae74c6 100644
--- a/test/engine/TextIndexScanTestHelpers.h
+++ b/test/engine/TextIndexScanTestHelpers.h
@@ -22,8 +22,8 @@ inline string getTextRecordFromResultTable(const QueryExecutionContext* qec,
       result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get();
   if (nofNonLiterals <= textRecordIdFromTable) {
     // Return when from Literals
-    return qec->getIndex().indexToString(
-        VocabIndex::make(textRecordIdFromTable - nofNonLiterals));
+    return std::string{qec->getIndex().indexToString(
+        VocabIndex::make(textRecordIdFromTable - nofNonLiterals))};
   } else {
     // Return when from DocsDB
     return qec->getIndex().getTextExcerpt(
@@ -41,8 +41,8 @@ inline const TextRecordIndex getTextRecordIdFromResultTable(
 inline string getEntityFromResultTable(const QueryExecutionContext* qec,
                                        const ProtoResult& result,
                                        const size_t& rowIndex) {
-  return qec->getIndex().indexToString(
-      result.idTable().getColumn(1)[rowIndex].getVocabIndex());
+  return std::string{qec->getIndex().indexToString(
+      result.idTable().getColumn(1)[rowIndex].getVocabIndex())};
 }
 
 // Only use on prefix search results

From e406fa429a3b666f73391e2f4b7a7586a593cfc1 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 31 Jan 2025 11:44:39 +0100
Subject: [PATCH 10/25] Making the vocab configuration configurable at runtime.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 CMakeLists.txt         | 10 ++++++++++
 src/index/Vocabulary.h | 18 +++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3679de4c51..67b2feb62b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,6 +203,16 @@ if (${USE_CPP_17_BACKPORTS})
     add_definitions("-DQLEVER_CPP_17 -DCPP_CXX_CONCEPTS=0")
 endif()
 
+set(VOCAB_IN_MEMORY OFF CACHE BOOL "Store QLever's vocabulary completely in RAM")
+if (${VOCAB_IN_MEMORY})
+    add_definitions("-D_QLEVER_VOCAB_IN_MEMORY")
+endif ()
+
+set(ENABLE_VOCAB_COMPRESSION ON CACHE BOOL "Compress the vocabulary")
+if (${ENABLE_VOCAB_COMPRESSION})
+    add_definitions("-D_QLEVER_ENABLE_VOCAB_COMPRESSION")
+endif ()
+
 # Enable the specification of additional linker flags manually from the commandline
 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ADDITIONAL_LINKER_FLAGS}")
 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ADDITIONAL_LINKER_FLAGS}")
diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
index c7a8454a4a..e3513c39d4 100644
--- a/src/index/Vocabulary.h
+++ b/src/index/Vocabulary.h
@@ -236,7 +236,23 @@ class Vocabulary {
 };
 
 namespace detail {
-using UnderlyingVocabRdfsVocabulary = VocabularyInMemory;
+// The two macros `_QLEVER_VOCAB_IN_MEMORY` and
+// `_QLEVER_ENABLE_VOCAB_COMPRESSION` can be used to disable the external vocab
+// and the compression of the vocab at compile time. NOTE: These change the
+// binary format of QLever's index, so changing them requires rebuilding of the
+// indices.
+#ifdef _QLEVER_VOCAB_IN_MEMORY
+using VocabStorage = VocabularyInMemory;
+#else
+using VocabStorage = VocabularyInternalExternal;
+#endif
+
+#ifndef _QLEVER_ENABLE_VOCAB_COMPRESSION
+using UnderlyingVocabRdfsVocabulary = VocabStorage;
+#else
+using UnderlyingVocabRdfsVocabulary = CompressedVocabulary<VocabStorage>;
+#endif
+
 using UnderlyingVocabTextVocabulary = VocabularyInMemory;
 }  // namespace detail
 

From 53dc7411cd0547d0b77f316080844dd9c8f65f8c Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 31 Jan 2025 17:19:02 +0100
Subject: [PATCH 11/25] Do not move IdTables (we will later try this out on the
 dat dataset).

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/NamedQueryCache.cpp |  2 +-
 src/engine/Result.cpp          | 39 +++++++++++++++++++----
 src/engine/Result.h            |  4 ++-
 src/engine/ValuesForTesting.h  | 58 ++++++++++++++++++++++------------
 4 files changed, 74 insertions(+), 29 deletions(-)

diff --git a/src/engine/NamedQueryCache.cpp b/src/engine/NamedQueryCache.cpp
index 6c333fcbb0..7acc9515e5 100644
--- a/src/engine/NamedQueryCache.cpp
+++ b/src/engine/NamedQueryCache.cpp
@@ -10,7 +10,7 @@ std::shared_ptr<ValuesForTesting> NamedQueryCache ::getOperation(
   const auto& [table, map, sortedOn] = get(key);
   // TODO<joka921> we should get rid of the copies for the IdTable (and
   // probably the other members) especially for larger results).
-  return std::make_shared<ValuesForTesting>(ctx, table.clone(), map);
+  return std::make_shared<ValuesForTesting>(ctx, table.clone(), map, sortedOn);
 }
 
 // _____________________________________________________________________________
diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp
index 3b476777bb..5671d92676 100644
--- a/src/engine/Result.cpp
+++ b/src/engine/Result.cpp
@@ -8,6 +8,7 @@
 
 #include <absl/cleanup/cleanup.h>
 
+#include "../../cmake-build-clang-16-debug-backports/_deps/range-v3-src/include/range/v3/experimental/view/shared.hpp"
 #include "util/Exception.h"
 #include "util/Generators.h"
 #include "util/Log.h"
@@ -59,6 +60,18 @@ Result::Result(IdTable idTable, std::vector<ColumnIndex> sortedBy,
   assertSortOrderIsRespected(this->idTable(), sortedBy_);
 }
 
+// _____________________________________________________________________________
+Result::Result(std::shared_ptr<const IdTable> idTablePtr,
+               std::vector<ColumnIndex> sortedBy, LocalVocab&& localVocab)
+    : data_{IdTableSharedLocalVocabPair{
+          std::move(idTablePtr),
+          std::make_shared<const LocalVocab>(std::move(localVocab))}},
+      sortedBy_{std::move(sortedBy)} {
+  AD_CONTRACT_CHECK(std::get<IdTableSharedLocalVocabPair>(data_).localVocab_ !=
+                    nullptr);
+  assertSortOrderIsRespected(this->idTable(), sortedBy_);
+}
+
 // _____________________________________________________________________________
 Result::Result(IdTable idTable, std::vector<ColumnIndex> sortedBy,
                LocalVocab&& localVocab)
@@ -120,8 +133,13 @@ void Result::applyLimitOffset(
   }
   if (isFullyMaterialized()) {
     ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started};
-    resizeIdTable(std::get<IdTableSharedLocalVocabPair>(data_).idTable_,
-                  limitOffset);
+
+    auto& tableOrPtr = std::get<IdTableSharedLocalVocabPair>(data_).idTable_;
+    if (auto sharedTable =
+            std::get_if<std::shared_ptr<const IdTable>>(&tableOrPtr)) {
+      tableOrPtr = (**sharedTable).clone();
+    }
+    resizeIdTable(std::get<IdTable>(tableOrPtr), limitOffset);
     limitTimeCallback(limitTimer.msecs(), idTable());
   } else {
     auto generator = [](LazyResult original, LimitOffsetClause limitOffset,
@@ -177,7 +195,7 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) {
 
 // _____________________________________________________________________________
 void Result::checkDefinedness(const VariableToColumnMap& varColMap) {
-  auto performCheck = [](const auto& map, IdTable& idTable) {
+  auto performCheck = [](const auto& map, const IdTable& idTable) {
     return ql::ranges::all_of(map, [&](const auto& varAndCol) {
       const auto& [columnIndex, mightContainUndef] = varAndCol.second;
       if (mightContainUndef == ColumnIndexAndTypeInfo::AlwaysDefined) {
@@ -189,8 +207,7 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) {
     });
   };
   if (isFullyMaterialized()) {
-    AD_EXPENSIVE_CHECK(performCheck(
-        varColMap, std::get<IdTableSharedLocalVocabPair>(data_).idTable_));
+    AD_EXPENSIVE_CHECK(performCheck(varColMap, idTable()));
   } else {
     auto generator = [](LazyResult original,
                         [[maybe_unused]] VariableToColumnMap varColMap,
@@ -250,7 +267,17 @@ void Result::assertSortOrderIsRespected(
 // _____________________________________________________________________________
 const IdTable& Result::idTable() const {
   AD_CONTRACT_CHECK(isFullyMaterialized());
-  return std::get<IdTableSharedLocalVocabPair>(data_).idTable_;
+  auto visitor = []<typename T>(const T& arg) -> const IdTable& {
+    if constexpr (std::is_same_v<T, IdTable>) {
+      return arg;
+    } else {
+      static_assert(std::is_same_v<T, std::shared_ptr<const IdTable>>);
+      AD_CORRECTNESS_CHECK(arg != nullptr);
+      return *arg;
+    }
+  };
+  return std::visit(visitor,
+                    std::get<IdTableSharedLocalVocabPair>(data_).idTable_);
 }
 
 // _____________________________________________________________________________
diff --git a/src/engine/Result.h b/src/engine/Result.h
index c372cf7102..1fe7dbcdac 100644
--- a/src/engine/Result.h
+++ b/src/engine/Result.h
@@ -55,7 +55,7 @@ class Result {
   using LocalVocabPtr = std::shared_ptr<const LocalVocab>;
 
   struct IdTableSharedLocalVocabPair {
-    IdTable idTable_;
+    std::variant<std::shared_ptr<const IdTable>, IdTable> idTable_;
     // The local vocabulary of the result.
     LocalVocabPtr localVocab_;
   };
@@ -115,6 +115,8 @@ class Result {
          SharedLocalVocabWrapper localVocab);
   Result(IdTable idTable, std::vector<ColumnIndex> sortedBy,
          LocalVocab&& localVocab);
+  Result(std::shared_ptr<const IdTable> idTablePtr,
+         std::vector<ColumnIndex> sortedBy, LocalVocab&& localVocab);
   Result(IdTableVocabPair pair, std::vector<ColumnIndex> sortedBy);
   Result(Generator idTables, std::vector<ColumnIndex> sortedBy);
   // Prevent accidental copying of a result table.
diff --git a/src/engine/ValuesForTesting.h b/src/engine/ValuesForTesting.h
index 389eefd493..a3b28eca36 100644
--- a/src/engine/ValuesForTesting.h
+++ b/src/engine/ValuesForTesting.h
@@ -9,16 +9,22 @@
 #include "engine/Result.h"
 #include "util/Algorithm.h"
 #include "util/Random.h"
+#include "util/TransparentFunctors.h"
 
 // An operation that yields a given `IdTable` as its result. It is used for
 // unit testing purposes when we need to specify the subtrees of another
 // operation.
+namespace detail {
+auto getTables(const auto& tables) {
+  return ql::views::transform(tables, ad_utility::dereference);
+}
+}  // namespace detail
 class ValuesForTesting : public Operation {
  public:
   using VarVector = std::vector<std::optional<Variable>>;
 
  private:
-  std::vector<IdTable> tables_;
+  std::vector<std::shared_ptr<const IdTable>> tables_;
   VariableToColumnMap variables_;
   bool supportsLimit_;
   // Those can be manually overwritten for testing using the respective getters.
@@ -46,7 +52,7 @@ class ValuesForTesting : public Operation {
         multiplicity_{multiplicity},
         forceFullyMaterialized_{forceFullyMaterialized} {
     AD_CONTRACT_CHECK(variables.size() == table.numColumns());
-    tables_.push_back(std::move(table));
+    tables_.push_back(std::make_shared<const IdTable>(std::move(table)));
     variables_ = computeVarMapFromVector(variables);
   }
 
@@ -63,7 +69,7 @@ class ValuesForTesting : public Operation {
         localVocab_{std::move(localVocab)},
         multiplicity_{},
         forceFullyMaterialized_{false} {
-    tables_.push_back(std::move(table));
+    tables_.push_back(std::make_shared<const IdTable>(std::move(table)));
   }
   explicit ValuesForTesting(QueryExecutionContext* ctx,
                             std::vector<IdTable> tables, VarVector variables,
@@ -71,7 +77,7 @@ class ValuesForTesting : public Operation {
                             std::vector<ColumnIndex> sortedColumns = {},
                             LocalVocab localVocab = LocalVocab{})
       : Operation{ctx},
-        tables_{std::move(tables)},
+        tables_{},
         supportsLimit_{false},
         sizeEstimate_{0},
         costEstimate_{0},
@@ -79,12 +85,16 @@ class ValuesForTesting : public Operation {
         resultSortedColumns_{std::move(sortedColumns)},
         localVocab_{std::move(localVocab)},
         multiplicity_{std::nullopt} {
-    AD_CONTRACT_CHECK(
-        ql::ranges::all_of(tables_, [&variables](const IdTable& table) {
+    tables_.reserve(tables.size());
+    for (auto& table : tables) {
+      tables_.push_back(std::make_shared<const IdTable>(std::move(table)));
+    }
+    AD_CONTRACT_CHECK(ql::ranges::all_of(
+        detail::getTables(tables_), [&variables](const IdTable& table) {
           return variables.size() == table.numColumns();
         }));
     size_t totalRows = 0;
-    for (const IdTable& idTable : tables_) {
+    for (const IdTable& idTable : detail::getTables(tables_)) {
       totalRows += idTable.numRows();
     }
     sizeEstimate_ = totalRows;
@@ -98,12 +108,12 @@ class ValuesForTesting : public Operation {
 
   // ___________________________________________________________________________
   ProtoResult computeResult(bool requestLaziness) override {
-    if (requestLaziness && !forceFullyMaterialized_) {
+    if (requestLaziness && !forceFullyMaterialized_ && tables_.size() != 1) {
       // Not implemented yet
       AD_CORRECTNESS_CHECK(!supportsLimit_);
       std::vector<IdTable> clones;
       clones.reserve(tables_.size());
-      for (const IdTable& idTable : tables_) {
+      for (const IdTable& idTable : detail::getTables(tables_)) {
         clones.push_back(idTable.clone());
       }
       auto generator = [](auto idTables,
@@ -114,17 +124,21 @@ class ValuesForTesting : public Operation {
       }(std::move(clones), localVocab_.clone());
       return {std::move(generator), resultSortedOn()};
     }
+
+    if (tables_.size() == 1 && getLimit().isUnconstrained()) {
+      return {tables_.at(0), resultSortedOn(), localVocab_.clone()};
+    }
     std::optional<IdTable> optionalTable;
-    if (tables_.size() > 1) {
-      IdTable aggregateTable{tables_.at(0).numColumns(),
-                             tables_.at(0).getAllocator()};
-      for (const IdTable& idTable : tables_) {
+    if (detail::getTables(tables_).size() > 1) {
+      IdTable aggregateTable{tables_.at(0)->numColumns(),
+                             tables_.at(0)->getAllocator()};
+      for (const IdTable& idTable : detail::getTables(tables_)) {
         aggregateTable.insertAtEnd(idTable);
       }
       optionalTable = std::move(aggregateTable);
     }
     auto table = optionalTable.has_value() ? std::move(optionalTable).value()
-                                           : tables_.at(0).clone();
+                                           : tables_.at(0)->clone();
     if (supportsLimit_) {
       table.erase(table.begin() + getLimit().upperBound(table.size()),
                   table.end());
@@ -144,15 +158,16 @@ class ValuesForTesting : public Operation {
   // ___________________________________________________________________________
   string getCacheKeyImpl() const override {
     std::stringstream str;
-    auto numRowsView = tables_ | ql::views::transform(&IdTable::numRows);
+    auto numRowsView =
+        detail::getTables(tables_) | ql::views::transform(&IdTable::numRows);
     auto totalNumRows = std::reduce(numRowsView.begin(), numRowsView.end(), 0);
-    auto numCols = tables_.empty() ? 0 : tables_.at(0).numColumns();
+    auto numCols = tables_.empty() ? 0 : tables_.at(0)->numColumns();
     str << "Values for testing with " << numCols << " columns and "
         << totalNumRows << " rows. ";
     if (totalNumRows > 1000) {
       str << ad_utility::FastRandomIntGenerator<int64_t>{}();
     } else {
-      for (const IdTable& idTable : tables_) {
+      for (const IdTable& idTable : detail::getTables(tables_)) {
         for (size_t i = 0; i < idTable.numColumns(); ++i) {
           for (Id entry : idTable.getColumn(i)) {
             str << entry << ' ';
@@ -172,7 +187,7 @@ class ValuesForTesting : public Operation {
   size_t getResultWidth() const override {
     // Assume a width of 1 if we have no tables and no other information to base
     // it on because 0 would otherwise cause stuff to break.
-    return tables_.empty() ? 1 : tables_.at(0).numColumns();
+    return tables_.empty() ? 1 : tables_.at(0)->numColumns();
   }
 
   vector<ColumnIndex> resultSortedOn() const override {
@@ -197,7 +212,8 @@ class ValuesForTesting : public Operation {
 
   bool knownEmptyResult() override {
     return ql::ranges::all_of(
-        tables_, [](const IdTable& table) { return table.empty(); });
+        detail::getTables(tables_),
+        [](const IdTable& table) { return table.empty(); });
   }
 
  private:
@@ -207,8 +223,8 @@ class ValuesForTesting : public Operation {
       if (!vars.at(i).has_value()) {
         continue;
       }
-      bool containsUndef =
-          ql::ranges::any_of(tables_, [&i](const IdTable& table) {
+      bool containsUndef = ql::ranges::any_of(
+          detail::getTables(tables_), [&i](const IdTable& table) {
             return ql::ranges::any_of(table.getColumn(i),
                                       [](Id id) { return id.isUndefined(); });
           });

From 5e52784cb565fb80f3d7c24db8445e818b2d5dc6 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 31 Jan 2025 18:45:44 +0100
Subject: [PATCH 12/25] Remove rogue include.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/Result.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp
index 5671d92676..be25eef786 100644
--- a/src/engine/Result.cpp
+++ b/src/engine/Result.cpp
@@ -8,7 +8,6 @@
 
 #include <absl/cleanup/cleanup.h>
 
-#include "../../cmake-build-clang-16-debug-backports/_deps/range-v3-src/include/range/v3/experimental/view/shared.hpp"
 #include "util/Exception.h"
 #include "util/Generators.h"
 #include "util/Log.h"

From 49445e52badc92b2eae35a41a4b3e9cba80ce2a0 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 5 Feb 2025 08:48:05 +0100
Subject: [PATCH 13/25] An intermediate commit before switching branches.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/global/Pattern.h                          |   3 +
 src/index/ConstantsIndexBuilding.h            |   3 +-
 src/index/IndexBuilderMain.cpp                |  11 ++
 src/index/IndexImpl.cpp                       |  10 +-
 src/index/IndexImpl.h                         |   8 +
 src/index/Vocabulary.h                        |  14 ++
 src/index/VocabularyOnDisk.h                  |   2 +
 src/index/vocabulary/CMakeLists.txt           |   3 +-
 src/index/vocabulary/CompressedVocabulary.h   |  16 +-
 src/index/vocabulary/UnicodeVocabulary.h      |   8 +-
 src/index/vocabulary/VocabularyInMemory.h     |  10 +-
 .../vocabulary/VocabularyInMemoryBinSearch.h  |   3 +
 .../vocabulary/VocabularyInternalExternal.h   |  12 ++
 src/index/vocabulary/VocabularyVariant.cpp    |  76 ++++++++++
 src/index/vocabulary/VocabularyVariant.h      | 143 ++++++++++++++++++
 src/util/ProgramOptionsHelpers.h              |  52 +++++--
 src/util/Serializer/SerializeVector.h         |   3 +
 17 files changed, 355 insertions(+), 22 deletions(-)
 create mode 100644 src/index/vocabulary/VocabularyVariant.cpp
 create mode 100644 src/index/vocabulary/VocabularyVariant.h

diff --git a/src/global/Pattern.h b/src/global/Pattern.h
index 1005add22d..9c37eb39ce 100644
--- a/src/global/Pattern.h
+++ b/src/global/Pattern.h
@@ -194,6 +194,9 @@ struct CompactStringVectorWriter {
     commonInitialization();
   }
 
+  CompactStringVectorWriter(CompactStringVectorWriter&&) = default;
+  CompactStringVectorWriter& operator=(CompactStringVectorWriter&&) = default;
+
   void push(const data_type* data, size_t elementSize) {
     AD_CONTRACT_CHECK(!_finished);
     _offsets.push_back(_nextOffset);
diff --git a/src/index/ConstantsIndexBuilding.h b/src/index/ConstantsIndexBuilding.h
index d7c1802969..4ca58f3e80 100644
--- a/src/index/ConstantsIndexBuilding.h
+++ b/src/index/ConstantsIndexBuilding.h
@@ -99,7 +99,8 @@ constinit inline std::atomic<size_t> BUFFER_SIZE_PARTIAL_TO_GLOBAL_ID_MAPPINGS =
 // the overhead of the metadata that has to be stored per block becomes
 // infeasible. 250K seems to be a reasonable tradeoff here.
 constexpr inline ad_utility::MemorySize
-    UNCOMPRESSED_BLOCKSIZE_COMPRESSED_METADATA_PER_COLUMN = 250_kB;
+    UNCOMPRESSED_BLOCKSIZE_COMPRESSED_METADATA_PER_COLUMN =
+        ad_utility::MemorySize::kilobytes(250);
 
 constexpr inline size_t NumColumnsIndexBuilding = 4;
 
diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp
index cfc121a2d1..8877c2d01a 100644
--- a/src/index/IndexBuilderMain.cpp
+++ b/src/index/IndexBuilderMain.cpp
@@ -11,6 +11,7 @@
 #include <string>
 
 #include "CompilationInfo.h"
+#include "IndexImpl.h"
 #include "global/Constants.h"
 #include "index/ConstantsIndexBuilding.h"
 #include "index/Index.h"
@@ -166,6 +167,8 @@ int main(int argc, char** argv) {
   bool addWordsFromLiterals = false;
   std::optional<ad_utility::MemorySize> stxxlMemory;
   std::optional<ad_utility::MemorySize> parserBufferSize;
+  std::optional<VocabularyEnum> vocabType;
+  // VocabularyEnum vocabType;
   optind = 1;
 
   Index index{ad_utility::makeUnlimitedAllocator<Id>()};
@@ -224,6 +227,9 @@ int main(int argc, char** argv) {
   add("only-pso-and-pos-permutations,o", po::bool_switch(&onlyPsoAndPos),
       "Only build the PSO and POS permutations. This is faster, but then "
       "queries with predicate variables are not supported");
+  add("vocabulary-type", po::value(&vocabType),
+      "The vocabulary implementation for strings in qlever, can be any of ... "
+      "(TODO joka)");
 
   // Options for the index building process.
   add("stxxl-memory,m", po::value(&stxxlMemory),
@@ -256,6 +262,11 @@ int main(int argc, char** argv) {
   if (parserBufferSize.has_value()) {
     index.parserBufferSize() = parserBufferSize.value();
   }
+  /*
+  if (vocabType.has_value()) {
+    index.getImpl().setVocabularyTypeForIndexBuilding(vocabType.value());
+  }
+  */
 
   // If no text index name was specified, take the part of the wordsfile after
   // the last slash.
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index d5781bb297..40ffeb1115 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -341,6 +341,8 @@ void IndexImpl::createFromFiles(
         "The patterns can only be built when all 6 permutations are created"};
   }
 
+  vocab_.resetToType(vocabularyTypeForIndexBuilding_);
+
   readIndexBuilderSettingsFromFile();
 
   updateInputFileSpecificationsAndLog(files, useParallelParser_);
@@ -560,7 +562,7 @@ IndexBuilderDataAsStxxlVector IndexImpl::passFileForVocabulary(
       return (*cmp)(a, b, decltype(vocab_)::SortLevel::TOTAL);
     };
     auto wordCallback = vocab_.makeWordWriter(onDiskBase_ + VOCAB_SUFFIX);
-    wordCallback.readableName() = "internal vocabulary";
+    // wordCallback.readableName() = "internal vocabulary";
     return ad_utility::vocabulary_merger::mergeVocabulary(
         onDiskBase_, numFiles, sortPred, wordCallback,
         memoryLimitIndexBuilding());
@@ -1132,6 +1134,12 @@ void IndexImpl::readConfiguration() {
   loadDataMember("num-triples", numTriples_, NumNormalAndInternal{});
   loadDataMember("num-non-literals-text-index", nofNonLiteralsInTextIndex_, 0);
 
+  // TODO<joka921> Comment and also write the configuration.
+  // The default value is the one the used to be the only.
+  VocabularyEnum vocabType(VocabularyEnum::Enum::CompressedOnDisk);
+  loadDataMember("vocabulary-type", vocabType, vocabType);
+  vocab_.resetToType(vocabType);
+
   // Initialize BlankNodeManager
   uint64_t numBlankNodesTotal;
   loadDataMember("num-blank-nodes-total", numBlankNodesTotal);
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index 8478943c92..ca35b52d86 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -192,6 +192,9 @@ class IndexImpl {
   std::optional<Id> idOfHasPatternDuringIndexBuilding_;
   std::optional<Id> idOfInternalGraphDuringIndexBuilding_;
 
+  VocabularyEnum vocabularyTypeForIndexBuilding_{
+      VocabularyEnum::Enum::CompressedOnDisk};
+
   // BlankNodeManager, initialized during `readConfiguration`
   std::unique_ptr<ad_utility::BlankNodeManager> blankNodeManager_{nullptr};
 
@@ -275,6 +278,11 @@ class IndexImpl {
     return deltaTriples_.value();
   }
 
+  void setVocabularyTypeForIndexBuilding(VocabularyEnum type) {
+    vocabularyTypeForIndexBuilding_ = type;
+    configurationJson_["vocabulary-type"] = type;
+  }
+
   // --------------------------------------------------------------------------
   //  -- RETRIEVAL ---
   // --------------------------------------------------------------------------
diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
index e3513c39d4..0f566cc138 100644
--- a/src/index/Vocabulary.h
+++ b/src/index/Vocabulary.h
@@ -27,6 +27,7 @@
 #include "index/vocabulary/UnicodeVocabulary.h"
 #include "index/vocabulary/VocabularyInMemory.h"
 #include "index/vocabulary/VocabularyInternalExternal.h"
+#include "index/vocabulary/VocabularyVariant.h"
 #include "util/Exception.h"
 #include "util/HashMap.h"
 #include "util/HashSet.h"
@@ -233,6 +234,13 @@ class Vocabulary {
       const std::string& filename) const {
     return vocabulary_.getUnderlyingVocabulary().makeDiskWriter(filename);
   }
+
+  // TODO<joka921> Comment.
+  void resetToType(VocabularyEnum type) {
+    if constexpr (std::is_same_v<UnderlyingVocabulary, VocabularyVariant>) {
+      vocabulary_.getUnderlyingVocabulary().resetToType(type);
+    }
+  }
 };
 
 namespace detail {
@@ -241,18 +249,24 @@ namespace detail {
 // and the compression of the vocab at compile time. NOTE: These change the
 // binary format of QLever's index, so changing them requires rebuilding of the
 // indices.
+/*
 #ifdef _QLEVER_VOCAB_IN_MEMORY
 using VocabStorage = VocabularyInMemory;
 #else
 using VocabStorage = VocabularyInternalExternal;
 #endif
+*/
 
+/*
 #ifndef _QLEVER_ENABLE_VOCAB_COMPRESSION
 using UnderlyingVocabRdfsVocabulary = VocabStorage;
 #else
 using UnderlyingVocabRdfsVocabulary = CompressedVocabulary<VocabStorage>;
 #endif
+*/
 
+// TODO<joka921> Change this place.
+using UnderlyingVocabRdfsVocabulary = VocabularyVariant;
 using UnderlyingVocabTextVocabulary = VocabularyInMemory;
 }  // namespace detail
 
diff --git a/src/index/VocabularyOnDisk.h b/src/index/VocabularyOnDisk.h
index f677ac3e7a..2b6455cda3 100644
--- a/src/index/VocabularyOnDisk.h
+++ b/src/index/VocabularyOnDisk.h
@@ -58,6 +58,8 @@ class VocabularyOnDisk : public VocabularyBinarySearchMixin<VocabularyOnDisk> {
     void finish();
     // Destructor. Implicitly calls `finish` if it hasn't been called before.
     ~WordWriter();
+    WordWriter(WordWriter&&) = default;
+    WordWriter& operator=(WordWriter&&) = default;
   };
 
   /// Build from a vector of pairs of `(string, id)`. This requires the IDs to
diff --git a/src/index/vocabulary/CMakeLists.txt b/src/index/vocabulary/CMakeLists.txt
index bb2dfdd4a3..ff3138601e 100644
--- a/src/index/vocabulary/CMakeLists.txt
+++ b/src/index/vocabulary/CMakeLists.txt
@@ -1,2 +1,3 @@
-add_library(vocabulary VocabularyInMemory.h VocabularyInMemory.cpp VocabularyInMemoryBinSearch.cpp VocabularyInternalExternal.cpp)
+add_library(vocabulary VocabularyInMemory.h VocabularyInMemory.cpp VocabularyInMemoryBinSearch.cpp VocabularyInternalExternal.cpp
+        VocabularyVariant.cpp)
 qlever_target_link_libraries(vocabulary)
diff --git a/src/index/vocabulary/CompressedVocabulary.h b/src/index/vocabulary/CompressedVocabulary.h
index dad9e84457..1eeda3599c 100644
--- a/src/index/vocabulary/CompressedVocabulary.h
+++ b/src/index/vocabulary/CompressedVocabulary.h
@@ -193,6 +193,10 @@ class CompressedVocabulary {
         delete;
     DiskWriterFromUncompressedWords& operator=(
         const DiskWriterFromUncompressedWords&) = delete;
+    DiskWriterFromUncompressedWords(DiskWriterFromUncompressedWords&&) =
+        default;
+    DiskWriterFromUncompressedWords& operator=(
+        DiskWriterFromUncompressedWords&&) = default;
 
    private:
     // Compress a complete block and write it to the underlying vocabulary.
@@ -243,12 +247,20 @@ class CompressedVocabulary {
   using WordWriter = DiskWriterFromUncompressedWords;
 
   // Return a `DiskWriter` that can be used to create the vocabulary.
-  DiskWriterFromUncompressedWords makeDiskWriter(
-      const std::string& filename) const {
+  static DiskWriterFromUncompressedWords makeDiskWriter(
+      const std::string& filename) {
     return DiskWriterFromUncompressedWords{
         absl::StrCat(filename, wordsSuffix),
         absl::StrCat(filename, decodersSuffix)};
   }
+
+  static std::unique_ptr<DiskWriterFromUncompressedWords> makeDiskWriterPtr(
+      const std::string& filename) {
+    return std::make_unique<DiskWriterFromUncompressedWords>(
+        absl::StrCat(filename, wordsSuffix),
+        absl::StrCat(filename, decodersSuffix));
+  }
+
   /// Initialize the vocabulary from the given `words`.
   // TODO<joka921> This can be a generic Mixin...
   void build(const std::vector<std::string>& words,
diff --git a/src/index/vocabulary/UnicodeVocabulary.h b/src/index/vocabulary/UnicodeVocabulary.h
index c215843c0f..73dc85556c 100644
--- a/src/index/vocabulary/UnicodeVocabulary.h
+++ b/src/index/vocabulary/UnicodeVocabulary.h
@@ -102,6 +102,12 @@ class UnicodeVocabulary {
   void close() { _underlyingVocabulary.close(); }
 
   void build(const std::vector<std::string>& v, const std::string& filename) {
-    _underlyingVocabulary.build(v, filename);
+    // TODO<joka921> This is really hacky, we should get rid of it and make the
+    // building consistent for all the vocabularies.
+    if constexpr (requires { _underlyingVocabulary.build(v, filename); }) {
+      _underlyingVocabulary.build(v, filename);
+    } else {
+      AD_FAIL();
+    }
   }
 };
diff --git a/src/index/vocabulary/VocabularyInMemory.h b/src/index/vocabulary/VocabularyInMemory.h
index ed498d1702..a2504ad265 100644
--- a/src/index/vocabulary/VocabularyInMemory.h
+++ b/src/index/vocabulary/VocabularyInMemory.h
@@ -82,14 +82,20 @@ class VocabularyInMemory
     // interface with the `VocabularyInternalExternal`.
     std::string readableNameDummy_;
     std::string& readableName() { return readableNameDummy_; }
+    WordWriter(WordWriter&&) = default;
+    WordWriter& operator=(WordWriter&&) = default;
   };
 
   // Return a `WordWriter` that directly writes the words to the given
   // `filename`. The words are not materialized in RAM, but the vocabulary later
-  // has to be explicitly initizlied via `open(filename)`.
-  WordWriter makeDiskWriter(const std::string& filename) const {
+  // has to be explicitly initialized via `open(filename)`.
+  static WordWriter makeDiskWriter(const std::string& filename) {
     return WordWriter{filename};
   }
+  static std::unique_ptr<WordWriter> makeDiskWriterPtr(
+      const std::string& filename) {
+    return std::make_unique<WordWriter>(filename);
+  }
 
   /// Clear the vocabulary.
   void close() { _words.clear(); }
diff --git a/src/index/vocabulary/VocabularyInMemoryBinSearch.h b/src/index/vocabulary/VocabularyInMemoryBinSearch.h
index 8367c1e965..df2314eb81 100644
--- a/src/index/vocabulary/VocabularyInMemoryBinSearch.h
+++ b/src/index/vocabulary/VocabularyInMemoryBinSearch.h
@@ -79,6 +79,9 @@ class VocabularyInMemoryBinSearch
     // Finish writing and dump all contents that still reside in buffers to
     // disk.
     void finish();
+
+    WordWriter(WordWriter&&) = default;
+    WordWriter& operator=(WordWriter&&) = default;
   };
 
   // Clear the vocabulary.
diff --git a/src/index/vocabulary/VocabularyInternalExternal.h b/src/index/vocabulary/VocabularyInternalExternal.h
index d92510a49f..491381a88e 100644
--- a/src/index/vocabulary/VocabularyInternalExternal.h
+++ b/src/index/vocabulary/VocabularyInternalExternal.h
@@ -112,6 +112,9 @@ class VocabularyInternalExternal {
 
     // Finish writing.
     void finish();
+
+    WordWriter(WordWriter&&) = default;
+    WordWriter& operator=(WordWriter&&) = default;
   };
 
   /// Clear the vocabulary.
@@ -148,4 +151,13 @@ class VocabularyInternalExternal {
     return boundFunction(externalVocab_, word, comparator,
                          boundFromInternalVocab.previousIndex(), upperBound);
   }
+
+ public:
+  // TODO<joka921> Clean up positions
+  static WordWriter makeDiskWriter(const std::string& filename) {
+    return WordWriter{filename};
+  }
+  static auto makeDiskWriterPtr(const std::string& filename) {
+    return std::make_unique<WordWriter>(filename);
+  }
 };
diff --git a/src/index/vocabulary/VocabularyVariant.cpp b/src/index/vocabulary/VocabularyVariant.cpp
new file mode 100644
index 0000000000..f8dca2b45d
--- /dev/null
+++ b/src/index/vocabulary/VocabularyVariant.cpp
@@ -0,0 +1,76 @@
+//  Copyright 2025, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include "index/vocabulary/VocabularyVariant.h"
+
+#include <engine/CallFixedSize.h>
+
+void VocabularyVariant::open(const std::string& filename) {
+  std::visit([&filename](auto& vocab) { vocab.open(filename); }, vocab_);
+}
+
+void VocabularyVariant::open(const std::string& filename, VocabularyEnum type) {
+  resetToType(type);
+  open(filename);
+}
+
+void VocabularyVariant::close() {
+  std::visit([](auto& activeVocab) { activeVocab.close(); }, vocab_);
+}
+size_t VocabularyVariant::size() const {
+  return std::visit([](auto& vocab) { return vocab.size(); }, vocab_);
+}
+std::string VocabularyVariant::operator[](uint64_t i) const {
+  return std::visit([i](auto& vocab) { return std::string{vocab[i]}; }, vocab_);
+}
+
+VocabularyVariant::WordWriter::WordWriter(WordWriters writer)
+    : writer_(std::move(writer)) {}
+
+void VocabularyVariant::WordWriter::finish() {
+  std::visit([](auto& writerPtr) { writerPtr->finish(); }, writer_);
+}
+
+void VocabularyVariant::WordWriter::operator()(std::string_view word,
+                                               bool isExternal) {
+  // Forward the word to whichever concrete writer is currently active.
+  auto writeWord = [word, isExternal](auto& w) { (*w)(word, isExternal); };
+  std::visit(writeWord, writer_);
+}
+
+auto VocabularyVariant::makeDiskWriter(const std::string& filename) const
+    -> WordWriter {
+  return WordWriter{std::visit(
+      [&filename](auto& vocab) -> WordWriters {
+        return vocab.makeDiskWriterPtr(filename);
+      },
+      vocab_)};
+}
+
+VocabularyVariant::WordWriter VocabularyVariant::makeDiskWriter(
+    const std::string& filename, VocabularyEnum type) {
+  VocabularyVariant dummyVocab;
+  dummyVocab.resetToType(type);
+  return dummyVocab.makeDiskWriter(filename);
+}
+
+void VocabularyVariant::resetToType(VocabularyEnum type) {
+  close();
+  switch (type.value()) {
+    case VocabularyEnum::Enum::InMemory:
+      vocab_.emplace<InMemory>();
+      break;
+    case VocabularyEnum::Enum::OnDisk:
+      vocab_.emplace<External>();
+      break;
+    case VocabularyEnum::Enum::CompressedInMemory:
+      vocab_.emplace<CompressedInMemory>();
+      break;
+    case VocabularyEnum::Enum::CompressedOnDisk:
+      vocab_.emplace<CompressedExternal>();
+      break;
+    default:
+      AD_FAIL();
+  }
+}
diff --git a/src/index/vocabulary/VocabularyVariant.h b/src/index/vocabulary/VocabularyVariant.h
new file mode 100644
index 0000000000..355fd58abd
--- /dev/null
+++ b/src/index/vocabulary/VocabularyVariant.h
@@ -0,0 +1,143 @@
+//  Copyright 2025, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#pragma once
+#include <absl/strings/str_cat.h>
+#include <absl/strings/str_join.h>
+
+#include <variant>
+
+#include "index/vocabulary/CompressedVocabulary.h"
+#include "index/vocabulary/VocabularyInMemory.h"
+#include "index/vocabulary/VocabularyInternalExternal.h"
+#include "util/json.h"
+
+template <typename Variant>
+static constexpr auto getWordWriterTypes(const Variant& var) {
+  return std::apply(
+      []<typename... Vocab>(const Vocab&...) {
+        return std::type_identity<
+            std::variant<std::unique_ptr<typename Vocab::WordWriter>...>>{};
+      },
+      var);
+}
+
+class VocabularyEnum {
+ public:
+  enum struct Enum { InMemory, OnDisk, CompressedInMemory, CompressedOnDisk };
+
+ private:
+  Enum value_ = Enum::InMemory;
+
+  static constexpr std::array<std::string_view, 4> descriptions{
+      "in-memory-uncompressed", "on-disk-uncompressed", "in-memory-compressed",
+      "on-disk-compressed"};
+
+ public:
+  VocabularyEnum() = default;
+  explicit VocabularyEnum(Enum value) : value_{value} {}
+
+  static VocabularyEnum fromString(std::string_view description) {
+    auto it = ql::ranges::find(descriptions, description);
+    if (it == descriptions.end()) {
+      throw std::runtime_error{
+          absl::StrCat("\"", description,
+                       "\" is not a valid vocabulary type. The currently "
+                       "supported vocabulary types are ",
+                       absl::StrJoin(descriptions, ", "))};
+      ;
+    }
+    return VocabularyEnum{static_cast<Enum>(it - descriptions.begin())};
+  }
+  std::string_view toString() const {
+    return descriptions.at(static_cast<size_t>(value_));
+  }
+
+  Enum value() const { return value_; }
+
+  // Conversion To JSON.
+  friend void to_json(nlohmann::json& j, const VocabularyEnum& vocabEnum) {
+    j = vocabEnum.toString();
+  }
+
+  // Conversion from JSON.
+  friend void from_json(const nlohmann::json& j, VocabularyEnum& vocabEnum) {
+    vocabEnum = VocabularyEnum::fromString(static_cast<std::string>(j));
+  }
+};
+
+class VocabularyVariant {
+ private:
+  using InMemory = VocabularyInMemory;
+  using External = VocabularyInternalExternal;
+  using CompressedInMemory = CompressedVocabulary<InMemory>;
+  using CompressedExternal = CompressedVocabulary<External>;
+  using Variant =
+      std::variant<InMemory, External, CompressedExternal, CompressedInMemory>;
+  using Tuple =
+      std::tuple<InMemory, External, CompressedExternal, CompressedInMemory>;
+
+  Variant vocab_;
+
+ public:
+  void resetToType(VocabularyEnum);
+  void open(const std::string& filename);
+  void open(const std::string& filename, VocabularyEnum type);
+  void close();
+  size_t size() const;
+  std::string operator[](uint64_t i) const;
+
+  template <typename String, typename Comp>
+  WordAndIndex lower_bound(const String& word, Comp comp) const {
+    return std::visit(
+        [&word, &comp](auto& vocab) {
+          return vocab.lower_bound(word, std::move(comp));
+        },
+        vocab_);
+  }
+
+  template <typename String, typename Comp>
+  WordAndIndex lower_bound_iterator(const String& word, Comp comp) const {
+    return std::visit(
+        [&word, &comp](auto& vocab) {
+          return vocab.lower_bound_iterator(word, std::move(comp));
+        },
+        vocab_);
+  }
+
+  template <typename String, typename Comp>
+  WordAndIndex upper_bound(const String& word, Comp comp) const {
+    return std::visit(
+        [&word, &comp](auto& vocab) {
+          return vocab.upper_bound(word, std::move(comp));
+        },
+        vocab_);
+  }
+
+  template <typename String, typename Comp>
+  WordAndIndex upper_bound_iterator(const String& word, Comp comp) const {
+    return std::visit(
+        [&word, &comp](auto& vocab) {
+          return vocab.upper_bound_iterator(word, std::move(comp));
+        },
+        vocab_);
+  }
+
+  using WordWriters = decltype(getWordWriterTypes(std::declval<Tuple>()))::type;
+
+  class WordWriter {
+    WordWriters writer_;
+
+   public:
+    explicit WordWriter(WordWriters);
+
+    void finish();
+
+    void operator()(std::string_view word, bool isExternal);
+  };
+
+  WordWriter makeDiskWriter(const std::string& filename) const;
+  static WordWriter makeDiskWriter(const std::string& filename,
+                                   VocabularyEnum type);
+};
diff --git a/src/util/ProgramOptionsHelpers.h b/src/util/ProgramOptionsHelpers.h
index bd804504d3..6c25565287 100644
--- a/src/util/ProgramOptionsHelpers.h
+++ b/src/util/ProgramOptionsHelpers.h
@@ -11,6 +11,8 @@
 #include "util/Concepts.h"
 #include "util/MemorySize/MemorySize.h"
 #include "util/Parameters.h"
+// TODO<joka921> only include the enum.
+#include "index/vocabulary/VocabularyVariant.h"
 namespace ad_utility {
 
 // An implicit wrapper that can be implicitly converted to and from `size_t`.
@@ -47,20 +49,6 @@ inline void validate(boost::any& v, const std::vector<std::string>& values,
   v = NonNegative{boost::lexical_cast<size_t>(s)};
 }
 
-// This function is required  to use `std::optional` in
-// `boost::program_options`.
-template <typename T>
-void validate(boost::any& v, const std::vector<std::string>& values,
-              std::optional<T>*, int) {
-  // First parse as a T
-  T* dummy = nullptr;
-  validate(v, values, dummy, 0);
-
-  // Wrap the T inside std::optional
-  AD_CONTRACT_CHECK(!v.empty());
-  v = std::optional<T>(boost::any_cast<T>(v));
-}
-
 // This function is required  to use `MemorySize` in `boost::program_options`.
 inline void validate(boost::any& v, const std::vector<std::string>& values,
                      MemorySize*, int) {
@@ -120,4 +108,40 @@ class ParameterToProgramOptionFactory {
 
 }  // namespace ad_utility
 
+// This function is required to use `VocabularyEnum` in
+// `boost::program_options`.
+inline void validate(boost::any& v, const std::vector<std::string>& values,
+                     VocabularyEnum*, int) {
+  using namespace boost::program_options;
+
+  // Make sure no previous assignment to 'v' was made.
+  validators::check_first_occurrence(v);
+  // Extract the first string from 'values'. If there is more than
+  // one string, it's an error, and an exception will be thrown.
+  const string& s = validators::get_single_string(values);
+
+  // Convert the string to `VocabularyEnum` and put it into the option.
+  v = VocabularyEnum::fromString(s);
+}
+
+// This function is required to use `std::optional` in `boost::program_options`.
+// TODO<joka921> Adding declarations to namespace `std` is formally undefined
+// behavior. We should find a solution that doesn't require opening namespace
+// `std`, for example we could put all types + this function into the
+// `ad_utility` namespace.
+namespace std {
+template <typename T>
+void validate(boost::any& v, const std::vector<std::string>& values,
+              std::optional<T>*, int) {
+  // First parse as a T
+  T* dummy = nullptr;
+  // Unqualified call: the overload for `T` is selected via the `T*` argument.
+  validate(v, values, dummy, 0);
+
+  // Wrap the T inside std::optional
+  AD_CONTRACT_CHECK(!v.empty());
+  v = std::optional<T>(boost::any_cast<T>(v));
+}
+}  // namespace std
+
 #endif  // QLEVER_PROGRAMOPTIONSHELPERS_H
diff --git a/src/util/Serializer/SerializeVector.h b/src/util/Serializer/SerializeVector.h
index 982e43e2ff..d093f4c02f 100644
--- a/src/util/Serializer/SerializeVector.h
+++ b/src/util/Serializer/SerializeVector.h
@@ -75,6 +75,9 @@ class VectorIncrementalSerializer {
   }
 
   ~VectorIncrementalSerializer() { finish(); }
+  VectorIncrementalSerializer(VectorIncrementalSerializer&&) = default;
+  VectorIncrementalSerializer& operator=(VectorIncrementalSerializer&&) =
+      default;
 };
 
 }  // namespace ad_utility::serialization

From 6d11c3ba8d03532a6eba846599ec7696a614bdea Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 5 Feb 2025 11:41:29 +0100
Subject: [PATCH 14/25] This seems to work, but the IDE has crashed, so we just
 restart:)

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/index/IndexBuilderMain.cpp                |  2 +-
 src/index/IndexImpl.cpp                       |  4 +-
 src/index/IndexImpl.h                         |  6 +-
 src/index/Vocabulary.cpp                      | 12 +++-
 src/index/Vocabulary.h                        |  2 +-
 src/index/VocabularyOnDisk.h                  |  2 -
 src/index/vocabulary/CMakeLists.txt           |  3 +-
 src/index/vocabulary/CompressedVocabulary.h   | 17 +----
 src/index/vocabulary/UnicodeVocabulary.h      | 10 ---
 src/index/vocabulary/VocabularyInMemory.h     | 21 +-----
 .../vocabulary/VocabularyInMemoryBinSearch.h  |  3 -
 .../vocabulary/VocabularyInternalExternal.h   | 21 +++---
 src/index/vocabulary/VocabularyType.h         | 56 +++++++++++++++
 src/index/vocabulary/VocabularyVariant.cpp    | 14 ++--
 src/index/vocabulary/VocabularyVariant.h      | 70 +++++--------------
 src/util/ProgramOptionsHelpers.h              | 37 +++++-----
 src/util/Serializer/SerializeVector.h         |  3 -
 17 files changed, 128 insertions(+), 155 deletions(-)
 create mode 100644 src/index/vocabulary/VocabularyType.h

diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp
index 8877c2d01a..1583a9a14f 100644
--- a/src/index/IndexBuilderMain.cpp
+++ b/src/index/IndexBuilderMain.cpp
@@ -167,7 +167,7 @@ int main(int argc, char** argv) {
   bool addWordsFromLiterals = false;
   std::optional<ad_utility::MemorySize> stxxlMemory;
   std::optional<ad_utility::MemorySize> parserBufferSize;
-  std::optional<VocabularyEnum> vocabType;
+  std::optional<ad_utility::VocabularyEnum> vocabType;
   // VocabularyEnum vocabType;
   optind = 1;
 
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 40ffeb1115..dd29e6d57a 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -562,7 +562,6 @@ IndexBuilderDataAsStxxlVector IndexImpl::passFileForVocabulary(
       return (*cmp)(a, b, decltype(vocab_)::SortLevel::TOTAL);
     };
     auto wordCallback = vocab_.makeWordWriter(onDiskBase_ + VOCAB_SUFFIX);
-    // wordCallback.readableName() = "internal vocabulary";
     return ad_utility::vocabulary_merger::mergeVocabulary(
         onDiskBase_, numFiles, sortPred, wordCallback,
         memoryLimitIndexBuilding());
@@ -1136,7 +1135,8 @@ void IndexImpl::readConfiguration() {
 
   // TODO<joka921> Comment and also write the configuration.
   // The default value is the one the used to be the only.
-  VocabularyEnum vocabType(VocabularyEnum::Enum::CompressedOnDisk);
+  ad_utility::VocabularyEnum vocabType(
+      ad_utility::VocabularyEnum::Enum::CompressedOnDisk);
   loadDataMember("vocabulary-type", vocabType, vocabType);
   vocab_.resetToType(vocabType);
 
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index ca35b52d86..7c4a937fdb 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -192,8 +192,8 @@ class IndexImpl {
   std::optional<Id> idOfHasPatternDuringIndexBuilding_;
   std::optional<Id> idOfInternalGraphDuringIndexBuilding_;
 
-  VocabularyEnum vocabularyTypeForIndexBuilding_{
-      VocabularyEnum::Enum::CompressedOnDisk};
+  ad_utility::VocabularyEnum vocabularyTypeForIndexBuilding_{
+      ad_utility::VocabularyEnum::Enum::CompressedOnDisk};
 
   // BlankNodeManager, initialized during `readConfiguration`
   std::unique_ptr<ad_utility::BlankNodeManager> blankNodeManager_{nullptr};
@@ -278,7 +278,7 @@ class IndexImpl {
     return deltaTriples_.value();
   }
 
-  void setVocabularyTypeForIndexBuilding(VocabularyEnum type) {
+  void setVocabularyTypeForIndexBuilding(ad_utility::VocabularyEnum type) {
     vocabularyTypeForIndexBuilding_ = type;
     configurationJson_["vocabulary-type"] = type;
   }
diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp
index 70e9f0c50e..80c61cc0ea 100644
--- a/src/index/Vocabulary.cpp
+++ b/src/index/Vocabulary.cpp
@@ -63,7 +63,17 @@ void Vocabulary<S, C, I>::createFromSet(
     return getCaseComparator()(a, b, SortLevel::TOTAL);
   };
   std::sort(begin(words), end(words), totalComparison);
-  vocabulary_.build(words, filename);
+  auto writer = makeWordWriter(filename);
+  auto writeWords = [&writer](std::string_view word) {
+    // All words are stored in the internal vocab (this is consistent with the
+    // previous behavior). NOTE: This function is currently only used for the
+    // text index and for a few unit tests, where we don't have an external
+    // vocabulary anyway.
+    writer(word, false);
+  };
+  ql::ranges::for_each(words, writeWords);
+  writer.finish();
+  vocabulary_.open(filename);
   LOG(DEBUG) << "END Vocabulary::createFromSet" << std::endl;
 }
 
diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
index 0f566cc138..898233e284 100644
--- a/src/index/Vocabulary.h
+++ b/src/index/Vocabulary.h
@@ -236,7 +236,7 @@ class Vocabulary {
   }
 
   // TODO<joka921> Comment.
-  void resetToType(VocabularyEnum type) {
+  void resetToType(ad_utility::VocabularyEnum type) {
     if constexpr (std::is_same_v<UnderlyingVocabulary, VocabularyVariant>) {
       vocabulary_.getUnderlyingVocabulary().resetToType(type);
     }
diff --git a/src/index/VocabularyOnDisk.h b/src/index/VocabularyOnDisk.h
index 2b6455cda3..f677ac3e7a 100644
--- a/src/index/VocabularyOnDisk.h
+++ b/src/index/VocabularyOnDisk.h
@@ -58,8 +58,6 @@ class VocabularyOnDisk : public VocabularyBinarySearchMixin<VocabularyOnDisk> {
     void finish();
     // Destructor. Implicitly calls `finish` if it hasn't been called before.
     ~WordWriter();
-    WordWriter(WordWriter&&) = default;
-    WordWriter& operator=(WordWriter&&) = default;
   };
 
   /// Build from a vector of pairs of `(string, id)`. This requires the IDs to
diff --git a/src/index/vocabulary/CMakeLists.txt b/src/index/vocabulary/CMakeLists.txt
index ff3138601e..151f8ec18c 100644
--- a/src/index/vocabulary/CMakeLists.txt
+++ b/src/index/vocabulary/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_library(vocabulary VocabularyInMemory.h VocabularyInMemory.cpp VocabularyInMemoryBinSearch.cpp VocabularyInternalExternal.cpp
-        VocabularyVariant.cpp)
+        VocabularyVariant.cpp
+        VocabularyType.h)
 qlever_target_link_libraries(vocabulary)
diff --git a/src/index/vocabulary/CompressedVocabulary.h b/src/index/vocabulary/CompressedVocabulary.h
index 1eeda3599c..7f685750d4 100644
--- a/src/index/vocabulary/CompressedVocabulary.h
+++ b/src/index/vocabulary/CompressedVocabulary.h
@@ -193,10 +193,6 @@ class CompressedVocabulary {
         delete;
     DiskWriterFromUncompressedWords& operator=(
         const DiskWriterFromUncompressedWords&) = delete;
-    DiskWriterFromUncompressedWords(DiskWriterFromUncompressedWords&&) =
-        default;
-    DiskWriterFromUncompressedWords& operator=(
-        DiskWriterFromUncompressedWords&&) = default;
 
    private:
     // Compress a complete block and write it to the underlying vocabulary.
@@ -254,6 +250,7 @@ class CompressedVocabulary {
         absl::StrCat(filename, decodersSuffix)};
   }
 
+  // Return a `unique_ptr<DiskWriter>`.
   static std::unique_ptr<DiskWriterFromUncompressedWords> makeDiskWriterPtr(
       const std::string& filename) {
     return std::make_unique<DiskWriterFromUncompressedWords>(
@@ -261,18 +258,6 @@ class CompressedVocabulary {
         absl::StrCat(filename, decodersSuffix));
   }
 
-  /// Initialize the vocabulary from the given `words`.
-  // TODO<joka921> This can be a generic Mixin...
-  void build(const std::vector<std::string>& words,
-             const std::string& filename) {
-    WordWriter writer = makeDiskWriter(filename);
-    for (const auto& word : words) {
-      writer(word);
-    }
-    writer.finish();
-    open(filename);
-  }
-
   // Access to the underlying vocabulary.
   UnderlyingVocabulary& getUnderlyingVocabulary() {
     return underlyingVocabulary_;
diff --git a/src/index/vocabulary/UnicodeVocabulary.h b/src/index/vocabulary/UnicodeVocabulary.h
index 73dc85556c..66aaaf0d67 100644
--- a/src/index/vocabulary/UnicodeVocabulary.h
+++ b/src/index/vocabulary/UnicodeVocabulary.h
@@ -100,14 +100,4 @@ class UnicodeVocabulary {
   const UnicodeComparator& getComparator() const { return _comparator; }
 
   void close() { _underlyingVocabulary.close(); }
-
-  void build(const std::vector<std::string>& v, const std::string& filename) {
-    // TODO<joka921> This is really hacky, we should get rid of it and make the
-    // building consistent for all the vocabularies.
-    if constexpr (requires { _underlyingVocabulary.build(v, filename); }) {
-      _underlyingVocabulary.build(v, filename);
-    } else {
-      AD_FAIL();
-    }
-  }
 };
diff --git a/src/index/vocabulary/VocabularyInMemory.h b/src/index/vocabulary/VocabularyInMemory.h
index a2504ad265..6d68e2a6f6 100644
--- a/src/index/vocabulary/VocabularyInMemory.h
+++ b/src/index/vocabulary/VocabularyInMemory.h
@@ -77,13 +77,6 @@ class VocabularyInMemory
     }
 
     void finish() { writer_.finish(); }
-
-    // The `readableName()` function is only there to have a consistent
-    // interface with the `VocabularyInternalExternal`.
-    std::string readableNameDummy_;
-    std::string& readableName() { return readableNameDummy_; }
-    WordWriter(WordWriter&&) = default;
-    WordWriter& operator=(WordWriter&&) = default;
   };
 
   // Return a `WordWriter` that directly writes the words to the given
@@ -92,6 +85,9 @@ class VocabularyInMemory
   static WordWriter makeDiskWriter(const std::string& filename) {
     return WordWriter{filename};
   }
+
+  // Same as `makeDiskWriter` above, but the result is returned via
+  // `unique_ptr`.
   static std::unique_ptr<WordWriter> makeDiskWriterPtr(
       const std::string& filename) {
     return std::make_unique<WordWriter>(filename);
@@ -100,17 +96,6 @@ class VocabularyInMemory
   /// Clear the vocabulary.
   void close() { _words.clear(); }
 
-  /// Initialize the vocabulary from the given `words`.
-  void build(const std::vector<std::string>& words,
-             const std::string& filename) {
-    WordWriter writer = makeDiskWriter(filename);
-    for (const auto& word : words) {
-      writer(word);
-    }
-    writer.finish();
-    open(filename);
-  }
-
   // Const access to the underlying words.
   auto begin() const { return _words.begin(); }
   auto end() const { return _words.end(); }
diff --git a/src/index/vocabulary/VocabularyInMemoryBinSearch.h b/src/index/vocabulary/VocabularyInMemoryBinSearch.h
index df2314eb81..8367c1e965 100644
--- a/src/index/vocabulary/VocabularyInMemoryBinSearch.h
+++ b/src/index/vocabulary/VocabularyInMemoryBinSearch.h
@@ -79,9 +79,6 @@ class VocabularyInMemoryBinSearch
     // Finish writing and dump all contents that still reside in buffers to
     // disk.
     void finish();
-
-    WordWriter(WordWriter&&) = default;
-    WordWriter& operator=(WordWriter&&) = default;
   };
 
   // Clear the vocabulary.
diff --git a/src/index/vocabulary/VocabularyInternalExternal.h b/src/index/vocabulary/VocabularyInternalExternal.h
index 491381a88e..897b29258d 100644
--- a/src/index/vocabulary/VocabularyInternalExternal.h
+++ b/src/index/vocabulary/VocabularyInternalExternal.h
@@ -112,11 +112,17 @@ class VocabularyInternalExternal {
 
     // Finish writing.
     void finish();
-
-    WordWriter(WordWriter&&) = default;
-    WordWriter& operator=(WordWriter&&) = default;
   };
 
+  // Return a `WordWriter` or (in the second function) a
+  // `unique_ptr<WordWriter>` for the given filename.
+  static WordWriter makeDiskWriter(const std::string& filename) {
+    return WordWriter{filename};
+  }
+  static auto makeDiskWriterPtr(const std::string& filename) {
+    return std::make_unique<WordWriter>(filename);
+  }
+
   /// Clear the vocabulary.
   void close() { internalVocab_.close(); }
 
@@ -151,13 +157,4 @@ class VocabularyInternalExternal {
     return boundFunction(externalVocab_, word, comparator,
                          boundFromInternalVocab.previousIndex(), upperBound);
   }
-
- public:
-  // TODO<joka921> Clean up positions
-  static WordWriter makeDiskWriter(const std::string& filename) {
-    return WordWriter{filename};
-  }
-  static auto makeDiskWriterPtr(const std::string& filename) {
-    return std::make_unique<WordWriter>(filename);
-  }
 };
diff --git a/src/index/vocabulary/VocabularyType.h b/src/index/vocabulary/VocabularyType.h
new file mode 100644
index 0000000000..21474023a3
--- /dev/null
+++ b/src/index/vocabulary/VocabularyType.h
@@ -0,0 +1,56 @@
+//  Copyright 2025, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#pragma once
+
+#include <array>
+#include <string_view>
+
+#include "util/json.h"
+
+namespace ad_utility {
+class VocabularyEnum {
+ public:
+  enum struct Enum { InMemory, OnDisk, CompressedInMemory, CompressedOnDisk };
+
+ private:
+  Enum value_ = Enum::InMemory;
+  // String representations, in the same order as the enumerators of `Enum`.
+  static constexpr std::array<std::string_view, 4> descriptions{
+      "in-memory-uncompressed", "on-disk-uncompressed", "in-memory-compressed",
+      "on-disk-compressed"};
+
+ public:
+  VocabularyEnum() = default;
+  explicit VocabularyEnum(Enum value) : value_{value} {}
+
+  // Return the type whose description is `description`, throw if none matches.
+  static VocabularyEnum fromString(std::string_view description) {
+    auto it = ql::ranges::find(descriptions, description);
+    if (it == descriptions.end()) {
+      throw std::runtime_error{
+          absl::StrCat("\"", description,
+                       "\" is not a valid vocabulary type. The currently "
+                       "supported vocabulary types are ",
+                       absl::StrJoin(descriptions, ", "))};
+    }
+    return VocabularyEnum{static_cast<Enum>(it - descriptions.begin())};
+  }
+  std::string_view toString() const {
+    return descriptions.at(static_cast<size_t>(value_));
+  }
+
+  Enum value() const { return value_; }
+
+  // Conversion to JSON: the value is stored as the string from `toString()`.
+  friend void to_json(nlohmann::json& j, const VocabularyEnum& vocabEnum) {
+    j = vocabEnum.toString();
+  }
+
+  // Conversion from JSON: the string is parsed via `fromString()`.
+  friend void from_json(const nlohmann::json& j, VocabularyEnum& vocabEnum) {
+    vocabEnum = VocabularyEnum::fromString(static_cast<std::string>(j));
+  }
+};
+}  // namespace ad_utility
diff --git a/src/index/vocabulary/VocabularyVariant.cpp b/src/index/vocabulary/VocabularyVariant.cpp
index f8dca2b45d..504591116e 100644
--- a/src/index/vocabulary/VocabularyVariant.cpp
+++ b/src/index/vocabulary/VocabularyVariant.cpp
@@ -10,7 +10,7 @@ void VocabularyVariant::open(const std::string& filename) {
   std::visit([&filename](auto& vocab) { vocab.open(filename); }, vocab_);
 }
 
-void VocabularyVariant::open(const std::string& filename, VocabularyEnum type) {
+void VocabularyVariant::open(const std::string& filename, VocabularyType type) {
   resetToType(type);
   open(filename);
 }
@@ -49,25 +49,25 @@ auto VocabularyVariant::makeDiskWriter(const std::string& filename) const
 }
 
 VocabularyVariant::WordWriter VocabularyVariant::makeDiskWriter(
-    const std::string& filename, VocabularyEnum type) {
+    const std::string& filename, VocabularyType type) {
   VocabularyVariant dummyVocab;
   dummyVocab.resetToType(type);
   return dummyVocab.makeDiskWriter(filename);
 }
 
-void VocabularyVariant::resetToType(VocabularyEnum type) {
+void VocabularyVariant::resetToType(VocabularyType type) {
   close();
   switch (type.value()) {
-    case VocabularyEnum::Enum::InMemory:
+    case VocabularyType::Enum::InMemory:
       vocab_.emplace<InMemory>();
       break;
-    case VocabularyEnum::Enum::OnDisk:
+    case VocabularyType::Enum::OnDisk:
       vocab_.emplace<External>();
       break;
-    case VocabularyEnum::Enum::CompressedInMemory:
+    case VocabularyType::Enum::CompressedInMemory:
       vocab_.emplace<CompressedInMemory>();
       break;
-    case VocabularyEnum::Enum::CompressedOnDisk:
+    case VocabularyType::Enum::CompressedOnDisk:
       vocab_.emplace<CompressedExternal>();
       break;
     default:
diff --git a/src/index/vocabulary/VocabularyVariant.h b/src/index/vocabulary/VocabularyVariant.h
index 355fd58abd..7ec162890d 100644
--- a/src/index/vocabulary/VocabularyVariant.h
+++ b/src/index/vocabulary/VocabularyVariant.h
@@ -11,63 +11,24 @@
 #include "index/vocabulary/CompressedVocabulary.h"
 #include "index/vocabulary/VocabularyInMemory.h"
 #include "index/vocabulary/VocabularyInternalExternal.h"
+#include "index/vocabulary/VocabularyType.h"
 #include "util/json.h"
 
-template <typename Variant>
-static constexpr auto getWordWriterTypes(const Variant& var) {
-  return std::apply(
-      []<typename... Vocab>(const Vocab&...) {
-        return std::type_identity<
-            std::variant<std::unique_ptr<typename Vocab::WordWriter>...>>{};
-      },
-      var);
-}
-
-class VocabularyEnum {
- public:
-  enum struct Enum { InMemory, OnDisk, CompressedInMemory, CompressedOnDisk };
-
- private:
-  Enum value_ = Enum::InMemory;
-
-  static constexpr std::array<std::string_view, 4> descriptions{
-      "in-memory-uncompressed", "on-disk-uncompressed", "in-memory-compressed",
-      "on-disk-compressed"};
+namespace polymorphic_vocabulary::detail {
 
- public:
-  VocabularyEnum() = default;
-  explicit VocabularyEnum(Enum value) : value_{value} {}
-
-  static VocabularyEnum fromString(std::string_view description) {
-    auto it = ql::ranges::find(descriptions, description);
-    if (it == descriptions.end()) {
-      throw std::runtime_error{
-          absl::StrCat("\"", description,
-                       "\" is not a valid vocabulary type. The currently "
-                       "supported vocabulary types are ",
-                       absl::StrJoin(descriptions, ", "))};
-      ;
-    }
-    return VocabularyEnum{static_cast<Enum>(it - descriptions.begin())};
-  }
-  std::string_view toString() const {
-    return descriptions.at(static_cast<size_t>(value_));
-  }
+template <typename T>
+struct WriterPointers {};
 
-  Enum value() const { return value_; }
-
-  // Conversion To JSON.
-  friend void to_json(nlohmann::json& j, const VocabularyEnum& vocabEnum) {
-    j = vocabEnum.toString();
-  }
-
-  // Conversion from JSON.
-  friend void from_json(const nlohmann::json& j, VocabularyEnum& vocabEnum) {
-    vocabEnum = VocabularyEnum::fromString(static_cast<std::string>(j));
-  }
+template <typename... Vocabs>
+struct WriterPointers<std::variant<Vocabs...>> {
+  using type = std::variant<std::unique_ptr<typename Vocabs::WordWriter>...>;
 };
+}  // namespace polymorphic_vocabulary::detail
 
 class VocabularyVariant {
+ public:
+  using VocabularyType = ad_utility::VocabularyEnum;
+
  private:
   using InMemory = VocabularyInMemory;
   using External = VocabularyInternalExternal;
@@ -81,9 +42,9 @@ class VocabularyVariant {
   Variant vocab_;
 
  public:
-  void resetToType(VocabularyEnum);
+  void resetToType(VocabularyType);
   void open(const std::string& filename);
-  void open(const std::string& filename, VocabularyEnum type);
+  void open(const std::string& filename, VocabularyType type);
   void close();
   size_t size() const;
   std::string operator[](uint64_t i) const;
@@ -124,7 +85,8 @@ class VocabularyVariant {
         vocab_);
   }
 
-  using WordWriters = decltype(getWordWriterTypes(std::declval<Tuple>()))::type;
+  using WordWriters =
+      polymorphic_vocabulary::detail::WriterPointers<Variant>::type;
 
   class WordWriter {
     WordWriters writer_;
@@ -139,5 +101,5 @@ class VocabularyVariant {
 
   WordWriter makeDiskWriter(const std::string& filename) const;
   static WordWriter makeDiskWriter(const std::string& filename,
-                                   VocabularyEnum type);
+                                   VocabularyType type);
 };
diff --git a/src/util/ProgramOptionsHelpers.h b/src/util/ProgramOptionsHelpers.h
index 6c25565287..0d3ede6a1a 100644
--- a/src/util/ProgramOptionsHelpers.h
+++ b/src/util/ProgramOptionsHelpers.h
@@ -49,6 +49,21 @@ inline void validate(boost::any& v, const std::vector<std::string>& values,
   v = NonNegative{boost::lexical_cast<size_t>(s)};
 }
 
+// This function is required  to use `std::optional` in
+// `boost::program_options`.
+template <typename T>
+void validate(boost::any& v, const std::vector<std::string>& values,
+              std::optional<T>*, int) {
+  // First parse as a T
+  T* dummy = nullptr;
+  // using namespace boost::program_options;
+  validate(v, values, dummy, 0);
+
+  // Wrap the T inside std::optional
+  AD_CONTRACT_CHECK(!v.empty());
+  v = std::optional<T>(boost::any_cast<T>(v));
+}
+
 // This function is required  to use `MemorySize` in `boost::program_options`.
 inline void validate(boost::any& v, const std::vector<std::string>& values,
                      MemorySize*, int) {
@@ -106,8 +121,6 @@ class ParameterToProgramOptionFactory {
   }
 };
 
-}  // namespace ad_utility
-
 // This function is required  to use `VocabularyEnum` in
 // `boost::program_options`.
 inline void validate(boost::any& v, const std::vector<std::string>& values,
@@ -124,24 +137,6 @@ inline void validate(boost::any& v, const std::vector<std::string>& values,
   v = VocabularyEnum::fromString(s);
 }
 
-// This function is required  to use `std::optional` in
-// `boost::program_options`.
-// TODO<joka921> We should find a solution that doesn't require  opening
-// namespace `std`, for example we could put all types + this function into the
-// `ad_utility`namespace.
-namespace std {
-template <typename T>
-void validate(boost::any& v, const std::vector<std::string>& values,
-              std::optional<T>*, int) {
-  // First parse as a T
-  T* dummy = nullptr;
-  // using namespace boost::program_options;
-  validate(v, values, dummy, 0);
-
-  // Wrap the T inside std::optional
-  AD_CONTRACT_CHECK(!v.empty());
-  v = std::optional<T>(boost::any_cast<T>(v));
-}
-}  // namespace std
+}  // namespace ad_utility
 
 #endif  // QLEVER_PROGRAMOPTIONSHELPERS_H
diff --git a/src/util/Serializer/SerializeVector.h b/src/util/Serializer/SerializeVector.h
index d093f4c02f..982e43e2ff 100644
--- a/src/util/Serializer/SerializeVector.h
+++ b/src/util/Serializer/SerializeVector.h
@@ -75,9 +75,6 @@ class VectorIncrementalSerializer {
   }
 
   ~VectorIncrementalSerializer() { finish(); }
-  VectorIncrementalSerializer(VectorIncrementalSerializer&&) = default;
-  VectorIncrementalSerializer& operator=(VectorIncrementalSerializer&&) =
-      default;
 };
 
 }  // namespace ad_utility::serialization

From 3e7f49476c5f344bd44a5a447a8ee20653c4adcf Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 5 Feb 2025 18:53:41 +0100
Subject: [PATCH 15/25] Several refactorings.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/index/IndexBuilderMain.cpp                |  2 +-
 src/index/IndexImpl.cpp                       |  4 +-
 src/index/IndexImpl.h                         |  6 +-
 src/index/Vocabulary.h                        |  8 +-
 src/index/vocabulary/CMakeLists.txt           |  2 +-
 .../vocabulary/PolymorphicVocabulary.cpp      | 90 +++++++++++++++++++
 ...ularyVariant.h => PolymorphicVocabulary.h} | 53 +++++++++--
 src/index/vocabulary/VocabularyType.h         | 16 ++--
 src/index/vocabulary/VocabularyVariant.cpp    | 76 ----------------
 src/util/ProgramOptionsHelpers.h              |  7 +-
 10 files changed, 158 insertions(+), 106 deletions(-)
 create mode 100644 src/index/vocabulary/PolymorphicVocabulary.cpp
 rename src/index/vocabulary/{VocabularyVariant.h => PolymorphicVocabulary.h} (56%)
 delete mode 100644 src/index/vocabulary/VocabularyVariant.cpp

diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp
index 1583a9a14f..c75fd5d427 100644
--- a/src/index/IndexBuilderMain.cpp
+++ b/src/index/IndexBuilderMain.cpp
@@ -167,7 +167,7 @@ int main(int argc, char** argv) {
   bool addWordsFromLiterals = false;
   std::optional<ad_utility::MemorySize> stxxlMemory;
   std::optional<ad_utility::MemorySize> parserBufferSize;
-  std::optional<ad_utility::VocabularyEnum> vocabType;
+  std::optional<ad_utility::VocabularyType> vocabType;
   // VocabularyEnum vocabType;
   optind = 1;
 
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index dd29e6d57a..3ad2e997ec 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -1135,8 +1135,8 @@ void IndexImpl::readConfiguration() {
 
   // TODO<joka921> Comment and also write the configuration.
   // The default value is the one the used to be the only.
-  ad_utility::VocabularyEnum vocabType(
-      ad_utility::VocabularyEnum::Enum::CompressedOnDisk);
+  ad_utility::VocabularyType vocabType(
+      ad_utility::VocabularyType::Enum::CompressedOnDisk);
   loadDataMember("vocabulary-type", vocabType, vocabType);
   vocab_.resetToType(vocabType);
 
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index 7c4a937fdb..aaa6d0a1f1 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -192,8 +192,8 @@ class IndexImpl {
   std::optional<Id> idOfHasPatternDuringIndexBuilding_;
   std::optional<Id> idOfInternalGraphDuringIndexBuilding_;
 
-  ad_utility::VocabularyEnum vocabularyTypeForIndexBuilding_{
-      ad_utility::VocabularyEnum::Enum::CompressedOnDisk};
+  ad_utility::VocabularyType vocabularyTypeForIndexBuilding_{
+      ad_utility::VocabularyType::Enum::CompressedOnDisk};
 
   // BlankNodeManager, initialized during `readConfiguration`
   std::unique_ptr<ad_utility::BlankNodeManager> blankNodeManager_{nullptr};
@@ -278,7 +278,7 @@ class IndexImpl {
     return deltaTriples_.value();
   }
 
-  void setVocabularyTypeForIndexBuilding(ad_utility::VocabularyEnum type) {
+  void setVocabularyTypeForIndexBuilding(ad_utility::VocabularyType type) {
     vocabularyTypeForIndexBuilding_ = type;
     configurationJson_["vocabulary-type"] = type;
   }
diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
index 898233e284..46af9c8c56 100644
--- a/src/index/Vocabulary.h
+++ b/src/index/Vocabulary.h
@@ -24,10 +24,10 @@
 #include "index/StringSortComparator.h"
 #include "index/VocabularyOnDisk.h"
 #include "index/vocabulary/CompressedVocabulary.h"
+#include "index/vocabulary/PolymorphicVocabulary.h"
 #include "index/vocabulary/UnicodeVocabulary.h"
 #include "index/vocabulary/VocabularyInMemory.h"
 #include "index/vocabulary/VocabularyInternalExternal.h"
-#include "index/vocabulary/VocabularyVariant.h"
 #include "util/Exception.h"
 #include "util/HashMap.h"
 #include "util/HashSet.h"
@@ -236,8 +236,8 @@ class Vocabulary {
   }
 
   // TODO<joka921> Comment.
-  void resetToType(ad_utility::VocabularyEnum type) {
-    if constexpr (std::is_same_v<UnderlyingVocabulary, VocabularyVariant>) {
+  void resetToType(ad_utility::VocabularyType type) {
+    if constexpr (std::is_same_v<UnderlyingVocabulary, PolymorphicVocabulary>) {
       vocabulary_.getUnderlyingVocabulary().resetToType(type);
     }
   }
@@ -266,7 +266,7 @@ using UnderlyingVocabRdfsVocabulary = CompressedVocabulary<VocabStorage>;
 */
 
 // TODO<joka921> Change this place.
-using UnderlyingVocabRdfsVocabulary = VocabularyVariant;
+using UnderlyingVocabRdfsVocabulary = PolymorphicVocabulary;
 using UnderlyingVocabTextVocabulary = VocabularyInMemory;
 }  // namespace detail
 
diff --git a/src/index/vocabulary/CMakeLists.txt b/src/index/vocabulary/CMakeLists.txt
index 151f8ec18c..910ad61c3a 100644
--- a/src/index/vocabulary/CMakeLists.txt
+++ b/src/index/vocabulary/CMakeLists.txt
@@ -1,4 +1,4 @@
 add_library(vocabulary VocabularyInMemory.h VocabularyInMemory.cpp VocabularyInMemoryBinSearch.cpp VocabularyInternalExternal.cpp
-        VocabularyVariant.cpp
+        PolymorphicVocabulary.cpp
         VocabularyType.h)
 qlever_target_link_libraries(vocabulary)
diff --git a/src/index/vocabulary/PolymorphicVocabulary.cpp b/src/index/vocabulary/PolymorphicVocabulary.cpp
new file mode 100644
index 0000000000..1b9936afee
--- /dev/null
+++ b/src/index/vocabulary/PolymorphicVocabulary.cpp
@@ -0,0 +1,90 @@
+//  Copyright 2025, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include "index/vocabulary/PolymorphicVocabulary.h"
+
+#include <engine/CallFixedSize.h>
+
+// _____________________________________________________________________________
+void PolymorphicVocabulary::open(const std::string& filename) {
+  std::visit([&filename](auto& vocab) { vocab.open(filename); }, vocab_);
+}
+
+// _____________________________________________________________________________
+void PolymorphicVocabulary::open(const std::string& filename,
+                                 VocabularyType type) {
+  resetToType(type);
+  open(filename);
+}
+
+// _____________________________________________________________________________
+void PolymorphicVocabulary::close() {
+  std::visit([](auto& vocab) { return vocab.close(); }, vocab_);
+}
+
+// _____________________________________________________________________________
+size_t PolymorphicVocabulary::size() const {
+  return std::visit([](auto& vocab) { return vocab.size(); }, vocab_);
+}
+
+// _____________________________________________________________________________
+std::string PolymorphicVocabulary::operator[](uint64_t i) const {
+  return std::visit([i](auto& vocab) { return std::string{vocab[i]}; }, vocab_);
+}
+
+// _____________________________________________________________________________
+PolymorphicVocabulary::WordWriter::WordWriter(WordWriters writer)
+    : writer_(std::move(writer)) {}
+
+// _____________________________________________________________________________
+void PolymorphicVocabulary::WordWriter::finish() {
+  std::visit([](auto& writer) { return writer->finish(); }, writer_);
+}
+
+// _____________________________________________________________________________
+void PolymorphicVocabulary::WordWriter::operator()(std::string_view word,
+                                                   bool isExternal) {
+  std::visit(
+      [&word, isExternal](auto& writer) { return (*writer)(word, isExternal); },
+      writer_);
+}
+
+// _____________________________________________________________________________
+auto PolymorphicVocabulary::makeDiskWriter(const std::string& filename) const
+    -> WordWriter {
+  return WordWriter{std::visit(
+      [&filename](auto& vocab) -> WordWriters {
+        return vocab.makeDiskWriterPtr(filename);
+      },
+      vocab_)};
+}
+
+// _____________________________________________________________________________
+PolymorphicVocabulary::WordWriter PolymorphicVocabulary::makeDiskWriter(
+    const std::string& filename, VocabularyType type) {
+  PolymorphicVocabulary dummyVocab;
+  dummyVocab.resetToType(type);
+  return dummyVocab.makeDiskWriter(filename);
+}
+
+// _____________________________________________________________________________
+void PolymorphicVocabulary::resetToType(VocabularyType type) {
+  close();
+  switch (type.value()) {
+    case VocabularyType::Enum::InMemory:
+      vocab_.emplace<InMemory>();
+      break;
+    case VocabularyType::Enum::OnDisk:
+      vocab_.emplace<External>();
+      break;
+    case VocabularyType::Enum::CompressedInMemory:
+      vocab_.emplace<CompressedInMemory>();
+      break;
+    case VocabularyType::Enum::CompressedOnDisk:
+      vocab_.emplace<CompressedExternal>();
+      break;
+    default:
+      AD_FAIL();
+  }
+}
diff --git a/src/index/vocabulary/VocabularyVariant.h b/src/index/vocabulary/PolymorphicVocabulary.h
similarity index 56%
rename from src/index/vocabulary/VocabularyVariant.h
rename to src/index/vocabulary/PolymorphicVocabulary.h
index 7ec162890d..4a18e57465 100644
--- a/src/index/vocabulary/VocabularyVariant.h
+++ b/src/index/vocabulary/PolymorphicVocabulary.h
@@ -16,6 +16,10 @@
 
 namespace polymorphic_vocabulary::detail {
 
+// If `T = std::variant<VocabType1, VocabType2, ...>`, `WriterPointers<T>::type`
+// is `std::variant<unique_ptr<VocabType1::WordWriter>,
+// unique_ptr<VocabType2::WordWriter>, ...>`. This is used in the implementation
+// of the `PolymorphicVocabulary` below.
 template <typename T>
 struct WriterPointers {};
 
@@ -25,30 +29,51 @@ struct WriterPointers<std::variant<Vocabs...>> {
 };
 }  // namespace polymorphic_vocabulary::detail
 
-class VocabularyVariant {
+// A vocabulary that can choose between different vocabulary implementations
+// at runtime. The only restriction is that a vocabulary can only be read from
+// disk with the same implementation that it was written with.
+class PolymorphicVocabulary {
  public:
-  using VocabularyType = ad_utility::VocabularyEnum;
+  using VocabularyType = ad_utility::VocabularyType;
 
  private:
+  // Type aliases for all the currently supported vocabularies. If another
+  // vocabulary is added, don't forget to also register it in the
+  // `VocabularyType` enum.
   using InMemory = VocabularyInMemory;
   using External = VocabularyInternalExternal;
   using CompressedInMemory = CompressedVocabulary<InMemory>;
   using CompressedExternal = CompressedVocabulary<External>;
   using Variant =
       std::variant<InMemory, External, CompressedExternal, CompressedInMemory>;
-  using Tuple =
-      std::tuple<InMemory, External, CompressedExternal, CompressedInMemory>;
 
+  // In this variant we store the actual vocabulary.
   Variant vocab_;
 
  public:
-  void resetToType(VocabularyType);
-  void open(const std::string& filename);
+  // Read a vocabulary with the given `type` from the file with the `filename`.
+  // A vocabulary with the corresponding `type` must have been previously
+  // written to that file.
   void open(const std::string& filename, VocabularyType type);
+
+  // Close the vocabulary if it is open, and set the underlying vocabulary
+  // implementation according to the `type` without opening the vocabulary.
+  void resetToType(VocabularyType type);
+
+  // Same as the overload of `open` above, but expects that the correct
+  // `VocabularyType` has already been set via `resetToType` above.
+  void open(const std::string& filename);
+
+  // Close the vocabulary s.t. it consumes no more RAM.
   void close();
+
+  // Return the total number of words in the vocabulary.
   size_t size() const;
+
+  // Return the `i`-th word, throw if `i` is out of bounds.
   std::string operator[](uint64_t i) const;
 
+  // Same as `std::lower_bound`, return the smallest entry >= `word`.
   template <typename String, typename Comp>
   WordAndIndex lower_bound(const String& word, Comp comp) const {
     return std::visit(
@@ -58,6 +83,8 @@ class VocabularyVariant {
         vocab_);
   }
 
+  // Same as `lower_bound` above, but the comparator compares a `word` and an
+  // `iterator` instead of two words.
   template <typename String, typename Comp>
   WordAndIndex lower_bound_iterator(const String& word, Comp comp) const {
     return std::visit(
@@ -67,6 +94,7 @@ class VocabularyVariant {
         vocab_);
   }
 
+  // Analogous to `lower_bound` (see above).
   template <typename String, typename Comp>
   WordAndIndex upper_bound(const String& word, Comp comp) const {
     return std::visit(
@@ -76,6 +104,7 @@ class VocabularyVariant {
         vocab_);
   }
 
+  // Analogous to `lower_bound_iterator` (see above).
   template <typename String, typename Comp>
   WordAndIndex upper_bound_iterator(const String& word, Comp comp) const {
     return std::visit(
@@ -88,18 +117,28 @@ class VocabularyVariant {
   using WordWriters =
       polymorphic_vocabulary::detail::WriterPointers<Variant>::type;
 
+  // The `WordWriter` is used to write a vocabulary to disk word by word (in
+  // sorted order).
   class WordWriter {
     WordWriters writer_;
 
    public:
+    // Constructor, used by the `makeDiskWriter` functions below.
     explicit WordWriter(WordWriters);
 
+    // This function has to be called after the last word has been written.
     void finish();
 
+    // Write the next word to the vocabulary.
     void operator()(std::string_view word, bool isExternal);
   };
 
-  WordWriter makeDiskWriter(const std::string& filename) const;
+  // Create a `WordWriter` that will create a vocabulary with the given `type`
+  // at the given `filename`.
   static WordWriter makeDiskWriter(const std::string& filename,
                                    VocabularyType type);
+
+  // Same as above, but the `VocabularyType` is the currently active type of
+  // `this`.
+  WordWriter makeDiskWriter(const std::string& filename) const;
 };
diff --git a/src/index/vocabulary/VocabularyType.h b/src/index/vocabulary/VocabularyType.h
index 21474023a3..4e65a481df 100644
--- a/src/index/vocabulary/VocabularyType.h
+++ b/src/index/vocabulary/VocabularyType.h
@@ -10,7 +10,7 @@
 #include "util/json.h"
 
 namespace ad_utility {
-class VocabularyEnum {
+class VocabularyType {
  public:
   enum struct Enum { InMemory, OnDisk, CompressedInMemory, CompressedOnDisk };
 
@@ -22,10 +22,10 @@ class VocabularyEnum {
       "on-disk-compressed"};
 
  public:
-  VocabularyEnum() = default;
-  explicit VocabularyEnum(Enum value) : value_{value} {}
+  VocabularyType() = default;
+  explicit VocabularyType(Enum value) : value_{value} {}
 
-  static VocabularyEnum fromString(std::string_view description) {
+  static VocabularyType fromString(std::string_view description) {
     auto it = ql::ranges::find(descriptions, description);
     if (it == descriptions.end()) {
       throw std::runtime_error{
@@ -35,7 +35,7 @@ class VocabularyEnum {
                        absl::StrJoin(descriptions, ", "))};
       ;
     }
-    return VocabularyEnum{static_cast<Enum>(it - descriptions.begin())};
+    return VocabularyType{static_cast<Enum>(it - descriptions.begin())};
   }
   std::string_view toString() const {
     return descriptions.at(static_cast<size_t>(value_));
@@ -44,13 +44,13 @@ class VocabularyEnum {
   Enum value() const { return value_; }
 
   // Conversion To JSON.
-  friend void to_json(nlohmann::json& j, const VocabularyEnum& vocabEnum) {
+  friend void to_json(nlohmann::json& j, const VocabularyType& vocabEnum) {
     j = vocabEnum.toString();
   }
 
   // Conversion from JSON.
-  friend void from_json(const nlohmann::json& j, VocabularyEnum& vocabEnum) {
-    vocabEnum = VocabularyEnum::fromString(static_cast<std::string>(j));
+  friend void from_json(const nlohmann::json& j, VocabularyType& vocabEnum) {
+    vocabEnum = VocabularyType::fromString(static_cast<std::string>(j));
   }
 };
 }  // namespace ad_utility
diff --git a/src/index/vocabulary/VocabularyVariant.cpp b/src/index/vocabulary/VocabularyVariant.cpp
deleted file mode 100644
index 504591116e..0000000000
--- a/src/index/vocabulary/VocabularyVariant.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-//  Copyright 2025, University of Freiburg,
-//  Chair of Algorithms and Data Structures.
-//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
-
-#include "index/vocabulary/VocabularyVariant.h"
-
-#include <engine/CallFixedSize.h>
-
-void VocabularyVariant::open(const std::string& filename) {
-  std::visit([&filename](auto& vocab) { vocab.open(filename); }, vocab_);
-}
-
-void VocabularyVariant::open(const std::string& filename, VocabularyType type) {
-  resetToType(type);
-  open(filename);
-}
-
-void VocabularyVariant::close() {
-  return std::visit([](auto& vocab) { return vocab.close(); }, vocab_);
-}
-size_t VocabularyVariant::size() const {
-  return std::visit([](auto& vocab) { return vocab.size(); }, vocab_);
-}
-std::string VocabularyVariant::operator[](uint64_t i) const {
-  return std::visit([i](auto& vocab) { return std::string{vocab[i]}; }, vocab_);
-}
-
-VocabularyVariant::WordWriter::WordWriter(WordWriters writer)
-    : writer_(std::move(writer)) {}
-
-void VocabularyVariant::WordWriter::finish() {
-  std::visit([](auto& writer) { return writer->finish(); }, writer_);
-}
-
-void VocabularyVariant::WordWriter::operator()(std::string_view word,
-                                               bool isExternal) {
-  std::visit(
-      [&word, isExternal](auto& writer) { return (*writer)(word, isExternal); },
-      writer_);
-}
-
-auto VocabularyVariant::makeDiskWriter(const std::string& filename) const
-    -> WordWriter {
-  return WordWriter{std::visit(
-      [&filename](auto& vocab) -> WordWriters {
-        return vocab.makeDiskWriterPtr(filename);
-      },
-      vocab_)};
-}
-
-VocabularyVariant::WordWriter VocabularyVariant::makeDiskWriter(
-    const std::string& filename, VocabularyType type) {
-  VocabularyVariant dummyVocab;
-  dummyVocab.resetToType(type);
-  return dummyVocab.makeDiskWriter(filename);
-}
-
-void VocabularyVariant::resetToType(VocabularyType type) {
-  close();
-  switch (type.value()) {
-    case VocabularyType::Enum::InMemory:
-      vocab_.emplace<InMemory>();
-      break;
-    case VocabularyType::Enum::OnDisk:
-      vocab_.emplace<External>();
-      break;
-    case VocabularyType::Enum::CompressedInMemory:
-      vocab_.emplace<CompressedInMemory>();
-      break;
-    case VocabularyType::Enum::CompressedOnDisk:
-      vocab_.emplace<CompressedExternal>();
-      break;
-    default:
-      AD_FAIL();
-  }
-}
diff --git a/src/util/ProgramOptionsHelpers.h b/src/util/ProgramOptionsHelpers.h
index 0d3ede6a1a..a86a850c35 100644
--- a/src/util/ProgramOptionsHelpers.h
+++ b/src/util/ProgramOptionsHelpers.h
@@ -8,11 +8,10 @@
 #include <boost/program_options.hpp>
 #include <vector>
 
+#include "index/vocabulary/VocabularyType.h"
 #include "util/Concepts.h"
 #include "util/MemorySize/MemorySize.h"
 #include "util/Parameters.h"
-// TODO<joka921> only include the enum.
-#include "index/vocabulary/VocabularyVariant.h"
 namespace ad_utility {
 
 // An implicit wrapper that can be implicitly converted to and from `size_t`.
@@ -124,7 +123,7 @@ class ParameterToProgramOptionFactory {
 // This function is required  to use `VocabularyEnum` in
 // `boost::program_options`.
 inline void validate(boost::any& v, const std::vector<std::string>& values,
-                     VocabularyEnum*, int) {
+                     VocabularyType*, int) {
   using namespace boost::program_options;
 
   // Make sure no previous assignment to 'v' was made.
@@ -134,7 +133,7 @@ inline void validate(boost::any& v, const std::vector<std::string>& values,
   const string& s = validators::get_single_string(values);
 
   // Convert the string to `MemorySize` and put it into the option.
-  v = VocabularyEnum::fromString(s);
+  v = VocabularyType::fromString(s);
 }
 
 }  // namespace ad_utility

From 825f8bfb754ef2e83e1f6aed374207aa9b331d35 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 5 Feb 2025 19:09:04 +0100
Subject: [PATCH 16/25] Some additional fixes and comments.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/global/Pattern.h                  | 3 ---
 src/index/IndexBuilderMain.cpp        | 9 ++++-----
 src/index/IndexImpl.cpp               | 2 --
 src/index/IndexImpl.h                 | 2 ++
 src/index/vocabulary/VocabularyType.h | 7 +++++--
 5 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/global/Pattern.h b/src/global/Pattern.h
index 9c37eb39ce..1005add22d 100644
--- a/src/global/Pattern.h
+++ b/src/global/Pattern.h
@@ -194,9 +194,6 @@ struct CompactStringVectorWriter {
     commonInitialization();
   }
 
-  CompactStringVectorWriter(CompactStringVectorWriter&&) = default;
-  CompactStringVectorWriter& operator=(CompactStringVectorWriter&&) = default;
-
   void push(const data_type* data, size_t elementSize) {
     AD_CONTRACT_CHECK(!_finished);
     _offsets.push_back(_nextOffset);
diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp
index c75fd5d427..29b11eae9b 100644
--- a/src/index/IndexBuilderMain.cpp
+++ b/src/index/IndexBuilderMain.cpp
@@ -168,7 +168,6 @@ int main(int argc, char** argv) {
   std::optional<ad_utility::MemorySize> stxxlMemory;
   std::optional<ad_utility::MemorySize> parserBufferSize;
   std::optional<ad_utility::VocabularyType> vocabType;
-  // VocabularyEnum vocabType;
   optind = 1;
 
   Index index{ad_utility::makeUnlimitedAllocator<Id>()};
@@ -228,8 +227,9 @@ int main(int argc, char** argv) {
       "Only build the PSO and POS permutations. This is faster, but then "
       "queries with predicate variables are not supported");
   add("vocabulary-type", po::value(&vocabType),
-      "The vocabulary implementation for strings in qlever, can be any of ... "
-      "(TODO joka)");
+      absl::StrCat(
+          "The vocabulary implementation for strings in qlever, can be any of ",
+          ad_utility::VocabularyType::getListOfSupportedValues()));
 
   // Options for the index building process.
   add("stxxl-memory,m", po::value(&stxxlMemory),
@@ -262,11 +262,10 @@ int main(int argc, char** argv) {
   if (parserBufferSize.has_value()) {
     index.parserBufferSize() = parserBufferSize.value();
   }
-  /*
+
   if (vocabType.has_value()) {
     index.getImpl().setVocabularyTypeForIndexBuilding(vocabType.value());
   }
-  */
 
   // If no text index name was specified, take the part of the wordsfile after
   // the last slash.
diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp
index 3ad2e997ec..9d8f89c19f 100644
--- a/src/index/IndexImpl.cpp
+++ b/src/index/IndexImpl.cpp
@@ -1133,8 +1133,6 @@ void IndexImpl::readConfiguration() {
   loadDataMember("num-triples", numTriples_, NumNormalAndInternal{});
   loadDataMember("num-non-literals-text-index", nofNonLiteralsInTextIndex_, 0);
 
-  // TODO<joka921> Comment and also write the configuration.
-  // The default value is the one the used to be the only.
   ad_utility::VocabularyType vocabType(
       ad_utility::VocabularyType::Enum::CompressedOnDisk);
   loadDataMember("vocabulary-type", vocabType, vocabType);
diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index aaa6d0a1f1..a8828f2236 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -192,6 +192,8 @@ class IndexImpl {
   std::optional<Id> idOfHasPatternDuringIndexBuilding_;
   std::optional<Id> idOfInternalGraphDuringIndexBuilding_;
 
+  // The vocabulary type that is used (only relevant during index building).
+  // The default is chosen s.t. the compatibility to old index builds.
   ad_utility::VocabularyType vocabularyTypeForIndexBuilding_{
       ad_utility::VocabularyType::Enum::CompressedOnDisk};
 
diff --git a/src/index/vocabulary/VocabularyType.h b/src/index/vocabulary/VocabularyType.h
index 4e65a481df..a6b0eacfb4 100644
--- a/src/index/vocabulary/VocabularyType.h
+++ b/src/index/vocabulary/VocabularyType.h
@@ -32,11 +32,14 @@ class VocabularyType {
           absl::StrCat("\"", description,
                        "\" is not a valid vocabulary type. The currently "
                        "supported vocabulary types are ",
-                       absl::StrJoin(descriptions, ", "))};
-      ;
+                       getListOfSupportedValues())};
     }
     return VocabularyType{static_cast<Enum>(it - descriptions.begin())};
   }
+
+  static std::string getListOfSupportedValues() {
+    return absl::StrJoin(descriptions, ", ");
+  }
   std::string_view toString() const {
     return descriptions.at(static_cast<size_t>(value_));
   }

From 066ddf62c50de6add776499d08f1e643750d3a71 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 5 Feb 2025 19:29:04 +0100
Subject: [PATCH 17/25] Refactoring there and back again.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/index/IndexImpl.h  |  3 ++-
 src/index/Vocabulary.h | 18 +-----------------
 2 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h
index a8828f2236..f3aba12cbb 100644
--- a/src/index/IndexImpl.h
+++ b/src/index/IndexImpl.h
@@ -193,7 +193,6 @@ class IndexImpl {
   std::optional<Id> idOfInternalGraphDuringIndexBuilding_;
 
   // The vocabulary type that is used (only relevant during index building).
-  // The default is chosen s.t. the compatibility to old index builds.
   ad_utility::VocabularyType vocabularyTypeForIndexBuilding_{
       ad_utility::VocabularyType::Enum::CompressedOnDisk};
 
@@ -280,6 +279,8 @@ class IndexImpl {
     return deltaTriples_.value();
   }
 
+  // See the documentation of the `vocabularyTypeForIndexBuilding_` member for
+  // details.
   void setVocabularyTypeForIndexBuilding(ad_utility::VocabularyType type) {
     vocabularyTypeForIndexBuilding_ = type;
     configurationJson_["vocabulary-type"] = type;
diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
index 46af9c8c56..7587275118 100644
--- a/src/index/Vocabulary.h
+++ b/src/index/Vocabulary.h
@@ -20,7 +20,6 @@
 #include "global/Constants.h"
 #include "global/Id.h"
 #include "global/Pattern.h"
-#include "index/CompressedString.h"
 #include "index/StringSortComparator.h"
 #include "index/VocabularyOnDisk.h"
 #include "index/vocabulary/CompressedVocabulary.h"
@@ -60,9 +59,7 @@ inline std::ostream& operator<<(std::ostream& stream,
 }
 
 // A vocabulary. Wraps a vector of strings and provides additional methods for
-// retrieval. Template parameters that are supported are:
-// std::string -> no compression is applied
-// CompressedString -> prefix compression is applied
+// retrieval.
 template <typename UnderlyingVocabulary, typename ComparatorType,
           typename IndexT>
 class Vocabulary {
@@ -105,19 +102,6 @@ class Vocabulary {
   vector<std::string> internalizedLangs_;
   vector<std::string> externalizedPrefixes_{""};
 
-  //  using UnderlyingVocabulary = VocabularyInMemory;
-  /*
-  using UnderlyingVocabulary =
-      std::conditional_t<isCompressed_,
-                         CompressedVocabulary<VocabularyInternalExternal>,
-                         VocabularyInMemory>;
-      */
-  /*
-  using UnderlyingVocabulary =
-      std::conditional_t<isCompressed_,
-                         CompressedVocabulary<VocabularyInMemory>,
-                         VocabularyInMemory>;
-                         */
   using VocabularyWithUnicodeComparator =
       UnicodeVocabulary<UnderlyingVocabulary, ComparatorType>;
 

From b9948ff68f580224c79bfe1cb8c590da3cbce99e Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Wed, 5 Feb 2025 19:37:16 +0100
Subject: [PATCH 18/25] Fix compilation.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/global/Pattern.h           | 3 +++
 src/index/IndexBuilderMain.cpp | 8 ++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/global/Pattern.h b/src/global/Pattern.h
index 1005add22d..28ca2a9c0e 100644
--- a/src/global/Pattern.h
+++ b/src/global/Pattern.h
@@ -227,6 +227,9 @@ struct CompactStringVectorWriter {
     }
   }
 
+  CompactStringVectorWriter(CompactStringVectorWriter&&) = default;
+  CompactStringVectorWriter& operator=(CompactStringVectorWriter&&) = default;
+
  private:
   // Has to be run by all the constructors
   void commonInitialization() {
diff --git a/src/index/IndexBuilderMain.cpp b/src/index/IndexBuilderMain.cpp
index 29b11eae9b..034e76050d 100644
--- a/src/index/IndexBuilderMain.cpp
+++ b/src/index/IndexBuilderMain.cpp
@@ -226,10 +226,10 @@ int main(int argc, char** argv) {
   add("only-pso-and-pos-permutations,o", po::bool_switch(&onlyPsoAndPos),
       "Only build the PSO and POS permutations. This is faster, but then "
       "queries with predicate variables are not supported");
-  add("vocabulary-type", po::value(&vocabType),
-      absl::StrCat(
-          "The vocabulary implementation for strings in qlever, can be any of ",
-          ad_utility::VocabularyType::getListOfSupportedValues()));
+  auto msg = absl::StrCat(
+      "The vocabulary implementation for strings in qlever, can be any of ",
+      ad_utility::VocabularyType::getListOfSupportedValues());
+  add("vocabulary-type", po::value(&vocabType), msg.c_str());
 
   // Options for the index building process.
   add("stxxl-memory,m", po::value(&stxxlMemory),

From b1b884e4fb482b030b6b9b24ed600f332412dbfd Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 6 Feb 2025 12:15:00 +0100
Subject: [PATCH 19/25] Feed this to the tools...

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 CMakeLists.txt                                | 11 ++---
 src/global/Pattern.h                          |  5 ++-
 src/index/CMakeLists.txt                      |  2 +-
 src/index/Vocabulary.h                        | 37 ++++++---------
 src/index/vocabulary/CMakeLists.txt           |  4 +-
 src/index/vocabulary/PolymorphicVocabulary.h  | 21 ---------
 .../vocabulary/VocabularyInternalExternal.h   |  2 +-
 .../{ => vocabulary}/VocabularyOnDisk.cpp     |  2 +-
 src/index/{ => vocabulary}/VocabularyOnDisk.h |  0
 src/index/vocabulary/VocabularyType.h         | 45 ++++++++++++++++---
 src/util/File.h                               |  6 ++-
 src/util/ProgramOptionsHelpers.h              |  2 +-
 test/CMakeLists.txt                           |  2 +-
 test/StringSortComparatorTest.cpp             |  5 +++
 test/index/vocabulary/CMakeLists.txt          | 14 +++---
 .../vocabulary/CompressedVocabularyTest.cpp   |  2 +-
 .../vocabulary/PolymorphicVocabularyTest.cpp  | 42 +++++++++++++++++
 .../VocabularyInternalExternalTest.cpp        |  2 +-
 .../index/vocabulary/VocabularyOnDiskTest.cpp |  2 +-
 test/index/vocabulary/VocabularyTypeTest.cpp  | 36 +++++++++++++++
 test/util/IndexTestHelpers.cpp                |  2 +
 21 files changed, 169 insertions(+), 75 deletions(-)
 rename src/index/{ => vocabulary}/VocabularyOnDisk.cpp (98%)
 rename src/index/{ => vocabulary}/VocabularyOnDisk.h (100%)
 create mode 100644 test/index/vocabulary/PolymorphicVocabularyTest.cpp
 create mode 100644 test/index/vocabulary/VocabularyTypeTest.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 67b2feb62b..9402201159 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,14 +203,9 @@ if (${USE_CPP_17_BACKPORTS})
     add_definitions("-DQLEVER_CPP_17 -DCPP_CXX_CONCEPTS=0")
 endif()
 
-set(VOCAB_IN_MEMORY OFF CACHE BOOL "Store QLever's vocabulary completely in RAM")
-if (${VOCAB_IN_MEMORY})
-    add_definitions("-D_QLEVER_VOCAB_IN_MEMORY")
-endif ()
-
-set(ENABLE_VOCAB_COMPRESSION ON CACHE BOOL "Compress the vocabulary")
-if (${ENABLE_VOCAB_COMPRESSION})
-    add_definitions("-D_QLEVER_ENABLE_VOCAB_COMPRESSION")
+set(VOCAB_UNCOMPRESSED_IN_MEMORY OFF CACHE BOOL "Store QLever's vocabulary uncompressed and completely in RAM")
+if (${VOCAB_UNCOMPRESSED_IN_MEMORY})
+    add_definitions("-D_QLEVER_VOCAB_UNCOMPRESSED_IN_MEMORY")
 endif ()
 
 # Enable the specification of additional linker flags manually from the commandline
diff --git a/src/global/Pattern.h b/src/global/Pattern.h
index 28ca2a9c0e..9178e5d640 100644
--- a/src/global/Pattern.h
+++ b/src/global/Pattern.h
@@ -17,6 +17,7 @@
 #include "util/File.h"
 #include "util/Generator.h"
 #include "util/Iterators.h"
+#include "util/ResetWhenMoved.h"
 #include "util/Serializer/FileSerializer.h"
 #include "util/Serializer/SerializeVector.h"
 #include "util/TypeTraits.h"
@@ -181,7 +182,9 @@ struct CompactStringVectorWriter {
   off_t _startOfFile;
   using offset_type = typename CompactVectorOfStrings<data_type>::offset_type;
   std::vector<offset_type> _offsets;
-  bool _finished = false;
+  // A `CompactStringVectorWriter` that has been moved from may not call
+  // `finish()` any more in its destructor.
+  ad_utility::ResetWhenMoved<bool, true> _finished = false;
   offset_type _nextOffset = 0;
 
   explicit CompactStringVectorWriter(const std::string& filename)
diff --git a/src/index/CMakeLists.txt b/src/index/CMakeLists.txt
index 4a226bdfdd..e421a03e55 100644
--- a/src/index/CMakeLists.txt
+++ b/src/index/CMakeLists.txt
@@ -1,7 +1,7 @@
 add_subdirectory(vocabulary)
 add_library(index
         Index.cpp IndexImpl.cpp IndexImpl.Text.cpp
-        Vocabulary.cpp VocabularyOnDisk.cpp
+        Vocabulary.cpp
         LocatedTriples.cpp Permutation.cpp TextMetaData.cpp
         DocsDB.cpp FTSAlgorithms.cpp
         PrefixHeuristic.cpp CompressedRelation.cpp
diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h
index 7587275118..eecf3b832a 100644
--- a/src/index/Vocabulary.h
+++ b/src/index/Vocabulary.h
@@ -21,12 +21,9 @@
 #include "global/Id.h"
 #include "global/Pattern.h"
 #include "index/StringSortComparator.h"
-#include "index/VocabularyOnDisk.h"
-#include "index/vocabulary/CompressedVocabulary.h"
 #include "index/vocabulary/PolymorphicVocabulary.h"
 #include "index/vocabulary/UnicodeVocabulary.h"
 #include "index/vocabulary/VocabularyInMemory.h"
-#include "index/vocabulary/VocabularyInternalExternal.h"
 #include "util/Exception.h"
 #include "util/HashMap.h"
 #include "util/HashSet.h"
@@ -216,10 +213,16 @@ class Vocabulary {
   // vocabulary.
   UnderlyingVocabulary::WordWriter makeWordWriter(
       const std::string& filename) const {
+    // Note: In GCC this triggers a move construction of the created
+    // `DiskWriter`, although mandatory copy elision should kick in here
+    // according to our understanding (and does in clang). We could investigate
+    // whether this is a bug in GCC or whether we are missing something.
     return vocabulary_.getUnderlyingVocabulary().makeDiskWriter(filename);
   }
 
-  // TODO<joka921> Comment.
+  // If the `UnderlyingVocabulary` is a `PolymorphicVocabulary`, close the
+  // vocabulary and set the type of the vocabulary according to the `type`
+  // argument (see the `PolymorphicVocabulary` class for details).
   void resetToType(ad_utility::VocabularyType type) {
     if constexpr (std::is_same_v<UnderlyingVocabulary, PolymorphicVocabulary>) {
       vocabulary_.getUnderlyingVocabulary().resetToType(type);
@@ -228,29 +231,17 @@ class Vocabulary {
 };
 
 namespace detail {
-// The two mactors `_QLEVER_VOCAB_IN_MEMORY` and
-// `_QLEVER_ENABLE_VOCAB_COMPRESSION` can be used to disable the external vocab
-// and the compression of the vocab at compile time. NOTE: These change the
-// binary format of QLever's index, so changing them requires rebuilding of the
-// indices.
-/*
-#ifdef _QLEVER_VOCAB_IN_MEMORY
-using VocabStorage = VocabularyInMemory;
-#else
-using VocabStorage = VocabularyInternalExternal;
-#endif
-*/
+// The compile-time definition `_QLEVER_VOCAB_UNCOMPRESSED_IN_MEMORY` can be
+// used to disable the external vocab and the compression of the vocab at
+// compile time. NOTE: This changes the binary format of QLever's index, so
+// changing it requires rebuilding of the indices.
 
-/*
-#ifndef _QLEVER_ENABLE_VOCAB_COMPRESSION
-using UnderlyingVocabRdfsVocabulary = VocabStorage;
+#ifdef _QLEVER_VOCAB_UNCOMPRESSED_IN_MEMORY
+using UnderlyingVocabRdfsVocabulary = VocabularyInMemory;
 #else
-using UnderlyingVocabRdfsVocabulary = CompressedVocabulary<VocabStorage>;
+using UnderlyingVocabRdfsVocabulary = PolymorphicVocabulary;
 #endif
-*/
 
-// TODO<joka921> Change this place.
-using UnderlyingVocabRdfsVocabulary = PolymorphicVocabulary;
 using UnderlyingVocabTextVocabulary = VocabularyInMemory;
 }  // namespace detail
 
diff --git a/src/index/vocabulary/CMakeLists.txt b/src/index/vocabulary/CMakeLists.txt
index 910ad61c3a..ce746097da 100644
--- a/src/index/vocabulary/CMakeLists.txt
+++ b/src/index/vocabulary/CMakeLists.txt
@@ -1,4 +1,4 @@
 add_library(vocabulary VocabularyInMemory.h VocabularyInMemory.cpp VocabularyInMemoryBinSearch.cpp VocabularyInternalExternal.cpp
-        PolymorphicVocabulary.cpp
-        VocabularyType.h)
+        PolymorphicVocabulary.cpp VocabularyOnDisk.cpp
+        )
 qlever_target_link_libraries(vocabulary)
diff --git a/src/index/vocabulary/PolymorphicVocabulary.h b/src/index/vocabulary/PolymorphicVocabulary.h
index 4a18e57465..02fa12b962 100644
--- a/src/index/vocabulary/PolymorphicVocabulary.h
+++ b/src/index/vocabulary/PolymorphicVocabulary.h
@@ -83,17 +83,6 @@ class PolymorphicVocabulary {
         vocab_);
   }
 
-  // Same as `lower_bound` above, but the comparator compares a `word` and an
-  // `iterator` instead of two words.
-  template <typename String, typename Comp>
-  WordAndIndex lower_bound_iterator(const String& word, Comp comp) const {
-    return std::visit(
-        [&word, &comp](auto& vocab) {
-          return vocab.lower_bound_iterator(word, std::move(comp));
-        },
-        vocab_);
-  }
-
   // Analogous to `lower_bound` (see above).
   template <typename String, typename Comp>
   WordAndIndex upper_bound(const String& word, Comp comp) const {
@@ -104,16 +93,6 @@ class PolymorphicVocabulary {
         vocab_);
   }
 
-  // Analogous to `lower_bound_iterator` (see above).
-  template <typename String, typename Comp>
-  WordAndIndex upper_bound_iterator(const String& word, Comp comp) const {
-    return std::visit(
-        [&word, &comp](auto& vocab) {
-          return vocab.upper_bound_iterator(word, std::move(comp));
-        },
-        vocab_);
-  }
-
   using WordWriters =
       polymorphic_vocabulary::detail::WriterPointers<Variant>::type;
 
diff --git a/src/index/vocabulary/VocabularyInternalExternal.h b/src/index/vocabulary/VocabularyInternalExternal.h
index 897b29258d..209820c604 100644
--- a/src/index/vocabulary/VocabularyInternalExternal.h
+++ b/src/index/vocabulary/VocabularyInternalExternal.h
@@ -8,8 +8,8 @@
 #include <string>
 #include <string_view>
 
-#include "index/VocabularyOnDisk.h"
 #include "index/vocabulary/VocabularyInMemoryBinSearch.h"
+#include "index/vocabulary/VocabularyOnDisk.h"
 #include "index/vocabulary/VocabularyTypes.h"
 #include "util/Exception.h"
 
diff --git a/src/index/VocabularyOnDisk.cpp b/src/index/vocabulary/VocabularyOnDisk.cpp
similarity index 98%
rename from src/index/VocabularyOnDisk.cpp
rename to src/index/vocabulary/VocabularyOnDisk.cpp
index 251130be26..1dc53e8453 100644
--- a/src/index/VocabularyOnDisk.cpp
+++ b/src/index/vocabulary/VocabularyOnDisk.cpp
@@ -2,7 +2,7 @@
 // Chair of Algorithms and Data Structures.
 // Author: Johannes Kalmbach <johannes.kalmbach@gmail.com>
 
-#include "index/VocabularyOnDisk.h"
+#include "index/vocabulary/VocabularyOnDisk.h"
 
 #include <fstream>
 
diff --git a/src/index/VocabularyOnDisk.h b/src/index/vocabulary/VocabularyOnDisk.h
similarity index 100%
rename from src/index/VocabularyOnDisk.h
rename to src/index/vocabulary/VocabularyOnDisk.h
diff --git a/src/index/vocabulary/VocabularyType.h b/src/index/vocabulary/VocabularyType.h
index a6b0eacfb4..62036a495e 100644
--- a/src/index/vocabulary/VocabularyType.h
+++ b/src/index/vocabulary/VocabularyType.h
@@ -7,45 +7,72 @@
 #include <array>
 #include <string_view>
 
+#include "util/Random.h"
 #include "util/json.h"
 
 namespace ad_utility {
+
+// A lightweight enum for the different implementation strategies of the
+// `PolymorphicVocabulary`. Also includes operations for conversion to and from
+// string.
+// TODO<joka921> Implement a generic mixin that can also be used for other
+// enums, especially such used in command-line interfaces.
 class VocabularyType {
  public:
+  // The different vocabulary implementations.
   enum struct Enum { InMemory, OnDisk, CompressedInMemory, CompressedOnDisk };
 
  private:
   Enum value_ = Enum::InMemory;
 
-  static constexpr std::array<std::string_view, 4> descriptions{
+  static constexpr size_t numValues_ = 4;
+  // All possible values.
+  static constexpr std::array<Enum, numValues_> all_{
+      Enum::InMemory, Enum::OnDisk, Enum::CompressedInMemory,
+      Enum::CompressedOnDisk};
+
+  // The string representations of the enum values.
+  static constexpr std::array<std::string_view, numValues_> descriptions_{
       "in-memory-uncompressed", "on-disk-uncompressed", "in-memory-compressed",
       "on-disk-compressed"};
 
+  static_assert(all_.size() == descriptions_.size());
+
  public:
+  // Constructors
   VocabularyType() = default;
   explicit VocabularyType(Enum value) : value_{value} {}
 
+  // Create from a string. The string must be one of the `descriptions_`,
+  // otherwise a `std::runtime_error` is thrown.
   static VocabularyType fromString(std::string_view description) {
-    auto it = ql::ranges::find(descriptions, description);
-    if (it == descriptions.end()) {
+    auto it = ql::ranges::find(descriptions_, description);
+    if (it == descriptions_.end()) {
       throw std::runtime_error{
           absl::StrCat("\"", description,
                        "\" is not a valid vocabulary type. The currently "
                        "supported vocabulary types are ",
                        getListOfSupportedValues())};
     }
-    return VocabularyType{static_cast<Enum>(it - descriptions.begin())};
+    return VocabularyType{all().at(it - descriptions_.begin())};
   }
 
+  // Return all the possible enum values as a comma-separated single string.
   static std::string getListOfSupportedValues() {
-    return absl::StrJoin(descriptions, ", ");
+    return absl::StrJoin(descriptions_, ", ");
   }
+
+  // Convert the enum to the corresponding string.
   std::string_view toString() const {
-    return descriptions.at(static_cast<size_t>(value_));
+    return descriptions_.at(static_cast<size_t>(value_));
   }
 
+  // Return the actual enum value.
   Enum value() const { return value_; }
 
+  // Return a list of all the enum values.
+  static constexpr const std::array<Enum, 4>& all() { return all_; }
+
   // Conversion To JSON.
   friend void to_json(nlohmann::json& j, const VocabularyType& vocabEnum) {
     j = vocabEnum.toString();
@@ -55,5 +82,11 @@ class VocabularyType {
   friend void from_json(const nlohmann::json& j, VocabularyType& vocabEnum) {
     vocabEnum = VocabularyType::fromString(static_cast<std::string>(j));
   }
+
+  // Get a random value, useful for fuzz testing.
+  static VocabularyType random() {
+    ad_utility::FastRandomIntGenerator<size_t> r;
+    return VocabularyType{static_cast<Enum>(r() % numValues_)};
+  }
 };
 }  // namespace ad_utility
diff --git a/src/util/File.h b/src/util/File.h
index cde77a4aaf..782e266380 100644
--- a/src/util/File.h
+++ b/src/util/File.h
@@ -52,6 +52,10 @@ class File {
     open(filename, mode);
   }
 
+  // Files are move-only types.
+  File(const File&) = delete;
+  File& operator=(const File&) = delete;
+
   File& operator=(File&& rhs) noexcept {
     if (isOpen()) {
       close();
@@ -63,7 +67,7 @@ class File {
     return *this;
   }
 
-  File(File&& rhs) : name_{std::move(rhs.name_)}, file_{rhs.file_} {
+  File(File&& rhs) noexcept : name_{std::move(rhs.name_)}, file_{rhs.file_} {
     rhs.file_ = nullptr;
   }
 
diff --git a/src/util/ProgramOptionsHelpers.h b/src/util/ProgramOptionsHelpers.h
index a86a850c35..b395768f50 100644
--- a/src/util/ProgramOptionsHelpers.h
+++ b/src/util/ProgramOptionsHelpers.h
@@ -55,7 +55,7 @@ void validate(boost::any& v, const std::vector<std::string>& values,
               std::optional<T>*, int) {
   // First parse as a T
   T* dummy = nullptr;
-  // using namespace boost::program_options;
+  using namespace boost::program_options;
   validate(v, values, dummy, 0);
 
   // Wrap the T inside std::optional
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3a04b9d201..994b4ea9ae 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -201,7 +201,7 @@ addLinkAndDiscoverTest(BatchedPipelineTest)
 
 addLinkAndDiscoverTest(TupleHelpersTest)
 
-addLinkAndDiscoverTest(StringSortComparatorTest)
+addLinkAndDiscoverTestNoLibs(StringSortComparatorTest)
 
 addLinkAndDiscoverTest(PriorityQueueTest)
 
diff --git a/test/StringSortComparatorTest.cpp b/test/StringSortComparatorTest.cpp
index ade2178ae0..b6143ec70f 100644
--- a/test/StringSortComparatorTest.cpp
+++ b/test/StringSortComparatorTest.cpp
@@ -125,6 +125,11 @@ TEST(StringSortComparatorTest, TripleComponentComparatorTotal) {
     auto bSplit = comparator.extractAndTransformComparable(
         b, TripleComponentComparator::Level::TOTAL);
     EXPECT_EQ(ab, comp(aSplit, bSplit));
+    EXPECT_EQ(ab, comp(a, bSplit));
+    EXPECT_EQ(ab, comp(aSplit, b));
+
+    EXPECT_EQ(ba, comp(b, aSplit));
+    EXPECT_EQ(ba, comp(bSplit, a));
     EXPECT_EQ(ba, comp(bSplit, aSplit));
   };
 
diff --git a/test/index/vocabulary/CMakeLists.txt b/test/index/vocabulary/CMakeLists.txt
index 3b4499a751..2db01bd594 100644
--- a/test/index/vocabulary/CMakeLists.txt
+++ b/test/index/vocabulary/CMakeLists.txt
@@ -1,11 +1,15 @@
-addLinkAndDiscoverTest(VocabularyInMemoryTest vocabulary)
+addLinkAndDiscoverTestNoLibs(VocabularyInMemoryTest vocabulary)
 
-addLinkAndDiscoverTest(VocabularyOnDiskTest index)
+addLinkAndDiscoverTestNoLibs(VocabularyOnDiskTest index)
 
 addLinkAndDiscoverTest(CompressedVocabularyTest vocabulary)
 
-addLinkAndDiscoverTest(UnicodeVocabularyTest vocabulary)
+addLinkAndDiscoverTestNoLibs(UnicodeVocabularyTest vocabulary)
 
-addLinkAndDiscoverTest(VocabularyInternalExternalTest vocabulary)
+addLinkAndDiscoverTestNoLibs(VocabularyInternalExternalTest vocabulary)
 
-addLinkAndDiscoverTest(VocabularyInMemoryBinSearchTest vocabulary)
+addLinkAndDiscoverTestNoLibs(VocabularyInMemoryBinSearchTest vocabulary)
+
+addLinkAndDiscoverTestNoLibs(PolymorphicVocabularyTest vocabulary)
+
+addLinkAndDiscoverTestNoLibs(VocabularyTypeTest)
diff --git a/test/index/vocabulary/CompressedVocabularyTest.cpp b/test/index/vocabulary/CompressedVocabularyTest.cpp
index a1a445e213..8a6f39d2bb 100644
--- a/test/index/vocabulary/CompressedVocabularyTest.cpp
+++ b/test/index/vocabulary/CompressedVocabularyTest.cpp
@@ -6,10 +6,10 @@
 
 #include "VocabularyTestHelpers.h"
 #include "backports/algorithm.h"
-#include "index/VocabularyOnDisk.h"
 #include "index/vocabulary/CompressedVocabulary.h"
 #include "index/vocabulary/PrefixCompressor.h"
 #include "index/vocabulary/VocabularyInMemory.h"
+#include "index/vocabulary/VocabularyOnDisk.h"
 
 namespace {
 
diff --git a/test/index/vocabulary/PolymorphicVocabularyTest.cpp b/test/index/vocabulary/PolymorphicVocabularyTest.cpp
new file mode 100644
index 0000000000..fc01104d4c
--- /dev/null
+++ b/test/index/vocabulary/PolymorphicVocabularyTest.cpp
@@ -0,0 +1,42 @@
+//  Copyright 2025, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include <gmock/gmock.h>
+
+#include "index/vocabulary/PolymorphicVocabulary.h"
+
+using ad_utility::VocabularyType;
+
+namespace {
+void testForVocabType(VocabularyType::Enum vocabType) {
+  VocabularyType type{vocabType};
+  std::string filename =
+      absl::StrCat("polymorphicVocabularyTest.", type.toString(), ".vocab");
+
+  auto writer = PolymorphicVocabulary::makeDiskWriter(filename, type);
+  writer("alpha", false);
+  writer("beta", true);
+  writer("gamma", false);
+  writer.finish();
+
+  PolymorphicVocabulary vocab;
+  vocab.open(filename, type);
+  EXPECT_EQ(vocab.size(), 3);
+
+  EXPECT_EQ(vocab[0], "alpha");
+  EXPECT_EQ(vocab[1], "beta");
+  EXPECT_EQ(vocab[2], "gamma");
+
+  auto wI = vocab.lower_bound("alx", ql::ranges::less{});
+  EXPECT_EQ(wI.index(), 1);
+  EXPECT_EQ(wI.word(), "beta");
+
+  wI = vocab.upper_bound("gamma", ql::ranges::less{});
+  EXPECT_TRUE(wI.isEnd());
+}
+}  // namespace
+
+TEST(PolymorphicVocabulary, basicTests) {
+  ql::ranges::for_each(VocabularyType::all(), &testForVocabType);
+}
diff --git a/test/index/vocabulary/VocabularyInternalExternalTest.cpp b/test/index/vocabulary/VocabularyInternalExternalTest.cpp
index 6c41dc415a..08ef9164dc 100644
--- a/test/index/vocabulary/VocabularyInternalExternalTest.cpp
+++ b/test/index/vocabulary/VocabularyInternalExternalTest.cpp
@@ -34,7 +34,7 @@ class VocabularyCreator {
   auto createVocabularyImpl(const std::vector<std::string>& words) {
     VocabularyInternalExternal vocabulary;
     {
-      auto writer = VocabularyInternalExternal::WordWriter(vocabFilename_);
+      auto writer = VocabularyInternalExternal::makeDiskWriter(vocabFilename_);
       size_t i = 0;
       for (auto& word : words) {
         writer(word, i % 2 == 0);
diff --git a/test/index/vocabulary/VocabularyOnDiskTest.cpp b/test/index/vocabulary/VocabularyOnDiskTest.cpp
index 54fc934f24..ee9090125e 100644
--- a/test/index/vocabulary/VocabularyOnDiskTest.cpp
+++ b/test/index/vocabulary/VocabularyOnDiskTest.cpp
@@ -5,7 +5,7 @@
 #include <gtest/gtest.h>
 
 #include "./VocabularyTestHelpers.h"
-#include "index/VocabularyOnDisk.h"
+#include "index/vocabulary/VocabularyOnDisk.h"
 #include "util/Forward.h"
 
 namespace {
diff --git a/test/index/vocabulary/VocabularyTypeTest.cpp b/test/index/vocabulary/VocabularyTypeTest.cpp
new file mode 100644
index 0000000000..2a8281dd80
--- /dev/null
+++ b/test/index/vocabulary/VocabularyTypeTest.cpp
@@ -0,0 +1,36 @@
+//  Copyright 2025, University of Freiburg,
+//  Chair of Algorithms and Data Structures.
+//  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
+
+#include <gmock/gmock.h>
+
+#include "index/vocabulary/VocabularyType.h"
+
+using namespace ad_utility;
+TEST(VocabularyType, allTests) {
+  using E = VocabularyType::Enum;
+  using T = VocabularyType;
+  T t{};
+  EXPECT_EQ(t.value(), E::InMemory);
+  for (auto e : T::all()) {
+    EXPECT_EQ(T{e}.value(), e);
+  }
+
+  t = T::fromString("on-disk-compressed");
+  EXPECT_EQ(t.value(), E::CompressedOnDisk);
+
+  EXPECT_ANY_THROW(T::fromString("kartoffelsalat"));
+
+  EXPECT_EQ(T{E::OnDisk}.toString(), "on-disk-uncompressed");
+
+  using namespace ::testing;
+  EXPECT_THAT(T::getListOfSupportedValues(),
+              AllOf(HasSubstr("in-memory-uncompressed"),
+                    HasSubstr(", on-disk-uncompressed")));
+
+  for (auto e : T::all()) {
+    nlohmann::json j = T{e};
+    t = j.get<T>();
+    EXPECT_EQ(t.value(), e);
+  }
+}
diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp
index 8e1a693209..6cc5724690 100644
--- a/test/util/IndexTestHelpers.cpp
+++ b/test/util/IndexTestHelpers.cpp
@@ -186,6 +186,8 @@ Index makeTestIndex(const std::string& indexBasename,
     index.loadAllPermutations() = loadAllPermutations;
     qlever::InputFileSpecification spec{inputFilename, qlever::Filetype::Turtle,
                                         std::nullopt};
+    // randomly choose one of the vocabulary implementations
+    index.getImpl().setVocabularyTypeForIndexBuilding(VocabularyType::random());
     index.createFromFiles({spec});
     if (createTextIndex) {
       if (contentsOfWordsFileAndDocsFile.has_value()) {

From 5f2ec6c2ca2850691bd66a80ae0f1d2db97f5ba7 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 6 Feb 2025 12:51:15 +0100
Subject: [PATCH 20/25] Fix for MacOS...

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/index/vocabulary/PolymorphicVocabulary.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/index/vocabulary/PolymorphicVocabulary.cpp b/src/index/vocabulary/PolymorphicVocabulary.cpp
index 1b9936afee..1c328dc6aa 100644
--- a/src/index/vocabulary/PolymorphicVocabulary.cpp
+++ b/src/index/vocabulary/PolymorphicVocabulary.cpp
@@ -25,7 +25,7 @@ void PolymorphicVocabulary::close() {
 
 // _____________________________________________________________________________
 size_t PolymorphicVocabulary::size() const {
-  return std::visit([](auto& vocab) { return vocab.size(); }, vocab_);
+  return std::visit([](auto& vocab) -> size_t { return vocab.size(); }, vocab_);
 }
 
 // _____________________________________________________________________________

From b30861a3029300862d8b3fad6dd39a540d71b619 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 6 Feb 2025 14:45:53 +0100
Subject: [PATCH 21/25] Move the actually used code into the `Operation class.`

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/Operation.cpp            | 23 +++++++++++++++++++++--
 src/engine/QueryExecutionContext.h  |  5 +++++
 src/engine/Server.cpp               | 26 +++++++-------------------
 test/engine/NamedQueryCacheTest.cpp |  3 +++
 4 files changed, 36 insertions(+), 21 deletions(-)
 create mode 100644 test/engine/NamedQueryCacheTest.cpp

diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp
index ed5d9d3cc6..4147d24ca8 100644
--- a/src/engine/Operation.cpp
+++ b/src/engine/Operation.cpp
@@ -4,8 +4,7 @@
 
 #include "engine/Operation.h"
 
-#include <absl/cleanup/cleanup.h>
-
+#include "engine/NamedQueryCache.h"
 #include "engine/QueryExecutionTree.h"
 #include "global/RuntimeParameters.h"
 #include "util/OnDestructionDontThrowDuringStackUnwinding.h"
@@ -292,6 +291,12 @@ std::shared_ptr<const Result> Operation::getResult(
       _executionContext->_pinResult && isRoot;
   const bool pinResult =
       _executionContext->_pinSubtrees || pinFinalResultButNotSubtrees;
+  const bool pinWithName =
+      _executionContext->pinWithExplicitName().has_value() && isRoot;
+
+  if (pinWithName) {
+    computationMode = ComputationMode::FULLY_MATERIALIZED;
+  }
 
   try {
     // In case of an exception, create the correct runtime info, no matter which
@@ -337,6 +342,20 @@ std::shared_ptr<const Result> Operation::getResult(
       updateRuntimeInformationOnSuccess(result, timer.msecs());
     }
 
+    if (pinWithName) {
+      const auto& name = _executionContext->pinWithExplicitName().value();
+      // The query is to be pinned in the named cache. The result is stored
+      // there under `name` in addition to being returned as usual.
+      const auto& actualResult = result._resultPointer->resultTable();
+      AD_CORRECTNESS_CHECK(actualResult.isFullyMaterialized());
+      auto t = NamedQueryCache::Value(actualResult.idTable().clone(),
+                                      getExternallyVisibleVariableColumns(),
+                                      actualResult.sortedBy());
+      _executionContext->namedQueryCache().store(name, std::move(t));
+
+      runtimeInfo().addDetail("pinned-with-explicit-name", name);
+    }
+
     return result._resultPointer->resultTablePtr();
   } catch (ad_utility::CancellationException& e) {
     e.setOperation(getDescriptor());
diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h
index 9eb632b48a..fc8d1b2ac3 100644
--- a/src/engine/QueryExecutionContext.h
+++ b/src/engine/QueryExecutionContext.h
@@ -155,6 +155,9 @@ class QueryExecutionContext {
     return *namedQueryCache_;
   }
 
+  auto& pinWithExplicitName() { return pinWithExplicitName_; }
+  const auto& pinWithExplicitName() const { return pinWithExplicitName_; }
+
  private:
   const Index& _index;
 
@@ -176,4 +179,6 @@ class QueryExecutionContext {
       RuntimeParameters().get<"websocket-updates-enabled">();
 
   NamedQueryCache* namedQueryCache_ = nullptr;
+
+  std::optional<std::string> pinWithExplicitName_ = std::nullopt;
 };
diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
index d9297ea841..ad1b75a61b 100644
--- a/src/engine/Server.cpp
+++ b/src/engine/Server.cpp
@@ -19,7 +19,6 @@
 #include "index/IndexImpl.h"
 #include "util/AsioHelpers.h"
 #include "util/MemorySize/MemorySize.h"
-#include "util/OnDestructionDontThrowDuringStackUnwinding.h"
 #include "util/ParseableDuration.h"
 #include "util/TypeIdentity.h"
 #include "util/TypeTraits.h"
@@ -872,25 +871,14 @@ Awaitable<void> Server::processQuery(
   limitOffset._offset -= qet.getRootOperation()->getLimit()._offset;
 
   if (pinNamed.has_value()) {
-    // The query is to be pinned in the named cache. In this case we don't
-    // return the result, but only pin it.
-    auto result = qet.getResult(false);
-    auto t =
-        NamedQueryCache::Value(result->idTable().clone(),
-                               qet.getVariableColumns(), result->sortedBy());
-    qec.namedQueryCache().store(pinNamed.value(), std::move(t));
-
-    auto response = ad_utility::httpUtils::createOkResponse(
-        "Successfully pinned the query result", request,
-        ad_utility::MediaType::textPlain);
-    co_await send(response);
-  } else {
-    // This actually processes the query and sends the result in the requested
-    // format.
-    co_await sendStreamableResponse(request, send, mediaType, plannedQuery, qet,
-                                    requestTimer, cancellationHandle);
+    // TODO<joka921>  1. Make this require a valid access token. 2. also allow
+    // for clearing the cache.
+    qec.pinWithExplicitName() = pinNamed.value();
   }
-
+  // This actually processes the query and sends the result in the requested
+  // format.
+  co_await sendStreamableResponse(request, send, mediaType, plannedQuery, qet,
+                                  requestTimer, cancellationHandle);
   // Print the runtime info. This needs to be done after the query
   // was computed.
   LOG(INFO) << "Done processing query and sending result"
diff --git a/test/engine/NamedQueryCacheTest.cpp b/test/engine/NamedQueryCacheTest.cpp
new file mode 100644
index 0000000000..669758e208
--- /dev/null
+++ b/test/engine/NamedQueryCacheTest.cpp
@@ -0,0 +1,3 @@
+//
+// Created by kalmbacj on 2/6/25.
+//

From d8080b30f9914a89e3ed3dcda9c9ccf85a880795 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Thu, 6 Feb 2025 17:33:33 +0100
Subject: [PATCH 22/25] Many more improvements for the tests and for the tools.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/global/Pattern.h                            | 17 ++++++++++++++++-
 src/index/vocabulary/VocabularyOnDisk.cpp       |  6 +++---
 src/index/vocabulary/VocabularyOnDisk.h         |  4 ++--
 .../vocabulary/PolymorphicVocabularyTest.cpp    | 11 +++++++++++
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/src/global/Pattern.h b/src/global/Pattern.h
index 9178e5d640..c98487e772 100644
--- a/src/global/Pattern.h
+++ b/src/global/Pattern.h
@@ -182,6 +182,7 @@ struct CompactStringVectorWriter {
   off_t _startOfFile;
   using offset_type = typename CompactVectorOfStrings<data_type>::offset_type;
   std::vector<offset_type> _offsets;
+
   // A `CompactStringVectorWriter` that has been moved from may not call
   // `finish()` any more in its destructor.
   ad_utility::ResetWhenMoved<bool, true> _finished = false;
@@ -230,6 +231,16 @@ struct CompactStringVectorWriter {
     }
   }
 
+  // The copy operations would be deleted implicitly (because `File` is not
+  // copyable).
+  CompactStringVectorWriter(const CompactStringVectorWriter&) = delete;
+  CompactStringVectorWriter& operator=(const CompactStringVectorWriter&) =
+      delete;
+
+  // The move operations have to be explicitly defaulted, because we have a
+  // manually defined destructor.
+  // Note: The defaulted move operations behave correctly because of the usage
+  // of `ResetWhenMoved` with the `_finished` member.
   CompactStringVectorWriter(CompactStringVectorWriter&&) = default;
   CompactStringVectorWriter& operator=(CompactStringVectorWriter&&) = default;
 
@@ -237,12 +248,16 @@ struct CompactStringVectorWriter {
   // Has to be run by all the constructors
   void commonInitialization() {
     AD_CONTRACT_CHECK(_file.isOpen());
-    // We don't known the data size yet.
+    // We don't know the data size yet.
     _startOfFile = _file.tell();
     size_t dataSizeDummy = 0;
     _file.write(&dataSizeDummy, sizeof(dataSizeDummy));
   }
 };
+static_assert(
+    std::is_nothrow_move_assignable_v<CompactStringVectorWriter<char>>);
+static_assert(
+    std::is_nothrow_move_constructible_v<CompactStringVectorWriter<char>>);
 }  // namespace detail
 
 // Forward iterator for a `CompactVectorOfStrings` that reads directly from
diff --git a/src/index/vocabulary/VocabularyOnDisk.cpp b/src/index/vocabulary/VocabularyOnDisk.cpp
index 1dc53e8453..8f23170300 100644
--- a/src/index/vocabulary/VocabularyOnDisk.cpp
+++ b/src/index/vocabulary/VocabularyOnDisk.cpp
@@ -23,8 +23,8 @@ OffsetAndSize VocabularyOnDisk::getOffsetAndSize(uint64_t i) const {
 std::string VocabularyOnDisk::operator[](uint64_t idx) const {
   AD_CONTRACT_CHECK(idx < size());
   auto offsetAndSize = getOffsetAndSize(idx);
-  string result(offsetAndSize._size, '\0');
-  file_.read(result.data(), offsetAndSize._size, offsetAndSize._offset);
+  string result(offsetAndSize.size_, '\0');
+  file_.read(result.data(), offsetAndSize.size_, offsetAndSize.offset_);
   return result;
 }
 
@@ -88,7 +88,7 @@ VocabularyOnDisk::WordWriter::~WordWriter() {
 void VocabularyOnDisk::buildFromStringsAndIds(
     const std::vector<std::pair<std::string, uint64_t>>& wordsAndIds,
     const std::string& fileName) {
-  return buildFromIterable(wordsAndIds, fileName);
+  buildFromIterable(wordsAndIds, fileName);
 }
 
 // _____________________________________________________________________________
diff --git a/src/index/vocabulary/VocabularyOnDisk.h b/src/index/vocabulary/VocabularyOnDisk.h
index f677ac3e7a..87506a4ed5 100644
--- a/src/index/vocabulary/VocabularyOnDisk.h
+++ b/src/index/vocabulary/VocabularyOnDisk.h
@@ -86,8 +86,8 @@ class VocabularyOnDisk : public VocabularyBinarySearchMixin<VocabularyOnDisk> {
 
   // The offset of a word in `file_` and its size in number of bytes.
   struct OffsetAndSize {
-    uint64_t _offset;
-    uint64_t _size;
+    uint64_t offset_;
+    uint64_t size_;
   };
 
   // Helper function for implementing a random access iterator.
diff --git a/test/index/vocabulary/PolymorphicVocabularyTest.cpp b/test/index/vocabulary/PolymorphicVocabularyTest.cpp
index fc01104d4c..c5c91ed686 100644
--- a/test/index/vocabulary/PolymorphicVocabularyTest.cpp
+++ b/test/index/vocabulary/PolymorphicVocabularyTest.cpp
@@ -9,6 +9,8 @@
 using ad_utility::VocabularyType;
 
 namespace {
+
+// Test a `PolymorphicVocabulary` with a given `vocabType`.
 void testForVocabType(VocabularyType::Enum vocabType) {
   VocabularyType type{vocabType};
   std::string filename =
@@ -37,6 +39,15 @@ void testForVocabType(VocabularyType::Enum vocabType) {
 }
 }  // namespace
 
+// Test the general functionality of the `PolymorphicVocabulary` for all the
+// possible `VocabularyType`s.
 TEST(PolymorphicVocabulary, basicTests) {
   ql::ranges::for_each(VocabularyType::all(), &testForVocabType);
 }
+
+// Test a corner case in a `switch` statement.
+TEST(PolymorphicVocabulary, invalidVocabularyType) {
+  PolymorphicVocabulary vocab;
+  auto invalidType = VocabularyType{static_cast<VocabularyType::Enum>(23401)};
+  EXPECT_ANY_THROW(vocab.resetToType(invalidType));
+}

From ea477275ca0924f220cffc47070cbf5b7837054f Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 7 Feb 2025 13:54:16 +0100
Subject: [PATCH 23/25] Merge in the vocab branch.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/engine/CMakeLists.txt |  1 -
 src/libqlever/Qlever.h    | 11 +++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt
index 542f2c3822..fece7772bf 100644
--- a/src/engine/CMakeLists.txt
+++ b/src/engine/CMakeLists.txt
@@ -15,7 +15,6 @@ add_library(engine
         TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
         CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
         Describe.cpp GraphStoreProtocol.cpp)
-        CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp)
 add_library(server Server.cpp)
 qlever_target_link_libraries(server)
 qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
diff --git a/src/libqlever/Qlever.h b/src/libqlever/Qlever.h
index b69dcf9588..88ac4fc042 100644
--- a/src/libqlever/Qlever.h
+++ b/src/libqlever/Qlever.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <index/vocabulary/VocabularyType.h>
 #include <util/MemorySize/MemorySize.h>
 
 #include <optional>
@@ -17,6 +18,7 @@
 #include "global/RuntimeParameters.h"
 #include "index/Index.h"
 #include "index/InputFileSpecification.h"
+#include "index/vocabulary/VocabularyType.h"
 #include "parser/SparqlParser.h"
 #include "util/AllocatorWithLimit.h"
 #include "util/http/MediaTypes.h"
@@ -55,6 +57,11 @@ struct QleverConfig {
   // TODO<joka921> Document these additional settings.
   std::string settingsFile;
 
+  // Specify whether the vocabulary is stored on disk or in RAM, compressed or
+  // uncompressed.
+  ad_utility::VocabularyType vocabularyType_{
+      ad_utility::VocabularyType::Enum::CompressedOnDisk};
+
   // The following members are only required if QLever's full-text search
   // extension is to be used, see `IndexBuilderMain.cpp` for additional details.
   bool addWordsFromLiterals = false;
@@ -93,6 +100,10 @@ class Qlever {
   // cancellation, time limits, and observable queries.
   std::string query(std::string query);
 
+  // Pin a query to the named query cache. In a subsequent query, this cache can
+  // be accessed via `SERVICE ql:
+  void pinNamed(std::string query, std::string name);
+
   // TODO<joka921> Give access to the RuntimeParameters() which allow for
   // further tweaking of the qlever instance.
 };

From 81529de5616ae64300de3662a50e421ce86d7262 Mon Sep 17 00:00:00 2001
From: Johannes Kalmbach <johannes.kalmbach@gmail.com>
Date: Fri, 7 Feb 2025 14:30:41 +0100
Subject: [PATCH 24/25] Update the example with a warmup etc.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
---
 src/libqlever/LibQLeverExample.cpp | 30 +++++++++++++++++++-
 src/libqlever/Qlever.cpp           | 45 +++++++++++++++++++++++++++++-
 src/libqlever/Qlever.h             |  2 ++
 3 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/src/libqlever/LibQLeverExample.cpp b/src/libqlever/LibQLeverExample.cpp
index dbc0ffe2e6..ccf5ef869c 100644
--- a/src/libqlever/LibQLeverExample.cpp
+++ b/src/libqlever/LibQLeverExample.cpp
@@ -2,15 +2,43 @@
 //                  Chair of Algorithms and Data Structures.
 //  Author: Johannes Kalmbach <kalmbach@cs.uni-freiburg.de>
 
+#include <absl/strings/str_replace.h>
+
 #include <iostream>
 
 #include "libqlever/Qlever.h"
+#include "util/Timer.h"
+
+static const std::string warmup1 = "";
+static const std::string warmup2 = "";
+
+static const std::string queryTemplate = R"(
+SELECT *  {
+  #INPUTS#
+  SERVICE ql:named-cached-query-warmup1 {}
+  SERVICE ql:named-cached-query-warmup2 {}
+}
+)";
+
+std::vector<std::string> inputs{""};
 
 int main() {
   qlever::QleverConfig config;
   config.baseName = "exampleIndex";
   config.inputFiles.emplace_back("/dev/stdin", qlever::Filetype::Turtle);
+  config.vocabularyType_ =
+      ad_utility::VocabularyType{ad_utility::VocabularyType::Enum::InMemory};
   qlever::Qlever::buildIndex(config);
   qlever::Qlever qlever{config};
-  std::cout << qlever.query("SELECT * {?s ?p ?o}") << std::endl;
+  qlever.pinNamed(warmup1, "warmup1");
+  qlever.pinNamed(warmup2, "warmup2");
+
+  for (std::string_view input : inputs) {
+    auto query = absl::StrReplaceAll(queryTemplate,
+                                     {{std::string_view{"#INPUTS#"}, input}});
+    ad_utility::Timer t{ad_utility::Timer::Started};
+    auto result = qlever.query(std::move(query));
+    std::cout << "retrieved a query result of size " << result.size() << " in "
+              << t.msecs().count() << "ms\n";
+  }
 }
diff --git a/src/libqlever/Qlever.cpp b/src/libqlever/Qlever.cpp
index cc37562dde..fb4c77eaf6 100644
--- a/src/libqlever/Qlever.cpp
+++ b/src/libqlever/Qlever.cpp
@@ -4,6 +4,8 @@
 
 #include "libqlever/Qlever.h"
 
+#include "index/IndexImpl.h"
+
 namespace qlever {
 static std::string getStxxlConfigFileName(const string& location) {
   return absl::StrCat(location, ".stxxl");
@@ -51,6 +53,8 @@ Qlever::Qlever(const QleverConfig& config)
   enablePatternTrick_ = !config.noPatterns;
   index_.loadAllPermutations() = !config.onlyPsoAndPos;
 
+  index_.getImpl().setVocabularyTypeForIndexBuilding(config.vocabularyType_);
+
   // Init the index.
   index_.createFromOnDiskIndex(config.baseName);
   // TODO<joka921> Enable the loading of the text index via the QLever lib.
@@ -121,7 +125,7 @@ void Qlever::buildIndex(QleverConfig config) {
 // ___________________________________________________________________________
 std::string Qlever::query(std::string query) {
   QueryExecutionContext qec{index_, &cache_, allocator_,
-                            sortPerformanceEstimator_};
+                            sortPerformanceEstimator_, &namedQueryCache_};
   auto parsedQuery = SparqlParser::parseQuery(query);
   auto handle = std::make_shared<ad_utility::CancellationHandle<>>();
   QueryPlanner qp{&qec, handle};
@@ -156,4 +160,43 @@ std::string Qlever::query(std::string query) {
   }
   return result;
 }
+// ___________________________________________________________________________
+// TODO<joka921> A lot of code duplication here.
+void Qlever::pinNamed(std::string query, std::string name) {
+  QueryExecutionContext qec{index_, &cache_, allocator_,
+                            sortPerformanceEstimator_, &namedQueryCache_};
+  qec.pinWithExplicitName() = std::move(name);
+  auto parsedQuery = SparqlParser::parseQuery(query);
+  auto handle = std::make_shared<ad_utility::CancellationHandle<>>();
+  QueryPlanner qp{&qec, handle};
+  qp.setEnablePatternTrick(enablePatternTrick_);
+  auto qet = qp.createExecutionTree(parsedQuery);
+  qet.isRoot() = true;
+  auto& limitOffset = parsedQuery._limitOffset;
+
+  // TODO<joka921> For cancellation we have to call
+  // `recursivelySetCancellationHandle` (see `Server::parseAndPlan`).
+
+  // TODO<joka921> The following interface looks fishy and should be
+  // incorporated directly in the query planner or somewhere else.
+  // (it is used identically in `Server.cpp`).
+
+  // Make sure that the offset is not applied again when exporting the result
+  // (it is already applied by the root operation in the query execution
+  // tree). Note that we don't need this for the limit because applying a
+  // fixed limit is idempotent.
+  AD_CORRECTNESS_CHECK(limitOffset._offset >=
+                       qet.getRootOperation()->getLimit()._offset);
+  limitOffset._offset -= qet.getRootOperation()->getLimit()._offset;
+
+  ad_utility::Timer timer{ad_utility::Timer::Started};
+  auto responseGenerator = ExportQueryExecutionTrees::computeResult(
+      parsedQuery, qet, ad_utility::MediaType::sparqlJson, timer,
+      std::move(handle));
+  std::string result;
+  std::cout << "Writing the result:" << std::endl;
+  for (const auto& batch : responseGenerator) {
+    result += batch;
+  }
+}
 }  // namespace qlever
diff --git a/src/libqlever/Qlever.h b/src/libqlever/Qlever.h
index 88ac4fc042..0a50620670 100644
--- a/src/libqlever/Qlever.h
+++ b/src/libqlever/Qlever.h
@@ -13,6 +13,7 @@
 #include <vector>
 
 #include "engine/ExportQueryExecutionTrees.h"
+#include "engine/NamedQueryCache.h"
 #include "engine/QueryExecutionContext.h"
 #include "engine/QueryPlanner.h"
 #include "global/RuntimeParameters.h"
@@ -84,6 +85,7 @@ class Qlever {
   ad_utility::AllocatorWithLimit<Id> allocator_;
   SortPerformanceEstimator sortPerformanceEstimator_;
   Index index_;
+  NamedQueryCache namedQueryCache_;
   bool enablePatternTrick_;
   static inline std::ostringstream ignoreLogStream;
 

From c0b7a44502a89ed269df0d3e475952b54a11a194 Mon Sep 17 00:00:00 2001
From: Hannah Bast <bast@cs.uni-freiburg.de>
Date: Mon, 10 Feb 2025 00:46:23 +0100
Subject: [PATCH 25/25] Add argument for media type to `Qlever::query` and
 `Qlever::pinNamed`

Also: remove some debug output
---
 src/libqlever/Qlever.cpp | 14 ++++++--------
 src/libqlever/Qlever.h   |  7 +++++--
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/libqlever/Qlever.cpp b/src/libqlever/Qlever.cpp
index fb4c77eaf6..109c906f63 100644
--- a/src/libqlever/Qlever.cpp
+++ b/src/libqlever/Qlever.cpp
@@ -123,7 +123,7 @@ void Qlever::buildIndex(QleverConfig config) {
 }
 
 // ___________________________________________________________________________
-std::string Qlever::query(std::string query) {
+std::string Qlever::query(std::string query, ad_utility::MediaType mediaType) {
   QueryExecutionContext qec{index_, &cache_, allocator_,
                             sortPerformanceEstimator_, &namedQueryCache_};
   auto parsedQuery = SparqlParser::parseQuery(query);
@@ -151,10 +151,8 @@ std::string Qlever::query(std::string query) {
 
   ad_utility::Timer timer{ad_utility::Timer::Started};
   auto responseGenerator = ExportQueryExecutionTrees::computeResult(
-      parsedQuery, qet, ad_utility::MediaType::sparqlJson, timer,
-      std::move(handle));
+      parsedQuery, qet, mediaType, timer, std::move(handle));
   std::string result;
-  std::cout << "Writing the result:" << std::endl;
   for (const auto& batch : responseGenerator) {
     result += batch;
   }
@@ -162,7 +160,8 @@ std::string Qlever::query(std::string query) {
 }
 // ___________________________________________________________________________
 // TODO<joka921> A lot of code duplication here.
-void Qlever::pinNamed(std::string query, std::string name) {
+std::string Qlever::pinNamed(std::string query, std::string name,
+                             ad_utility::MediaType mediaType) {
   QueryExecutionContext qec{index_, &cache_, allocator_,
                             sortPerformanceEstimator_, &namedQueryCache_};
   qec.pinWithExplicitName() = std::move(name);
@@ -191,12 +190,11 @@ void Qlever::pinNamed(std::string query, std::string name) {
 
   ad_utility::Timer timer{ad_utility::Timer::Started};
   auto responseGenerator = ExportQueryExecutionTrees::computeResult(
-      parsedQuery, qet, ad_utility::MediaType::sparqlJson, timer,
-      std::move(handle));
+      parsedQuery, qet, mediaType, timer, std::move(handle));
   std::string result;
-  std::cout << "Writing the result:" << std::endl;
   for (const auto& batch : responseGenerator) {
     result += batch;
   }
+  return result;
 }
 }  // namespace qlever
diff --git a/src/libqlever/Qlever.h b/src/libqlever/Qlever.h
index 0a50620670..a57c405b7d 100644
--- a/src/libqlever/Qlever.h
+++ b/src/libqlever/Qlever.h
@@ -100,11 +100,14 @@ class Qlever {
   // supported, and the result will always be in sparql-results+json format.
   // TODO<joka921> Support other formats + CONSTRUCT queries, support
   // cancellation, time limits, and observable queries.
-  std::string query(std::string query);
+  std::string query(std::string query, ad_utility::MediaType mediaType =
+                                           ad_utility::MediaType::sparqlJson);
 
   // Pin a query to the named query cache. In a subsequent query, this cache can
   // be accessed via `SERVICE ql:
-  void pinNamed(std::string query, std::string name);
+  [[maybe_unused]] std::string pinNamed(
+      std::string query, std::string name,
+      ad_utility::MediaType mediaType = ad_utility::MediaType::sparqlJson);
 
   // TODO<joka921> Give access to the RuntimeParameters() which allow for
   // further tweaking of the qlever instance.