Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First version of a serverless library for QLever #1669

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
c27a4d7
First try of turning the index building into a free function that can…
joka921 Dec 9, 2024
237e93c
First try of turning the index building into a free function that can…
joka921 Dec 9, 2024
10d492d
Optimize some includes.
joka921 Dec 16, 2024
0d4fa20
We have the named cache compiling, now let's use it.
joka921 Jan 30, 2025
befc33d
This seems to work, but copies IdTables etc.
joka921 Jan 30, 2025
7f30e17
It still works and is a little bit cleaner.
joka921 Jan 30, 2025
65caf94
Fix the compilation of the tests again.
joka921 Jan 30, 2025
e9e8dfd
Make the In-Memory-Vocabulary compatible with the RDFVocabulary
joka921 Jan 31, 2025
79a11b6
Refactor things.
joka921 Jan 31, 2025
e406fa4
Making the vocab configuration configurable at runtime.
joka921 Jan 31, 2025
53dc741
Do not move IdTables (we will later try this out on the dat dataset).
joka921 Jan 31, 2025
5e52784
Remove rogue include.
joka921 Jan 31, 2025
49445e5
An intermediate commit before switching branches.
joka921 Feb 5, 2025
6d11c3b
This seems to work, but the IDE has crashed, so we just restart:)
joka921 Feb 5, 2025
3e7f494
Several refactorings.
joka921 Feb 5, 2025
825f8bf
Some additional fixes and comments.
joka921 Feb 5, 2025
d0465da
Merge remote-tracking branch 'origin/master' into allow-different-voc…
Feb 5, 2025
066ddf6
Refactoring there and back again.
joka921 Feb 5, 2025
3a4e223
Merge remote-tracking branch 'origin/allow-different-vocabularies' in…
joka921 Feb 5, 2025
b9948ff
Fix compilation.
joka921 Feb 5, 2025
b1b884e
Feed this to the tools...
joka921 Feb 6, 2025
5f2ec6c
Fix for MacOS...
joka921 Feb 6, 2025
b30861a
Move the actually used code into the `Operation class.`
joka921 Feb 6, 2025
d8080b3
Many more improvements for the tests and for the tools.
joka921 Feb 6, 2025
c0c9018
Merge branch 'allow-different-vocabularies' into libqlever
joka921 Feb 7, 2025
ea47727
Merge in the vocab branch.
joka921 Feb 7, 2025
0fed8f3
Merge branch 'explicitly-named-query' into libqlever
joka921 Feb 7, 2025
81529de
Update the example with a warmup etc.
joka921 Feb 7, 2025
4a65bb5
Merge remote-tracking branch 'origin/master' into libqlever
Feb 7, 2025
c0b7a44
Add argument for media type to `Qlever::query` and `Qlever::pinNamed`
Feb 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,11 @@ if (${USE_CPP_17_BACKPORTS})
add_definitions("-DQLEVER_CPP_17 -DCPP_CXX_CONCEPTS=0")
endif()

set(VOCAB_UNCOMPRESSED_IN_MEMORY OFF CACHE BOOL "Store QLever's vocabulary uncompressed and completely in RAM")
if (${VOCAB_UNCOMPRESSED_IN_MEMORY})
add_definitions("-D_QLEVER_VOCAB_UNCOMPRESSED_IN_MEMORY")
endif ()

# Enable the specification of additional linker flags manually from the commandline
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ADDITIONAL_LINKER_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ADDITIONAL_LINKER_FLAGS}")
Expand Down Expand Up @@ -425,6 +430,7 @@ target_precompile_headers(engine PRIVATE ${PRECOMPILED_HEADER_FILES_ENGINE})
add_subdirectory(src/index)
add_subdirectory(src/util)
add_subdirectory(benchmark)
add_subdirectory(src/libqlever)

enable_testing()
option(SINGLE_TEST_BINARY "Link all unit tests into a single binary. This is useful e.g. for code coverage tools" OFF)
Expand All @@ -439,7 +445,7 @@ add_executable(IndexBuilderMain src/index/IndexBuilderMain.cpp)
qlever_target_link_libraries(IndexBuilderMain index ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)

add_executable(ServerMain src/ServerMain.cpp)
qlever_target_link_libraries(ServerMain engine ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)
qlever_target_link_libraries(ServerMain engine server ${CMAKE_THREAD_LIBS_INIT} Boost::program_options compilationInfo)
target_precompile_headers(ServerMain REUSE_FROM engine)

add_executable(VocabularyMergerMain src/VocabularyMergerMain.cpp)
Expand Down
2 changes: 1 addition & 1 deletion benchmark/GroupByHashMapBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
#include <random>

#include "../benchmark/infrastructure/Benchmark.h"
#include "../test/engine/ValuesForTesting.h"
#include "../test/util/IdTableHelpers.h"
#include "../test/util/IndexTestHelpers.h"
#include "engine/GroupBy.h"
#include "engine/Sort.h"
#include "engine/Values.h"
#include "engine/ValuesForTesting.h"
#include "engine/sparqlExpressions/AggregateExpression.h"
#include "engine/sparqlExpressions/GroupConcatExpression.h"
#include "engine/sparqlExpressions/LiteralExpression.h"
Expand Down
6 changes: 4 additions & 2 deletions src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ add_library(engine
Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp
IndexScan.cpp Join.cpp Sort.cpp
Distinct.cpp OrderBy.cpp Filter.cpp
Server.cpp QueryPlanner.cpp QueryPlanningCostFactors.cpp
QueryPlanner.cpp QueryPlanningCostFactors.cpp
OptionalJoin.cpp CountAvailablePredicates.cpp GroupBy.cpp HasPredicateScan.cpp
Union.cpp MultiColumnJoin.cpp TransitivePathBase.cpp
TransitivePathHashMap.cpp TransitivePathBinSearch.cpp Service.cpp
Expand All @@ -14,5 +14,7 @@ add_library(engine
CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
Describe.cpp GraphStoreProtocol.cpp)
Describe.cpp GraphStoreProtocol.cpp NamedQueryCache.cpp QueryExecutionContext.cpp)
add_library(server Server.cpp)
qlever_target_link_libraries(server)
qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
8 changes: 5 additions & 3 deletions src/engine/CheckUsePatternTrick.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,11 @@ bool isVariableContainedInGraphPatternOperation(
} else if constexpr (std::is_same_v<T, p::Service>) {
return ad_utility::contains(arg.visibleVariables_, variable);
} else {
static_assert(
std::is_same_v<T, p::TransPath> || std::is_same_v<T, p::PathQuery> ||
std::is_same_v<T, p::Describe> || std::is_same_v<T, p::SpatialQuery>);
static_assert(std::is_same_v<T, p::TransPath> ||
std::is_same_v<T, p::PathQuery> ||
std::is_same_v<T, p::Describe> ||
std::is_same_v<T, p::SpatialQuery> ||
std::is_same_v<T, p::NamedCachedQuery>);
// The `TransPath` is set up later in the query planning, when this
// function should not be called anymore.
AD_FAIL();
Expand Down
2 changes: 1 addition & 1 deletion src/engine/Describe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

#include "engine/Describe.h"

#include "../../test/engine/ValuesForTesting.h"
#include "engine/IndexScan.h"
#include "engine/Join.h"
#include "engine/ValuesForTesting.h"

// _____________________________________________________________________________
Describe::Describe(QueryExecutionContext* qec,
Expand Down
11 changes: 9 additions & 2 deletions src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,15 @@ ExportQueryExecutionTrees::getLiteralOrIriFromVocabIndex(
case Datatype::LocalVocabIndex:
return localVocab.getWord(id.getLocalVocabIndex()).asLiteralOrIri();
case Datatype::VocabIndex: {
auto entity = index.indexToString(id.getVocabIndex());
return LiteralOrIri::fromStringRepresentation(entity);
auto getEntity = [&index, id]() {
return index.indexToString(id.getVocabIndex());
};
// The type of entity might be `string_view` (If the vocabulary is stored
// uncompressed in RAM) or `string` (if it is on-disk, or compressed or
// both). The following code works and is efficient in all cases. In
// particular, the `std::string` constructor is compiled out because of
// RVO if `getEntity()` already returns a `string`.
return LiteralOrIri::fromStringRepresentation(std::string(getEntity()));
}
default:
AD_FAIL();
Expand Down
34 changes: 34 additions & 0 deletions src/engine/NamedQueryCache.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2025, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#include "engine/NamedQueryCache.h"

// _____________________________________________________________________________
std::shared_ptr<ValuesForTesting> NamedQueryCache ::getOperation(
const Key& key, QueryExecutionContext* ctx) const {
const auto& [table, map, sortedOn] = get(key);
// TODO<joka921> we should get rid of the copies for the IdTable (and
// probably the other members) especially for larger results).
return std::make_shared<ValuesForTesting>(ctx, table.clone(), map, sortedOn);
}

// _____________________________________________________________________________
// Return the value that was stored under `key`. Throws `std::runtime_error`
// if there is no such value.
// NOTE(review): The lock is a local object and is therefore released when this
// function returns, so the returned reference is not protected against
// concurrent calls to `store` or `clear` -- confirm that all callers can
// tolerate this.
auto NamedQueryCache::get(const Key& key) const -> const Value& {
  auto lockedCache = cache_.wlock();
  const auto entry = lockedCache->find(key);
  if (entry != lockedCache->end()) {
    return entry->second;
  }
  throw std::runtime_error{
      absl::StrCat("The named query with the name \"", key,
                   "\" was not pinned to the named query cache")};
}

// _____________________________________________________________________________
// Associate `value` with `key`. A value that was previously stored under the
// same `key` is replaced.
void NamedQueryCache::store(const Key& key, Value value) {
  auto lockedCache = cache_.wlock();
  lockedCache->insert_or_assign(key, std::move(value));
}

// _____________________________________________________________________________
// Remove all entries from the cache.
void NamedQueryCache::clear() {
  auto lockedCache = cache_.wlock();
  lockedCache->clear();
}
46 changes: 46 additions & 0 deletions src/engine/NamedQueryCache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright 2025, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>
#pragma once

#include "engine/ValuesForTesting.h"
#include "util/Synchronized.h"

// A simple threadsafe cache that associates query results with an explicit
// name.
// A cache that maps an explicitly chosen name to a stored query result. All
// accesses to the underlying hash map go through an
// `ad_utility::Synchronized` wrapper.
class NamedQueryCache {
 public:
  // The payload that is stored per name: the result table together with the
  // information that is needed to later build a proper `QueryExecutionTree`
  // from it.
  struct Value {
    // The stored result.
    IdTable result_;
    // The mapping from variables to the columns of `result_`.
    VariableToColumnMap varToColMap_;
    // The columns by which `result_` is sorted.
    std::vector<ColumnIndex> resultSortedOn_;
  };

  // The explicit name of a stored result.
  using Key = std::string;
  using Cache = ad_utility::HashMap<Key, Value>;

  // Associate `value` with `key`. A `value` that was previously stored under
  // the same `key` is overwritten.
  void store(const Key& key, Value value);

  // Remove all entries from the cache.
  void clear();

  // Return the value that is associated with `key`. Throw an exception if
  // there is no such value.
  // NOTE(review): The reference is returned after the internal lock has been
  // released; presumably it must not be used concurrently with `store` or
  // `clear` -- confirm against the callers.
  const Value& get(const Key& key) const;

  // Look up the result for `key` and turn it into an explicit
  // `ValuesForTesting` operation that can be used as part of a
  // `QueryExecutionTree`.
  // TODO<joka921> This can be done more efficiently if we implement a dedicated
  // operation for this use case, `ValuesForTesting` currently incurs one
  // (unneeded) copy per query execution.
  std::shared_ptr<ValuesForTesting> getOperation(
      const Key& key, QueryExecutionContext* ctx) const;

 private:
  // The actual storage, guarded by `ad_utility::Synchronized`.
  ad_utility::Synchronized<Cache> cache_;
};
23 changes: 21 additions & 2 deletions src/engine/Operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

#include "engine/Operation.h"

#include <absl/cleanup/cleanup.h>

#include "engine/NamedQueryCache.h"
#include "engine/QueryExecutionTree.h"
#include "global/RuntimeParameters.h"
#include "util/OnDestructionDontThrowDuringStackUnwinding.h"
Expand Down Expand Up @@ -293,6 +292,12 @@ std::shared_ptr<const Result> Operation::getResult(
_executionContext->_pinResult && isRoot;
const bool pinResult =
_executionContext->_pinSubtrees || pinFinalResultButNotSubtrees;
const bool pinWithName =
_executionContext->pinWithExplicitName().has_value() && isRoot;

if (pinWithName) {
computationMode = ComputationMode::FULLY_MATERIALIZED;
}

try {
// In case of an exception, create the correct runtime info, no matter which
Expand Down Expand Up @@ -339,6 +344,20 @@ std::shared_ptr<const Result> Operation::getResult(
updateRuntimeInformationOnSuccess(result, timer.msecs());
}

if (pinWithName) {
const auto& name = _executionContext->pinWithExplicitName().value();
// The query is to be pinned in the named cache. In this case we don't
// return the result, but only pin it.
const auto& actualResult = result._resultPointer->resultTable();
AD_CORRECTNESS_CHECK(actualResult.isFullyMaterialized());
auto t = NamedQueryCache::Value(actualResult.idTable().clone(),
getExternallyVisibleVariableColumns(),
actualResult.sortedBy());
_executionContext->namedQueryCache().store(name, std::move(t));

runtimeInfo().addDetail("pinned-with-explicit-name", name);
}

return result._resultPointer->resultTablePtr();
} catch (ad_utility::CancellationException& e) {
e.setOperation(getDescriptor());
Expand Down
22 changes: 22 additions & 0 deletions src/engine/QueryExecutionContext.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2025, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#include "engine/QueryExecutionContext.h"

// _____________________________________________________________________________
// Construct an execution context from its building blocks. Each argument is
// stored in the corresponding member; `cache` and `namedCache` are stored as
// raw pointers, `index` is stored as a reference, and `allocator` and
// `updateCallback` are moved into place.
// Note: `namedCache` may be `nullptr`, but then `namedQueryCache()` must not be
// called, because it checks that the pointer is not null.
QueryExecutionContext::QueryExecutionContext(
    const Index& index, QueryResultCache* cache,
    ad_utility::AllocatorWithLimit<Id> allocator,
    SortPerformanceEstimator sortPerformanceEstimator,
    NamedQueryCache* namedCache,
    std::function<void(std::string)> updateCallback, bool pinSubtrees,
    bool pinResult)
    : _pinSubtrees(pinSubtrees),
      _pinResult(pinResult),
      _index(index),
      _subtreeCache(cache),
      _allocator(std::move(allocator)),
      _sortPerformanceEstimator(sortPerformanceEstimator),
      updateCallback_(std::move(updateCallback)),
      namedQueryCache_(namedCache) {}
29 changes: 20 additions & 9 deletions src/engine/QueryExecutionContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ class CacheValue {
};
};

// Forward declaration because of cyclic dependencies
class NamedQueryCache;

// The key for the `QueryResultCache` below. It consists of a `string` (the
// actual cache key of a `QueryExecutionTree` and the index of the
// `LocatedTriplesSnapshot` that was used to create the corresponding value.
Expand All @@ -89,6 +92,9 @@ struct QueryCacheKey {
using QueryResultCache = ad_utility::ConcurrentCache<
ad_utility::LRUCache<QueryCacheKey, CacheValue, CacheValue::SizeGetter>>;

// Forward declaration because of cyclic dependency
class NamedQueryCache;

// Execution context for queries.
// Holds references to index and engine, implements caching.
class QueryExecutionContext {
Expand All @@ -97,17 +103,10 @@ class QueryExecutionContext {
const Index& index, QueryResultCache* const cache,
ad_utility::AllocatorWithLimit<Id> allocator,
SortPerformanceEstimator sortPerformanceEstimator,
NamedQueryCache* namedCache,
std::function<void(std::string)> updateCallback =
[](std::string) { /* No-op by default for testing */ },
const bool pinSubtrees = false, const bool pinResult = false)
: _pinSubtrees(pinSubtrees),
_pinResult(pinResult),
_index(index),
_subtreeCache(cache),
_allocator(std::move(allocator)),
_costFactors(),
_sortPerformanceEstimator(sortPerformanceEstimator),
updateCallback_(std::move(updateCallback)) {}
bool pinSubtrees = false, bool pinResult = false);

QueryResultCache& getQueryTreeCache() { return *_subtreeCache; }

Expand Down Expand Up @@ -151,6 +150,14 @@ class QueryExecutionContext {
return areWebsocketUpdatesEnabled_;
}

NamedQueryCache& namedQueryCache() {
AD_CORRECTNESS_CHECK(namedQueryCache_ != nullptr);
return *namedQueryCache_;
}

auto& pinWithExplicitName() { return pinWithExplicitName_; }
const auto& pinWithExplicitName() const { return pinWithExplicitName_; }

private:
const Index& _index;

Expand All @@ -170,4 +177,8 @@ class QueryExecutionContext {
// mutex.
bool areWebsocketUpdatesEnabled_ =
RuntimeParameters().get<"websocket-updates-enabled">();

NamedQueryCache* namedQueryCache_ = nullptr;

std::optional<std::string> pinWithExplicitName_ = std::nullopt;
};
12 changes: 12 additions & 0 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <type_traits>
#include <variant>

#include "NamedQueryCache.h"
#include "backports/algorithm.h"
#include "engine/Bind.h"
#include "engine/CartesianProductJoin.h"
Expand Down Expand Up @@ -2443,6 +2444,8 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) {
visitDescribe(arg);
} else if constexpr (std::is_same_v<T, p::SpatialQuery>) {
visitSpatialSearch(arg);
} else if constexpr (std::is_same_v<T, p::NamedCachedQuery>) {
visitNamedCachedQuery(arg);
} else {
static_assert(std::is_same_v<T, p::BasicGraphPattern>);
visitBasicGraphPattern(arg);
Expand Down Expand Up @@ -2616,6 +2619,15 @@ void QueryPlanner::GraphPatternPlanner::visitSpatialSearch(
visitGroupOptionalOrMinus(std::move(candidatesOut));
}

// _____________________________________________________________________________
// Plan a reference to a named cached query: fetch the operation for the
// validated identifier of `arg` from the named query cache of the execution
// context, wrap it in a `SubtreePlan`, and hand it on as the only candidate.
void QueryPlanner::GraphPatternPlanner::visitNamedCachedQuery(
    parsedQuery::NamedCachedQuery& arg) {
  QueryExecutionContext* qec = planner_._qec;
  auto cachedOperation =
      qec->namedQueryCache().getOperation(arg.validateAndGetIdentifier(), qec);
  std::vector<SubtreePlan> candidates;
  candidates.push_back(SubtreePlan{qec, std::move(cachedOperation)});
  visitGroupOptionalOrMinus(std::move(candidates));
}

// _______________________________________________________________
void QueryPlanner::GraphPatternPlanner::visitUnion(parsedQuery::Union& arg) {
// TODO<joka921> here we could keep all the candidates, and create a
Expand Down
1 change: 1 addition & 0 deletions src/engine/QueryPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ class QueryPlanner {
void visitTransitivePath(parsedQuery::TransPath& transitivePath);
void visitPathSearch(parsedQuery::PathQuery& config);
void visitSpatialSearch(parsedQuery::SpatialQuery& config);
void visitNamedCachedQuery(parsedQuery::NamedCachedQuery& config);
void visitUnion(parsedQuery::Union& un);
void visitSubquery(parsedQuery::Subquery& subquery);
void visitDescribe(parsedQuery::Describe& describe);
Expand Down
Loading
Loading