Skip to content

Commit

Permalink
Resolve FoundationDB#3008: Support Lucene index scrubbing
Browse files Browse the repository at this point in the history
  To verify Lucene index validity, support "Report Only" scrubbing for:
     Dangling Lucene index entries: iterate "all entries" (similar to LuceneScanAllEntriesTest) and validate that all pointers lead to existing records.
     Missing Lucene index entries: iterate all records and validate that their primary keys are represented in the “primary key to Lucene segment” map, and that the Lucene segment exists.
  • Loading branch information
jjezra committed Dec 18, 2024
1 parent 35707ab commit 0769dbc
Show file tree
Hide file tree
Showing 4 changed files with 280 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/ReleaseNotes.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The Apache Commons library has been removed as a dependency. There were a few lo
* **Feature** Feature 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Feature** Feature 4 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Feature** Associate index scrubbing with index maintainers [(Issue #2984)](https://github.com/FoundationDB/fdb-record-layer/issues/2984)
* **Feature** Support Lucene index scrubbing [(Issue #3008)](https://github.com/FoundationDB/fdb-record-layer/issues/3008)
* **Breaking change** Change 1 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Breaking change** Change 2 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
* **Breaking change** Change 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@
import com.apple.foundationdb.record.provider.foundationdb.IndexOperation;
import com.apple.foundationdb.record.provider.foundationdb.IndexOperationResult;
import com.apple.foundationdb.record.provider.foundationdb.IndexScanBounds;
import com.apple.foundationdb.record.provider.foundationdb.IndexScrubbingTools;
import com.apple.foundationdb.record.provider.foundationdb.indexes.InvalidIndexEntry;
import com.apple.foundationdb.record.provider.foundationdb.indexes.StandardIndexMaintainer;
import com.apple.foundationdb.record.provider.foundationdb.indexes.ValueIndexScrubbingToolsMissing;
import com.apple.foundationdb.record.query.QueryToKeyMatcher;
import com.apple.foundationdb.tuple.Tuple;
import com.google.common.annotations.VisibleForTesting;
Expand Down Expand Up @@ -750,4 +752,17 @@ private void logSerializationError(String format, Object ... arguments) {
}
}
}

/**
 * Create the scrubbing toolbox that matches the requested scrubbing type.
 *
 * @param type the kind of scrubbing requested
 * @return a new {@link LuceneIndexScrubbingToolsMissing} for {@code MISSING}, a new
 * {@link LuceneIndexScrubbingToolsDangling} for {@code DANGLING} (both built from this object's
 * {@code partitioner} and {@code state}), or {@code null} for any other type
 */
@Nullable
@Override
public IndexScrubbingTools<?> getIndexScrubbingTools(final IndexScrubbingTools.ScrubbingType type) {
    final IndexScrubbingTools<?> tools;
    switch (type) {
        case DANGLING:
            tools = new LuceneIndexScrubbingToolsDangling(partitioner, state);
            break;
        case MISSING:
            tools = new LuceneIndexScrubbingToolsMissing(partitioner, state);
            break;
        default:
            // No toolbox is provided for any other scrubbing type.
            tools = null;
            break;
    }
    return tools;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* LuceneIndexScrubbingToolsDangling.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2024 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.record.EvaluationContext;
import com.apple.foundationdb.record.ExecuteProperties;
import com.apple.foundationdb.record.IndexEntry;
import com.apple.foundationdb.record.IsolationLevel;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordCursorResult;
import com.apple.foundationdb.record.ScanProperties;
import com.apple.foundationdb.record.TupleRange;
import com.apple.foundationdb.record.cursors.AutoContinuingCursor;
import com.apple.foundationdb.record.logging.KeyValueLogMessage;
import com.apple.foundationdb.record.logging.LogMessageKeys;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.metadata.RecordType;
import com.apple.foundationdb.record.provider.foundationdb.FDBDatabaseRunner;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
import com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer;
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerState;
import com.apple.foundationdb.record.provider.foundationdb.IndexOrphanBehavior;
import com.apple.foundationdb.record.provider.foundationdb.IndexScrubbingTools;
import com.apple.foundationdb.record.query.plan.ScanComparisons;
import com.apple.foundationdb.tuple.Tuple;

import javax.annotation.Nonnull;
import java.util.Collection;
import java.util.concurrent.CompletableFuture;

/**
 * Index scrubbing toolbox for a Lucene index maintainer. Detects dangling Lucene index entries - i.e. index
 * entries pointing to non-existing record(s). Detected entries are reported only: the {@code allowRepair}
 * flag passed to {@link #presetCommonParams} is ignored by this class.
 */
public class LuceneIndexScrubbingToolsDangling implements IndexScrubbingTools<IndexEntry> {
    // Both fields are set by presetCommonParams. The index stays null until then, which handleOneItem
    // treats as a usage error.
    private Index index = null;
    private boolean isSynthetic = false;
    @Nonnull
    private final LucenePartitioner partitioner; // non-mutable
    @Nonnull
    private final IndexMaintainerState state;

    /**
     * Create a dangling-entries scrubbing toolbox.
     *
     * @param partitioner the partitioner of the scrubbed Lucene index (stored, not otherwise used by this class yet)
     * @param state the index maintainer state; its context is used to create the runner that drives the scan
     */
    public LuceneIndexScrubbingToolsDangling(@Nonnull final LucenePartitioner partitioner, @Nonnull final IndexMaintainerState state) {
        this.partitioner = partitioner;
        this.state = state;
    }

    /**
     * Remember the parameters that are common to all items of this scrubbing session.
     *
     * @param index the index to scrub
     * @param allowRepair ignored; this toolbox only reports issues
     * @param isSynthetic whether the index entries point to synthetic records
     * @param types ignored by this toolbox
     */
    @Override
    public void presetCommonParams(final Index index, final boolean allowRepair, final boolean isSynthetic, final Collection<RecordType> types) {
        this.index = index;
        this.isSynthetic = isSynthetic;
    }

    /**
     * Return a cursor over the entries of the scrubbed index, using a "match all documents" Lucene query.
     *
     * @param range currently unused (see the TODO below)
     * @param store the record store whose index is scanned
     * @param limit the returned row limit applied to each underlying index scan
     * @return an auto-continuing cursor of index entries
     */
    @Override
    public RecordCursor<IndexEntry> getCursor(final TupleRange range, final FDBRecordStore store, final int limit) {
        // TODO: Range tuple should begin with a null or [groupingKey, timestamp]
        // NOTE(review): this runner is never closed in this class - confirm whether AutoContinuingCursor
        // takes ownership of it.
        FDBDatabaseRunner runner = state.context.newRunner();
        final ScanProperties scanProperties = new ScanProperties(ExecuteProperties.newBuilder()
                .setIsolationLevel(IsolationLevel.SERIALIZABLE)
                .setReturnedRowLimit(limit)
                .build());
        // TODO: start from continuation
        LuceneQueryClause search = LuceneQuerySearchClause.MATCH_ALL_DOCS_QUERY;
        LuceneScanParameters scan = new LuceneScanQueryParameters(
                ScanComparisons.EMPTY,
                search,
                null, null, null,
                null);
        // See partitionManager
        // NOTE(review): the lambda ignores its own context argument and scans through the store that was
        // passed to getCursor - confirm this is intended.
        return new AutoContinuingCursor<>(
                runner,
                (context, continuation) -> {
                    LuceneScanBounds scanBounds = scan.bind(store, index, EvaluationContext.EMPTY);
                    return store.scanIndex(index, scanBounds, continuation, scanProperties);
                });
    }

    /**
     * Extract the key that identifies the given cursor result.
     *
     * @param result a result of the cursor returned by {@link #getCursor}
     * @return the index entry's key, or {@code null} if the result carries no index entry
     */
    @Override
    public Tuple getKeyFromCursorResult(final RecordCursorResult<IndexEntry> result) {
        final IndexEntry indexEntry = result.get();
        return indexEntry == null ? null : indexEntry.getKey();
        // TODO: return a tuple that contains groupId, timestamp

    }

    /**
     * Check whether a single index entry is dangling.
     *
     * @param store the record store used to load the record that the entry points to
     * @param result a result of the cursor returned by {@link #getCursor}
     * @return a future that completes with an {@link Issue} if the entry is dangling, or with {@code null} if
     * the entry is valid or the result carries no index entry
     * @throws IllegalStateException if {@link #presetCommonParams} did not set the index beforehand
     */
    @Override
    public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final RecordCursorResult<IndexEntry> result) {
        if (index == null) {
            throw new IllegalStateException("presetParams was not called appropriately for this scrubbing tool");
        }

        final IndexEntry indexEntry = result.get();
        if (indexEntry == null) {
            return CompletableFuture.completedFuture(null);
        }

        if (isSynthetic) {
            return store.loadSyntheticRecord(indexEntry.getPrimaryKey()).thenApply(syntheticRecord -> {
                if (syntheticRecord.getConstituents().isEmpty()) {
                    // None of the constituents of this synthetic type are present, so it must be dangling
                    return scrubDanglingEntry(indexEntry);
                }
                return null;
            });
        } else {
            // RETURN: an entry without a stored record is handed back to us rather than handled by the store.
            return store.loadIndexEntryRecord(indexEntry, IndexOrphanBehavior.RETURN).thenApply(indexedRecord -> {
                if (!indexedRecord.hasStoredRecord()) {
                    // Here: Oh, No! this index is dangling!
                    return scrubDanglingEntry(indexEntry);
                }
                return null;
            });
        }
    }

    /**
     * Build the report for a dangling index entry. Nothing is repaired here.
     *
     * @param indexEntry the dangling index entry
     * @return an issue carrying a log message with the entry's key and primary key, and the dangling entries counter
     */
    private Issue scrubDanglingEntry(@Nonnull IndexEntry indexEntry) {
        // Here: the index entry is dangling. Report the issue (this toolbox does not repair).
        final Tuple valueKey = indexEntry.getKey();
        return new Issue(
                KeyValueLogMessage.build("Scrubber: dangling index entry",
                        LogMessageKeys.KEY, valueKey,
                        LogMessageKeys.PRIMARY_KEY, indexEntry.getPrimaryKey()),
                FDBStoreTimer.Counts.INDEX_SCRUBBER_DANGLING_ENTRIES,
                null);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* LuceneIndexScrubbingToolsMissing.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2024 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.record.lucene;

import com.apple.foundationdb.record.ExecuteProperties;
import com.apple.foundationdb.record.IsolationLevel;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordCursorResult;
import com.apple.foundationdb.record.ScanProperties;
import com.apple.foundationdb.record.TupleRange;
import com.apple.foundationdb.record.logging.KeyValueLogMessage;
import com.apple.foundationdb.record.logging.LogMessageKeys;
import com.apple.foundationdb.record.metadata.Index;
import com.apple.foundationdb.record.metadata.RecordType;
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
import com.apple.foundationdb.record.provider.foundationdb.FDBStoreTimer;
import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord;
import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainerState;
import com.apple.foundationdb.record.provider.foundationdb.IndexScrubbingTools;
import com.apple.foundationdb.tuple.Tuple;
import com.google.protobuf.Message;

import javax.annotation.Nonnull;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletableFuture;

/**
 * Index scrubbing toolbox for a Lucene index maintainer. Scans the stored records and reports records for
 * which index entries are missing. Detected issues are reported only: the {@code allowRepair} flag passed to
 * {@link #presetCommonParams} is ignored by this class.
 */
public class LuceneIndexScrubbingToolsMissing implements IndexScrubbingTools<FDBStoredRecord<Message>> {
    // All three fields are set by presetCommonParams. recordTypes and index must be non-null before
    // handleOneItem is called.
    private Collection<RecordType> recordTypes = null;
    private Index index;
    private boolean isSynthetic;

    @Nonnull
    private final LucenePartitioner partitioner; // non-mutable
    @Nonnull
    private final IndexMaintainerState state;

    /**
     * Create a missing-entries scrubbing toolbox.
     *
     * @param partitioner the partitioner of the scrubbed Lucene index (stored, not otherwise used by this class yet)
     * @param state the index maintainer state (stored, not otherwise used by this class yet)
     */
    public LuceneIndexScrubbingToolsMissing(@Nonnull final LucenePartitioner partitioner, @Nonnull final IndexMaintainerState state) {
        this.partitioner = partitioner;
        this.state = state;
    }


    /**
     * Remember the parameters that are common to all items of this scrubbing session.
     *
     * @param index the index to scrub
     * @param allowRepair ignored; this toolbox only reports issues
     * @param isSynthetic whether the index is defined on synthetic records (stored, not used yet)
     * @param types the record types to check; records of any other type are skipped
     */
    @Override
    public void presetCommonParams(Index index, boolean allowRepair, boolean isSynthetic, Collection<RecordType> types) {
        this.recordTypes = types;
        this.index = index;
        this.isSynthetic = isSynthetic;
    }

    /**
     * Return a cursor over the stored records in the given range.
     *
     * @param range the primary key range to scan
     * @param store the record store to scan
     * @param limit the returned row limit of the scan
     * @return a cursor of stored records, read at snapshot isolation level
     */
    @Override
    public RecordCursor<FDBStoredRecord<Message>> getCursor(final TupleRange range, final FDBRecordStore store, final int limit) {
        final IsolationLevel isolationLevel = IsolationLevel.SNAPSHOT;
        final ExecuteProperties.Builder executeProperties = ExecuteProperties.newBuilder()
                .setIsolationLevel(isolationLevel)
                .setReturnedRowLimit(limit);

        // NOTE(review): the boolean argument is presumably the "reverse" flag (forward scan) - confirm.
        final ScanProperties scanProperties = new ScanProperties(executeProperties.build(), false);
        return store.scanRecords(range, null, scanProperties);
    }

    /**
     * Extract the key that identifies the given cursor result.
     *
     * @param result a result of the cursor returned by {@link #getCursor}
     * @return the record's primary key, or {@code null} if the result carries no record
     */
    @Override
    public Tuple getKeyFromCursorResult(final RecordCursorResult<FDBStoredRecord<Message>> result) {
        final FDBStoredRecord<Message> storedRecord = result.get();
        return storedRecord == null ? null : storedRecord.getPrimaryKey();
    }

    /**
     * Check whether index entries are missing for a single record.
     *
     * @param store the record store being scrubbed
     * @param result a result of the cursor returned by {@link #getCursor}
     * @return a future that completes with an {@link Issue} if index entries are missing for the record, or
     * with {@code null} if nothing is missing, the result carries no record, or the record's type is not one
     * of the scrubbed types
     * @throws IllegalStateException if {@link #presetCommonParams} did not set the record types and index beforehand
     */
    @Override
    public CompletableFuture<Issue> handleOneItem(final FDBRecordStore store, final RecordCursorResult<FDBStoredRecord<Message>> result) {
        if (recordTypes == null || index == null) {
            throw new IllegalStateException("presetParams was not called appropriately for this scrubbing tool");
        }

        final FDBStoredRecord<Message> rec = result.get();
        if (rec == null || !recordTypes.contains(rec.getRecordType())) {
            return CompletableFuture.completedFuture(null);
        }

        return getMissingIndexKeys(store, rec)
                .thenApply(missingIndexesKeys -> {
                    if (missingIndexesKeys.isEmpty()) {
                        return null;
                    }
                    // Here: Oh, No! the index is missing!!
                    // Report the issue (this toolbox does not repair).
                    return new Issue(
                            KeyValueLogMessage.build("Scrubber: missing index entry",
                                    LogMessageKeys.KEY, rec.getPrimaryKey().toString(),
                                    LogMessageKeys.INDEX_KEY, missingIndexesKeys.toString()),
                            FDBStoreTimer.Counts.INDEX_SCRUBBER_MISSING_ENTRIES,
                            null);
                });
    }

    /**
     * Find the index keys that are missing for the given record.
     * <p>
     * Not implemented yet: this placeholder always reports that nothing is missing. It returns an empty list
     * rather than {@code null} because {@link #handleOneItem} dereferences the result.
     *
     * @param store the record store being scrubbed
     * @param rec the record to check
     * @return a future that completes with the (currently always empty) list of missing index keys
     */
    private CompletableFuture<List<Tuple>> getMissingIndexKeys(FDBRecordStore store, FDBStoredRecord<Message> rec) {
        // TODO: follow the logic of LuceneIndexMaintainer::tryDelete
        return CompletableFuture.completedFuture(Collections.emptyList());
    }
}

0 comments on commit 0769dbc

Please sign in to comment.