Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Have indexing by record scan by key ranges instead of by tuple ranges #2655

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import com.apple.foundationdb.record.IndexScanType;
import com.apple.foundationdb.record.IndexState;
import com.apple.foundationdb.record.IsolationLevel;
import com.apple.foundationdb.record.KeyRange;
import com.apple.foundationdb.record.MutableRecordStoreState;
import com.apple.foundationdb.record.PipelineOperation;
import com.apple.foundationdb.record.PlanHashable;
Expand Down Expand Up @@ -136,6 +137,7 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
Expand Down Expand Up @@ -1192,32 +1194,56 @@ public RecordCursor<FDBStoredRecord<Message>> scanRecords(@Nullable final Tuple
}

/**
 * Scan records between a low and a high tuple bound.
 *
 * <p>The bounds and their endpoint types are not applied here directly; they are captured
 * in a callback that {@code scanTypedRecordsInternal} invokes on its
 * {@code KeyValueCursor.Builder}.</p>
 *
 * @param typedSerializer serializer forwarded to the internal scan
 * @param low low tuple bound handed to the builder's {@code setLow}
 * @param high high tuple bound handed to the builder's {@code setHigh}
 * @param lowEndpoint endpoint type paired with {@code low}
 * @param highEndpoint endpoint type paired with {@code high}
 * @param continuation continuation forwarded to the internal scan
 * @param scanProperties scan properties forwarded to the internal scan
 * @param <M> message type of the returned records
 *
 * @return the cursor returned by {@code scanTypedRecordsInternal}
 */
@Nonnull
@SuppressWarnings("PMD.CloseResource")
public <M extends Message> RecordCursor<FDBStoredRecord<M>> scanTypedRecords(@Nonnull RecordSerializer<M> typedSerializer,
        @Nullable final Tuple low, @Nullable final Tuple high,
        @Nonnull final EndpointType lowEndpoint, @Nonnull final EndpointType highEndpoint,
        @Nullable byte[] continuation,
        @Nonnull ScanProperties scanProperties) {
    // Package the tuple bounds as a builder callback so the shared internal scan can stay range-agnostic.
    final Consumer<KeyValueCursor.Builder> applyTupleBounds =
            cursorBuilder -> cursorBuilder.setLow(low, lowEndpoint).setHigh(high, highEndpoint);
    return scanTypedRecordsInternal(typedSerializer, applyTupleBounds, continuation, scanProperties);
}

/**
 * Scan the records in the given key range.
 *
 * <p>Delegates to {@code scanTypedRecordsKeyRange}, passing this store's {@code serializer}.</p>
 *
 * @param range key range forwarded to the typed scan
 * @param continuation continuation forwarded to the typed scan
 * @param scanProperties scan properties forwarded to the typed scan
 *
 * @return the cursor returned by {@code scanTypedRecordsKeyRange}
 */
@Nonnull
@Override
public RecordCursor<FDBStoredRecord<Message>> scanRecordsKeyRange(@Nonnull final KeyRange range, @Nullable final byte[] continuation, @Nonnull final ScanProperties scanProperties) {
return scanTypedRecordsKeyRange(serializer, range, continuation, scanProperties);
}

/**
 * Scan records within a {@code KeyRange} rather than between tuple bounds.
 *
 * <p>The range is captured in a callback that {@code scanTypedRecordsInternal} invokes on
 * its {@code KeyValueCursor.Builder}, where it is applied through {@code setRange}.</p>
 *
 * @param typedSerializer serializer forwarded to the internal scan
 * @param range key range handed to the builder's {@code setRange}
 * @param continuation continuation forwarded to the internal scan
 * @param scanProperties scan properties forwarded to the internal scan
 * @param <M> message type of the returned records
 *
 * @return the cursor returned by {@code scanTypedRecordsInternal}
 */
@Nonnull
public <M extends Message> RecordCursor<FDBStoredRecord<M>> scanTypedRecordsKeyRange(@Nonnull RecordSerializer<M> typedSerializer,
        @Nonnull final KeyRange range,
        @Nullable byte[] continuation,
        @Nonnull ScanProperties scanProperties) {
    // Package the key range as a builder callback so the shared internal scan can stay range-agnostic.
    final Consumer<KeyValueCursor.Builder> applyKeyRange = cursorBuilder -> cursorBuilder.setRange(range);
    return scanTypedRecordsInternal(typedSerializer, applyKeyRange, continuation, scanProperties);
}

@Nonnull
@SuppressWarnings("PMD.CloseResource")
private <M extends Message> RecordCursor<FDBStoredRecord<M>> scanTypedRecordsInternal(@Nonnull RecordSerializer<M> typedSerializer,
@Nonnull Consumer<KeyValueCursor.Builder> setRange,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor: I was trying to think if there was a better way to structure this than a Consumer. The answer may be "no". The best alternatives I could think of would be to either have this take a KeyValueCursor.Builder that callers would pre-fill with the range already set, or something more exotic like adding a .setRange(KeyValueCursor.Builder) to the TupleRange and KeyValueRange abstractions, or making TupleRange and KeyValueRange implement some common interface that could set the low and high endpoints on a KeyValueCursor.Builder.

@Nullable byte[] continuation,
@Nonnull ScanProperties scanProperties) {
final RecordMetaData metaData = metaDataProvider.getRecordMetaData();
final Subspace recordsSubspace = recordsSubspace();
final SplitHelper.SizeInfo sizeInfo = new SplitHelper.SizeInfo();
final RecordCursor<FDBRawRecord> rawRecords;
if (metaData.isSplitLongRecords()) {
RecordCursor<KeyValue> keyValues = KeyValueCursor.Builder.withSubspace(recordsSubspace)
KeyValueCursor.Builder keyValuesBuilder = KeyValueCursor.Builder.withSubspace(recordsSubspace)
.setContext(context).setContinuation(continuation)
.setLow(low, lowEndpoint)
.setHigh(high, highEndpoint)
.setScanProperties(scanProperties.with(ExecuteProperties::clearRowAndTimeLimits).with(ExecuteProperties::clearState))
.build();
.setScanProperties(scanProperties.with(ExecuteProperties::clearRowAndTimeLimits).with(ExecuteProperties::clearState));
setRange.accept(keyValuesBuilder);
RecordCursor<KeyValue> keyValues = keyValuesBuilder.build();
rawRecords = new SplitHelper.KeyValueUnsplitter(context, recordsSubspace, keyValues, useOldVersionFormat(), sizeInfo, scanProperties.isReverse(),
new CursorLimitManager(context, scanProperties.with(ExecuteProperties::clearReturnedRowLimit)))
.skip(scanProperties.getExecuteProperties().getSkip())
.limitRowsTo(scanProperties.getExecuteProperties().getReturnedRowLimit());
} else {
KeyValueCursor.Builder keyValuesBuilder = KeyValueCursor.Builder.withSubspace(recordsSubspace)
.setContext(context).setContinuation(continuation)
.setLow(low, lowEndpoint)
.setHigh(high, highEndpoint);
.setContext(context).setContinuation(continuation);
setRange.accept(keyValuesBuilder);
if (omitUnsplitRecordSuffix) {
rawRecords = keyValuesBuilder.setScanProperties(scanProperties).build().map(kv -> {
sizeInfo.set(kv);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import com.apple.foundationdb.record.IndexScanType;
import com.apple.foundationdb.record.IndexState;
import com.apple.foundationdb.record.IsolationLevel;
import com.apple.foundationdb.record.KeyRange;
import com.apple.foundationdb.record.PipelineOperation;
import com.apple.foundationdb.record.RecordCoreArgumentException;
import com.apple.foundationdb.record.RecordCoreException;
Expand Down Expand Up @@ -866,6 +867,20 @@ RecordCursor<FDBStoredRecord<M>> scanRecords(@Nullable Tuple low, @Nullable Tupl
@Nullable byte[] continuation,
@Nonnull ScanProperties scanProperties);

/**
* Scan the records in the database in a key range.
*
* @param range key range
* @param continuation any continuation from a previous scan
* @param scanProperties skip, limit and other scan properties
*
* @return a cursor that will scan everything in the range, picking up at continuation, and honoring the given scan properties
*/
@Nonnull
RecordCursor<FDBStoredRecord<M>> scanRecordsKeyRange(@Nonnull KeyRange range,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think this API should be INTERNAL? Most of the use cases I can think of are probably internal, though there may be some use case for APIs that return a KeyRange that the user can hand back, or something.

@Nullable byte[] continuation,
@Nonnull ScanProperties scanProperties);

/**
* Count the number of records in the database in a range.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import com.apple.foundationdb.record.ExecuteState;
import com.apple.foundationdb.record.IndexEntry;
import com.apple.foundationdb.record.IsolationLevel;
import com.apple.foundationdb.record.KeyRange;
import com.apple.foundationdb.record.RecordCoreException;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordIndexUniquenessViolation;
Expand Down Expand Up @@ -183,6 +184,12 @@ public RecordCursor<FDBStoredRecord<M>> scanRecords(@Nullable Tuple low, @Nullab
return untypedStore.scanTypedRecords(typedSerializer, low, high, lowEndpoint, highEndpoint, continuation, scanProperties);
}

/**
 * Scan the records in the given key range.
 *
 * <p>Delegates to {@code untypedStore.scanTypedRecordsKeyRange}, passing this store's
 * {@code typedSerializer}.</p>
 *
 * @param range key range forwarded to the untyped store
 * @param continuation continuation forwarded to the untyped store
 * @param scanProperties scan properties forwarded to the untyped store
 *
 * @return the cursor returned by the untyped store's typed key-range scan
 */
@Nonnull
@Override
public RecordCursor<FDBStoredRecord<M>> scanRecordsKeyRange(@Nonnull final KeyRange range, @Nullable final byte[] continuation, @Nonnull final ScanProperties scanProperties) {
return untypedStore.scanTypedRecordsKeyRange(typedSerializer, range, continuation, scanProperties);
}

@Nonnull
@Override
public CompletableFuture<Integer> countRecords(@Nullable Tuple low, @Nullable Tuple high, @Nonnull EndpointType lowEndpoint, @Nonnull EndpointType highEndpoint, @Nullable byte[] continuation, @Nonnull ScanProperties scanProperties) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
import com.apple.foundationdb.Range;
import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.async.AsyncUtil;
import com.apple.foundationdb.async.RangeSet;
import com.apple.foundationdb.record.ExecuteProperties;
import com.apple.foundationdb.record.IndexBuildProto;
import com.apple.foundationdb.record.IsolationLevel;
import com.apple.foundationdb.record.KeyRange;
import com.apple.foundationdb.record.RecordCursor;
import com.apple.foundationdb.record.RecordCursorResult;
import com.apple.foundationdb.record.ScanProperties;
Expand Down Expand Up @@ -125,9 +125,7 @@ private CompletableFuture<Void> buildMultiTargetIndex(@Nonnull SubspaceProvider
} else {
final Range range = tupleRange.toRange();
rangeStart = range.begin;
// tupleRange has an inclusive high endpoint, so end isn't a valid tuple.
// But buildRangeOnly needs to convert missing Ranges back to TupleRanges, so round up.
rangeEnd = ByteArrayUtil.strinc(range.end);
rangeEnd = range.end;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting up this particular test may be a pain, so it may not be worth it, but I think if we wanted to validate that this fixed anything, we could add a test that used non-integer record type keys. Or a test that used a record type key with what would have been a problematic value for its record type key, like 255.

}

final CompletableFuture<FDBRecordStore> maybePresetRangeFuture =
Expand Down Expand Up @@ -172,12 +170,13 @@ private CompletableFuture<Boolean> buildRangeOnly(@Nonnull FDBRecordStore store,
if (range == null) {
return AsyncUtil.READY_FALSE; // no more missing ranges - all done
}
final Tuple rangeStart = RangeSet.isFirstKey(range.begin) ? null : Tuple.fromBytes(range.begin);
final Tuple rangeEnd = RangeSet.isFinalKey(range.end) ? null : Tuple.fromBytes(range.end);
final TupleRange tupleRange = TupleRange.between(rangeStart, rangeEnd);
final byte[] keyPrefix = store.recordsSubspace().pack();
final byte[] rangeStart = ByteArrayUtil.join(keyPrefix, range.begin);
final byte[] rangeEnd = ByteArrayUtil.join(keyPrefix, range.end);
final KeyRange keyRange = new KeyRange(rangeStart, rangeEnd);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to wrap my head around the KeyRange. Looking at TupleRange::toRange (which is still used to translate a KeyRange into an FDB Range), a KeyRange where the low endpoint is RANGE_INCLUSIVE and the end endpoint is always RANGE_EXCLUSIVE will always behave just like a Range over those same two keys. But it also behaves just like a TupleRange with packed elements, so for instance, something like new KeyRange(bytes, EndpointType.RANGE_INCLUSIVE, bytes, EndpointType.RANGE_INCLUSIVE) represents all keys "strictly" prefixed by the range (that is, excluding keys prefixed by bytes + '\xff').

That feels fair enough.


RecordCursor<FDBStoredRecord<Message>> cursor =
store.scanRecords(tupleRange, null, scanProperties);
store.scanRecordsKeyRange(keyRange, null, scanProperties);

final AtomicReference<RecordCursorResult<FDBStoredRecord<Message>>> lastResult = new AtomicReference<>(RecordCursorResult.exhausted());
final AtomicBoolean hasMore = new AtomicBoolean(true);
Expand All @@ -189,20 +188,20 @@ private CompletableFuture<Boolean> buildRangeOnly(@Nonnull FDBRecordStore store,
this::getRecordIfTypeMatch,
lastResult, hasMore, recordsScanned, isIdempotent)
.thenCompose(ignore -> postIterateRangeOnly(targetRangeSets, hasMore.get(), lastResult,
rangeStart, rangeEnd, scanProperties.isReverse()));
range.begin, range.end, scanProperties.isReverse()));
});
}

private CompletableFuture<Boolean> postIterateRangeOnly(List<IndexingRangeSet> targetRangeSets, boolean hasMore,
AtomicReference<RecordCursorResult<FDBStoredRecord<Message>>> lastResult,
Tuple rangeStart, Tuple rangeEnd, boolean isReverse) {
byte[] rangeStart, byte[] rangeEnd, boolean isReverse) {
if (isReverse) {
Tuple continuation = hasMore ? lastResult.get().get().getPrimaryKey() : rangeStart;
return insertRanges(targetRangeSets, packOrNull(continuation), packOrNull(rangeEnd))
byte[] continuation = hasMore ? packOrNull(lastResult.get().get().getPrimaryKey()) : rangeStart;
return insertRanges(targetRangeSets, continuation, rangeEnd)
.thenApply(ignore -> hasMore || rangeStart != null);
} else {
Tuple continuation = hasMore ? lastResult.get().get().getPrimaryKey() : rangeEnd;
return insertRanges(targetRangeSets, packOrNull(rangeStart), packOrNull(continuation))
byte[] continuation = hasMore ? packOrNull(lastResult.get().get().getPrimaryKey()) : rangeEnd;
return insertRanges(targetRangeSets, rangeStart, continuation)
.thenApply(ignore -> hasMore || rangeEnd != null);
}
}
Expand Down