Skip to content

Commit

Permalink
also report index disk usage and number of segments
Browse files Browse the repository at this point in the history
  • Loading branch information
mikemccand committed Sep 9, 2024
1 parent ebefd8c commit 291641c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 3 deletions.
18 changes: 17 additions & 1 deletion src/main/knn/KnnGraphTester.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,12 @@
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StandardDirectoryReader;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
Expand Down Expand Up @@ -121,6 +123,8 @@ public class KnnGraphTester {
private boolean reindex;
private boolean forceMerge;
private int reindexTimeMsec;
private int indexNumSegments;
private double indexSizeOnDiskMB;
private int beamWidth;
private int maxConn;
private boolean quantize;
Expand Down Expand Up @@ -364,6 +368,16 @@ private void run(String... args) throws Exception {
if (forceMerge) {
forceMerge();
}
// Open the index at indexPath to record how many segments it has and how much disk it uses;
// both values are stored in fields so they can be reported later.
try (Directory dir = FSDirectory.open(indexPath); IndexReader reader = DirectoryReader.open(dir)) {
  indexNumSegments = reader.leaves().size();
  System.out.println("index has " + indexNumSegments + " segments");
  long indexSizeOnDiskBytes = 0;
  // The reader is cast to StandardDirectoryReader to reach its SegmentInfos; sum the length of
  // every file it lists (the `true` argument asks for the segments file to be included as well).
  for (String fileName : ((StandardDirectoryReader) reader).getSegmentInfos().files(true)) {
    indexSizeOnDiskBytes += dir.fileLength(fileName);
  }
  // floating-point division so fractional megabytes are kept
  indexSizeOnDiskMB = indexSizeOnDiskBytes / 1024. / 1024.;
  System.out.println(String.format(Locale.ROOT, "index disk usage is %.2f MB", indexSizeOnDiskMB));
}
if (operation != null) {
switch (operation) {
case "-search":
Expand Down Expand Up @@ -646,7 +660,7 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][]
}
System.out.printf(
Locale.ROOT,
"SUMMARY: %5.3f\t%5.2f\t%d\t%d\t%d\t%d\t%d\t%s\t%d\t%d\t%.2f\t%s\n",
"SUMMARY: %5.3f\t%5.3f\t%d\t%d\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%.2f\t%.2f\t%s\n",
recall,
totalCpuTimeMS / (float) numIters,
numDocs,
Expand All @@ -657,6 +671,8 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][]
quantizeDesc,
totalVisited,
reindexTimeMsec,
indexNumSegments,
indexSizeOnDiskMB,
selectivity,
prefilter ? "pre-filter" : "post-filter");
}
Expand Down
6 changes: 5 additions & 1 deletion src/main/knn/VectorReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,12 @@ void reset() throws IOException {
/**
 * Reads the next buffer-full of bytes from {@code input} into {@code bytes}. If the read comes up
 * short, rewinds {@code input} to position 0 and reads once more, so callers can keep cycling
 * through the file. On return {@code bytes} is positioned at 0.
 *
 * @throws IOException if reading from or repositioning {@code input} fails
 * @throws IllegalStateException if a full buffer still cannot be read after rewinding
 */
protected final void readNext() throws IOException {
  int bytesRead = this.input.read(bytes);
  if (bytesRead < bytes.capacity()) {
    // wrap around back to the start of the file if we hit the end:
    this.input.position(0);
    // discard whatever a short read left behind so the retry refills the buffer from its start
    bytes.position(0);
    // read exactly once after rewinding and keep the count so the size check below is meaningful
    bytesRead = this.input.read(bytes);
    if (bytesRead < bytes.capacity()) {
      throw new IllegalStateException("vector file " + input + " doesn't even have enough bytes for a single vector? got bytesRead=" + bytesRead);
    }
  }
  bytes.position(0);
}
Expand Down
2 changes: 1 addition & 1 deletion src/python/knnPerfTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def run_knn_benchmark(checkout, values):
all_results.append(summary)
print('\nResults:')

header = 'recall\tlatency (ms)\tnDoc\ttopK\tfanout\tmaxConn\tbeamWidth\tquantized\tvisited\tindex ms\tselectivity\tfilterType'
header = 'recall\tlatency (ms)\tnDoc\ttopK\tfanout\tmaxConn\tbeamWidth\tquantized\tvisited\tindex ms\tnum segments\tindex size (MB)\tselectivity\tfilterType'

# crazy logic to make everything fixed width so rendering in fixed width font "aligns":
headers = header.split('\t')
Expand Down

0 comments on commit 291641c

Please sign in to comment.