Skip to content

Commit

Permalink
to: basically done embedding context provider logic, but with a littl…
Browse files Browse the repository at this point in the history
…e bug
  • Loading branch information
Sma1lboy committed Jan 12, 2025
1 parent eebffc6 commit 4a1f7cb
Show file tree
Hide file tree
Showing 7 changed files with 16,461 additions and 11,720 deletions.
6 changes: 5 additions & 1 deletion backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,26 @@
"@types/bcrypt": "^5.0.2",
"@types/fs-extra": "^11.0.4",
"@types/normalize-path": "^3.0.2",
"@types/pacote": "^11.1.8",
"@types/toposort": "^2.0.7",
"toposort": "^2.0.2",
"axios": "^1.7.7",
"bcrypt": "^5.1.1",
"class-validator": "^0.14.1",
"fastembed": "^1.14.1",
"fs-extra": "^11.2.0",
"graphql": "^16.9.0",
"graphql-subscriptions": "^2.0.0",
"graphql-ws": "^5.16.0",
"lodash": "^4.17.21",
"markdown-to-txt": "^2.0.1",
"normalize-path": "^3.0.0",
"pacote": "^21.0.0",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.1",
"sqlite3": "^5.1.7",
"subscriptions-transport-ws": "^0.11.0",
"tar": "^7.4.3",
"toposort": "^2.0.2",
"typeorm": "^0.3.20",
"uuid": "^10.0.0"
},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { Logger } from '@nestjs/common';
import DependenciesEmbeddingHandler from '../dependencies-embedding-handler';

// Logger shared by every log statement in this integration suite.
const logger = new Logger('dependencies embed tester');

// Integration tests only run when INTEGRATION_TEST is set to '1'.
const isIntegrationTest = process.env.INTEGRATION_TEST === '1';

if (!isIntegrationTest) {
  logger.log(
    'Integration tests are skipped. Set INTEGRATION_TEST=1 to run them.',
  );
}

// Use describe.skip rather than omitting the suite entirely: Jest fails a test
// file that registers no tests at all, while a skipped suite is reported as
// skipped and the run stays green.
const describeIntegration = isIntegrationTest ? describe : describe.skip;

// Upper bound for the slow steps of this suite (5 minutes).
const INTEGRATION_TIMEOUT_MS = 300000;

// Fixed delay granted to the handler before the first test runs.
const HANDLER_WARMUP_MS = 5000;

describeIntegration('DependenciesEmbeddingHandler Integration Tests', () => {
  let handler: DependenciesEmbeddingHandler;

  // Increase the default timeout for integration tests.
  jest.setTimeout(INTEGRATION_TIMEOUT_MS);

  beforeAll(async () => {
    logger.log(
      'Initializing DependenciesEmbeddingHandler for integration tests...',
    );
    handler = new DependenciesEmbeddingHandler();
    // NOTE(review): the handler exposes no readiness signal in this file, so we
    // wait a fixed delay and assume initialization has finished by then.
    await new Promise((resolve) => setTimeout(resolve, HANDLER_WARMUP_MS));
    logger.log('Initialization complete.');
  });

  afterAll(() => {
    logger.log('Integration tests completed.');
  });

  /**
   * Integration Test Case: Add Real Packages and Perform a Search
   *
   * Purpose:
   * - Verify that DependenciesEmbeddingHandler accepts real npm packages via
   *   addPackages and that searchContext afterwards returns results for them.
   *
   * Steps:
   * 1. Add the real npm packages listed in `packagesToAdd`.
   * 2. Search with a query related to the package that was added.
   * 3. Check that results are returned and the top result is that package.
   *
   * The query and the expected top result are derived from the package that is
   * actually added, so the assertions cannot drift out of sync with the
   * package list.
   */
  test('should add real packages and perform a relevant search', async () => {
    // Real packages to add. Re-enable more entries to exercise ranking across
    // several packages (and adjust the query/expectation accordingly).
    const packagesToAdd = [
      { name: 'lodash', version: '4.17.21' },
      // { name: 'express', version: '4.18.2' },
      // { name: 'react', version: '18.2.0' },
      // { name: 'typescript', version: '4.9.5' },
    ];
    const expectedTopPackage = packagesToAdd[0];

    logger.log('Adding real packages...');

    // Add all packages in a single call.
    await handler.addPackages(packagesToAdd);

    logger.log('Packages added successfully.');

    // Query related to the package that was added above (lodash utilities).
    const searchQuery = 'debounce a function and deep clone an object';

    // Interpolate the query: Nest's Logger treats a trailing string argument
    // as the logging context, not as part of the message.
    logger.log(`Executing search with query: ${searchQuery}`);

    const results = await handler.searchContext(searchQuery);

    logger.log('Search results received.');

    // Validate that results are returned.
    expect(results.length).toBeGreaterThan(0);

    // The top result must be the package that was added.
    const topResult = results[0];
    logger.log(`Top search result: ${topResult.name}@${topResult.version}`);

    expect(topResult.name).toBe(expectedTopPackage.name);
    expect(topResult.version).toBe(expectedTopPackage.version);
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import axios from 'axios';
import DependenciesEmbeddingHandler from '../dependencies-embedding-handler';
import { Logger } from '@nestjs/common';

// Logger shared by the log statements in this unit-test file.
const logger = new Logger('dependencies embed tester');

// Replace the axios module with Jest's automock so no real HTTP request is
// sent; individual tests queue the responses they need on `mockedAxios.get`.
jest.mock('axios');
// Typed view of the mocked module so the mock helper methods type-check.
const mockedAxios = axios as jest.Mocked<typeof axios>;

// Mock fastembed so no embedding model is loaded during unit tests.
// Every passage and every query is mapped to the same fixed vector [1, 2, 3],
// which keeps the tests deterministic.
jest.mock('fastembed', () => ({
  EmbeddingModel: {
    BGEBaseEN: 'BGEBaseEN',
  },
  FlagEmbedding: {
    init: jest.fn().mockResolvedValue({
      // Async generator that yields one batch per input passage; each batch
      // holds a single simulated embedding as a Float32Array.
      // Unused parameters carry a leading underscore to satisfy the lint rule
      // for allowed unused arguments.
      passageEmbed: jest.fn(async function* (
        types: string[],
        _batchSize: number,
      ) {
        for (let i = 0; i < types.length; i += 1) {
          yield [new Float32Array([1, 2, 3])];
        }
      }),
      // The query text is ignored; the same fixed vector is always returned.
      queryEmbed: jest.fn(async (_query: string) => [1, 2, 3]),
    }),
  },
}));

describe('DependenciesEmbeddingHandler', () => {
  let handler: DependenciesEmbeddingHandler;

  beforeEach(() => {
    // Reset recorded calls first, so anything the handler does while being
    // constructed is not wiped afterwards.
    jest.clearAllMocks();
    // clearAllMocks does not drop queued mockImplementationOnce responses;
    // reset axios.get explicitly so responses cannot leak between tests.
    mockedAxios.get.mockReset();
    // Fresh handler for every test.
    handler = new DependenciesEmbeddingHandler();
  });

  /**
   * Test Case: Successfully add a package with built-in type definitions
   *
   * Purpose:
   * - Verify that addPackage stores a package whose package.json declares a
   *   'types' entry, and that the stored record carries an embedding.
   *
   * Steps:
   * 1. Queue an axios.get response carrying a package.json with a 'types' field.
   * 2. Queue an axios.get response carrying the type definitions file content.
   * 3. Call addPackage.
   * 4. Verify name, version and embedding of the stored package info.
   */
  test('should successfully add a package with built-in types', async () => {
    // First response: package.json, including the 'types' field.
    mockedAxios.get.mockImplementationOnce(() =>
      Promise.resolve({
        data: {
          name: 'test-package',
          version: '1.0.0',
          types: 'dist/index.d.ts',
        },
      }),
    );

    // Second response: the type definitions file.
    mockedAxios.get.mockImplementationOnce(() =>
      Promise.resolve({
        data: `
interface TestInterface {
prop1: string;
prop2: number;
}
type TestType = {
field1: string;
field2: boolean;
};
`,
      }),
    );

    await handler.addPackage('test-package', '1.0.0');

    const packageInfo = handler.getPackageInfo('test-package');

    // The package must be stored together with its generated embedding.
    expect(packageInfo).toBeDefined();
    expect(packageInfo?.name).toBe('test-package');
    expect(packageInfo?.version).toBe('1.0.0');
    expect(packageInfo?.embedding).toBeDefined();
  });

  /**
   * Test Case: Successfully search for type definitions
   *
   * Purpose:
   * - Verify that searchContext embeds the query and returns stored packages
   *   together with their type definition content.
   *
   * Limitation:
   * - The fastembed mock returns the same vector for every passage and every
   *   query, so all packages get an identical similarity score. This test
   *   therefore does NOT exercise ranking quality.
   * - NOTE(review): the top-result assertion presumably holds only because
   *   'user-package' is added first and equal scores keep insertion order;
   *   confirm against the handler's sorting.
   *
   * Steps:
   * 1. Queue axios.get responses (package.json + type definitions) for two packages.
   * 2. Add both packages with addPackage.
   * 3. Call searchContext with a query.
   * 4. Verify that results are returned and the first one holds the
   *    'UserInterface' definition.
   */
  test('should successfully search for relevant type definitions', async () => {
    mockedAxios.get
      // First package: package.json, then its type definitions.
      .mockImplementationOnce(() =>
        Promise.resolve({
          data: {
            types: 'index.d.ts',
          },
        }),
      )
      .mockImplementationOnce(() =>
        Promise.resolve({
          data: `
interface UserInterface {
id: string;
name: string;
email: string;
}
`,
        }),
      )
      // Second package: package.json, then its type definitions.
      .mockImplementationOnce(() =>
        Promise.resolve({
          data: {
            types: 'index.d.ts',
          },
        }),
      )
      .mockImplementationOnce(() =>
        Promise.resolve({
          data: `
interface ProductInterface {
id: string;
price: number;
description: string;
}
`,
        }),
      );

    // Order matters here: see the limitation note in the test description.
    await handler.addPackage('user-package', '1.0.0');
    await handler.addPackage('product-package', '1.0.0');

    const searchQuery = 'user interface with email';

    // Interpolate the query: Nest's Logger treats a trailing string argument
    // as the logging context, not as part of the message.
    logger.log(`Search Query: ${searchQuery}`);

    const results = await handler.searchContext(searchQuery);

    // Log a summary only; result objects may carry full embedding vectors.
    logger.log(`Search returned ${results.length} result(s)`);

    expect(results.length).toBeGreaterThan(0);
    expect(results[0].types?.content[0]).toContain('UserInterface');
  }, 100000);
});
Loading

0 comments on commit 4a1f7cb

Please sign in to comment.