Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallelize GitHub data cache generation #17

Merged
merged 7 commits into from
Dec 3, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 67 additions & 12 deletions scripts/src/github-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ async function fetchWithAuth(url: string): Promise<ApiResponse> {
});

if (!response.ok) {
throw new Error(`GitHub API error: ${response.status.toString()} ${response.statusText}`);
const errorBody = await response.text();
throw new Error(`GitHub API error: ${String(response.status)} ${String(response.statusText)}\n${String(errorBody)}`);
}

// Parse Link header for pagination
Expand Down Expand Up @@ -163,8 +164,26 @@ async function processPRComments(pr: GitHubPR): Promise<Activity[]> {
return activities;
}

// Timestamp captured when this module is evaluated; fetchBotActivities uses it
// as the baseline for its "Total execution time" output.
const startTime = performance.now();

/**
 * Script entry point.
 *
 * Calls fetchBotActivities with no `since` argument and ignores the returned
 * activities. If that call throws, the thrown value is stringified, written to
 * stderr followed by a newline, and the process exits with status 1.
 */
async function main() {
  try {
    await fetchBotActivities();
  } catch (failure) {
    const stderrLine = `${String(failure)}\n`;
    process.stderr.write(stderrLine);
    process.exit(1);
  }
}

// Start the script as soon as this module is evaluated. main() catches errors
// thrown by fetchBotActivities itself; the promise it returns is not awaited here.
// NOTE(review): this module also exports fetchBotActivities, so importing it
// would trigger this call as well — confirm that is intended.
main();

export async function fetchBotActivities(since?: string): Promise<Activity[]> {
try {
if (!GITHUB_TOKEN || GITHUB_TOKEN === 'placeholder') {
process.stderr.write('Error: GITHUB_TOKEN environment variable is not set or invalid\n');
throw new Error(String('Invalid GITHUB_TOKEN'));
}
console.log('Starting bot activities fetch...');
const activities: Activity[] = [];
const baseUrl = `https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}`;

Expand All @@ -186,31 +205,67 @@ export async function fetchBotActivities(since?: string): Promise<Activity[]> {
}

// Fetch issues and PRs
console.log('Fetching issues and PRs...');
const fetchStartTime = performance.now();
const items = await fetchAllPages<GitHubIssue>(`${baseUrl}/issues?${params.toString()}`);
console.log(`Fetched ${String(items.length)} items in ${((performance.now() - fetchStartTime) / 1000).toFixed(2)}s`);

for (const item of items) {
if (item.comments > 0) {
console.log('Processing items...');
const processStartTime = performance.now();
// Filter items that have comments
const itemsWithComments = items.filter(item => item.comments > 0);
console.log(`Processing ${String(itemsWithComments.length)} items with comments in parallel...`);

// Process items in parallel
const batchSize = 10; // Process 10 items at a time to avoid rate limiting
const results = [];

for (let i = 0; i < itemsWithComments.length; i += batchSize) {
const batch = itemsWithComments.slice(i, i + batchSize);
const batchNumber = Math.floor(i/batchSize) + 1;
const totalBatches = Math.ceil(itemsWithComments.length/batchSize);
console.log(`Processing batch ${String(batchNumber)}/${String(totalBatches)}...`);

const batchResults = await Promise.all(
batch.map(async item => {
if (item.pull_request === undefined) {
// Process regular issues
const issueActivities = await processIssueComments(item);
activities.push(...issueActivities);
return processIssueComments(item);
} else {
// Process PRs through the issue comments endpoint to catch all activity
const prActivities = await processPRComments({
return processPRComments({
number: item.number,
html_url: item.html_url,
comments_url: item.comments_url,
comments: item.comments
});
activities.push(...prActivities);
}
}
}
})
);

results.push(...batchResults);
}

// Flatten results and add to activities
activities.push(...results.flat());

console.log(`Processed all items in ${((performance.now() - processStartTime) / 1000).toFixed(2)}s`);

// Sort by timestamp in descending order
return activities.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
console.log('Sorting activities...');
const sortStartTime = performance.now();
const sortedActivities = activities.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
console.log(`Sorted ${String(activities.length)} activities in ${((performance.now() - sortStartTime) / 1000).toFixed(2)}s`);

const totalTime = (performance.now() - startTime) / 1000;
console.log(`Total execution time: ${totalTime.toFixed(2)}s`);

return sortedActivities;
} catch (error) {
console.error('Error fetching bot activities:', error);
throw error;
const errorMessage = error instanceof Error ? error.message : String(error);
process.stderr.write('Error fetching bot activities: ' + errorMessage + '\n');
const totalTime = (performance.now() - startTime) / 1000;
process.stderr.write('Total execution time: ' + totalTime.toFixed(2) + 's (failed)\n');
throw new Error(errorMessage);
}
}
Loading