Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Extract screenshots from CWS and Firefox addons #3

Merged
merged 5 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,7 @@ tsconfig.tsbuildinfo
*.njsproj
*.sln
*.sw?

# .env files
.env
.env.*
Binary file modified bun.lockb
Binary file not shown.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
"dataloader": "^2.2.2",
"graphql": "^16.8.0",
"linkedom": "^0.15.3",
"picocolors": "^1.0.0"
"picocolors": "^1.0.0",
"radix3": "^1.1.2"
},
"devDependencies": {
"@aklinker1/check": "^1.2.0",
Expand Down
8 changes: 8 additions & 0 deletions src/apis/firefox-api.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import consola from "consola";
import { buildScreenshotUrl } from "../utils/urls";

export function createFirefoxApiClient() {
return {
Expand Down Expand Up @@ -29,6 +30,13 @@ export function createFirefoxApiClient() {
storeUrl: json.url,
version: json.current_version.version,
dailyActiveUsers: json.average_daily_users,
screenshots: (json.previews as any[]).map<Gql.Screenshot>(
(preview, i) => ({
index: i,
rawUrl: preview.image_url,
indexUrl: buildScreenshotUrl("firefox-addons", json.id, i),
}),
),
};
},
};
Expand Down
14 changes: 14 additions & 0 deletions src/crawlers/__tests__/chrome-crawler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@ describe("Chrome Web Store Crawler", () => {
"https://chromewebstore.google.com/detail/github-better-line-counts/ocfdgncpifmegplaglcnglhioflaimkd",
version: expect.any(String),
weeklyActiveUsers: expect.any(Number),
screenshots: [
{
index: 0,
indexUrl:
"http://localhost:3000/api/rest/chrome-extensions/ocfdgncpifmegplaglcnglhioflaimkd/screenshots/0",
rawUrl: expect.any(String),
},
{
index: 1,
indexUrl:
"http://localhost:3000/api/rest/chrome-extensions/ocfdgncpifmegplaglcnglhioflaimkd/screenshots/1",
rawUrl: expect.any(String),
},
],
});
});
});
27 changes: 26 additions & 1 deletion src/crawlers/chrome-crawler.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import consola from "consola";
import { HTMLAnchorElement, HTMLElement, parseHTML } from "linkedom";
import { buildScreenshotUrl } from "../utils/urls";

export async function crawlExtension(
id: string,
Expand All @@ -21,7 +22,7 @@ export async function crawlExtension(
const { document } = parseHTML(html);

// Uncomment to debug HTML
// Bun.write("chrome.html", document.documentElement.outerHTML);
Bun.write("chrome.html", document.documentElement.outerHTML);

// Basic metadata
const name = metaContent(document, "property=og:title")?.replace(
Expand Down Expand Up @@ -106,6 +107,23 @@ export async function crawlExtension(
// const rating = extractNumber(ratingDiv.title); // "Average rating: 4.78 stars"
// const reviewCount = extractNumber(ratingDiv.textContent); // "(1024)"

// <div
// aria-label="Item media 1 screenshot"
// data-media-url="https://lh3.googleusercontent.com/GUgh0ThX2FDPNvbaumYl4DqsUhsbYiCe-Hut9FoVEnkmTrXyA-sHbMk5jmZTj_t-dDP8rAmy6X6a6GNTCn9F8zo4VYU"
// data-is-video="false"
// data-slide-index="0"
// >
const screenshots = [...document.querySelectorAll("div[data-media-url]")]
.filter((div) => div.getAttribute("data-is-video") === "false")
.map<Gql.Screenshot>((div) => {
const index = Number(div.getAttribute("data-slide-index") || -1);
return {
index,
rawUrl: div.getAttribute("data-media-url") + "=s1280", // "s1280" gets the full resolution
indexUrl: buildScreenshotUrl("chrome-extensions", id, index),
};
});

if (name == null) return;
if (storeUrl == null) return;
if (iconUrl == null) return;
Expand All @@ -114,6 +132,12 @@ export async function crawlExtension(
if (version == null) return;
if (shortDescription == null) return;
if (longDescription == null) return;
if (
screenshots.some(
(screenshot) => screenshot.index === -1 || !screenshot.rawUrl,
)
)
return;

const result: Gql.ChromeExtension = {
id,
Expand All @@ -127,6 +151,7 @@ export async function crawlExtension(
longDescription,
rating,
reviewCount,
screenshots,
};
consola.debug("Crawl results:", result);
return result;
Expand Down
15 changes: 15 additions & 0 deletions src/rest/getChromeScreenshot.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import type { ChromeService } from "../services/chrome-service";
import { RouteHandler } from "../utils/rest-router";

/**
 * Builds the REST handler that resolves a Chrome extension screenshot by its
 * index and redirects the client to the screenshot's raw URL.
 *
 * @param chrome Service used to load the extension identified by `params.id`.
 * @returns A route handler that responds with a redirect to `rawUrl`, or with
 * a 404 when the index is malformed, the extension cannot be loaded, or no
 * screenshot has that index.
 */
export const getChromeScreenshot =
  (chrome: ChromeService): RouteHandler<{ id: string; index: string }> =>
  async (params) => {
    const index = Number(params.index);
    // A non-integer (including NaN) can never equal a stored screenshot index,
    // so answer immediately instead of loading the extension first.
    if (!Number.isInteger(index)) return new Response(null, { status: 404 });

    const extension = await chrome.getExtension(params.id);
    const screenshot = extension?.screenshots.find(
      (candidate) => candidate.index === index,
    );

    if (screenshot == null) return new Response(null, { status: 404 });
    return Response.redirect(screenshot.rawUrl);
  };
15 changes: 15 additions & 0 deletions src/rest/getFirefoxScreenshot.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import type { FirefoxService } from "../services/firefox-service";
import { RouteHandler } from "../utils/rest-router";

/**
 * Builds the REST handler that resolves a Firefox addon screenshot by its
 * index and redirects the client to the screenshot's raw URL.
 *
 * @param firefox Service used to load the addon identified by `params.id`.
 * @returns A route handler that responds with a redirect to `rawUrl`, or with
 * a 404 when the index is malformed, the addon cannot be loaded, or no
 * screenshot has that index.
 */
export const getFirefoxScreenshot =
  (firefox: FirefoxService): RouteHandler<{ id: string; index: string }> =>
  async (params) => {
    const index = Number(params.index);
    // A non-integer (including NaN) can never equal a stored screenshot index,
    // so answer immediately instead of loading the addon first.
    if (!Number.isInteger(index)) return new Response(null, { status: 404 });

    const addon = await firefox.getAddon(params.id);
    const screenshot = addon?.screenshots.find(
      (candidate) => candidate.index === index,
    );

    if (screenshot == null) return new Response(null, { status: 404 });
    return Response.redirect(screenshot.rawUrl);
  };
17 changes: 17 additions & 0 deletions src/schema.gql
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type ChromeExtension {
lastUpdated: String!
rating: Float
reviewCount: Int
screenshots: [Screenshot!]!
}

type FirefoxAddon {
Expand All @@ -47,4 +48,20 @@ type FirefoxAddon {
lastUpdated: String!
rating: Float
reviewCount: Int
screenshots: [Screenshot!]!
}

# A single store-listing screenshot. Exposed through the `screenshots` field
# of both `ChromeExtension` and `FirefoxAddon`.
type Screenshot {
"""
The screenshot's order.
"""
index: Int!
"""
The image's raw URL provided by the service. When screenshots are updated, this URL changes.
"""
rawUrl: String!
"""
URL to the image based on the index. If the raw URL changes, the `indexUrl` will remain constant, good for links in README.md files.
"""
indexUrl: String!
}
23 changes: 22 additions & 1 deletion src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ import playgroundHtmlTemplate from "./public/playground.html";
import consola from "consola";
import { createChromeService } from "./services/chrome-service";
import { createFirefoxService } from "./services/firefox-service";
import { createRestRouter } from "./utils/rest-router";
import { getChromeScreenshot } from "./rest/getChromeScreenshot";
import { getFirefoxScreenshot } from "./rest/getFirefoxScreenshot";
import { SERVER_ORIGIN } from "./utils/urls";

const playgroundHtml = playgroundHtmlTemplate.replace(
"{{VERSION}}",
Expand All @@ -22,6 +26,16 @@ export function createServer(config?: ServerConfig) {
firefox,
});

const restRouter = createRestRouter()
.get(
"/api/rest/chrome-extensions/:id/screenshots/:index",
getChromeScreenshot(chrome),
)
.get(
"/api/rest/firefox-addons/:id/screenshots/:index",
getFirefoxScreenshot(firefox),
);

const httpServer = Bun.serve({
port,
error(request) {
Expand All @@ -32,8 +46,15 @@ export function createServer(config?: ServerConfig) {
return createResponse(undefined, { status: 204 });
}

const url = new URL(req.url, SERVER_ORIGIN);

// REST
if (url.pathname.startsWith("/api/rest")) {
return restRouter.fetch(url, req);
}

// GraphQL
if (req.url.endsWith("/api")) {
if (url.pathname.startsWith("/api")) {
const data = await graphql.evaluateQuery(req);

return createResponse(JSON.stringify(data), {
Expand Down
9 changes: 7 additions & 2 deletions src/services/chrome-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ export function createChromeService() {
});

return {
getExtension: (id: string) => loader.load(id),
getExtensions: async (ids: string[]) => {
getExtension: (id: string): Promise<Gql.ChromeExtension | undefined> =>
loader.load(id),
getExtensions: async (
ids: string[],
): Promise<Array<Gql.ChromeExtension | undefined>> => {
const result = await loader.loadMany(ids);
return result.map((item, index) => {
if (item instanceof Error) {
Expand All @@ -29,3 +32,5 @@ export function createChromeService() {
},
};
}

/**
 * Shape of the object returned by `createChromeService`. Derived with
 * `ReturnType` so it always follows the factory's return value.
 */
export type ChromeService = ReturnType<typeof createChromeService>;
9 changes: 7 additions & 2 deletions src/services/firefox-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ export function createFirefoxService() {
>(HOUR_MS, (ids) => Promise.all(ids.map((id) => firefox.getAddon(id))));

return {
getAddon: (id: string | number) => loader.load(id),
getAddons: async (ids: Array<string | number>) => {
getAddon: (id: string | number): Promise<Gql.FirefoxAddon | undefined> =>
loader.load(id),
getAddons: async (
ids: Array<string | number>,
): Promise<Array<Gql.FirefoxAddon | undefined>> => {
const result = await loader.loadMany(ids);
return result.map((item) => {
if (item == null) return undefined;
Expand All @@ -25,3 +28,5 @@ export function createFirefoxService() {
},
};
}

/**
 * Shape of the object returned by `createFirefoxService`. Derived with
 * `ReturnType` so it always follows the factory's return value.
 */
export type FirefoxService = ReturnType<typeof createFirefoxService>;
42 changes: 42 additions & 0 deletions src/utils/rest-router.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import * as radix3 from "radix3";

/**
 * Function invoked by the REST router when a request matches a route.
 *
 * @typeParam TParams Shape of the path parameters the handler expects.
 * @param params Path parameters of the matched route (an empty object when
 * the match carries none).
 * @param url The URL that was passed to the router's `fetch`.
 * @param req The request that was passed to the router's `fetch`.
 * @returns The response, either directly or as a promise.
 */
export type RouteHandler<TParams = {}> = (
params: TParams,
url: URL,
req: Request,
) => Response | Promise<Response>;

/**
 * A handler paired with the HTTP method it answers.
 */
export interface Route {
// HTTP method compared against `Request.method`; the special value "ANY"
// matches every method.
method: string;
// Function that produces the response for a matching request.
handler: RouteHandler;
}

/**
 * Creates a small chainable REST router on top of a radix3 path tree.
 *
 * Registration methods (`get`, `post`, `any`, `on`) return the router itself
 * so calls can be chained. Several methods may be registered for the same
 * path; registering the same method twice for a path keeps the latest
 * handler.
 *
 * `fetch` looks up `url.pathname`, prefers a route whose method equals
 * `req.method`, falls back to a route registered as `"ANY"`, and answers with
 * an empty 404 response when nothing matches.
 */
export function createRestRouter() {
  // radix3 stores one payload per inserted path, so each path owns a list of
  // method-specific routes rather than a single route. Inserting a second
  // payload for the same path would replace the first one.
  const r = radix3.createRouter<{ routes: Route[] }>();
  // Payloads keyed by the exact path string they were inserted under, so a
  // later registration for the same path extends the existing payload.
  const entriesByPath = new Map<string, { routes: Route[] }>();

  const register = (
    method: string,
    path: string,
    handler: RouteHandler<any>,
  ): void => {
    let entry = entriesByPath.get(path);
    if (entry == null) {
      entry = { routes: [] };
      entriesByPath.set(path, entry);
      r.insert(path, entry);
    }

    const existingIndex = entry.routes.findIndex(
      (route) => route.method === method,
    );
    if (existingIndex === -1) {
      entry.routes.push({ method, handler });
    } else {
      // Same method registered again for this path: last registration wins.
      entry.routes[existingIndex] = { method, handler };
    }
  };

  const router = {
    get(path: string, handler: RouteHandler<any>) {
      register("GET", path, handler);
      return router;
    },
    post(path: string, handler: RouteHandler<any>) {
      register("POST", path, handler);
      return router;
    },
    any(path: string, handler: RouteHandler<any>) {
      register("ANY", path, handler);
      return router;
    },
    on(method: string, path: string, handler: RouteHandler<any>) {
      register(method, path, handler);
      return router;
    },
    async fetch(url: URL, req: Request): Promise<Response> {
      const match = r.lookup(url.pathname);
      // An exact method match takes precedence over a catch-all "ANY" route.
      const route =
        match?.routes.find((candidate) => candidate.method === req.method) ??
        match?.routes.find((candidate) => candidate.method === "ANY");

      if (route == null) return new Response(null, { status: 404 });
      return await route.handler(match?.params ?? {}, url, req);
    },
  };
  return router;
}
10 changes: 10 additions & 0 deletions src/utils/urls.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/** Origin used when the `SERVER_ORIGIN` environment variable is unset or blank. */
const DEFAULT_SERVER_ORIGIN = "http://localhost:3000";

/**
 * Normalizes a configured origin so it can be safely prefixed to a path that
 * starts with `/`.
 *
 * @param raw Raw configured value, possibly `undefined`, blank, or ending in
 * one or more slashes.
 * @returns The value without surrounding whitespace or trailing slashes, or
 * the default origin when nothing usable remains.
 */
function normalizeOrigin(raw: string | undefined): string {
  const trimmed = raw?.trim().replace(/\/+$/, "");
  // A blank value would yield relative links and an invalid base URL, so it is
  // treated the same as an unset variable.
  return trimmed ? trimmed : DEFAULT_SERVER_ORIGIN;
}

/**
 * Public origin of this server (scheme, host and optional port, no trailing
 * slash). Read from the `SERVER_ORIGIN` environment variable, falling back to
 * `http://localhost:3000`.
 */
export const SERVER_ORIGIN = normalizeOrigin(process.env.SERVER_ORIGIN);

/**
 * Builds the stable, index-based URL of a screenshot served by the REST API.
 *
 * @param base REST collection segment for the store the item belongs to.
 * @param id Identifier of the extension or addon.
 * @param index Index of the screenshot.
 * @returns `<SERVER_ORIGIN>/api/rest/<base>/<id>/screenshots/<index>`.
 */
export function buildScreenshotUrl(
  base: "chrome-extensions" | "firefox-addons",
  id: string,
  index: number,
) {
  return `${SERVER_ORIGIN}/api/rest/${base}/${id}/screenshots/${index}`;
}