Skip to content

Commit

Permalink
compute servers: implement backend part of spend limit
Browse files Browse the repository at this point in the history
  • Loading branch information
williamstein committed Jan 7, 2025
1 parent d306216 commit d046f76
Show file tree
Hide file tree
Showing 11 changed files with 190 additions and 23 deletions.
16 changes: 14 additions & 2 deletions src/packages/frontend/compute/log-entry.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ import { useTypedRedux } from "@cocalc/frontend/app-framework";
import { Icon, isIconName } from "@cocalc/frontend/components";
import ComputeServerTag from "@cocalc/frontend/compute/server-tag";
import type { ComputeServerEvent } from "@cocalc/util/compute/log";
import { STATE_INFO } from "@cocalc/util/db-schema/compute-servers";
import { capitalize, plural } from "@cocalc/util/misc";
import {
STATE_INFO,
spendLimitPeriod,
} from "@cocalc/util/db-schema/compute-servers";
import { capitalize, currency, plural } from "@cocalc/util/misc";

export default function LogEntry({
project_id,
Expand Down Expand Up @@ -72,6 +75,15 @@ export default function LogEntry({
{event.idle_timeout} {plural(event.idle_timeout, "minute")}) {tag}
</>
);
case "spend-limit":
return (
<>
{cs} - Spend Limit Shutdown (total spend during the last{" "}
{spendLimitPeriod(event.spendLimit?.hours)} hit{" "}
{currency(event.total)} which exceeded limit of{" "}
{currency(event.spendLimit?.dollars)}) {tag}
</>
);
default:
return (
<>
Expand Down
19 changes: 3 additions & 16 deletions src/packages/frontend/compute/spend-limit.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { setServerConfiguration } from "./api";
import {
type SpendLimit as ISpendLimit,
SPEND_LIMIT_DEFAULTS,
spendLimitPeriod
} from "@cocalc/util/db-schema/compute-servers";
import { AutomaticShutdownCard } from "./automatic-shutdown";

Expand Down Expand Up @@ -104,7 +105,7 @@ export function SpendLimit({
<div
style={{ flex: 0.5, textAlign: "right", marginRight: "15px" }}
>
Maximum amount to spend per {period(spendLimit.hours)}:{" "}
Maximum amount to spend per {spendLimitPeriod(spendLimit.hours)}:{" "}
</div>
<div style={{ flex: 0.5 }}>
<InputNumber
Expand Down Expand Up @@ -164,18 +165,4 @@ export function SpendLimitModal({ id, project_id, close }) {
);
}

/**
 * Human readable name for a spend limit period.
 *
 * @param hours - length of the period in hours
 * @returns "day", "week", "month" or "year" when hours matches one of the
 *   named periods, and otherwise the literal string `${hours} hours`.
 */
function period(hours): string {
  const DAY = 24;
  const WEEK = 7 * DAY;
  const MONTH = 30.5 * DAY; // 732 hours
  const YEAR = 12 * MONTH; // 8784 hours
  // Each entry is [number of hours, name].  The last two entries are the
  // values this function originally used for month/year, which carried a
  // stray factor of 7 (copied from the week case).  They are still
  // recognized so that any value produced with the old constants keeps
  // its label.
  // NOTE(review): confirm which values the period selector actually emits.
  const named: [number, string][] = [
    [DAY, "day"],
    [WEEK, "week"],
    [MONTH, "month"],
    [YEAR, "year"],
    [7 * MONTH, "month"],
    [7 * YEAR, "year"],
  ];
  for (const [length, name] of named) {
    // loose equality on purpose, exactly as in the original comparisons
    if (hours == length) {
      return name;
    }
  }
  return `${hours} hours`;
}

Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export const SUPPORTED_CHANGES = [
"allowCollaboratorControl",
"authToken",
"proxy",
"spendLimit",
];

export const RUNNING_CHANGES = [
Expand All @@ -37,6 +38,7 @@ export const RUNNING_CHANGES = [
"allowCollaboratorControl",
"authToken",
"proxy",
"spendLimit",
];

export async function makeConfigurationChange({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export const SUPPORTED_CHANGES = [
"allowCollaboratorControl",
"authToken",
"proxy",
"spendLimit",
];

export const RUNNING_CHANGES = [
Expand All @@ -26,6 +27,7 @@ export const RUNNING_CHANGES = [
"allowCollaboratorControl",
"authToken",
"proxy",
"spendLimit",
];

export async function makeConfigurationChange({
Expand Down
7 changes: 7 additions & 0 deletions src/packages/server/compute/maintenance/cloud/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { hyperstackMaintenance } from "./hyperstack";

import automaticShutdown from "./automatic-shutdown";
import idleTimeout from "./idle-timeout";
import spendLimit from "./spend-limit";

const logger = getLogger("server:compute:maintenance:cloud");

Expand All @@ -35,6 +36,12 @@ async function startMaintenance() {

// once per minute makes sense
setInterval(idleTimeout, 60 * 1000);

// once every 3 minutes seems like enough for spend limits, since
// it is potentially more computationally expensive, but also doesn't
// need to be as precise.
setTimeout(spendLimit, 30 * 1000); // also 30s after startup
setInterval(spendLimit, 3 * 60 * 1000);
}

let running = false;
Expand Down
118 changes: 118 additions & 0 deletions src/packages/server/compute/maintenance/cloud/spend-limit.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
Manage spend limit "automatic shutdown" of compute servers.
Call this function periodically to do the next round of checks. Each
running compute server with configuration.spendLimit?.enabled gets checked
for how much has been spent during the configured interval, and if the
spend reaches or exceeds the limit, the server gets stopped.
*/

import getPool from "@cocalc/database/pool";
import getLogger from "@cocalc/backend/logger";
import { stop } from "@cocalc/server/compute/control";
import { uuid } from "@cocalc/util/misc";
import type { ComputeServerEventLogEntry } from "@cocalc/util/compute/log";
import { map } from "awaiting";
import {
type SpendLimit,
validatedSpendLimit,
} from "@cocalc/util/db-schema/compute-servers";
import getPurchases from "@cocalc/server/purchases/get-purchases";
import dayjs from "dayjs";

const logger = getLogger("server:compute:maintenance:cloud:spend-limit");

/**
 * Run one round of spend limit checks.
 *
 * Any error thrown by the update is caught and logged rather than
 * propagated, so a failed round never rejects the returned promise.
 */
export default async function spendLimit(): Promise<void> {
  try {
    await update();
  } catch (err) {
    // This message used to say "idle timeout", which made spend limit
    // failures look like idle timeout failures in the logs.
    logger.debug(
      `WARNING - unexpected issue running spend limit update loop: ${err}`,
    );
  }
}

/**
 * Check every running compute server that has an enabled spend limit.
 *
 * For each such server this:
 *   - adds up the purchases for that server since (now - hours),
 *   - records that total in the server's "spend" column (best effort),
 *   - and, if the total is at least the configured number of dollars,
 *     writes a project log entry and stops the server.
 *
 * Servers are processed with a concurrency of 20.  A failure while handling
 * one server is logged and does not prevent the other servers from being
 * checked.
 */
async function update() {
  logger.debug("update");
  const pool = getPool();
  // finds all rows where: state is "running" and configuration.spendLimit.enabled is true
  const { rows } = await pool.query(
    `
SELECT id, account_id, project_id, configuration#>'{spendLimit}' AS spend_limit
FROM compute_servers
WHERE state = 'running'
AND (configuration#>>'{spendLimit,enabled}')::boolean = true
`,
  );
  logger.debug(`got ${rows.length} servers with an enabled spend limit:`, rows);

  // Does the actual work for one server; may throw (e.g., if the stored
  // spend limit is invalid or the purchases cannot be loaded).
  const check = async (row) => {
    logger.debug("checking if spend limit is hit", row);
    const limit = validatedSpendLimit(row.spend_limit ?? {});
    if (limit == null) {
      logger.debug("WARNING -- no usable spend limit -- skipping", row);
      return;
    }
    const { dollars, hours } = limit;
    const { purchases } = await getPurchases({
      compute_server_id: row.id,
      account_id: row.account_id,
      group: true,
      cutoff: dayjs().subtract(hours, "hour").toDate(),
    });
    let total = 0;
    for (const { cost, cost_so_far } of purchases) {
      // prefer cost, then cost_so_far, and count the purchase as 0 if
      // neither is set.
      total += cost ?? cost_so_far ?? 0;
    }
    try {
      await pool.query("UPDATE compute_servers SET spend=$1 where id=$2", [
        total,
        row.id,
      ]);
    } catch (err) {
      // recording the spend is informational only; still enforce the limit.
      logger.debug(`WARNING -- unable to update spend field -- ${err}`);
    }
    if (total < dollars) {
      logger.debug("spend is under the limit -- nothing to do", row);
      return;
    }
    // The log entry and the stop are attempted independently: failing to
    // write the log entry must not leave a server that is over its limit
    // running.
    try {
      await createProjectLogEntry({ ...row, total });
    } catch (err) {
      logger.debug(
        `WARNING -- failed to create spend limit log entry for ${row.id} -- ${err}`,
      );
    }
    try {
      const { account_id, id } = row;
      await stop({ account_id, id });
    } catch (err) {
      logger.debug(
        `WARNING -- failed to stop ${row.id} in response to spend limit -- ${err}`,
      );
    }
  };

  // Isolate failures per server.
  // NOTE(review): presumably map() rejects as soon as one callback rejects,
  // which would otherwise end the round for all servers -- confirm against
  // the awaiting documentation.
  const f = async (row) => {
    try {
      await check(row);
    } catch (err) {
      logger.debug(
        `WARNING -- failed to check spend limit of ${row.id} -- ${err}`,
      );
    }
  };
  await map(rows, 20, f);
}

/**
 * Insert a row into project_log with a "compute-server" event whose action
 * is "spend-limit".
 *
 * @param id - id of the compute server; stored as server_id in the event
 * @param account_id - stored in the account_id column of the log row
 * @param project_id - stored in the project_id column of the log row
 * @param spend_limit - stored as spendLimit in the event
 * @param total - stored as total in the event
 */
async function createProjectLogEntry({
  id,
  account_id,
  project_id,
  spend_limit,
  total,
}: {
  id: number;
  account_id: string;
  project_id: string;
  spend_limit: SpendLimit;
  total: number;
}) {
  logger.debug("log entry that we spend limit terminated compute server", {
    id,
  });
  // the JSON payload stored in the event column
  const entry = {
    event: "compute-server",
    action: "spend-limit",
    spendLimit: spend_limit,
    total,
    server_id: id,
  } as ComputeServerEventLogEntry;
  const insert =
    "INSERT INTO project_log(id, project_id, account_id, time, event) VALUES($1,$2,$3,NOW(),$4)";
  // each log row gets a freshly generated uuid as its id
  const values = [uuid(), project_id, account_id, entry];
  await getPool().query(insert, values);
}
7 changes: 7 additions & 0 deletions src/packages/server/compute/set-server-configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import updatePurchase from "./update-purchase";
import { isDnsAvailable } from "./dns";
import { setConfiguration } from "./util";
import { validatedSpendLimit } from "@cocalc/util/db-schema/compute-servers";
import { isEqual } from "lodash";

export default async function setServerConfiguration({
account_id,
Expand Down Expand Up @@ -65,6 +66,12 @@ export default async function setServerConfiguration({
...configuration,
spendLimit: validatedSpendLimit(configuration.spendLimit),
};
if (!isEqual(currentConfiguration.spendLimit, configuration.spendLimit)) {
// changing spendLimit invalidates "spend during the given period".
await pool.query("UPDATE compute_servers SET spend=NULL where id=$1", [
id,
]);
}
}

await validateConfigurationChange({
Expand Down
5 changes: 3 additions & 2 deletions src/packages/server/purchases/get-purchases.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import { getOwner } from "@cocalc/server/compute/owner";

interface Options {
account_id: string;
cutoff?: Date; // returns purchases back to this date (limit/offset NOT ignored)
// returns purchases back to this date (limit/offset NOT ignored); never excludes unfinished purchases (i.e., with cost not set)
cutoff?: Date;
thisMonth?: boolean;
limit?: number;
offset?: number;
Expand Down Expand Up @@ -121,7 +122,7 @@ export default async function getPurchases({
}
if (cutoff) {
params.push(cutoff);
conditions.push(`p.time >= $${params.length}`);
conditions.push(`(p.time >= $${params.length} OR p.cost IS NULL)`);
}
if (no_statement) {
conditions.push("p.day_statement_id IS NULL");
Expand Down
9 changes: 9 additions & 0 deletions src/packages/util/compute/log.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type {
State,
AutomaticShutdown,
SpendLimit,
} from "@cocalc/util/db-schema/compute-servers";

interface Event {
Expand Down Expand Up @@ -28,6 +29,12 @@ export interface IdleTimeoutEntry {
idle_timeout: number;
}

// Log entry for a compute server that was stopped because its spend limit
// was hit.
export interface SpendLimitEntry {
action: "spend-limit";
// the spend limit configuration of the server when the limit was hit
spendLimit: SpendLimit;
// total spend that was measured for the spend limit period
total: number;
}

interface Error {
action: "error";
error: string;
Expand All @@ -39,6 +46,7 @@ export type ComputeServerEvent = (
| Error
| AutomaticShutdownEntry
| IdleTimeoutEntry
| SpendLimitEntry
) &
Event;

Expand All @@ -47,4 +55,5 @@ export type ComputeServerEventLogEntry =
| StateChange
| AutomaticShutdownEntry
| IdleTimeoutEntry
| SpendLimitEntry
| Error;
26 changes: 24 additions & 2 deletions src/packages/util/db-schema/compute-servers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -421,14 +421,30 @@ export function validatedSpendLimit(spendLimit?: any): SpendLimit | undefined {
dollars = 1;
}
if (!isFinite(hours)) {
throw Error("hours must be finite");
throw Error(`hours (=${hours}) must be finite`);
}
if (!isFinite(dollars)) {
throw Error("dollars must be finite");
throw Error(`dollars (=${dollars}) must be finite`);
}
return { enabled, hours, dollars };
}

/**
 * Human readable name for a spend limit period.
 *
 * @param hours - length of the period in hours
 * @returns "day", "week", "month" or "year" when hours matches one of the
 *   named periods, and otherwise the literal string `${hours} hours`.
 */
export function spendLimitPeriod(hours): string {
  const HOURS_PER_DAY = 24;
  const HOURS_PER_WEEK = 7 * HOURS_PER_DAY;
  const HOURS_PER_MONTH = 30.5 * HOURS_PER_DAY; // 732
  const HOURS_PER_YEAR = 12 * HOURS_PER_MONTH; // 8784

  // The month and year cases originally compared against 30.5 * 24 * 7 and
  // 12 * 30.5 * 24 * 7, i.e., with a stray factor of 7 copied from the week
  // case, so an actual month (732 hours) or year (8784 hours) was reported
  // as a raw number of hours.  The legacy values are still recognized so
  // that anything produced with the old constants keeps its label.
  // NOTE(review): confirm which values the period selector actually emits.
  const LEGACY_MONTH = 7 * HOURS_PER_MONTH;
  const LEGACY_YEAR = 7 * HOURS_PER_YEAR;

  // loose equality is kept on purpose, exactly as in the original.
  if (hours == HOURS_PER_DAY) {
    return "day";
  }
  if (hours == HOURS_PER_WEEK) {
    return "week";
  }
  if (hours == HOURS_PER_MONTH || hours == LEGACY_MONTH) {
    return "month";
  }
  if (hours == HOURS_PER_YEAR || hours == LEGACY_YEAR) {
    return "year";
  }
  return `${hours} hours`;
}

interface BaseConfiguration {
// image: name of the image to use, e.g. 'python' or 'pytorch'.
// images are managed in src/packages/server/compute/images.ts
Expand Down Expand Up @@ -727,6 +743,7 @@ export interface ComputeServerUserInfo {
update_purchase?: boolean;
last_purchase_update?: Date;
template?: ComputeServerTemplate;
spend?: number;
}

export interface ComputeServer extends ComputeServerUserInfo {
Expand Down Expand Up @@ -778,6 +795,7 @@ Table({
project_specific_id: null,
course_project_id: null,
course_server_id: null,
spend: null,
},
},
set: {
Expand Down Expand Up @@ -962,6 +980,10 @@ Table({
type: "integer",
desc: "If this compute server is a clone of an instructor server in a course, this is the id of that instructor server.",
},
spend: {
type: "number",
desc: "If configuration.spendLimit is enabled, then the spend during the current period gets recorded here every few minutes. This is useful to efficiently provide a UI element showing the current spend status. It is cleared whenever configuration.spendLimit is changed, to avoid confusion.",
},
},
});

Expand Down
2 changes: 1 addition & 1 deletion src/packages/util/db-schema/purchases.ts
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ Table({
},
pending: {
type: "boolean",
desc: "If true, then this transaction is considered pending, which means that for a few days it doesn't count against the user's quotas for the purposes of deciding whether or not a purchase is allowed. This is needed so we can charge a user for their subscriptions, then collect the money from them, without all of the running pay-as-you-go project upgrades suddenly breaking (etc.).",
desc: "**DEPRECATED** -- not used anywhere; do NOT use! If true, then this transaction is considered pending, which means that for a few days it doesn't count against the user's quotas for the purposes of deciding whether or not a purchase is allowed. This is needed so we can charge a user for their subscriptions, then collect the money from them, without all of the running pay-as-you-go project upgrades suddenly breaking (etc.).",
},
cost_per_hour: {
title: "Cost Per Hour",
Expand Down

0 comments on commit d046f76

Please sign in to comment.