chore: snapshot main sync
This commit is contained in:
@@ -1,13 +1,13 @@
|
||||
import type { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import type { Sql } from '../db.js';
|
||||
import type { FleetState } from '../services/fleet-state.js';
|
||||
import type { DeltaEmitter } from '../index.js';
|
||||
import {
|
||||
VIRTUAL_MODELS,
|
||||
resolveCandidates,
|
||||
splitComposite,
|
||||
} from '../services/gateway.js';
|
||||
import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
import { recordFailure, recordSuccess } from '../services/circuit-breaker.js';
|
||||
import type { AppContext } from '../app-context.js';
|
||||
|
||||
/**
|
||||
* P7.1: OpenAI-compatible auto:* gateway.
|
||||
@@ -25,11 +25,9 @@ import { resolveProviderBaseUrl } from '../services/llama-providers.js';
|
||||
*/
|
||||
export function registerGatewayRoutes(
|
||||
app: FastifyInstance,
|
||||
sql: Sql,
|
||||
fleet: FleetState,
|
||||
_emitter: DeltaEmitter,
|
||||
ctx: AppContext,
|
||||
): void {
|
||||
// ─── model catalog ───────────────────────────────────────────────────────
|
||||
const { sql, fleet } = ctx;
|
||||
|
||||
app.get('/v1/models', async (_req: FastifyRequest, reply: FastifyReply) => {
|
||||
return reply.send({
|
||||
@@ -43,10 +41,6 @@ export function registerGatewayRoutes(
|
||||
});
|
||||
});
|
||||
|
||||
// ─── props (for getModelContext) ─────────────────────────────────────────
|
||||
// Resolve candidates and proxy the first healthy candidate's props so the
|
||||
// caller can read default_generation_settings.n_ctx.
|
||||
|
||||
app.get('/upstream/:model/props', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const { model } = req.params as { model: string };
|
||||
const { candidates } = await resolveCandidates(sql, fleet, model);
|
||||
@@ -69,8 +63,6 @@ export function registerGatewayRoutes(
|
||||
return reply.status(503).send({ error: 'no healthy candidate for virtual model', model });
|
||||
});
|
||||
|
||||
// ─── chat completions (dispatch with failover) ───────────────────────────
|
||||
|
||||
app.post('/v1/chat/completions', async (req: FastifyRequest, reply: FastifyReply) => {
|
||||
const body = req.body as Record<string, unknown>;
|
||||
const requestedModel = body?.model as string | undefined;
|
||||
@@ -113,11 +105,20 @@ export function registerGatewayRoutes(
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
// HTTP error before body — eligible for failover to the next candidate.
|
||||
recordFailure(compositeId);
|
||||
continue;
|
||||
}
|
||||
|
||||
// A null body on an OK response is a broken upstream; fail over to the
|
||||
// next candidate (nothing has been committed to the client yet).
|
||||
const reader = stream ? res.body?.getReader() : null;
|
||||
if (stream && !reader) {
|
||||
recordFailure(compositeId);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Success: dispatch chosen. Log and stream/return through.
|
||||
recordSuccess(compositeId);
|
||||
await logDispatch(sql, {
|
||||
virtualModel,
|
||||
chosen: compositeId,
|
||||
@@ -128,16 +129,11 @@ export function registerGatewayRoutes(
|
||||
durationMs: Date.now() - startedAt,
|
||||
});
|
||||
|
||||
if (stream) {
|
||||
if (stream && reader) {
|
||||
reply.header('Content-Type', 'text/event-stream');
|
||||
reply.header('Cache-Control', 'no-cache');
|
||||
reply.header('Connection', 'keep-alive');
|
||||
reply.raw.writeHead(200);
|
||||
const reader = res.body?.getReader();
|
||||
if (!reader) {
|
||||
reply.raw.end();
|
||||
return;
|
||||
}
|
||||
const decoder = new TextDecoder();
|
||||
try {
|
||||
while (true) {
|
||||
@@ -155,7 +151,7 @@ export function registerGatewayRoutes(
|
||||
const json = await res.json();
|
||||
return reply.send(json);
|
||||
} catch {
|
||||
// Connection error — failover to the next candidate.
|
||||
recordFailure(compositeId);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user