Batch 3c: when an agent has llama_extra_args in AGENTS.md, provider.ts routes inference through LLAMA_SIDECAR_URL instead of LLAMA_SWAP_URL. X-Agent-Flags header built from the agent's flags. Boot-time guard refuses to start if any agent has llama_extra_args but LLAMA_SIDECAR_URL is unset. PrefixFingerprint gains a route field (swap/sidecar) for per-turn visibility. 9 provider tests. AGENTS.md tool gap: all agents (except Prompt Builder) were missing 8 tools that were added after the original tool lists were written: request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes. The missing request_read_access caused silent "permission denied" when reading files outside the project root. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
85 lines
2.5 KiB
TypeScript
85 lines
2.5 KiB
TypeScript
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
|
import type { LanguageModel } from 'ai';
|
|
|
|
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
// config.LLAMA_SWAP_URL at call time (not at module load) so tests can stub
// the upstream without touching env vars. No apiKey: llama-swap is
// unauthenticated in our Tailscale topology, and exposure over the public
// internet is gated by Authelia at the Caddy layer, not by API keys.
//
// v2.4.1-sidecar: when the agent has llama_extra_args, route through
// llama-sidecar instead. A fresh provider is created per call (not cached)
// because the X-Agent-Flags header varies per agent. The llama-swap path
// stays cached since it has no per-request headers.
|
|
|
|
/**
 * llama-swap providers memoized by normalized base URL. The swap route sends
 * no per-request headers, so one provider per upstream can be reused.
 */
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();

/**
 * Returns the llama-swap provider for `baseURL`, creating and caching it on
 * first use.
 *
 * Trailing slashes are stripped before the `/v1` suffix check, so
 * `http://host`, `http://host/`, `http://host/v1` and `http://host/v1/` all
 * resolve to `http://host/v1` and share a single cached provider instead of
 * producing `//v1` or `/v1//v1` paths.
 *
 * @param baseURL - llama-swap base URL, with or without the `/v1` suffix.
 * @returns The provider bound to the normalized `/v1` endpoint.
 */
function getSwapProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
  const trimmed = baseURL.replace(/\/+$/, '');
  const apiBase = trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;

  const cached = swapCache.get(apiBase);
  if (cached) {
    return cached;
  }

  const provider = createOpenAICompatible({
    name: 'llama-swap',
    baseURL: apiBase,
    includeUsage: true,
  });
  // Key on the normalized URL so equivalent spellings reuse one provider.
  swapCache.set(apiBase, provider);
  return provider;
}
|
|
|
|
/**
 * Builds a llama-sidecar provider that sends the agent's llama flags on every
 * request via the `X-Agent-Flags` header.
 *
 * A new provider is created on each call rather than cached, because the
 * header value differs per agent.
 *
 * Trailing slashes on `baseURL` are stripped before the `/v1` suffix check so
 * `http://host/` and `http://host/v1/` both resolve to `http://host/v1`.
 *
 * @param baseURL - llama-sidecar base URL, with or without the `/v1` suffix.
 * @param flags - Extra llama arguments; joined with single spaces into the header.
 * @returns A provider bound to the normalized `/v1` endpoint.
 */
function sidecarProvider(
  baseURL: string,
  flags: readonly string[],
): ReturnType<typeof createOpenAICompatible> {
  const trimmed = baseURL.replace(/\/+$/, '');
  const apiBase = trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1`;

  return createOpenAICompatible({
    name: 'llama-sidecar',
    baseURL: apiBase,
    includeUsage: true,
    headers: {
      'X-Agent-Flags': flags.join(' '),
    },
  });
}
|
|
|
|
/** Which upstream serves a request: llama-swap or llama-sidecar. */
export type InferenceRoute = 'swap' | 'sidecar';

/** Routing decision for one agent, as produced by `resolveRoute`. */
export interface RoutingInfo {
  /** Upstream selected for the agent. */
  route: InferenceRoute;
  /** Agent's extra llama arguments on the sidecar route; `null` on the swap route. */
  flags: string[] | null;
}

/** The subset of an agent definition that routing reads. */
interface AgentLike {
  /** Extra llama arguments for this agent, or `null` when it has none. */
  llama_extra_args: string[] | null;
}

/** The subset of application config that routing reads. */
interface ConfigLike {
  /** Base URL of the llama-swap upstream. */
  LLAMA_SWAP_URL: string;
  /** Base URL of the llama-sidecar upstream; needed only for agents with extra args. */
  LLAMA_SIDECAR_URL?: string;
}
|
|
|
|
/**
 * Decides which upstream an agent's inference should use.
 *
 * An agent with at least one entry in `llama_extra_args` is routed to the
 * sidecar and its flags are returned as-is (same array reference). A `null`
 * agent, `null` args, or an empty list all fall back to the swap route.
 *
 * @param agent - Agent to route, or `null` when there is no agent.
 * @returns The selected route and the flags to forward (`null` for swap).
 */
export function resolveRoute(agent: AgentLike | null): RoutingInfo {
  const flags = agent?.llama_extra_args ?? null;
  if (flags === null || flags.length === 0) {
    return { route: 'swap', flags: null };
  }
  return { route: 'sidecar', flags };
}
|
|
|
|
/**
 * Returns the chat model for `modelId` on the upstream that matches the agent.
 *
 * Agents without `llama_extra_args` (or no agent at all) use the cached
 * llama-swap provider. Agents with extra args use a llama-sidecar provider
 * that forwards those args.
 *
 * @param config - Supplies `LLAMA_SWAP_URL` and, optionally, `LLAMA_SIDECAR_URL`.
 * @param modelId - Model identifier passed to the provider's `chatModel`.
 * @param agent - Agent whose flags select the route; omitted or `null` means swap.
 * @returns The language model bound to the selected upstream.
 * @throws Error if the agent needs the sidecar but `LLAMA_SIDECAR_URL` is unset or empty.
 */
export function upstreamModel(
  config: ConfigLike,
  modelId: string,
  agent?: AgentLike | null,
): LanguageModel {
  const { route, flags } = resolveRoute(agent ?? null);

  if (route === 'swap') {
    return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
  }

  const sidecarURL = config.LLAMA_SIDECAR_URL;
  if (!sidecarURL) {
    throw new Error('Agent has llama_extra_args but LLAMA_SIDECAR_URL is not set');
  }

  // resolveRoute only returns 'sidecar' with a non-empty flag list; the
  // fallback replaces a non-null assertion and is not expected to be hit.
  return sidecarProvider(sidecarURL, flags ?? []).chatModel(modelId);
}
|