Compare commits
3 Commits
v2.4.0-uns
...
v2.5.1-bud
| Author | SHA1 | Date | |
|---|---|---|---|
| cbef7618b3 | |||
| fcc7c5a86e | |||
| bcfc94fa47 |
@@ -21,6 +21,7 @@ out/
|
|||||||
.opencode/
|
.opencode/
|
||||||
.vscode/
|
.vscode/
|
||||||
.idea/
|
.idea/
|
||||||
|
.claude/worktrees/
|
||||||
|
|
||||||
# Test artifacts / coverage
|
# Test artifacts / coverage
|
||||||
coverage/
|
coverage/
|
||||||
|
|||||||
@@ -11,6 +11,11 @@ POSTGRES_PASSWORD=CHANGE_ME
|
|||||||
# point BooCode at a different SearXNG instance.
|
# point BooCode at a different SearXNG instance.
|
||||||
SEARXNG_URL=http://100.114.205.53:8888
|
SEARXNG_URL=http://100.114.205.53:8888
|
||||||
|
|
||||||
|
# Task model: lightweight model for auto-naming, search rewrite, etc.
|
||||||
|
# Direct llama-server instance (NOT llama-swap). Falls back to LLAMA_SWAP_URL
|
||||||
|
# with FAST_MODEL when unset.
|
||||||
|
# TASK_MODEL_URL=http://100.90.172.55:7995
|
||||||
|
|
||||||
# v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
|
# v1.13.15-tools: BOOCODE_TOOLS narrows the tool whitelist sent to the LLM.
|
||||||
# Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
|
# Unset (default) → all tools (~21k schema). Useful primarily for single-purpose
|
||||||
# sessions where the model only needs read-only filesystem access.
|
# sessions where the model only needs read-only filesystem access.
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ const ConfigSchema = z.object({
|
|||||||
// v2.0.5: cheaper model for titles, summaries, labeling. Falls back to
|
// v2.0.5: cheaper model for titles, summaries, labeling. Falls back to
|
||||||
// session model (auto_name) or DEFAULT_MODEL when unset.
|
// session model (auto_name) or DEFAULT_MODEL when unset.
|
||||||
FAST_MODEL: z.string().optional(),
|
FAST_MODEL: z.string().optional(),
|
||||||
|
TASK_MODEL_URL: z.string().url().optional(),
|
||||||
|
LLAMA_SIDECAR_URL: z.string().url().optional(),
|
||||||
});
|
});
|
||||||
|
|
||||||
export type Config = z.infer<typeof ConfigSchema>;
|
export type Config = z.infer<typeof ConfigSchema>;
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ import { cleanupTruncations } from './services/truncate.js';
|
|||||||
import { loadMcpConfig } from './services/mcp-config.js';
|
import { loadMcpConfig } from './services/mcp-config.js';
|
||||||
import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
|
import { initialize as initMcp, getTools as getMcpTools, shutdown as shutdownMcp } from './services/mcp-client.js';
|
||||||
import { appendMcpTools } from './services/tools.js';
|
import { appendMcpTools } from './services/tools.js';
|
||||||
import { refreshToolNames } from './services/agents.js';
|
import { refreshToolNames, getAgentsForProject } from './services/agents.js';
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
const config = loadConfig();
|
const config = loadConfig();
|
||||||
@@ -91,6 +91,20 @@ async function main() {
|
|||||||
}
|
}
|
||||||
app.addHook('onClose', async () => { await shutdownMcp(); });
|
app.addHook('onClose', async () => { await shutdownMcp(); });
|
||||||
|
|
||||||
|
// Boot-time guard: if any agent has llama_extra_args but LLAMA_SIDECAR_URL
|
||||||
|
// is unset, fail fast. Silent fallback would defeat per-agent flags.
|
||||||
|
if (!config.LLAMA_SIDECAR_URL) {
|
||||||
|
const { agents } = await getAgentsForProject('');
|
||||||
|
const offending = agents.find(a => a.llama_extra_args && a.llama_extra_args.length > 0);
|
||||||
|
if (offending) {
|
||||||
|
app.log.fatal(
|
||||||
|
{ agent: offending.name },
|
||||||
|
`Agent "${offending.name}" has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
|
||||||
|
);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
await app.register(fastifyWebsocket);
|
await app.register(fastifyWebsocket);
|
||||||
|
|
||||||
app.get('/api/health', async () => {
|
app.get('/api/health', async () => {
|
||||||
|
|||||||
@@ -344,6 +344,7 @@ INSERT INTO settings (key, value) VALUES ('theme_mode', '"dark"') ON CONFLICT (k
|
|||||||
ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_system_prompt TEXT NOT NULL DEFAULT '';
|
ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_system_prompt TEXT NOT NULL DEFAULT '';
|
||||||
ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_web_search_enabled BOOLEAN NOT NULL DEFAULT false;
|
ALTER TABLE projects ADD COLUMN IF NOT EXISTS default_web_search_enabled BOOLEAN NOT NULL DEFAULT false;
|
||||||
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS web_search_enabled BOOLEAN;
|
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS web_search_enabled BOOLEAN;
|
||||||
|
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS tags TEXT[] DEFAULT '{}';
|
||||||
|
|
||||||
-- v1.11: anchored rolling compaction.
|
-- v1.11: anchored rolling compaction.
|
||||||
-- compacted_at — marks rows that are "behind the curtain" of the latest
|
-- compacted_at — marks rows that are "behind the curtain" of the latest
|
||||||
@@ -366,3 +367,39 @@ ALTER TABLE messages ADD COLUMN IF NOT EXISTS summary BOOLEAN NOT NULL DEFAULT F
|
|||||||
ALTER TABLE messages ADD COLUMN IF NOT EXISTS tail_start_id UUID REFERENCES messages(id) ON DELETE SET NULL;
|
ALTER TABLE messages ADD COLUMN IF NOT EXISTS tail_start_id UUID REFERENCES messages(id) ON DELETE SET NULL;
|
||||||
ALTER TABLE chats ADD COLUMN IF NOT EXISTS needs_compaction BOOLEAN NOT NULL DEFAULT FALSE;
|
ALTER TABLE chats ADD COLUMN IF NOT EXISTS needs_compaction BOOLEAN NOT NULL DEFAULT FALSE;
|
||||||
CREATE INDEX IF NOT EXISTS idx_messages_chat_compacted ON messages (chat_id, compacted_at);
|
CREATE INDEX IF NOT EXISTS idx_messages_chat_compacted ON messages (chat_id, compacted_at);
|
||||||
|
|
||||||
|
-- tasks table (provider dispatch, arena)
|
||||||
|
CREATE TABLE IF NOT EXISTS tasks (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
project_id UUID NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
|
||||||
|
session_id UUID REFERENCES sessions(id) ON DELETE CASCADE,
|
||||||
|
parent_task_id UUID REFERENCES tasks(id),
|
||||||
|
arena_id UUID,
|
||||||
|
state TEXT NOT NULL DEFAULT 'pending'
|
||||||
|
CHECK (state IN ('pending','running','completed','failed','blocked','cancelled')),
|
||||||
|
input TEXT NOT NULL,
|
||||||
|
output_summary TEXT,
|
||||||
|
agent TEXT,
|
||||||
|
model TEXT,
|
||||||
|
mode_id TEXT,
|
||||||
|
thinking_option_id TEXT,
|
||||||
|
feature_values JSONB,
|
||||||
|
execution_path TEXT CHECK (execution_path IS NULL OR execution_path IN ('native','acp','pty','qwen')),
|
||||||
|
worktree_path TEXT,
|
||||||
|
cost_tokens INTEGER,
|
||||||
|
started_at TIMESTAMPTZ,
|
||||||
|
ended_at TIMESTAMPTZ,
|
||||||
|
created_at TIMESTAMPTZ NOT NULL DEFAULT clock_timestamp()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Fix tasks FK to cascade on session delete (existing tables without CASCADE).
-- CREATE TABLE IF NOT EXISTS does not touch a tasks table that already
-- exists, so a non-cascading session FK has to be swapped in place here.
DO $$ BEGIN
  IF EXISTS (
    SELECT 1
      FROM pg_constraint
     WHERE conname = 'tasks_session_id_fkey'
       -- Constraint names are only unique per table: pin the lookup to the
       -- tasks table and to foreign keys so a same-named constraint on
       -- another relation cannot trigger a DROP that would then fail.
       AND conrelid = 'tasks'::regclass
       AND contype = 'f'
       -- confdeltype 'c' means ON DELETE CASCADE; anything else is replaced.
       AND confdeltype != 'c'
  ) THEN
    ALTER TABLE tasks DROP CONSTRAINT tasks_session_id_fkey;
    ALTER TABLE tasks ADD CONSTRAINT tasks_session_id_fkey
      FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE;
  END IF;
END $$;
|
||||||
|
|||||||
58
apps/server/src/services/__tests__/provider.test.ts
Normal file
58
apps/server/src/services/__tests__/provider.test.ts
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { resolveRoute, upstreamModel } from '../inference/provider.js';
|
||||||
|
|
||||||
|
describe('resolveRoute', () => {
  // Every input without usable flags must resolve to the swap route.
  const swapCases: Array<[string, Parameters<typeof resolveRoute>[0]]> = [
    ['routes to swap when agent is null', null],
    ['routes to swap when agent has no llama_extra_args', { llama_extra_args: null }],
    ['routes to swap when agent has empty llama_extra_args', { llama_extra_args: [] }],
  ];

  for (const [title, agent] of swapCases) {
    it(title, () => {
      expect(resolveRoute(agent)).toEqual({ route: 'swap', flags: null });
    });
  }

  it('routes to sidecar when agent has llama_extra_args', () => {
    const { route, flags } = resolveRoute({ llama_extra_args: ['--top-k', '20'] });
    expect(route).toBe('sidecar');
    expect(flags).toEqual(['--top-k', '20']);
  });
});
|
||||||
|
|
||||||
|
describe('upstreamModel', () => {
  // Two config shapes: swap only, and swap plus sidecar.
  const swapConfig = { LLAMA_SWAP_URL: 'http://localhost:8401' };
  const fullConfig = { ...swapConfig, LLAMA_SIDECAR_URL: 'http://localhost:8402' };
  // An agent whose extra args force the sidecar route.
  const sidecarAgent = { llama_extra_args: ['--top-k', '20'] };
  const MODEL_ID = 'test-model';

  it('returns a model for swap route (no agent)', () => {
    const built = upstreamModel(swapConfig, MODEL_ID);
    expect(built).toBeDefined();
    expect((built as any).modelId).toBe('test-model');
  });

  it('returns a model for swap route (agent without extra args)', () => {
    expect(upstreamModel(swapConfig, MODEL_ID, { llama_extra_args: null })).toBeDefined();
  });

  it('returns a model for sidecar route', () => {
    const built = upstreamModel(fullConfig, MODEL_ID, sidecarAgent);
    expect(built).toBeDefined();
    expect((built as any).modelId).toBe('test-model');
  });

  it('throws when sidecar route requested but URL missing', () => {
    const attempt = () => upstreamModel(swapConfig, MODEL_ID, sidecarAgent);
    expect(attempt).toThrow(/LLAMA_SIDECAR_URL/);
  });

  it('routes to swap for empty llama_extra_args array', () => {
    expect(upstreamModel(swapConfig, MODEL_ID, { llama_extra_args: [] })).toBeDefined();
  });
});
|
||||||
@@ -1,9 +1,10 @@
|
|||||||
import type { InferenceContext } from './inference/index.js';
|
import type { InferenceContext } from './inference/index.js';
|
||||||
|
import { taskModelCompletion } from './task-model.js';
|
||||||
|
|
||||||
const NAMING_SYSTEM_PROMPT =
|
const NAMING_SYSTEM_PROMPT =
|
||||||
'You name chat sessions based on what the assistant did. Summarize the topic or outcome — do NOT copy the first few words verbatim. Reply directly with no thinking, reasoning, or explanation. Output ONLY the title, 4 words max, no quotes, no punctuation, no prefix like "Title:".';
|
'You name chat sessions. Reply with ONLY the title. 4 to 6 words. No quotes, no punctuation, no prefix.';
|
||||||
|
|
||||||
const MAX_TITLE_CHARS = 60;
|
const MAX_TITLE_CHARS = 80;
|
||||||
|
|
||||||
function cleanTitle(raw: string): string {
|
function cleanTitle(raw: string): string {
|
||||||
let name = raw.trim();
|
let name = raw.trim();
|
||||||
@@ -18,27 +19,7 @@ function cleanTitle(raw: string): string {
|
|||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface NamingResponse {
|
// TODO: wire suggestTags after task model validation
|
||||||
choices?: Array<{
|
|
||||||
message?: {
|
|
||||||
content?: string;
|
|
||||||
reasoning_content?: string;
|
|
||||||
};
|
|
||||||
}>;
|
|
||||||
}
|
|
||||||
|
|
||||||
function pickTitleSource(data: NamingResponse): string {
|
|
||||||
const choice = data.choices?.[0]?.message;
|
|
||||||
if (!choice) return '';
|
|
||||||
if (choice.content && choice.content.trim().length > 0) return choice.content;
|
|
||||||
const reasoning = choice.reasoning_content ?? '';
|
|
||||||
if (reasoning.length === 0) return '';
|
|
||||||
const lines = reasoning
|
|
||||||
.split('\n')
|
|
||||||
.map((l) => l.trim())
|
|
||||||
.filter((l) => l.length > 0);
|
|
||||||
return lines[lines.length - 1] ?? '';
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function maybeAutoNameChat(
|
export async function maybeAutoNameChat(
|
||||||
ctx: InferenceContext,
|
ctx: InferenceContext,
|
||||||
@@ -64,13 +45,6 @@ export async function maybeAutoNameChat(
|
|||||||
if (!chat) return;
|
if (!chat) return;
|
||||||
if (chat.name !== null && chat.name !== '') return;
|
if (chat.name !== null && chat.name !== '') return;
|
||||||
|
|
||||||
const sessionRows = await ctx.sql<{ model: string }[]>`
|
|
||||||
SELECT model FROM sessions WHERE id = ${sessionId}
|
|
||||||
`;
|
|
||||||
// v2.0.5: prefer FAST_MODEL for cheap LLM calls (titles, summaries).
|
|
||||||
const model = ctx.config.FAST_MODEL ?? sessionRows[0]?.model;
|
|
||||||
if (!model) return;
|
|
||||||
|
|
||||||
const assistantMsg = await ctx.sql<{ content: string }[]>`
|
const assistantMsg = await ctx.sql<{ content: string }[]>`
|
||||||
SELECT content FROM messages
|
SELECT content FROM messages
|
||||||
WHERE chat_id = ${chatId}
|
WHERE chat_id = ${chatId}
|
||||||
@@ -84,32 +58,12 @@ export async function maybeAutoNameChat(
|
|||||||
|
|
||||||
const assistantText = assistantMsg[0].content.slice(0, 2000);
|
const assistantText = assistantMsg[0].content.slice(0, 2000);
|
||||||
|
|
||||||
const body = {
|
const raw = await taskModelCompletion({
|
||||||
model,
|
system: NAMING_SYSTEM_PROMPT,
|
||||||
messages: [
|
user: assistantText,
|
||||||
{ role: 'system', content: NAMING_SYSTEM_PROMPT },
|
maxTokens: 30,
|
||||||
{
|
|
||||||
role: 'user',
|
|
||||||
content: assistantText,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
max_tokens: 30,
|
|
||||||
temperature: 0.3,
|
temperature: 0.3,
|
||||||
stream: false,
|
|
||||||
chat_template_kwargs: { enable_thinking: false },
|
|
||||||
};
|
|
||||||
|
|
||||||
const res = await fetch(`${ctx.config.LLAMA_SWAP_URL}/v1/chat/completions`, {
|
|
||||||
method: 'POST',
|
|
||||||
headers: { 'Content-Type': 'application/json' },
|
|
||||||
body: JSON.stringify(body),
|
|
||||||
});
|
});
|
||||||
if (!res.ok) {
|
|
||||||
const text = await res.text().catch(() => '');
|
|
||||||
throw new Error(`naming request failed: ${res.status} ${text.slice(0, 200)}`);
|
|
||||||
}
|
|
||||||
const data = (await res.json()) as NamingResponse;
|
|
||||||
const raw = pickTitleSource(data);
|
|
||||||
const name = cleanTitle(raw);
|
const name = cleanTitle(raw);
|
||||||
if (!name) {
|
if (!name) {
|
||||||
ctx.log.warn({ chatId, raw }, 'auto-name: empty title from model');
|
ctx.log.warn({ chatId, raw }, 'auto-name: empty title from model');
|
||||||
|
|||||||
@@ -18,9 +18,9 @@ import { READ_ONLY_TOOL_NAMES } from '../tools.js';
|
|||||||
// turns + deeper exploration without changing the safety floor materially —
|
// turns + deeper exploration without changing the safety floor materially —
|
||||||
// the doom-loop guard (3 identical calls → abort) catches the actual failure
|
// the doom-loop guard (3 identical calls → abort) catches the actual failure
|
||||||
// mode this cap was guarding against.
|
// mode this cap was guarding against.
|
||||||
export const BUDGET_READ_ONLY = 50;
|
export const BUDGET_READ_ONLY = 100;
|
||||||
export const BUDGET_NON_READ_ONLY = 10;
|
export const BUDGET_NON_READ_ONLY = 100;
|
||||||
export const BUDGET_NO_AGENT = 50;
|
export const BUDGET_NO_AGENT = 100;
|
||||||
|
|
||||||
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
const READ_ONLY_SET: ReadonlySet<string> = new Set(READ_ONLY_TOOL_NAMES);
|
||||||
|
|
||||||
|
|||||||
@@ -1,37 +1,84 @@
|
|||||||
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
||||||
import type { LanguageModel } from 'ai';
|
import type { LanguageModel } from 'ai';
|
||||||
|
|
||||||
// TODO: When per-agent llama-server flag overrides are added, route them
|
|
||||||
// through validateExtraArgs (./llama-args-validator.ts) first.
|
|
||||||
|
|
||||||
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
|
// v1.13.1-A: AI SDK provider against llama-swap. baseURL is threaded from
|
||||||
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
|
// config.LLAMA_SWAP_URL at call time (not module-load) so tests can stub the
|
||||||
// upstream without touching env vars. No apiKey — llama-swap is unauth in our
|
// upstream without touching env vars. No apiKey — llama-swap is unauth in our
|
||||||
// Tailscale topology and exposing it over the public internet is gated by
|
// Tailscale topology and exposing it over the public internet is gated by
|
||||||
// Authelia at the Caddy layer, not by API keys.
|
// Authelia at the Caddy layer, not by API keys.
|
||||||
|
//
|
||||||
|
// v2.4.1-sidecar: when the agent has llama_extra_args, route through
|
||||||
|
// llama-sidecar instead. A fresh provider is created per call (not cached)
|
||||||
|
// because the X-Agent-Flags header varies per agent. The llama-swap path
|
||||||
|
// stays cached since it has no per-request headers.
|
||||||
|
|
||||||
const cache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
|
const swapCache = new Map<string, ReturnType<typeof createOpenAICompatible>>();
|
||||||
|
|
||||||
function getProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
|
function getSwapProvider(baseURL: string): ReturnType<typeof createOpenAICompatible> {
|
||||||
let provider = cache.get(baseURL);
|
let provider = swapCache.get(baseURL);
|
||||||
if (!provider) {
|
if (!provider) {
|
||||||
provider = createOpenAICompatible({
|
provider = createOpenAICompatible({
|
||||||
name: 'llama-swap',
|
name: 'llama-swap',
|
||||||
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
||||||
// v1.13.7: @ai-sdk/openai-compatible defaults includeUsage=false, which
|
|
||||||
// omits `stream_options.include_usage` from the request body. Without
|
|
||||||
// it, llama.cpp / llama-swap never emits the trailing usage block, so
|
|
||||||
// `result.usage` resolves with inputTokens=outputTokens=undefined and
|
|
||||||
// tokens_used / ctx_used land as NULL in every messages row. Setting
|
|
||||||
// true here re-enables the per-stream usage payload across all models
|
|
||||||
// served via the llama-swap provider.
|
|
||||||
includeUsage: true,
|
includeUsage: true,
|
||||||
});
|
});
|
||||||
cache.set(baseURL, provider);
|
swapCache.set(baseURL, provider);
|
||||||
}
|
}
|
||||||
return provider;
|
return provider;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function upstreamModel(baseURL: string, modelId: string): LanguageModel {
|
function sidecarProvider(
|
||||||
return getProvider(baseURL).chatModel(modelId);
|
baseURL: string,
|
||||||
|
flags: string[],
|
||||||
|
): ReturnType<typeof createOpenAICompatible> {
|
||||||
|
return createOpenAICompatible({
|
||||||
|
name: 'llama-sidecar',
|
||||||
|
baseURL: baseURL.endsWith('/v1') ? baseURL : `${baseURL}/v1`,
|
||||||
|
includeUsage: true,
|
||||||
|
headers: {
|
||||||
|
'X-Agent-Flags': flags.join(' '),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export type InferenceRoute = 'swap' | 'sidecar';
|
||||||
|
|
||||||
|
export interface RoutingInfo {
|
||||||
|
route: InferenceRoute;
|
||||||
|
flags: string[] | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface AgentLike {
|
||||||
|
llama_extra_args: string[] | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ConfigLike {
|
||||||
|
LLAMA_SWAP_URL: string;
|
||||||
|
LLAMA_SIDECAR_URL?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Decide which upstream an agent's requests are routed to.
 *
 * A missing agent, a null `llama_extra_args`, or an empty list all resolve to
 * the swap route with `flags: null`. A non-empty list resolves to the sidecar
 * route and is handed back unchanged as `flags`.
 */
export function resolveRoute(agent: AgentLike | null): RoutingInfo {
  const extraArgs = agent?.llama_extra_args ?? null;
  if (extraArgs === null || extraArgs.length === 0) {
    return { route: 'swap', flags: null };
  }
  return { route: 'sidecar', flags: extraArgs };
}
|
||||||
|
|
||||||
|
/**
 * Build the language-model handle for `modelId`, choosing the upstream from
 * the agent's routing.
 *
 * @param config  Upstream URLs. `LLAMA_SIDECAR_URL` is only consulted when the
 *                agent resolves to the sidecar route.
 * @param modelId Model identifier passed to the provider's `chatModel`.
 * @param agent   Optional agent; omitted or null is treated as "no agent".
 * @throws Error when the sidecar route is selected but `LLAMA_SIDECAR_URL`
 *         is unset or empty.
 */
export function upstreamModel(
  config: ConfigLike,
  modelId: string,
  agent?: AgentLike | null,
): LanguageModel {
  const routing = resolveRoute(agent ?? null);

  if (routing.route !== 'sidecar') {
    return getSwapProvider(config.LLAMA_SWAP_URL).chatModel(modelId);
  }

  const sidecarUrl = config.LLAMA_SIDECAR_URL;
  if (!sidecarUrl) {
    throw new Error(
      `Agent has llama_extra_args but LLAMA_SIDECAR_URL is not set`,
    );
  }
  // resolveRoute only reports 'sidecar' together with a non-empty flags array.
  return sidecarProvider(sidecarUrl, routing.flags!).chatModel(modelId);
}
|
||||||
|
|||||||
@@ -157,7 +157,8 @@ export async function streamCompletion(
|
|||||||
opts: StreamOptions,
|
opts: StreamOptions,
|
||||||
onDelta: (content: string) => void,
|
onDelta: (content: string) => void,
|
||||||
onUsage: ((prompt: number | null, completion: number | null) => void) | undefined,
|
onUsage: ((prompt: number | null, completion: number | null) => void) | undefined,
|
||||||
signal?: AbortSignal
|
signal?: AbortSignal,
|
||||||
|
agent?: Agent | null,
|
||||||
): Promise<StreamResult> {
|
): Promise<StreamResult> {
|
||||||
const aiMessages = toModelMessages(messages);
|
const aiMessages = toModelMessages(messages);
|
||||||
const hasTools = opts.tools !== null && opts.tools.length > 0;
|
const hasTools = opts.tools !== null && opts.tools.length > 0;
|
||||||
@@ -195,7 +196,7 @@ export async function streamCompletion(
|
|||||||
};
|
};
|
||||||
|
|
||||||
const result = streamText({
|
const result = streamText({
|
||||||
model: upstreamModel(ctx.config.LLAMA_SWAP_URL, model),
|
model: upstreamModel(ctx.config, model, agent ?? null),
|
||||||
messages: aiMessages,
|
messages: aiMessages,
|
||||||
...(aiTools
|
...(aiTools
|
||||||
? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
|
? { tools: aiTools, toolChoice: 'auto' as const, experimental_repairToolCall: repairToolCall }
|
||||||
@@ -458,7 +459,8 @@ export async function executeStreamPhase(
|
|||||||
}, USAGE_THROTTLE_MS - elapsed);
|
}, USAGE_THROTTLE_MS - elapsed);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
signal
|
signal,
|
||||||
|
agent,
|
||||||
);
|
);
|
||||||
} finally {
|
} finally {
|
||||||
if (pendingFlushTimer) {
|
if (pendingFlushTimer) {
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import type {
|
|||||||
import { ALL_TOOLS } from '../tools.js';
|
import { ALL_TOOLS } from '../tools.js';
|
||||||
import { resolveProjectRoot } from '../path_guard.js';
|
import { resolveProjectRoot } from '../path_guard.js';
|
||||||
import { maybeAutoNameChat } from '../auto_name.js';
|
import { maybeAutoNameChat } from '../auto_name.js';
|
||||||
|
import { rewriteSearchQuery } from '../task-search-rewrite.js';
|
||||||
import { getAgentById } from '../agents.js';
|
import { getAgentById } from '../agents.js';
|
||||||
import * as compaction from '../compaction.js';
|
import * as compaction from '../compaction.js';
|
||||||
import type { Broker } from '../broker.js';
|
import type { Broker } from '../broker.js';
|
||||||
@@ -254,6 +255,16 @@ export async function runAssistantTurn(
|
|||||||
const webToolsEnabled =
|
const webToolsEnabled =
|
||||||
iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
|
iterSession.web_search_enabled ?? iterProject.default_web_search_enabled ?? false;
|
||||||
|
|
||||||
|
if (stepNumber === 0 && webToolsEnabled && messages.length >= 2) {
|
||||||
|
const lastUserMsg = [...messages].reverse().find((m) => m.role === 'user');
|
||||||
|
if (lastUserMsg?.content) {
|
||||||
|
const hint = await rewriteSearchQuery(lastUserMsg.content);
|
||||||
|
if (hint && messages[0]?.role === 'system' && messages[0].content) {
|
||||||
|
messages[0].content += `\n\nThe user's search intent can be summarized as: "${hint}"`;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, signal };
|
const iterArgs: TurnArgs = { sessionId, chatId, assistantMessageId, toolsUsed, recentToolCalls, signal };
|
||||||
const state: StreamPhaseState = { accumulated: '', startedAt: null };
|
const state: StreamPhaseState = { accumulated: '', startedAt: null };
|
||||||
let result: StreamResult;
|
let result: StreamResult;
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import { createHash } from 'node:crypto';
|
|||||||
import { readFile, stat } from 'node:fs/promises';
|
import { readFile, stat } from 'node:fs/promises';
|
||||||
import type { Agent, Project, Session } from '../types/api.js';
|
import type { Agent, Project, Session } from '../types/api.js';
|
||||||
import { getAgentsMtimes } from './agents.js';
|
import { getAgentsMtimes } from './agents.js';
|
||||||
|
import { resolveRoute } from './inference/provider.js';
|
||||||
|
|
||||||
const BASE_SYSTEM_PROMPT = (projectPath: string) =>
|
const BASE_SYSTEM_PROMPT = (projectPath: string) =>
|
||||||
`You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
|
`You are BooCode Chat, a code investigation assistant. The user is working on a project located at ${projectPath}. Use the file-read tools (view_file, list_dir, grep, find_files) to investigate code when needed. Be concise. Cite file paths and line numbers when discussing code. Do not hallucinate file contents — read the file first. Tool results may be truncated; if so, narrow your query rather than guessing.`;
|
||||||
@@ -98,6 +99,7 @@ export interface PrefixFingerprint {
|
|||||||
has_agent_system_prompt: boolean;
|
has_agent_system_prompt: boolean;
|
||||||
has_session_override: boolean;
|
has_session_override: boolean;
|
||||||
has_project_override: boolean;
|
has_project_override: boolean;
|
||||||
|
route: 'swap' | 'sidecar';
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface PrefixDrift {
|
export interface PrefixDrift {
|
||||||
@@ -125,6 +127,7 @@ interface ObservedInputs {
|
|||||||
has_agent_system_prompt: boolean;
|
has_agent_system_prompt: boolean;
|
||||||
has_session_override: boolean;
|
has_session_override: boolean;
|
||||||
has_project_override: boolean;
|
has_project_override: boolean;
|
||||||
|
route: 'swap' | 'sidecar';
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ObserverEntry {
|
interface ObserverEntry {
|
||||||
@@ -183,6 +186,7 @@ export async function buildSystemPromptWithFingerprint(
|
|||||||
has_agent_system_prompt: !!(agent && agent.system_prompt.trim().length > 0),
|
has_agent_system_prompt: !!(agent && agent.system_prompt.trim().length > 0),
|
||||||
has_session_override: sessionPrompt.length > 0,
|
has_session_override: sessionPrompt.length > 0,
|
||||||
has_project_override: projectPrompt.length > 0,
|
has_project_override: projectPrompt.length > 0,
|
||||||
|
route: resolveRoute(agent).route,
|
||||||
};
|
};
|
||||||
|
|
||||||
const fingerprint: PrefixFingerprint = {
|
const fingerprint: PrefixFingerprint = {
|
||||||
@@ -199,6 +203,7 @@ export async function buildSystemPromptWithFingerprint(
|
|||||||
has_agent_system_prompt: inputs.has_agent_system_prompt,
|
has_agent_system_prompt: inputs.has_agent_system_prompt,
|
||||||
has_session_override: inputs.has_session_override,
|
has_session_override: inputs.has_session_override,
|
||||||
has_project_override: inputs.has_project_override,
|
has_project_override: inputs.has_project_override,
|
||||||
|
route: inputs.route,
|
||||||
};
|
};
|
||||||
|
|
||||||
let drift: PrefixDrift | null = null;
|
let drift: PrefixDrift | null = null;
|
||||||
|
|||||||
68
apps/server/src/services/task-model.ts
Normal file
68
apps/server/src/services/task-model.ts
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
import { loadConfig, type Config } from '../config.js';
|
||||||
|
|
||||||
|
const TIMEOUT_MS = 10_000;
|
||||||
|
|
||||||
|
/**
 * Run one short, non-streaming chat completion against the task model.
 *
 * The request itself is best-effort: a non-OK response or any error thrown
 * while fetching/parsing (including the TIMEOUT_MS abort) is logged with
 * console.warn and reported as '' so callers can fall back without try/catch.
 *
 * @param opts.system        System prompt.
 * @param opts.user          User message.
 * @param opts.maxTokens     Completion token cap; defaults to 30.
 * @param opts.temperature   Sampling temperature; defaults to 0.3.
 * @param opts.fallbackModel Passed through to resolveEndpoint.
 * @returns The trimmed `content` of the first choice; if that is blank, the
 *          last non-blank line of `reasoning_content`; otherwise ''.
 */
export async function taskModelCompletion(opts: {
  system: string;
  user: string;
  maxTokens?: number;
  temperature?: number;
  fallbackModel?: string;
}): Promise<string> {
  const config = loadConfig();
  const maxTokens = opts.maxTokens ?? 30;
  const temperature = opts.temperature ?? 0.3;

  const { url, model } = resolveEndpoint(config, opts.fallbackModel);

  // Tolerate base URLs written with trailing slashes and/or an explicit /v1
  // suffix, so "http://host", "http://host/" and "http://host/v1" all reach
  // the same endpoint instead of "//v1/..." or "/v1/v1/...".
  const base = url.replace(/\/+$/, '');
  const endpoint = `${base.endsWith('/v1') ? base : `${base}/v1`}/chat/completions`;

  try {
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: opts.system },
          { role: 'user', content: opts.user },
        ],
        max_tokens: maxTokens,
        temperature,
        stream: false,
        // Presumably asks the server's chat template to skip thinking output;
        // the reasoning_content fallback below covers replies that still
        // arrive with an empty `content`.
        chat_template_kwargs: { enable_thinking: false },
      }),
      signal: AbortSignal.timeout(TIMEOUT_MS),
    });

    if (!res.ok) {
      const text = await res.text().catch(() => '');
      console.warn(`task-model: ${res.status} ${text.slice(0, 200)}`);
      return '';
    }

    const data = (await res.json()) as {
      choices?: Array<{
        message?: { content?: string; reasoning_content?: string };
      }>;
    };
    const choice = data.choices?.[0]?.message;
    if (!choice) return '';

    const content = (choice.content ?? '').trim();
    if (content.length > 0) return content;

    // No visible answer: fall back to the last non-blank reasoning line.
    const reasoning = choice.reasoning_content ?? '';
    if (reasoning.length === 0) return '';
    const lines = reasoning.split('\n').map((l) => l.trim()).filter((l) => l.length > 0);
    return lines[lines.length - 1] ?? '';
  } catch (err) {
    console.warn('task-model: request failed', err);
    return '';
  }
}
|
||||||
|
|
||||||
|
/**
 * Choose the base URL and model name for task-model requests.
 *
 * When TASK_MODEL_URL is set, that URL is used with the fixed model name
 * 'gemma-3-270m-it'. Otherwise requests go to LLAMA_SWAP_URL with the first
 * defined of FAST_MODEL, `fallbackModel`, DEFAULT_MODEL.
 */
function resolveEndpoint(
  config: Config,
  fallbackModel?: string,
): { url: string; model: string } {
  const dedicatedUrl = config.TASK_MODEL_URL;
  if (!dedicatedUrl) {
    return {
      url: config.LLAMA_SWAP_URL,
      model: config.FAST_MODEL ?? fallbackModel ?? config.DEFAULT_MODEL,
    };
  }
  return { url: dedicatedUrl, model: 'gemma-3-270m-it' };
}
|
||||||
19
apps/server/src/services/task-search-rewrite.ts
Normal file
19
apps/server/src/services/task-search-rewrite.ts
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
import { taskModelCompletion } from './task-model.js';
|
||||||
|
|
||||||
|
const SYSTEM_PROMPT =
|
||||||
|
'You rewrite user messages into concise web search queries. Reply with ONLY the search query. 3 to 6 words. No quotes, no explanation.';
|
||||||
|
|
||||||
|
const MAX_INPUT_CHARS = 500;
|
||||||
|
const FALLBACK_CHARS = 60;
|
||||||
|
|
||||||
|
/**
 * Turn a user message into a short web-search query using the task model.
 *
 * @param userMessage Raw user text; only the first MAX_INPUT_CHARS are sent.
 * @returns The model's query flattened to a single line with wrapping quotes
 *          removed. If the model yields nothing usable, the first
 *          FALLBACK_CHARS of the message (trimmed). '' for a blank message.
 */
export async function rewriteSearchQuery(userMessage: string): Promise<string> {
  const input = userMessage.slice(0, MAX_INPUT_CHARS).trim();
  // Nothing to rewrite: skip the model round-trip for blank messages.
  if (input.length === 0) return '';

  const result = await taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: input,
    maxTokens: 20,
    temperature: 0.2,
  });

  // The prompt asks for a bare query, but the reply is free text: collapse
  // newlines/runs of whitespace and peel any wrapping quotes so the value is
  // safe to embed in a larger prompt or sentence.
  const query = result
    .replace(/\s+/g, ' ')
    .trim()
    .replace(/^["'`]+|["'`]+$/g, '')
    .trim();
  if (query.length > 0) return query;

  return userMessage.slice(0, FALLBACK_CHARS).trim();
}
|
||||||
24
apps/server/src/services/task-summary.ts
Normal file
24
apps/server/src/services/task-summary.ts
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
import { taskModelCompletion } from './task-model.js';
|
||||||
|
|
||||||
|
// System instruction sent with every summary request.
const SYSTEM_PROMPT =
  'Summarize this conversation in one sentence, 15 words max. No quotes, no prefix.';

// Maximum number of characters of the rendered transcript sent to the model.
const MAX_INPUT_CHARS = 1000;

/**
 * Asks the task model for a one-sentence summary of a conversation.
 *
 * The last six messages are rendered as `role: content` lines joined with
 * newlines. The rendered text is cut to its first MAX_INPUT_CHARS characters
 * before being sent, so overflow is dropped from the end.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns Whatever `taskModelCompletion` resolves to for this prompt.
 */
export async function oneLineSummary(
  messages: Array<{ role: string; content: string }>,
): Promise<string> {
  const transcript = messages
    .slice(-6)
    .map(({ role, content }) => `${role}: ${content}`)
    .join('\n')
    // slice() is a no-op when the transcript is already short enough.
    .slice(0, MAX_INPUT_CHARS);

  return taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: transcript,
    maxTokens: 30,
    temperature: 0.3,
  });
}
|
||||||
22
apps/server/src/services/task-tags.ts
Normal file
22
apps/server/src/services/task-tags.ts
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
import { taskModelCompletion } from './task-model.js';
|
||||||
|
|
||||||
|
// System instruction sent with every tagging request.
const SYSTEM_PROMPT =
  'You tag chat sessions. Reply with 1 to 3 lowercase tags separated by commas. Tags should describe the topic. No explanation. Examples: "docker, deployment", "python, debugging", "react, styling".';

// Upper bound on returned tags; matches the "1 to 3" the prompt asks for.
const MAX_TAGS = 3;

// Tags longer than this are discarded rather than truncated.
const MAX_TAG_CHARS = 30;

// Leading characters taken from each of the user message and assistant reply.
const MAX_SNIPPET_CHARS = 300;

/**
 * Suggests up to MAX_TAGS topic tags for a chat exchange via the task model.
 *
 * The completion is split on commas. Each piece is trimmed, stripped of
 * wrapping quote characters, and lower-cased. Empty or over-long pieces are
 * dropped, duplicates are removed (first occurrence wins), and the list is
 * capped at MAX_TAGS.
 *
 * @param userMessage - The user's message; only the first 300 chars are sent.
 * @param assistantReply - The assistant's reply; only the first 300 chars are sent.
 * @returns The cleaned tag list, or `[]` when the completion is empty.
 */
export async function suggestTags(
  userMessage: string,
  assistantReply: string,
): Promise<string[]> {
  const input = `User: ${userMessage.slice(0, MAX_SNIPPET_CHARS)}\nAssistant: ${assistantReply.slice(0, MAX_SNIPPET_CHARS)}`;

  const result = await taskModelCompletion({
    system: SYSTEM_PROMPT,
    user: input,
    maxTokens: 30,
    temperature: 0.3,
  });
  if (result.length === 0) return [];

  const tags = result
    .split(',')
    .map((t) =>
      t
        .trim()
        // The prompt's examples are quoted, so the model may echo the quotes.
        .replace(/^["'`]+|["'`]+$/g, '')
        .trim()
        .toLowerCase(),
    )
    .filter((t) => t.length > 0 && t.length <= MAX_TAG_CHARS);

  // De-duplicate in first-seen order, then enforce the cap.
  return Array.from(new Set(tags)).slice(0, MAX_TAGS);
}
|
||||||
@@ -7,7 +7,7 @@ top_p: 0.95
|
|||||||
top_k: 20
|
top_k: 20
|
||||||
min_p: 0.0
|
min_p: 0.0
|
||||||
presence_penalty: 0.0
|
presence_penalty: 0.0
|
||||||
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
|
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
|
||||||
description: Reviews code for bugs, security issues, and maintainability. Read-only.
|
description: Reviews code for bugs, security issues, and maintainability. Read-only.
|
||||||
---
|
---
|
||||||
You review code. Find real problems, not style nits.
|
You review code. Find real problems, not style nits.
|
||||||
@@ -46,7 +46,7 @@ top_p: 0.95
|
|||||||
top_k: 20
|
top_k: 20
|
||||||
min_p: 0.0
|
min_p: 0.0
|
||||||
presence_penalty: 0.0
|
presence_penalty: 0.0
|
||||||
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
|
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
|
||||||
description: Diagnoses bugs from error messages, logs, or described symptoms.
|
description: Diagnoses bugs from error messages, logs, or described symptoms.
|
||||||
---
|
---
|
||||||
You diagnose bugs. Form a hypothesis, prove it with evidence from the code.
|
You diagnose bugs. Form a hypothesis, prove it with evidence from the code.
|
||||||
@@ -72,7 +72,7 @@ top_k: 20
|
|||||||
min_p: 0.0
|
min_p: 0.0
|
||||||
presence_penalty: 0.0
|
presence_penalty: 0.0
|
||||||
steps: 5
|
steps: 5
|
||||||
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
|
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
|
||||||
description: Proposes refactors for clarity, deduplication, or decoupling. Read-only — outputs plans, not edits.
|
description: Proposes refactors for clarity, deduplication, or decoupling. Read-only — outputs plans, not edits.
|
||||||
---
|
---
|
||||||
You propose refactors. You do not apply them. The user applies via OpenCode or Claude Code.
|
You propose refactors. You do not apply them. The user applies via OpenCode or Claude Code.
|
||||||
@@ -115,7 +115,7 @@ top_k: 20
|
|||||||
min_p: 0.0
|
min_p: 0.0
|
||||||
presence_penalty: 1.5
|
presence_penalty: 1.5
|
||||||
steps: 20
|
steps: 20
|
||||||
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
|
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
|
||||||
description: Designs new features, modules, or architectural changes. Outputs a build plan.
|
description: Designs new features, modules, or architectural changes. Outputs a build plan.
|
||||||
---
|
---
|
||||||
You design. You produce build plans, not code.
|
You design. You produce build plans, not code.
|
||||||
@@ -157,7 +157,7 @@ top_p: 0.95
|
|||||||
top_k: 20
|
top_k: 20
|
||||||
min_p: 0.0
|
min_p: 0.0
|
||||||
presence_penalty: 0.0
|
presence_penalty: 0.0
|
||||||
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
|
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
|
||||||
description: Audits code for security vulnerabilities. Read-only.
|
description: Audits code for security vulnerabilities. Read-only.
|
||||||
---
|
---
|
||||||
You audit for security issues. Concrete findings only, no generic warnings.
|
You audit for security issues. Concrete findings only, no generic warnings.
|
||||||
@@ -240,7 +240,7 @@ top_p: 0.95
|
|||||||
top_k: 20
|
top_k: 20
|
||||||
min_p: 0.0
|
min_p: 0.0
|
||||||
presence_penalty: 0.0
|
presence_penalty: 0.0
|
||||||
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes]
|
tools: [find_files, get_codebase_overview, get_dependencies, get_file_analysis, get_framework_analysis, get_semantic_neighborhoods, get_symbol_info, grep, list_dir, search_symbols, view_file, watch_changes, request_read_access, view_truncated_output, ask_user_input, git_status, get_blast_radius, get_hot_files, get_middleware, get_routes]
|
||||||
description: Discovers and maps unfamiliar codebases. Reads architecture, traces data flow, identifies key symbols.
|
description: Discovers and maps unfamiliar codebases. Reads architecture, traces data flow, identifies key symbols.
|
||||||
---
|
---
|
||||||
You map codebases. Start broad, then drill into specifics.
|
You map codebases. Start broad, then drill into specifics.
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ services:
|
|||||||
CONTAINER_GUIDANCE_FILE: /app/BOOCHAT.md
|
CONTAINER_GUIDANCE_FILE: /app/BOOCHAT.md
|
||||||
DATABASE_URL: postgres://boocode:${POSTGRES_PASSWORD}@boocode_db:5432/boochat
|
DATABASE_URL: postgres://boocode:${POSTGRES_PASSWORD}@boocode_db:5432/boochat
|
||||||
BOOCODER_URL: http://100.114.205.53:9502
|
BOOCODER_URL: http://100.114.205.53:9502
|
||||||
|
LLAMA_SIDECAR_URL: http://100.101.41.16:8402
|
||||||
volumes:
|
volumes:
|
||||||
- /opt:/opt
|
- /opt:/opt
|
||||||
- /opt/projects:/opt/projects:rw
|
- /opt/projects:/opt/projects:rw
|
||||||
|
|||||||
Reference in New Issue
Block a user