Files
boocode/apps/server/src/routes/inference-settings.ts
indifferentketchup c132215064 feat(web,server): inference settings UI with per-session inference overrides
Adds Inference tab to SettingsPane with controls for temperature, top-p,
top-k, min-p, and other inference parameters. Server-side route and
provider config wiring to pass overrides through the inference pipeline.
2026-06-07 22:16:29 +00:00

56 lines
1.9 KiB
TypeScript

import { FastifyInstance } from 'fastify';
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { resolve, dirname } from 'path';
const CONFIG_PATH = resolve(process.env.BOOCODE_DATA_DIR || '/opt/boocode/data', 'inference-settings.json');
const DEFAULTS = {
cache_type_k: 'q4_0',
cache_reuse: 256,
spec_type: 'ngram-mod',
spec_ngram_mod_thsh: 2,
ctx_checkpoints: 32,
sleep_idle_seconds: 600,
metrics_enabled: true,
slot_save_path: '/tmp/llama-slots',
};
function load(): Record<string, unknown> {
try {
if (existsSync(CONFIG_PATH)) {
return JSON.parse(readFileSync(CONFIG_PATH, 'utf-8'));
}
} catch { /* corrupt file */ }
return { ...DEFAULTS };
}
function save(data: Record<string, unknown>): void {
const dir = dirname(CONFIG_PATH);
if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
writeFileSync(CONFIG_PATH, JSON.stringify(data, null, 2) + '\n');
}
const VALID_CACHE_TYPES = ['f32', 'f16', 'q8_0', 'q4_0'] as const;
const VALID_SPEC_TYPES = ['off', 'ngram-mod', 'draft-simple'] as const;
export function registerInferenceSettingsRoutes(app: FastifyInstance): void {
app.get('/api/settings/inference', async (_req, _res) => {
return { ...DEFAULTS, ...load() };
});
app.patch<{ Body: Record<string, unknown> }>('/api/settings/inference', async (req, reply) => {
const current = { ...DEFAULTS, ...load() };
const merged = { ...current, ...req.body };
if (merged.cache_type_k && !(VALID_CACHE_TYPES as readonly string[]).includes(merged.cache_type_k as string)) {
return reply.status(400).send({ error: 'Invalid cache_type_k' });
}
if (merged.spec_type && !(VALID_SPEC_TYPES as readonly string[]).includes(merged.spec_type as string)) {
return reply.status(400).send({ error: 'Invalid spec_type' });
}
save(merged);
return { ...DEFAULTS, ...load() };
});
}