Adds an Inference tab to SettingsPane with controls for temperature, top-p, top-k, min-p, and other inference parameters. Also adds the server-side route and provider-config wiring needed to pass these overrides through the inference pipeline.
56 lines
1.9 KiB
TypeScript
56 lines
1.9 KiB
TypeScript
import { FastifyInstance } from 'fastify';
|
|
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
import { resolve, dirname } from 'path';
|
|
|
|
/**
 * Absolute path of the JSON file that holds the persisted inference settings.
 * The directory is taken from the BOOCODE_DATA_DIR environment variable; when
 * that is unset or empty, `/opt/boocode/data` is used instead.
 */
const CONFIG_PATH = resolve(
  process.env.BOOCODE_DATA_DIR || '/opt/boocode/data',
  'inference-settings.json',
);
|
|
|
|
/**
 * Built-in value for every known inference setting.
 *
 * `load()` returns a copy of this object when no settings file can be read,
 * and the routes spread persisted values on top of it, so a key missing from
 * the file still has a value in API responses.
 *
 * NOTE(review): the key names look like llama.cpp server options — confirm
 * against the consumer of these settings before documenting units or ranges.
 */
const DEFAULTS = {
  // Checked against VALID_CACHE_TYPES by the PATCH route.
  cache_type_k: 'q4_0',
  cache_reuse: 256,
  // Checked against VALID_SPEC_TYPES by the PATCH route.
  spec_type: 'ngram-mod',
  spec_ngram_mod_thsh: 2,
  ctx_checkpoints: 32,
  sleep_idle_seconds: 600,
  metrics_enabled: true,
  slot_save_path: '/tmp/llama-slots',
};
|
|
|
|
/**
 * Reads the persisted inference settings from CONFIG_PATH.
 *
 * This is best-effort: a missing, unreadable, or malformed file is not an
 * error. The same applies to a file whose JSON is valid but is not an object
 * (for example `null`, an array, or a bare string), because callers spread the
 * result and a non-object would produce junk keys.
 *
 * @returns The parsed settings object as stored (not merged with DEFAULTS), or
 *          a fresh copy of DEFAULTS when no usable file exists.
 */
function load(): Record<string, unknown> {
  try {
    if (existsSync(CONFIG_PATH)) {
      const parsed: unknown = JSON.parse(readFileSync(CONFIG_PATH, 'utf-8'));
      // Only a plain JSON object is a usable settings record.
      if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
        return parsed as Record<string, unknown>;
      }
    }
  } catch {
    // Unreadable or corrupt file: deliberately fall through to the defaults.
  }
  return { ...DEFAULTS };
}
|
|
|
|
/**
 * Persists the given settings to CONFIG_PATH as pretty-printed JSON
 * (2-space indent, trailing newline), creating the parent directory if needed.
 *
 * @param data Settings object to write; it replaces the file's previous content.
 * @throws Whatever `mkdirSync` / `writeFileSync` throw (e.g. permission errors);
 *         nothing is caught here.
 */
function save(data: Record<string, unknown>): void {
  // With `recursive: true`, mkdirSync succeeds when the directory already
  // exists, so no separate existence check is needed.
  mkdirSync(dirname(CONFIG_PATH), { recursive: true });
  writeFileSync(CONFIG_PATH, `${JSON.stringify(data, null, 2)}\n`);
}
|
|
|
|
/** Allowed values for the `cache_type_k` setting; the PATCH route checks against this list. */
const VALID_CACHE_TYPES = ['f32', 'f16', 'q8_0', 'q4_0'] as const;

/** Allowed values for the `spec_type` setting; the PATCH route checks against this list. */
const VALID_SPEC_TYPES = ['off', 'ngram-mod', 'draft-simple'] as const;
|
|
|
|
/**
 * Registers the HTTP routes that expose the inference settings.
 *
 * - `GET /api/settings/inference` returns the persisted settings layered over
 *   DEFAULTS.
 * - `PATCH /api/settings/inference` merges the JSON-object body into the
 *   current settings, validates `cache_type_k` and `spec_type`, persists the
 *   result, and returns the settings as re-read from disk. Invalid input gets
 *   a 400 response with an `error` message and nothing is saved.
 *
 * Keys other than the two validated ones are stored exactly as sent.
 *
 * @param app Fastify instance the routes are attached to.
 */
export function registerInferenceSettingsRoutes(app: FastifyInstance): void {
  // Membership test that also rejects non-string and empty values. A plain
  // truthiness guard would let '', null, 0 or false skip validation and be
  // written to disk.
  const isOneOf = (allowed: readonly string[], value: unknown): boolean =>
    typeof value === 'string' && allowed.includes(value);

  app.get('/api/settings/inference', async () => ({ ...DEFAULTS, ...load() }));

  app.patch<{ Body: Record<string, unknown> }>('/api/settings/inference', async (req, reply) => {
    // A request without a body is treated as an empty patch. Any other
    // non-object body (string, array, number) would spread into junk keys,
    // so it is rejected up front.
    const patch = req.body ?? {};
    if (typeof patch !== 'object' || Array.isArray(patch)) {
      return reply.status(400).send({ error: 'Request body must be a JSON object' });
    }

    // Later spreads win: defaults < persisted values < incoming patch.
    const merged: Record<string, unknown> = { ...DEFAULTS, ...load(), ...patch };

    if (!isOneOf(VALID_CACHE_TYPES, merged.cache_type_k)) {
      return reply.status(400).send({ error: 'Invalid cache_type_k' });
    }
    if (!isOneOf(VALID_SPEC_TYPES, merged.spec_type)) {
      return reply.status(400).send({ error: 'Invalid spec_type' });
    }

    save(merged);
    // Re-read so the response reflects what was actually persisted.
    return { ...DEFAULTS, ...load() };
  });
}
|