Adds an Inference tab to SettingsPane with controls for temperature, top-p, top-k, min-p, and other inference parameters. Also wires the server-side route and provider config so that overrides pass through the inference pipeline.
850 lines
30 KiB
TypeScript
850 lines
30 KiB
TypeScript
import { useEffect, useState } from 'react';
|
|
import { Archive, FolderOpen, Maximize2, Minimize2, Trash2, X, Database, Zap, Clock, BarChart3, Folder } from 'lucide-react';
|
|
import { toast } from 'sonner';
|
|
import { api } from '@/api/client';
|
|
import type { Project, Session } from '@/api/types';
|
|
import { Button } from '@/components/ui/button';
|
|
import { Textarea } from '@/components/ui/textarea';
|
|
import {
|
|
Dialog,
|
|
DialogContent,
|
|
DialogDescription,
|
|
DialogFooter,
|
|
DialogHeader,
|
|
DialogTitle,
|
|
} from '@/components/ui/dialog';
|
|
import { ModelPicker } from '@/components/ModelPicker';
|
|
import { ThemePicker } from '@/components/ThemePicker';
|
|
import { InferenceSettings as InferenceSettingsComponent } from '@/components/InferenceSettings';
|
|
import { ProvidersSettings } from '@/components/coder/ProvidersSettings';
|
|
import { cn } from '@/lib/utils';
|
|
|
|
// Identifier of a settings tab; SettingsPane keeps the active one in state
// and renders exactly one section component for it.
type Section = 'session' | 'project' | 'theme' | 'providers' | 'inference';
|
|
|
|
/** Props accepted by {@link SettingsPane}. */
interface Props {
  /** Session handed to the "session" tab. */
  session: Session;

  /** Project handed to the "project" tab and, for its defaults, to the "session" tab. */
  project: Project;

  /** Whether the pane is currently maximized; picks the Restore vs. Maximize icon and labels. */
  maximized: boolean;

  /** Called when the maximize/restore button is clicked. */
  onToggleMaximize: () => void;

  /** Called when the close button is clicked. */
  onClose: () => void;

  /** When true the maximize/restore button is not rendered. */
  isMobile: boolean;
}
|
|
|
|
/**
 * Hand-rolled on/off switch (v1.9). The existing ui/ set has no shadcn
 * switch and the v1.9 dispatch ruled out adding one via `pnpm dlx`, so this
 * primitive stays deliberately single-purpose.
 *
 * Renders a `<button role="switch">` whose `aria-checked` mirrors `checked`.
 * A click reports the opposite value through `onCheckedChange`; the component
 * holds no state of its own, so the caller must feed the new value back in.
 */
function Switch(props: {
  checked: boolean;
  onCheckedChange: (v: boolean) => void;
  disabled?: boolean;
  id?: string;
}) {
  const { checked, onCheckedChange, disabled, id } = props;

  // Track: coloured by state, dimmed when disabled.
  const trackClassName = cn(
    'relative inline-flex h-5 w-9 shrink-0 cursor-pointer items-center rounded-full transition-colors',
    checked ? 'bg-primary' : 'bg-muted',
    disabled && 'opacity-50 cursor-not-allowed',
  );

  // Thumb: slides right when checked.
  const thumbClassName = cn(
    'inline-block h-4 w-4 transform rounded-full bg-background transition-transform',
    checked ? 'translate-x-[1.125rem]' : 'translate-x-0.5',
  );

  const handleClick = () => {
    onCheckedChange(!checked);
  };

  return (
    <button
      id={id}
      type="button"
      role="switch"
      aria-checked={checked}
      disabled={disabled}
      onClick={handleClick}
      className={trackClassName}
    >
      <span className={thumbClassName} />
    </button>
  );
}
|
|
|
|
/**
 * Settings panel: a header row with one tab button per section plus
 * maximize/restore (hidden on mobile) and close buttons, and below it a
 * scrollable body that renders the component for the active tab.
 *
 * The active tab is local state and starts on "session".
 */
export function SettingsPane({ session, project, maximized, onToggleMaximize, onClose, isMobile }: Props) {
  const [activeSection, setActiveSection] = useState<Section>('session');

  const sectionIds = ['session', 'project', 'theme', 'providers', 'inference'] as const;
  const iconButtonClass =
    'inline-flex items-center justify-center p-1 rounded text-muted-foreground hover:bg-muted hover:text-foreground';

  // One button per tab; the label is the section id (capitalised via CSS).
  const tabButtons = sectionIds.map((sectionId) => {
    const isActive = activeSection === sectionId;
    return (
      <button
        key={sectionId}
        type="button"
        onClick={() => setActiveSection(sectionId)}
        className={cn(
          'text-xs px-2 py-1 rounded capitalize',
          isActive ? 'bg-background text-foreground' : 'text-muted-foreground hover:bg-muted',
        )}
      >
        {sectionId}
      </button>
    );
  });

  // Exactly one section component is mounted at a time.
  function renderActiveSection() {
    switch (activeSection) {
      case 'session':
        return <SessionSection session={session} project={project} />;
      case 'project':
        return <ProjectSection project={project} />;
      case 'theme':
        return <ThemePicker />;
      case 'providers':
        return <ProvidersSettings />;
      case 'inference':
        return <InferenceSettingsComponent />;
    }
  }

  return (
    <div className="flex flex-col h-full min-h-0">
      <div className="flex items-center gap-2 border-b border-border bg-muted/20 px-3 py-1.5 shrink-0">
        <div className="flex items-center gap-1 flex-1 min-w-0">{tabButtons}</div>
        {isMobile ? null : (
          <button
            type="button"
            onClick={onToggleMaximize}
            className={iconButtonClass}
            aria-label={maximized ? 'Restore' : 'Maximize'}
            title={maximized ? 'Restore (Esc)' : 'Maximize'}
          >
            {maximized ? <Minimize2 size={14} /> : <Maximize2 size={14} />}
          </button>
        )}
        <button
          type="button"
          onClick={onClose}
          className={`${iconButtonClass} max-md:min-h-[44px] max-md:min-w-[44px]`}
          aria-label="Close settings"
          title="Close (Esc)"
        >
          <X size={14} />
        </button>
      </div>

      <div className="flex-1 overflow-y-auto">
        <div className="max-w-[720px] mx-auto w-full px-4 py-4 space-y-6">{renderActiveSection()}</div>
      </div>
    </div>
  );
}
|
|
|
|
/**
 * "Session" tab of the settings pane.
 *
 * Name, system prompt and the web-search override are edited as a local draft
 * and sent in a single `api.sessions.update` call when Save is pressed. The
 * model picker and the "Reset to project default" button each send their own
 * update immediately. The tab also hosts the cross-repo read grants list and
 * the two-step "Archive all chats" flow.
 *
 * The draft is re-seeded from the `session` prop whenever its id, name,
 * system prompt or web-search flag changes.
 */
function SessionSection({ session, project }: { session: Session; project: Project }) {
  const [name, setName] = useState(session.name);
  const [systemPrompt, setSystemPrompt] = useState(session.system_prompt);
  // v1.9: tri-state on the wire (null = inherit). UI surfaces a 3-way toggle
  // via "Inherit project default" checkbox plus the override switch.
  const [webSearch, setWebSearch] = useState<boolean | null>(session.web_search_enabled);
  const [saving, setSaving] = useState(false);
  // v1.9: bulk-archive chats. Two-step: openChatsCount → confirm dialog →
  // archiveAllChats. Server publishes one chat_archived frame per id so
  // useSidebar / chat lists update incrementally.
  const [archiveOpen, setArchiveOpen] = useState(false);
  const [archiveCount, setArchiveCount] = useState(0);
  const [archiving, setArchiving] = useState(false);

  useEffect(() => {
    setName(session.name);
    setSystemPrompt(session.system_prompt);
    setWebSearch(session.web_search_enabled);
  }, [session.id, session.name, session.system_prompt, session.web_search_enabled]);

  const dirty =
    name !== session.name ||
    systemPrompt !== session.system_prompt ||
    webSearch !== session.web_search_enabled;

  // What the switch shows: the explicit override, or the project default when inheriting.
  const effectiveWebSearch = webSearch ?? project.default_web_search_enabled;
  const projectPreview = project.default_system_prompt.trim().slice(0, 200);

  /** Sends the drafted name, system prompt and web-search override in one update. */
  async function save() {
    if (saving) return;
    setSaving(true);
    const draftName = name;
    // A blank name is never sent; the current session name is kept instead.
    const nameToSend = draftName.trim() || session.name;
    try {
      await api.sessions.update(session.id, {
        name: nameToSend,
        system_prompt: systemPrompt,
        web_search_enabled: webSearch,
      });
      // Show what was actually sent. Without this, a blank or whitespace-padded
      // draft would keep the form dirty after a successful save. Skipped if the
      // user kept typing while the request was in flight.
      setName((current) => (current === draftName ? nameToSend : current));
      toast.success('Session saved');
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'save failed');
    } finally {
      setSaving(false);
    }
  }

  /** Immediately sends an empty system prompt (empty = inherit project default). */
  async function resetSystemPrompt() {
    if (saving) return;
    setSaving(true);
    try {
      await api.sessions.update(session.id, { system_prompt: '' });
      toast.success('Reset to project default');
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'reset failed');
    } finally {
      setSaving(false);
    }
  }

  /** Step 1 of bulk archive: fetch the open-chat count and open the confirm dialog if non-zero. */
  async function openArchiveDialog() {
    if (archiving) return;
    try {
      const { count } = await api.sessions.openChatsCount(session.id);
      if (count === 0) {
        toast('No open chats to archive.');
        return;
      }
      setArchiveCount(count);
      setArchiveOpen(true);
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'failed to count chats');
    }
  }

  /** Step 2 of bulk archive: archive everything and close the dialog on success. */
  async function confirmArchive() {
    if (archiving) return;
    setArchiving(true);
    try {
      const { archived } = await api.sessions.archiveAllChats(session.id);
      toast.success(`Archived ${archived} chat${archived === 1 ? '' : 's'}`);
      setArchiveOpen(false);
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'archive failed');
    } finally {
      setArchiving(false);
    }
  }

  return (
    <div className="space-y-6">
      <div className="space-y-1.5">
        <label
          htmlFor="session-name"
          className="text-xs font-medium uppercase tracking-wide text-muted-foreground"
        >
          Session name
        </label>
        <input
          id="session-name"
          type="text"
          value={name}
          onChange={(e) => setName(e.target.value)}
          className="w-full bg-background border border-border rounded px-2 py-1.5 text-sm outline-none focus:border-ring"
        />
      </div>

      <div className="space-y-1.5">
        <label className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
          Model
        </label>
        <div className="inline-flex items-center rounded-full bg-muted/40 hover:bg-muted/70 px-1">
          <ModelPicker
            value={session.model}
            onChange={async (model) => {
              try {
                await api.sessions.update(session.id, { model });
                toast.success('Model updated');
              } catch (err) {
                toast.error(err instanceof Error ? err.message : 'failed to set model');
              }
            }}
          />
        </div>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center justify-between gap-3">
          <label htmlFor="session-web-search" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
            Web search and fetch
          </label>
          <Switch
            id="session-web-search"
            checked={effectiveWebSearch}
            onCheckedChange={(v) => setWebSearch(v)}
          />
        </div>
        <div className="flex items-center gap-2 text-xs text-muted-foreground">
          <input
            type="checkbox"
            id="session-web-search-inherit"
            checked={webSearch === null}
            onChange={(e) => setWebSearch(e.target.checked ? null : project.default_web_search_enabled)}
          />
          <label htmlFor="session-web-search-inherit" className="cursor-pointer">
            Inherit project default ({project.default_web_search_enabled ? 'on' : 'off'})
          </label>
        </div>
      </div>

      <AllowedReadPathsSection session={session} />

      <div className="space-y-1.5">
        <div className="flex items-center justify-between gap-3">
          <label
            htmlFor="session-system-prompt"
            className="text-xs font-medium uppercase tracking-wide text-muted-foreground"
          >
            System prompt
          </label>
          <button
            type="button"
            onClick={() => void resetSystemPrompt()}
            disabled={saving || session.system_prompt === ''}
            className="text-xs text-muted-foreground hover:text-foreground disabled:opacity-40 disabled:cursor-not-allowed"
          >
            Reset to project default
          </button>
        </div>
        <Textarea
          id="session-system-prompt"
          value={systemPrompt}
          onChange={(e) => setSystemPrompt(e.target.value)}
          rows={6}
          className="resize-y min-h-[120px] max-h-[60vh]"
          placeholder="Per-session override (optional). Empty = inherit project default."
        />
        {systemPrompt.trim().length === 0 && projectPreview.length > 0 && (
          <p className="text-xs text-muted-foreground">
            Falls back to project default: <span className="italic">{projectPreview}{projectPreview.length === 200 ? '…' : ''}</span>
          </p>
        )}
      </div>

      <div className="flex justify-end gap-2">
        <Button onClick={() => void save()} disabled={!dirty || saving}>
          {saving ? 'Saving…' : 'Save'}
        </Button>
      </div>

      <div className="border-t pt-4">
        <Button
          variant="outline"
          onClick={() => void openArchiveDialog()}
          disabled={archiving}
          className="gap-1.5"
        >
          <Archive size={14} /> Archive all chats
        </Button>
      </div>

      {/* The dialog cannot be dismissed while the archive request is in flight. */}
      <Dialog open={archiveOpen} onOpenChange={(open) => { if (!archiving) setArchiveOpen(open); }}>
        <DialogContent>
          <DialogHeader>
            <DialogTitle>Archive all chats?</DialogTitle>
            <DialogDescription>
              Archive {archiveCount} open chat{archiveCount === 1 ? '' : 's'} in this session?
              Archived chats stay accessible via the archive view.
            </DialogDescription>
          </DialogHeader>
          <DialogFooter>
            <Button variant="outline" onClick={() => setArchiveOpen(false)} disabled={archiving}>
              Cancel
            </Button>
            <Button onClick={() => void confirmArchive()} disabled={archiving}>
              {archiving ? 'Archiving…' : `Archive ${archiveCount}`}
            </Button>
          </DialogFooter>
        </DialogContent>
      </Dialog>
    </div>
  );
}
|
|
|
|
/**
 * v1.13.17-cross-repo-reads: revoke UI for `session.allowed_read_paths`.
 *
 * Grants are appended elsewhere (the inline request_read_access pause flow);
 * this section can only shrink the list. PATCH /api/sessions/:id replaces the
 * whole array, so a revoke sends the current list minus the removed entry and
 * then adopts whatever list the update call returns.
 */
function AllowedReadPathsSection({ session }: { session: Session }) {
  const [paths, setPaths] = useState<string[]>(session.allowed_read_paths);
  // Path whose revoke request is in flight, or null when idle.
  const [revoking, setRevoking] = useState<string | null>(null);

  // Follow the session prop (e.g. WS session_updated after a new grant
  // lands); otherwise a grant approved in this same chat would stay invisible
  // until settings is closed and reopened.
  useEffect(() => {
    setPaths(session.allowed_read_paths);
  }, [session.id, session.allowed_read_paths]);

  async function revokeGrant(target: string) {
    if (revoking) return;
    setRevoking(target);
    const remaining = paths.filter((candidate) => candidate !== target);
    try {
      const updatedSession = await api.sessions.update(session.id, { allowed_read_paths: remaining });
      setPaths(updatedSession.allowed_read_paths);
      toast.success('Grant revoked');
    } catch (err) {
      const message = err instanceof Error ? err.message : 'failed to revoke';
      toast.error(message);
    } finally {
      setRevoking(null);
    }
  }

  // Every revoke button is disabled while any revoke is pending.
  const busy = revoking !== null;

  const emptyState = (
    <p className="text-xs text-muted-foreground italic">
      The agent has no access outside this project. Grants are created when
      the agent asks for them inline.
    </p>
  );

  const grantList = (
    <ul className="space-y-1">
      {paths.map((grantedPath) => (
        <li
          key={grantedPath}
          className="flex items-center gap-2 rounded border bg-background/60 px-2 py-1.5"
        >
          <FolderOpen className="size-3.5 shrink-0 text-muted-foreground" />
          <span className="font-mono text-xs flex-1 min-w-0 break-all">{grantedPath}</span>
          <button
            type="button"
            onClick={() => void revokeGrant(grantedPath)}
            disabled={busy}
            aria-label={`Revoke ${grantedPath}`}
            title="Revoke"
            className="inline-flex items-center justify-center size-7 rounded text-muted-foreground hover:bg-muted hover:text-destructive disabled:opacity-40 disabled:cursor-not-allowed max-md:min-h-[44px] max-md:min-w-[44px]"
          >
            <Trash2 className="size-3.5" />
          </button>
        </li>
      ))}
    </ul>
  );

  return (
    <div className="space-y-1.5">
      <label className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
        Cross-repo read grants
      </label>
      {paths.length === 0 ? emptyState : grantList}
      <p className="text-xs text-muted-foreground">
        Grants are session-scoped. Archiving the session clears them.
      </p>
    </div>
  );
}
|
|
|
|
/**
 * "Project" tab of the settings pane.
 *
 * Name, default system prompt and default web-search flag are edited as a
 * local draft and sent in a single `api.projects.update` call when Save is
 * pressed. The "Clear" button sends an empty default prompt immediately. The
 * root path is shown read-only. The tab also hosts the two-step
 * "Archive all sessions" flow.
 *
 * The draft is re-seeded from the `project` prop whenever its id, name,
 * default prompt or default web-search flag changes.
 */
function ProjectSection({ project }: { project: Project }) {
  const [name, setName] = useState(project.name);
  const [defaultPrompt, setDefaultPrompt] = useState(project.default_system_prompt);
  const [defaultWebSearch, setDefaultWebSearch] = useState(project.default_web_search_enabled);
  const [saving, setSaving] = useState(false);
  // v1.9: bulk-archive sessions. Same shape as the chats-archive flow in
  // SessionSection — count, confirm, fire.
  const [archiveOpen, setArchiveOpen] = useState(false);
  const [archiveCount, setArchiveCount] = useState(0);
  const [archiving, setArchiving] = useState(false);

  useEffect(() => {
    setName(project.name);
    setDefaultPrompt(project.default_system_prompt);
    setDefaultWebSearch(project.default_web_search_enabled);
  }, [
    project.id,
    project.name,
    project.default_system_prompt,
    project.default_web_search_enabled,
  ]);

  const dirty =
    name !== project.name ||
    defaultPrompt !== project.default_system_prompt ||
    defaultWebSearch !== project.default_web_search_enabled;

  /** Sends the drafted name, default prompt and default web-search flag in one update. */
  async function save() {
    if (saving) return;
    setSaving(true);
    const draftName = name;
    // A blank name is never sent; the current project name is kept instead.
    const nameToSend = draftName.trim() || project.name;
    try {
      await api.projects.update(project.id, {
        name: nameToSend,
        default_system_prompt: defaultPrompt,
        default_web_search_enabled: defaultWebSearch,
      });
      // Show what was actually sent. Without this, a blank or whitespace-padded
      // draft would keep the form dirty after a successful save. Skipped if the
      // user kept typing while the request was in flight.
      setName((current) => (current === draftName ? nameToSend : current));
      toast.success('Project saved');
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'save failed');
    } finally {
      setSaving(false);
    }
  }

  /** Immediately sends an empty default system prompt. */
  async function clearDefaultPrompt() {
    if (saving) return;
    setSaving(true);
    try {
      await api.projects.update(project.id, { default_system_prompt: '' });
      toast.success('Cleared');
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'clear failed');
    } finally {
      setSaving(false);
    }
  }

  /** Step 1 of bulk archive: fetch the open-session count and open the confirm dialog if non-zero. */
  async function openArchiveDialog() {
    if (archiving) return;
    try {
      const { count } = await api.projects.openSessionsCount(project.id);
      if (count === 0) {
        toast('No open sessions to archive.');
        return;
      }
      setArchiveCount(count);
      setArchiveOpen(true);
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'failed to count sessions');
    }
  }

  /** Step 2 of bulk archive: archive everything and close the dialog on success. */
  async function confirmArchive() {
    if (archiving) return;
    setArchiving(true);
    try {
      const { archived } = await api.projects.archiveAllSessions(project.id);
      toast.success(`Archived ${archived} session${archived === 1 ? '' : 's'}`);
      setArchiveOpen(false);
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'archive failed');
    } finally {
      setArchiving(false);
    }
  }

  return (
    <div className="space-y-6">
      <div className="space-y-1.5">
        <label
          htmlFor="project-name"
          className="text-xs font-medium uppercase tracking-wide text-muted-foreground"
        >
          Project name
        </label>
        <input
          id="project-name"
          type="text"
          value={name}
          onChange={(e) => setName(e.target.value)}
          className="w-full bg-background border border-border rounded px-2 py-1.5 text-sm outline-none focus:border-ring"
        />
      </div>

      <div className="space-y-1.5">
        <label className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
          Root path
        </label>
        <div className="font-mono text-xs text-muted-foreground bg-muted/40 rounded px-2 py-1.5 select-all">
          {project.path}
        </div>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center justify-between gap-3">
          <label htmlFor="project-default-web-search" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
            Default web search
          </label>
          <Switch
            id="project-default-web-search"
            checked={defaultWebSearch}
            onCheckedChange={setDefaultWebSearch}
          />
        </div>
        <p className="text-xs text-muted-foreground italic">
          Applies to new sessions only.
        </p>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center justify-between gap-3">
          <label
            htmlFor="project-default-system-prompt"
            className="text-xs font-medium uppercase tracking-wide text-muted-foreground"
          >
            Default system prompt
          </label>
          <button
            type="button"
            onClick={() => void clearDefaultPrompt()}
            disabled={saving || project.default_system_prompt === ''}
            className="text-xs text-muted-foreground hover:text-foreground disabled:opacity-40 disabled:cursor-not-allowed"
          >
            Clear
          </button>
        </div>
        <Textarea
          id="project-default-system-prompt"
          value={defaultPrompt}
          onChange={(e) => setDefaultPrompt(e.target.value)}
          rows={6}
          className="resize-y min-h-[120px] max-h-[60vh]"
          placeholder="Prepended to every new session's system prompt (when its own is empty). Empty = no project default."
        />
      </div>

      <p className="text-xs text-muted-foreground">
        Existing sessions are not affected by changes here.
      </p>

      <div className="flex justify-end gap-2">
        <Button onClick={() => void save()} disabled={!dirty || saving}>
          {saving ? 'Saving…' : 'Save'}
        </Button>
      </div>

      <div className="border-t pt-4">
        <Button
          variant="outline"
          onClick={() => void openArchiveDialog()}
          disabled={archiving}
          className="gap-1.5"
        >
          <Archive size={14} /> Archive all sessions
        </Button>
      </div>

      {/* The dialog cannot be dismissed while the archive request is in flight. */}
      <Dialog open={archiveOpen} onOpenChange={(open) => { if (!archiving) setArchiveOpen(open); }}>
        <DialogContent>
          <DialogHeader>
            <DialogTitle>Archive all sessions?</DialogTitle>
            <DialogDescription>
              Archive {archiveCount} open session{archiveCount === 1 ? '' : 's'} in this project?
              Archived sessions stay accessible via the archive view.
            </DialogDescription>
          </DialogHeader>
          <DialogFooter>
            <Button variant="outline" onClick={() => setArchiveOpen(false)} disabled={archiving}>
              Cancel
            </Button>
            <Button onClick={() => void confirmArchive()} disabled={archiving}>
              {archiving ? 'Archiving…' : `Archive ${archiveCount}`}
            </Button>
          </DialogFooter>
        </DialogContent>
      </Dialog>
    </div>
  );
}
|
|
|
|
/**
 * Values edited by the local inference settings form and persisted as JSON
 * in localStorage. Field meanings below are taken from the form's own labels
 * and help text.
 */
interface InferenceSettings {
  /** KV cache quantization; the form offers `f32`, `f16`, `q8_0` and `q4_0`. */
  cacheTypeK: string;

  /** Minimum chunk size in tokens to reuse across turns; 0 = disabled. */
  cacheReuse: number;

  /** Speculative decoding mode; the form offers `off`, `ngram-mod` and `draft-simple`. */
  specType: string;

  /** Max checkpoints per slot; 0 = disabled. */
  ctxCheckpoints: number;

  /** Auto-sleep after this many idle seconds; -1 = disabled. */
  sleepIdleSeconds: number;

  /** Whether the Prometheus /metrics endpoint is exposed. */
  metrics: boolean;

  /** Directory for the disk-persistent KV cache. */
  slotSavePath: string;
}
|
|
|
|
/**
 * Values the inference form starts from, falls back to for anything missing
 * from stored settings, and restores on "Reset to defaults".
 */
const INFERENCE_DEFAULTS: InferenceSettings = {
  // Cache
  cacheTypeK: 'q4_0',
  cacheReuse: 256,

  // Decoding
  specType: 'ngram-mod',
  ctxCheckpoints: 32,

  // Process lifecycle / observability
  sleepIdleSeconds: 600,
  metrics: true,

  // Persistence
  slotSavePath: '/tmp/llama-slots',
};
|
|
|
|
// localStorage key under which the inference settings form reads and writes
// its values as a JSON string.
const STORAGE_KEY = 'boocode-inference-settings';
|
|
|
|
/**
 * Parses the text of a number input as a base-10 integer.
 *
 * Returns `fallback` when the text is not an integer (e.g. empty or "-"),
 * otherwise the parsed value clamped to `[min, max]`. Unlike
 * `parseInt(x) || fallback`, a typed `0` is kept as `0`.
 */
function parseIntegerInput(raw: string, fallback: number, min = -Infinity, max = Infinity): number {
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) return fallback;
  return Math.min(max, Math.max(min, parsed));
}

/**
 * Builds a complete settings object from untrusted parsed JSON: each field is
 * taken from `raw` only when it has the expected primitive type, otherwise
 * the value from INFERENCE_DEFAULTS is used.
 */
function mergeStoredInferenceSettings(raw: unknown): InferenceSettings {
  if (typeof raw !== 'object' || raw === null) return { ...INFERENCE_DEFAULTS };
  const candidate = raw as Partial<Record<keyof InferenceSettings, unknown>>;

  const text = (value: unknown, fallback: string): string =>
    typeof value === 'string' ? value : fallback;
  const finite = (value: unknown, fallback: number): number =>
    typeof value === 'number' && Number.isFinite(value) ? value : fallback;

  return {
    cacheTypeK: text(candidate.cacheTypeK, INFERENCE_DEFAULTS.cacheTypeK),
    cacheReuse: finite(candidate.cacheReuse, INFERENCE_DEFAULTS.cacheReuse),
    specType: text(candidate.specType, INFERENCE_DEFAULTS.specType),
    ctxCheckpoints: finite(candidate.ctxCheckpoints, INFERENCE_DEFAULTS.ctxCheckpoints),
    sleepIdleSeconds: finite(candidate.sleepIdleSeconds, INFERENCE_DEFAULTS.sleepIdleSeconds),
    metrics: typeof candidate.metrics === 'boolean' ? candidate.metrics : INFERENCE_DEFAULTS.metrics,
    slotSavePath: text(candidate.slotSavePath, INFERENCE_DEFAULTS.slotSavePath),
  };
}

/**
 * Form for the locally stored inference settings.
 *
 * Values are loaded from localStorage (key STORAGE_KEY) after mount, edited
 * as a draft, and written back on Save. "Reset to defaults" restores and
 * persists INFERENCE_DEFAULTS. Nothing is rendered until the initial load has
 * run. Saving is synchronous, so there is no in-flight state.
 *
 * NOTE(review): SettingsPane renders the `InferenceSettingsComponent`
 * imported from '@/components/InferenceSettings', not this local function —
 * confirm which of the two is meant to back the Inference tab.
 */
function InferenceSettings() {
  const [settings, setSettings] = useState<InferenceSettings>(INFERENCE_DEFAULTS);
  // Snapshot of what is currently persisted; `dirty` compares against this
  // instead of re-reading and re-parsing localStorage on every render.
  const [persisted, setPersisted] = useState<InferenceSettings>(INFERENCE_DEFAULTS);
  const [loaded, setLoaded] = useState(false);

  useEffect(() => {
    try {
      const stored = localStorage.getItem(STORAGE_KEY);
      if (stored) {
        const restored = mergeStoredInferenceSettings(JSON.parse(stored));
        setSettings(restored);
        setPersisted(restored);
      }
    } catch { /* corrupt or unavailable storage: keep the defaults */ }
    setLoaded(true);
  }, []);

  const settingKeys = Object.keys(INFERENCE_DEFAULTS) as (keyof InferenceSettings)[];
  const dirty = loaded && settingKeys.some((key) => settings[key] !== persisted[key]);

  function update<K extends keyof InferenceSettings>(key: K, value: InferenceSettings[K]) {
    setSettings(prev => ({ ...prev, [key]: value }));
  }

  /** Persists the current draft; reports storage failures as an error toast. */
  function save() {
    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify(settings));
      setPersisted(settings);
      toast.success('Inference settings saved. Restart sidecar to apply.');
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'save failed');
    }
  }

  /** Restores the defaults in the form and persists them. */
  function resetDefaults() {
    try {
      // The form is reset first so it shows defaults even if persisting fails;
      // in that case the form stays dirty and Save can be retried.
      setSettings(INFERENCE_DEFAULTS);
      localStorage.setItem(STORAGE_KEY, JSON.stringify(INFERENCE_DEFAULTS));
      setPersisted(INFERENCE_DEFAULTS);
      toast.success('Reset to defaults');
    } catch (err) {
      toast.error(err instanceof Error ? err.message : 'reset failed');
    }
  }

  if (!loaded) return null;

  return (
    <div className="space-y-6">
      <div className="space-y-1.5">
        <div className="flex items-center gap-2">
          <Database className="size-3.5 text-muted-foreground" />
          <label htmlFor="cache-type-k" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
            KV Cache Quantization
          </label>
        </div>
        <select
          id="cache-type-k"
          value={settings.cacheTypeK}
          onChange={(e) => update('cacheTypeK', e.target.value)}
          className="w-full bg-background border border-border rounded px-2 py-1.5 text-sm outline-none focus:border-ring"
        >
          <option value="f32">f32 — 32-bit (max quality)</option>
          <option value="f16">f16 — 16-bit (balanced)</option>
          <option value="q8_0">q8_0 — 8-bit (efficient)</option>
          <option value="q4_0">q4_0 — 4-bit (max efficiency)</option>
        </select>
        <p className="text-xs text-muted-foreground italic">
          Compresses the attention cache. Lower = less VRAM usage.
        </p>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center gap-2">
          <Zap className="size-3.5 text-muted-foreground" />
          <label htmlFor="cache-reuse" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
            Cache Reuse (Prompt Caching)
          </label>
        </div>
        <input
          id="cache-reuse"
          type="number"
          min={0}
          step={64}
          value={settings.cacheReuse}
          onChange={(e) => update('cacheReuse', parseIntegerInput(e.target.value, 0, 0))}
          className="w-full bg-background border border-border rounded px-2 py-1.5 text-sm outline-none focus:border-ring"
        />
        <p className="text-xs text-muted-foreground italic">
          Minimum chunk size in tokens to reuse across turns. 0 = disabled.
        </p>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center gap-2">
          <Zap className="size-3.5 text-muted-foreground" />
          <label htmlFor="spec-type" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
            Speculative Decoding
          </label>
        </div>
        <select
          id="spec-type"
          value={settings.specType}
          onChange={(e) => update('specType', e.target.value)}
          className="w-full bg-background border border-border rounded px-2 py-1.5 text-sm outline-none focus:border-ring"
        >
          <option value="off">Off</option>
          <option value="ngram-mod">ngram-mod — Lightweight (~16MB, no draft model)</option>
          <option value="draft-simple">draft-simple — Requires separate draft model</option>
        </select>
        <p className="text-xs text-muted-foreground italic">
          Predicts tokens ahead using a small model. Main model verifies in batch for 2-3x speedup.
        </p>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center gap-2">
          <Database className="size-3.5 text-muted-foreground" />
          <label htmlFor="ctx-checkpoints" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
            Context Checkpoints
          </label>
        </div>
        <input
          id="ctx-checkpoints"
          type="number"
          min={0}
          max={256}
          value={settings.ctxCheckpoints}
          onChange={(e) => update('ctxCheckpoints', parseIntegerInput(e.target.value, 0, 0, 256))}
          className="w-full bg-background border border-border rounded px-2 py-1.5 text-sm outline-none focus:border-ring"
        />
        <p className="text-xs text-muted-foreground italic">
          Max checkpoints per slot. 0 = disabled.
        </p>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center gap-2">
          <Clock className="size-3.5 text-muted-foreground" />
          <label htmlFor="sleep-idle" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
            Sleep Idle
          </label>
        </div>
        <input
          id="sleep-idle"
          type="number"
          min={-1}
          step={60}
          value={settings.sleepIdleSeconds}
          onChange={(e) => update('sleepIdleSeconds', parseIntegerInput(e.target.value, -1, -1))}
          className="w-full bg-background border border-border rounded px-2 py-1.5 text-sm outline-none focus:border-ring"
        />
        <p className="text-xs text-muted-foreground italic">
          Auto-sleep after N seconds idle. -1 = disabled.
        </p>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center justify-between gap-3">
          <div className="flex items-center gap-2">
            <BarChart3 className="size-3.5 text-muted-foreground" />
            <label htmlFor="metrics" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
              Metrics Endpoint
            </label>
          </div>
          <Switch
            id="metrics"
            checked={settings.metrics}
            onCheckedChange={(v) => update('metrics', v)}
          />
        </div>
        <p className="text-xs text-muted-foreground italic">
          Exposes Prometheus /metrics endpoint for observability.
        </p>
      </div>

      <div className="space-y-1.5">
        <div className="flex items-center gap-2">
          <Folder className="size-3.5 text-muted-foreground" />
          <label htmlFor="slot-save-path" className="text-xs font-medium uppercase tracking-wide text-muted-foreground">
            Slot Save Path
          </label>
        </div>
        <input
          id="slot-save-path"
          type="text"
          value={settings.slotSavePath}
          onChange={(e) => update('slotSavePath', e.target.value)}
          className="w-full bg-background border border-border rounded px-2 py-1.5 text-sm font-mono outline-none focus:border-ring"
        />
        <p className="text-xs text-muted-foreground italic">
          Directory for disk-persistent KV cache. Must be writable.
        </p>
      </div>

      <div className="flex justify-between gap-2 border-t pt-4">
        <Button variant="outline" onClick={resetDefaults}>
          Reset to defaults
        </Button>
        <Button onClick={save} disabled={!dirty}>
          Save
        </Button>
      </div>

      <p className="text-xs text-muted-foreground border-t pt-4">
        Changes apply to new llama-server processes. Restart the sidecar to apply.
        These settings are stored locally in your browser.
      </p>
    </div>
  );
}
|