- llama-server does not emit n_ctx in timings (confirmed empirically); dead code at inference.ts:479 and compaction.ts:300 never fired
- New model-context.ts: cached fetch of /upstream/<model>/props with positive-cache (no TTL) and 60s negative-cache
- Wired into all 4 ctx_max write sites: 3 in inference.ts (executeToolPhase, finalizeCompletion, runCapHitSummary) and 1 in compaction.ts (summary row INSERT)
- AbortController 3s timeout, lenient parsing with sensible defaults
- 12 new vitest cases for the cache module (59 total)
- 7 historical assistant rows backfilled manually (see notes)
206 lines
7.4 KiB
TypeScript
206 lines
7.4 KiB
TypeScript
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
import {
|
|
configureModelContext,
|
|
getModelContext,
|
|
invalidateModelContext,
|
|
} from '../model-context.js';
|
|
|
|
// ---- fixtures ---------------------------------------------------------------
|
|
|
|
// Base URL handed to configureModelContext() in beforeEach; the URL assertion
// in the positive-cache suite is built from this same constant.
const TEST_URL = 'http://llama-swap.test:8401';
|
|
|
|
/**
 * Builds a 200 JSON `Response` shaped like the `/props` payload these tests
 * feed to `getModelContext`.
 *
 * @param n_ctx - Value placed at `default_generation_settings.n_ctx`.
 * @param total_slots - Value placed at the top-level `total_slots`; defaults to 1.
 * @returns A fresh `Response`. Response bodies are single-use, so build one
 *   per mocked fetch call rather than sharing an instance.
 */
function mockOkProps(n_ctx: number, total_slots = 1): Response {
  const body = JSON.stringify({
    default_generation_settings: { n_ctx },
    total_slots,
  });
  return new Response(body, {
    status: 200,
    headers: { 'Content-Type': 'application/json' },
  });
}
|
|
|
|
// Reset the module's cache and point it at the fake upstream before every
// test, so an entry cached by one test cannot satisfy a lookup in the next.
beforeEach(() => {
  invalidateModelContext();
  configureModelContext({ llamaSwapUrl: TEST_URL });
});
|
|
|
|
// Undo the per-test fetch spies and any fake timers installed by the TTL
// tests so neither leaks into the following test.
afterEach(() => {
  vi.restoreAllMocks();
  vi.useRealTimers();
});
|
|
|
|
// ---- positive cache ---------------------------------------------------------
|
|
|
|
// Successful lookups: the parsed props are returned and then served from the
// cache on subsequent calls for the same model.
describe('getModelContext — positive cache', () => {
  it('returns the parsed body on a 200 with valid shape', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockOkProps(262_144, 1));

    const result = await getModelContext('qwen3.6');

    expect(result).not.toBeNull();
    expect(result?.n_ctx).toBe(262_144);
    expect(result?.total_slots).toBe(1);
    expect(typeof result?.fetched_at).toBe('number');
    // Verify the URL was constructed correctly. 'qwen3.6' contains no
    // characters that need escaping, so the path segment is the raw model
    // name; this case does not exercise encoding of unusual names.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
      `${TEST_URL}/upstream/qwen3.6/props`,
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('serves the second call from cache without refetching', async () => {
    // Only one response is queued: a second fetch would not be served by the mock.
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(262_144));

    const a = await getModelContext('qwen3.6');
    const b = await getModelContext('qwen3.6');

    expect(a).toEqual(b);
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('defaults total_slots to 1 when the server omits it', async () => {
    // Mirror the docstring claim — total_slots is informational and we don't
    // reject the response just because it's missing.
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(JSON.stringify({ default_generation_settings: { n_ctx: 8192 } }), {
        status: 200,
      }),
    );

    const result = await getModelContext('partial-model');

    expect(result).not.toBeNull();
    expect(result?.n_ctx).toBe(8192);
    expect(result?.total_slots).toBe(1);
  });
});
|
|
|
|
// ---- negative cache (single-shot) ------------------------------------------
|
|
|
|
// Each failure mode must yield null AND be remembered, so an immediate second
// lookup for the same model does not hit the upstream again.
describe('getModelContext — negative cache (single failure modes)', () => {
  it('returns null and negative-caches when default_generation_settings is missing', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response(JSON.stringify({ total_slots: 1 }), { status: 200 }));

    const result = await getModelContext('broken');
    expect(result).toBeNull();

    // Second call within TTL must not refetch.
    const result2 = await getModelContext('broken');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches when n_ctx is missing inside default_generation_settings', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(JSON.stringify({ default_generation_settings: {}, total_slots: 1 }), {
        status: 200,
      }),
    );

    // Assert the null results as well as the call count, matching the
    // sibling cases — the title promises both.
    const result = await getModelContext('half-broken');
    expect(result).toBeNull();

    const result2 = await getModelContext('half-broken');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches on non-200 (404)', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('not found', { status: 404 }));

    const result = await getModelContext('missing-model');
    expect(result).toBeNull();

    const result2 = await getModelContext('missing-model');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches on network error', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockRejectedValueOnce(new TypeError('fetch failed: connect ECONNREFUSED'));

    const result = await getModelContext('down-upstream');
    expect(result).toBeNull();

    const result2 = await getModelContext('down-upstream');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });
});
|
|
|
|
// ---- negative cache TTL -----------------------------------------------------
|
|
|
|
// Uses fake timers to move the clock to either side of the 60s negative-cache
// window; afterEach restores real timers.
describe('getModelContext — negative cache TTL', () => {
  it('does NOT refetch when a second call lands within the 60s TTL', async () => {
    vi.useFakeTimers();
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('boom', { status: 500 }));

    await getModelContext('flapping');
    vi.advanceTimersByTime(30_000);
    await getModelContext('flapping');

    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('refetches when the second call lands after the 60s TTL expires', async () => {
    vi.useFakeTimers();
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('boom', { status: 500 }))
      // Recovered upstream on the retry — we expect a positive cache hit
      // after this fires.
      .mockResolvedValueOnce(mockOkProps(8192));

    await getModelContext('flapping');
    vi.advanceTimersByTime(61_000);
    const result = await getModelContext('flapping');

    expect(result).not.toBeNull();
    expect(result?.n_ctx).toBe(8192);
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });
});
|
|
|
|
// ---- invalidateModelContext -------------------------------------------------
|
|
|
|
// Explicit invalidation: by model name, for everything, and for entries that
// were cached as failures.
describe('invalidateModelContext', () => {
  it('clears a single positive entry by model name', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(8192));

    await getModelContext('cleared');
    invalidateModelContext('cleared');
    await getModelContext('cleared');

    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });

  it('clears ALL entries when called with no arg', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(16_384))
      // After the full clear, both models re-fetch.
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(16_384));

    await getModelContext('alpha');
    await getModelContext('beta');
    invalidateModelContext();
    await getModelContext('alpha');
    await getModelContext('beta');

    expect(fetchSpy).toHaveBeenCalledTimes(4);
  });

  it('clearing a positive entry also clears the matching negative entry', async () => {
    // Mixed state: first call fails (negative-caches), then we invalidate
    // explicitly and the next call should fetch again rather than serve
    // the stale negative entry.
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('boom', { status: 500 }))
      .mockResolvedValueOnce(mockOkProps(4096));

    await getModelContext('formerly-broken');
    invalidateModelContext('formerly-broken');
    const result = await getModelContext('formerly-broken');

    expect(result).not.toBeNull();
    expect(result?.n_ctx).toBe(4096);
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });
});
|