v1.11.3: fix ctx_max capture via /props endpoint
- llama-server does not emit n_ctx in timings (confirmed empirically); dead code at inference.ts:479 and compaction.ts:300 never fired
- New model-context.ts: cached fetch of /upstream/<model>/props with positive-cache (no TTL) and 60s negative-cache
- Wired into all 4 ctx_max write sites: 3 in inference.ts (executeToolPhase, finalizeCompletion, runCapHitSummary) and 1 in compaction.ts (summary row INSERT)
- AbortController 3s timeout, lenient parsing with sensible defaults
- 12 new vitest cases for the cache module (59 total)
- 7 historical assistant rows backfilled manually (see notes)
This commit is contained in:
205
apps/server/src/services/__tests__/model-context.test.ts
Normal file
205
apps/server/src/services/__tests__/model-context.test.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
configureModelContext,
|
||||
getModelContext,
|
||||
invalidateModelContext,
|
||||
} from '../model-context.js';
|
||||
|
||||
// ---- fixtures ---------------------------------------------------------------
|
||||
|
||||
/** Base URL passed to `configureModelContext`; the URL assertions below are built from it. */
const TEST_URL = 'http://llama-swap.test:8401';

/**
 * Builds a successful (HTTP 200, JSON content type) props-style response
 * suitable for stubbing `fetch`.
 *
 * @param n_ctx - Value stored at `default_generation_settings.n_ctx`.
 * @param total_slots - Value stored at the top-level `total_slots` key (defaults to 1).
 * @returns A fresh `Response` whose body is the JSON-serialized payload.
 */
function mockOkProps(n_ctx: number, total_slots = 1): Response {
  const payload = {
    default_generation_settings: { n_ctx },
    total_slots,
  };
  return new Response(JSON.stringify(payload), {
    status: 200,
    headers: { 'Content-Type': 'application/json' },
  });
}
|
||||
|
||||
// Each test starts with every cache entry dropped and the module pointed at
// the fake upstream URL, so no state leaks in from a previous test.
beforeEach(() => {
  invalidateModelContext();
  configureModelContext({ llamaSwapUrl: TEST_URL });
});

// Undo the `fetch` spies and switch back to real timers; some tests install
// fake timers and must not leave them active for the next test.
afterEach(() => {
  vi.restoreAllMocks();
  vi.useRealTimers();
});
|
||||
|
||||
// ---- positive cache ---------------------------------------------------------
|
||||
|
||||
// Successful lookups: response parsing, the request that is issued, and
// reuse of a cached result on later calls.
describe('getModelContext — positive cache', () => {
  it('returns the parsed body on a 200 with valid shape', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockOkProps(262_144, 1));

    const result = await getModelContext('qwen3.6');

    expect(result).not.toBeNull();
    expect(result?.n_ctx).toBe(262_144);
    expect(result?.total_slots).toBe(1);
    expect(typeof result?.fetched_at).toBe('number');
    // Pins the request path layout and that an abort signal accompanies the call.
    // NOTE(review): 'qwen3.6' contains no characters that need escaping, so
    // this does not exercise any encoding of the model name — add a case
    // with e.g. a slash or space if that behavior should be covered.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
      `${TEST_URL}/upstream/qwen3.6/props`,
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('serves the second call from cache without refetching', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(262_144));

    const a = await getModelContext('qwen3.6');
    const b = await getModelContext('qwen3.6');

    // Guard against a false pass: two cached *failures* would also compare
    // equal, so require that the first lookup actually succeeded.
    expect(a).not.toBeNull();
    expect(a?.n_ctx).toBe(262_144);
    expect(a).toEqual(b);
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('defaults total_slots to 1 when the server omits it', async () => {
    // Mirror the docstring claim — total_slots is informational and we don't
    // reject the response just because it's missing.
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(JSON.stringify({ default_generation_settings: { n_ctx: 8192 } }), {
        status: 200,
      }),
    );

    const result = await getModelContext('partial-model');

    expect(result).not.toBeNull();
    expect(result?.n_ctx).toBe(8192);
    expect(result?.total_slots).toBe(1);
  });
});
|
||||
|
||||
// ---- negative cache (single-shot) ------------------------------------------
|
||||
|
||||
// Each failure mode must yield null AND be remembered, so an immediate second
// call for the same model does not hit the upstream again.
describe('getModelContext — negative cache (single failure modes)', () => {
  it('returns null and negative-caches when default_generation_settings is missing', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response(JSON.stringify({ total_slots: 1 }), { status: 200 }));

    const result = await getModelContext('broken');
    expect(result).toBeNull();

    // Second call within TTL must not refetch.
    const result2 = await getModelContext('broken');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches when n_ctx is missing inside default_generation_settings', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(JSON.stringify({ default_generation_settings: {}, total_slots: 1 }), {
        status: 200,
      }),
    );

    // Assert the return values as well as the call count, so the "returns
    // null" half of the test title is actually verified.
    const result = await getModelContext('half-broken');
    expect(result).toBeNull();

    const result2 = await getModelContext('half-broken');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches on non-200 (404)', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('not found', { status: 404 }));

    const result = await getModelContext('missing-model');
    expect(result).toBeNull();

    const result2 = await getModelContext('missing-model');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches on network error', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockRejectedValueOnce(new TypeError('fetch failed: connect ECONNREFUSED'));

    const result = await getModelContext('down-upstream');
    expect(result).toBeNull();

    const result2 = await getModelContext('down-upstream');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });
});
|
||||
|
||||
// ---- negative cache TTL -----------------------------------------------------
|
||||
|
||||
// Expiry of remembered failures, driven by fake timers so no real waiting
// occurs. Real timers are restored by the file-level afterEach hook.
describe('getModelContext — negative cache TTL', () => {
  it('does NOT refetch when a second call lands within the 60s TTL', async () => {
    vi.useFakeTimers();
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('boom', { status: 500 }));

    const first = await getModelContext('flapping');
    expect(first).toBeNull();

    // 30s is inside the TTL window, so the failure should still be served
    // from the cache.
    vi.advanceTimersByTime(30_000);
    const second = await getModelContext('flapping');

    expect(second).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('refetches when the second call lands after the 60s TTL expires', async () => {
    vi.useFakeTimers();
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('boom', { status: 500 }))
      // Recovered upstream on the retry — we expect a positive cache hit
      // after this fires.
      .mockResolvedValueOnce(mockOkProps(8192));

    const first = await getModelContext('flapping');
    expect(first).toBeNull();

    // 61s is past the TTL window, so the next call must go upstream again.
    vi.advanceTimersByTime(61_000);
    const result = await getModelContext('flapping');

    expect(result).not.toBeNull();
    expect(result?.n_ctx).toBe(8192);
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });
});
|
||||
|
||||
// ---- invalidateModelContext -------------------------------------------------
|
||||
|
||||
// Explicit invalidation: per-model, global, and for a model whose last lookup
// failed. Eviction is observed through the number of `fetch` calls.
describe('invalidateModelContext', () => {
  it('clears a single positive entry by model name', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(8192));

    await getModelContext('cleared');
    invalidateModelContext('cleared');
    await getModelContext('cleared');

    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });

  it('clears ALL entries when called with no arg', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(16_384))
      // After the full clear, both models re-fetch.
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(16_384));

    await getModelContext('alpha');
    await getModelContext('beta');
    invalidateModelContext();
    await getModelContext('alpha');
    await getModelContext('beta');

    expect(fetchSpy).toHaveBeenCalledTimes(4);
  });

  it('clearing a positive entry also clears the matching negative entry', async () => {
    // Mixed state: first call fails (negative-caches), then we invalidate
    // explicitly and the next call should fetch again rather than serve
    // the stale negative entry.
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('boom', { status: 500 }))
      .mockResolvedValueOnce(mockOkProps(4096));

    // Precondition: confirm the first lookup really failed, otherwise the
    // rest of the test is not exercising a negative entry at all.
    const first = await getModelContext('formerly-broken');
    expect(first).toBeNull();

    invalidateModelContext('formerly-broken');
    const result = await getModelContext('formerly-broken');

    expect(result).not.toBeNull();
    expect(result?.n_ctx).toBe(4096);
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });
});
|
||||
Reference in New Issue
Block a user