v1.11.3: fix ctx_max capture via /props endpoint

- llama-server does not emit n_ctx in timings (confirmed empirically); dead code at inference.ts:479 and compaction.ts:300 never fired
- New model-context.ts: cached fetch of /upstream/<model>/props with positive-cache (no TTL) and 60s negative-cache
- Wired into all 4 ctx_max write sites: 3 in inference.ts (executeToolPhase, finalizeCompletion, runCapHitSummary) and 1 in compaction.ts (summary row INSERT)
- AbortController 3s timeout, lenient parsing with sensible defaults
- 12 new vitest cases for the cache module (59 total)
- 7 historical assistant rows backfilled manually (see notes)
2026-05-20 19:29:26 +00:00
parent 8cd270a5da
commit 89dcfb95dc
5 changed files with 361 additions and 18 deletions
--- a/apps/server/src/services/tests/model-context.test.ts
+++ b/apps/server/src/services/tests/model-context.test.ts
@@ -0,0 +1,205 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import {
+  configureModelContext,
+  getModelContext,
+  invalidateModelContext,
+} from '../model-context.js';
+
+// ---- fixtures ---------------------------------------------------------------
+
+const TEST_URL = 'http://llama-swap.test:8401';
+
+function mockOkProps(n_ctx: number, total_slots = 1) {
+  return new Response(
+    JSON.stringify({
+      default_generation_settings: { n_ctx },
+      total_slots,
+    }),
+    { status: 200, headers: { 'Content-Type': 'application/json' } },
+  );
+}
+
+beforeEach(() => {
+  invalidateModelContext();
+  configureModelContext({ llamaSwapUrl: TEST_URL });
+});
+
+afterEach(() => {
+  vi.restoreAllMocks();
+  vi.useRealTimers();
+});
+
+// ---- positive cache ---------------------------------------------------------
+
+describe('getModelContext — positive cache', () => {
+  it('returns the parsed body on a 200 with valid shape', async () => {
+    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockOkProps(262_144, 1));
+    const result = await getModelContext('qwen3.6');
+    expect(result).not.toBeNull();
+    expect(result!.n_ctx).toBe(262_144);
+    expect(result!.total_slots).toBe(1);
+    expect(typeof result!.fetched_at).toBe('number');
+    // Verify the request URL and that an AbortSignal is passed. 'qwen3.6'
+    // needs no escaping, so path encoding is not exercised by this case.
+    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
+      `${TEST_URL}/upstream/qwen3.6/props`,
+      expect.objectContaining({ signal: expect.any(AbortSignal) }),
+    );
+  });
+
+  it('serves the second call from cache without refetching', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(mockOkProps(262_144));
+    const a = await getModelContext('qwen3.6');
+    const b = await getModelContext('qwen3.6');
+    expect(a).toEqual(b);
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('defaults total_slots to 1 when the server omits it', async () => {
+    // Mirror the docstring claim — total_slots is informational and we don't
+    // reject the response just because it's missing.
+    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
+      new Response(JSON.stringify({ default_generation_settings: { n_ctx: 8192 } }), {
+        status: 200,
+      }),
+    );
+    const result = await getModelContext('partial-model');
+    expect(result).not.toBeNull();
+    expect(result!.n_ctx).toBe(8192);
+    expect(result!.total_slots).toBe(1);
+  });
+});
+
+// ---- negative cache (single-shot) ------------------------------------------
+
+describe('getModelContext — negative cache (single failure modes)', () => {
+  it('returns null and negative-caches when default_generation_settings is missing', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(new Response(JSON.stringify({ total_slots: 1 }), { status: 200 }));
+    const result = await getModelContext('broken');
+    expect(result).toBeNull();
+    // Second call within TTL must not refetch.
+    const result2 = await getModelContext('broken');
+    expect(result2).toBeNull();
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('returns null and negative-caches when n_ctx is missing inside default_generation_settings', async () => {
+    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
+      new Response(JSON.stringify({ default_generation_settings: {}, total_slots: 1 }), {
+        status: 200,
+      }),
+    );
+    await getModelContext('half-broken');
+    await getModelContext('half-broken');
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('returns null and negative-caches on non-200 (404)', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(new Response('not found', { status: 404 }));
+    const result = await getModelContext('missing-model');
+    expect(result).toBeNull();
+    const result2 = await getModelContext('missing-model');
+    expect(result2).toBeNull();
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('returns null and negative-caches on network error', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockRejectedValueOnce(new TypeError('fetch failed: connect ECONNREFUSED'));
+    const result = await getModelContext('down-upstream');
+    expect(result).toBeNull();
+    const result2 = await getModelContext('down-upstream');
+    expect(result2).toBeNull();
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+  });
+});
+
+// ---- negative cache TTL -----------------------------------------------------
+
+describe('getModelContext — negative cache TTL', () => {
+  it('does NOT refetch when a second call lands within the 60s TTL', async () => {
+    vi.useFakeTimers();
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(new Response('boom', { status: 500 }));
+
+    await getModelContext('flapping');
+    vi.advanceTimersByTime(30_000);
+    await getModelContext('flapping');
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('refetches when the second call lands after the 60s TTL expires', async () => {
+    vi.useFakeTimers();
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(new Response('boom', { status: 500 }))
+      // Upstream has recovered by the retry — the second call must fetch
+      // again (not a cache hit) and return the parsed props.
+      .mockResolvedValueOnce(mockOkProps(8192));
+
+    await getModelContext('flapping');
+    vi.advanceTimersByTime(61_000);
+    const result = await getModelContext('flapping');
+    expect(result).not.toBeNull();
+    expect(result!.n_ctx).toBe(8192);
+    expect(fetchSpy).toHaveBeenCalledTimes(2);
+  });
+});
+
+// ---- invalidateModelContext -------------------------------------------------
+
+describe('invalidateModelContext', () => {
+  it('clears a single positive entry by model name', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(mockOkProps(8192))
+      .mockResolvedValueOnce(mockOkProps(8192));
+
+    await getModelContext('cleared');
+    invalidateModelContext('cleared');
+    await getModelContext('cleared');
+    expect(fetchSpy).toHaveBeenCalledTimes(2);
+  });
+
+  it('clears ALL entries when called with no arg', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(mockOkProps(8192))
+      .mockResolvedValueOnce(mockOkProps(16_384))
+      // After the full clear, both models re-fetch.
+      .mockResolvedValueOnce(mockOkProps(8192))
+      .mockResolvedValueOnce(mockOkProps(16_384));
+
+    await getModelContext('alpha');
+    await getModelContext('beta');
+    invalidateModelContext();
+    await getModelContext('alpha');
+    await getModelContext('beta');
+    expect(fetchSpy).toHaveBeenCalledTimes(4);
+  });
+
+  it('clearing a positive entry also clears the matching negative entry', async () => {
+    // Only a negative entry exists here: the first call fails and is
+    // negative-cached, then we invalidate by name and the next call must
+    // fetch again rather than serve the stale negative entry.
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(new Response('boom', { status: 500 }))
+      .mockResolvedValueOnce(mockOkProps(4096));
+
+    await getModelContext('formerly-broken');
+    invalidateModelContext('formerly-broken');
+    const result = await getModelContext('formerly-broken');
+    expect(result).not.toBeNull();
+    expect(result!.n_ctx).toBe(4096);
+    expect(fetchSpy).toHaveBeenCalledTimes(2);
+  });
+});