chore: snapshot working tree - pty_exited notifications + in-flight inference WIP

feat(booterm): structured pty_exited WS notifications. Plan-validated, impl-validated, code-reviewed green (contracts build clean, contracts test 29/29, booterm + web typecheck clean). wip: in-progress inference/provider refactor (agents.ts, provider.ts, new llama-providers.ts, removed llama-args-validator), plus arena, dispatcher, compaction, schema changes. openspec: pty-exit-notifications complete; x-agent-flags planned (not yet implemented).
2026-06-14 12:48:47 +00:00
parent 0ed506f1da
commit b18de2a331
204 changed files with 25344 additions and 867 deletions
--- a/apps/server/src/services/tests/model-context.test.ts
+++ b/apps/server/src/services/tests/model-context.test.ts
@@ -1,14 +1,44 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
-import {
-  configureModelContext,
-  getModelContext,
-  invalidateModelContext,
-} from '../model-context.js';
+
+// ---- mock llama-providers registry -----------------------------------------
+// model-context.ts imports resolveModelProvider from inference/provider.ts,
+// which uses getLlamaProviders() from llama-providers.ts. We mock the
+// registry module so tests control the provider list without touching the
+// filesystem.
+
+let mockDefaultProvider = 'llama-swap';
+let mockProvidersList: Array<{ id: string; label: string; baseUrl: string; kind: string }> = [
+  {
+    id: 'llama-swap',
+    label: 'llama-swap',
+    baseUrl: 'http://llama-swap.test:8401',
+    kind: 'llama-swap',
+  },
+];
+
+vi.mock('../llama-providers.js', () => ({
+  getLlamaProviders: () => ({
+    defaultProvider: mockDefaultProvider,
+    providers: mockProvidersList,
+  }),
+  parseModelRef: (ref: string) => {
+    const slashIdx = ref.indexOf('/');
+    if (slashIdx <= 0) {
+      return { providerId: mockDefaultProvider, wireModelId: ref, isLegacyBareId: true };
+    }
+    return {
+      providerId: ref.slice(0, slashIdx),
+      wireModelId: ref.slice(slashIdx + 1),
+      isLegacyBareId: false,
+    };
+  },
+}));
+
+// Dynamic import: load model-context.js only after the mock state above is initialized (vi.mock itself is hoisted).
+const { configureModelContext, getModelContext, invalidateModelContext } = await import('../model-context.js');

 // ---- fixtures ---------------------------------------------------------------

-const TEST_URL = 'http://llama-swap.test:8401';
-
 function mockOkProps(n_ctx: number) {
  return new Response(
    JSON.stringify({ default_generation_settings: { n_ctx } }),
@@ -16,9 +46,28 @@ function mockOkProps(n_ctx: number) {
  );
 }

+// Legacy test config (backward-compatible { llamaSwapUrl } shape).
+const LEGACY_CONFIG = { llamaSwapUrl: 'http://llama-swap.test:8401' };
+
+// Provider-aware config for multi-provider tests.
+const MULTI_PROVIDER_CONFIG = {
+  LLAMA_SWAP_URL: 'http://llama-swap.test:8401',
+  DEEPSEEK_API_KEY: 'sk-test',
+  DEEPSEEK_BASE_URL: 'https://api.deepseek.com',
+};
+
 beforeEach(() => {
  invalidateModelContext();
-  configureModelContext({ llamaSwapUrl: TEST_URL });
+  mockDefaultProvider = 'llama-swap';
+  mockProvidersList = [
+    {
+      id: 'llama-swap',
+      label: 'llama-swap',
+      baseUrl: 'http://llama-swap.test:8401',
+      kind: 'llama-swap',
+    },
+  ];
+  configureModelContext(LEGACY_CONFIG);
 });

 afterEach(() => {
@@ -37,7 +86,7 @@ describe('getModelContext — positive cache', () => {
    // Verify the URL was constructed correctly — encodes the model name in
    // case it contains characters that would break the path.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
-      `${TEST_URL}/upstream/qwen3.6/props`,
+      `${LEGACY_CONFIG.llamaSwapUrl}/upstream/qwen3.6/props`,
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });
@@ -185,3 +234,158 @@ describe('invalidateModelContext', () => {
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });
 });
+
+// ---- W3: provider-aware cache isolation ------------------------------------
+
+describe('getModelContext — provider-aware cache isolation (W3)', () => {
+  beforeEach(() => {
+    // Two providers sharing the same wire model name "qwen3.6" but on
+    // different base URLs. This is the core scenario for cache isolation.
+    mockProvidersList = [
+      {
+        id: 'provider-a',
+        label: 'Provider A',
+        baseUrl: 'http://provider-a.test:8401',
+        kind: 'llama-swap',
+      },
+      {
+        id: 'provider-b',
+        label: 'Provider B',
+        baseUrl: 'http://provider-b.test:8401',
+        kind: 'llama-swap',
+      },
+    ];
+    mockDefaultProvider = 'provider-a';
+    configureModelContext(MULTI_PROVIDER_CONFIG);
+  });
+
+  it('two providers serving the same wire model name have separate cache entries', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(mockOkProps(32_768))   // provider-a: qwen3.6
+      .mockResolvedValueOnce(mockOkProps(16_384));   // provider-b: qwen3.6
+
+    // Both resolve to the wire model "qwen3.6" but different providers.
+    const a = await getModelContext('provider-a/qwen3.6');
+    const b = await getModelContext('provider-b/qwen3.6');
+
+    expect(a).not.toBeNull();
+    expect(a!.n_ctx).toBe(32_768);
+    expect(b).not.toBeNull();
+    expect(b!.n_ctx).toBe(16_384);
+
+    // Two separate fetches — one per provider's baseUrl.
+    expect(fetchSpy).toHaveBeenCalledTimes(2);
+    expect(fetchSpy.mock.calls[0]![0]).toContain('provider-a.test');
+    expect(fetchSpy.mock.calls[1]![0]).toContain('provider-b.test');
+  });
+
+  it('cached entry for one provider does not leak to the other', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(mockOkProps(32_768));   // provider-a: qwen3.6
+
+    // Populate provider-a's cache.
+    await getModelContext('provider-a/qwen3.6');
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+
+    // provider-b/qwen3.6 should NOT hit provider-a's cache — it must fetch.
+    fetchSpy.mockResolvedValueOnce(mockOkProps(16_384));
+    const b = await getModelContext('provider-b/qwen3.6');
+    expect(b).not.toBeNull();
+    expect(b!.n_ctx).toBe(16_384);
+    expect(fetchSpy).toHaveBeenCalledTimes(2);
+  });
+
+  it('invalidateModelContext(key) only clears the targeted provider entry', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(mockOkProps(32_768))   // provider-a: qwen3.6
+      .mockResolvedValueOnce(mockOkProps(16_384))   // provider-b: qwen3.6
+      .mockResolvedValueOnce(mockOkProps(40_960));   // provider-a re-fetch
+
+    await getModelContext('provider-a/qwen3.6');
+    await getModelContext('provider-b/qwen3.6');
+
+    // Invalidate only provider-a's entry.
+    invalidateModelContext('provider-a/qwen3.6');
+
+    // provider-a must re-fetch. provider-b is not re-read here, so this test does not assert it stayed cached.
+    const a2 = await getModelContext('provider-a/qwen3.6');
+    expect(a2).not.toBeNull();
+    expect(a2!.n_ctx).toBe(40_960);
+    expect(fetchSpy).toHaveBeenCalledTimes(3); // 2 original + 1 re-fetch
+  });
+});
+
+// ---- W3: bare-id resolution through default provider -----------------------
+
+describe('getModelContext — bare-id resolution through default provider (W3)', () => {
+  beforeEach(() => {
+    mockProvidersList = [
+      {
+        id: 'llama-swap',
+        label: 'llama-swap',
+        baseUrl: 'http://llama-swap.test:8401',
+        kind: 'llama-swap',
+      },
+      {
+        id: 'deepseek',
+        label: 'DeepSeek',
+        baseUrl: 'https://api.deepseek.com',
+        kind: 'deepseek',
+      },
+    ];
+    mockDefaultProvider = 'llama-swap';
+    configureModelContext(MULTI_PROVIDER_CONFIG);
+  });
+
+  it('bare model id resolves through the default provider', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(mockOkProps(8192));
+
+    const result = await getModelContext('qwen3.6');
+    expect(result).not.toBeNull();
+    expect(result!.n_ctx).toBe(8192);
+
+    // Default provider is "llama-swap", so the URL uses its baseUrl.
+    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
+      'http://llama-swap.test:8401/upstream/qwen3.6/props',
+      expect.objectContaining({ signal: expect.any(AbortSignal) }),
+    );
+  });
+
+  it('bare id and explicit default-provider composite share a cache entry', async () => {
+    const fetchSpy = vi
+      .spyOn(globalThis, 'fetch')
+      .mockResolvedValueOnce(mockOkProps(8192));
+
+    // Both resolve to "llama-swap/qwen3.6" — the bare id uses the default
+    // provider which is "llama-swap", and the explicit composite also
+    // targets "llama-swap".
+    const a = await getModelContext('qwen3.6');
+    const b = await getModelContext('llama-swap/qwen3.6');
+
+    expect(a).toEqual(b);
+    expect(fetchSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('bare "deepseek-*" id returns static default without fetching', async () => {
+    const fetchSpy = vi.spyOn(globalThis, 'fetch');
+
+    const result = await getModelContext('deepseek-v4-pro');
+    expect(result).not.toBeNull();
+    expect(result!.n_ctx).toBe(131_072);
+    expect(fetchSpy).not.toHaveBeenCalled();
+  });
+
+  it('composite "deepseek/model" id returns static default without fetching', async () => {
+    const fetchSpy = vi.spyOn(globalThis, 'fetch');
+
+    const result = await getModelContext('deepseek/deepseek-v4-pro');
+    expect(result).not.toBeNull();
+    expect(result!.n_ctx).toBe(131_072);
+    expect(fetchSpy).not.toHaveBeenCalled();
+  });
+});