// boocode/apps/server/src/services/__tests__/model-context.test.ts

import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';

// ---- mock llama-providers registry -----------------------------------------
// model-context.ts imports resolveModelProvider from inference/provider.ts,
// which uses getLlamaProviders() from llama-providers.ts. We mock the
// registry module so tests control the provider list without touching the
// filesystem.

/** Shape of one entry in the mocked provider registry. */
type MockProvider = { id: string; label: string; baseUrl: string; kind: string };

// Mutable registry state. The mocked registry functions read these at call
// time, so a test can reassign them to change what the registry reports.
let mockDefaultProvider = 'llama-swap';
let mockProvidersList: MockProvider[] = [
  { id: 'llama-swap', label: 'llama-swap', baseUrl: 'http://llama-swap.test:8401', kind: 'llama-swap' },
];

// In-memory stand-in for the provider registry module. The factory is kept
// inline in the vi.mock call; both functions read the module-level mock state
// only when invoked.
vi.mock('../llama-providers.js', () => {
  // Reports whatever default provider / provider list the test has set up.
  const getLlamaProviders = () => ({
    defaultProvider: mockDefaultProvider,
    providers: mockProvidersList,
  });

  // Splits "provider/model" at the first slash. A ref with no slash, or with
  // the slash at position 0, is treated as a legacy bare id that belongs to
  // the current default provider.
  const parseModelRef = (ref: string) => {
    const separatorAt = ref.indexOf('/');
    const hasProviderPrefix = separatorAt > 0;
    if (!hasProviderPrefix) {
      return { providerId: mockDefaultProvider, wireModelId: ref, isLegacyBareId: true };
    }
    const providerId = ref.slice(0, separatorAt);
    const wireModelId = ref.slice(separatorAt + 1);
    return { providerId, wireModelId, isLegacyBareId: false };
  };

  return { getLlamaProviders, parseModelRef };
});

// Import the functions under test AFTER the mock is registered. A dynamic
// import() is used so that '../model-context.js' is loaded at this point in
// the module body, i.e. after the vi.mock call and the mock state above.
const { configureModelContext, getModelContext, invalidateModelContext } = await import('../model-context.js');

// ---- fixtures ---------------------------------------------------------------

/**
 * Builds a successful props response for the fetch spy.
 *
 * @param n_ctx Context size to report under `default_generation_settings.n_ctx`.
 * @returns An HTTP 200 `Response` with a JSON content type and that body.
 */
function mockOkProps(n_ctx: number): Response {
  const payload = { default_generation_settings: { n_ctx } };
  const init = {
    status: 200,
    headers: { 'Content-Type': 'application/json' },
  };
  return new Response(JSON.stringify(payload), init);
}

// Backward-compatible config shape: a single `llamaSwapUrl` field. The
// top-level beforeEach applies this before every test.
const LEGACY_CONFIG = {
  llamaSwapUrl: 'http://llama-swap.test:8401',
};

// Env-style config applied by the multi-provider (W3) suites: the llama-swap
// URL plus a DeepSeek API key and base URL.
const MULTI_PROVIDER_CONFIG = {
  LLAMA_SWAP_URL: 'http://llama-swap.test:8401',
  DEEPSEEK_API_KEY: 'sk-test',
  DEEPSEEK_BASE_URL: 'https://api.deepseek.com',
};

// Shared reset before every test: drop all cached contexts, put the registry
// back to a single llama-swap provider, and re-apply the legacy config.
beforeEach(() => {
  invalidateModelContext();

  const llamaSwap = {
    id: 'llama-swap',
    label: 'llama-swap',
    baseUrl: 'http://llama-swap.test:8401',
    kind: 'llama-swap',
  };
  mockDefaultProvider = 'llama-swap';
  mockProvidersList = [llamaSwap];

  configureModelContext(LEGACY_CONFIG);
});

// Undo per-test patching: switch back to real timers (the TTL tests install
// fake ones) and restore every spy, including the one on globalThis.fetch.
afterEach(() => {
  vi.useRealTimers();
  vi.restoreAllMocks();
});

// ---- positive cache ---------------------------------------------------------

describe('getModelContext — positive cache', () => {
  it('returns the parsed body on a 200 with valid shape', async () => {
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(mockOkProps(262_144));

    const ctx = await getModelContext('qwen3.6');

    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(262_144);
    // Exactly one request, to <base>/upstream/<model>/props, carrying an
    // AbortSignal. 'qwen3.6' has no characters that need escaping, so this
    // pins the path shape only, not any URL-encoding of the model name.
    const expectedUrl = `${LEGACY_CONFIG.llamaSwapUrl}/upstream/qwen3.6/props`;
    expect(fetchMock).toHaveBeenCalledExactlyOnceWith(
      expectedUrl,
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('serves the second call from cache without refetching', async () => {
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(mockOkProps(262_144));

    const first = await getModelContext('qwen3.6');
    const second = await getModelContext('qwen3.6');

    // Same value both times, and only the first lookup reached fetch.
    expect(first).toEqual(second);
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });
});

// ---- negative cache (single-shot) ------------------------------------------

describe('getModelContext — negative cache (single failure modes)', () => {
  // Every case follows the same pattern: queue ONE unusable upstream reply,
  // look the model up twice, and require that both lookups yield null while
  // only the first one reaches fetch.

  it('returns null and negative-caches when default_generation_settings is missing', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response(JSON.stringify({ total_slots: 1 }), { status: 200 }));
    const result = await getModelContext('broken');
    expect(result).toBeNull();
    // Second call within TTL must not refetch.
    const result2 = await getModelContext('broken');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches when n_ctx is missing inside default_generation_settings', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(JSON.stringify({ default_generation_settings: {}, total_slots: 1 }), {
        status: 200,
      }),
    );
    // Assert the null results explicitly so the "returns null" half of the
    // title is actually checked, not just the single-fetch half.
    const result = await getModelContext('half-broken');
    expect(result).toBeNull();
    const result2 = await getModelContext('half-broken');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches on non-200 (404)', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('not found', { status: 404 }));
    const result = await getModelContext('missing-model');
    expect(result).toBeNull();
    const result2 = await getModelContext('missing-model');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches on network error', async () => {
    // fetch itself rejects here (no Response at all).
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockRejectedValueOnce(new TypeError('fetch failed: connect ECONNREFUSED'));
    const result = await getModelContext('down-upstream');
    expect(result).toBeNull();
    const result2 = await getModelContext('down-upstream');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });
});

// ---- negative cache TTL -----------------------------------------------------

describe('getModelContext — negative cache TTL', () => {
  it('does NOT refetch when a second call lands within the 60s TTL', async () => {
    vi.useFakeTimers();
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(new Response('boom', { status: 500 }));

    await getModelContext('flapping');
    // Half a minute later the failed lookup should still be remembered.
    vi.advanceTimersByTime(30_000);
    await getModelContext('flapping');

    expect(fetchMock).toHaveBeenCalledTimes(1);
  });

  it('refetches when the second call lands after the 60s TTL expires', async () => {
    vi.useFakeTimers();
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    // First reply is a failure; the retry finds a recovered upstream.
    fetchMock.mockResolvedValueOnce(new Response('boom', { status: 500 }));
    fetchMock.mockResolvedValueOnce(mockOkProps(8192));

    await getModelContext('flapping');
    // Step just past the 60s window before asking again.
    vi.advanceTimersByTime(61_000);
    const recovered = await getModelContext('flapping');

    expect(recovered).not.toBeNull();
    expect(recovered!.n_ctx).toBe(8192);
    expect(fetchMock).toHaveBeenCalledTimes(2);
  });
});

// ---- invalidateModelContext -------------------------------------------------

describe('invalidateModelContext', () => {
  it('clears a single positive entry by model name', async () => {
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(mockOkProps(8192));
    fetchMock.mockResolvedValueOnce(mockOkProps(8192));

    await getModelContext('cleared');
    invalidateModelContext('cleared');
    await getModelContext('cleared');

    // The lookup after invalidation had to go back to fetch.
    expect(fetchMock).toHaveBeenCalledTimes(2);
  });

  it('clears ALL entries when called with no arg', async () => {
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    // Two replies for the initial lookups, two more for the lookups that
    // follow the full clear.
    for (const size of [8192, 16_384, 8192, 16_384]) {
      fetchMock.mockResolvedValueOnce(mockOkProps(size));
    }
    const models = ['alpha', 'beta'];

    for (const model of models) {
      await getModelContext(model);
    }
    invalidateModelContext();
    for (const model of models) {
      await getModelContext(model);
    }

    expect(fetchMock).toHaveBeenCalledTimes(4);
  });

  it('clearing a positive entry also clears the matching negative entry', async () => {
    // The first lookup fails and is negative-cached. After an explicit
    // invalidate, the next lookup must fetch again instead of serving the
    // remembered failure.
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(new Response('boom', { status: 500 }));
    fetchMock.mockResolvedValueOnce(mockOkProps(4096));

    await getModelContext('formerly-broken');
    invalidateModelContext('formerly-broken');
    const afterClear = await getModelContext('formerly-broken');

    expect(afterClear).not.toBeNull();
    expect(afterClear!.n_ctx).toBe(4096);
    expect(fetchMock).toHaveBeenCalledTimes(2);
  });
});

// ---- W3: provider-aware cache isolation ------------------------------------

describe('getModelContext — provider-aware cache isolation (W3)', () => {
  beforeEach(() => {
    // Two providers sharing the same wire model name "qwen3.6" but on
    // different base URLs. This is the core scenario for cache isolation.
    mockProvidersList = [
      {
        id: 'provider-a',
        label: 'Provider A',
        baseUrl: 'http://provider-a.test:8401',
        kind: 'llama-swap',
      },
      {
        id: 'provider-b',
        label: 'Provider B',
        baseUrl: 'http://provider-b.test:8401',
        kind: 'llama-swap',
      },
    ];
    mockDefaultProvider = 'provider-a';
    configureModelContext(MULTI_PROVIDER_CONFIG);
  });

  it('two providers serving the same wire model name have separate cache entries', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768))   // provider-a: qwen3.6
      .mockResolvedValueOnce(mockOkProps(16_384));   // provider-b: qwen3.6

    // Both resolve to the wire model "qwen3.6" but different providers.
    const a = await getModelContext('provider-a/qwen3.6');
    const b = await getModelContext('provider-b/qwen3.6');

    expect(a).not.toBeNull();
    expect(a!.n_ctx).toBe(32_768);
    expect(b).not.toBeNull();
    expect(b!.n_ctx).toBe(16_384);

    // Two separate fetches — one per provider's baseUrl.
    expect(fetchSpy).toHaveBeenCalledTimes(2);
    expect(fetchSpy.mock.calls[0]![0]).toContain('provider-a.test');
    expect(fetchSpy.mock.calls[1]![0]).toContain('provider-b.test');
  });

  it('cached entry for one provider does not leak to the other', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768));   // provider-a: qwen3.6

    // Populate provider-a's cache.
    await getModelContext('provider-a/qwen3.6');
    expect(fetchSpy).toHaveBeenCalledTimes(1);

    // provider-b/qwen3.6 should NOT hit provider-a's cache — it must fetch.
    fetchSpy.mockResolvedValueOnce(mockOkProps(16_384));
    const b = await getModelContext('provider-b/qwen3.6');
    expect(b).not.toBeNull();
    expect(b!.n_ctx).toBe(16_384);
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });

  it('invalidateModelContext(key) only clears the targeted provider entry', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(32_768))   // provider-a: qwen3.6
      .mockResolvedValueOnce(mockOkProps(16_384))   // provider-b: qwen3.6
      .mockResolvedValueOnce(mockOkProps(40_960));   // provider-a re-fetch

    await getModelContext('provider-a/qwen3.6');
    await getModelContext('provider-b/qwen3.6');

    // Invalidate only provider-a's entry.
    invalidateModelContext('provider-a/qwen3.6');

    // provider-a must re-fetch, and the re-fetch must go to provider-a's host.
    const a2 = await getModelContext('provider-a/qwen3.6');
    expect(a2).not.toBeNull();
    expect(a2!.n_ctx).toBe(40_960);
    expect(fetchSpy).toHaveBeenCalledTimes(3); // 2 original + 1 re-fetch
    expect(fetchSpy.mock.calls[2]![0]).toContain('provider-a.test');

    // provider-b must still be served from cache: same value as before and
    // no additional fetch. Without this read the test would also pass if
    // the invalidation had wiped every entry.
    const b2 = await getModelContext('provider-b/qwen3.6');
    expect(b2).not.toBeNull();
    expect(b2!.n_ctx).toBe(16_384);
    expect(fetchSpy).toHaveBeenCalledTimes(3);
  });
});

// ---- W3: bare-id resolution through default provider -----------------------

describe('getModelContext — bare-id resolution through default provider (W3)', () => {
  beforeEach(() => {
    // Registry with a local llama-swap provider (the default) and a DeepSeek
    // provider alongside it.
    const llamaSwap = {
      id: 'llama-swap',
      label: 'llama-swap',
      baseUrl: 'http://llama-swap.test:8401',
      kind: 'llama-swap',
    };
    const deepseek = {
      id: 'deepseek',
      label: 'DeepSeek',
      baseUrl: 'https://api.deepseek.com',
      kind: 'deepseek',
    };
    mockProvidersList = [llamaSwap, deepseek];
    mockDefaultProvider = 'llama-swap';
    configureModelContext(MULTI_PROVIDER_CONFIG);
  });

  it('bare model id resolves through the default provider', async () => {
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(mockOkProps(8192));

    const ctx = await getModelContext('qwen3.6');

    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(8192);
    // With "llama-swap" as the default provider, the request must target
    // that provider's baseUrl.
    expect(fetchMock).toHaveBeenCalledExactlyOnceWith(
      'http://llama-swap.test:8401/upstream/qwen3.6/props',
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('bare id and explicit default-provider composite share a cache entry', async () => {
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(mockOkProps(8192));

    // 'qwen3.6' falls back to the default provider ("llama-swap"), and
    // 'llama-swap/qwen3.6' names that same provider explicitly, so the
    // second lookup is expected to reuse the first one's entry.
    const viaBareId = await getModelContext('qwen3.6');
    const viaComposite = await getModelContext('llama-swap/qwen3.6');

    expect(viaBareId).toEqual(viaComposite);
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });

  it('bare "deepseek-*" id returns static default without fetching', async () => {
    // Spy only (no queued reply): any fetch call would be a failure here.
    const fetchMock = vi.spyOn(globalThis, 'fetch');

    const ctx = await getModelContext('deepseek-v4-pro');

    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(131_072);
    expect(fetchMock).not.toHaveBeenCalled();
  });

  it('composite "deepseek/model" id returns static default without fetching', async () => {
    // Spy only (no queued reply): any fetch call would be a failure here.
    const fetchMock = vi.spyOn(globalThis, 'fetch');

    const ctx = await getModelContext('deepseek/deepseek-v4-pro');

    expect(ctx).not.toBeNull();
    expect(ctx!.n_ctx).toBe(131_072);
    expect(fetchMock).not.toHaveBeenCalled();
  });
});