// boocode/apps/server/src/services/__tests__/model-context.test.ts

import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
  configureModelContext,
  getModelContext,
  invalidateModelContext,
} from '../model-context.js';

// ---- fixtures ---------------------------------------------------------------

const TEST_URL = 'http://llama-swap.test:8401';

/**
 * Builds a successful (HTTP 200) JSON `Response` carrying a props-style body:
 * `default_generation_settings.n_ctx` plus a top-level `total_slots`.
 *
 * @param n_ctx value placed under `default_generation_settings.n_ctx`
 * @param total_slots value placed under `total_slots`; defaults to 1
 * @returns a fresh `Response` whose body is the JSON-serialised payload
 */
function mockOkProps(n_ctx: number, total_slots = 1) {
  // Key order is kept stable so the serialised body is deterministic.
  const payload = {
    default_generation_settings: { n_ctx: n_ctx },
    total_slots: total_slots,
  };
  const init = {
    status: 200,
    headers: { 'Content-Type': 'application/json' },
  };
  return new Response(JSON.stringify(payload), init);
}

// Each test starts by invalidating every cached entry and then pointing the
// module at the fixture base URL, so no state carries over between tests.
beforeEach(function resetModelContextState() {
  invalidateModelContext();
  configureModelContext({ llamaSwapUrl: TEST_URL });
});

// Undo whatever the test installed: spies (e.g. on `fetch`) are restored
// first, then real timers are switched back on in case fake ones were used.
afterEach(function restoreSpiesAndTimers() {
  vi.restoreAllMocks();
  vi.useRealTimers();
});

// ---- positive cache ---------------------------------------------------------

describe('getModelContext — positive cache', () => {
  // Happy path: a well-formed 200 response is parsed and returned, and a
  // repeat lookup for the same model is answered without another fetch.

  it('returns the parsed body on a 200 with valid shape', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockOkProps(262_144, 1));
    const result = await getModelContext('qwen3.6');
    expect(result).not.toBeNull();
    expect(result!.n_ctx).toBe(262_144);
    expect(result!.total_slots).toBe(1);
    expect(typeof result!.fetched_at).toBe('number');
    // Verify the request target: configured base URL + /upstream/<model>/props,
    // issued with an AbortSignal. 'qwen3.6' contains nothing that needs
    // percent-encoding, so this pins the path layout only — it does not
    // exercise any escaping of the model name.
    expect(fetchSpy).toHaveBeenCalledExactlyOnceWith(
      `${TEST_URL}/upstream/qwen3.6/props`,
      expect.objectContaining({ signal: expect.any(AbortSignal) }),
    );
  });

  it('serves the second call from cache without refetching', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(262_144));
    const a = await getModelContext('qwen3.6');
    const b = await getModelContext('qwen3.6');
    // Guard against a vacuous pass: a negative-cached failure would also give
    // two equal (null) results with a single fetch, so require a real value.
    expect(a).not.toBeNull();
    expect(a).toEqual(b);
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('defaults total_slots to 1 when the server omits it', async () => {
    // Mirror the docstring claim — total_slots is informational and we don't
    // reject the response just because it's missing.
    vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(JSON.stringify({ default_generation_settings: { n_ctx: 8192 } }), {
        status: 200,
      }),
    );
    const result = await getModelContext('partial-model');
    expect(result).not.toBeNull();
    expect(result!.n_ctx).toBe(8192);
    expect(result!.total_slots).toBe(1);
  });
});

// ---- negative cache (single-shot) ------------------------------------------

describe('getModelContext — negative cache (single failure modes)', () => {
  // Every case follows the same pattern: one failing fetch is queued, the
  // first lookup must return null, and an immediate second lookup must also
  // return null WITHOUT triggering another fetch (i.e. the failure is cached).

  it('returns null and negative-caches when default_generation_settings is missing', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response(JSON.stringify({ total_slots: 1 }), { status: 200 }));
    const result = await getModelContext('broken');
    expect(result).toBeNull();
    // Second call within TTL must not refetch.
    const result2 = await getModelContext('broken');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches when n_ctx is missing inside default_generation_settings', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(JSON.stringify({ default_generation_settings: {}, total_slots: 1 }), {
        status: 200,
      }),
    );
    // Assert the return values too — the title promises "returns null", and
    // a fetch-count check alone would not catch a bogus non-null result.
    const result = await getModelContext('half-broken');
    expect(result).toBeNull();
    const result2 = await getModelContext('half-broken');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches on non-200 (404)', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('not found', { status: 404 }));
    const result = await getModelContext('missing-model');
    expect(result).toBeNull();
    const result2 = await getModelContext('missing-model');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('returns null and negative-caches on network error', async () => {
    // fetch itself rejects here (no Response at all), unlike the HTTP-level
    // failures above.
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockRejectedValueOnce(new TypeError('fetch failed: connect ECONNREFUSED'));
    const result = await getModelContext('down-upstream');
    expect(result).toBeNull();
    const result2 = await getModelContext('down-upstream');
    expect(result2).toBeNull();
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });
});

// ---- negative cache TTL -----------------------------------------------------

describe('getModelContext — negative cache TTL', () => {
  // Clock offsets chosen to land on either side of the 60s TTL named in the
  // test titles below.
  const INSIDE_TTL_MS = 30_000;
  const BEYOND_TTL_MS = 61_000;
  const model = 'flapping';

  // A fresh failing upstream reply for each use.
  const upstreamFailure = () => new Response('boom', { status: 500 });

  it('does NOT refetch when a second call lands within the 60s TTL', async () => {
    vi.useFakeTimers();
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    fetchMock.mockResolvedValueOnce(upstreamFailure());

    await getModelContext(model);
    vi.advanceTimersByTime(INSIDE_TTL_MS);
    await getModelContext(model);

    expect(fetchMock).toHaveBeenCalledTimes(1);
  });

  it('refetches when the second call lands after the 60s TTL expires', async () => {
    vi.useFakeTimers();
    const fetchMock = vi.spyOn(globalThis, 'fetch');
    // First reply fails; the retry finds a recovered upstream and should
    // yield a usable (non-null) result.
    fetchMock.mockResolvedValueOnce(upstreamFailure());
    fetchMock.mockResolvedValueOnce(mockOkProps(8192));

    await getModelContext(model);
    vi.advanceTimersByTime(BEYOND_TTL_MS);
    const recovered = await getModelContext(model);

    expect(recovered).not.toBeNull();
    expect(recovered!.n_ctx).toBe(8192);
    expect(fetchMock).toHaveBeenCalledTimes(2);
  });
});

// ---- invalidateModelContext -------------------------------------------------

describe('invalidateModelContext', () => {
  // Fetch-call counts are the observable here: a lookup that follows an
  // invalidation must hit the network again, while an untouched entry must
  // keep being served from cache.

  it('clears a single positive entry by model name', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(8192));

    await getModelContext('cleared');
    invalidateModelContext('cleared');
    await getModelContext('cleared');
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });

  it('leaves other models cached when a single model is invalidated', async () => {
    // Without this case, an implementation that wiped the whole cache on a
    // named invalidation would still pass the single-entry test above.
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(16_384))
      // Only the invalidated model is allowed to re-fetch.
      .mockResolvedValueOnce(mockOkProps(8192))
      // Any further call is a bug; reject instead of falling through to the
      // real network.
      .mockRejectedValue(new Error('unexpected extra fetch'));

    await getModelContext('evicted');
    const keptBefore = await getModelContext('kept');
    invalidateModelContext('evicted');
    await getModelContext('evicted');
    const keptAfter = await getModelContext('kept');

    expect(keptBefore).not.toBeNull();
    expect(keptAfter).toEqual(keptBefore);
    expect(fetchSpy).toHaveBeenCalledTimes(3);
  });

  it('clears ALL entries when called with no arg', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(16_384))
      // After the full clear, both models re-fetch.
      .mockResolvedValueOnce(mockOkProps(8192))
      .mockResolvedValueOnce(mockOkProps(16_384));

    await getModelContext('alpha');
    await getModelContext('beta');
    invalidateModelContext();
    await getModelContext('alpha');
    await getModelContext('beta');
    expect(fetchSpy).toHaveBeenCalledTimes(4);
  });

  it('clearing a positive entry also clears the matching negative entry', async () => {
    // Only a negative entry exists here: the first call fails and is
    // negative-cached. Invalidating by name must drop that entry as well, so
    // the next call fetches again rather than serving the stale failure.
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(new Response('boom', { status: 500 }))
      .mockResolvedValueOnce(mockOkProps(4096));

    await getModelContext('formerly-broken');
    invalidateModelContext('formerly-broken');
    const result = await getModelContext('formerly-broken');
    expect(result).not.toBeNull();
    expect(result!.n_ctx).toBe(4096);
    expect(fetchSpy).toHaveBeenCalledTimes(2);
  });
});