perf(llama): unshadow cache-type + spec-decoding flags for agent opt-in

KV cache quantization (--cache-type-k q4_0) and ngram speculative decoding
(--spec-type ngram-mod) are high-value llama.cpp features: the former reduces
VRAM usage and the latter increases tokens/sec. Removing their flags from the
shadowing lists lets agents enable them via llama_extra_args.
This commit is contained in:
2026-06-07 22:40:23 +00:00
parent 02063072ab
commit a8e475fdf4
2 changed files with 10 additions and 21 deletions

View File

@@ -112,14 +112,14 @@ describe('stripShadowingFlags', () => {
expect(result).toEqual(['-c', '4096']);
});
it('strips cache flags by default', () => {
it('passes through cache flags (no longer shadowed)', () => {
const result = stripShadowingFlags(['--cache-type-k', 'q8_0']);
expect(result).toEqual([]);
expect(result).toEqual(['--cache-type-k', 'q8_0']);
});
it('strips spec flags by default', () => {
it('passes through spec flags (no longer shadowed)', () => {
const result = stripShadowingFlags(['--spec-draft-n-max', '16']);
expect(result).toEqual([]);
expect(result).toEqual(['--spec-draft-n-max', '16']);
});
});

View File

@@ -131,23 +131,13 @@ export function isManagedFlag(flag: string): boolean {
const SHADOW_CONTEXT = ['-c', '--ctx-size'];
const SHADOW_CACHE = ['-ctk', '--cache-type-k', '-ctv', '--cache-type-v'];
// Intentionally empty: the cache-type flags (-ctk/--cache-type-k,
// -ctv/--cache-type-v) are no longer stripped, so agents can opt into them
// (lift analysis found these are high-value features, not safety concerns).
const SHADOW_CACHE: string[] = [];
const SHADOW_SPEC = [
'--spec-default',
'--spec-type',
'--spec-ngram-size-n',
'--spec-ngram-size',
'--draft-min',
'--draft-max',
'--spec-draft-n-max',
'--spec-draft-n-min',
'--spec-draft-p-min',
'--spec-draft-p-split',
'--spec-ngram-mod-n-match',
'--spec-ngram-mod-n-min',
'--spec-ngram-mod-n-max',
];
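// Intentionally empty: the speculative-decoding flags (--spec-*, --draft-*)
// are no longer stripped, because ngram speculative decoding is a performance
// feature agents should be able to enable.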
const SHADOW_SPEC: string[] = [];
const SHADOW_TEMPLATE = [
'--chat-template',
@@ -160,7 +150,6 @@ const SHADOW_TEMPLATE = [
// Shadowing flags that take no value — a boolean switch — so the stripper must
// not also drop the following token.
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set([
'--spec-default',
'--jinja',
'--no-jinja',
]);