perf(llama): unshadow cache-type + spec-decoding flags for agent opt-in

KV cache quantization (--cache-type-k q4_0) and ngram speculative decoding
(--spec-type ngram-mod) are high-value llama.cpp features that lower VRAM
usage and raise tokens/sec. Removing their flags from the shadowing lists
allows agents to enable them via llama_extra_args.
This commit is contained in:
2026-06-07 22:40:23 +00:00
parent 02063072ab
commit a8e475fdf4
2 changed files with 10 additions and 21 deletions

View File

@@ -112,14 +112,14 @@ describe('stripShadowingFlags', () => {
expect(result).toEqual(['-c', '4096']); expect(result).toEqual(['-c', '4096']);
}); });
it('strips cache flags by default', () => { it('passes through cache flags (no longer shadowed)', () => {
const result = stripShadowingFlags(['--cache-type-k', 'q8_0']); const result = stripShadowingFlags(['--cache-type-k', 'q8_0']);
expect(result).toEqual([]); expect(result).toEqual(['--cache-type-k', 'q8_0']);
}); });
it('strips spec flags by default', () => { it('passes through spec flags (no longer shadowed)', () => {
const result = stripShadowingFlags(['--spec-draft-n-max', '16']); const result = stripShadowingFlags(['--spec-draft-n-max', '16']);
expect(result).toEqual([]); expect(result).toEqual(['--spec-draft-n-max', '16']);
}); });
}); });

View File

@@ -131,23 +131,13 @@ export function isManagedFlag(flag: string): boolean {
const SHADOW_CONTEXT = ['-c', '--ctx-size']; const SHADOW_CONTEXT = ['-c', '--ctx-size'];
const SHADOW_CACHE = ['-ctk', '--cache-type-k', '-ctv', '--cache-type-v']; // Empty: agents should be able to opt into cache-type flags (lift analysis
// found these are high-value features, not safety concerns).
const SHADOW_CACHE: string[] = [];
const SHADOW_SPEC = [ // Empty: ngram speculative decoding is a performance feature agents should
'--spec-default', // be able to enable.
'--spec-type', const SHADOW_SPEC: string[] = [];
'--spec-ngram-size-n',
'--spec-ngram-size',
'--draft-min',
'--draft-max',
'--spec-draft-n-max',
'--spec-draft-n-min',
'--spec-draft-p-min',
'--spec-draft-p-split',
'--spec-ngram-mod-n-match',
'--spec-ngram-mod-n-min',
'--spec-ngram-mod-n-max',
];
const SHADOW_TEMPLATE = [ const SHADOW_TEMPLATE = [
'--chat-template', '--chat-template',
@@ -160,7 +150,6 @@ const SHADOW_TEMPLATE = [
// Shadowing flags that take no value — a boolean switch — so the stripper must // Shadowing flags that take no value — a boolean switch — so the stripper must
// not also drop the following token. // not also drop the following token.
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set([ const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set([
'--spec-default',
'--jinja', '--jinja',
'--no-jinja', '--no-jinja',
]); ]);