perf(llama): unshadow cache-type + spec-decoding flags for agent opt-in
KV cache quantization (--cache-type-k q4_0) and ngram speculative decoding (--spec-type ngram-mod) are high-value llama.cpp features that reduce VRAM usage and increase tokens/sec. Removing their flags from the shadowing lists lets agents opt into them via llama_extra_args.
This commit is contained in:
@@ -112,14 +112,14 @@ describe('stripShadowingFlags', () => {
|
||||
expect(result).toEqual(['-c', '4096']);
|
||||
});
|
||||
|
||||
it('strips cache flags by default', () => {
|
||||
it('passes through cache flags (no longer shadowed)', () => {
|
||||
const result = stripShadowingFlags(['--cache-type-k', 'q8_0']);
|
||||
expect(result).toEqual([]);
|
||||
expect(result).toEqual(['--cache-type-k', 'q8_0']);
|
||||
});
|
||||
|
||||
it('strips spec flags by default', () => {
|
||||
it('passes through spec flags (no longer shadowed)', () => {
|
||||
const result = stripShadowingFlags(['--spec-draft-n-max', '16']);
|
||||
expect(result).toEqual([]);
|
||||
expect(result).toEqual(['--spec-draft-n-max', '16']);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -131,23 +131,13 @@ export function isManagedFlag(flag: string): boolean {
|
||||
|
||||
const SHADOW_CONTEXT = ['-c', '--ctx-size'];
|
||||
|
||||
const SHADOW_CACHE = ['-ctk', '--cache-type-k', '-ctv', '--cache-type-v'];
|
||||
// Empty: agents should be able to opt into cache-type flags (lift analysis
|
||||
// found these are high-value features, not safety concerns).
|
||||
const SHADOW_CACHE: string[] = [];
|
||||
|
||||
const SHADOW_SPEC = [
|
||||
'--spec-default',
|
||||
'--spec-type',
|
||||
'--spec-ngram-size-n',
|
||||
'--spec-ngram-size',
|
||||
'--draft-min',
|
||||
'--draft-max',
|
||||
'--spec-draft-n-max',
|
||||
'--spec-draft-n-min',
|
||||
'--spec-draft-p-min',
|
||||
'--spec-draft-p-split',
|
||||
'--spec-ngram-mod-n-match',
|
||||
'--spec-ngram-mod-n-min',
|
||||
'--spec-ngram-mod-n-max',
|
||||
];
|
||||
// Empty: ngram speculative decoding is a performance feature agents should
|
||||
// be able to enable.
|
||||
const SHADOW_SPEC: string[] = [];
|
||||
|
||||
const SHADOW_TEMPLATE = [
|
||||
'--chat-template',
|
||||
@@ -160,7 +150,6 @@ const SHADOW_TEMPLATE = [
|
||||
// Shadowing flags that take no value — a boolean switch — so the stripper must
|
||||
// not also drop the following token.
|
||||
const VALUELESS_SHADOW_FLAGS: ReadonlySet<string> = new Set([
|
||||
'--spec-default',
|
||||
'--jinja',
|
||||
'--no-jinja',
|
||||
]);
|
||||
|
||||
Reference in New Issue
Block a user