feat: DeepSeek API integration + Whale lift (hooks, tool repair, MCP permissions, token tracking)
DeepSeek API:
- @ai-sdk/deepseek provider replaces openai-compatible for deepseek-* models
- Token tracking: cache_hit/reasoning tokens flow API → DB → WS frames → UI
- Thinking effort levels (off/low/medium/high/xhigh/max) via AGENTS.md frontmatter
- V4 models: deepseek-v4-flash, deepseek-v4-pro
- Wired for both chat and coder panes

Whale lifts:
- Tool input repair (schema-based type coercion, markdown link unwrapping)
- Hooks system (6 lifecycle events, shell exec, JSON stdin/stdout contract)
- Per-MCP-server permissions (allow/ask/deny)
- Token tracking UI (cache N, think N in message stats line)

Infra:
- New DB columns: messages.cache_tokens, messages.reasoning_tokens
- New WS frame fields: cache_tokens, reasoning_tokens on message_complete
- Coder provider snapshot merges DeepSeek models alongside llama-swap
This commit is contained in:
@@ -13,7 +13,7 @@ import type { OpenAiMessage } from './payload.js';
|
||||
import { extractToolCallBlocks } from './tool-call-parser.js';
|
||||
import { classifyStreamError } from './stream-error-classifier.js';
|
||||
import type { StreamResult } from './types.js';
|
||||
import { upstreamModel } from './provider.js';
|
||||
import { isDeepSeekModel, upstreamModel } from './provider.js';
|
||||
import {
|
||||
jsonSchema,
|
||||
streamText,
|
||||
@@ -51,6 +51,9 @@ export interface StreamOptions {
|
||||
dry_base?: number | null;
|
||||
dry_allowed_length?: number | null;
|
||||
dry_penalty_last_n?: number | null;
|
||||
// vDeepSeek: thinking/reasoning effort. Maps to DeepSeek's reasoning_effort
|
||||
// API param for deepseek-v4-flash / deepseek-v4-pro models.
|
||||
reasoning_effort?: 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
|
||||
}
|
||||
|
||||
// P5: the 10-field sampler-options literal that was copy-pasted at 4 sites
|
||||
@@ -74,6 +77,7 @@ export function samplerOptsFromAgent(agent: Agent | null): SamplerOpts {
|
||||
dry_base: agent?.dry_base ?? undefined,
|
||||
dry_allowed_length: agent?.dry_allowed_length ?? undefined,
|
||||
dry_penalty_last_n: agent?.dry_penalty_last_n ?? undefined,
|
||||
reasoning_effort: agent?.reasoning_effort ?? undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -272,6 +276,19 @@ export async function streamCompletion(
|
||||
// before this. They now go through the same extraBody path as the new params.
|
||||
const samplerBody = buildSamplerProviderOptions(opts);
|
||||
|
||||
// vDeepSeek: build providerOptions.deepseek for DeepSeek V4 models.
|
||||
let deepseekProviderOptions:
|
||||
| { thinking: { type: 'enabled' | 'disabled' }; reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh' | 'max' }
|
||||
| undefined;
|
||||
if (isDeepSeekModel(model)) {
|
||||
const dsEffort = opts.reasoning_effort;
|
||||
const thinkingEnabled = dsEffort && dsEffort !== 'off';
|
||||
deepseekProviderOptions = {
|
||||
thinking: { type: thinkingEnabled ? 'enabled' : 'disabled' },
|
||||
...(thinkingEnabled ? { reasoningEffort: dsEffort } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
// F6: per-chunk stall deadline. If the model stops emitting chunks for
|
||||
// STALL_TIMEOUT_MS the stallAc fires through AbortSignal.any; the post-loop
|
||||
// abort check below then throws AbortError → handleAbortOrError writes
|
||||
@@ -297,7 +314,14 @@ export async function streamCompletion(
|
||||
...(typeof opts.temperature === 'number' ? { temperature: opts.temperature } : {}),
|
||||
...(typeof opts.top_p === 'number' ? { topP: opts.top_p } : {}),
|
||||
...(typeof opts.presence_penalty === 'number' ? { presencePenalty: opts.presence_penalty } : {}),
|
||||
...(samplerBody ? { providerOptions: { openaiCompatible: samplerBody } } : {}),
|
||||
...(samplerBody || deepseekProviderOptions
|
||||
? {
|
||||
providerOptions: {
|
||||
...(samplerBody ? { openaiCompatible: samplerBody } : {}),
|
||||
...(deepseekProviderOptions ? { deepseek: deepseekProviderOptions } : {}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
abortSignal: effectiveSignal,
|
||||
});
|
||||
|
||||
@@ -401,12 +425,26 @@ export async function streamCompletion(
|
||||
|
||||
// Usage lands as a promise on the result; awaiting after fullStream is
|
||||
// drained is safe. AI SDK v6 names: `inputTokens` / `outputTokens`.
|
||||
// Some providers (llama-swap via openai-compatible) return plain numbers;
|
||||
// others (deepseek via @ai-sdk/deepseek) return {total, cacheRead, noCache, ...}.
|
||||
let promptTokens: number | null = null;
|
||||
let completionTokens: number | null = null;
|
||||
let cacheReadTokens: number | null = null;
|
||||
let reasoningTokens: number | null = null;
|
||||
try {
|
||||
const usage = await result.usage;
|
||||
if (typeof usage.inputTokens === 'number') promptTokens = usage.inputTokens;
|
||||
if (typeof usage.outputTokens === 'number') completionTokens = usage.outputTokens;
|
||||
if (typeof usage.inputTokens === 'number') {
|
||||
promptTokens = usage.inputTokens;
|
||||
} else if (usage.inputTokens && typeof usage.inputTokens === 'object') {
|
||||
promptTokens = (usage.inputTokens as Record<string, number | undefined>).total ?? null;
|
||||
cacheReadTokens = (usage.inputTokens as Record<string, number | undefined>).cacheRead ?? null;
|
||||
}
|
||||
if (typeof usage.outputTokens === 'number') {
|
||||
completionTokens = usage.outputTokens;
|
||||
} else if (usage.outputTokens && typeof usage.outputTokens === 'object') {
|
||||
completionTokens = (usage.outputTokens as Record<string, number | undefined>).total ?? null;
|
||||
reasoningTokens = (usage.outputTokens as Record<string, number | undefined>).reasoning ?? null;
|
||||
}
|
||||
} catch {
|
||||
// Some providers omit usage on partial streams; leave all token counts null.
|
||||
}
|
||||
@@ -422,6 +460,13 @@ export async function streamCompletion(
|
||||
);
|
||||
}
|
||||
|
||||
if (cacheReadTokens !== null || reasoningTokens !== null) {
|
||||
ctx.log.debug(
|
||||
{ promptTokens, completionTokens, cacheReadTokens, reasoningTokens, model },
|
||||
'streamCompletion: deepseek usage breakdown',
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
finishReason,
|
||||
content,
|
||||
@@ -429,6 +474,10 @@ export async function streamCompletion(
|
||||
promptTokens,
|
||||
completionTokens,
|
||||
reasoning: reasoningAccumulated,
|
||||
// vDeepSeek: optional usage breakdown populated when the provider returns
|
||||
// structured usage (cache hit tokens, reasoning tokens).
|
||||
cacheReadTokens: cacheReadTokens ?? undefined,
|
||||
reasoningTokens: reasoningTokens ?? undefined,
|
||||
};
|
||||
} finally {
|
||||
// Clear the stall timer whether the stream completes normally, throws, or
|
||||
|
||||
Reference in New Issue
Block a user