feat: DeepSeek API integration + Whale lift (hooks, tool repair, MCP permissions, token tracking)
DeepSeek API:
- @ai-sdk/deepseek provider replaces openai-compatible for deepseek-* models
- Token tracking: cache_hit/reasoning tokens flow API → DB → WS frames → UI
- Thinking effort levels (off/low/medium/high/xhigh/max) via AGENTS.md frontmatter
- V4 models: deepseek-v4-flash, deepseek-v4-pro
- Wired for both chat and coder panes

Whale lifts:
- Tool input repair (schema-based type coercion, markdown link unwrapping)
- Hooks system (6 lifecycle events, shell exec, JSON stdin/stdout contract)
- Per-MCP-server permissions (allow/ask/deny)
- Token tracking UI (cache N, think N in message stats line)

Infra:
- New DB columns: messages.cache_tokens, messages.reasoning_tokens
- New WS frame fields: cache_tokens, reasoning_tokens on message_complete
- Coder provider snapshot merges DeepSeek models alongside llama-swap
This commit is contained in:
@@ -122,6 +122,8 @@ export async function finalizeStreamedRow(
|
||||
completionTokens: number | null;
|
||||
promptTokens: number | null;
|
||||
startedAt: string | null;
|
||||
cacheTokens?: number | null;
|
||||
reasoningTokens?: number | null;
|
||||
beforeComplete?: () => Promise<void>;
|
||||
},
|
||||
): Promise<void> {
|
||||
@@ -137,6 +139,8 @@ export async function finalizeStreamedRow(
|
||||
tokens_used = ${opts.completionTokens},
|
||||
ctx_used = ${opts.promptTokens},
|
||||
ctx_max = ${nCtx},
|
||||
cache_tokens = ${opts.cacheTokens ?? null},
|
||||
reasoning_tokens = ${opts.reasoningTokens ?? null},
|
||||
finished_at = clock_timestamp()
|
||||
WHERE id = ${opts.messageId}
|
||||
RETURNING tokens_used, ctx_used, ctx_max, finished_at
|
||||
@@ -149,6 +153,8 @@ export async function finalizeStreamedRow(
|
||||
tokens_used: updated?.tokens_used ?? null,
|
||||
ctx_used: updated?.ctx_used ?? null,
|
||||
ctx_max: updated?.ctx_max ?? null,
|
||||
cache_tokens: opts.cacheTokens ?? null,
|
||||
reasoning_tokens: opts.reasoningTokens ?? null,
|
||||
started_at: opts.startedAt,
|
||||
finished_at: updated?.finished_at ?? null,
|
||||
model: opts.model,
|
||||
@@ -188,7 +194,7 @@ export async function finalizeCompletion(
|
||||
): Promise<void> {
|
||||
const { sessionId, chatId, assistantMessageId } = args;
|
||||
const content = stripToolMarkup(result.content, { final: true });
|
||||
const { finishReason, promptTokens, completionTokens } = result;
|
||||
const { finishReason, promptTokens, completionTokens, cacheReadTokens, reasoningTokens } = result;
|
||||
|
||||
// v1.11.3: see executeToolPhase for the rationale.
|
||||
const mctx = await modelContext.getModelContext(session.model);
|
||||
@@ -203,6 +209,8 @@ export async function finalizeCompletion(
|
||||
tokens_used = ${completionTokens},
|
||||
ctx_used = ${promptTokens},
|
||||
ctx_max = ${nCtx},
|
||||
cache_tokens = ${cacheReadTokens ?? null},
|
||||
reasoning_tokens = ${reasoningTokens ?? null},
|
||||
model = ${session.model},
|
||||
finished_at = clock_timestamp()
|
||||
WHERE id = ${assistantMessageId}
|
||||
@@ -268,6 +276,8 @@ export async function finalizeCompletion(
|
||||
tokens_used: updated?.tokens_used ?? null,
|
||||
ctx_used: updated?.ctx_used ?? null,
|
||||
ctx_max: updated?.ctx_max ?? null,
|
||||
cache_tokens: cacheReadTokens ?? null,
|
||||
reasoning_tokens: reasoningTokens ?? null,
|
||||
started_at: startedAt,
|
||||
finished_at: updated?.finished_at ?? null,
|
||||
model: session.model,
|
||||
|
||||
Reference in New Issue
Block a user