From fa4eb745abce3973093d2a1fb5aa22f02ef16c0a Mon Sep 17 00:00:00 2001 From: Raza Rauf Date: Tue, 3 Feb 2026 04:12:19 +0500 Subject: [PATCH] fix: correct context usage calculation to include cacheRead tokens and handle accumulated values The context formula was excluding cacheReadInputTokens, causing the gauge to drastically underestimate usage (e.g., 3% when reality was 23%). During multi-tool turns, accumulated token totals could exceed the context window, producing false 100% readings and premature compact warnings. - Include cacheReadInputTokens in the formula (input + cacheRead + cacheCreation) - Detect accumulated values (total > window) and return null to preserve last valid % - Skip context updates during accumulated turns instead of displaying inflated values - Fix MainPanel tooltip deriving from raw tab stats instead of preserved session percentage - Handle group chat participant/moderator accumulated values with -1 sentinel --- .../main/parsers/usage-aggregator.test.ts | 81 ++++++++++++++----- .../process-listeners/usage-listener.test.ts | 6 +- .../components/HistoryDetailModal.test.tsx | 6 +- .../renderer/components/MainPanel.test.tsx | 18 +++-- .../renderer/utils/contextExtractor.test.ts | 8 +- .../renderer/utils/contextUsage.test.ts | 77 +++++++++--------- src/main/parsers/usage-aggregator.ts | 69 ++++++++-------- src/main/process-listeners/usage-listener.ts | 59 +++++++++----- src/renderer/App.tsx | 50 ++++++------ src/renderer/components/MainPanel.tsx | 21 ++++- src/renderer/utils/contextUsage.ts | 69 ++++++++-------- 11 files changed, 281 insertions(+), 183 deletions(-) diff --git a/src/__tests__/main/parsers/usage-aggregator.test.ts b/src/__tests__/main/parsers/usage-aggregator.test.ts index 5161080a..f3fde93a 100644 --- a/src/__tests__/main/parsers/usage-aggregator.test.ts +++ b/src/__tests__/main/parsers/usage-aggregator.test.ts @@ -2,6 +2,7 @@ * Tests for usage aggregator utilities */ +import { describe, expect, it } from 'vitest'; import { 
aggregateModelUsage, estimateContextUsage, @@ -96,15 +97,32 @@ describe('estimateContextUsage', () => { expect(result).toBe(10); }); - it('should cap at 100%', () => { + it('should correctly calculate for Claude with all token types', () => { + // Simulates a real Claude response: input + cacheRead + cacheCreation = total const stats = createStats({ - inputTokens: 150000, - outputTokens: 100000, + inputTokens: 2, + cacheReadInputTokens: 33541, + cacheCreationInputTokens: 11657, + outputTokens: 12, contextWindow: 200000, }); const result = estimateContextUsage(stats, 'claude-code'); - // Output tokens excluded; 150k / 200k = 75% - expect(result).toBe(75); + // (2 + 33541 + 11657) / 200000 = 45200 / 200000 = 22.6% -> 23% + expect(result).toBe(23); + }); + + it('should return null when tokens exceed context window (accumulated values)', () => { + // When Claude Code does complex multi-tool turns, token values accumulate + // across internal API calls and can exceed the context window + const stats = createStats({ + inputTokens: 21627, + cacheReadInputTokens: 1079415, + cacheCreationInputTokens: 39734, + contextWindow: 200000, + }); + const result = estimateContextUsage(stats, 'claude-code'); + // Total = 1,140,776 > 200,000 -> null (accumulated, skip update) + expect(result).toBeNull(); }); }); @@ -112,6 +130,7 @@ describe('estimateContextUsage', () => { it('should use claude-code default context window (200k)', () => { const stats = createStats({ contextWindow: 0 }); const result = estimateContextUsage(stats, 'claude-code'); + // 10000 + 0 + 0 = 10000 / 200000 = 5% expect(result).toBe(5); }); @@ -149,6 +168,18 @@ describe('estimateContextUsage', () => { const result = estimateContextUsage(stats, 'claude-code'); expect(result).toBe(0); }); + + it('should return null when accumulated tokens exceed default window', () => { + const stats = createStats({ + inputTokens: 50000, + cacheReadInputTokens: 500000, + cacheCreationInputTokens: 10000, + contextWindow: 0, + }); + 
const result = estimateContextUsage(stats, 'claude-code'); + // 560000 > 200000 default -> null + expect(result).toBeNull(); + }); }); }); @@ -166,38 +197,52 @@ describe('calculateContextTokens', () => { ...overrides, }); - it('should exclude output tokens and cacheReadInputTokens for Claude agents', () => { + it('should include input + cacheRead + cacheCreation for Claude agents', () => { const stats = createStats(); const result = calculateContextTokens(stats, 'claude-code'); - // 10000 + 1000 = 11000 (no output tokens, no cacheRead - cumulative) - expect(result).toBe(11000); + // 10000 + 2000 + 1000 = 13000 (all input token types, excludes output) + expect(result).toBe(13000); }); - it('should include output tokens but exclude cacheReadInputTokens for Codex agents', () => { + it('should include input + cacheCreation + output for Codex agents', () => { const stats = createStats(); const result = calculateContextTokens(stats, 'codex'); - // 10000 + 5000 + 1000 = 16000 (includes output, excludes cacheRead) + // 10000 + 1000 + 5000 = 16000 (combined input+output window) expect(result).toBe(16000); }); it('should default to Claude behavior when agent is undefined', () => { const stats = createStats(); const result = calculateContextTokens(stats); - // 10000 + 1000 = 11000 (excludes cacheRead) - expect(result).toBe(11000); + // 10000 + 2000 + 1000 = 13000 (Claude default: all input token types) + expect(result).toBe(13000); }); - it('should exclude cacheReadInputTokens because they are cumulative session totals', () => { - // cacheReadInputTokens accumulate across all turns in a session and can - // exceed the context window. Including them would cause context % > 100%. 
+ it('should calculate correctly for typical first Claude turn', () => { + // Real-world scenario: first message with system prompt cache + const stats = createStats({ + inputTokens: 2, + cacheReadInputTokens: 33541, + cacheCreationInputTokens: 11657, + outputTokens: 12, + }); + const result = calculateContextTokens(stats, 'claude-code'); + // 2 + 33541 + 11657 = 45200 (total context for the API call) + expect(result).toBe(45200); + }); + + it('should handle accumulated values from multi-tool turns', () => { + // When values are accumulated across internal API calls, + // the total can exceed the context window. calculateContextTokens + // returns the raw total; callers must check against contextWindow. const stats = createStats({ inputTokens: 5000, cacheCreationInputTokens: 1000, - cacheReadInputTokens: 500000, // Very high cumulative value + cacheReadInputTokens: 500000, // Accumulated from many internal calls }); const result = calculateContextTokens(stats, 'claude-code'); - // Should only be 5000 + 1000 = 6000, NOT 506000 - expect(result).toBe(6000); + // 5000 + 500000 + 1000 = 506000 (raw total, may exceed window) + expect(result).toBe(506000); }); }); diff --git a/src/__tests__/main/process-listeners/usage-listener.test.ts b/src/__tests__/main/process-listeners/usage-listener.test.ts index 6a2df6a6..6d5385ce 100644 --- a/src/__tests__/main/process-listeners/usage-listener.test.ts +++ b/src/__tests__/main/process-listeners/usage-listener.test.ts @@ -162,19 +162,21 @@ describe('Usage Listener', () => { }); }); - it('should handle zero context window gracefully', async () => { + it('should handle zero context window gracefully (falls back to 200k default)', async () => { setupListener(); const handler = eventHandlers.get('usage'); const usageStats = createMockUsageStats({ contextWindow: 0 }); handler?.('group-chat-test-chat-123-participant-TestAgent-abc123', usageStats); + // With contextWindow 0, falls back to 200k default + // 1800 / 200000 = 0.9% -> rounds to 
1% await vi.waitFor(() => { expect(mockDeps.groupChatStorage.updateParticipant).toHaveBeenCalledWith( 'test-chat-123', 'TestAgent', expect.objectContaining({ - contextUsage: 0, + contextUsage: 1, }) ); }); diff --git a/src/__tests__/renderer/components/HistoryDetailModal.test.tsx b/src/__tests__/renderer/components/HistoryDetailModal.test.tsx index 234236b9..1db065af 100644 --- a/src/__tests__/renderer/components/HistoryDetailModal.test.tsx +++ b/src/__tests__/renderer/components/HistoryDetailModal.test.tsx @@ -470,9 +470,9 @@ describe('HistoryDetailModal', () => { /> ); - // Context = (inputTokens + cacheCreationInputTokens) / contextWindow (cacheRead excluded) - // (5000 + 5000) / 100000 = 10% - expect(screen.getByText('10%')).toBeInTheDocument(); + // Context = (inputTokens + cacheReadInputTokens + cacheCreationInputTokens) / contextWindow + // (5000 + 2000 + 5000) / 100000 = 12% + expect(screen.getByText('12%')).toBeInTheDocument(); }); it('should display token counts', () => { diff --git a/src/__tests__/renderer/components/MainPanel.test.tsx b/src/__tests__/renderer/components/MainPanel.test.tsx index c4c024e6..3808e781 100644 --- a/src/__tests__/renderer/components/MainPanel.test.tsx +++ b/src/__tests__/renderer/components/MainPanel.test.tsx @@ -1955,8 +1955,8 @@ describe('MainPanel', () => { ); - // Context usage should be 50000 / 200000 * 100 = 25% (cacheRead excluded - cumulative) - expect(getContextColor).toHaveBeenCalledWith(25, theme); + // Context usage: (50000 + 25000 + 0) / 200000 * 100 = 38% (input + cacheRead + cacheCreation) + expect(getContextColor).toHaveBeenCalledWith(38, theme); }); }); @@ -2373,9 +2373,10 @@ describe('MainPanel', () => { expect(screen.queryByText('Context Window')).not.toBeInTheDocument(); }); - it('should cap context usage at 100%', () => { - const getContextColor = vi.fn().mockReturnValue('#ef4444'); + it('should use preserved session.contextUsage when accumulated values exceed window', () => { + const getContextColor = 
vi.fn().mockReturnValue('#22c55e'); const session = createSession({ + contextUsage: 45, // Preserved valid percentage from last non-accumulated update aiTabs: [ { id: 'tab-1', @@ -2386,8 +2387,8 @@ describe('MainPanel', () => { usageStats: { inputTokens: 150000, outputTokens: 100000, - cacheReadInputTokens: 100000, // Excluded from calculation (cumulative) - cacheCreationInputTokens: 100000, // Included in calculation + cacheReadInputTokens: 100000, // Accumulated from multi-tool turn + cacheCreationInputTokens: 100000, // Accumulated from multi-tool turn totalCostUsd: 0.05, contextWindow: 200000, }, @@ -2400,8 +2401,9 @@ describe('MainPanel', () => { ); - // Context usage: (150000 + 100000) / 200000 = 125% -> capped at 100% - expect(getContextColor).toHaveBeenCalledWith(100, theme); + // raw = 150000 + 100000 + 100000 = 350000 > 200000 (accumulated) + // Falls back to session.contextUsage = 45% + expect(getContextColor).toHaveBeenCalledWith(45, theme); }); }); diff --git a/src/__tests__/renderer/utils/contextExtractor.test.ts b/src/__tests__/renderer/utils/contextExtractor.test.ts index 88feeec4..149cd1aa 100644 --- a/src/__tests__/renderer/utils/contextExtractor.test.ts +++ b/src/__tests__/renderer/utils/contextExtractor.test.ts @@ -650,8 +650,8 @@ describe('calculateTotalTokens', () => { const total = calculateTotalTokens(contexts); - // input + cacheCreation for each context (cacheRead excluded - cumulative) - expect(total).toBe(450); // (100+25) + (300+25) + // input + cacheRead + cacheCreation for each context + expect(total).toBe(575); // (100+50+25) + (300+75+25) }); }); @@ -694,8 +694,8 @@ describe('getContextSummary', () => { expect(summary.totalSources).toBe(2); expect(summary.totalLogs).toBe(5); - // (100+25) + (200+25) = 350 (cacheRead excluded - cumulative) - expect(summary.estimatedTokens).toBe(350); + // (100+50+25) + (200+75+25) = 475 (input + cacheRead + cacheCreation) + expect(summary.estimatedTokens).toBe(475); 
expect(summary.byAgent['claude-code']).toBe(1); expect(summary.byAgent['opencode']).toBe(1); }); diff --git a/src/__tests__/renderer/utils/contextUsage.test.ts b/src/__tests__/renderer/utils/contextUsage.test.ts index 3ac808c8..b3865c22 100644 --- a/src/__tests__/renderer/utils/contextUsage.test.ts +++ b/src/__tests__/renderer/utils/contextUsage.test.ts @@ -28,30 +28,30 @@ describe('estimateContextUsage', () => { expect(result).toBe(10); }); - it('should exclude cacheReadInputTokens from calculation (cumulative, not per-request)', () => { + it('should include cacheReadInputTokens in calculation (part of total input context)', () => { const stats = createStats({ inputTokens: 1000, outputTokens: 500, - cacheReadInputTokens: 50000, // Should be ignored + cacheReadInputTokens: 50000, cacheCreationInputTokens: 5000, contextWindow: 100000, }); const result = estimateContextUsage(stats, 'claude-code'); - // (1000 + 5000) / 100000 = 6% (cacheRead excluded) - expect(result).toBe(6); + // (1000 + 50000 + 5000) / 100000 = 56% + expect(result).toBe(56); }); - it('should cap at 100%', () => { + it('should return null when accumulated tokens exceed context window', () => { const stats = createStats({ inputTokens: 50000, outputTokens: 50000, - cacheReadInputTokens: 150000, // Ignored + cacheReadInputTokens: 150000, cacheCreationInputTokens: 200000, contextWindow: 200000, }); const result = estimateContextUsage(stats, 'claude-code'); - // (50000 + 200000) / 200000 = 125% -> capped at 100% - expect(result).toBe(100); + // (50000 + 150000 + 200000) = 400000 > 200000 -> null (accumulated values) + expect(result).toBeNull(); }); it('should round to nearest integer', () => { @@ -139,19 +139,20 @@ describe('estimateContextUsage', () => { expect(result).toBe(10); }); - it('should ignore large cache read tokens (they are cumulative, not per-request)', () => { - // Claude Code reports cacheReadInputTokens as cumulative session totals. 
- // They can exceed the context window, so we exclude them from calculation. + it('should return null when accumulated cacheRead tokens cause total to exceed context window', () => { + // During multi-tool turns, Claude Code accumulates token values across + // internal API calls. When accumulated total exceeds context window, + // return null to signal callers should preserve previous valid percentage. const stats = createStats({ - inputTokens: 500, // small new turn input - outputTokens: 1000, // small response - cacheReadInputTokens: 758000, // cumulative across session - should be IGNORED - cacheCreationInputTokens: 50000, // new cache this turn + inputTokens: 500, + outputTokens: 1000, + cacheReadInputTokens: 758000, // accumulated across multi-tool turn + cacheCreationInputTokens: 50000, contextWindow: 200000, }); const result = estimateContextUsage(stats, 'claude-code'); - // (500 + 50000) / 200000 = 25% (cacheRead excluded) - expect(result).toBe(25); + // (500 + 758000 + 50000) = 808500 > 200000 -> null (accumulated values) + expect(result).toBeNull(); }); }); @@ -172,17 +173,17 @@ describe('estimateContextUsage', () => { expect(result).toBe(5); }); - it('should handle very large token counts', () => { + it('should return null for very large accumulated token counts', () => { const stats = createStats({ inputTokens: 250000, outputTokens: 500000, - cacheReadInputTokens: 500000, // Ignored + cacheReadInputTokens: 500000, cacheCreationInputTokens: 250000, contextWindow: 0, }); const result = estimateContextUsage(stats, 'claude-code'); - // (250000 + 250000) / 200000 = 250% -> capped at 100% - expect(result).toBe(100); + // (250000 + 500000 + 250000) = 1000000 > 200000 -> null (accumulated values) + expect(result).toBeNull(); }); it('should handle very small percentages', () => { @@ -213,34 +214,33 @@ describe('calculateContextTokens', () => { ...overrides, }); - describe('Claude agents (excludes output and cacheRead tokens)', () => { - it('should exclude 
output and cacheRead tokens for claude-code', () => { + describe('Claude agents (input + cacheRead + cacheCreation)', () => { + it('should include input, cacheRead, and cacheCreation tokens for claude-code', () => { const stats = createStats(); const result = calculateContextTokens(stats, 'claude-code'); - // 10000 + 1000 = 11000 (no output, no cacheRead) - // cacheRead is excluded because Claude Code reports it as cumulative - expect(result).toBe(11000); + // 10000 + 2000 + 1000 = 13000 (excludes output only) + expect(result).toBe(13000); }); - it('should exclude output and cacheRead tokens for claude', () => { + it('should include input, cacheRead, and cacheCreation tokens for claude', () => { const stats = createStats(); const result = calculateContextTokens(stats, 'claude'); - expect(result).toBe(11000); + expect(result).toBe(13000); }); - it('should exclude output and cacheRead tokens when agent is undefined', () => { + it('should include input, cacheRead, and cacheCreation tokens when agent is undefined', () => { const stats = createStats(); const result = calculateContextTokens(stats); // Defaults to Claude behavior - expect(result).toBe(11000); + expect(result).toBe(13000); }); }); describe('OpenAI agents (includes output tokens)', () => { - it('should include output tokens for codex', () => { + it('should include input, output, and cacheCreation tokens for codex', () => { const stats = createStats(); const result = calculateContextTokens(stats, 'codex'); - // 10000 + 5000 + 1000 = 16000 (includes output, excludes cacheRead) + // 10000 + 5000 + 1000 = 16000 (input + output + cacheCreation, excludes cacheRead) expect(result).toBe(16000); }); }); @@ -268,18 +268,19 @@ describe('calculateContextTokens', () => { expect(result).toBe(10000); }); - it('should ignore large cacheRead values (cumulative session data)', () => { - // This tests the real bug scenario: Claude Code reports cumulative cacheRead - // that exceeds context window, which would cause 100%+ 
display + it('should include cacheRead in raw calculation (callers detect accumulated values)', () => { + // calculateContextTokens returns the raw total including cacheRead. + // Callers (estimateContextUsage) detect when total > contextWindow + // and return null to signal accumulated values from multi-tool turns. const stats = createStats({ inputTokens: 50000, outputTokens: 9000, - cacheReadInputTokens: 758000, // Cumulative - should be IGNORED + cacheReadInputTokens: 758000, cacheCreationInputTokens: 75000, }); const result = calculateContextTokens(stats, 'claude-code'); - // 50000 + 75000 = 125000 (cacheRead excluded) - expect(result).toBe(125000); + // 50000 + 758000 + 75000 = 883000 (raw total, callers check against window) + expect(result).toBe(883000); }); }); }); diff --git a/src/main/parsers/usage-aggregator.ts b/src/main/parsers/usage-aggregator.ts index 150937b2..10f4b323 100644 --- a/src/main/parsers/usage-aggregator.ts +++ b/src/main/parsers/usage-aggregator.ts @@ -60,12 +60,16 @@ const COMBINED_CONTEXT_AGENTS: Set = new Set(['codex']); /** * Calculate total context tokens based on agent-specific semantics. * - * IMPORTANT: Claude Code reports CUMULATIVE session tokens, not per-request tokens. - * The cacheReadInputTokens can exceed the context window because they accumulate - * across all turns in the conversation. For context pressure display, we should - * only count tokens that represent NEW context being added: + * For a single Anthropic API call, the total input context is the sum of: + * inputTokens + cacheReadInputTokens + cacheCreationInputTokens + * These three fields partition the input into uncached, cache-hit, and newly-cached segments. * - * Claude models: Context = input + cacheCreation (excludes cacheRead - already cached) + * CAVEAT: When Claude Code performs multi-tool turns (many internal API calls), + * the reported values may be accumulated across all internal calls within the turn. 
+ * In that case the total can exceed the context window. Callers should check for + * this and skip the update (see estimateContextUsage). + * + * Claude models: Context = input + cacheRead + cacheCreation * OpenAI models: Context = input + output (combined limit) * * @param stats - The usage statistics containing token counts @@ -79,34 +83,31 @@ export function calculateContextTokens( >, agentId?: ToolType ): number { - // For Claude: inputTokens = uncached new tokens, cacheCreationInputTokens = newly cached tokens - // cacheReadInputTokens are EXCLUDED because they represent already-cached context - // that Claude Code reports cumulatively across the session, not per-request. - // Including them would cause context % to exceed 100% impossibly. - const baseTokens = stats.inputTokens + (stats.cacheCreationInputTokens || 0); - // OpenAI models have combined input+output context limits if (agentId && COMBINED_CONTEXT_AGENTS.has(agentId)) { - return baseTokens + stats.outputTokens; + return stats.inputTokens + (stats.cacheCreationInputTokens || 0) + stats.outputTokens; } - // Claude models: output tokens don't consume context window - return baseTokens; + // Claude models: total input = uncached + cache-hit + newly-cached + // Output tokens don't consume the input context window + return ( + stats.inputTokens + (stats.cacheReadInputTokens || 0) + (stats.cacheCreationInputTokens || 0) + ); } /** * Estimate context usage percentage when the agent doesn't provide it directly. * Uses agent-specific default context window sizes for accurate estimation. 
* - * IMPORTANT: Context calculation varies by agent: - * - Claude models: inputTokens + cacheCreationInputTokens - * (cacheRead excluded - cumulative, output excluded - separate limit) - * - OpenAI models (Codex): inputTokens + outputTokens - * (combined context window includes both input and output) + * Context calculation varies by agent: + * - Claude models: inputTokens + cacheReadInputTokens + cacheCreationInputTokens + * - OpenAI models (Codex): inputTokens + cacheCreationInputTokens + outputTokens (combined limit) * - * Note: cacheReadInputTokens are NOT included because Claude Code reports them - * as cumulative session totals, not per-request values. Including them would - * cause context percentage to exceed 100% impossibly. + * Returns null when the calculated total exceeds the context window, which indicates + * accumulated values from multi-tool turns (many internal API calls within one turn). + * A single API call's total input can never exceed the context window, so values + * above it are definitely accumulated. Callers should preserve the previous valid + * percentage when this returns null. * * @param stats - The usage statistics containing token counts * @param agentId - The agent identifier for agent-specific context window size @@ -126,19 +127,23 @@ export function estimateContextUsage( // Calculate total context using agent-specific semantics const totalContextTokens = calculateContextTokens(stats, agentId); - // If context window is provided and valid, use it - if (stats.contextWindow && stats.contextWindow > 0) { - return Math.min(100, Math.round((totalContextTokens / stats.contextWindow) * 100)); - } + // Determine effective context window + const effectiveContextWindow = + stats.contextWindow && stats.contextWindow > 0 + ? stats.contextWindow + : agentId && agentId !== 'terminal' + ? 
DEFAULT_CONTEXT_WINDOWS[agentId] || 0 + : 0; - // If no agent specified or terminal, cannot estimate - if (!agentId || agentId === 'terminal') { + if (!effectiveContextWindow || effectiveContextWindow <= 0) { return null; } - // Use agent-specific default context window - const defaultContextWindow = DEFAULT_CONTEXT_WINDOWS[agentId]; - if (!defaultContextWindow || defaultContextWindow <= 0) { + // If total exceeds context window, the values are accumulated across multiple + // internal API calls within a complex turn (tool use chains). A single API call's + // total input cannot exceed the context window. Return null to signal callers + // should keep the previous valid percentage. + if (totalContextTokens > effectiveContextWindow) { return null; } @@ -146,7 +151,7 @@ export function estimateContextUsage( return 0; } - return Math.min(100, Math.round((totalContextTokens / defaultContextWindow) * 100)); + return Math.round((totalContextTokens / effectiveContextWindow) * 100); } /** diff --git a/src/main/process-listeners/usage-listener.ts b/src/main/process-listeners/usage-listener.ts index 757bd0c9..26c2a042 100644 --- a/src/main/process-listeners/usage-listener.ts +++ b/src/main/process-listeners/usage-listener.ts @@ -52,18 +52,29 @@ export function setupUsageListener( // Calculate context usage percentage using agent-specific logic // Note: For group chat, we don't have agent type here, defaults to Claude behavior const totalContextTokens = usageAggregator.calculateContextTokens(usageStats); - const contextUsage = - usageStats.contextWindow > 0 - ? Math.round((totalContextTokens / usageStats.contextWindow) * 100) - : 0; + const effectiveWindow = usageStats.contextWindow > 0 ? usageStats.contextWindow : 200000; + + // Skip update if values are accumulated (total > window) from multi-tool turns + const contextUsage = + totalContextTokens <= effectiveWindow + ? 
Math.round((totalContextTokens / effectiveWindow) * 100) + : -1; // -1 signals "skip update" + + // Update participant with usage stats (skip context update if accumulated) + const updateData: { + contextUsage?: number; + tokenCount?: number; + totalCost: number; + } = { + totalCost: usageStats.totalCostUsd, + }; + if (contextUsage >= 0) { + updateData.contextUsage = contextUsage; + updateData.tokenCount = totalContextTokens; + } - // Update participant with usage stats groupChatStorage - .updateParticipant(groupChatId, participantName, { - contextUsage, - tokenCount: totalContextTokens, - totalCost: usageStats.totalCostUsd, - }) + .updateParticipant(groupChatId, participantName, updateData) .then((updatedChat) => { // Emit participants changed so UI updates // Note: updateParticipant returns the updated chat, avoiding extra DB read @@ -87,17 +98,25 @@ export function setupUsageListener( // Calculate context usage percentage using agent-specific logic // Note: Moderator is typically Claude, defaults to Claude behavior const totalContextTokens = usageAggregator.calculateContextTokens(usageStats); - const contextUsage = - usageStats.contextWindow > 0 - ? Math.round((totalContextTokens / usageStats.contextWindow) * 100) - : 0; + const effectiveWindow = usageStats.contextWindow > 0 ? usageStats.contextWindow : 200000; - // Emit moderator usage for the moderator card - groupChatEmitters.emitModeratorUsage?.(groupChatId, { - contextUsage, - totalCost: usageStats.totalCostUsd, - tokenCount: totalContextTokens, - }); + // Skip context update if values are accumulated (total > window) from multi-tool turns. + // When accumulated, emit with contextUsage/tokenCount as -1 so the handler + // knows to preserve the previous values. Cost is always updated. 
+ if (totalContextTokens <= effectiveWindow) { + const contextUsage = Math.round((totalContextTokens / effectiveWindow) * 100); + groupChatEmitters.emitModeratorUsage?.(groupChatId, { + contextUsage, + totalCost: usageStats.totalCostUsd, + tokenCount: totalContextTokens, + }); + } else { + groupChatEmitters.emitModeratorUsage?.(groupChatId, { + contextUsage: -1, + totalCost: usageStats.totalCostUsd, + tokenCount: -1, + }); + } } safeSend('process:usage', sessionId, usageStats); diff --git a/src/renderer/App.tsx b/src/renderer/App.tsx index 8733308c..b010a767 100644 --- a/src/renderer/App.tsx +++ b/src/renderer/App.tsx @@ -2714,36 +2714,30 @@ function MaestroConsoleInner() { actualSessionId = sessionId; } - // Calculate context window usage percentage from CURRENT reported tokens. - // IMPORTANT: Claude Code reports cacheReadInputTokens as CUMULATIVE session totals, - // not per-request values. Including them causes context % to exceed 100% impossibly. - // For Claude: context = inputTokens + cacheCreationInputTokens (new content only) + // Calculate context window usage percentage. + // For Claude: context = inputTokens + cacheReadInputTokens + cacheCreationInputTokens + // (these three fields partition the total input into uncached, cache-hit, newly-cached) // For Codex: context = inputTokens + outputTokens (combined limit) + // + // When Claude Code performs complex multi-tool turns, the reported values are + // accumulated across internal API calls and can exceed the context window. + // estimateContextUsage returns null in that case - we skip the update and + // keep the last valid measurement. This means the gauge may stay static + // during tool-heavy turns, but it's always accurate when it does update, + // keeping the compact warning reliable. 
const sessionForUsage = sessionsRef.current.find((s) => s.id === actualSessionId); const agentToolType = sessionForUsage?.toolType; - const isClaudeUsage = agentToolType === 'claude-code' || agentToolType === 'claude'; - const currentContextTokens = isClaudeUsage - ? usageStats.inputTokens + usageStats.cacheCreationInputTokens - : usageStats.inputTokens + usageStats.outputTokens; - - // Calculate context percentage, falling back to agent-specific defaults if contextWindow not provided - let contextPercentage: number; - if (usageStats.contextWindow > 0) { - contextPercentage = Math.min( - Math.round((currentContextTokens / usageStats.contextWindow) * 100), - 100 - ); - } else { - // Use fallback estimation with agent-specific default context window - const estimated = estimateContextUsage(usageStats, agentToolType); - contextPercentage = estimated ?? 0; - } + const contextPercentage = estimateContextUsage(usageStats, agentToolType); // Batch the usage stats update, context percentage, and cycle tokens // The batched updater handles the accumulation logic internally batchedUpdater.updateUsage(actualSessionId, tabId, usageStats); batchedUpdater.updateUsage(actualSessionId, null, usageStats); // Session-level accumulation - batchedUpdater.updateContextUsage(actualSessionId, contextPercentage); + if (contextPercentage !== null) { + // Valid measurement from a non-accumulated turn - use it directly + batchedUpdater.updateContextUsage(actualSessionId, contextPercentage); + } + // When null (accumulated values), keep the last valid percentage unchanged batchedUpdater.updateCycleTokens(actualSessionId, usageStats.outputTokens); // Update persistent global stats (not batched - this is a separate concern) @@ -3272,7 +3266,17 @@ function MaestroConsoleInner() { const unsubModeratorUsage = window.maestro.groupChat.onModeratorUsage?.((id, usage) => { if (id === activeGroupChatId) { - setModeratorUsage(usage); + // When contextUsage is -1, values are accumulated from multi-tool 
turns. + // Preserve previous context/token values, only update cost. + if (usage.contextUsage === -1) { + setModeratorUsage((prev) => + prev + ? { ...prev, totalCost: usage.totalCost } + : { contextUsage: 0, totalCost: usage.totalCost, tokenCount: 0 } + ); + } else { + setModeratorUsage(usage); + } } }); diff --git a/src/renderer/components/MainPanel.tsx b/src/renderer/components/MainPanel.tsx index c23bdcc1..c9314f5e 100644 --- a/src/renderer/components/MainPanel.tsx +++ b/src/renderer/components/MainPanel.tsx @@ -544,11 +544,13 @@ export const MainPanel = React.memo( }, [configuredContextWindow, activeTab?.usageStats?.contextWindow]); // Compute context tokens using agent-specific calculation - // Claude: input + cacheCreation (excludes cacheRead which is cumulative) + // Claude: input + cacheRead + cacheCreation (total input for the request) // Codex: input + output (combined limit) + // When values are accumulated from multi-tool turns, total may exceed contextWindow. + // In that case, derive tokens from session.contextUsage (preserved last valid percentage). const activeTabContextTokens = useMemo(() => { if (!activeTab?.usageStats) return 0; - return calculateContextTokens( + const raw = calculateContextTokens( { inputTokens: activeTab.usageStats.inputTokens, outputTokens: activeTab.usageStats.outputTokens, @@ -557,7 +559,20 @@ export const MainPanel = React.memo( }, activeSession?.toolType ); - }, [activeTab?.usageStats, activeSession?.toolType]); + // Accumulated from multi-tool turns: derive from session's preserved percentage. + // App.tsx skips updating session.contextUsage when accumulated, so it holds + // the last valid percentage from estimateContextUsage. + if (activeTabContextWindow > 0 && raw > activeTabContextWindow) { + const preservedPercentage = activeSession?.contextUsage ?? 
0; + return Math.round((preservedPercentage / 100) * activeTabContextWindow); + } + return raw; + }, [ + activeTab?.usageStats, + activeSession?.toolType, + activeTabContextWindow, + activeSession?.contextUsage, + ]); // Compute context usage percentage from context tokens and window size const activeTabContextUsage = useMemo(() => { diff --git a/src/renderer/utils/contextUsage.ts b/src/renderer/utils/contextUsage.ts index 1b150ddb..cb7e376b 100644 --- a/src/renderer/utils/contextUsage.ts +++ b/src/renderer/utils/contextUsage.ts @@ -31,12 +31,16 @@ const COMBINED_CONTEXT_AGENTS: Set = new Set(['codex']); /** * Calculate total context tokens based on agent-specific semantics. * - * IMPORTANT: Claude Code reports CUMULATIVE session tokens, not per-request tokens. - * The cacheReadInputTokens can exceed the context window because they accumulate - * across all turns in the conversation. For context pressure display, we should - * only count tokens that represent NEW context being added: + * For a single Anthropic API call, the total input context is the sum of: + * inputTokens + cacheReadInputTokens + cacheCreationInputTokens + * These three fields partition the input into uncached, cache-hit, and newly-cached segments. * - * Claude models: Context = input + cacheCreation (excludes cacheRead - already cached) + * CAVEAT: When Claude Code performs multi-tool turns (many internal API calls), + * the reported values may be accumulated across all internal calls within the turn. + * In that case the total can exceed the context window. Callers should check for + * this and skip the update (see estimateContextUsage). 
+ * + * Claude models: Context = input + cacheRead + cacheCreation * OpenAI models: Context = input + output (combined limit) * * @param stats - The usage statistics containing token counts @@ -50,34 +54,31 @@ export function calculateContextTokens( >, agentId?: ToolType ): number { - // For Claude: inputTokens = uncached new tokens, cacheCreationInputTokens = newly cached tokens - // cacheReadInputTokens are EXCLUDED because they represent already-cached context - // that Claude Code reports cumulatively across the session, not per-request. - // Including them would cause context % to exceed 100% impossibly. - const baseTokens = stats.inputTokens + (stats.cacheCreationInputTokens || 0); - // OpenAI models have combined input+output context limits if (agentId && COMBINED_CONTEXT_AGENTS.has(agentId)) { - return baseTokens + stats.outputTokens; + return stats.inputTokens + (stats.cacheCreationInputTokens || 0) + stats.outputTokens; } - // Claude models: output tokens don't consume context window - return baseTokens; + // Claude models: total input = uncached + cache-hit + newly-cached + // Output tokens don't consume the input context window + return ( + stats.inputTokens + (stats.cacheReadInputTokens || 0) + (stats.cacheCreationInputTokens || 0) + ); } /** * Estimate context usage percentage when the agent doesn't provide it directly. * Uses agent-specific default context window sizes for accurate estimation. 
* - * IMPORTANT: Context calculation varies by agent: - * - Claude models: inputTokens + cacheCreationInputTokens - * (cacheRead excluded - cumulative, output excluded - separate limit) - * - OpenAI models (Codex): inputTokens + outputTokens - * (combined context window includes both input and output) + * Context calculation varies by agent: + * - Claude models: inputTokens + cacheReadInputTokens + cacheCreationInputTokens + * - OpenAI models (Codex): inputTokens + outputTokens (combined limit) * - * Note: cacheReadInputTokens are NOT included because Claude Code reports them - * as cumulative session totals, not per-request values. Including them would - * cause context percentage to exceed 100% impossibly. + * Returns null when the calculated total exceeds the context window, which indicates + * accumulated values from multi-tool turns (many internal API calls within one turn). + * A single API call's total input can never exceed the context window, so values + * above it are definitely accumulated. Callers should preserve the previous valid + * percentage when this returns null. * * @param stats - The usage statistics containing token counts * @param agentId - The agent identifier for agent-specific context window size @@ -97,19 +98,23 @@ export function estimateContextUsage( // Calculate total context using agent-specific semantics const totalContextTokens = calculateContextTokens(stats, agentId); - // If context window is provided and valid, use it - if (stats.contextWindow && stats.contextWindow > 0) { - return Math.min(100, Math.round((totalContextTokens / stats.contextWindow) * 100)); - } + // Determine effective context window + const effectiveContextWindow = + stats.contextWindow && stats.contextWindow > 0 + ? stats.contextWindow + : agentId && agentId !== 'terminal' + ? 
DEFAULT_CONTEXT_WINDOWS[agentId] || 0 + : 0; - // If no agent specified or terminal, cannot estimate - if (!agentId || agentId === 'terminal') { + if (!effectiveContextWindow || effectiveContextWindow <= 0) { return null; } - // Use agent-specific default context window - const defaultContextWindow = DEFAULT_CONTEXT_WINDOWS[agentId]; - if (!defaultContextWindow || defaultContextWindow <= 0) { + // If total exceeds context window, the values are accumulated across multiple + // internal API calls within a complex turn (tool use chains). A single API call's + // total input cannot exceed the context window. Return null to signal callers + // should keep the previous valid percentage. + if (totalContextTokens > effectiveContextWindow) { return null; } @@ -117,5 +122,5 @@ export function estimateContextUsage( return 0; } - return Math.min(100, Math.round((totalContextTokens / defaultContextWindow) * 100)); + return Math.round((totalContextTokens / effectiveContextWindow) * 100); }