From 7d76a5a06d4e2328f024d08cdc248fe80beb051a Mon Sep 17 00:00:00 2001
From: Raza Rauf
Date: Wed, 4 Feb 2026 01:20:51 +0500
Subject: [PATCH] feat: add conservative context growth estimation during
 multi-tool turns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When Claude Code performs multi-tool turns (many internal API calls),
accumulated token values cause estimateContextUsage to return null,
freezing the context gauge. This adds estimateAccumulatedGrowth which
provides conservative 1-3% per-turn growth estimates so the gauge
keeps moving during tool-heavy sessions.

Safety: App.tsx caps all estimates at yellowThreshold - 5, guaranteeing
that estimates can never trigger compact warnings — only real
measurements from non-accumulated turns can. The cap is floored at the
current reading, so an estimate never lowers a gauge that a real
measurement has already pushed past the cap (which would otherwise
silently clear a legitimate warning).
---
 .../renderer/utils/contextUsage.test.ts       | 59 +++++++++++++++++++
 src/renderer/App.tsx                          | 36 +++++++++--
 src/renderer/utils/contextUsage.ts            | 53 ++++++++++++++++-
 3 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/src/__tests__/renderer/utils/contextUsage.test.ts b/src/__tests__/renderer/utils/contextUsage.test.ts
index b858548f..16db7054 100644
--- a/src/__tests__/renderer/utils/contextUsage.test.ts
+++ b/src/__tests__/renderer/utils/contextUsage.test.ts
@@ -5,6 +5,7 @@
 import {
 	estimateContextUsage,
 	calculateContextTokens,
+	estimateAccumulatedGrowth,
 	DEFAULT_CONTEXT_WINDOWS,
 } from '../../../renderer/utils/contextUsage';
 import type { UsageStats } from '../../../shared/types';
@@ -273,6 +274,64 @@ describe('calculateContextTokens', () => {
 	});
 });
 
+describe('estimateAccumulatedGrowth', () => {
+	it('should grow by 1% for typical multi-tool turn with many internal calls', () => {
+		// 31% usage, 40 internal API calls
+		// outputTokens: 10026 (accumulated), cacheRead: 2.5M, window: 200K
+		const result = estimateAccumulatedGrowth(31, 10026, 2500000, 200000);
+		// prevTokens = 62000, estCalls = 2500000/62000 ≈ 40
+		// singleTurnGrowth = 10026/40 ≈ 251, growthPercent = 251/200000*100 ≈ 0 → min 1%
+		expect(result).toBe(32);
+	});
+
+	it('should cap per-turn growth at 3%', () => {
+		// Fewer calls, more output per call
+		const result = estimateAccumulatedGrowth(40, 100000, 400000, 200000);
+		// prevTokens = 80000, estCalls = 400000/80000 = 5
+		// singleTurnGrowth = 100000/5 = 20000, growthPercent = 20000/200000*100 = 10 → cap 3%
+		expect(result).toBe(43);
+	});
+
+	it('should guarantee minimum 1% growth', () => {
+		const result = estimateAccumulatedGrowth(50, 100, 5000000, 200000);
+		// Very small output → growthPercent ≈ 0 → min 1%
+		expect(result).toBe(51);
+	});
+
+	it('should return currentUsage unchanged when currentUsage is 0', () => {
+		const result = estimateAccumulatedGrowth(0, 10000, 500000, 200000);
+		expect(result).toBe(0);
+	});
+
+	it('should return currentUsage unchanged when contextWindow is 0', () => {
+		const result = estimateAccumulatedGrowth(30, 10000, 500000, 0);
+		expect(result).toBe(30);
+	});
+
+	it('should handle zero cacheRead tokens', () => {
+		const result = estimateAccumulatedGrowth(30, 5000, 0, 200000);
+		// estCalls = max(1, 0/60000) = 1, singleTurnGrowth = 5000
+		// growthPercent = 5000/200000*100 = 2.5 → Math.round gives 3% (at cap)
+		expect(result).toBe(33);
+	});
+
+	it('should grow monotonically across consecutive accumulated turns', () => {
+		let usage = 31;
+		for (let i = 0; i < 5; i++) {
+			const prev = usage;
+			usage = estimateAccumulatedGrowth(usage, 10000, 2500000, 200000);
+			expect(usage).toBeGreaterThan(prev);
+		}
+		expect(usage).toBeGreaterThanOrEqual(36);
+	});
+
+	it('should not be capped internally (caller handles threshold cap)', () => {
+		// At 98%, growth should still apply — caller caps below warning threshold
+		const result = estimateAccumulatedGrowth(98, 50000, 500000, 200000);
+		expect(result).toBe(101); // Unbounded — caller applies Math.min with threshold
+	});
+});
+
 describe('DEFAULT_CONTEXT_WINDOWS', () => {
 	it('should have context windows defined for all ToolType agent types', () => {
 		// Only ToolType values have context windows defined
diff --git a/src/renderer/App.tsx b/src/renderer/App.tsx
index 5db1e99d..acec8d32 100644
--- a/src/renderer/App.tsx
+++ b/src/renderer/App.tsx
@@ -188,7 +188,11 @@ import { shouldOpenExternally, flattenTree } from './utils/fileExplorer';
 import type { FileNode } from './types/fileTree';
 import { substituteTemplateVariables } from './utils/templateVariables';
 import { validateNewSession, getProviderDisplayName } from './utils/sessionValidation';
-import { estimateContextUsage } from './utils/contextUsage';
+import {
+	estimateContextUsage,
+	estimateAccumulatedGrowth,
+	DEFAULT_CONTEXT_WINDOWS,
+} from './utils/contextUsage';
 import { formatLogsForClipboard } from './utils/contextExtractor';
 import {
 	parseSessionId,
@@ -2801,9 +2805,9 @@ function MaestroConsoleInner() {
 
 			// Estimate context usage percentage using agent-specific calculation.
 			// estimateContextUsage returns null when values are accumulated across multiple
-			// internal API calls within a complex turn. In that case, the UI may update less
-			// during tool-heavy turns, but it's always accurate when it does update,
-			// keeping the compact warning reliable.
+			// internal API calls within a complex turn. In that case, we use a conservative
+			// growth estimate so the gauge keeps moving, but cap it below the yellow warning
+			// threshold so estimates never trigger compact warnings — only real measurements can.
 			// Use baseSessionId for lookup to handle synopsis/batch sessions that inherit parent's agent type
 			const sessionForUsage = sessionsRef.current.find((s) => s.id === baseSessionId);
 			const agentToolType = sessionForUsage?.toolType;
@@ -2813,9 +2817,31 @@ function MaestroConsoleInner() {
 			// The batched updater handles the accumulation logic internally
 			batchedUpdater.updateUsage(actualSessionId, tabId, usageStats);
 			batchedUpdater.updateUsage(actualSessionId, null, usageStats); // Session-level accumulation
-			// Only update context percentage if we got a valid value (not accumulated)
 			if (contextPercentage !== null) {
+				// Valid measurement from a non-accumulated turn — use directly
 				batchedUpdater.updateContextUsage(actualSessionId, contextPercentage);
+			} else {
+				// Accumulated values from multi-tool turn. Estimate conservative growth
+				// so the gauge doesn't freeze, but cap below the yellow warning threshold
+				// to guarantee estimates never trigger compact warnings.
+				const currentUsage = sessionForUsage?.contextUsage ?? 0;
+				if (currentUsage > 0) {
+					const effectiveWindow =
+						usageStats.contextWindow > 0
+							? usageStats.contextWindow
+							: DEFAULT_CONTEXT_WINDOWS[agentToolType as keyof typeof DEFAULT_CONTEXT_WINDOWS] ||
+								200000;
+					const estimated = estimateAccumulatedGrowth(
+						currentUsage,
+						usageStats.outputTokens,
+						usageStats.cacheReadInputTokens || 0,
+						effectiveWindow
+					);
+					// Cap below yellow threshold (estimates never trigger warnings), floored at the current reading
+					const yellowThreshold = contextManagementSettings.contextWarningYellowThreshold;
+					const maxEstimate = Math.max(currentUsage, yellowThreshold - 5);
+					batchedUpdater.updateContextUsage(actualSessionId, Math.min(estimated, maxEstimate));
+				}
 			}
 			batchedUpdater.updateCycleTokens(actualSessionId, usageStats.outputTokens);
 
diff --git a/src/renderer/utils/contextUsage.ts b/src/renderer/utils/contextUsage.ts
index 2545e15b..37616d6c 100644
--- a/src/renderer/utils/contextUsage.ts
+++ b/src/renderer/utils/contextUsage.ts
@@ -56,13 +56,17 @@ export function calculateContextTokens(
 ): number {
 	// OpenAI models have combined input+output context limits
 	if (agentId && COMBINED_CONTEXT_AGENTS.has(agentId as ToolType)) {
-		return (stats.inputTokens || 0) + (stats.cacheCreationInputTokens || 0) + (stats.outputTokens || 0);
+		return (
+			(stats.inputTokens || 0) + (stats.cacheCreationInputTokens || 0) + (stats.outputTokens || 0)
+		);
 	}
 
 	// Claude models: total input = uncached + cache-hit + newly-cached
 	// Output tokens don't consume the input context window
 	return (
-		(stats.inputTokens || 0) + (stats.cacheReadInputTokens || 0) + (stats.cacheCreationInputTokens || 0)
+		(stats.inputTokens || 0) +
+		(stats.cacheReadInputTokens || 0) +
+		(stats.cacheCreationInputTokens || 0)
 	);
 }
 
@@ -123,3 +127,48 @@ export function estimateContextUsage(
 
 	return Math.round((totalContextTokens / effectiveContextWindow) * 100);
 }
+
+/**
+ * Estimate context growth during accumulated (multi-tool) turns.
+ *
+ * When estimateContextUsage returns null (accumulated values), the percentage
+ * would freeze at the last valid value. This function provides a conservative
+ * growth estimate so the gauge keeps moving during tool-heavy turns.
+ *
+ * Approach: de-accumulate output tokens by dividing by the estimated number
+ * of internal API calls (derived from cacheRead / previousContext), then
+ * compute what percentage of the window that single-turn output represents.
+ * Growth is bounded to 1-3% per turn.
+ *
+ * IMPORTANT: The caller must cap the result below the compact warning threshold
+ * so that estimates never trigger compact warnings — only real measurements can.
+ *
+ * @param currentUsage - Current context usage percentage (0-100)
+ * @param outputTokens - Output tokens from this turn (accumulated across internal calls)
+ * @param cacheReadTokens - Cache read tokens (accumulated, used to estimate call count)
+ * @param contextWindow - Effective context window size
+ * @returns Estimated new context usage percentage (not clamped; may exceed 100)
+ */
+export function estimateAccumulatedGrowth(
+	currentUsage: number,
+	outputTokens: number,
+	cacheReadTokens: number,
+	contextWindow: number
+): number {
+	if (currentUsage <= 0 || contextWindow <= 0) {
+		return currentUsage;
+	}
+
+	// Estimate how many internal API calls occurred in this turn
+	const prevTokens = Math.round((currentUsage / 100) * contextWindow);
+	const estCalls = Math.max(1, Math.round((cacheReadTokens || 0) / Math.max(prevTokens, 1)));
+
+	// De-accumulate: estimate single-call output growth (missing output counts as 0, not NaN)
+	const singleTurnGrowth = Math.round((outputTokens || 0) / estCalls);
+	const growthPercent = Math.round((singleTurnGrowth / contextWindow) * 100);
+
+	// Bound to 1-3% per turn (conservative to avoid overshooting)
+	const boundedGrowth = Math.max(1, Math.min(growthPercent, 3));
+
+	return currentUsage + boundedGrowth;
+}