mirror of
https://github.com/jlengrand/Maestro.git
synced 2026-03-10 08:31:19 +00:00
fix: correct context usage calculation to include cacheRead tokens and handle accumulated values
The context formula excluded cacheReadInputTokens, causing the gauge to drastically underestimate usage (e.g., showing 3% when the real figure was 23%). In addition, during multi-tool turns the reported token totals are accumulated across internal API calls and could exceed the context window, producing false 100% readings and premature compact warnings.
- Include cacheReadInputTokens in the formula (input + cacheRead + cacheCreation)
- Detect accumulated values (total > window) and return null so the last valid percentage is preserved
- Skip context updates during accumulated turns instead of displaying inflated values
- Fix the MainPanel tooltip, which derived its value from raw tab stats instead of the preserved session percentage
- Handle accumulated values for group chat participants/moderators with a -1 sentinel
This commit is contained in:
@@ -2,6 +2,7 @@
|
|||||||
* Tests for usage aggregator utilities
|
* Tests for usage aggregator utilities
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
import {
|
import {
|
||||||
aggregateModelUsage,
|
aggregateModelUsage,
|
||||||
estimateContextUsage,
|
estimateContextUsage,
|
||||||
@@ -96,15 +97,32 @@ describe('estimateContextUsage', () => {
|
|||||||
expect(result).toBe(10);
|
expect(result).toBe(10);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should cap at 100%', () => {
|
it('should correctly calculate for Claude with all token types', () => {
|
||||||
|
// Simulates a real Claude response: input + cacheRead + cacheCreation = total
|
||||||
const stats = createStats({
|
const stats = createStats({
|
||||||
inputTokens: 150000,
|
inputTokens: 2,
|
||||||
outputTokens: 100000,
|
cacheReadInputTokens: 33541,
|
||||||
|
cacheCreationInputTokens: 11657,
|
||||||
|
outputTokens: 12,
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
});
|
});
|
||||||
const result = estimateContextUsage(stats, 'claude-code');
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
// Output tokens excluded; 150k / 200k = 75%
|
// (2 + 33541 + 11657) / 200000 = 45200 / 200000 = 22.6% -> 23%
|
||||||
expect(result).toBe(75);
|
expect(result).toBe(23);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should return null when tokens exceed context window (accumulated values)', () => {
|
||||||
|
// When Claude Code does complex multi-tool turns, token values accumulate
|
||||||
|
// across internal API calls and can exceed the context window
|
||||||
|
const stats = createStats({
|
||||||
|
inputTokens: 21627,
|
||||||
|
cacheReadInputTokens: 1079415,
|
||||||
|
cacheCreationInputTokens: 39734,
|
||||||
|
contextWindow: 200000,
|
||||||
|
});
|
||||||
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
|
// Total = 1,140,776 > 200,000 -> null (accumulated, skip update)
|
||||||
|
expect(result).toBeNull();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -112,6 +130,7 @@ describe('estimateContextUsage', () => {
|
|||||||
it('should use claude-code default context window (200k)', () => {
|
it('should use claude-code default context window (200k)', () => {
|
||||||
const stats = createStats({ contextWindow: 0 });
|
const stats = createStats({ contextWindow: 0 });
|
||||||
const result = estimateContextUsage(stats, 'claude-code');
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
|
// 10000 + 0 + 0 = 10000 / 200000 = 5%
|
||||||
expect(result).toBe(5);
|
expect(result).toBe(5);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -149,6 +168,18 @@ describe('estimateContextUsage', () => {
|
|||||||
const result = estimateContextUsage(stats, 'claude-code');
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
expect(result).toBe(0);
|
expect(result).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should return null when accumulated tokens exceed default window', () => {
|
||||||
|
const stats = createStats({
|
||||||
|
inputTokens: 50000,
|
||||||
|
cacheReadInputTokens: 500000,
|
||||||
|
cacheCreationInputTokens: 10000,
|
||||||
|
contextWindow: 0,
|
||||||
|
});
|
||||||
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
|
// 560000 > 200000 default -> null
|
||||||
|
expect(result).toBeNull();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -166,38 +197,52 @@ describe('calculateContextTokens', () => {
|
|||||||
...overrides,
|
...overrides,
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should exclude output tokens and cacheReadInputTokens for Claude agents', () => {
|
it('should include input + cacheRead + cacheCreation for Claude agents', () => {
|
||||||
const stats = createStats();
|
const stats = createStats();
|
||||||
const result = calculateContextTokens(stats, 'claude-code');
|
const result = calculateContextTokens(stats, 'claude-code');
|
||||||
// 10000 + 1000 = 11000 (no output tokens, no cacheRead - cumulative)
|
// 10000 + 2000 + 1000 = 13000 (all input token types, excludes output)
|
||||||
expect(result).toBe(11000);
|
expect(result).toBe(13000);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should include output tokens but exclude cacheReadInputTokens for Codex agents', () => {
|
it('should include input + cacheCreation + output for Codex agents', () => {
|
||||||
const stats = createStats();
|
const stats = createStats();
|
||||||
const result = calculateContextTokens(stats, 'codex');
|
const result = calculateContextTokens(stats, 'codex');
|
||||||
// 10000 + 5000 + 1000 = 16000 (includes output, excludes cacheRead)
|
// 10000 + 1000 + 5000 = 16000 (combined input+output window)
|
||||||
expect(result).toBe(16000);
|
expect(result).toBe(16000);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should default to Claude behavior when agent is undefined', () => {
|
it('should default to Claude behavior when agent is undefined', () => {
|
||||||
const stats = createStats();
|
const stats = createStats();
|
||||||
const result = calculateContextTokens(stats);
|
const result = calculateContextTokens(stats);
|
||||||
// 10000 + 1000 = 11000 (excludes cacheRead)
|
// 10000 + 2000 + 1000 = 13000 (Claude default: all input token types)
|
||||||
expect(result).toBe(11000);
|
expect(result).toBe(13000);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should exclude cacheReadInputTokens because they are cumulative session totals', () => {
|
it('should calculate correctly for typical first Claude turn', () => {
|
||||||
// cacheReadInputTokens accumulate across all turns in a session and can
|
// Real-world scenario: first message with system prompt cache
|
||||||
// exceed the context window. Including them would cause context % > 100%.
|
const stats = createStats({
|
||||||
|
inputTokens: 2,
|
||||||
|
cacheReadInputTokens: 33541,
|
||||||
|
cacheCreationInputTokens: 11657,
|
||||||
|
outputTokens: 12,
|
||||||
|
});
|
||||||
|
const result = calculateContextTokens(stats, 'claude-code');
|
||||||
|
// 2 + 33541 + 11657 = 45200 (total context for the API call)
|
||||||
|
expect(result).toBe(45200);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should handle accumulated values from multi-tool turns', () => {
|
||||||
|
// When values are accumulated across internal API calls,
|
||||||
|
// the total can exceed the context window. calculateContextTokens
|
||||||
|
// returns the raw total; callers must check against contextWindow.
|
||||||
const stats = createStats({
|
const stats = createStats({
|
||||||
inputTokens: 5000,
|
inputTokens: 5000,
|
||||||
cacheCreationInputTokens: 1000,
|
cacheCreationInputTokens: 1000,
|
||||||
cacheReadInputTokens: 500000, // Very high cumulative value
|
cacheReadInputTokens: 500000, // Accumulated from many internal calls
|
||||||
});
|
});
|
||||||
const result = calculateContextTokens(stats, 'claude-code');
|
const result = calculateContextTokens(stats, 'claude-code');
|
||||||
// Should only be 5000 + 1000 = 6000, NOT 506000
|
// 5000 + 500000 + 1000 = 506000 (raw total, may exceed window)
|
||||||
expect(result).toBe(6000);
|
expect(result).toBe(506000);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -162,19 +162,21 @@ describe('Usage Listener', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle zero context window gracefully', async () => {
|
it('should handle zero context window gracefully (falls back to 200k default)', async () => {
|
||||||
setupListener();
|
setupListener();
|
||||||
const handler = eventHandlers.get('usage');
|
const handler = eventHandlers.get('usage');
|
||||||
const usageStats = createMockUsageStats({ contextWindow: 0 });
|
const usageStats = createMockUsageStats({ contextWindow: 0 });
|
||||||
|
|
||||||
handler?.('group-chat-test-chat-123-participant-TestAgent-abc123', usageStats);
|
handler?.('group-chat-test-chat-123-participant-TestAgent-abc123', usageStats);
|
||||||
|
|
||||||
|
// With contextWindow 0, falls back to 200k default
|
||||||
|
// 1800 / 200000 = 0.9% -> rounds to 1%
|
||||||
await vi.waitFor(() => {
|
await vi.waitFor(() => {
|
||||||
expect(mockDeps.groupChatStorage.updateParticipant).toHaveBeenCalledWith(
|
expect(mockDeps.groupChatStorage.updateParticipant).toHaveBeenCalledWith(
|
||||||
'test-chat-123',
|
'test-chat-123',
|
||||||
'TestAgent',
|
'TestAgent',
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
contextUsage: 0,
|
contextUsage: 1,
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -470,9 +470,9 @@ describe('HistoryDetailModal', () => {
|
|||||||
/>
|
/>
|
||||||
);
|
);
|
||||||
|
|
||||||
// Context = (inputTokens + cacheCreationInputTokens) / contextWindow (cacheRead excluded)
|
// Context = (inputTokens + cacheReadInputTokens + cacheCreationInputTokens) / contextWindow
|
||||||
// (5000 + 5000) / 100000 = 10%
|
// (5000 + 2000 + 5000) / 100000 = 12%
|
||||||
expect(screen.getByText('10%')).toBeInTheDocument();
|
expect(screen.getByText('12%')).toBeInTheDocument();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should display token counts', () => {
|
it('should display token counts', () => {
|
||||||
|
|||||||
@@ -1955,8 +1955,8 @@ describe('MainPanel', () => {
|
|||||||
<MainPanel {...defaultProps} activeSession={session} getContextColor={getContextColor} />
|
<MainPanel {...defaultProps} activeSession={session} getContextColor={getContextColor} />
|
||||||
);
|
);
|
||||||
|
|
||||||
// Context usage should be 50000 / 200000 * 100 = 25% (cacheRead excluded - cumulative)
|
// Context usage: (50000 + 25000 + 0) / 200000 * 100 = 38% (input + cacheRead + cacheCreation)
|
||||||
expect(getContextColor).toHaveBeenCalledWith(25, theme);
|
expect(getContextColor).toHaveBeenCalledWith(38, theme);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -2373,9 +2373,10 @@ describe('MainPanel', () => {
|
|||||||
expect(screen.queryByText('Context Window')).not.toBeInTheDocument();
|
expect(screen.queryByText('Context Window')).not.toBeInTheDocument();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should cap context usage at 100%', () => {
|
it('should use preserved session.contextUsage when accumulated values exceed window', () => {
|
||||||
const getContextColor = vi.fn().mockReturnValue('#ef4444');
|
const getContextColor = vi.fn().mockReturnValue('#22c55e');
|
||||||
const session = createSession({
|
const session = createSession({
|
||||||
|
contextUsage: 45, // Preserved valid percentage from last non-accumulated update
|
||||||
aiTabs: [
|
aiTabs: [
|
||||||
{
|
{
|
||||||
id: 'tab-1',
|
id: 'tab-1',
|
||||||
@@ -2386,8 +2387,8 @@ describe('MainPanel', () => {
|
|||||||
usageStats: {
|
usageStats: {
|
||||||
inputTokens: 150000,
|
inputTokens: 150000,
|
||||||
outputTokens: 100000,
|
outputTokens: 100000,
|
||||||
cacheReadInputTokens: 100000, // Excluded from calculation (cumulative)
|
cacheReadInputTokens: 100000, // Accumulated from multi-tool turn
|
||||||
cacheCreationInputTokens: 100000, // Included in calculation
|
cacheCreationInputTokens: 100000, // Accumulated from multi-tool turn
|
||||||
totalCostUsd: 0.05,
|
totalCostUsd: 0.05,
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
},
|
},
|
||||||
@@ -2400,8 +2401,9 @@ describe('MainPanel', () => {
|
|||||||
<MainPanel {...defaultProps} activeSession={session} getContextColor={getContextColor} />
|
<MainPanel {...defaultProps} activeSession={session} getContextColor={getContextColor} />
|
||||||
);
|
);
|
||||||
|
|
||||||
// Context usage: (150000 + 100000) / 200000 = 125% -> capped at 100%
|
// raw = 150000 + 100000 + 100000 = 350000 > 200000 (accumulated)
|
||||||
expect(getContextColor).toHaveBeenCalledWith(100, theme);
|
// Falls back to session.contextUsage = 45%
|
||||||
|
expect(getContextColor).toHaveBeenCalledWith(45, theme);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -650,8 +650,8 @@ describe('calculateTotalTokens', () => {
|
|||||||
|
|
||||||
const total = calculateTotalTokens(contexts);
|
const total = calculateTotalTokens(contexts);
|
||||||
|
|
||||||
// input + cacheCreation for each context (cacheRead excluded - cumulative)
|
// input + cacheRead + cacheCreation for each context
|
||||||
expect(total).toBe(450); // (100+25) + (300+25)
|
expect(total).toBe(575); // (100+50+25) + (300+75+25)
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -694,8 +694,8 @@ describe('getContextSummary', () => {
|
|||||||
|
|
||||||
expect(summary.totalSources).toBe(2);
|
expect(summary.totalSources).toBe(2);
|
||||||
expect(summary.totalLogs).toBe(5);
|
expect(summary.totalLogs).toBe(5);
|
||||||
// (100+25) + (200+25) = 350 (cacheRead excluded - cumulative)
|
// (100+50+25) + (200+75+25) = 475 (input + cacheRead + cacheCreation)
|
||||||
expect(summary.estimatedTokens).toBe(350);
|
expect(summary.estimatedTokens).toBe(475);
|
||||||
expect(summary.byAgent['claude-code']).toBe(1);
|
expect(summary.byAgent['claude-code']).toBe(1);
|
||||||
expect(summary.byAgent['opencode']).toBe(1);
|
expect(summary.byAgent['opencode']).toBe(1);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -28,30 +28,30 @@ describe('estimateContextUsage', () => {
|
|||||||
expect(result).toBe(10);
|
expect(result).toBe(10);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should exclude cacheReadInputTokens from calculation (cumulative, not per-request)', () => {
|
it('should include cacheReadInputTokens in calculation (part of total input context)', () => {
|
||||||
const stats = createStats({
|
const stats = createStats({
|
||||||
inputTokens: 1000,
|
inputTokens: 1000,
|
||||||
outputTokens: 500,
|
outputTokens: 500,
|
||||||
cacheReadInputTokens: 50000, // Should be ignored
|
cacheReadInputTokens: 50000,
|
||||||
cacheCreationInputTokens: 5000,
|
cacheCreationInputTokens: 5000,
|
||||||
contextWindow: 100000,
|
contextWindow: 100000,
|
||||||
});
|
});
|
||||||
const result = estimateContextUsage(stats, 'claude-code');
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
// (1000 + 5000) / 100000 = 6% (cacheRead excluded)
|
// (1000 + 50000 + 5000) / 100000 = 56%
|
||||||
expect(result).toBe(6);
|
expect(result).toBe(56);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should cap at 100%', () => {
|
it('should return null when accumulated tokens exceed context window', () => {
|
||||||
const stats = createStats({
|
const stats = createStats({
|
||||||
inputTokens: 50000,
|
inputTokens: 50000,
|
||||||
outputTokens: 50000,
|
outputTokens: 50000,
|
||||||
cacheReadInputTokens: 150000, // Ignored
|
cacheReadInputTokens: 150000,
|
||||||
cacheCreationInputTokens: 200000,
|
cacheCreationInputTokens: 200000,
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
});
|
});
|
||||||
const result = estimateContextUsage(stats, 'claude-code');
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
// (50000 + 200000) / 200000 = 125% -> capped at 100%
|
// (50000 + 150000 + 200000) = 400000 > 200000 -> null (accumulated values)
|
||||||
expect(result).toBe(100);
|
expect(result).toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should round to nearest integer', () => {
|
it('should round to nearest integer', () => {
|
||||||
@@ -139,19 +139,20 @@ describe('estimateContextUsage', () => {
|
|||||||
expect(result).toBe(10);
|
expect(result).toBe(10);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should ignore large cache read tokens (they are cumulative, not per-request)', () => {
|
it('should return null when accumulated cacheRead tokens cause total to exceed context window', () => {
|
||||||
// Claude Code reports cacheReadInputTokens as cumulative session totals.
|
// During multi-tool turns, Claude Code accumulates token values across
|
||||||
// They can exceed the context window, so we exclude them from calculation.
|
// internal API calls. When accumulated total exceeds context window,
|
||||||
|
// return null to signal callers should preserve previous valid percentage.
|
||||||
const stats = createStats({
|
const stats = createStats({
|
||||||
inputTokens: 500, // small new turn input
|
inputTokens: 500,
|
||||||
outputTokens: 1000, // small response
|
outputTokens: 1000,
|
||||||
cacheReadInputTokens: 758000, // cumulative across session - should be IGNORED
|
cacheReadInputTokens: 758000, // accumulated across multi-tool turn
|
||||||
cacheCreationInputTokens: 50000, // new cache this turn
|
cacheCreationInputTokens: 50000,
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
});
|
});
|
||||||
const result = estimateContextUsage(stats, 'claude-code');
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
// (500 + 50000) / 200000 = 25% (cacheRead excluded)
|
// (500 + 758000 + 50000) = 808500 > 200000 -> null (accumulated values)
|
||||||
expect(result).toBe(25);
|
expect(result).toBeNull();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -172,17 +173,17 @@ describe('estimateContextUsage', () => {
|
|||||||
expect(result).toBe(5);
|
expect(result).toBe(5);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle very large token counts', () => {
|
it('should return null for very large accumulated token counts', () => {
|
||||||
const stats = createStats({
|
const stats = createStats({
|
||||||
inputTokens: 250000,
|
inputTokens: 250000,
|
||||||
outputTokens: 500000,
|
outputTokens: 500000,
|
||||||
cacheReadInputTokens: 500000, // Ignored
|
cacheReadInputTokens: 500000,
|
||||||
cacheCreationInputTokens: 250000,
|
cacheCreationInputTokens: 250000,
|
||||||
contextWindow: 0,
|
contextWindow: 0,
|
||||||
});
|
});
|
||||||
const result = estimateContextUsage(stats, 'claude-code');
|
const result = estimateContextUsage(stats, 'claude-code');
|
||||||
// (250000 + 250000) / 200000 = 250% -> capped at 100%
|
// (250000 + 500000 + 250000) = 1000000 > 200000 -> null (accumulated values)
|
||||||
expect(result).toBe(100);
|
expect(result).toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle very small percentages', () => {
|
it('should handle very small percentages', () => {
|
||||||
@@ -213,34 +214,33 @@ describe('calculateContextTokens', () => {
|
|||||||
...overrides,
|
...overrides,
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('Claude agents (excludes output and cacheRead tokens)', () => {
|
describe('Claude agents (input + cacheRead + cacheCreation)', () => {
|
||||||
it('should exclude output and cacheRead tokens for claude-code', () => {
|
it('should include input, cacheRead, and cacheCreation tokens for claude-code', () => {
|
||||||
const stats = createStats();
|
const stats = createStats();
|
||||||
const result = calculateContextTokens(stats, 'claude-code');
|
const result = calculateContextTokens(stats, 'claude-code');
|
||||||
// 10000 + 1000 = 11000 (no output, no cacheRead)
|
// 10000 + 2000 + 1000 = 13000 (excludes output only)
|
||||||
// cacheRead is excluded because Claude Code reports it as cumulative
|
expect(result).toBe(13000);
|
||||||
expect(result).toBe(11000);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should exclude output and cacheRead tokens for claude', () => {
|
it('should include input, cacheRead, and cacheCreation tokens for claude', () => {
|
||||||
const stats = createStats();
|
const stats = createStats();
|
||||||
const result = calculateContextTokens(stats, 'claude');
|
const result = calculateContextTokens(stats, 'claude');
|
||||||
expect(result).toBe(11000);
|
expect(result).toBe(13000);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should exclude output and cacheRead tokens when agent is undefined', () => {
|
it('should include input, cacheRead, and cacheCreation tokens when agent is undefined', () => {
|
||||||
const stats = createStats();
|
const stats = createStats();
|
||||||
const result = calculateContextTokens(stats);
|
const result = calculateContextTokens(stats);
|
||||||
// Defaults to Claude behavior
|
// Defaults to Claude behavior
|
||||||
expect(result).toBe(11000);
|
expect(result).toBe(13000);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('OpenAI agents (includes output tokens)', () => {
|
describe('OpenAI agents (includes output tokens)', () => {
|
||||||
it('should include output tokens for codex', () => {
|
it('should include input, output, and cacheCreation tokens for codex', () => {
|
||||||
const stats = createStats();
|
const stats = createStats();
|
||||||
const result = calculateContextTokens(stats, 'codex');
|
const result = calculateContextTokens(stats, 'codex');
|
||||||
// 10000 + 5000 + 1000 = 16000 (includes output, excludes cacheRead)
|
// 10000 + 5000 + 1000 = 16000 (input + output + cacheCreation, excludes cacheRead)
|
||||||
expect(result).toBe(16000);
|
expect(result).toBe(16000);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -268,18 +268,19 @@ describe('calculateContextTokens', () => {
|
|||||||
expect(result).toBe(10000);
|
expect(result).toBe(10000);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should ignore large cacheRead values (cumulative session data)', () => {
|
it('should include cacheRead in raw calculation (callers detect accumulated values)', () => {
|
||||||
// This tests the real bug scenario: Claude Code reports cumulative cacheRead
|
// calculateContextTokens returns the raw total including cacheRead.
|
||||||
// that exceeds context window, which would cause 100%+ display
|
// Callers (estimateContextUsage) detect when total > contextWindow
|
||||||
|
// and return null to signal accumulated values from multi-tool turns.
|
||||||
const stats = createStats({
|
const stats = createStats({
|
||||||
inputTokens: 50000,
|
inputTokens: 50000,
|
||||||
outputTokens: 9000,
|
outputTokens: 9000,
|
||||||
cacheReadInputTokens: 758000, // Cumulative - should be IGNORED
|
cacheReadInputTokens: 758000,
|
||||||
cacheCreationInputTokens: 75000,
|
cacheCreationInputTokens: 75000,
|
||||||
});
|
});
|
||||||
const result = calculateContextTokens(stats, 'claude-code');
|
const result = calculateContextTokens(stats, 'claude-code');
|
||||||
// 50000 + 75000 = 125000 (cacheRead excluded)
|
// 50000 + 758000 + 75000 = 883000 (raw total, callers check against window)
|
||||||
expect(result).toBe(125000);
|
expect(result).toBe(883000);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -60,12 +60,16 @@ const COMBINED_CONTEXT_AGENTS: Set<ToolType> = new Set(['codex']);
|
|||||||
/**
|
/**
|
||||||
* Calculate total context tokens based on agent-specific semantics.
|
* Calculate total context tokens based on agent-specific semantics.
|
||||||
*
|
*
|
||||||
* IMPORTANT: Claude Code reports CUMULATIVE session tokens, not per-request tokens.
|
* For a single Anthropic API call, the total input context is the sum of:
|
||||||
* The cacheReadInputTokens can exceed the context window because they accumulate
|
* inputTokens + cacheReadInputTokens + cacheCreationInputTokens
|
||||||
* across all turns in the conversation. For context pressure display, we should
|
* These three fields partition the input into uncached, cache-hit, and newly-cached segments.
|
||||||
* only count tokens that represent NEW context being added:
|
|
||||||
*
|
*
|
||||||
* Claude models: Context = input + cacheCreation (excludes cacheRead - already cached)
|
* CAVEAT: When Claude Code performs multi-tool turns (many internal API calls),
|
||||||
|
* the reported values may be accumulated across all internal calls within the turn.
|
||||||
|
* In that case the total can exceed the context window. Callers should check for
|
||||||
|
* this and skip the update (see estimateContextUsage).
|
||||||
|
*
|
||||||
|
* Claude models: Context = input + cacheRead + cacheCreation
|
||||||
* OpenAI models: Context = input + output (combined limit)
|
* OpenAI models: Context = input + cacheCreation + output (combined limit)
|
||||||
*
|
*
|
||||||
* @param stats - The usage statistics containing token counts
|
* @param stats - The usage statistics containing token counts
|
||||||
@@ -79,34 +83,31 @@ export function calculateContextTokens(
|
|||||||
>,
|
>,
|
||||||
agentId?: ToolType
|
agentId?: ToolType
|
||||||
): number {
|
): number {
|
||||||
// For Claude: inputTokens = uncached new tokens, cacheCreationInputTokens = newly cached tokens
|
|
||||||
// cacheReadInputTokens are EXCLUDED because they represent already-cached context
|
|
||||||
// that Claude Code reports cumulatively across the session, not per-request.
|
|
||||||
// Including them would cause context % to exceed 100% impossibly.
|
|
||||||
const baseTokens = stats.inputTokens + (stats.cacheCreationInputTokens || 0);
|
|
||||||
|
|
||||||
// OpenAI models have combined input+output context limits
|
// OpenAI models have combined input+output context limits
|
||||||
if (agentId && COMBINED_CONTEXT_AGENTS.has(agentId)) {
|
if (agentId && COMBINED_CONTEXT_AGENTS.has(agentId)) {
|
||||||
return baseTokens + stats.outputTokens;
|
return stats.inputTokens + (stats.cacheCreationInputTokens || 0) + stats.outputTokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Claude models: output tokens don't consume context window
|
// Claude models: total input = uncached + cache-hit + newly-cached
|
||||||
return baseTokens;
|
// Output tokens don't consume the input context window
|
||||||
|
return (
|
||||||
|
stats.inputTokens + (stats.cacheReadInputTokens || 0) + (stats.cacheCreationInputTokens || 0)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Estimate context usage percentage when the agent doesn't provide it directly.
|
* Estimate context usage percentage when the agent doesn't provide it directly.
|
||||||
* Uses agent-specific default context window sizes for accurate estimation.
|
* Uses agent-specific default context window sizes for accurate estimation.
|
||||||
*
|
*
|
||||||
* IMPORTANT: Context calculation varies by agent:
|
* Context calculation varies by agent:
|
||||||
* - Claude models: inputTokens + cacheCreationInputTokens
|
* - Claude models: inputTokens + cacheReadInputTokens + cacheCreationInputTokens
|
||||||
* (cacheRead excluded - cumulative, output excluded - separate limit)
|
* - OpenAI models (Codex): inputTokens + cacheCreationInputTokens + outputTokens (combined limit)
|
||||||
* - OpenAI models (Codex): inputTokens + outputTokens
|
|
||||||
* (combined context window includes both input and output)
|
|
||||||
*
|
*
|
||||||
* Note: cacheReadInputTokens are NOT included because Claude Code reports them
|
* Returns null when the calculated total exceeds the context window, which indicates
|
||||||
* as cumulative session totals, not per-request values. Including them would
|
* accumulated values from multi-tool turns (many internal API calls within one turn).
|
||||||
* cause context percentage to exceed 100% impossibly.
|
* A single API call's total input can never exceed the context window, so values
|
||||||
|
* above it are definitely accumulated. Callers should preserve the previous valid
|
||||||
|
* percentage when this returns null.
|
||||||
*
|
*
|
||||||
* @param stats - The usage statistics containing token counts
|
* @param stats - The usage statistics containing token counts
|
||||||
* @param agentId - The agent identifier for agent-specific context window size
|
* @param agentId - The agent identifier for agent-specific context window size
|
||||||
@@ -126,19 +127,23 @@ export function estimateContextUsage(
|
|||||||
// Calculate total context using agent-specific semantics
|
// Calculate total context using agent-specific semantics
|
||||||
const totalContextTokens = calculateContextTokens(stats, agentId);
|
const totalContextTokens = calculateContextTokens(stats, agentId);
|
||||||
|
|
||||||
// If context window is provided and valid, use it
|
// Determine effective context window
|
||||||
if (stats.contextWindow && stats.contextWindow > 0) {
|
const effectiveContextWindow =
|
||||||
return Math.min(100, Math.round((totalContextTokens / stats.contextWindow) * 100));
|
stats.contextWindow && stats.contextWindow > 0
|
||||||
}
|
? stats.contextWindow
|
||||||
|
: agentId && agentId !== 'terminal'
|
||||||
|
? DEFAULT_CONTEXT_WINDOWS[agentId] || 0
|
||||||
|
: 0;
|
||||||
|
|
||||||
// If no agent specified or terminal, cannot estimate
|
if (!effectiveContextWindow || effectiveContextWindow <= 0) {
|
||||||
if (!agentId || agentId === 'terminal') {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use agent-specific default context window
|
// If total exceeds context window, the values are accumulated across multiple
|
||||||
const defaultContextWindow = DEFAULT_CONTEXT_WINDOWS[agentId];
|
// internal API calls within a complex turn (tool use chains). A single API call's
|
||||||
if (!defaultContextWindow || defaultContextWindow <= 0) {
|
// total input cannot exceed the context window. Return null to signal callers
|
||||||
|
// should keep the previous valid percentage.
|
||||||
|
if (totalContextTokens > effectiveContextWindow) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,7 +151,7 @@ export function estimateContextUsage(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Math.min(100, Math.round((totalContextTokens / defaultContextWindow) * 100));
|
return Math.round((totalContextTokens / effectiveContextWindow) * 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -52,18 +52,29 @@ export function setupUsageListener(
|
|||||||
// Calculate context usage percentage using agent-specific logic
|
// Calculate context usage percentage using agent-specific logic
|
||||||
// Note: For group chat, we don't have agent type here, defaults to Claude behavior
|
// Note: For group chat, we don't have agent type here, defaults to Claude behavior
|
||||||
const totalContextTokens = usageAggregator.calculateContextTokens(usageStats);
|
const totalContextTokens = usageAggregator.calculateContextTokens(usageStats);
|
||||||
const contextUsage =
|
const effectiveWindow = usageStats.contextWindow > 0 ? usageStats.contextWindow : 200000;
|
||||||
usageStats.contextWindow > 0
|
|
||||||
? Math.round((totalContextTokens / usageStats.contextWindow) * 100)
|
// Skip update if values are accumulated (total > window) from multi-tool turns
|
||||||
: 0;
|
const contextUsage =
|
||||||
|
totalContextTokens <= effectiveWindow
|
||||||
|
? Math.round((totalContextTokens / effectiveWindow) * 100)
|
||||||
|
: -1; // -1 signals "skip update"
|
||||||
|
|
||||||
|
// Update participant with usage stats (skip context update if accumulated)
|
||||||
|
const updateData: {
|
||||||
|
contextUsage?: number;
|
||||||
|
tokenCount?: number;
|
||||||
|
totalCost: number;
|
||||||
|
} = {
|
||||||
|
totalCost: usageStats.totalCostUsd,
|
||||||
|
};
|
||||||
|
if (contextUsage >= 0) {
|
||||||
|
updateData.contextUsage = contextUsage;
|
||||||
|
updateData.tokenCount = totalContextTokens;
|
||||||
|
}
|
||||||
|
|
||||||
// Update participant with usage stats
|
|
||||||
groupChatStorage
|
groupChatStorage
|
||||||
.updateParticipant(groupChatId, participantName, {
|
.updateParticipant(groupChatId, participantName, updateData)
|
||||||
contextUsage,
|
|
||||||
tokenCount: totalContextTokens,
|
|
||||||
totalCost: usageStats.totalCostUsd,
|
|
||||||
})
|
|
||||||
.then((updatedChat) => {
|
.then((updatedChat) => {
|
||||||
// Emit participants changed so UI updates
|
// Emit participants changed so UI updates
|
||||||
// Note: updateParticipant returns the updated chat, avoiding extra DB read
|
// Note: updateParticipant returns the updated chat, avoiding extra DB read
|
||||||
@@ -87,17 +98,25 @@ export function setupUsageListener(
|
|||||||
// Calculate context usage percentage using agent-specific logic
|
// Calculate context usage percentage using agent-specific logic
|
||||||
// Note: Moderator is typically Claude, defaults to Claude behavior
|
// Note: Moderator is typically Claude, defaults to Claude behavior
|
||||||
const totalContextTokens = usageAggregator.calculateContextTokens(usageStats);
|
const totalContextTokens = usageAggregator.calculateContextTokens(usageStats);
|
||||||
const contextUsage =
|
const effectiveWindow = usageStats.contextWindow > 0 ? usageStats.contextWindow : 200000;
|
||||||
usageStats.contextWindow > 0
|
|
||||||
? Math.round((totalContextTokens / usageStats.contextWindow) * 100)
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
// Emit moderator usage for the moderator card
|
// Skip context update if values are accumulated (total > window) from multi-tool turns.
|
||||||
groupChatEmitters.emitModeratorUsage?.(groupChatId, {
|
// When accumulated, emit with contextUsage/tokenCount as -1 so the handler
|
||||||
contextUsage,
|
// knows to preserve the previous values. Cost is always updated.
|
||||||
totalCost: usageStats.totalCostUsd,
|
if (totalContextTokens <= effectiveWindow) {
|
||||||
tokenCount: totalContextTokens,
|
const contextUsage = Math.round((totalContextTokens / effectiveWindow) * 100);
|
||||||
});
|
groupChatEmitters.emitModeratorUsage?.(groupChatId, {
|
||||||
|
contextUsage,
|
||||||
|
totalCost: usageStats.totalCostUsd,
|
||||||
|
tokenCount: totalContextTokens,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
groupChatEmitters.emitModeratorUsage?.(groupChatId, {
|
||||||
|
contextUsage: -1,
|
||||||
|
totalCost: usageStats.totalCostUsd,
|
||||||
|
tokenCount: -1,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
safeSend('process:usage', sessionId, usageStats);
|
safeSend('process:usage', sessionId, usageStats);
|
||||||
|
|||||||
@@ -2714,36 +2714,30 @@ function MaestroConsoleInner() {
|
|||||||
actualSessionId = sessionId;
|
actualSessionId = sessionId;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate context window usage percentage from CURRENT reported tokens.
|
// Calculate context window usage percentage.
|
||||||
// IMPORTANT: Claude Code reports cacheReadInputTokens as CUMULATIVE session totals,
|
// For Claude: context = inputTokens + cacheReadInputTokens + cacheCreationInputTokens
|
||||||
// not per-request values. Including them causes context % to exceed 100% impossibly.
|
// (these three fields partition the total input into uncached, cache-hit, newly-cached)
|
||||||
// For Claude: context = inputTokens + cacheCreationInputTokens (new content only)
|
|
||||||
// For Codex: context = inputTokens + outputTokens (combined limit)
|
// For Codex: context = inputTokens + outputTokens (combined limit)
|
||||||
|
//
|
||||||
|
// When Claude Code performs complex multi-tool turns, the reported values are
|
||||||
|
// accumulated across internal API calls and can exceed the context window.
|
||||||
|
// estimateContextUsage returns null in that case - we skip the update and
|
||||||
|
// keep the last valid measurement. This means the gauge may stay static
|
||||||
|
// during tool-heavy turns, but it's always accurate when it does update,
|
||||||
|
// keeping the compact warning reliable.
|
||||||
const sessionForUsage = sessionsRef.current.find((s) => s.id === actualSessionId);
|
const sessionForUsage = sessionsRef.current.find((s) => s.id === actualSessionId);
|
||||||
const agentToolType = sessionForUsage?.toolType;
|
const agentToolType = sessionForUsage?.toolType;
|
||||||
const isClaudeUsage = agentToolType === 'claude-code' || agentToolType === 'claude';
|
const contextPercentage = estimateContextUsage(usageStats, agentToolType);
|
||||||
const currentContextTokens = isClaudeUsage
|
|
||||||
? usageStats.inputTokens + usageStats.cacheCreationInputTokens
|
|
||||||
: usageStats.inputTokens + usageStats.outputTokens;
|
|
||||||
|
|
||||||
// Calculate context percentage, falling back to agent-specific defaults if contextWindow not provided
|
|
||||||
let contextPercentage: number;
|
|
||||||
if (usageStats.contextWindow > 0) {
|
|
||||||
contextPercentage = Math.min(
|
|
||||||
Math.round((currentContextTokens / usageStats.contextWindow) * 100),
|
|
||||||
100
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
// Use fallback estimation with agent-specific default context window
|
|
||||||
const estimated = estimateContextUsage(usageStats, agentToolType);
|
|
||||||
contextPercentage = estimated ?? 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Batch the usage stats update, context percentage, and cycle tokens
|
// Batch the usage stats update, context percentage, and cycle tokens
|
||||||
// The batched updater handles the accumulation logic internally
|
// The batched updater handles the accumulation logic internally
|
||||||
batchedUpdater.updateUsage(actualSessionId, tabId, usageStats);
|
batchedUpdater.updateUsage(actualSessionId, tabId, usageStats);
|
||||||
batchedUpdater.updateUsage(actualSessionId, null, usageStats); // Session-level accumulation
|
batchedUpdater.updateUsage(actualSessionId, null, usageStats); // Session-level accumulation
|
||||||
batchedUpdater.updateContextUsage(actualSessionId, contextPercentage);
|
if (contextPercentage !== null) {
|
||||||
|
// Valid measurement from a non-accumulated turn - use it directly
|
||||||
|
batchedUpdater.updateContextUsage(actualSessionId, contextPercentage);
|
||||||
|
}
|
||||||
|
// When null (accumulated values), keep the last valid percentage unchanged
|
||||||
batchedUpdater.updateCycleTokens(actualSessionId, usageStats.outputTokens);
|
batchedUpdater.updateCycleTokens(actualSessionId, usageStats.outputTokens);
|
||||||
|
|
||||||
// Update persistent global stats (not batched - this is a separate concern)
|
// Update persistent global stats (not batched - this is a separate concern)
|
||||||
@@ -3272,7 +3266,17 @@ function MaestroConsoleInner() {
|
|||||||
|
|
||||||
const unsubModeratorUsage = window.maestro.groupChat.onModeratorUsage?.((id, usage) => {
|
const unsubModeratorUsage = window.maestro.groupChat.onModeratorUsage?.((id, usage) => {
|
||||||
if (id === activeGroupChatId) {
|
if (id === activeGroupChatId) {
|
||||||
setModeratorUsage(usage);
|
// When contextUsage is -1, values are accumulated from multi-tool turns.
|
||||||
|
// Preserve previous context/token values, only update cost.
|
||||||
|
if (usage.contextUsage === -1) {
|
||||||
|
setModeratorUsage((prev) =>
|
||||||
|
prev
|
||||||
|
? { ...prev, totalCost: usage.totalCost }
|
||||||
|
: { contextUsage: 0, totalCost: usage.totalCost, tokenCount: 0 }
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
setModeratorUsage(usage);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -544,11 +544,13 @@ export const MainPanel = React.memo(
|
|||||||
}, [configuredContextWindow, activeTab?.usageStats?.contextWindow]);
|
}, [configuredContextWindow, activeTab?.usageStats?.contextWindow]);
|
||||||
|
|
||||||
// Compute context tokens using agent-specific calculation
|
// Compute context tokens using agent-specific calculation
|
||||||
// Claude: input + cacheCreation (excludes cacheRead which is cumulative)
|
// Claude: input + cacheRead + cacheCreation (total input for the request)
|
||||||
// Codex: input + output (combined limit)
|
// Codex: input + output (combined limit)
|
||||||
|
// When values are accumulated from multi-tool turns, total may exceed contextWindow.
|
||||||
|
// In that case, derive tokens from session.contextUsage (preserved last valid percentage).
|
||||||
const activeTabContextTokens = useMemo(() => {
|
const activeTabContextTokens = useMemo(() => {
|
||||||
if (!activeTab?.usageStats) return 0;
|
if (!activeTab?.usageStats) return 0;
|
||||||
return calculateContextTokens(
|
const raw = calculateContextTokens(
|
||||||
{
|
{
|
||||||
inputTokens: activeTab.usageStats.inputTokens,
|
inputTokens: activeTab.usageStats.inputTokens,
|
||||||
outputTokens: activeTab.usageStats.outputTokens,
|
outputTokens: activeTab.usageStats.outputTokens,
|
||||||
@@ -557,7 +559,20 @@ export const MainPanel = React.memo(
|
|||||||
},
|
},
|
||||||
activeSession?.toolType
|
activeSession?.toolType
|
||||||
);
|
);
|
||||||
}, [activeTab?.usageStats, activeSession?.toolType]);
|
// Accumulated from multi-tool turns: derive from session's preserved percentage.
|
||||||
|
// App.tsx skips updating session.contextUsage when accumulated, so it holds
|
||||||
|
// the last valid percentage from estimateContextUsage.
|
||||||
|
if (activeTabContextWindow > 0 && raw > activeTabContextWindow) {
|
||||||
|
const preservedPercentage = activeSession?.contextUsage ?? 0;
|
||||||
|
return Math.round((preservedPercentage / 100) * activeTabContextWindow);
|
||||||
|
}
|
||||||
|
return raw;
|
||||||
|
}, [
|
||||||
|
activeTab?.usageStats,
|
||||||
|
activeSession?.toolType,
|
||||||
|
activeTabContextWindow,
|
||||||
|
activeSession?.contextUsage,
|
||||||
|
]);
|
||||||
|
|
||||||
// Compute context usage percentage from context tokens and window size
|
// Compute context usage percentage from context tokens and window size
|
||||||
const activeTabContextUsage = useMemo(() => {
|
const activeTabContextUsage = useMemo(() => {
|
||||||
|
|||||||
@@ -31,12 +31,16 @@ const COMBINED_CONTEXT_AGENTS: Set<ToolType> = new Set(['codex']);
|
|||||||
/**
|
/**
|
||||||
* Calculate total context tokens based on agent-specific semantics.
|
* Calculate total context tokens based on agent-specific semantics.
|
||||||
*
|
*
|
||||||
* IMPORTANT: Claude Code reports CUMULATIVE session tokens, not per-request tokens.
|
* For a single Anthropic API call, the total input context is the sum of:
|
||||||
* The cacheReadInputTokens can exceed the context window because they accumulate
|
* inputTokens + cacheReadInputTokens + cacheCreationInputTokens
|
||||||
* across all turns in the conversation. For context pressure display, we should
|
* These three fields partition the input into uncached, cache-hit, and newly-cached segments.
|
||||||
* only count tokens that represent NEW context being added:
|
|
||||||
*
|
*
|
||||||
* Claude models: Context = input + cacheCreation (excludes cacheRead - already cached)
|
* CAVEAT: When Claude Code performs multi-tool turns (many internal API calls),
|
||||||
|
* the reported values may be accumulated across all internal calls within the turn.
|
||||||
|
* In that case the total can exceed the context window. Callers should check for
|
||||||
|
* this and skip the update (see estimateContextUsage).
|
||||||
|
*
|
||||||
|
* Claude models: Context = input + cacheRead + cacheCreation
|
||||||
* OpenAI models: Context = input + output (combined limit)
|
* OpenAI models: Context = input + output (combined limit)
|
||||||
*
|
*
|
||||||
* @param stats - The usage statistics containing token counts
|
* @param stats - The usage statistics containing token counts
|
||||||
@@ -50,34 +54,31 @@ export function calculateContextTokens(
|
|||||||
>,
|
>,
|
||||||
agentId?: ToolType
|
agentId?: ToolType
|
||||||
): number {
|
): number {
|
||||||
// For Claude: inputTokens = uncached new tokens, cacheCreationInputTokens = newly cached tokens
|
|
||||||
// cacheReadInputTokens are EXCLUDED because they represent already-cached context
|
|
||||||
// that Claude Code reports cumulatively across the session, not per-request.
|
|
||||||
// Including them would cause context % to exceed 100% impossibly.
|
|
||||||
const baseTokens = stats.inputTokens + (stats.cacheCreationInputTokens || 0);
|
|
||||||
|
|
||||||
// OpenAI models have combined input+output context limits
|
// OpenAI models have combined input+output context limits
|
||||||
if (agentId && COMBINED_CONTEXT_AGENTS.has(agentId)) {
|
if (agentId && COMBINED_CONTEXT_AGENTS.has(agentId)) {
|
||||||
return baseTokens + stats.outputTokens;
|
return stats.inputTokens + (stats.cacheCreationInputTokens || 0) + stats.outputTokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Claude models: output tokens don't consume context window
|
// Claude models: total input = uncached + cache-hit + newly-cached
|
||||||
return baseTokens;
|
// Output tokens don't consume the input context window
|
||||||
|
return (
|
||||||
|
stats.inputTokens + (stats.cacheReadInputTokens || 0) + (stats.cacheCreationInputTokens || 0)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Estimate context usage percentage when the agent doesn't provide it directly.
|
* Estimate context usage percentage when the agent doesn't provide it directly.
|
||||||
* Uses agent-specific default context window sizes for accurate estimation.
|
* Uses agent-specific default context window sizes for accurate estimation.
|
||||||
*
|
*
|
||||||
* IMPORTANT: Context calculation varies by agent:
|
* Context calculation varies by agent:
|
||||||
* - Claude models: inputTokens + cacheCreationInputTokens
|
* - Claude models: inputTokens + cacheReadInputTokens + cacheCreationInputTokens
|
||||||
* (cacheRead excluded - cumulative, output excluded - separate limit)
|
* - OpenAI models (Codex): inputTokens + outputTokens (combined limit)
|
||||||
* - OpenAI models (Codex): inputTokens + outputTokens
|
|
||||||
* (combined context window includes both input and output)
|
|
||||||
*
|
*
|
||||||
* Note: cacheReadInputTokens are NOT included because Claude Code reports them
|
* Returns null when the calculated total exceeds the context window, which indicates
|
||||||
* as cumulative session totals, not per-request values. Including them would
|
* accumulated values from multi-tool turns (many internal API calls within one turn).
|
||||||
* cause context percentage to exceed 100% impossibly.
|
* A single API call's total input can never exceed the context window, so values
|
||||||
|
* above it are definitely accumulated. Callers should preserve the previous valid
|
||||||
|
* percentage when this returns null.
|
||||||
*
|
*
|
||||||
* @param stats - The usage statistics containing token counts
|
* @param stats - The usage statistics containing token counts
|
||||||
* @param agentId - The agent identifier for agent-specific context window size
|
* @param agentId - The agent identifier for agent-specific context window size
|
||||||
@@ -97,19 +98,23 @@ export function estimateContextUsage(
|
|||||||
// Calculate total context using agent-specific semantics
|
// Calculate total context using agent-specific semantics
|
||||||
const totalContextTokens = calculateContextTokens(stats, agentId);
|
const totalContextTokens = calculateContextTokens(stats, agentId);
|
||||||
|
|
||||||
// If context window is provided and valid, use it
|
// Determine effective context window
|
||||||
if (stats.contextWindow && stats.contextWindow > 0) {
|
const effectiveContextWindow =
|
||||||
return Math.min(100, Math.round((totalContextTokens / stats.contextWindow) * 100));
|
stats.contextWindow && stats.contextWindow > 0
|
||||||
}
|
? stats.contextWindow
|
||||||
|
: agentId && agentId !== 'terminal'
|
||||||
|
? DEFAULT_CONTEXT_WINDOWS[agentId] || 0
|
||||||
|
: 0;
|
||||||
|
|
||||||
// If no agent specified or terminal, cannot estimate
|
if (!effectiveContextWindow || effectiveContextWindow <= 0) {
|
||||||
if (!agentId || agentId === 'terminal') {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use agent-specific default context window
|
// If total exceeds context window, the values are accumulated across multiple
|
||||||
const defaultContextWindow = DEFAULT_CONTEXT_WINDOWS[agentId];
|
// internal API calls within a complex turn (tool use chains). A single API call's
|
||||||
if (!defaultContextWindow || defaultContextWindow <= 0) {
|
// total input cannot exceed the context window. Return null to signal callers
|
||||||
|
// should keep the previous valid percentage.
|
||||||
|
if (totalContextTokens > effectiveContextWindow) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,5 +122,5 @@ export function estimateContextUsage(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Math.min(100, Math.round((totalContextTokens / defaultContextWindow) * 100));
|
return Math.round((totalContextTokens / effectiveContextWindow) * 100);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user