diff --git a/package.json b/package.json index 8b05f032..7225f5da 100644 --- a/package.json +++ b/package.json @@ -42,7 +42,8 @@ "test:e2e:ui": "npm run build:main && npm run build:renderer && playwright test --ui", "test:e2e:headed": "npm run build:main && npm run build:renderer && playwright test --headed", "test:integration": "vitest run --config vitest.integration.config.ts", - "test:integration:watch": "vitest --config vitest.integration.config.ts" + "test:integration:watch": "vitest --config vitest.integration.config.ts", + "test:performance": "vitest run --config vitest.performance.config.mts" }, "build": { "appId": "com.maestro.app", diff --git a/src/__tests__/main/agent-capabilities.test.ts b/src/__tests__/main/agent-capabilities.test.ts index bf2a33a3..75c1a532 100644 --- a/src/__tests__/main/agent-capabilities.test.ts +++ b/src/__tests__/main/agent-capabilities.test.ts @@ -257,6 +257,7 @@ describe('agent-capabilities', () => { 'supportsResultMessages', 'supportsModelSelection', 'requiresPromptToStart', + 'supportsThinkingDisplay', ]; const defaultKeys = Object.keys(DEFAULT_CAPABILITIES); diff --git a/src/__tests__/main/parsers/claude-output-parser.test.ts b/src/__tests__/main/parsers/claude-output-parser.test.ts index 6b1b4ea6..bae01b05 100644 --- a/src/__tests__/main/parsers/claude-output-parser.test.ts +++ b/src/__tests__/main/parsers/claude-output-parser.test.ts @@ -295,6 +295,125 @@ describe('ClaudeOutputParser', () => { }); }); + describe('toolUseBlocks extraction', () => { + it('should extract tool_use blocks from assistant messages', () => { + const line = JSON.stringify({ + type: 'assistant', + session_id: 'sess-abc123', + message: { + role: 'assistant', + content: [ + { type: 'text', text: 'Let me read that file' }, + { type: 'tool_use', id: 'toolu_123', name: 'Read', input: { file: 'foo.ts' } }, + ], + }, + }); + + const event = parser.parseJsonLine(line); + expect(event).not.toBeNull(); + expect(event?.type).toBe('text'); + expect(event?.text).toBe('Let me read that file'); + expect(event?.toolUseBlocks).toBeDefined(); + expect(event?.toolUseBlocks).toHaveLength(1); + expect(event?.toolUseBlocks?.[0]).toEqual({ + name: 'Read', + id: 'toolu_123', + input: { file: 'foo.ts' }, + }); + }); + + it('should extract multiple tool_use blocks', () => { + const line = JSON.stringify({ + type: 'assistant', + message: { + content: [ + { type: 'text', text: 'I will read and edit files' }, + { type: 'tool_use', id: 'toolu_1', name: 'Read', input: { file: 'a.ts' } }, + { type: 'tool_use', id: 'toolu_2', name: 'Edit', input: { file: 'b.ts', changes: [] } }, + ], + }, + }); + + const event = parser.parseJsonLine(line); + expect(event?.toolUseBlocks).toHaveLength(2); + expect(event?.toolUseBlocks?.[0].name).toBe('Read'); + expect(event?.toolUseBlocks?.[1].name).toBe('Edit'); + }); + + it('should not include toolUseBlocks when there are no tool_use blocks', () => { + const line = JSON.stringify({ + type: 'assistant', + message: { + content: [{ type: 'text', text: 'Just text, no tools' }], + }, + }); + + const event = parser.parseJsonLine(line); + expect(event?.type).toBe('text'); + expect(event?.text).toBe('Just text, no tools'); + expect(event?.toolUseBlocks).toBeUndefined(); + }); + + it('should not include toolUseBlocks for string content', () => { + const line = JSON.stringify({ + type: 'assistant', + message: { + content: 'String content, not array', + }, + }); + + const event = parser.parseJsonLine(line); + expect(event?.type).toBe('text'); + expect(event?.text).toBe('String content, not array'); + expect(event?.toolUseBlocks).toBeUndefined(); + }); + + it('should handle tool_use blocks without id field', () => { + const line = JSON.stringify({ + type: 'assistant', + message: { + content: [{ type: 'tool_use', name: 'Bash', input: { command: 'ls' } }], + }, + }); + + const event = parser.parseJsonLine(line); + expect(event?.toolUseBlocks).toHaveLength(1); + expect(event?.toolUseBlocks?.[0].name).toBe('Bash'); + expect(event?.toolUseBlocks?.[0].id).toBeUndefined(); + expect(event?.toolUseBlocks?.[0].input).toEqual({ command: 'ls' }); + }); + + it('should skip tool_use blocks without name', () => { + const line = JSON.stringify({ + type: 'assistant', + message: { + content: [ + { type: 'tool_use', id: 'toolu_valid', name: 'Read', input: {} }, + { type: 'tool_use', id: 'toolu_invalid' }, // Missing name + ], + }, + }); + + const event = parser.parseJsonLine(line); + expect(event?.toolUseBlocks).toHaveLength(1); + expect(event?.toolUseBlocks?.[0].name).toBe('Read'); + }); + + it('should extract tool_use blocks even with no text content', () => { + const line = JSON.stringify({ + type: 'assistant', + message: { + content: [{ type: 'tool_use', id: 'toolu_1', name: 'Read', input: { file: 'x.ts' } }], + }, + }); + + const event = parser.parseJsonLine(line); + expect(event?.type).toBe('text'); + expect(event?.text).toBe(''); + expect(event?.toolUseBlocks).toHaveLength(1); + }); + }); + describe('edge cases', () => { it('should handle empty result string', () => { const event = parser.parseJsonLine( diff --git a/src/__tests__/performance/ThinkingStreamPerformance.test.tsx b/src/__tests__/performance/ThinkingStreamPerformance.test.tsx new file mode 100644 index 00000000..9a4fa27f --- /dev/null +++ b/src/__tests__/performance/ThinkingStreamPerformance.test.tsx @@ -0,0 +1,709 @@ +/** + * @file ThinkingStreamPerformance.test.tsx + * @description Performance tests for the Show Thinking feature with large streams + * + * Task 6.5 - Test performance with large thinking streams (10-50KB+ per response): + * - RAF throttling efficiency for rapid chunk arrivals + * - Memory usage during large stream accumulation + * - UI responsiveness with 10KB, 25KB, and 50KB+ thinking content + * - Chunk batching effectiveness + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { render, screen, waitFor, act } from '@testing-library/react'; +import React from 'react'; +import { LayerStackProvider } from '../../renderer/contexts/LayerStackContext'; +import type { Theme, LogEntry, Session, AITab } from '../../renderer/types'; + +// ============================================================================ +// Test Utilities +// ============================================================================ + +/** + * Generate a large thinking stream content of specified size + * Simulates Codex reasoning tokens which can be very verbose + */ +const generateThinkingContent = (sizeKb: number): string => { + const targetBytes = sizeKb * 1024; + const reasoningPatterns = [ + 'Let me analyze this step by step. ', + 'First, I need to understand the context. ', + 'Looking at the code structure, I can see that ', + 'The implementation requires considering several factors: ', + 'Based on my analysis, the approach should be ', + 'Examining the dependencies and their interactions... ', + 'This function handles the core logic for ', + 'The data flow follows this pattern: ', + 'Considering edge cases such as null values and errors... ', + 'The optimal solution would involve ', + ]; + + let content = ''; + let patternIndex = 0; + + while (content.length < targetBytes) { + content += reasoningPatterns[patternIndex % reasoningPatterns.length]; + patternIndex++; + } + + return content.slice(0, targetBytes); +}; + +/** + * Split content into chunks of varying sizes (simulating real streaming) + */ +const splitIntoChunks = (content: string, avgChunkSize: number): string[] => { + const chunks: string[] = []; + let position = 0; + + while (position < content.length) { + // Vary chunk size by ±50% to simulate real network conditions + const variation = 0.5 + Math.random(); + const chunkSize = Math.floor(avgChunkSize * variation); + chunks.push(content.slice(position, position + chunkSize)); + position += chunkSize; + } + + return chunks; +}; + +// Create mock theme +const createMockTheme = (): Theme => ({ + id: 'test-theme', + name: 'Test Theme', + mode: 'dark', + colors: { + bgMain: '#1a1a1a', + bgPanel: '#252525', + bgActivity: '#2d2d2d', + textMain: '#ffffff', + textDim: '#888888', + accent: '#0066ff', + accentText: '#4488ff', + accentForeground: '#ffffff', + border: '#333333', + highlight: '#0066ff33', + success: '#00aa00', + warning: '#ffaa00', + error: '#ff0000', + }, +}); + +// Mock the thinking chunk handler logic (extracted from App.tsx) +interface ThinkingChunkBuffer { + buffer: Map; + rafId: number | null; +} + +const createThinkingChunkHandler = ( + onUpdate: (sessionId: string, tabId: string, content: string) => void +) => { + const state: ThinkingChunkBuffer = { + buffer: new Map(), + rafId: null, + }; + + const handleChunk = (sessionId: string, tabId: string, content: string) => { + const bufferKey = `${sessionId}:${tabId}`; + const existingContent = state.buffer.get(bufferKey) || ''; + state.buffer.set(bufferKey, existingContent + content); + + if (state.rafId === null) { + state.rafId = requestAnimationFrame(() => { + const chunksToProcess = new Map(state.buffer); + state.buffer.clear(); + state.rafId = null; + + for (const [key, bufferedContent] of chunksToProcess) { + const [sid, tid] = key.split(':'); + onUpdate(sid, tid, bufferedContent); + } + }); + } + }; + + const cleanup = () => { + if (state.rafId !== null) { + cancelAnimationFrame(state.rafId); + state.rafId = null; + } + state.buffer.clear(); + }; + + return { handleChunk, cleanup, getBufferSize: () => state.buffer.size }; +}; + +// ============================================================================ +// Performance Test Component +// ============================================================================ + +interface ThinkingDisplayProps { + logs: LogEntry[]; + theme: Theme; +} + +const ThinkingDisplay: React.FC = ({ logs, theme }) => { + const thinkingLogs = logs.filter(l => l.source === 'thinking'); + + return ( +
+ {thinkingLogs.map(log => ( +
+
+ + thinking + +
+
+ {log.text} +
+
+ ))} +
+ ); +}; + +// ============================================================================ +// Tests +// ============================================================================ + +describe('ThinkingStreamPerformance', () => { + let mockRaf: (callback: FrameRequestCallback) => number; + let mockCancelRaf: (id: number) => void; + let rafCallbacks: Map; + let rafIdCounter: number; + + beforeEach(() => { + vi.useFakeTimers(); + + // Set up RAF mock with control over when frames execute + rafCallbacks = new Map(); + rafIdCounter = 0; + + mockRaf = vi.fn((callback: FrameRequestCallback) => { + const id = ++rafIdCounter; + rafCallbacks.set(id, callback); + return id; + }); + + mockCancelRaf = vi.fn((id: number) => { + rafCallbacks.delete(id); + }); + + // Replace global functions + global.requestAnimationFrame = mockRaf; + global.cancelAnimationFrame = mockCancelRaf; + }); + + afterEach(() => { + vi.useRealTimers(); + rafCallbacks.clear(); + }); + + // Helper to flush all pending RAF callbacks + const flushRafCallbacks = () => { + const callbacks = Array.from(rafCallbacks.values()); + rafCallbacks.clear(); + const timestamp = performance.now(); + callbacks.forEach(cb => cb(timestamp)); + }; + + describe('RAF Throttling Efficiency', () => { + it('should batch multiple rapid chunk arrivals into single RAF callback', () => { + const updates: Array<{ sessionId: string; tabId: string; content: string }> = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (sessionId, tabId, content) => updates.push({ sessionId, tabId, content }) + ); + + // Simulate 100 rapid chunks arriving within the same frame + const chunks = splitIntoChunks(generateThinkingContent(10), 100); + + for (const chunk of chunks) { + handleChunk('session-1', 'tab-1', chunk); + } + + // Before RAF fires, no updates should have happened + expect(updates.length).toBe(0); + expect(mockRaf).toHaveBeenCalledTimes(1); // Only one RAF scheduled + + // Execute the RAF callback + flushRafCallbacks(); + + // All chunks should be batched into a single update + expect(updates.length).toBe(1); + expect(updates[0].content.length).toBeGreaterThan(chunks.length * 50); // Batched content + + cleanup(); + }); + + it('should handle chunks for multiple sessions simultaneously', () => { + const updates: Array<{ sessionId: string; tabId: string; content: string }> = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (sessionId, tabId, content) => updates.push({ sessionId, tabId, content }) + ); + + // Send chunks to 3 different sessions + handleChunk('session-1', 'tab-1', 'Content for session 1'); + handleChunk('session-2', 'tab-1', 'Content for session 2'); + handleChunk('session-3', 'tab-1', 'Content for session 3'); + handleChunk('session-1', 'tab-1', ' - more content'); // Additional for session 1 + + expect(mockRaf).toHaveBeenCalledTimes(1); + + flushRafCallbacks(); + + // Should have 3 updates (one per session) + expect(updates.length).toBe(3); + + const session1Update = updates.find(u => u.sessionId === 'session-1'); + expect(session1Update?.content).toBe('Content for session 1 - more content'); + + cleanup(); + }); + + it('should not schedule new RAF while one is pending', () => { + const updates: Array<{ sessionId: string; tabId: string; content: string }> = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (sessionId, tabId, content) => updates.push({ sessionId, tabId, content }) + ); + + // Send many chunks + for (let i = 0; i < 1000; i++) { + handleChunk('session-1', 'tab-1', `chunk-${i} `); + } + + // Should only have one RAF scheduled despite 1000 chunks + expect(mockRaf).toHaveBeenCalledTimes(1); + + flushRafCallbacks(); + + expect(updates.length).toBe(1); + expect(updates[0].content).toContain('chunk-0'); + expect(updates[0].content).toContain('chunk-999'); + + cleanup(); + }); + }); + + describe('Large Stream Handling', () => { + it('should handle 10KB thinking stream efficiently', () => { + const updates: string[] = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + const content = generateThinkingContent(10); + const chunks = splitIntoChunks(content, 256); // Average 256 bytes per chunk + + const startTime = performance.now(); + + for (const chunk of chunks) { + handleChunk('session-1', 'tab-1', chunk); + } + + flushRafCallbacks(); + + const elapsed = performance.now() - startTime; + + // Performance assertion: should process 10KB in under 100ms + expect(elapsed).toBeLessThan(100); + expect(updates.length).toBe(1); + expect(updates[0].length).toBe(content.length); + + cleanup(); + }); + + it('should handle 25KB thinking stream efficiently', () => { + const updates: string[] = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + const content = generateThinkingContent(25); + const chunks = splitIntoChunks(content, 512); + + const startTime = performance.now(); + + for (const chunk of chunks) { + handleChunk('session-1', 'tab-1', chunk); + } + + flushRafCallbacks(); + + const elapsed = performance.now() - startTime; + + // Performance assertion: should process 25KB in under 150ms + expect(elapsed).toBeLessThan(150); + expect(updates.length).toBe(1); + expect(updates[0].length).toBe(content.length); + + cleanup(); + }); + + it('should handle 50KB thinking stream (Codex reasoning) efficiently', () => { + const updates: string[] = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + const content = generateThinkingContent(50); + const chunks = splitIntoChunks(content, 1024); + + const startTime = performance.now(); + + for (const chunk of chunks) { + handleChunk('session-1', 'tab-1', chunk); + } + + flushRafCallbacks(); + + const elapsed = performance.now() - startTime; + + // Performance assertion: should process 50KB in under 200ms + expect(elapsed).toBeLessThan(200); + expect(updates.length).toBe(1); + expect(updates[0].length).toBe(content.length); + + cleanup(); + }); + + it('should handle 100KB+ extreme stream without hanging', () => { + const updates: string[] = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + const content = generateThinkingContent(100); + const chunks = splitIntoChunks(content, 2048); + + const startTime = performance.now(); + + for (const chunk of chunks) { + handleChunk('session-1', 'tab-1', chunk); + } + + flushRafCallbacks(); + + const elapsed = performance.now() - startTime; + + // Performance assertion: should process 100KB in under 500ms + expect(elapsed).toBeLessThan(500); + expect(updates.length).toBe(1); + expect(updates[0].length).toBe(content.length); + + cleanup(); + }); + }); + + describe('Memory Efficiency', () => { + it('should clear buffer after processing', () => { + const updates: string[] = []; + const { handleChunk, cleanup, getBufferSize } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + handleChunk('session-1', 'tab-1', 'test content'); + expect(getBufferSize()).toBe(1); + + flushRafCallbacks(); + + expect(getBufferSize()).toBe(0); + + cleanup(); + }); + + it('should cleanup properly on unmount', () => { + const { handleChunk, cleanup } = createThinkingChunkHandler( + () => {} + ); + + // Schedule some chunks + handleChunk('session-1', 'tab-1', 'test'); + + // Cleanup before RAF fires + cleanup(); + + // RAF should have been cancelled + expect(mockCancelRaf).toHaveBeenCalled(); + }); + + it('should not accumulate memory with repeated stream cycles', () => { + const updates: string[] = []; + const { handleChunk, cleanup, getBufferSize } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + // Simulate multiple complete stream cycles + for (let cycle = 0; cycle < 10; cycle++) { + const content = generateThinkingContent(5); + const chunks = splitIntoChunks(content, 512); + + for (const chunk of chunks) { + handleChunk('session-1', 'tab-1', chunk); + } + + flushRafCallbacks(); + + // Buffer should be empty after each cycle + expect(getBufferSize()).toBe(0); + } + + expect(updates.length).toBe(10); + + cleanup(); + }); + }); + + describe('UI Rendering Performance', () => { + it('should render 10KB thinking content without performance issues', () => { + const theme = createMockTheme(); + const content = generateThinkingContent(10); + const logs: LogEntry[] = [{ + id: 'thinking-1', + timestamp: Date.now(), + source: 'thinking', + text: content, + }]; + + const startTime = performance.now(); + + const { container } = render( + + + + ); + + const elapsed = performance.now() - startTime; + + // Should render in under 100ms + expect(elapsed).toBeLessThan(100); + + const thinkingContent = screen.getByTestId('thinking-content'); + expect(thinkingContent.textContent?.length).toBe(content.length); + }); + + it('should render 50KB thinking content without hanging', () => { + const theme = createMockTheme(); + const content = generateThinkingContent(50); + const logs: LogEntry[] = [{ + id: 'thinking-1', + timestamp: Date.now(), + source: 'thinking', + text: content, + }]; + + const startTime = performance.now(); + + render( + + + + ); + + const elapsed = performance.now() - startTime; + + // Should render in under 500ms even for large content + expect(elapsed).toBeLessThan(500); + }); + + it('should handle incremental content updates efficiently', async () => { + const theme = createMockTheme(); + const logs: LogEntry[] = [{ + id: 'thinking-1', + timestamp: Date.now(), + source: 'thinking', + text: 'Initial content', + }]; + + const { rerender } = render( + + + + ); + + // Simulate incremental updates (like streaming) + const updateTimes: number[] = []; + + for (let i = 0; i < 20; i++) { + const startTime = performance.now(); + + // Append more content + logs[0].text += generateThinkingContent(1); + + rerender( + + + + ); + + updateTimes.push(performance.now() - startTime); + } + + // Average update time should be under 50ms + const avgTime = updateTimes.reduce((a, b) => a + b, 0) / updateTimes.length; + expect(avgTime).toBeLessThan(50); + }); + }); + + describe('Chunk Batching Edge Cases', () => { + it('should handle empty chunks gracefully', () => { + const updates: string[] = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + handleChunk('session-1', 'tab-1', ''); + handleChunk('session-1', 'tab-1', 'actual content'); + handleChunk('session-1', 'tab-1', ''); + + flushRafCallbacks(); + + expect(updates.length).toBe(1); + expect(updates[0]).toBe('actual content'); + + cleanup(); + }); + + it('should handle very small chunks (1-5 bytes) efficiently', () => { + const updates: string[] = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + const content = generateThinkingContent(5); + + // Split into very small chunks (simulating character-by-character streaming) + for (let i = 0; i < content.length; i++) { + handleChunk('session-1', 'tab-1', content[i]); + } + + expect(mockRaf).toHaveBeenCalledTimes(1); // Still just one RAF + + flushRafCallbacks(); + + expect(updates.length).toBe(1); + expect(updates[0]).toBe(content); + + cleanup(); + }); + + it('should handle interleaved chunks from multiple tabs', () => { + const updates: Array<{ sessionId: string; tabId: string; content: string }> = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (sessionId, tabId, content) => updates.push({ sessionId, tabId, content }) + ); + + // Interleave chunks from different tabs + for (let i = 0; i < 100; i++) { + handleChunk('session-1', `tab-${i % 3}`, `chunk-${i} `); + } + + flushRafCallbacks(); + + // Should have 3 updates (one per tab) + expect(updates.length).toBe(3); + + // Verify each tab got its chunks + const tab0Update = updates.find(u => u.tabId === 'tab-0'); + const tab1Update = updates.find(u => u.tabId === 'tab-1'); + const tab2Update = updates.find(u => u.tabId === 'tab-2'); + + expect(tab0Update?.content).toContain('chunk-0'); + expect(tab1Update?.content).toContain('chunk-1'); + expect(tab2Update?.content).toContain('chunk-2'); + + cleanup(); + }); + }); + + describe('Stress Testing', () => { + it('should handle sustained high-frequency chunk arrivals', () => { + const updates: string[] = []; + const { handleChunk, cleanup } = createThinkingChunkHandler( + (_sid, _tid, content) => updates.push(content) + ); + + // Simulate 10 seconds of sustained streaming at 60fps + // Each frame gets 10 chunks + const framesCount = 600; // 10 seconds at 60fps + const chunksPerFrame = 10; + + const startTime = performance.now(); + + for (let frame = 0; frame < framesCount; frame++) { + for (let chunk = 0; chunk < chunksPerFrame; chunk++) { + handleChunk('session-1', 'tab-1', `frame-${frame}-chunk-${chunk} `); + } + + // Flush RAF to simulate frame completion + flushRafCallbacks(); + } + + const elapsed = performance.now() - startTime; + + // Should process all frames in reasonable time (under 5 seconds with fake timers) + expect(elapsed).toBeLessThan(5000); + expect(updates.length).toBe(framesCount); + + cleanup(); + }); + + it('should maintain consistency under concurrent session load', () => { + const updates: Map = new Map(); + const { handleChunk, cleanup } = createThinkingChunkHandler( + (sessionId, _tabId, content) => { + const sessionUpdates = updates.get(sessionId) || []; + sessionUpdates.push(content); + updates.set(sessionId, sessionUpdates); + } + ); + + const sessionCount = 10; + const chunksPerSession = 100; + + // Send chunks to many sessions + for (let chunk = 0; chunk < chunksPerSession; chunk++) { + for (let session = 0; session < sessionCount; session++) { + handleChunk(`session-${session}`, 'tab-1', `s${session}c${chunk} `); + } + + // Flush every 10 chunks + if ((chunk + 1) % 10 === 0) { + flushRafCallbacks(); + } + } + + // Final flush + flushRafCallbacks(); + + // Each session should have received all its chunks + for (let session = 0; session < sessionCount; session++) { + const sessionUpdates = updates.get(`session-${session}`); + expect(sessionUpdates).toBeDefined(); + + // Combine all updates for this session + const fullContent = sessionUpdates!.join(''); + expect(fullContent).toContain(`s${session}c0`); + expect(fullContent).toContain(`s${session}c99`); + } + + cleanup(); + }); + }); +}); diff --git a/src/__tests__/renderer/hooks/useAgentCapabilities.test.ts b/src/__tests__/renderer/hooks/useAgentCapabilities.test.ts index 6db24fa2..7b6e581c 100644 --- a/src/__tests__/renderer/hooks/useAgentCapabilities.test.ts +++ b/src/__tests__/renderer/hooks/useAgentCapabilities.test.ts @@ -23,6 +23,7 @@ const baseCapabilities = { supportsResultMessages: true, supportsModelSelection: false, supportsStreamJsonInput: true, + supportsThinkingDisplay: false, // Added in Show Thinking feature }; describe('useAgentCapabilities', () => { diff --git a/src/main/agent-capabilities.ts b/src/main/agent-capabilities.ts index e2a7fb93..96a9acee 100644 --- a/src/main/agent-capabilities.ts +++ b/src/main/agent-capabilities.ts @@ -60,6 +60,9 @@ export interface AgentCapabilities { /** Agent supports --input-format stream-json for image input via stdin */ supportsStreamJsonInput: boolean; + + /** Agent emits streaming thinking/reasoning content that can be displayed */ + supportsThinkingDisplay: boolean; } /** @@ -83,6 +86,7 @@ export const DEFAULT_CAPABILITIES: AgentCapabilities = { supportsResultMessages: false, supportsModelSelection: false, supportsStreamJsonInput: false, + supportsThinkingDisplay: false, }; /** @@ -118,6 +122,7 @@ export const AGENT_CAPABILITIES: Record = { supportsResultMessages: true, // "result" event type supportsModelSelection: false, // Model is configured via Anthropic account supportsStreamJsonInput: true, // --input-format stream-json for images via stdin + supportsThinkingDisplay: true, // Emits streaming assistant messages }, /** @@ -141,6 +146,7 @@ export const AGENT_CAPABILITIES: Record = { supportsResultMessages: false, supportsModelSelection: false, supportsStreamJsonInput: false, + supportsThinkingDisplay: false, // Terminal is not an AI agent }, /** @@ -167,6 +173,7 @@ export const AGENT_CAPABILITIES: Record = { supportsResultMessages: false, // All messages are agent_message type (no distinct result) - Verified supportsModelSelection: true, // -m, --model flag - Documented supportsStreamJsonInput: false, // Uses -i, --image flag instead + supportsThinkingDisplay: true, // Emits reasoning tokens (o3/o4-mini) }, /** @@ -192,6 +199,7 @@ export const AGENT_CAPABILITIES: Record = { supportsResultMessages: false, supportsModelSelection: false, // Not yet investigated supportsStreamJsonInput: false, + supportsThinkingDisplay: false, // Not yet investigated }, /** @@ -217,6 +225,7 @@ export const AGENT_CAPABILITIES: Record = { supportsResultMessages: false, supportsModelSelection: false, // Not yet investigated supportsStreamJsonInput: false, + supportsThinkingDisplay: false, // Not yet investigated }, /** @@ -243,6 +252,7 @@ export const AGENT_CAPABILITIES: Record = { supportsResultMessages: false, // Not yet investigated supportsModelSelection: true, // --model flag supportsStreamJsonInput: false, + supportsThinkingDisplay: false, // Not yet investigated }, /** @@ -269,6 +279,7 @@ export const AGENT_CAPABILITIES: Record = { supportsResultMessages: true, // step_finish with part.reason:"stop" - Verified supportsModelSelection: true, // --model provider/model (e.g., 'ollama/qwen3:8b') - Verified supportsStreamJsonInput: false, // Uses -f, --file flag instead + supportsThinkingDisplay: true, // Emits streaming text chunks }, }; diff --git a/src/main/index.ts b/src/main/index.ts index 76a7519b..5ef7b2e3 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -269,6 +269,7 @@ function createWebServer(): WebServer { const tabLogs = activeTab?.logs || []; if (tabLogs.length > 0) { // Find the last stdout/stderr entry from the AI (not user messages) + // Note: 'thinking' logs are already excluded since they have a distinct source type const lastAiLog = [...tabLogs].reverse().find((log: any) => log.source === 'stdout' || log.source === 'stderr' ); @@ -2237,6 +2238,18 @@ function setupProcessListeners() { mainWindow?.webContents.send('process:slash-commands', sessionId, slashCommands); }); + // Handle thinking/streaming content chunks from AI agents + // Emitted when agents produce partial text events (isPartial: true) + // Renderer decides whether to display based on tab's showThinking setting + processManager.on('thinking-chunk', (sessionId: string, content: string) => { + mainWindow?.webContents.send('process:thinking-chunk', sessionId, content); + }); + + // Handle tool execution events (OpenCode, Codex) + processManager.on('tool-execution', (sessionId: string, toolEvent: { toolName: string; state?: unknown; timestamp: number }) => { + mainWindow?.webContents.send('process:tool-execution', sessionId, toolEvent); + }); + // Handle stderr separately from runCommand (for clean command execution) processManager.on('stderr', (sessionId: string, data: string) => { mainWindow?.webContents.send('process:stderr', sessionId, data); diff --git a/src/main/parsers/agent-output-parser.ts b/src/main/parsers/agent-output-parser.ts index adbd2bc5..e46cb2b4 100644 --- a/src/main/parsers/agent-output-parser.ts +++ b/src/main/parsers/agent-output-parser.ts @@ -99,6 +99,17 @@ export interface ParsedEvent { */ isPartial?: boolean; + /** + * Tool use blocks extracted from the message (for agents with mixed content) + * When a message contains both text and tool_use, text goes in 'text' field + * and tool_use blocks are here. Process-manager emits tool-execution for each. + */ + toolUseBlocks?: Array<{ + name: string; + id?: string; + input?: unknown; + }>; + /** * Original event data for debugging * Preserved unchanged from agent output diff --git a/src/main/parsers/claude-output-parser.ts b/src/main/parsers/claude-output-parser.ts index e83ca3a2..127b6db2 100644 --- a/src/main/parsers/claude-output-parser.ts +++ b/src/main/parsers/claude-output-parser.ts @@ -16,6 +16,19 @@ import type { AgentOutputParser, ParsedEvent } from './agent-output-parser'; import { aggregateModelUsage, type ModelStats } from './usage-aggregator'; import { getErrorPatterns, matchErrorPattern } from './error-patterns'; +/** + * Content block in Claude assistant messages + * Can be either text or tool_use blocks + */ +interface ClaudeContentBlock { + type: string; + text?: string; + // Tool use fields + name?: string; + id?: string; + input?: unknown; +} + /** * Raw message structure from Claude Code stream-json output */ @@ -26,7 +39,7 @@ interface ClaudeRawMessage { result?: string; message?: { role?: string; - content?: string | Array<{ type: string; text?: string }>; + content?: string | ClaudeContentBlock[]; }; slash_commands?: string[]; modelUsage?: Record; @@ -115,11 +128,14 @@ export class ClaudeOutputParser implements AgentOutputParser { // Handle assistant messages (streaming partial responses) if (msg.type === 'assistant') { const text = this.extractTextFromMessage(msg); + const toolUseBlocks = this.extractToolUseBlocks(msg); + return { type: 'text', text, sessionId: msg.session_id, isPartial: true, + toolUseBlocks: toolUseBlocks.length > 0 ? toolUseBlocks : undefined, raw: msg, }; } @@ -152,6 +168,26 @@ export class ClaudeOutputParser implements AgentOutputParser { }; } + /** + * Extract tool_use blocks from a Claude assistant message + * These blocks contain tool invocation requests from the AI + */ + private extractToolUseBlocks( + msg: ClaudeRawMessage + ): Array<{ name: string; id?: string; input?: unknown }> { + if (!msg.message?.content || typeof msg.message.content === 'string') { + return []; + } + + return msg.message.content + .filter((block) => block.type === 'tool_use' && block.name) + .map((block) => ({ + name: block.name!, + id: block.id, + input: block.input, + })); + } + /** * Extract text content from a Claude assistant message */ diff --git a/src/main/preload.ts b/src/main/preload.ts index 20dfdbfb..44d24be2 100644 --- a/src/main/preload.ts +++ b/src/main/preload.ts @@ -130,6 +130,19 @@ contextBridge.exposeInMainWorld('maestro', { ipcRenderer.on('process:slash-commands', handler); return () => ipcRenderer.removeListener('process:slash-commands', handler); }, + // Thinking/streaming content chunks from AI agents + // Emitted when agents produce partial text events (isPartial: true) + // Renderer decides whether to display based on tab's showThinking setting + onThinkingChunk: (callback: (sessionId: string, content: string) => void) => { + const handler = (_: any, sessionId: string, content: string) => callback(sessionId, content); + ipcRenderer.on('process:thinking-chunk', handler); + return () => ipcRenderer.removeListener('process:thinking-chunk', handler); + }, + onToolExecution: (callback: (sessionId: string, toolEvent: { toolName: string; state?: unknown; timestamp: number }) => void) => { + const handler = (_: any, sessionId: string, toolEvent: { toolName: string; state?: unknown; timestamp: number }) => callback(sessionId, toolEvent); + ipcRenderer.on('process:tool-execution', handler); + return () => ipcRenderer.removeListener('process:tool-execution', handler); + }, // Remote command execution from web interface // This allows web commands to go through the same code path as desktop commands // inputMode is optional - if provided, renderer should use it instead of session state @@ -1181,6 +1194,8 @@ export interface MaestroAPI { onExit: (callback: (sessionId: string, code: number) => void) => () => void; onSessionId: (callback: (sessionId: string, agentSessionId: string) => void) => () => void; onSlashCommands: (callback: (sessionId: string, slashCommands: string[]) => void) => () => void; + onThinkingChunk: (callback: (sessionId: string, content: string) => void) => () => void; + onToolExecution: (callback: (sessionId: string, toolEvent: { toolName: string; state?: unknown; timestamp: number }) => void) => () => void; onRemoteCommand: (callback: (sessionId: string, command: string) => void) => () => void; onRemoteSwitchMode: (callback: (sessionId: string, mode: 'ai' | 'terminal') => void) => () => void; onRemoteInterrupt: (callback: (sessionId: string) => void) => () => void; diff --git a/src/main/process-manager.ts b/src/main/process-manager.ts index a697d7ab..e7dd68f9 100644 --- a/src/main/process-manager.ts +++ b/src/main/process-manager.ts @@ -755,10 +755,39 @@ export class ProcessManager extends EventEmitter { this.emit('slash-commands', sessionId, slashCommands); } - // Accumulate text from partial streaming events (OpenCode text messages) - // Skip error events - they're handled separately by detectErrorFromLine + // Handle streaming text events (OpenCode, Codex reasoning) + // Emit partial text immediately for real-time streaming UX + // Also accumulate for final result assembly if needed if (event.type === 'text' && event.isPartial && event.text) { + // Emit thinking chunk for real-time display (let renderer decide to display based on tab setting) + this.emit('thinking-chunk', sessionId, event.text); + + // Existing: accumulate for result fallback managedProcess.streamedText = (managedProcess.streamedText || '') + event.text; + // Emit streaming text immediately for real-time display + this.emit('data', sessionId, event.text); + } + + // Handle tool execution events (OpenCode, Codex) + // Emit tool events so UI can display what the agent is doing + if (event.type === 'tool_use' && event.toolName) { + this.emit('tool-execution', sessionId, { + toolName: event.toolName, + state: event.toolState, + timestamp: Date.now(), + }); + } + + // Handle tool_use blocks embedded in text events (Claude Code mixed content) + // Claude Code returns text with toolUseBlocks array attached + if (event.toolUseBlocks?.length) { + for (const tool of event.toolUseBlocks) { + this.emit('tool-execution', sessionId, { + toolName: tool.name, + state: { status: 'running', input: tool.input }, + timestamp: Date.now(), + }); + } } // Skip processing error events further - they're handled by agent-error emission diff --git a/src/renderer/App.tsx b/src/renderer/App.tsx index babca089..a50e8dc9 100644 --- a/src/renderer/App.tsx +++ b/src/renderer/App.tsx @@ -187,6 +187,7 @@ export default function MaestroConsole() { enterToSendAI, setEnterToSendAI, enterToSendTerminal, setEnterToSendTerminal, defaultSaveToHistory, setDefaultSaveToHistory, + defaultShowThinking, setDefaultShowThinking, leftSidebarWidth, setLeftSidebarWidth, rightPanelWidth, setRightPanelWidth, markdownEditMode, setMarkdownEditMode, @@ -1100,6 +1101,7 @@ export default function MaestroConsole() { : getActiveTab(currentSession); const logs = completedTab?.logs || []; const lastUserLog = logs.filter(log => log.source === 'user').pop(); + // Find last AI response: 'stdout' or 'ai' source (note: 'thinking' logs are already excluded since they have a distinct source type) const lastAiLog = logs.filter(log => log.source === 'stdout' || log.source === 'ai').pop(); // Use the completed tab's thinkingStartTime for accurate per-tab duration const completedTabData = currentSession.aiTabs?.find(tab => tab.id === tabIdFromSession); @@ -1820,6 +1822,129 @@ export default function MaestroConsole() { setAgentErrorModalSessionId(actualSessionId); }); + // Handle thinking/streaming content chunks from AI agents + // Only appends to logs if the tab has showThinking enabled + // THROTTLED: Uses requestAnimationFrame to batch rapid chunk arrivals (Phase 6.4) + const unsubscribeThinkingChunk = window.maestro.process.onThinkingChunk?.((sessionId: string, content: string) => { + // Parse sessionId to get actual session ID and tab ID (format: {id}-ai-{tabId}) + const aiTabMatch = sessionId.match(/^(.+)-ai-(.+)$/); + if (!aiTabMatch) return; // Only handle AI tab messages + + const actualSessionId = aiTabMatch[1]; + const tabId = aiTabMatch[2]; + const bufferKey = `${actualSessionId}:${tabId}`; + + // Buffer the chunk - accumulate if there's already content for this session+tab + const existingContent = thinkingChunkBufferRef.current.get(bufferKey) || ''; + thinkingChunkBufferRef.current.set(bufferKey, existingContent + content); + + // Schedule a single RAF callback to process all buffered chunks + // This naturally throttles to ~60fps (16.67ms) and batches multiple rapid arrivals + if (thinkingChunkRafIdRef.current === null) { + thinkingChunkRafIdRef.current = requestAnimationFrame(() => { + // Process all buffered chunks in a single setSessions call + const buffer = thinkingChunkBufferRef.current; + if (buffer.size === 0) { + thinkingChunkRafIdRef.current = null; + return; + } + + // Take a snapshot and clear the buffer + const chunksToProcess = new Map(buffer); + buffer.clear(); + thinkingChunkRafIdRef.current = null; + + setSessions(prev => prev.map(s => { + // Check if any buffered chunks are for this session + let hasChanges = false; + for (const [key] of chunksToProcess) { + if (key.startsWith(s.id + ':')) { + hasChanges = true; + break; + } + } + if (!hasChanges) return s; + + // Process each chunk for this session + let updatedTabs = s.aiTabs; + for (const [key, bufferedContent] of chunksToProcess) { + const [chunkSessionId, chunkTabId] = key.split(':'); + if (chunkSessionId !== s.id) continue; + + const targetTab = updatedTabs.find(t => t.id === chunkTabId); + if (!targetTab) continue; + + // Only append if thinking is enabled for this tab + if (!targetTab.showThinking) continue; + + // Find the last log entry - if it's a thinking entry, append to it + const lastLog = targetTab.logs[targetTab.logs.length - 1]; + if (lastLog?.source === 'thinking') { + // Append to existing thinking block + updatedTabs = updatedTabs.map(tab => + tab.id === chunkTabId + ? { ...tab, logs: [...tab.logs.slice(0, -1), { ...lastLog, text: lastLog.text + bufferedContent }] } + : tab + ); + } else { + // Create new thinking block + const newLog: LogEntry = { + id: generateId(), + timestamp: Date.now(), + source: 'thinking', + text: bufferedContent + }; + updatedTabs = updatedTabs.map(tab => + tab.id === chunkTabId + ? { ...tab, logs: [...tab.logs, newLog] } + : tab + ); + } + } + + return updatedTabs === s.aiTabs ? s : { ...s, aiTabs: updatedTabs }; + })); + }); + } + }); + + // Handle tool execution events from AI agents + // Only appends to logs if the tab has showThinking enabled (tools shown alongside thinking) + const unsubscribeToolExecution = window.maestro.process.onToolExecution?.((sessionId: string, toolEvent: { toolName: string; state?: unknown; timestamp: number }) => { + // Parse sessionId to get actual session ID and tab ID (format: {id}-ai-{tabId}) + const aiTabMatch = sessionId.match(/^(.+)-ai-(.+)$/); + if (!aiTabMatch) return; // Only handle AI tab messages + + const actualSessionId = aiTabMatch[1]; + const tabId = aiTabMatch[2]; + + setSessions(prev => prev.map(s => { + if (s.id !== actualSessionId) return s; + + const targetTab = s.aiTabs.find(t => t.id === tabId); + if (!targetTab?.showThinking) return s; // Only show if thinking enabled + + const toolLog: LogEntry = { + id: `tool-${Date.now()}-${toolEvent.toolName}`, + timestamp: toolEvent.timestamp, + source: 'tool', + text: toolEvent.toolName, + metadata: { + toolState: toolEvent.state as NonNullable['toolState'], + } + }; + + return { + ...s, + aiTabs: s.aiTabs.map(tab => + tab.id === tabId + ? { ...tab, logs: [...tab.logs, toolLog] } + : tab + ) + }; + })); + }); + // Cleanup listeners on unmount return () => { unsubscribeData(); @@ -1830,6 +1955,14 @@ export default function MaestroConsole() { unsubscribeCommandExit(); unsubscribeUsage(); unsubscribeAgentError(); + unsubscribeThinkingChunk?.(); + unsubscribeToolExecution?.(); + // Cancel any pending thinking chunk RAF and clear buffer (Phase 6.4) + if (thinkingChunkRafIdRef.current !== null) { + cancelAnimationFrame(thinkingChunkRafIdRef.current); + thinkingChunkRafIdRef.current = null; + } + thinkingChunkBufferRef.current.clear(); }; }, []); @@ -1976,6 +2109,11 @@ export default function MaestroConsole() { const pauseBatchOnErrorRef = useRef<((sessionId: string, error: AgentError, documentIndex: number, taskDescription?: string) => void) | null>(null); const getBatchStateRef = useRef<((sessionId: string) => BatchRunState) | null>(null); + // Refs for throttled thinking chunk updates (Phase 6.4) + // Buffer chunks per session+tab and use requestAnimationFrame to batch UI updates + const thinkingChunkBufferRef = useRef>(new Map()); // Key: "sessionId:tabId", Value: accumulated content + const thinkingChunkRafIdRef = useRef(null); + // Expose addToast to window for debugging/testing useEffect(() => { (window as any).__maestroDebug = { @@ -2208,14 +2346,14 @@ export default function MaestroConsole() { // Create a new tab in the session to start fresh setSessions(prev => prev.map(s => { if (s.id !== sessionId) return s; - const result = createTab(s); + const result = createTab(s, { saveToHistory: defaultSaveToHistory, showThinking: defaultShowThinking }); if (!result) return s; return result.session; })); // Focus the input after creating new tab setTimeout(() => inputRef.current?.focus(), 0); - }, [sessions, handleClearAgentError]); + }, [sessions, handleClearAgentError, defaultSaveToHistory, defaultShowThinking]); // Handler to retry after error (recovery action) const handleRetryAfterError = useCallback((sessionId: string) => { @@ -2302,6 +2440,7 @@ export default function MaestroConsole() { setSessions, setActiveSessionId, defaultSaveToHistory, + defaultShowThinking, }); // Web broadcasting hook - handles external history change notifications @@ -2604,6 +2743,7 @@ export default function MaestroConsole() { setAgentSessionsOpen, rightPanelRef, defaultSaveToHistory, + defaultShowThinking, }); // Note: spawnBackgroundSynopsisRef and spawnAgentWithPromptRef are now updated in useAgentExecution hook @@ -5452,8 +5592,10 @@ export default function MaestroConsole() { return { ...tab, state: 'busy' as const, thinkingStartTime: Date.now() }; } // Set any other busy tabs to idle (they were interrupted) and add canceled log + // Also clear any thinking/tool logs since the process was interrupted if (tab.state === 'busy') { - const updatedLogs = canceledLog ? [...tab.logs, canceledLog] : tab.logs; + const logsWithoutThinkingOrTools = tab.logs.filter(log => log.source !== 'thinking' && log.source !== 'tool'); + const updatedLogs = canceledLog ? [...logsWithoutThinkingOrTools, canceledLog] : logsWithoutThinkingOrTools; return { ...tab, state: 'idle' as const, thinkingStartTime: undefined, logs: updatedLogs }; } return tab; @@ -5488,18 +5630,23 @@ export default function MaestroConsole() { } // No queued items, just go to idle and add canceled log to the active tab + // Also clear any thinking/tool logs since the process was interrupted const activeTabForCancel = getActiveTab(s); const updatedAiTabsForIdle = canceledLog && activeTabForCancel - ? s.aiTabs.map(tab => - tab.id === activeTabForCancel.id - ? { ...tab, logs: [...tab.logs, canceledLog], state: 'idle' as const, thinkingStartTime: undefined } - : tab - ) - : s.aiTabs.map(tab => - tab.state === 'busy' - ? { ...tab, state: 'idle' as const, thinkingStartTime: undefined } - : tab - ); + ? s.aiTabs.map(tab => { + if (tab.id === activeTabForCancel.id) { + const logsWithoutThinkingOrTools = tab.logs.filter(log => log.source !== 'thinking' && log.source !== 'tool'); + return { ...tab, logs: [...logsWithoutThinkingOrTools, canceledLog], state: 'idle' as const, thinkingStartTime: undefined }; + } + return tab; + }) + : s.aiTabs.map(tab => { + if (tab.state === 'busy') { + const logsWithoutThinkingOrTools = tab.logs.filter(log => log.source !== 'thinking' && log.source !== 'tool'); + return { ...tab, state: 'idle' as const, thinkingStartTime: undefined, logs: logsWithoutThinkingOrTools }; + } + return tab; + }); return { ...s, @@ -5550,14 +5697,18 @@ export default function MaestroConsole() { setSessions(prev => prev.map(s => { if (s.id !== activeSession.id) return s; - // Add kill log to the appropriate place + // Add kill log to the appropriate place and clear thinking/tool logs let updatedSession = { ...s }; if (currentMode === 'ai') { const tab = getActiveTab(s); if (tab) { - updatedSession.aiTabs = s.aiTabs.map(t => - t.id === tab.id ? { ...t, logs: [...t.logs, killLog] } : t - ); + updatedSession.aiTabs = s.aiTabs.map(t => { + if (t.id === tab.id) { + const logsWithoutThinkingOrTools = t.logs.filter(log => log.source !== 'thinking' && log.source !== 'tool'); + return { ...t, logs: [...logsWithoutThinkingOrTools, killLog] }; + } + return t; + }); } } else { updatedSession.shellLogs = [...s.shellLogs, killLog]; @@ -5580,13 +5731,14 @@ export default function MaestroConsole() { }; } - // Set tabs appropriately + // Set tabs appropriately and clear thinking/tool logs from interrupted tabs let updatedAiTabs = updatedSession.aiTabs.map(tab => { if (tab.id === targetTab.id) { return { ...tab, state: 'busy' as const, thinkingStartTime: Date.now() }; } if (tab.state === 'busy') { - return { ...tab, state: 'idle' as const, thinkingStartTime: undefined }; + const logsWithoutThinkingOrTools = tab.logs.filter(log => log.source !== 'thinking' && log.source !== 'tool'); + return { ...tab, state: 'idle' as const, thinkingStartTime: undefined, logs: logsWithoutThinkingOrTools }; } return tab; }); @@ -5619,7 +5771,7 @@ export default function MaestroConsole() { }; } - // No queued items, just go to idle + // No queued items, just go to idle and clear thinking logs if (currentMode === 'ai') { const tab = getActiveTab(s); if (!tab) return { ...updatedSession, state: 'idle', busySource: undefined, thinkingStartTime: undefined }; @@ -5628,9 +5780,13 @@ export default function MaestroConsole() { state: 'idle', busySource: undefined, thinkingStartTime: undefined, - aiTabs: updatedSession.aiTabs.map(t => - t.id === tab.id ? { ...t, state: 'idle' as const, thinkingStartTime: undefined } : t - ) + aiTabs: updatedSession.aiTabs.map(t => { + if (t.id === tab.id) { + const logsWithoutThinkingOrTools = t.logs.filter(log => log.source !== 'thinking' && log.source !== 'tool'); + return { ...t, state: 'idle' as const, thinkingStartTime: undefined, logs: logsWithoutThinkingOrTools }; + } + return t; + }) }; } return { ...updatedSession, state: 'idle', busySource: undefined, thinkingStartTime: undefined }; @@ -5661,9 +5817,14 @@ export default function MaestroConsole() { state: 'idle', busySource: undefined, thinkingStartTime: undefined, - aiTabs: s.aiTabs.map(t => - t.id === tab.id ? { ...t, state: 'idle' as const, thinkingStartTime: undefined, logs: [...t.logs, errorLog] } : t - ) + aiTabs: s.aiTabs.map(t => { + if (t.id === tab.id) { + // Clear thinking/tool logs even on error + const logsWithoutThinkingOrTools = t.logs.filter(log => log.source !== 'thinking' && log.source !== 'tool'); + return { ...t, state: 'idle' as const, thinkingStartTime: undefined, logs: [...logsWithoutThinkingOrTools, errorLog] }; + } + return t; + }) }; } return { ...s, shellLogs: [...s.shellLogs, errorLog], state: 'idle', busySource: undefined, thinkingStartTime: undefined }; @@ -6138,7 +6299,7 @@ export default function MaestroConsole() { processMonitorOpen, logViewerOpen, createGroupModalOpen, confirmModalOpen, renameInstanceModalOpen, renameGroupModalOpen, activeSession, previewFile, fileTreeFilter, fileTreeFilterOpen, gitDiffPreview, gitLogOpen, lightboxImage, hasOpenLayers, hasOpenModal, visibleSessions, sortedSessions, groups, - bookmarksCollapsed, leftSidebarOpen, editingSessionId, editingGroupId, markdownEditMode, defaultSaveToHistory, + bookmarksCollapsed, leftSidebarOpen, editingSessionId, editingGroupId, markdownEditMode, defaultSaveToHistory, defaultShowThinking, setLeftSidebarOpen, setRightPanelOpen, addNewSession, deleteSession, setQuickActionInitialMode, setQuickActionOpen, cycleSession, toggleInputMode, setShortcutsHelpOpen, setSettingsModalOpen, setSettingsTab, setActiveRightTab, handleSetActiveRightTab, setActiveFocus, setBookmarksCollapsed, setGroups, @@ -6479,6 +6640,28 @@ export default function MaestroConsole() { })); } }} + onToggleTabShowThinking={() => { + if (activeSession?.inputMode === 'ai' && activeSession.activeTabId) { + setSessions(prev => prev.map(s => { + if (s.id !== activeSession.id) return s; + return { + ...s, + aiTabs: s.aiTabs.map(tab => { + if (tab.id !== s.activeTabId) return tab; + // When turning OFF, clear any thinking/tool logs + if (tab.showThinking) { + return { + ...tab, + showThinking: false, + logs: tab.logs.filter(l => l.source !== 'thinking' && l.source !== 'tool') + }; + } + return { ...tab, showThinking: true }; + }) + }; + })); + } + }} onOpenTabSwitcher={() => { if (activeSession?.inputMode === 'ai' && activeSession.aiTabs) { setTabSwitcherOpen(true); @@ -7625,7 +7808,7 @@ export default function MaestroConsole() { if (activeSession) { setSessions(prev => prev.map(s => { if (s.id !== activeSession.id) return s; - const result = createTab(s, { saveToHistory: defaultSaveToHistory }); + const result = createTab(s, { saveToHistory: defaultSaveToHistory, showThinking: defaultShowThinking }); if (!result) return s; return result.session; })); @@ -7825,7 +8008,7 @@ export default function MaestroConsole() { // Use functional setState to compute from fresh state (avoids stale closure issues) setSessions(prev => prev.map(s => { if (s.id !== activeSession.id) return s; - const result = createTab(s, { saveToHistory: defaultSaveToHistory }); + const result = createTab(s, { saveToHistory: defaultSaveToHistory, showThinking: defaultShowThinking }); if (!result) return s; return result.session; })); @@ -7952,6 +8135,29 @@ export default function MaestroConsole() { }; })); }} + onToggleTabShowThinking={() => { + if (!activeSession) return; + const activeTab = getActiveTab(activeSession); + if (!activeTab) return; + setSessions(prev => prev.map(s => { + if (s.id !== activeSession.id) return s; + return { + ...s, + aiTabs: s.aiTabs.map(tab => { + if (tab.id !== activeTab.id) return tab; + // When turning OFF, clear any thinking/tool logs + if (tab.showThinking) { + return { + ...tab, + showThinking: false, + logs: tab.logs.filter(l => l.source !== 'thinking' && l.source !== 'tool') + }; + } + return { ...tab, showThinking: true }; + }) + }; + })); + }} onScrollPositionChange={(scrollTop: number) => { if (!activeSession) return; // Save scroll position for the current view (AI tab or terminal) @@ -8413,6 +8619,8 @@ export default function MaestroConsole() { setEnterToSendTerminal={setEnterToSendTerminal} defaultSaveToHistory={defaultSaveToHistory} setDefaultSaveToHistory={setDefaultSaveToHistory} + defaultShowThinking={defaultShowThinking} + setDefaultShowThinking={setDefaultShowThinking} fontFamily={fontFamily} setFontFamily={setFontFamily} fontSize={fontSize} diff --git a/src/renderer/components/InputArea.tsx b/src/renderer/components/InputArea.tsx index 027ec4c4..de9280eb 100644 --- a/src/renderer/components/InputArea.tsx +++ b/src/renderer/components/InputArea.tsx @@ -1,5 +1,5 @@ import React, { useEffect, useMemo } from 'react'; -import { Terminal, Cpu, Keyboard, ImageIcon, X, ArrowUp, Eye, History, File, Folder, GitBranch, Tag, PenLine } from 'lucide-react'; +import { Terminal, Cpu, Keyboard, ImageIcon, X, ArrowUp, Eye, History, File, Folder, GitBranch, Tag, PenLine, Brain } from 'lucide-react'; import type { Session, Theme, BatchRunState } from '../types'; import type { TabCompletionSuggestion, TabCompletionFilter } from '../hooks/useTabCompletion'; import { ThinkingStatusPill } from './ThinkingStatusPill'; @@ -83,6 +83,10 @@ interface InputAreaProps { onOpenPromptComposer?: () => void; // Flash notification callback showFlashNotification?: (message: string) => void; + // Show Thinking toggle (per-tab) + tabShowThinking?: boolean; + onToggleTabShowThinking?: () => void; + supportsThinking?: boolean; // From agent capabilities } export const InputArea = React.memo(function InputArea(props: InputAreaProps) { @@ -110,7 +114,8 @@ export const InputArea = React.memo(function InputArea(props: InputAreaProps) { tabReadOnlyMode = false, onToggleTabReadOnlyMode, tabSaveToHistory = false, onToggleTabSaveToHistory, onOpenPromptComposer, - showFlashNotification + showFlashNotification, + tabShowThinking = false, onToggleTabShowThinking, supportsThinking = false } = props; // Get agent capabilities for conditional feature rendering @@ -730,6 +735,24 @@ export const InputArea = React.memo(function InputArea(props: InputAreaProps) { Read-only )} + {/* Show Thinking toggle - AI mode only, for agents that support it */} + {session.inputMode === 'ai' && supportsThinking && onToggleTabShowThinking && ( + + )}