From b92fa04908c6aff9fac867ecb9990d3cf4c74616 Mon Sep 17 00:00:00 2001 From: Pedram Amini Date: Tue, 6 Jan 2026 09:07:50 -0600 Subject: [PATCH] ## CHANGES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Wizard now auto-continues when AI implies “let me research…” dead-ends 🧠 - Added robust deferred-response phrase detection to keep conversations flowing 🔍 - Introduced “Your turn” indicator when assistant awaits user input 💡 - Updated wizard system prompt to forbid deferred/async promises outright 🚫 - Prevented auto-continue infinite loops with a per-exchange trigger guard 🛡️ - Improved auto-continue UX with delayed send after UI updates ⏱️ - Added full test suite for deferred-response detection patterns ✅ - Strengthened NewInstanceModal tests with remote directory stat validation 📁 - NewInstanceModal tests now wait for debounced remote path validation ⏳ - Expanded icon mocking to include Loader2 for more complete test coverage 🎭 --- .../components/FileExplorerPanel.test.tsx | 2 + .../components/NewInstanceModal.test.tsx | 13 ++ .../Wizard/screens/ConversationScreen.test.ts | 176 ++++++++++++++++++ src/prompts/wizard-system.md | 15 ++ .../Wizard/screens/ConversationScreen.tsx | 102 ++++++++++ 5 files changed, 308 insertions(+) create mode 100644 src/__tests__/renderer/components/Wizard/screens/ConversationScreen.test.ts diff --git a/src/__tests__/renderer/components/FileExplorerPanel.test.tsx b/src/__tests__/renderer/components/FileExplorerPanel.test.tsx index 11175e9c..ffe8cf59 100644 --- a/src/__tests__/renderer/components/FileExplorerPanel.test.tsx +++ b/src/__tests__/renderer/components/FileExplorerPanel.test.tsx @@ -48,6 +48,8 @@ vi.mock('lucide-react', () => ({ ⚠️, X: ({ className, style }: { className?: string; style?: React.CSSProperties }) => , + Loader2: ({ className, style }: { className?: string; style?: React.CSSProperties }) => + , })); // Mock @tanstack/react-virtual for virtualization diff --git 
a/src/__tests__/renderer/components/NewInstanceModal.test.tsx b/src/__tests__/renderer/components/NewInstanceModal.test.tsx index de44ea06..f7785edd 100644 --- a/src/__tests__/renderer/components/NewInstanceModal.test.tsx +++ b/src/__tests__/renderer/components/NewInstanceModal.test.tsx @@ -2310,6 +2310,14 @@ describe('NewInstanceModal', () => { enabled: true, }], }); + // Mock fs.stat to return a valid directory for remote path validation + vi.mocked(window.maestro.fs.stat).mockResolvedValue({ + size: 4096, + createdAt: '2024-01-01T00:00:00.000Z', + modifiedAt: '2024-01-15T12:30:00.000Z', + isDirectory: true, + isFile: false, + }); render( { const dirInput = screen.getByPlaceholderText(/Enter remote path/i); fireEvent.change(dirInput, { target: { value: '/test/path' } }); + // Wait for the remote path validation to complete (debounced 300ms) + await waitFor(() => { + expect(screen.getByText('Remote directory found')).toBeInTheDocument(); + }); + const createButton = screen.getByText('Create Agent'); await act(async () => { fireEvent.click(createButton); diff --git a/src/__tests__/renderer/components/Wizard/screens/ConversationScreen.test.ts b/src/__tests__/renderer/components/Wizard/screens/ConversationScreen.test.ts new file mode 100644 index 00000000..ef30e70a --- /dev/null +++ b/src/__tests__/renderer/components/Wizard/screens/ConversationScreen.test.ts @@ -0,0 +1,176 @@ +/** + * ConversationScreen.test.ts + * + * Tests for the deferred response phrase detection logic used in auto-continue. + * The actual function is internal to ConversationScreen.tsx, so we replicate + * the patterns here for testing purposes. + */ + +import { describe, it, expect } from 'vitest'; + +/** + * Patterns that indicate the AI said it will do something asynchronously. 
+ * Must be kept in sync with ConversationScreen.tsx + */ +const DEFERRED_RESPONSE_PATTERNS = [ + /let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i, + /give me a (?:moment|minute|second)/i, + /i(?:'ll| will) (?:look into|research|investigate|get back|check)/i, + /(?:researching|investigating|looking into) (?:this|that|it)/i, + /let me (?:take a )?(?:closer )?look/i, +]; + +function containsDeferredResponsePhrase(message: string): boolean { + return DEFERRED_RESPONSE_PATTERNS.some((pattern) => pattern.test(message)); +} + +describe('ConversationScreen', () => { + describe('containsDeferredResponsePhrase', () => { + describe('should detect deferred response phrases', () => { + it('detects "let me research"', () => { + expect(containsDeferredResponsePhrase('Let me research this for you.')).toBe(true); + expect(containsDeferredResponsePhrase('let me research the options')).toBe(true); + }); + + it('detects "let me investigate"', () => { + expect(containsDeferredResponsePhrase('Let me investigate that further.')).toBe(true); + }); + + it('detects "let me look into"', () => { + expect(containsDeferredResponsePhrase('Let me look into this more.')).toBe(true); + }); + + it('detects "let me think about"', () => { + expect(containsDeferredResponsePhrase('Let me think about this more carefully.')).toBe(true); + }); + + it('detects "let me analyze"', () => { + expect(containsDeferredResponsePhrase('Let me analyze the requirements.')).toBe(true); + }); + + it('detects "let me examine"', () => { + expect(containsDeferredResponsePhrase('Let me examine the codebase.')).toBe(true); + }); + + it('detects "let me check"', () => { + expect(containsDeferredResponsePhrase('Let me check on that.')).toBe(true); + }); + + it('detects "let me explore"', () => { + expect(containsDeferredResponsePhrase('Let me explore the possibilities.')).toBe(true); + }); + + it('detects "give me a moment"', () => { + expect(containsDeferredResponsePhrase('Give me a 
moment to think.')).toBe(true); + }); + + it('detects "give me a minute"', () => { + expect(containsDeferredResponsePhrase('Give me a minute to process this.')).toBe(true); + }); + + it('detects "give me a second"', () => { + expect(containsDeferredResponsePhrase('Give me a second...')).toBe(true); + }); + + it('detects "I\'ll look into"', () => { + expect(containsDeferredResponsePhrase("I'll look into that for you.")).toBe(true); + }); + + it('detects "I will research"', () => { + expect(containsDeferredResponsePhrase('I will research the best approach.')).toBe(true); + }); + + it('detects "I\'ll get back"', () => { + expect(containsDeferredResponsePhrase("I'll get back to you on that.")).toBe(true); + }); + + it('detects "researching this"', () => { + expect(containsDeferredResponsePhrase('Researching this now...')).toBe(true); + }); + + it('detects "investigating that"', () => { + expect(containsDeferredResponsePhrase('Investigating that issue.')).toBe(true); + }); + + it('detects "looking into it"', () => { + expect(containsDeferredResponsePhrase('Looking into it now.')).toBe(true); + }); + + it('detects "let me take a look"', () => { + expect(containsDeferredResponsePhrase('Let me take a look at the files.')).toBe(true); + }); + + it('detects "let me take a closer look"', () => { + expect(containsDeferredResponsePhrase('Let me take a closer look at this.')).toBe(true); + }); + + it('detects phrases in longer messages', () => { + const longMessage = `Great questions! Let me do some research on the Netflix ecosystem and content metadata APIs to give you solid recommendations. + +I'll investigate: +1. Netflix API/Integration options +2. Content metadata APIs +3. 
Parental control mechanisms + +Give me a moment to research this...`; + expect(containsDeferredResponsePhrase(longMessage)).toBe(true); + }); + }); + + describe('should NOT detect normal conversation phrases', () => { + it('does not match "I can help you with that"', () => { + expect(containsDeferredResponsePhrase('I can help you with that.')).toBe(false); + }); + + it('does not match questions', () => { + expect(containsDeferredResponsePhrase('What type of project is this?')).toBe(false); + }); + + it('does not match analysis statements', () => { + expect(containsDeferredResponsePhrase('Based on what you described, this sounds like a web app.')).toBe(false); + }); + + it('does not match "let me know"', () => { + expect(containsDeferredResponsePhrase('Let me know if you have questions.')).toBe(false); + }); + + it('does not match "let me explain"', () => { + expect(containsDeferredResponsePhrase('Let me explain how this works.')).toBe(false); + }); + + it('does not match "let me summarize"', () => { + expect(containsDeferredResponsePhrase('Let me summarize what I understand.')).toBe(false); + }); + + it('does not match past tense "researched"', () => { + expect(containsDeferredResponsePhrase('I researched this topic yesterday.')).toBe(false); + }); + + it('does not match "looking forward"', () => { + expect(containsDeferredResponsePhrase('Looking forward to working on this!')).toBe(false); + }); + + it('does not match confidence statements', () => { + expect(containsDeferredResponsePhrase("I'm ready to create your Playbook.")).toBe(false); + }); + + it('does not match empty string', () => { + expect(containsDeferredResponsePhrase('')).toBe(false); + }); + }); + + describe('case insensitivity', () => { + it('matches uppercase', () => { + expect(containsDeferredResponsePhrase('LET ME RESEARCH THIS')).toBe(true); + }); + + it('matches mixed case', () => { + expect(containsDeferredResponsePhrase('Let Me Research This')).toBe(true); + }); + + it('matches lowercase', () 
=> { + expect(containsDeferredResponsePhrase('let me research this')).toBe(true); + }); + }); + }); +}); diff --git a/src/prompts/wizard-system.md b/src/prompts/wizard-system.md index 2818fbea..af3e2e67 100644 --- a/src/prompts/wizard-system.md +++ b/src/prompts/wizard-system.md @@ -103,6 +103,21 @@ Through a brief, focused conversation: - Be encouraging and helpful in tone - Once you have enough clarity, indicate you're ready to proceed +### Critical: No Deferred Responses + +**NEVER** say things that imply you'll do something after responding: +- ❌ "Let me research this..." +- ❌ "Give me a moment to investigate..." +- ❌ "I'll look into that and get back to you..." +- ❌ "Let me think about this more..." + +Each response is a **single turn** - you cannot continue working after responding. Instead: +- ✅ Provide your best analysis immediately, with caveats if uncertain +- ✅ Ask clarifying questions if you need more information +- ✅ Say "Based on what I can see, [your analysis]..." rather than promising future research + +If you need information you don't have, ask the user for it directly instead of implying you'll go find it. + ## Response Format You MUST respond with valid JSON in this exact format: diff --git a/src/renderer/components/Wizard/screens/ConversationScreen.tsx b/src/renderer/components/Wizard/screens/ConversationScreen.tsx index cddc5930..d940cb95 100644 --- a/src/renderer/components/Wizard/screens/ConversationScreen.tsx +++ b/src/renderer/components/Wizard/screens/ConversationScreen.tsx @@ -67,6 +67,28 @@ function formatTimestamp(timestamp: number): string { return date.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' }); } +/** + * Patterns that indicate the AI said it will do something asynchronously. + * This is a UX problem because the wizard can't actually support async operations - + * each message is a single turn. If the AI says "let me research this", the user + * is left waiting with no indication that they need to respond. 
+ */ +const DEFERRED_RESPONSE_PATTERNS = [ + /let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i, + /give me a (?:moment|minute|second)/i, + /i(?:'ll| will) (?:look into|research|investigate|get back|check)/i, + /(?:researching|investigating|looking into) (?:this|that|it)/i, + /let me (?:take a )?(?:closer )?look/i, +]; + +/** + * Check if a message contains phrases that imply deferred/async work. + * The wizard can't actually support this - we need to auto-continue. + */ +function containsDeferredResponsePhrase(message: string): boolean { + return DEFERRED_RESPONSE_PATTERNS.some((pattern) => pattern.test(message)); +} + /** * ConfidenceMeter - Horizontal progress bar with gradient fill */ @@ -525,6 +547,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con const [announcement, setAnnouncement] = useState(''); const [announcementKey, setAnnouncementKey] = useState(0); + // Pending auto-continue message (when AI says "let me research this") + const [pendingAutoContinue, setPendingAutoContinue] = useState(null); + // Track previous ready state to avoid duplicate announcements const prevReadyRef = useRef(state.isReadyToProceed); @@ -544,6 +569,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con const inputRef = useRef(null); // Immediate send guard to prevent race conditions from rapid clicking const isSendingRef = useRef(false); + // Track if we've already triggered auto-continue for the current exchange + // This prevents infinite loops if the AI keeps saying "let me research" + const autoContinueTriggeredRef = useRef(false); // Scroll to bottom when messages change const scrollToBottom = useCallback(() => { @@ -559,6 +587,38 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con inputRef.current?.focus(); }, []); + // Handle pending auto-continue (when AI says "let me research this") + // We set the input and call handleSendMessage after 
a delay + useEffect(() => { + if (pendingAutoContinue && !state.isConversationLoading && !isSendingRef.current) { + const message = pendingAutoContinue; + // Small delay to let the UI update and show the AI's response + const timeoutId = setTimeout(() => { + // Clear the pending flag only when the timer fires: clearing it synchronously changes this effect's dependency, and the cleanup below would then cancel the timer before it runs + setPendingAutoContinue(null); + // Set the input value so the auto-send effect picks it up + setInputValue(message); + }, 800); + + return () => clearTimeout(timeoutId); + } + }, [pendingAutoContinue, state.isConversationLoading]); + + // Store handleSendMessage in a ref so we can call it from the effect + const handleSendMessageRef = useRef<(() => void) | null>(null); + + // Effect to trigger send when input is set to the auto-continue message + useEffect(() => { + if ( + inputValue === 'Please proceed with your analysis.' && + !state.isConversationLoading && + !isSendingRef.current && + handleSendMessageRef.current + ) { + handleSendMessageRef.current(); + } + }, [inputValue, state.isConversationLoading]); + // Initialize conversation manager when entering this screen useEffect(() => { let mounted = true; @@ -686,6 +746,12 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con // Set immediate guard before any async work isSendingRef.current = true; + // Reset auto-continue flag if this is a user-initiated message (not auto-continue) + // This allows auto-continue to trigger again for the next exchange if needed + if (trimmedInput !== 'Please proceed with your analysis.') { + autoContinueTriggeredRef.current = false; + } + // Clear input immediately and reset textarea height setInputValue(''); if (inputRef.current) { @@ -846,6 +912,21 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con // Reset error retry count on success setErrorRetryCount(0); + + // Check if the AI said something that implies async work (e.g., "let me research this") + // The wizard can't support async operations - each message is a single turn.
+ // If we detect this pattern and haven't already auto-continued, schedule a follow-up. + const messageContent = sendResult.response.structured?.message || sendResult.response.rawText; + if ( + messageContent && + containsDeferredResponsePhrase(messageContent) && + !autoContinueTriggeredRef.current + ) { + console.log('[ConversationScreen] Detected deferred response phrase, scheduling auto-continue'); + autoContinueTriggeredRef.current = true; + // Set pending auto-continue - an effect will handle actually sending + setPendingAutoContinue('Please proceed with your analysis.'); + } } }, onError: (error) => { @@ -893,6 +974,11 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con setIsReadyToProceed, ]); + // Keep ref updated with current handleSendMessage for auto-continue effect + useEffect(() => { + handleSendMessageRef.current = handleSendMessage; + }, [handleSendMessage]); + /** * Auto-send initial message when continuing with existing docs * This triggers the AI to analyze the docs and provide a synopsis @@ -1367,6 +1453,22 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con borderColor: theme.colors.border, }} > + {/* "Your turn" indicator - shows when AI responded and waiting for user */} + {!state.isConversationLoading && + state.conversationHistory.length > 0 && + state.conversationHistory[state.conversationHistory.length - 1].role === 'assistant' && + state.confidenceLevel < READY_CONFIDENCE_THRESHOLD && ( +
+ + Your turn — continue the conversation +
+ )}