From b92fa04908c6aff9fac867ecb9990d3cf4c74616 Mon Sep 17 00:00:00 2001
From: Pedram Amini <pedram.amini@gmail.com>
Date: Tue, 6 Jan 2026 09:07:50 -0600
Subject: [PATCH] ## CHANGES
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Wizard now auto-continues when AI implies “let me research…” dead-ends 🧠
- Added robust deferred-response phrase detection to keep conversations flowing 🔍
- Introduced “Your turn” indicator when assistant awaits user input 💡
- Updated wizard system prompt to forbid deferred/async promises outright 🚫
- Prevented auto-continue infinite loops with a per-exchange trigger guard 🛡️
- Improved auto-continue UX with delayed send after UI updates ⏱️
- Added full test suite for deferred-response detection patterns ✅
- Strengthened NewInstanceModal tests with remote directory stat validation 📁
- NewInstanceModal tests now wait for debounced remote path validation ⏳
- Expanded icon mocking to include Loader2 for more complete test coverage 🎭
---
 .../components/FileExplorerPanel.test.tsx     |   2 +
 .../components/NewInstanceModal.test.tsx      |  13 ++
 .../Wizard/screens/ConversationScreen.test.ts | 176 ++++++++++++++++++
 src/prompts/wizard-system.md                  |  15 ++
 .../Wizard/screens/ConversationScreen.tsx     | 102 ++++++++++
 5 files changed, 308 insertions(+)
 create mode 100644 src/__tests__/renderer/components/Wizard/screens/ConversationScreen.test.ts
diff --git a/src/__tests__/renderer/components/FileExplorerPanel.test.tsx b/src/__tests__/renderer/components/FileExplorerPanel.test.tsx
index 11175e9c..ffe8cf59 100644
--- a/src/__tests__/renderer/components/FileExplorerPanel.test.tsx
+++ b/src/__tests__/renderer/components/FileExplorerPanel.test.tsx
@@ -48,6 +48,8 @@ vi.mock('lucide-react', () => ({
     <span data-testid="alert-triangle-icon" className={className} style={style}>⚠️</span>,
   X: ({ className, style }: { className?: string; style?: React.CSSProperties }) =>
     <span data-testid="x-icon" className={className} style={style}>✕</span>,
+  Loader2: ({ className, style }: { className?: string; style?: React.CSSProperties }) =>
+    <span data-testid="loader2-icon" className={className} style={style}>⏳</span>,
 }));
 
 // Mock @tanstack/react-virtual for virtualization
diff --git a/src/__tests__/renderer/components/NewInstanceModal.test.tsx b/src/__tests__/renderer/components/NewInstanceModal.test.tsx
index de44ea06..f7785edd 100644
--- a/src/__tests__/renderer/components/NewInstanceModal.test.tsx
+++ b/src/__tests__/renderer/components/NewInstanceModal.test.tsx
@@ -2310,6 +2310,14 @@ describe('NewInstanceModal', () => {
           enabled: true,
         }],
       });
+      // Mock fs.stat to return a valid directory for remote path validation
+      vi.mocked(window.maestro.fs.stat).mockResolvedValue({
+        size: 4096,
+        createdAt: '2024-01-01T00:00:00.000Z',
+        modifiedAt: '2024-01-15T12:30:00.000Z',
+        isDirectory: true,
+        isFile: false,
+      });
 
       render(
         <NewInstanceModal
@@ -2345,6 +2353,11 @@ describe('NewInstanceModal', () => {
       const dirInput = screen.getByPlaceholderText(/Enter remote path/i);
       fireEvent.change(dirInput, { target: { value: '/test/path' } });
 
+      // Wait for the remote path validation to complete (debounced 300ms)
+      await waitFor(() => {
+        expect(screen.getByText('Remote directory found')).toBeInTheDocument();
+      });
+
       const createButton = screen.getByText('Create Agent');
       await act(async () => {
         fireEvent.click(createButton);
diff --git a/src/__tests__/renderer/components/Wizard/screens/ConversationScreen.test.ts b/src/__tests__/renderer/components/Wizard/screens/ConversationScreen.test.ts
new file mode 100644
index 00000000..ef30e70a
--- /dev/null
+++ b/src/__tests__/renderer/components/Wizard/screens/ConversationScreen.test.ts
@@ -0,0 +1,176 @@
+/**
+ * ConversationScreen.test.ts
+ *
+ * Tests for the deferred-response phrase detection used by the wizard's auto-continue.
+ * containsDeferredResponsePhrase is not exported from ConversationScreen.tsx, so these
+ * tests run against a local copy of its patterns and will not catch drift in the original.
+ */
+
+import { describe, it, expect } from 'vitest';
+
+/**
+ * Patterns that indicate the AI said it will do something asynchronously.
+ * Must be kept in sync with DEFERRED_RESPONSE_PATTERNS in ConversationScreen.tsx.
+ */
+const DEFERRED_RESPONSE_PATTERNS = [
+  /let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i,
+  /give me a (?:moment|minute|second)/i,
+  /i(?:'ll| will) (?:look into|research|investigate|get back|check)/i,
+  /(?:researching|investigating|looking into) (?:this|that|it)/i,
+  /let me (?:take a )?(?:closer )?look/i,
+];
+
+function containsDeferredResponsePhrase(message: string): boolean {
+  return DEFERRED_RESPONSE_PATTERNS.some((pattern) => pattern.test(message));
+}
+
+describe('ConversationScreen', () => {
+  describe('containsDeferredResponsePhrase', () => {
+    describe('should detect deferred response phrases', () => {
+      it('detects "let me research"', () => {
+        expect(containsDeferredResponsePhrase('Let me research this for you.')).toBe(true);
+        expect(containsDeferredResponsePhrase('let me research the options')).toBe(true);
+      });
+
+      it('detects "let me investigate"', () => {
+        expect(containsDeferredResponsePhrase('Let me investigate that further.')).toBe(true);
+      });
+
+      it('detects "let me look into"', () => {
+        expect(containsDeferredResponsePhrase('Let me look into this more.')).toBe(true);
+      });
+
+      it('detects "let me think about"', () => {
+        expect(containsDeferredResponsePhrase('Let me think about this more carefully.')).toBe(true);
+      });
+
+      it('detects "let me analyze"', () => {
+        expect(containsDeferredResponsePhrase('Let me analyze the requirements.')).toBe(true);
+      });
+
+      it('detects "let me examine"', () => {
+        expect(containsDeferredResponsePhrase('Let me examine the codebase.')).toBe(true);
+      });
+
+      it('detects "let me check"', () => {
+        expect(containsDeferredResponsePhrase('Let me check on that.')).toBe(true);
+      });
+
+      it('detects "let me explore"', () => {
+        expect(containsDeferredResponsePhrase('Let me explore the possibilities.')).toBe(true);
+      });
+
+      it('detects "give me a moment"', () => {
+        expect(containsDeferredResponsePhrase('Give me a moment to think.')).toBe(true);
+      });
+
+      it('detects "give me a minute"', () => {
+        expect(containsDeferredResponsePhrase('Give me a minute to process this.')).toBe(true);
+      });
+
+      it('detects "give me a second"', () => {
+        expect(containsDeferredResponsePhrase('Give me a second...')).toBe(true);
+      });
+
+      it('detects "I\'ll look into"', () => {
+        expect(containsDeferredResponsePhrase("I'll look into that for you.")).toBe(true);
+      });
+
+      it('detects "I will research"', () => {
+        expect(containsDeferredResponsePhrase('I will research the best approach.')).toBe(true);
+      });
+
+      it('detects "I\'ll get back"', () => {
+        expect(containsDeferredResponsePhrase("I'll get back to you on that.")).toBe(true);
+      });
+
+      it('detects "researching this"', () => {
+        expect(containsDeferredResponsePhrase('Researching this now...')).toBe(true);
+      });
+
+      it('detects "investigating that"', () => {
+        expect(containsDeferredResponsePhrase('Investigating that issue.')).toBe(true);
+      });
+
+      it('detects "looking into it"', () => {
+        expect(containsDeferredResponsePhrase('Looking into it now.')).toBe(true);
+      });
+
+      it('detects "let me take a look"', () => {
+        expect(containsDeferredResponsePhrase('Let me take a look at the files.')).toBe(true);
+      });
+
+      it('detects "let me take a closer look"', () => {
+        expect(containsDeferredResponsePhrase('Let me take a closer look at this.')).toBe(true);
+      });
+
+      it('detects phrases in longer messages', () => {
+        const longMessage = `Great questions! Let me do some research on the Netflix ecosystem and content metadata APIs to give you solid recommendations.
+
+I'll investigate:
+1. Netflix API/Integration options
+2. Content metadata APIs
+3. Parental control mechanisms
+
+Give me a moment to research this...`;
+        expect(containsDeferredResponsePhrase(longMessage)).toBe(true);
+      });
+    });
+
+    describe('should NOT detect normal conversation phrases', () => {
+      it('does not match "I can help you with that"', () => {
+        expect(containsDeferredResponsePhrase('I can help you with that.')).toBe(false);
+      });
+
+      it('does not match questions', () => {
+        expect(containsDeferredResponsePhrase('What type of project is this?')).toBe(false);
+      });
+
+      it('does not match analysis statements', () => {
+        expect(containsDeferredResponsePhrase('Based on what you described, this sounds like a web app.')).toBe(false);
+      });
+
+      it('does not match "let me know"', () => {
+        expect(containsDeferredResponsePhrase('Let me know if you have questions.')).toBe(false);
+      });
+
+      it('does not match "let me explain"', () => {
+        expect(containsDeferredResponsePhrase('Let me explain how this works.')).toBe(false);
+      });
+
+      it('does not match "let me summarize"', () => {
+        expect(containsDeferredResponsePhrase('Let me summarize what I understand.')).toBe(false);
+      });
+
+      it('does not match past tense "researched"', () => {
+        expect(containsDeferredResponsePhrase('I researched this topic yesterday.')).toBe(false);
+      });
+
+      it('does not match "looking forward"', () => {
+        expect(containsDeferredResponsePhrase('Looking forward to working on this!')).toBe(false);
+      });
+
+      it('does not match confidence statements', () => {
+        expect(containsDeferredResponsePhrase("I'm ready to create your Playbook.")).toBe(false);
+      });
+
+      it('does not match empty string', () => {
+        expect(containsDeferredResponsePhrase('')).toBe(false);
+      });
+    });
+
+    describe('case insensitivity', () => {
+      it('matches uppercase', () => {
+        expect(containsDeferredResponsePhrase('LET ME RESEARCH THIS')).toBe(true);
+      });
+
+      it('matches mixed case', () => {
+        expect(containsDeferredResponsePhrase('Let Me Research This')).toBe(true);
+      });
+
+      it('matches lowercase', () => {
+        expect(containsDeferredResponsePhrase('let me research this')).toBe(true);
+      });
+    });
+  });
+});
diff --git a/src/prompts/wizard-system.md b/src/prompts/wizard-system.md
index 2818fbea..af3e2e67 100644
--- a/src/prompts/wizard-system.md
+++ b/src/prompts/wizard-system.md
@@ -103,6 +103,21 @@ Through a brief, focused conversation:
 - Be encouraging and helpful in tone
 - Once you have enough clarity, indicate you're ready to proceed
 
+### Critical: No Deferred Responses
+
+**NEVER** say things that imply you'll do something after responding:
+- ❌ "Let me research this..."
+- ❌ "Give me a moment to investigate..."
+- ❌ "I'll look into that and get back to you..."
+- ❌ "Let me think about this more..."
+
+Each response is a **single turn** - you cannot continue working after responding. Instead:
+- ✅ Provide your best analysis immediately, with caveats if uncertain
+- ✅ Ask clarifying questions if you need more information
+- ✅ Say "Based on what I can see, [your analysis]..." rather than promising future research
+
+If you need information you don't have, ask the user for it directly instead of implying you'll go find it.
+
 ## Response Format
 
 You MUST respond with valid JSON in this exact format:
diff --git a/src/renderer/components/Wizard/screens/ConversationScreen.tsx b/src/renderer/components/Wizard/screens/ConversationScreen.tsx
index cddc5930..d940cb95 100644
--- a/src/renderer/components/Wizard/screens/ConversationScreen.tsx
+++ b/src/renderer/components/Wizard/screens/ConversationScreen.tsx
@@ -67,6 +67,28 @@ function formatTimestamp(timestamp: number): string {
   return date.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
 }
 
+/**
+ * Patterns that indicate the AI said it will do something asynchronously.
+ * This is a UX problem because the wizard can't actually support async operations -
+ * each message is a single turn. If the AI says "let me research this", the user
+ * is left waiting with no indication that they need to respond.
+ */
+const DEFERRED_RESPONSE_PATTERNS = [
+  /let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i,
+  /give me a (?:moment|minute|second)/i,
+  /i(?:'ll| will) (?:look into|research|investigate|get back|check)/i,
+  /(?:researching|investigating|looking into) (?:this|that|it)/i,
+  /let me (?:take a )?(?:closer )?look/i,
+];
+
+/**
+ * Check if a message contains phrases that imply deferred/async work.
+ * The wizard can't actually support this - we need to auto-continue.
+ */
+function containsDeferredResponsePhrase(message: string): boolean {
+  return DEFERRED_RESPONSE_PATTERNS.some((pattern) => pattern.test(message));
+}
+
 /**
  * ConfidenceMeter - Horizontal progress bar with gradient fill
  */
@@ -525,6 +547,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
   const [announcement, setAnnouncement] = useState('');
   const [announcementKey, setAnnouncementKey] = useState(0);
 
+  // Pending auto-continue message (when AI says "let me research this")
+  const [pendingAutoContinue, setPendingAutoContinue] = useState<string | null>(null);
+
   // Track previous ready state to avoid duplicate announcements
   const prevReadyRef = useRef(state.isReadyToProceed);
 
@@ -544,6 +569,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
   const inputRef = useRef<HTMLTextAreaElement>(null);
   // Immediate send guard to prevent race conditions from rapid clicking
   const isSendingRef = useRef(false);
+  // Track if we've already triggered auto-continue for the current exchange
+  // This prevents infinite loops if the AI keeps saying "let me research"
+  const autoContinueTriggeredRef = useRef(false);
 
   // Scroll to bottom when messages change
   const scrollToBottom = useCallback(() => {
@@ -559,6 +587,38 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
     inputRef.current?.focus();
   }, []);
 
+  // Handle pending auto-continue (when AI says "let me research this").
+  // After a short delay the message is placed in the input; a separate effect sends it.
+  useEffect(() => {
+    if (!pendingAutoContinue || state.isConversationLoading || isSendingRef.current) {
+      return;
+    }
+    const message = pendingAutoContinue;
+    // Small delay to let the UI update and show the AI's response.
+    // Clear the pending state inside the timer, not before scheduling it: clearing it
+    // first changes a dependency, so React runs the cleanup below and cancels the timer.
+    const timeoutId = setTimeout(() => {
+      setPendingAutoContinue(null);
+      setInputValue(message);
+    }, 800);
+    return () => clearTimeout(timeoutId);
+  }, [pendingAutoContinue, state.isConversationLoading]);
+
+  // Store handleSendMessage in a ref so we can call it from the effect
+  const handleSendMessageRef = useRef<(() => void) | null>(null);
+
+  // Effect to trigger send when input is set to the auto-continue message
+  useEffect(() => {
+    if (
+      inputValue === 'Please proceed with your analysis.' &&
+      !state.isConversationLoading &&
+      !isSendingRef.current &&
+      handleSendMessageRef.current
+    ) {
+      handleSendMessageRef.current();
+    }
+  }, [inputValue, state.isConversationLoading]);
+
   // Initialize conversation manager when entering this screen
   useEffect(() => {
     let mounted = true;
@@ -686,6 +746,12 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
     // Set immediate guard before any async work
     isSendingRef.current = true;
 
+    // Reset auto-continue flag if this is a user-initiated message (not auto-continue)
+    // This allows auto-continue to trigger again for the next exchange if needed
+    if (trimmedInput !== 'Please proceed with your analysis.') {
+      autoContinueTriggeredRef.current = false;
+    }
+
     // Clear input immediately and reset textarea height
     setInputValue('');
     if (inputRef.current) {
@@ -846,6 +912,21 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
 
               // Reset error retry count on success
               setErrorRetryCount(0);
+
+              // Check if the AI said something that implies async work (e.g., "let me research this")
+              // The wizard can't support async operations - each message is a single turn.
+              // If we detect this pattern and haven't already auto-continued, schedule a follow-up.
+              const messageContent = sendResult.response.structured?.message || sendResult.response.rawText;
+              if (
+                messageContent &&
+                containsDeferredResponsePhrase(messageContent) &&
+                !autoContinueTriggeredRef.current
+              ) {
+                console.log('[ConversationScreen] Detected deferred response phrase, scheduling auto-continue');
+                autoContinueTriggeredRef.current = true;
+                // Set pending auto-continue - an effect will handle actually sending
+                setPendingAutoContinue('Please proceed with your analysis.');
+              }
             }
           },
           onError: (error) => {
@@ -893,6 +974,11 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
     setIsReadyToProceed,
   ]);
 
+  // Keep ref updated with current handleSendMessage for auto-continue effect
+  useEffect(() => {
+    handleSendMessageRef.current = handleSendMessage;
+  }, [handleSendMessage]);
+
   /**
    * Auto-send initial message when continuing with existing docs
    * This triggers the AI to analyze the docs and provide a synopsis
@@ -1367,6 +1453,22 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
           borderColor: theme.colors.border,
         }}
       >
+        {/* "Your turn" indicator - shows when AI responded and waiting for user */}
+        {!state.isConversationLoading &&
+          state.conversationHistory.length > 0 &&
+          state.conversationHistory[state.conversationHistory.length - 1].role === 'assistant' &&
+          state.confidenceLevel < READY_CONFIDENCE_THRESHOLD && (
+            <div
+              className="flex items-center gap-2 mb-2 text-xs"
+              style={{ color: theme.colors.accent }}
+            >
+              <span
+                className="w-2 h-2 rounded-full animate-pulse"
+                style={{ backgroundColor: theme.colors.accent }}
+              />
+              <span>Your turn — continue the conversation</span>
+            </div>
+          )}
         <div className="flex gap-3">
           <div className="flex-1 relative flex items-center">
             <textarea