mirror of
https://github.com/jlengrand/Maestro.git
synced 2026-03-10 08:31:19 +00:00
## CHANGES
## CHANGES

- Wizard now auto-continues when AI implies "let me research…" dead-ends 🧠
- Added robust deferred-response phrase detection to keep conversations flowing 🔍
- Introduced "Your turn" indicator when assistant awaits user input 💡
- Updated wizard system prompt to forbid deferred/async promises outright 🚫
- Prevented auto-continue infinite loops with a per-exchange trigger guard 🛡️
- Improved auto-continue UX with delayed send after UI updates ⏱️
- Added full test suite for deferred-response detection patterns ✅
- Strengthened NewInstanceModal tests with remote directory stat validation 📁
- NewInstanceModal tests now wait for debounced remote path validation ⏳
- Expanded icon mocking to include Loader2 for more complete test coverage 🎭
This commit is contained in:
@@ -48,6 +48,8 @@ vi.mock('lucide-react', () => ({
|
||||
<span data-testid="alert-triangle-icon" className={className} style={style}>⚠️</span>,
|
||||
X: ({ className, style }: { className?: string; style?: React.CSSProperties }) =>
|
||||
<span data-testid="x-icon" className={className} style={style}>✕</span>,
|
||||
Loader2: ({ className, style }: { className?: string; style?: React.CSSProperties }) =>
|
||||
<span data-testid="loader2-icon" className={className} style={style}>⏳</span>,
|
||||
}));
|
||||
|
||||
// Mock @tanstack/react-virtual for virtualization
|
||||
|
||||
@@ -2310,6 +2310,14 @@ describe('NewInstanceModal', () => {
|
||||
enabled: true,
|
||||
}],
|
||||
});
|
||||
// Mock fs.stat to return a valid directory for remote path validation
|
||||
vi.mocked(window.maestro.fs.stat).mockResolvedValue({
|
||||
size: 4096,
|
||||
createdAt: '2024-01-01T00:00:00.000Z',
|
||||
modifiedAt: '2024-01-15T12:30:00.000Z',
|
||||
isDirectory: true,
|
||||
isFile: false,
|
||||
});
|
||||
|
||||
render(
|
||||
<NewInstanceModal
|
||||
@@ -2345,6 +2353,11 @@ describe('NewInstanceModal', () => {
|
||||
const dirInput = screen.getByPlaceholderText(/Enter remote path/i);
|
||||
fireEvent.change(dirInput, { target: { value: '/test/path' } });
|
||||
|
||||
// Wait for the remote path validation to complete (debounced 300ms)
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText('Remote directory found')).toBeInTheDocument();
|
||||
});
|
||||
|
||||
const createButton = screen.getByText('Create Agent');
|
||||
await act(async () => {
|
||||
fireEvent.click(createButton);
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
/**
|
||||
* ConversationScreen.test.ts
|
||||
*
|
||||
* Tests for the deferred response phrase detection logic used in auto-continue.
|
||||
* The actual function is internal to ConversationScreen.tsx, so we replicate
|
||||
* the patterns here for testing purposes.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
/**
|
||||
* Patterns that indicate the AI said it will do something asynchronously.
|
||||
* Must be kept in sync with ConversationScreen.tsx
|
||||
*/
|
||||
const DEFERRED_RESPONSE_PATTERNS = [
|
||||
/let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i,
|
||||
/give me a (?:moment|minute|second)/i,
|
||||
/i(?:'ll| will) (?:look into|research|investigate|get back|check)/i,
|
||||
/(?:researching|investigating|looking into) (?:this|that|it)/i,
|
||||
/let me (?:take a )?(?:closer )?look/i,
|
||||
];
|
||||
|
||||
function containsDeferredResponsePhrase(message: string): boolean {
|
||||
return DEFERRED_RESPONSE_PATTERNS.some((pattern) => pattern.test(message));
|
||||
}
|
||||
|
||||
// Suite layout: (1) phrases that MUST trigger auto-continue, (2) normal
// conversation that must NOT trigger it, (3) case-insensitivity checks.
// Inputs are real-world shaped assistant messages, not minimal strings.
describe('ConversationScreen', () => {
  describe('containsDeferredResponsePhrase', () => {
    describe('should detect deferred response phrases', () => {
      it('detects "let me research"', () => {
        expect(containsDeferredResponsePhrase('Let me research this for you.')).toBe(true);
        expect(containsDeferredResponsePhrase('let me research the options')).toBe(true);
      });

      it('detects "let me investigate"', () => {
        expect(containsDeferredResponsePhrase('Let me investigate that further.')).toBe(true);
      });

      it('detects "let me look into"', () => {
        expect(containsDeferredResponsePhrase('Let me look into this more.')).toBe(true);
      });

      it('detects "let me think about"', () => {
        expect(containsDeferredResponsePhrase('Let me think about this more carefully.')).toBe(true);
      });

      it('detects "let me analyze"', () => {
        expect(containsDeferredResponsePhrase('Let me analyze the requirements.')).toBe(true);
      });

      it('detects "let me examine"', () => {
        expect(containsDeferredResponsePhrase('Let me examine the codebase.')).toBe(true);
      });

      it('detects "let me check"', () => {
        expect(containsDeferredResponsePhrase('Let me check on that.')).toBe(true);
      });

      it('detects "let me explore"', () => {
        expect(containsDeferredResponsePhrase('Let me explore the possibilities.')).toBe(true);
      });

      it('detects "give me a moment"', () => {
        expect(containsDeferredResponsePhrase('Give me a moment to think.')).toBe(true);
      });

      it('detects "give me a minute"', () => {
        expect(containsDeferredResponsePhrase('Give me a minute to process this.')).toBe(true);
      });

      it('detects "give me a second"', () => {
        expect(containsDeferredResponsePhrase('Give me a second...')).toBe(true);
      });

      it('detects "I\'ll look into"', () => {
        expect(containsDeferredResponsePhrase("I'll look into that for you.")).toBe(true);
      });

      it('detects "I will research"', () => {
        expect(containsDeferredResponsePhrase('I will research the best approach.')).toBe(true);
      });

      it('detects "I\'ll get back"', () => {
        expect(containsDeferredResponsePhrase("I'll get back to you on that.")).toBe(true);
      });

      it('detects "researching this"', () => {
        expect(containsDeferredResponsePhrase('Researching this now...')).toBe(true);
      });

      it('detects "investigating that"', () => {
        expect(containsDeferredResponsePhrase('Investigating that issue.')).toBe(true);
      });

      it('detects "looking into it"', () => {
        expect(containsDeferredResponsePhrase('Looking into it now.')).toBe(true);
      });

      it('detects "let me take a look"', () => {
        expect(containsDeferredResponsePhrase('Let me take a look at the files.')).toBe(true);
      });

      it('detects "let me take a closer look"', () => {
        expect(containsDeferredResponsePhrase('Let me take a closer look at this.')).toBe(true);
      });

      // Multi-line realistic message: the trigger phrase ("Give me a moment")
      // appears deep in the text, not at the start.
      it('detects phrases in longer messages', () => {
        const longMessage = `Great questions! Let me do some research on the Netflix ecosystem and content metadata APIs to give you solid recommendations.

I'll investigate:
1. Netflix API/Integration options
2. Content metadata APIs
3. Parental control mechanisms

Give me a moment to research this...`;
        expect(containsDeferredResponsePhrase(longMessage)).toBe(true);
      });
    });

    // Negative controls: phrases that share words with the patterns
    // ("let me know", "looking forward", past-tense "researched") but do
    // NOT imply deferred work — these must not trigger auto-continue.
    describe('should NOT detect normal conversation phrases', () => {
      it('does not match "I can help you with that"', () => {
        expect(containsDeferredResponsePhrase('I can help you with that.')).toBe(false);
      });

      it('does not match questions', () => {
        expect(containsDeferredResponsePhrase('What type of project is this?')).toBe(false);
      });

      it('does not match analysis statements', () => {
        expect(containsDeferredResponsePhrase('Based on what you described, this sounds like a web app.')).toBe(false);
      });

      it('does not match "let me know"', () => {
        expect(containsDeferredResponsePhrase('Let me know if you have questions.')).toBe(false);
      });

      it('does not match "let me explain"', () => {
        expect(containsDeferredResponsePhrase('Let me explain how this works.')).toBe(false);
      });

      it('does not match "let me summarize"', () => {
        expect(containsDeferredResponsePhrase('Let me summarize what I understand.')).toBe(false);
      });

      it('does not match past tense "researched"', () => {
        expect(containsDeferredResponsePhrase('I researched this topic yesterday.')).toBe(false);
      });

      it('does not match "looking forward"', () => {
        expect(containsDeferredResponsePhrase('Looking forward to working on this!')).toBe(false);
      });

      it('does not match confidence statements', () => {
        expect(containsDeferredResponsePhrase("I'm ready to create your Playbook.")).toBe(false);
      });

      it('does not match empty string', () => {
        expect(containsDeferredResponsePhrase('')).toBe(false);
      });
    });

    // All patterns carry the /i flag; verify casing never affects matching.
    describe('case insensitivity', () => {
      it('matches uppercase', () => {
        expect(containsDeferredResponsePhrase('LET ME RESEARCH THIS')).toBe(true);
      });

      it('matches mixed case', () => {
        expect(containsDeferredResponsePhrase('Let Me Research This')).toBe(true);
      });

      it('matches lowercase', () => {
        expect(containsDeferredResponsePhrase('let me research this')).toBe(true);
      });
    });
  });
});
|
||||
@@ -103,6 +103,21 @@ Through a brief, focused conversation:
|
||||
- Be encouraging and helpful in tone
|
||||
- Once you have enough clarity, indicate you're ready to proceed
|
||||
|
||||
### Critical: No Deferred Responses
|
||||
|
||||
**NEVER** say things that imply you'll do something after responding:
|
||||
- ❌ "Let me research this..."
|
||||
- ❌ "Give me a moment to investigate..."
|
||||
- ❌ "I'll look into that and get back to you..."
|
||||
- ❌ "Let me think about this more..."
|
||||
|
||||
Each response is a **single turn** - you cannot continue working after responding. Instead:
|
||||
- ✅ Provide your best analysis immediately, with caveats if uncertain
|
||||
- ✅ Ask clarifying questions if you need more information
|
||||
- ✅ Say "Based on what I can see, [your analysis]..." rather than promising future research
|
||||
|
||||
If you need information you don't have, ask the user for it directly instead of implying you'll go find it.
|
||||
|
||||
## Response Format
|
||||
|
||||
You MUST respond with valid JSON in this exact format:
|
||||
|
||||
@@ -67,6 +67,28 @@ function formatTimestamp(timestamp: number): string {
|
||||
return date.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
|
||||
}
|
||||
|
||||
/**
|
||||
* Patterns that indicate the AI said it will do something asynchronously.
|
||||
* This is a UX problem because the wizard can't actually support async operations -
|
||||
* each message is a single turn. If the AI says "let me research this", the user
|
||||
* is left waiting with no indication that they need to respond.
|
||||
*/
|
||||
const DEFERRED_RESPONSE_PATTERNS = [
|
||||
/let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i,
|
||||
/give me a (?:moment|minute|second)/i,
|
||||
/i(?:'ll| will) (?:look into|research|investigate|get back|check)/i,
|
||||
/(?:researching|investigating|looking into) (?:this|that|it)/i,
|
||||
/let me (?:take a )?(?:closer )?look/i,
|
||||
];
|
||||
|
||||
/**
|
||||
* Check if a message contains phrases that imply deferred/async work.
|
||||
* The wizard can't actually support this - we need to auto-continue.
|
||||
*/
|
||||
function containsDeferredResponsePhrase(message: string): boolean {
|
||||
return DEFERRED_RESPONSE_PATTERNS.some((pattern) => pattern.test(message));
|
||||
}
|
||||
|
||||
/**
|
||||
* ConfidenceMeter - Horizontal progress bar with gradient fill
|
||||
*/
|
||||
@@ -525,6 +547,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
const [announcement, setAnnouncement] = useState('');
|
||||
const [announcementKey, setAnnouncementKey] = useState(0);
|
||||
|
||||
// Pending auto-continue message (when AI says "let me research this")
|
||||
const [pendingAutoContinue, setPendingAutoContinue] = useState<string | null>(null);
|
||||
|
||||
// Track previous ready state to avoid duplicate announcements
|
||||
const prevReadyRef = useRef(state.isReadyToProceed);
|
||||
|
||||
@@ -544,6 +569,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
const inputRef = useRef<HTMLTextAreaElement>(null);
|
||||
// Immediate send guard to prevent race conditions from rapid clicking
|
||||
const isSendingRef = useRef(false);
|
||||
// Track if we've already triggered auto-continue for the current exchange
|
||||
// This prevents infinite loops if the AI keeps saying "let me research"
|
||||
const autoContinueTriggeredRef = useRef(false);
|
||||
|
||||
// Scroll to bottom when messages change
|
||||
const scrollToBottom = useCallback(() => {
|
||||
@@ -559,6 +587,38 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
inputRef.current?.focus();
|
||||
}, []);
|
||||
|
||||
// Handle pending auto-continue (when AI says "let me research this")
|
||||
// We set the input and call handleSendMessage after a delay
|
||||
useEffect(() => {
|
||||
if (pendingAutoContinue && !state.isConversationLoading && !isSendingRef.current) {
|
||||
const message = pendingAutoContinue;
|
||||
setPendingAutoContinue(null);
|
||||
|
||||
// Small delay to let the UI update and show the AI's response
|
||||
const timeoutId = setTimeout(() => {
|
||||
// Set the input value first so handleSendMessage picks it up
|
||||
setInputValue(message);
|
||||
}, 800);
|
||||
|
||||
return () => clearTimeout(timeoutId);
|
||||
}
|
||||
}, [pendingAutoContinue, state.isConversationLoading]);
|
||||
|
||||
// Store handleSendMessage in a ref so we can call it from the effect
|
||||
const handleSendMessageRef = useRef<(() => void) | null>(null);
|
||||
|
||||
// Effect to trigger send when input is set to the auto-continue message
|
||||
useEffect(() => {
|
||||
if (
|
||||
inputValue === 'Please proceed with your analysis.' &&
|
||||
!state.isConversationLoading &&
|
||||
!isSendingRef.current &&
|
||||
handleSendMessageRef.current
|
||||
) {
|
||||
handleSendMessageRef.current();
|
||||
}
|
||||
}, [inputValue, state.isConversationLoading]);
|
||||
|
||||
// Initialize conversation manager when entering this screen
|
||||
useEffect(() => {
|
||||
let mounted = true;
|
||||
@@ -686,6 +746,12 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
// Set immediate guard before any async work
|
||||
isSendingRef.current = true;
|
||||
|
||||
// Reset auto-continue flag if this is a user-initiated message (not auto-continue)
|
||||
// This allows auto-continue to trigger again for the next exchange if needed
|
||||
if (trimmedInput !== 'Please proceed with your analysis.') {
|
||||
autoContinueTriggeredRef.current = false;
|
||||
}
|
||||
|
||||
// Clear input immediately and reset textarea height
|
||||
setInputValue('');
|
||||
if (inputRef.current) {
|
||||
@@ -846,6 +912,21 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
|
||||
// Reset error retry count on success
|
||||
setErrorRetryCount(0);
|
||||
|
||||
// Check if the AI said something that implies async work (e.g., "let me research this")
|
||||
// The wizard can't support async operations - each message is a single turn.
|
||||
// If we detect this pattern and haven't already auto-continued, schedule a follow-up.
|
||||
const messageContent = sendResult.response.structured?.message || sendResult.response.rawText;
|
||||
if (
|
||||
messageContent &&
|
||||
containsDeferredResponsePhrase(messageContent) &&
|
||||
!autoContinueTriggeredRef.current
|
||||
) {
|
||||
console.log('[ConversationScreen] Detected deferred response phrase, scheduling auto-continue');
|
||||
autoContinueTriggeredRef.current = true;
|
||||
// Set pending auto-continue - an effect will handle actually sending
|
||||
setPendingAutoContinue('Please proceed with your analysis.');
|
||||
}
|
||||
}
|
||||
},
|
||||
onError: (error) => {
|
||||
@@ -893,6 +974,11 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
setIsReadyToProceed,
|
||||
]);
|
||||
|
||||
// Keep ref updated with current handleSendMessage for auto-continue effect
|
||||
useEffect(() => {
|
||||
handleSendMessageRef.current = handleSendMessage;
|
||||
}, [handleSendMessage]);
|
||||
|
||||
/**
|
||||
* Auto-send initial message when continuing with existing docs
|
||||
* This triggers the AI to analyze the docs and provide a synopsis
|
||||
@@ -1367,6 +1453,22 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
borderColor: theme.colors.border,
|
||||
}}
|
||||
>
|
||||
{/* "Your turn" indicator - shows when AI responded and waiting for user */}
|
||||
{!state.isConversationLoading &&
|
||||
state.conversationHistory.length > 0 &&
|
||||
state.conversationHistory[state.conversationHistory.length - 1].role === 'assistant' &&
|
||||
state.confidenceLevel < READY_CONFIDENCE_THRESHOLD && (
|
||||
<div
|
||||
className="flex items-center gap-2 mb-2 text-xs"
|
||||
style={{ color: theme.colors.accent }}
|
||||
>
|
||||
<span
|
||||
className="w-2 h-2 rounded-full animate-pulse"
|
||||
style={{ backgroundColor: theme.colors.accent }}
|
||||
/>
|
||||
<span>Your turn — continue the conversation</span>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex gap-3">
|
||||
<div className="flex-1 relative flex items-center">
|
||||
<textarea
|
||||
|
||||
Reference in New Issue
Block a user