## CHANGES

- Wizard now auto-continues when the AI replies with a deferred “let me research…” dead-end 🧠
- Added robust deferred-response phrase detection to keep conversations flowing 🔍
- Introduced “Your turn” indicator when assistant awaits user input 💡
- Updated wizard system prompt to forbid deferred/async promises outright 🚫
- Prevented auto-continue infinite loops with a per-exchange trigger guard 🛡️
- Improved auto-continue UX with delayed send after UI updates ⏱️
- Added full test suite for deferred-response detection patterns 
- Strengthened NewInstanceModal tests with remote directory stat validation 📁
- NewInstanceModal tests now wait for debounced remote path validation 
- Expanded icon mocking to include Loader2 for more complete test coverage 🎭
This commit is contained in:
Pedram Amini
2026-01-06 09:07:50 -06:00
parent 7db629f8aa
commit b92fa04908
5 changed files with 308 additions and 0 deletions

View File

@@ -48,6 +48,8 @@ vi.mock('lucide-react', () => ({
<span data-testid="alert-triangle-icon" className={className} style={style}></span>,
X: ({ className, style }: { className?: string; style?: React.CSSProperties }) =>
<span data-testid="x-icon" className={className} style={style}></span>,
Loader2: ({ className, style }: { className?: string; style?: React.CSSProperties }) =>
<span data-testid="loader2-icon" className={className} style={style}></span>,
}));
// Mock @tanstack/react-virtual for virtualization

View File

@@ -2310,6 +2310,14 @@ describe('NewInstanceModal', () => {
enabled: true,
}],
});
// Mock fs.stat to return a valid directory for remote path validation
vi.mocked(window.maestro.fs.stat).mockResolvedValue({
size: 4096,
createdAt: '2024-01-01T00:00:00.000Z',
modifiedAt: '2024-01-15T12:30:00.000Z',
isDirectory: true,
isFile: false,
});
render(
<NewInstanceModal
@@ -2345,6 +2353,11 @@ describe('NewInstanceModal', () => {
const dirInput = screen.getByPlaceholderText(/Enter remote path/i);
fireEvent.change(dirInput, { target: { value: '/test/path' } });
// Wait for the remote path validation to complete (debounced 300ms)
await waitFor(() => {
expect(screen.getByText('Remote directory found')).toBeInTheDocument();
});
const createButton = screen.getByText('Create Agent');
await act(async () => {
fireEvent.click(createButton);

View File

@@ -0,0 +1,176 @@
/**
* ConversationScreen.test.ts
*
* Tests for the deferred response phrase detection logic used in auto-continue.
* The actual function is internal to ConversationScreen.tsx, so we replicate
* the patterns here for testing purposes.
*/
import { describe, it, expect } from 'vitest';
/**
 * Patterns that indicate the AI said it will do something asynchronously.
 * Must be kept in sync with ConversationScreen.tsx
 */
const DEFERRED_RESPONSE_PATTERNS: readonly RegExp[] = [
  // "let me research / investigate / look into / ..."
  /let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i,
  // "give me a moment / minute / second"
  /give me a (?:moment|minute|second)/i,
  // "I'll look into / I will research / ..."
  /i(?:'ll| will) (?:look into|research|investigate|get back|check)/i,
  // present-participle forms: "researching this", "looking into it", ...
  /(?:researching|investigating|looking into) (?:this|that|it)/i,
  // "let me (take a) (closer) look"
  /let me (?:take a )?(?:closer )?look/i,
];

/**
 * Returns true when the given message contains a phrase implying
 * deferred/async work (e.g. "let me research this").
 */
function containsDeferredResponsePhrase(message: string): boolean {
  for (const pattern of DEFERRED_RESPONSE_PATTERNS) {
    if (pattern.test(message)) {
      return true;
    }
  }
  return false;
}
describe('ConversationScreen', () => {
  describe('containsDeferredResponsePhrase', () => {
    describe('should detect deferred response phrases', () => {
      // [test name, sample messages that must all be flagged as deferred]
      const deferredCases: Array<[string, string[]]> = [
        ['detects "let me research"', ['Let me research this for you.', 'let me research the options']],
        ['detects "let me investigate"', ['Let me investigate that further.']],
        ['detects "let me look into"', ['Let me look into this more.']],
        ['detects "let me think about"', ['Let me think about this more carefully.']],
        ['detects "let me analyze"', ['Let me analyze the requirements.']],
        ['detects "let me examine"', ['Let me examine the codebase.']],
        ['detects "let me check"', ['Let me check on that.']],
        ['detects "let me explore"', ['Let me explore the possibilities.']],
        ['detects "give me a moment"', ['Give me a moment to think.']],
        ['detects "give me a minute"', ['Give me a minute to process this.']],
        ['detects "give me a second"', ['Give me a second...']],
        ['detects "I\'ll look into"', ["I'll look into that for you."]],
        ['detects "I will research"', ['I will research the best approach.']],
        ['detects "I\'ll get back"', ["I'll get back to you on that."]],
        ['detects "researching this"', ['Researching this now...']],
        ['detects "investigating that"', ['Investigating that issue.']],
        ['detects "looking into it"', ['Looking into it now.']],
        ['detects "let me take a look"', ['Let me take a look at the files.']],
        ['detects "let me take a closer look"', ['Let me take a closer look at this.']],
      ];

      for (const [name, samples] of deferredCases) {
        it(name, () => {
          for (const sample of samples) {
            expect(containsDeferredResponsePhrase(sample)).toBe(true);
          }
        });
      }

      it('detects phrases in longer messages', () => {
        const longMessage = `Great questions! Let me do some research on the Netflix ecosystem and content metadata APIs to give you solid recommendations.
I'll investigate:
1. Netflix API/Integration options
2. Content metadata APIs
3. Parental control mechanisms
Give me a moment to research this...`;
        expect(containsDeferredResponsePhrase(longMessage)).toBe(true);
      });
    });

    describe('should NOT detect normal conversation phrases', () => {
      // [test name, sample message that must NOT be flagged]
      const normalCases: Array<[string, string]> = [
        ['does not match "I can help you with that"', 'I can help you with that.'],
        ['does not match questions', 'What type of project is this?'],
        ['does not match analysis statements', 'Based on what you described, this sounds like a web app.'],
        ['does not match "let me know"', 'Let me know if you have questions.'],
        ['does not match "let me explain"', 'Let me explain how this works.'],
        ['does not match "let me summarize"', 'Let me summarize what I understand.'],
        ['does not match past tense "researched"', 'I researched this topic yesterday.'],
        ['does not match "looking forward"', 'Looking forward to working on this!'],
        ['does not match confidence statements', "I'm ready to create your Playbook."],
        ['does not match empty string', ''],
      ];

      for (const [name, sample] of normalCases) {
        it(name, () => {
          expect(containsDeferredResponsePhrase(sample)).toBe(false);
        });
      }
    });

    describe('case insensitivity', () => {
      // The same phrase must match regardless of letter casing.
      const casedCases: Array<[string, string]> = [
        ['matches uppercase', 'LET ME RESEARCH THIS'],
        ['matches mixed case', 'Let Me Research This'],
        ['matches lowercase', 'let me research this'],
      ];

      for (const [name, sample] of casedCases) {
        it(name, () => {
          expect(containsDeferredResponsePhrase(sample)).toBe(true);
        });
      }
    });
  });
});

View File

@@ -103,6 +103,21 @@ Through a brief, focused conversation:
- Be encouraging and helpful in tone
- Once you have enough clarity, indicate you're ready to proceed
### Critical: No Deferred Responses
**NEVER** say things that imply you'll do something after responding:
- ❌ "Let me research this..."
- ❌ "Give me a moment to investigate..."
- ❌ "I'll look into that and get back to you..."
- ❌ "Let me think about this more..."
Each response is a **single turn** - you cannot continue working after responding. Instead:
- ✅ Provide your best analysis immediately, with caveats if uncertain
- ✅ Ask clarifying questions if you need more information
- ✅ Say "Based on what I can see, [your analysis]..." rather than promising future research
If you need information you don't have, ask the user for it directly instead of implying you'll go find it.
## Response Format
You MUST respond with valid JSON in this exact format:

View File

@@ -67,6 +67,28 @@ function formatTimestamp(timestamp: number): string {
return date.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
}
/**
 * Patterns that indicate the AI said it will do something asynchronously.
 * This is a UX problem because the wizard can't actually support async operations -
 * each message is a single turn. If the AI says "let me research this", the user
 * is left waiting with no indication that they need to respond.
 */
const DEFERRED_RESPONSE_PATTERNS: readonly RegExp[] = [
  // "let me research / investigate / look into / ..."
  /let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i,
  // "give me a moment / minute / second"
  /give me a (?:moment|minute|second)/i,
  // "I'll look into / I will research / ..."
  /i(?:'ll| will) (?:look into|research|investigate|get back|check)/i,
  // present-participle forms: "researching this", "looking into it", ...
  /(?:researching|investigating|looking into) (?:this|that|it)/i,
  // "let me (take a) (closer) look"
  /let me (?:take a )?(?:closer )?look/i,
];

/**
 * Check if a message contains phrases that imply deferred/async work.
 * The wizard can't actually support this - we need to auto-continue.
 */
function containsDeferredResponsePhrase(message: string): boolean {
  const isMatch = (pattern: RegExp): boolean => pattern.test(message);
  return DEFERRED_RESPONSE_PATTERNS.some(isMatch);
}
/**
* ConfidenceMeter - Horizontal progress bar with gradient fill
*/
@@ -525,6 +547,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
const [announcement, setAnnouncement] = useState('');
const [announcementKey, setAnnouncementKey] = useState(0);
// Pending auto-continue message (when AI says "let me research this")
const [pendingAutoContinue, setPendingAutoContinue] = useState<string | null>(null);
// Track previous ready state to avoid duplicate announcements
const prevReadyRef = useRef(state.isReadyToProceed);
@@ -544,6 +569,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
const inputRef = useRef<HTMLTextAreaElement>(null);
// Immediate send guard to prevent race conditions from rapid clicking
const isSendingRef = useRef(false);
// Track if we've already triggered auto-continue for the current exchange
// This prevents infinite loops if the AI keeps saying "let me research"
const autoContinueTriggeredRef = useRef(false);
// Scroll to bottom when messages change
const scrollToBottom = useCallback(() => {
@@ -559,6 +587,38 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
inputRef.current?.focus();
}, []);
// Handle pending auto-continue (when AI says "let me research this")
// We set the input and call handleSendMessage after a delay
useEffect(() => {
if (pendingAutoContinue && !state.isConversationLoading && !isSendingRef.current) {
const message = pendingAutoContinue;
setPendingAutoContinue(null);
// Small delay to let the UI update and show the AI's response
const timeoutId = setTimeout(() => {
// Set the input value first so handleSendMessage picks it up
setInputValue(message);
}, 800);
return () => clearTimeout(timeoutId);
}
}, [pendingAutoContinue, state.isConversationLoading]);
// Store handleSendMessage in a ref so we can call it from the effect
const handleSendMessageRef = useRef<(() => void) | null>(null);
// Effect to trigger send when input is set to the auto-continue message
useEffect(() => {
if (
inputValue === 'Please proceed with your analysis.' &&
!state.isConversationLoading &&
!isSendingRef.current &&
handleSendMessageRef.current
) {
handleSendMessageRef.current();
}
}, [inputValue, state.isConversationLoading]);
// Initialize conversation manager when entering this screen
useEffect(() => {
let mounted = true;
@@ -686,6 +746,12 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
// Set immediate guard before any async work
isSendingRef.current = true;
// Reset auto-continue flag if this is a user-initiated message (not auto-continue)
// This allows auto-continue to trigger again for the next exchange if needed
if (trimmedInput !== 'Please proceed with your analysis.') {
autoContinueTriggeredRef.current = false;
}
// Clear input immediately and reset textarea height
setInputValue('');
if (inputRef.current) {
@@ -846,6 +912,21 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
// Reset error retry count on success
setErrorRetryCount(0);
// Check if the AI said something that implies async work (e.g., "let me research this")
// The wizard can't support async operations - each message is a single turn.
// If we detect this pattern and haven't already auto-continued, schedule a follow-up.
const messageContent = sendResult.response.structured?.message || sendResult.response.rawText;
if (
messageContent &&
containsDeferredResponsePhrase(messageContent) &&
!autoContinueTriggeredRef.current
) {
console.log('[ConversationScreen] Detected deferred response phrase, scheduling auto-continue');
autoContinueTriggeredRef.current = true;
// Set pending auto-continue - an effect will handle actually sending
setPendingAutoContinue('Please proceed with your analysis.');
}
}
},
onError: (error) => {
@@ -893,6 +974,11 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
setIsReadyToProceed,
]);
// Keep ref updated with current handleSendMessage for auto-continue effect
useEffect(() => {
handleSendMessageRef.current = handleSendMessage;
}, [handleSendMessage]);
/**
* Auto-send initial message when continuing with existing docs
* This triggers the AI to analyze the docs and provide a synopsis
@@ -1367,6 +1453,22 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
borderColor: theme.colors.border,
}}
>
{/* "Your turn" indicator - shows when AI responded and waiting for user */}
{!state.isConversationLoading &&
state.conversationHistory.length > 0 &&
state.conversationHistory[state.conversationHistory.length - 1].role === 'assistant' &&
state.confidenceLevel < READY_CONFIDENCE_THRESHOLD && (
<div
className="flex items-center gap-2 mb-2 text-xs"
style={{ color: theme.colors.accent }}
>
<span
className="w-2 h-2 rounded-full animate-pulse"
style={{ backgroundColor: theme.colors.accent }}
/>
<span>Your turn — continue the conversation</span>
</div>
)}
<div className="flex gap-3">
<div className="flex-1 relative flex items-center">
<textarea