mirror of
https://github.com/jlengrand/Maestro.git
synced 2026-03-10 08:31:19 +00:00
## CHANGES
## CHANGES

- Wizard now auto-continues when AI implies "let me research…" dead-ends 🧠
- Added robust deferred-response phrase detection to keep conversations flowing 🔍
- Introduced "Your turn" indicator when assistant awaits user input 💡
- Updated wizard system prompt to forbid deferred/async promises outright 🚫
- Prevented auto-continue infinite loops with a per-exchange trigger guard 🛡️
- Improved auto-continue UX with delayed send after UI updates ⏱️
- Added full test suite for deferred-response detection patterns ✅
- Strengthened NewInstanceModal tests with remote directory stat validation 📁
- NewInstanceModal tests now wait for debounced remote path validation ⏳
- Expanded icon mocking to include Loader2 for more complete test coverage 🎭
This commit is contained in:
@@ -48,6 +48,8 @@ vi.mock('lucide-react', () => ({
|
||||
<span data-testid="alert-triangle-icon" className={className} style={style}>⚠️</span>,
|
||||
X: ({ className, style }: { className?: string; style?: React.CSSProperties }) =>
|
||||
<span data-testid="x-icon" className={className} style={style}>✕</span>,
|
||||
Loader2: ({ className, style }: { className?: string; style?: React.CSSProperties }) =>
|
||||
<span data-testid="loader2-icon" className={className} style={style}>⏳</span>,
|
||||
}));
|
||||
|
||||
// Mock @tanstack/react-virtual for virtualization
|
||||
|
||||
@@ -2310,6 +2310,14 @@ describe('NewInstanceModal', () => {
|
||||
enabled: true,
|
||||
}],
|
||||
});
|
||||
// Mock fs.stat to return a valid directory for remote path validation
|
||||
vi.mocked(window.maestro.fs.stat).mockResolvedValue({
|
||||
size: 4096,
|
||||
createdAt: '2024-01-01T00:00:00.000Z',
|
||||
modifiedAt: '2024-01-15T12:30:00.000Z',
|
||||
isDirectory: true,
|
||||
isFile: false,
|
||||
});
|
||||
|
||||
render(
|
||||
<NewInstanceModal
|
||||
@@ -2345,6 +2353,11 @@ describe('NewInstanceModal', () => {
|
||||
const dirInput = screen.getByPlaceholderText(/Enter remote path/i);
|
||||
fireEvent.change(dirInput, { target: { value: '/test/path' } });
|
||||
|
||||
// Wait for the remote path validation to complete (debounced 300ms)
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText('Remote directory found')).toBeInTheDocument();
|
||||
});
|
||||
|
||||
const createButton = screen.getByText('Create Agent');
|
||||
await act(async () => {
|
||||
fireEvent.click(createButton);
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
/**
|
||||
* ConversationScreen.test.ts
|
||||
*
|
||||
* Tests for the deferred response phrase detection logic used in auto-continue.
|
||||
* The actual function is internal to ConversationScreen.tsx, so we replicate
|
||||
* the patterns here for testing purposes.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
|
||||
/**
|
||||
* Patterns that indicate the AI said it will do something asynchronously.
|
||||
* Must be kept in sync with ConversationScreen.tsx
|
||||
*/
|
||||
const DEFERRED_RESPONSE_PATTERNS = [
|
||||
/let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i,
|
||||
/give me a (?:moment|minute|second)/i,
|
||||
/i(?:'ll| will) (?:look into|research|investigate|get back|check)/i,
|
||||
/(?:researching|investigating|looking into) (?:this|that|it)/i,
|
||||
/let me (?:take a )?(?:closer )?look/i,
|
||||
];
|
||||
|
||||
function containsDeferredResponsePhrase(message: string): boolean {
|
||||
return DEFERRED_RESPONSE_PATTERNS.some((pattern) => pattern.test(message));
|
||||
}
|
||||
|
||||
// Suite layout: (1) phrases that MUST trigger auto-continue, (2) normal
// conversation that must NOT trigger it, (3) case-insensitivity checks.
// Inputs are real-world shaped assistant messages, not minimal strings.
describe('ConversationScreen', () => {
  describe('containsDeferredResponsePhrase', () => {
    describe('should detect deferred response phrases', () => {
      it('detects "let me research"', () => {
        expect(containsDeferredResponsePhrase('Let me research this for you.')).toBe(true);
        expect(containsDeferredResponsePhrase('let me research the options')).toBe(true);
      });

      it('detects "let me investigate"', () => {
        expect(containsDeferredResponsePhrase('Let me investigate that further.')).toBe(true);
      });

      it('detects "let me look into"', () => {
        expect(containsDeferredResponsePhrase('Let me look into this more.')).toBe(true);
      });

      it('detects "let me think about"', () => {
        expect(containsDeferredResponsePhrase('Let me think about this more carefully.')).toBe(true);
      });

      it('detects "let me analyze"', () => {
        expect(containsDeferredResponsePhrase('Let me analyze the requirements.')).toBe(true);
      });

      it('detects "let me examine"', () => {
        expect(containsDeferredResponsePhrase('Let me examine the codebase.')).toBe(true);
      });

      it('detects "let me check"', () => {
        expect(containsDeferredResponsePhrase('Let me check on that.')).toBe(true);
      });

      it('detects "let me explore"', () => {
        expect(containsDeferredResponsePhrase('Let me explore the possibilities.')).toBe(true);
      });

      it('detects "give me a moment"', () => {
        expect(containsDeferredResponsePhrase('Give me a moment to think.')).toBe(true);
      });

      it('detects "give me a minute"', () => {
        expect(containsDeferredResponsePhrase('Give me a minute to process this.')).toBe(true);
      });

      it('detects "give me a second"', () => {
        expect(containsDeferredResponsePhrase('Give me a second...')).toBe(true);
      });

      it('detects "I\'ll look into"', () => {
        expect(containsDeferredResponsePhrase("I'll look into that for you.")).toBe(true);
      });

      it('detects "I will research"', () => {
        expect(containsDeferredResponsePhrase('I will research the best approach.')).toBe(true);
      });

      it('detects "I\'ll get back"', () => {
        expect(containsDeferredResponsePhrase("I'll get back to you on that.")).toBe(true);
      });

      it('detects "researching this"', () => {
        expect(containsDeferredResponsePhrase('Researching this now...')).toBe(true);
      });

      it('detects "investigating that"', () => {
        expect(containsDeferredResponsePhrase('Investigating that issue.')).toBe(true);
      });

      it('detects "looking into it"', () => {
        expect(containsDeferredResponsePhrase('Looking into it now.')).toBe(true);
      });

      it('detects "let me take a look"', () => {
        expect(containsDeferredResponsePhrase('Let me take a look at the files.')).toBe(true);
      });

      it('detects "let me take a closer look"', () => {
        expect(containsDeferredResponsePhrase('Let me take a closer look at this.')).toBe(true);
      });

      // Multi-line realistic message: the trigger phrase ("Give me a moment")
      // appears deep in the text, not at the start.
      it('detects phrases in longer messages', () => {
        const longMessage = `Great questions! Let me do some research on the Netflix ecosystem and content metadata APIs to give you solid recommendations.

I'll investigate:
1. Netflix API/Integration options
2. Content metadata APIs
3. Parental control mechanisms

Give me a moment to research this...`;
        expect(containsDeferredResponsePhrase(longMessage)).toBe(true);
      });
    });

    // Negative controls: phrases that share words with the patterns
    // ("let me know", "looking forward", past-tense "researched") but do
    // NOT imply deferred work — these must not trigger auto-continue.
    describe('should NOT detect normal conversation phrases', () => {
      it('does not match "I can help you with that"', () => {
        expect(containsDeferredResponsePhrase('I can help you with that.')).toBe(false);
      });

      it('does not match questions', () => {
        expect(containsDeferredResponsePhrase('What type of project is this?')).toBe(false);
      });

      it('does not match analysis statements', () => {
        expect(containsDeferredResponsePhrase('Based on what you described, this sounds like a web app.')).toBe(false);
      });

      it('does not match "let me know"', () => {
        expect(containsDeferredResponsePhrase('Let me know if you have questions.')).toBe(false);
      });

      it('does not match "let me explain"', () => {
        expect(containsDeferredResponsePhrase('Let me explain how this works.')).toBe(false);
      });

      it('does not match "let me summarize"', () => {
        expect(containsDeferredResponsePhrase('Let me summarize what I understand.')).toBe(false);
      });

      it('does not match past tense "researched"', () => {
        expect(containsDeferredResponsePhrase('I researched this topic yesterday.')).toBe(false);
      });

      it('does not match "looking forward"', () => {
        expect(containsDeferredResponsePhrase('Looking forward to working on this!')).toBe(false);
      });

      it('does not match confidence statements', () => {
        expect(containsDeferredResponsePhrase("I'm ready to create your Playbook.")).toBe(false);
      });

      it('does not match empty string', () => {
        expect(containsDeferredResponsePhrase('')).toBe(false);
      });
    });

    // All patterns carry the /i flag; verify casing never affects matching.
    describe('case insensitivity', () => {
      it('matches uppercase', () => {
        expect(containsDeferredResponsePhrase('LET ME RESEARCH THIS')).toBe(true);
      });

      it('matches mixed case', () => {
        expect(containsDeferredResponsePhrase('Let Me Research This')).toBe(true);
      });

      it('matches lowercase', () => {
        expect(containsDeferredResponsePhrase('let me research this')).toBe(true);
      });
    });
  });
});
|
||||
@@ -103,6 +103,21 @@ Through a brief, focused conversation:
|
||||
- Be encouraging and helpful in tone
|
||||
- Once you have enough clarity, indicate you're ready to proceed
|
||||
|
||||
### Critical: No Deferred Responses
|
||||
|
||||
**NEVER** say things that imply you'll do something after responding:
|
||||
- ❌ "Let me research this..."
|
||||
- ❌ "Give me a moment to investigate..."
|
||||
- ❌ "I'll look into that and get back to you..."
|
||||
- ❌ "Let me think about this more..."
|
||||
|
||||
Each response is a **single turn** - you cannot continue working after responding. Instead:
|
||||
- ✅ Provide your best analysis immediately, with caveats if uncertain
|
||||
- ✅ Ask clarifying questions if you need more information
|
||||
- ✅ Say "Based on what I can see, [your analysis]..." rather than promising future research
|
||||
|
||||
If you need information you don't have, ask the user for it directly instead of implying you'll go find it.
|
||||
|
||||
## Response Format
|
||||
|
||||
You MUST respond with valid JSON in this exact format:
|
||||
|
||||
@@ -67,6 +67,28 @@ function formatTimestamp(timestamp: number): string {
|
||||
return date.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
|
||||
}
|
||||
|
||||
/**
|
||||
* Patterns that indicate the AI said it will do something asynchronously.
|
||||
* This is a UX problem because the wizard can't actually support async operations -
|
||||
* each message is a single turn. If the AI says "let me research this", the user
|
||||
* is left waiting with no indication that they need to respond.
|
||||
*/
|
||||
const DEFERRED_RESPONSE_PATTERNS = [
|
||||
/let me (?:research|investigate|look into|think about|analyze|examine|check|explore)/i,
|
||||
/give me a (?:moment|minute|second)/i,
|
||||
/i(?:'ll| will) (?:look into|research|investigate|get back|check)/i,
|
||||
/(?:researching|investigating|looking into) (?:this|that|it)/i,
|
||||
/let me (?:take a )?(?:closer )?look/i,
|
||||
];
|
||||
|
||||
/**
|
||||
* Check if a message contains phrases that imply deferred/async work.
|
||||
* The wizard can't actually support this - we need to auto-continue.
|
||||
*/
|
||||
function containsDeferredResponsePhrase(message: string): boolean {
|
||||
return DEFERRED_RESPONSE_PATTERNS.some((pattern) => pattern.test(message));
|
||||
}
|
||||
|
||||
/**
|
||||
* ConfidenceMeter - Horizontal progress bar with gradient fill
|
||||
*/
|
||||
@@ -525,6 +547,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
const [announcement, setAnnouncement] = useState('');
|
||||
const [announcementKey, setAnnouncementKey] = useState(0);
|
||||
|
||||
// Pending auto-continue message (when AI says "let me research this")
|
||||
const [pendingAutoContinue, setPendingAutoContinue] = useState<string | null>(null);
|
||||
|
||||
// Track previous ready state to avoid duplicate announcements
|
||||
const prevReadyRef = useRef(state.isReadyToProceed);
|
||||
|
||||
@@ -544,6 +569,9 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
const inputRef = useRef<HTMLTextAreaElement>(null);
|
||||
// Immediate send guard to prevent race conditions from rapid clicking
|
||||
const isSendingRef = useRef(false);
|
||||
// Track if we've already triggered auto-continue for the current exchange
|
||||
// This prevents infinite loops if the AI keeps saying "let me research"
|
||||
const autoContinueTriggeredRef = useRef(false);
|
||||
|
||||
// Scroll to bottom when messages change
|
||||
const scrollToBottom = useCallback(() => {
|
||||
@@ -559,6 +587,38 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
inputRef.current?.focus();
|
||||
}, []);
|
||||
|
||||
// Handle pending auto-continue (when AI says "let me research this")
|
||||
// We set the input and call handleSendMessage after a delay
|
||||
useEffect(() => {
|
||||
if (pendingAutoContinue && !state.isConversationLoading && !isSendingRef.current) {
|
||||
const message = pendingAutoContinue;
|
||||
setPendingAutoContinue(null);
|
||||
|
||||
// Small delay to let the UI update and show the AI's response
|
||||
const timeoutId = setTimeout(() => {
|
||||
// Set the input value first so handleSendMessage picks it up
|
||||
setInputValue(message);
|
||||
}, 800);
|
||||
|
||||
return () => clearTimeout(timeoutId);
|
||||
}
|
||||
}, [pendingAutoContinue, state.isConversationLoading]);
|
||||
|
||||
// Store handleSendMessage in a ref so we can call it from the effect
|
||||
const handleSendMessageRef = useRef<(() => void) | null>(null);
|
||||
|
||||
// Effect to trigger send when input is set to the auto-continue message
|
||||
useEffect(() => {
|
||||
if (
|
||||
inputValue === 'Please proceed with your analysis.' &&
|
||||
!state.isConversationLoading &&
|
||||
!isSendingRef.current &&
|
||||
handleSendMessageRef.current
|
||||
) {
|
||||
handleSendMessageRef.current();
|
||||
}
|
||||
}, [inputValue, state.isConversationLoading]);
|
||||
|
||||
// Initialize conversation manager when entering this screen
|
||||
useEffect(() => {
|
||||
let mounted = true;
|
||||
@@ -686,6 +746,12 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
// Set immediate guard before any async work
|
||||
isSendingRef.current = true;
|
||||
|
||||
// Reset auto-continue flag if this is a user-initiated message (not auto-continue)
|
||||
// This allows auto-continue to trigger again for the next exchange if needed
|
||||
if (trimmedInput !== 'Please proceed with your analysis.') {
|
||||
autoContinueTriggeredRef.current = false;
|
||||
}
|
||||
|
||||
// Clear input immediately and reset textarea height
|
||||
setInputValue('');
|
||||
if (inputRef.current) {
|
||||
@@ -846,6 +912,21 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
|
||||
// Reset error retry count on success
|
||||
setErrorRetryCount(0);
|
||||
|
||||
// Check if the AI said something that implies async work (e.g., "let me research this")
|
||||
// The wizard can't support async operations - each message is a single turn.
|
||||
// If we detect this pattern and haven't already auto-continued, schedule a follow-up.
|
||||
const messageContent = sendResult.response.structured?.message || sendResult.response.rawText;
|
||||
if (
|
||||
messageContent &&
|
||||
containsDeferredResponsePhrase(messageContent) &&
|
||||
!autoContinueTriggeredRef.current
|
||||
) {
|
||||
console.log('[ConversationScreen] Detected deferred response phrase, scheduling auto-continue');
|
||||
autoContinueTriggeredRef.current = true;
|
||||
// Set pending auto-continue - an effect will handle actually sending
|
||||
setPendingAutoContinue('Please proceed with your analysis.');
|
||||
}
|
||||
}
|
||||
},
|
||||
onError: (error) => {
|
||||
@@ -893,6 +974,11 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
setIsReadyToProceed,
|
||||
]);
|
||||
|
||||
// Keep ref updated with current handleSendMessage for auto-continue effect
|
||||
useEffect(() => {
|
||||
handleSendMessageRef.current = handleSendMessage;
|
||||
}, [handleSendMessage]);
|
||||
|
||||
/**
|
||||
* Auto-send initial message when continuing with existing docs
|
||||
* This triggers the AI to analyze the docs and provide a synopsis
|
||||
@@ -1367,6 +1453,22 @@ export function ConversationScreen({ theme, showThinking, setShowThinking }: Con
|
||||
borderColor: theme.colors.border,
|
||||
}}
|
||||
>
|
||||
{/* "Your turn" indicator - shows when AI responded and waiting for user */}
|
||||
{!state.isConversationLoading &&
|
||||
state.conversationHistory.length > 0 &&
|
||||
state.conversationHistory[state.conversationHistory.length - 1].role === 'assistant' &&
|
||||
state.confidenceLevel < READY_CONFIDENCE_THRESHOLD && (
|
||||
<div
|
||||
className="flex items-center gap-2 mb-2 text-xs"
|
||||
style={{ color: theme.colors.accent }}
|
||||
>
|
||||
<span
|
||||
className="w-2 h-2 rounded-full animate-pulse"
|
||||
style={{ backgroundColor: theme.colors.accent }}
|
||||
/>
|
||||
<span>Your turn — continue the conversation</span>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex gap-3">
|
||||
<div className="flex-1 relative flex items-center">
|
||||
<textarea
|
||||
|
||||
Reference in New Issue
Block a user