mirror of
https://github.com/jlengrand/Maestro.git
synced 2026-03-10 08:31:19 +00:00
## CHANGES
- Fixed image attachment support for Codex and OpenCode agents 🖼️
- Added file-based image arguments for non-stream agents 📁
- Implemented temp file creation for image attachments 💾
- Enhanced integration tests with image upload capabilities 🧪
- Improved cleanup of temporary image files on exit 🧹
- Fixed lightbox keyboard navigation after image deletion ⌨️
- Repositioned thinking status dropdown to prevent overflow 📍
- Added proper image argument builders for each agent 🔧
- Streamlined image handling across different agent types 🎯
- Enhanced process manager with multi-image support 🚀
This commit is contained in:
@@ -26,10 +26,16 @@ import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { spawn, ChildProcess } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import { exec } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { getAgentCapabilities } from '../../main/agent-capabilities';
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
// Path to test image fixture
|
||||
const TEST_IMAGE_PATH = path.join(__dirname, '../fixtures/maestro-test-image.png');
|
||||
|
||||
// Skip integration tests by default - they make real API calls and may incur costs.
|
||||
// Set RUN_INTEGRATION_TESTS=true to enable them.
|
||||
const SKIP_INTEGRATION = process.env.RUN_INTEGRATION_TESTS !== 'true';
|
||||
@@ -55,6 +61,10 @@ interface ProviderConfig {
|
||||
* - process-manager.ts (--input-format stream-json for images)
|
||||
*/
|
||||
buildInitialArgs: (prompt: string, options?: { images?: string[] }) => string[];
|
||||
/** Build args for message with image (file path) - for agents that use file-based image args */
|
||||
buildImageArgs?: (prompt: string, imagePath: string) => string[];
|
||||
/** Build stdin content for stream-json mode (for Claude Code) */
|
||||
buildStreamJsonInput?: (prompt: string, imageBase64: string, mediaType: string) => string;
|
||||
/** Build args for follow-up message (with session) */
|
||||
buildResumeArgs: (sessionId: string, prompt: string) => string[];
|
||||
/** Parse session ID from output */
|
||||
@@ -131,6 +141,33 @@ const PROVIDERS: ProviderConfig[] = [
|
||||
isSuccessful: (output: string, exitCode: number) => {
|
||||
return exitCode === 0;
|
||||
},
|
||||
/**
|
||||
* Build stream-json input for Claude Code with image.
|
||||
* This mirrors buildStreamJsonMessage() in process-manager.ts
|
||||
*/
|
||||
buildStreamJsonInput: (prompt: string, imageBase64: string, mediaType: string) => {
|
||||
const message = {
|
||||
type: 'user',
|
||||
message: {
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: mediaType,
|
||||
data: imageBase64,
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
text: prompt,
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
return JSON.stringify(message);
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'Codex',
|
||||
@@ -232,6 +269,20 @@ const PROVIDERS: ProviderConfig[] = [
|
||||
}
|
||||
return false;
|
||||
},
|
||||
/**
|
||||
* Build args with image file path for Codex.
|
||||
* Mirrors agent-detector.ts: imageArgs: (imagePath) => ['-i', imagePath]
|
||||
*/
|
||||
buildImageArgs: (prompt: string, imagePath: string) => [
|
||||
'exec',
|
||||
'--dangerously-bypass-approvals-and-sandbox',
|
||||
'--skip-git-repo-check',
|
||||
'--json',
|
||||
'-C', TEST_CWD,
|
||||
'-i', imagePath,
|
||||
'--',
|
||||
prompt,
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'OpenCode',
|
||||
@@ -308,6 +359,17 @@ const PROVIDERS: ProviderConfig[] = [
|
||||
isSuccessful: (output: string, exitCode: number) => {
|
||||
return exitCode === 0;
|
||||
},
|
||||
/**
|
||||
* Build args with image file path for OpenCode.
|
||||
* Mirrors agent-detector.ts: imageArgs: (imagePath) => ['-f', imagePath]
|
||||
*/
|
||||
buildImageArgs: (prompt: string, imagePath: string) => [
|
||||
'run',
|
||||
'--format', 'json',
|
||||
'-f', imagePath,
|
||||
'--',
|
||||
prompt,
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
@@ -325,11 +387,13 @@ async function isProviderAvailable(provider: ProviderConfig): Promise<boolean> {
|
||||
|
||||
/**
|
||||
* Run a provider command and capture output
|
||||
* @param stdinContent - Optional content to write to stdin before closing (for stream-json mode)
|
||||
*/
|
||||
function runProvider(
|
||||
provider: ProviderConfig,
|
||||
args: string[],
|
||||
cwd: string = TEST_CWD
|
||||
cwd: string = TEST_CWD,
|
||||
stdinContent?: string
|
||||
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
|
||||
return new Promise((resolve) => {
|
||||
let stdout = '';
|
||||
@@ -342,7 +406,11 @@ function runProvider(
|
||||
stdio: ['pipe', 'pipe', 'pipe'],
|
||||
});
|
||||
|
||||
// Close stdin immediately to signal EOF (prevents processes waiting for input)
|
||||
// If we have stdin content, write it and then close
|
||||
if (stdinContent) {
|
||||
proc.stdin?.write(stdinContent + '\n');
|
||||
}
|
||||
// Close stdin to signal EOF (prevents processes waiting for input)
|
||||
proc.stdin?.end();
|
||||
|
||||
proc.stdout?.on('data', (data) => {
|
||||
|
||||
@@ -42,6 +42,7 @@ export interface AgentConfig {
|
||||
modelArgs?: (modelId: string) => string[]; // Function to build model selection args (e.g., ['--model', modelId])
|
||||
yoloModeArgs?: string[]; // Args for YOLO/full-access mode (e.g., ['--dangerously-bypass-approvals-and-sandbox'])
|
||||
workingDirArgs?: (dir: string) => string[]; // Function to build working directory args (e.g., ['-C', dir])
|
||||
imageArgs?: (imagePath: string) => string[]; // Function to build image attachment args (e.g., ['-i', imagePath] for Codex)
|
||||
}
|
||||
|
||||
const AGENT_DEFINITIONS: Omit<AgentConfig, 'available' | 'path' | 'capabilities'>[] = [
|
||||
@@ -83,6 +84,7 @@ const AGENT_DEFINITIONS: Omit<AgentConfig, 'available' | 'path' | 'capabilities'
|
||||
readOnlyArgs: ['--sandbox', 'read-only'], // Read-only/plan mode
|
||||
yoloModeArgs: ['--dangerously-bypass-approvals-and-sandbox'], // Full access mode
|
||||
workingDirArgs: (dir: string) => ['-C', dir], // Set working directory
|
||||
imageArgs: (imagePath: string) => ['-i', imagePath], // Image attachment: codex exec -i /path/to/image.png
|
||||
// Agent-specific configuration options shown in UI
|
||||
configOptions: [
|
||||
{
|
||||
@@ -123,6 +125,7 @@ const AGENT_DEFINITIONS: Omit<AgentConfig, 'available' | 'path' | 'capabilities'
|
||||
readOnlyArgs: ['--agent', 'plan'], // Read-only/plan mode
|
||||
modelArgs: (modelId: string) => ['--model', modelId], // Model selection (e.g., 'ollama/qwen3:8b')
|
||||
yoloModeArgs: ['run'], // 'run' subcommand auto-approves all permissions (YOLO mode is implicit)
|
||||
imageArgs: (imagePath: string) => ['-f', imagePath], // Image/file attachment: opencode run -f /path/to/image.png
|
||||
// Agent-specific configuration options shown in UI
|
||||
configOptions: [
|
||||
{
|
||||
|
||||
@@ -293,6 +293,7 @@ export function registerProcessHandlers(deps: ProcessHandlerDependencies): void
|
||||
shellEnvVars: shellEnvVars, // Shell-specific env vars (for terminal sessions)
|
||||
contextWindow, // Pass configured context window to process manager
|
||||
customEnvVars: effectiveCustomEnvVars, // Pass custom env vars (session-level or agent-level)
|
||||
imageArgs: agent?.imageArgs, // Function to build image CLI args (for Codex, OpenCode)
|
||||
});
|
||||
|
||||
logger.info(`Process spawned successfully`, LOG_CONTEXT, {
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import { spawn, ChildProcess } from 'child_process';
|
||||
import { EventEmitter } from 'events';
|
||||
import * as pty from 'node-pty';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { stripControlSequences, stripAllAnsiCodes } from './utils/terminalFilter';
|
||||
import { logger } from './utils/logger';
|
||||
import { getOutputParser, type ParsedEvent, type AgentOutputParser } from './parsers';
|
||||
@@ -50,7 +53,8 @@ interface ProcessConfig {
|
||||
shell?: string; // Shell to use for terminal sessions (e.g., 'zsh', 'bash', 'fish', or full path)
|
||||
shellArgs?: string; // Additional CLI arguments for shell sessions (e.g., '--login')
|
||||
shellEnvVars?: Record<string, string>; // Environment variables for shell sessions
|
||||
images?: string[]; // Base64 data URLs for images (passed via stream-json input)
|
||||
images?: string[]; // Base64 data URLs for images (passed via stream-json input or file args)
|
||||
imageArgs?: (imagePath: string) => string[]; // Function to build image CLI args (e.g., ['-i', path] for Codex)
|
||||
contextWindow?: number; // Configured context window size (0 or undefined = not configured, hide UI)
|
||||
customEnvVars?: Record<string, string>; // Custom environment variables from user configuration
|
||||
}
|
||||
@@ -76,6 +80,7 @@ interface ManagedProcess {
|
||||
stdoutBuffer?: string; // Buffer for accumulating stdout output (for error detection at exit)
|
||||
streamedText?: string; // Buffer for accumulating streamed text from partial events (OpenCode, Codex)
|
||||
contextWindow?: number; // Configured context window size (0 or undefined = not configured)
|
||||
tempImageFiles?: string[]; // Temp files to clean up when process exits (for file-based image args)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -138,6 +143,50 @@ function buildStreamJsonMessage(prompt: string, images: string[]): string {
|
||||
return JSON.stringify(message);
|
||||
}
|
||||
|
||||
/**
 * Save a base64 data URL image to a temp file.
 *
 * The file is written into `os.tmpdir()` under a name of the form
 * `maestro-image-<timestamp>-<index>-<pid>-<random>.<ext>`. It is created
 * exclusively (the write fails instead of overwriting an existing path) and
 * with owner-only permissions, so concurrent spawns cannot clobber each
 * other's images.
 *
 * @param dataUrl - Data URL to decode; it is handed to `parseDataUrl`.
 * @param index - Position of the image in the caller's list; used in the file name.
 * @returns The full path to the temp file, or `null` if the data URL could not
 *          be parsed or the file could not be written.
 */
function saveImageToTempFile(dataUrl: string, index: number): string | null {
  const parsed = parseDataUrl(dataUrl);
  if (!parsed) {
    logger.warn('[ProcessManager] Failed to parse data URL for temp file', 'ProcessManager');
    return null;
  }

  // Determine file extension from media type.
  // Only the base subtype is kept (e.g. "svg" from "image/svg+xml"), and any
  // character that is not alphanumeric or "-" is dropped so the media type can
  // never inject path separators or ".." segments into the file name.
  const subtype = parsed.mediaType.split('/')[1] ?? '';
  const [baseSubtype = ''] = subtype.split(/[+;]/);
  const ext = baseSubtype.replace(/[^a-zA-Z0-9-]/g, '') || 'png';

  // Timestamp + index alone collide when two spawns happen within the same
  // millisecond; the pid and a random token keep names unique.
  const uniqueSuffix = `${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
  const filename = `maestro-image-${Date.now()}-${index}-${uniqueSuffix}.${ext}`;
  const tempPath = path.join(os.tmpdir(), filename);

  try {
    // Convert base64 to buffer and write to file.
    // 'wx' fails if the path already exists (never overwrites or follows a
    // pre-created entry); 0o600 keeps the attachment private to the current user.
    const buffer = Buffer.from(parsed.base64, 'base64');
    fs.writeFileSync(tempPath, buffer, { flag: 'wx', mode: 0o600 });
    logger.debug('[ProcessManager] Saved image to temp file', 'ProcessManager', { tempPath, size: buffer.length });
    return tempPath;
  } catch (error) {
    logger.error('[ProcessManager] Failed to save image to temp file', 'ProcessManager', { error: String(error) });
    return null;
  }
}
|
||||
|
||||
/**
 * Delete temp image files from disk.
 *
 * Each path is handled independently: a path that no longer exists is skipped
 * silently, a successful removal is logged at debug level, and a failure is
 * logged as a warning without interrupting the remaining deletions.
 *
 * @param files - Paths of the temp files to remove.
 */
function cleanupTempFiles(files: string[]): void {
  files.forEach((tempFile) => {
    try {
      // Nothing to do when the file is already gone.
      if (!fs.existsSync(tempFile)) {
        return;
      }
      fs.unlinkSync(tempFile);
      logger.debug('[ProcessManager] Cleaned up temp file', 'ProcessManager', { file: tempFile });
    } catch (err) {
      // Best effort: report the failure and move on to the next file.
      logger.warn('[ProcessManager] Failed to clean up temp file', 'ProcessManager', { file: tempFile, error: String(err) });
    }
  });
}
|
||||
|
||||
export class ProcessManager extends EventEmitter {
|
||||
private processes: Map<string, ManagedProcess> = new Map();
|
||||
|
||||
@@ -145,18 +194,38 @@ export class ProcessManager extends EventEmitter {
|
||||
* Spawn a new process for a session
|
||||
*/
|
||||
spawn(config: ProcessConfig): { pid: number; success: boolean } {
|
||||
const { sessionId, toolType, cwd, command, args, requiresPty, prompt, shell, shellArgs, shellEnvVars, images, contextWindow, customEnvVars } = config;
|
||||
const { sessionId, toolType, cwd, command, args, requiresPty, prompt, shell, shellArgs, shellEnvVars, images, imageArgs, contextWindow, customEnvVars } = config;
|
||||
|
||||
// For batch mode with images, use stream-json mode and send message via stdin
|
||||
// For batch mode without images, append prompt to args with -- separator
|
||||
const hasImages = images && images.length > 0;
|
||||
const capabilities = getAgentCapabilities(toolType);
|
||||
let finalArgs: string[];
|
||||
let tempImageFiles: string[] = [];
|
||||
|
||||
if (hasImages && prompt && capabilities.supportsStreamJsonInput) {
|
||||
// For agents that support stream-json input (like Claude Code), add the flag
|
||||
// The prompt will be sent via stdin as a JSON message with image data
|
||||
finalArgs = [...args, '--input-format', 'stream-json'];
|
||||
} else if (hasImages && prompt && imageArgs) {
|
||||
// For agents that use file-based image args (like Codex, OpenCode),
|
||||
// save images to temp files and add CLI args
|
||||
finalArgs = [...args]; // Start with base args
|
||||
tempImageFiles = [];
|
||||
for (let i = 0; i < images.length; i++) {
|
||||
const tempPath = saveImageToTempFile(images[i], i);
|
||||
if (tempPath) {
|
||||
tempImageFiles.push(tempPath);
|
||||
finalArgs = [...finalArgs, ...imageArgs(tempPath)];
|
||||
}
|
||||
}
|
||||
// Add the prompt at the end
|
||||
finalArgs = [...finalArgs, '--', prompt];
|
||||
logger.debug('[ProcessManager] Using file-based image args', 'ProcessManager', {
|
||||
sessionId,
|
||||
imageCount: images.length,
|
||||
tempFiles: tempImageFiles,
|
||||
});
|
||||
} else if (prompt) {
|
||||
// Regular batch mode - prompt as CLI arg
|
||||
// The -- ensures prompt is treated as positional arg, not a flag (even if it starts with --)
|
||||
@@ -170,6 +239,8 @@ export class ProcessManager extends EventEmitter {
|
||||
toolType,
|
||||
hasPrompt: !!prompt,
|
||||
hasImages,
|
||||
hasImageArgs: !!imageArgs,
|
||||
tempImageFilesCount: tempImageFiles.length,
|
||||
promptValue: prompt,
|
||||
baseArgs: args,
|
||||
finalArgs
|
||||
@@ -402,6 +473,7 @@ export class ProcessManager extends EventEmitter {
|
||||
stderrBuffer: '', // Initialize stderr buffer for error detection at exit
|
||||
stdoutBuffer: '', // Initialize stdout buffer for error detection at exit
|
||||
contextWindow, // User-configured context window size (0 = not configured)
|
||||
tempImageFiles: tempImageFiles.length > 0 ? tempImageFiles : undefined, // Temp files to clean up on exit
|
||||
};
|
||||
|
||||
this.processes.set(sessionId, managedProcess);
|
||||
@@ -681,6 +753,11 @@ export class ProcessManager extends EventEmitter {
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up temp image files if any
|
||||
if (managedProcess.tempImageFiles && managedProcess.tempImageFiles.length > 0) {
|
||||
cleanupTempFiles(managedProcess.tempImageFiles);
|
||||
}
|
||||
|
||||
this.emit('exit', sessionId, code || 0);
|
||||
this.processes.delete(sessionId);
|
||||
});
|
||||
@@ -705,6 +782,11 @@ export class ProcessManager extends EventEmitter {
|
||||
this.emit('agent-error', sessionId, agentError);
|
||||
}
|
||||
|
||||
// Clean up temp image files if any
|
||||
if (managedProcess.tempImageFiles && managedProcess.tempImageFiles.length > 0) {
|
||||
cleanupTempFiles(managedProcess.tempImageFiles);
|
||||
}
|
||||
|
||||
this.emit('data', sessionId, `[error] ${error.message}`);
|
||||
this.emit('exit', sessionId, 1); // Ensure exit is emitted on error
|
||||
this.processes.delete(sessionId);
|
||||
|
||||
@@ -120,6 +120,9 @@ export function LightboxModal({ image, stagedImages, onClose, onNavigate, onDele
|
||||
const newList = stagedImages.filter(img => img !== image);
|
||||
onNavigate(newList[currentIndex]);
|
||||
}
|
||||
|
||||
// Refocus the lightbox after deletion so keyboard navigation continues working
|
||||
setTimeout(() => lightboxRef.current?.focus(), 0);
|
||||
}, [image, stagedImages, currentIndex, onDelete, onNavigate, onClose]);
|
||||
|
||||
// Default theme for ConfirmModal if not provided
|
||||
|
||||
@@ -447,9 +447,9 @@ function ThinkingStatusPillInner({ sessions, theme, onSessionClick, namedSession
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Expanded dropdown - uses padding to create hover bridge between trigger and dropdown */}
|
||||
{/* Expanded dropdown - positioned above to avoid going off-screen */}
|
||||
{isExpanded && (
|
||||
<div className="absolute right-0 top-full pt-1 z-50">
|
||||
<div className="absolute right-0 bottom-full pb-1 z-50">
|
||||
<div
|
||||
className="min-w-[320px] rounded-lg shadow-xl overflow-hidden"
|
||||
style={{
|
||||
|
||||
Reference in New Issue
Block a user