## CHANGES

- Fixed image attachment support for Codex and OpenCode agents 🖼️
- Added file-based image arguments for non-stream agents 📁
- Implemented temp file creation for image attachments 💾
- Enhanced integration tests with image upload capabilities 🧪
- Improved cleanup of temporary image files on exit 🧹
- Fixed lightbox keyboard navigation after image deletion ⌨️
- Repositioned thinking status dropdown to prevent overflow 📍
- Added proper image argument builders for each agent 🔧
- Streamlined image handling across different agent types 🎯
- Enhanced process manager with multi-image support 🚀
This commit is contained in:
Pedram Amini
2025-12-19 13:55:07 -06:00
parent 55e0ad51e2
commit f634dc43ef
6 changed files with 163 additions and 6 deletions

View File

@@ -26,10 +26,16 @@ import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { spawn, ChildProcess } from 'child_process';
import { promisify } from 'util';
import { exec } from 'child_process';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { getAgentCapabilities } from '../../main/agent-capabilities';
// Promise-returning wrapper around child_process.exec so commands can be awaited
const execAsync = promisify(exec);
// Path to test image fixture (resolved relative to this file's directory)
const TEST_IMAGE_PATH = path.join(__dirname, '../fixtures/maestro-test-image.png');
// Skip integration tests by default - they make real API calls and may incur costs.
// Set RUN_INTEGRATION_TESTS=true to enable them; any other value (or unset) keeps them skipped.
const SKIP_INTEGRATION = process.env.RUN_INTEGRATION_TESTS !== 'true';
@@ -55,6 +61,10 @@ interface ProviderConfig {
* - process-manager.ts (--input-format stream-json for images)
*/
buildInitialArgs: (prompt: string, options?: { images?: string[] }) => string[];
/** Build args for message with image (file path) - for agents that use file-based image args */
buildImageArgs?: (prompt: string, imagePath: string) => string[];
/** Build stdin content for stream-json mode (for Claude Code) */
buildStreamJsonInput?: (prompt: string, imageBase64: string, mediaType: string) => string;
/** Build args for follow-up message (with session) */
buildResumeArgs: (sessionId: string, prompt: string) => string[];
/** Parse session ID from output */
@@ -131,6 +141,33 @@ const PROVIDERS: ProviderConfig[] = [
// Success is decided by the exit status alone; the captured output is not inspected.
isSuccessful: (output: string, exitCode: number) => exitCode === 0,
/**
 * Serialize one user turn (image block first, then the text prompt) as the
 * stream-json payload for Claude Code.
 * Kept in step with buildStreamJsonMessage() in process-manager.ts.
 */
buildStreamJsonInput: (prompt: string, imageBase64: string, mediaType: string) => {
  // Image content block carrying the base64 payload and its media type
  const imageBlock = {
    type: 'image',
    source: {
      type: 'base64',
      media_type: mediaType,
      data: imageBase64,
    },
  };
  // Text content block carrying the prompt
  const textBlock = { type: 'text', text: prompt };

  return JSON.stringify({
    type: 'user',
    message: {
      role: 'user',
      content: [imageBlock, textBlock],
    },
  });
},
},
{
name: 'Codex',
@@ -232,6 +269,20 @@ const PROVIDERS: ProviderConfig[] = [
}
return false;
},
/**
 * Codex command line that attaches an image from disk.
 * Uses the same flag as agent-detector.ts: imageArgs: (imagePath) => ['-i', imagePath]
 */
buildImageArgs: (prompt: string, imagePath: string) => {
  const execFlags = [
    'exec',
    '--dangerously-bypass-approvals-and-sandbox',
    '--skip-git-repo-check',
    '--json',
  ];
  const workingDirFlags = ['-C', TEST_CWD];
  const imageFlags = ['-i', imagePath];
  // '--' precedes the prompt so it is passed as the final positional argument
  return [...execFlags, ...workingDirFlags, ...imageFlags, '--', prompt];
},
},
{
name: 'OpenCode',
@@ -308,6 +359,17 @@ const PROVIDERS: ProviderConfig[] = [
// Success is decided by the exit status alone; the captured output is not inspected.
isSuccessful: (output: string, exitCode: number) => exitCode === 0,
/**
 * OpenCode command line that attaches an image from disk.
 * Uses the same flag as agent-detector.ts: imageArgs: (imagePath) => ['-f', imagePath]
 */
buildImageArgs: (prompt: string, imagePath: string) => {
  const runFlags = ['run', '--format', 'json'];
  const imageFlags = ['-f', imagePath];
  // '--' precedes the prompt so it is passed as the final positional argument
  return [...runFlags, ...imageFlags, '--', prompt];
},
},
];
@@ -325,11 +387,13 @@ async function isProviderAvailable(provider: ProviderConfig): Promise<boolean> {
/**
* Run a provider command and capture output
* @param stdinContent - Optional content to write to stdin before closing (for stream-json mode)
*/
function runProvider(
provider: ProviderConfig,
args: string[],
cwd: string = TEST_CWD
cwd: string = TEST_CWD,
stdinContent?: string
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
return new Promise((resolve) => {
let stdout = '';
@@ -342,7 +406,11 @@ function runProvider(
stdio: ['pipe', 'pipe', 'pipe'],
});
// Close stdin immediately to signal EOF (prevents processes waiting for input)
// If we have stdin content, write it and then close
if (stdinContent) {
proc.stdin?.write(stdinContent + '\n');
}
// Close stdin to signal EOF (prevents processes waiting for input)
proc.stdin?.end();
proc.stdout?.on('data', (data) => {

View File

@@ -42,6 +42,7 @@ export interface AgentConfig {
modelArgs?: (modelId: string) => string[]; // Function to build model selection args (e.g., ['--model', modelId])
yoloModeArgs?: string[]; // Args for YOLO/full-access mode (e.g., ['--dangerously-bypass-approvals-and-sandbox'])
workingDirArgs?: (dir: string) => string[]; // Function to build working directory args (e.g., ['-C', dir])
imageArgs?: (imagePath: string) => string[]; // Function to build image attachment args (e.g., ['-i', imagePath] for Codex)
}
const AGENT_DEFINITIONS: Omit<AgentConfig, 'available' | 'path' | 'capabilities'>[] = [
@@ -83,6 +84,7 @@ const AGENT_DEFINITIONS: Omit<AgentConfig, 'available' | 'path' | 'capabilities'
readOnlyArgs: ['--sandbox', 'read-only'], // Read-only/plan mode
yoloModeArgs: ['--dangerously-bypass-approvals-and-sandbox'], // Full access mode
workingDirArgs: (dir: string) => ['-C', dir], // Set working directory
imageArgs: (imagePath: string) => ['-i', imagePath], // Image attachment: codex exec -i /path/to/image.png
// Agent-specific configuration options shown in UI
configOptions: [
{
@@ -123,6 +125,7 @@ const AGENT_DEFINITIONS: Omit<AgentConfig, 'available' | 'path' | 'capabilities'
readOnlyArgs: ['--agent', 'plan'], // Read-only/plan mode
modelArgs: (modelId: string) => ['--model', modelId], // Model selection (e.g., 'ollama/qwen3:8b')
yoloModeArgs: ['run'], // 'run' subcommand auto-approves all permissions (YOLO mode is implicit)
imageArgs: (imagePath: string) => ['-f', imagePath], // Image/file attachment: opencode run -f /path/to/image.png
// Agent-specific configuration options shown in UI
configOptions: [
{

View File

@@ -293,6 +293,7 @@ export function registerProcessHandlers(deps: ProcessHandlerDependencies): void
shellEnvVars: shellEnvVars, // Shell-specific env vars (for terminal sessions)
contextWindow, // Pass configured context window to process manager
customEnvVars: effectiveCustomEnvVars, // Pass custom env vars (session-level or agent-level)
imageArgs: agent?.imageArgs, // Function to build image CLI args (for Codex, OpenCode)
});
logger.info(`Process spawned successfully`, LOG_CONTEXT, {

View File

@@ -1,6 +1,9 @@
import { spawn, ChildProcess } from 'child_process';
import { EventEmitter } from 'events';
import * as pty from 'node-pty';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { stripControlSequences, stripAllAnsiCodes } from './utils/terminalFilter';
import { logger } from './utils/logger';
import { getOutputParser, type ParsedEvent, type AgentOutputParser } from './parsers';
@@ -50,7 +53,8 @@ interface ProcessConfig {
shell?: string; // Shell to use for terminal sessions (e.g., 'zsh', 'bash', 'fish', or full path)
shellArgs?: string; // Additional CLI arguments for shell sessions (e.g., '--login')
shellEnvVars?: Record<string, string>; // Environment variables for shell sessions
images?: string[]; // Base64 data URLs for images (passed via stream-json input)
images?: string[]; // Base64 data URLs for images (passed via stream-json input or file args)
imageArgs?: (imagePath: string) => string[]; // Function to build image CLI args (e.g., ['-i', path] for Codex)
contextWindow?: number; // Configured context window size (0 or undefined = not configured, hide UI)
customEnvVars?: Record<string, string>; // Custom environment variables from user configuration
}
@@ -76,6 +80,7 @@ interface ManagedProcess {
stdoutBuffer?: string; // Buffer for accumulating stdout output (for error detection at exit)
streamedText?: string; // Buffer for accumulating streamed text from partial events (OpenCode, Codex)
contextWindow?: number; // Configured context window size (0 or undefined = not configured)
tempImageFiles?: string[]; // Temp files to clean up when process exits (for file-based image args)
}
/**
@@ -138,6 +143,50 @@ function buildStreamJsonMessage(prompt: string, images: string[]): string {
return JSON.stringify(message);
}
/**
 * Save a base64 data URL image to a uniquely named file in the OS temp directory.
 *
 * The data URL is decoded with parseDataUrl(); the media type's subtype picks
 * the file extension (falling back to `png`).
 *
 * @param dataUrl - Image encoded as a base64 data URL.
 * @param index - Position of the image within the batch; embedded in the file name.
 * @returns The full path to the temp file, or `null` when the data URL cannot be
 *          parsed or the file cannot be written (both cases are logged).
 */
function saveImageToTempFile(dataUrl: string, index: number): string | null {
  const parsed = parseDataUrl(dataUrl);
  if (!parsed) {
    logger.warn('[ProcessManager] Failed to parse data URL for temp file', 'ProcessManager');
    return null;
  }

  // Keep only the leading alphanumeric/hyphen run of the subtype
  // ("svg+xml" -> "svg", "jpeg;q=1" -> "jpeg") so the media type can never put
  // separators or other unexpected characters into the file name.
  const subtype = parsed.mediaType.split('/')[1] ?? '';
  const ext = (/^[a-z0-9][a-z0-9-]*/i.exec(subtype)?.[0] ?? 'png').toLowerCase();

  // Timestamp + index alone collides when two sessions save images within the
  // same millisecond; the pid and random suffix keep names distinct.
  const uniqueness = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2, 10)}`;
  const filename = `maestro-image-${uniqueness}-${index}.${ext}`;
  const tempPath = path.join(os.tmpdir(), filename);

  try {
    // Convert base64 to buffer and write to file.
    // 'wx' creates the file exclusively, so a pre-existing path in the shared
    // temp directory fails the write instead of being overwritten; 0o600 keeps
    // the image readable only by the current user.
    const buffer = Buffer.from(parsed.base64, 'base64');
    fs.writeFileSync(tempPath, buffer, { flag: 'wx', mode: 0o600 });
    logger.debug('[ProcessManager] Saved image to temp file', 'ProcessManager', { tempPath, size: buffer.length });
    return tempPath;
  } catch (error) {
    logger.error('[ProcessManager] Failed to save image to temp file', 'ProcessManager', { error: String(error) });
    return null;
  }
}
/**
 * Clean up temp image files.
 *
 * Best-effort: each file is removed independently and a failure is logged as a
 * warning rather than thrown, so one bad path does not stop the rest.
 *
 * @param files - Paths of the temp files to delete.
 */
function cleanupTempFiles(files: string[]): void {
  for (const file of files) {
    try {
      // Unlink directly instead of checking existence first: the check-then-delete
      // sequence can race with another cleanup pass touching the same file.
      fs.unlinkSync(file);
      logger.debug('[ProcessManager] Cleaned up temp file', 'ProcessManager', { file });
    } catch (error) {
      const code = error instanceof Error ? (error as NodeJS.ErrnoException).code : undefined;
      if (code === 'ENOENT') {
        // File is already gone; nothing to clean up and nothing worth warning about.
        continue;
      }
      logger.warn('[ProcessManager] Failed to clean up temp file', 'ProcessManager', { file, error: String(error) });
    }
  }
}
export class ProcessManager extends EventEmitter {
private processes: Map<string, ManagedProcess> = new Map();
@@ -145,18 +194,38 @@ export class ProcessManager extends EventEmitter {
* Spawn a new process for a session
*/
spawn(config: ProcessConfig): { pid: number; success: boolean } {
const { sessionId, toolType, cwd, command, args, requiresPty, prompt, shell, shellArgs, shellEnvVars, images, contextWindow, customEnvVars } = config;
const { sessionId, toolType, cwd, command, args, requiresPty, prompt, shell, shellArgs, shellEnvVars, images, imageArgs, contextWindow, customEnvVars } = config;
// For batch mode with images, use stream-json mode and send message via stdin
// For batch mode without images, append prompt to args with -- separator
const hasImages = images && images.length > 0;
const capabilities = getAgentCapabilities(toolType);
let finalArgs: string[];
let tempImageFiles: string[] = [];
if (hasImages && prompt && capabilities.supportsStreamJsonInput) {
// For agents that support stream-json input (like Claude Code), add the flag
// The prompt will be sent via stdin as a JSON message with image data
finalArgs = [...args, '--input-format', 'stream-json'];
} else if (hasImages && prompt && imageArgs) {
// For agents that use file-based image args (like Codex, OpenCode),
// save images to temp files and add CLI args
finalArgs = [...args]; // Start with base args
tempImageFiles = [];
for (let i = 0; i < images.length; i++) {
const tempPath = saveImageToTempFile(images[i], i);
if (tempPath) {
tempImageFiles.push(tempPath);
finalArgs = [...finalArgs, ...imageArgs(tempPath)];
}
}
// Add the prompt at the end
finalArgs = [...finalArgs, '--', prompt];
logger.debug('[ProcessManager] Using file-based image args', 'ProcessManager', {
sessionId,
imageCount: images.length,
tempFiles: tempImageFiles,
});
} else if (prompt) {
// Regular batch mode - prompt as CLI arg
// The -- ensures prompt is treated as positional arg, not a flag (even if it starts with --)
@@ -170,6 +239,8 @@ export class ProcessManager extends EventEmitter {
toolType,
hasPrompt: !!prompt,
hasImages,
hasImageArgs: !!imageArgs,
tempImageFilesCount: tempImageFiles.length,
promptValue: prompt,
baseArgs: args,
finalArgs
@@ -402,6 +473,7 @@ export class ProcessManager extends EventEmitter {
stderrBuffer: '', // Initialize stderr buffer for error detection at exit
stdoutBuffer: '', // Initialize stdout buffer for error detection at exit
contextWindow, // User-configured context window size (0 = not configured)
tempImageFiles: tempImageFiles.length > 0 ? tempImageFiles : undefined, // Temp files to clean up on exit
};
this.processes.set(sessionId, managedProcess);
@@ -681,6 +753,11 @@ export class ProcessManager extends EventEmitter {
}
}
// Clean up temp image files if any
if (managedProcess.tempImageFiles && managedProcess.tempImageFiles.length > 0) {
cleanupTempFiles(managedProcess.tempImageFiles);
}
this.emit('exit', sessionId, code || 0);
this.processes.delete(sessionId);
});
@@ -705,6 +782,11 @@ export class ProcessManager extends EventEmitter {
this.emit('agent-error', sessionId, agentError);
}
// Clean up temp image files if any
if (managedProcess.tempImageFiles && managedProcess.tempImageFiles.length > 0) {
cleanupTempFiles(managedProcess.tempImageFiles);
}
this.emit('data', sessionId, `[error] ${error.message}`);
this.emit('exit', sessionId, 1); // Ensure exit is emitted on error
this.processes.delete(sessionId);

View File

@@ -120,6 +120,9 @@ export function LightboxModal({ image, stagedImages, onClose, onNavigate, onDele
const newList = stagedImages.filter(img => img !== image);
onNavigate(newList[currentIndex]);
}
// Refocus the lightbox after deletion so keyboard navigation continues working
setTimeout(() => lightboxRef.current?.focus(), 0);
}, [image, stagedImages, currentIndex, onDelete, onNavigate, onClose]);
// Default theme for ConfirmModal if not provided

View File

@@ -447,9 +447,9 @@ function ThinkingStatusPillInner({ sessions, theme, onSessionClick, namedSession
</span>
</div>
{/* Expanded dropdown - uses padding to create hover bridge between trigger and dropdown */}
{/* Expanded dropdown - positioned above to avoid going off-screen */}
{isExpanded && (
<div className="absolute right-0 top-full pt-1 z-50">
<div className="absolute right-0 bottom-full pb-1 z-50">
<div
className="min-w-[320px] rounded-lg shadow-xl overflow-hidden"
style={{