diff --git a/whatsapp-bridge/main.go b/whatsapp-bridge/main.go index e83428a..5a4ecd5 100644 --- a/whatsapp-bridge/main.go +++ b/whatsapp-bridge/main.go @@ -3,8 +3,11 @@ package main import ( "context" "database/sql" + "encoding/binary" "encoding/json" "fmt" + "math" + "math/rand" "net/http" "os" "os/signal" @@ -16,6 +19,8 @@ import ( _ "github.com/mattn/go-sqlite3" "github.com/mdp/qrterminal" + "bytes" + "go.mau.fi/whatsmeow" waProto "go.mau.fi/whatsmeow/binary/proto" "go.mau.fi/whatsmeow/store/sqlstore" @@ -181,10 +186,11 @@ type SendMessageResponse struct { type SendMessageRequest struct { Recipient string `json:"recipient"` Message string `json:"message"` + MediaPath string `json:"media_path,omitempty"` } // Function to send a WhatsApp message -func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message string) (bool, string) { +func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message string, mediaPath string) (bool, string) { if !client.IsConnected() { return false, "Not connected to WhatsApp" } @@ -210,10 +216,140 @@ func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message str } } - // Send the message - _, err = client.SendMessage(context.Background(), recipientJID, &waProto.Message{ - Conversation: proto.String(message), - }) + msg := &waProto.Message{} + + // Check if we have media to send + if mediaPath != "" { + // Read media file + mediaData, err := os.ReadFile(mediaPath) + if err != nil { + return false, fmt.Sprintf("Error reading media file: %v", err) + } + + // Determine media type and mime type based on file extension + fileExt := strings.ToLower(mediaPath[strings.LastIndex(mediaPath, ".")+1:]) + var mediaType whatsmeow.MediaType + var mimeType string + + // Handle different media types + switch fileExt { + // Image types + case "jpg", "jpeg": + mediaType = whatsmeow.MediaImage + mimeType = "image/jpeg" + case "png": + mediaType = whatsmeow.MediaImage + mimeType = "image/png" + case "gif": + 
mediaType = whatsmeow.MediaImage + mimeType = "image/gif" + case "webp": + mediaType = whatsmeow.MediaImage + mimeType = "image/webp" + + // Audio types + case "ogg": + mediaType = whatsmeow.MediaAudio + mimeType = "audio/ogg; codecs=opus" + + // Video types + case "mp4": + mediaType = whatsmeow.MediaVideo + mimeType = "video/mp4" + case "avi": + mediaType = whatsmeow.MediaVideo + mimeType = "video/avi" + case "mov": + mediaType = whatsmeow.MediaVideo + mimeType = "video/quicktime" + + // Document types (for any other file type) + default: + mediaType = whatsmeow.MediaDocument + mimeType = "application/octet-stream" + } + + // Upload media to WhatsApp servers + resp, err := client.Upload(context.Background(), mediaData, mediaType) + if err != nil { + return false, fmt.Sprintf("Error uploading media: %v", err) + } + + fmt.Println("Media uploaded", resp) + + // Create the appropriate message type based on media type + switch mediaType { + case whatsmeow.MediaImage: + msg.ImageMessage = &waProto.ImageMessage{ + Caption: proto.String(message), + Mimetype: proto.String(mimeType), + URL: &resp.URL, + DirectPath: &resp.DirectPath, + MediaKey: resp.MediaKey, + FileEncSHA256: resp.FileEncSHA256, + FileSHA256: resp.FileSHA256, + FileLength: &resp.FileLength, + } + case whatsmeow.MediaAudio: + // Handle ogg audio files + var seconds uint32 = 30 // Default fallback + var waveform []byte = nil + + // Try to analyze the ogg file + if strings.Contains(mimeType, "ogg") { + analyzedSeconds, analyzedWaveform, err := analyzeOggOpus(mediaData) + if err == nil { + seconds = analyzedSeconds + waveform = analyzedWaveform + } else { + return false, fmt.Sprintf("Failed to analyze Ogg Opus file: %v", err) + } + } else { + fmt.Printf("Not an Ogg Opus file: %s\n", mimeType) + } + + msg.AudioMessage = &waProto.AudioMessage{ + Mimetype: proto.String(mimeType), + URL: &resp.URL, + DirectPath: &resp.DirectPath, + MediaKey: resp.MediaKey, + FileEncSHA256: resp.FileEncSHA256, + FileSHA256: 
resp.FileSHA256, + FileLength: &resp.FileLength, + Seconds: proto.Uint32(seconds), + PTT: proto.Bool(true), + Waveform: waveform, + } + case whatsmeow.MediaVideo: + msg.VideoMessage = &waProto.VideoMessage{ + Caption: proto.String(message), + Mimetype: proto.String(mimeType), + URL: &resp.URL, + DirectPath: &resp.DirectPath, + MediaKey: resp.MediaKey, + FileEncSHA256: resp.FileEncSHA256, + FileSHA256: resp.FileSHA256, + FileLength: &resp.FileLength, + } + case whatsmeow.MediaDocument: + msg.DocumentMessage = &waProto.DocumentMessage{ + Title: proto.String(mediaPath[strings.LastIndex(mediaPath, "/")+1:]), + Caption: proto.String(message), + Mimetype: proto.String(mimeType), + URL: &resp.URL, + DirectPath: &resp.DirectPath, + MediaKey: resp.MediaKey, + FileEncSHA256: resp.FileEncSHA256, + FileSHA256: resp.FileSHA256, + FileLength: &resp.FileLength, + } + } + } else { + msg.Conversation = proto.String(message) + } + + // Send message + _, err = client.SendMessage(context.Background(), recipientJID, msg) if err != nil { return false, fmt.Sprintf("Error sending message: %v", err) @@ -227,7 +363,6 @@ func startRESTServer(client *whatsmeow.Client, port int) { // Handler for sending messages http.HandleFunc("/api/send", func(w http.ResponseWriter, r *http.Request) { // Only allow POST requests - fmt.Println("Received request to send message") if r.Method != http.MethodPost { http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) return @@ -241,13 +376,20 @@ func startRESTServer(client *whatsmeow.Client, port int) { } // Validate request - if req.Recipient == "" || req.Message == "" { - http.Error(w, "Recipient and message are required", http.StatusBadRequest) + if req.Recipient == "" { + http.Error(w, "Recipient is required", http.StatusBadRequest) return } + if req.Message == "" && req.MediaPath == "" { + http.Error(w, "Message or media path is required", http.StatusBadRequest) + return + } + + fmt.Println("Received request to send message", req.Message, 
// analyzeOggOpus extracts an approximate duration (in whole seconds) from an
// Ogg Opus file and returns it together with a synthetic waveform suitable for
// a voice message.
//
// The duration is derived from the last usable granule position in the stream
// minus the pre-skip advertised by the OpusHead header. Ogg Opus granule
// positions always count 48 kHz samples (RFC 7845), so the header's "input
// sample rate" field is logged but deliberately NOT used as the divisor.
//
// If no usable granule position is found, the duration falls back to a rough
// size-based estimate. The result is always clamped to the range [1, 300].
//
// An error is returned only when data does not start with the "OggS" capture
// pattern. Truncated or otherwise damaged files never panic; they simply yield
// a less accurate duration.
func analyzeOggOpus(data []byte) (duration uint32, waveform []byte, err error) {
	const (
		pageHeaderSize  = 27    // fixed Ogg page header, up to the segment table
		granuleRate     = 48000 // granule units per second for Opus
		opusHeadMinSize = 16    // magic(8) + version(1) + channels(1) + pre-skip(2) + rate(4)
		maxSeconds      = 300   // upper bound reported to the caller
	)

	if len(data) < 4 || string(data[:4]) != "OggS" {
		return 0, nil, fmt.Errorf("not a valid Ogg file (missing OggS signature)")
	}

	var (
		lastGranule   uint64
		preSkip       uint16
		foundOpusHead bool
	)

	// Walk the file page by page, resynchronising byte by byte on garbage.
	for i := 0; i+pageHeaderSize <= len(data); {
		if string(data[i:i+4]) != "OggS" {
			i++
			continue
		}

		granulePos := binary.LittleEndian.Uint64(data[i+6 : i+14])
		pageSeqNum := binary.LittleEndian.Uint32(data[i+18 : i+22])
		numSegments := int(data[i+26])

		tableEnd := i + pageHeaderSize + numSegments
		if tableEnd > len(data) {
			break // segment table itself is truncated
		}

		pageSize := pageHeaderSize + numSegments
		for _, segLen := range data[i+pageHeaderSize : tableEnd] {
			pageSize += int(segLen)
		}

		// The identification header lives in the first page(s) of the stream.
		if !foundOpusHead && pageSeqNum <= 1 {
			// Clamp to the bytes actually present: the segment table of a
			// truncated file can claim more payload than exists.
			pageData := data[i:min(i+pageSize, len(data))]
			headPos := bytes.Index(pageData, []byte("OpusHead"))
			if headPos >= 0 && headPos+opusHeadMinSize <= len(pageData) {
				// Offsets are relative to the start of the "OpusHead" magic.
				preSkip = binary.LittleEndian.Uint16(pageData[headPos+10 : headPos+12])
				inputRate := binary.LittleEndian.Uint32(pageData[headPos+12 : headPos+16])
				foundOpusHead = true
				fmt.Printf("Found OpusHead: sampleRate=%d, preSkip=%d\n", inputRate, preSkip)
			}
		}

		// Zero is used by header pages; all ones (-1) marks a page on which
		// no packet finishes. Neither carries a usable end position.
		if granulePos != 0 && granulePos != math.MaxUint64 {
			lastGranule = granulePos
		}

		i += pageSize
	}

	if !foundOpusHead {
		fmt.Println("Warning: OpusHead not found, using default values")
	}

	var seconds float64
	if lastGranule > 0 {
		// Guard the subtraction: a stream shorter than its pre-skip would
		// otherwise wrap around to an enormous unsigned value.
		var samples uint64
		if lastGranule > uint64(preSkip) {
			samples = lastGranule - uint64(preSkip)
		}
		exact := float64(samples) / granuleRate
		seconds = math.Ceil(exact)
		fmt.Printf("Calculated Opus duration from granule: %f seconds (lastGranule=%d)\n",
			exact, lastGranule)
	} else {
		fmt.Println("Warning: No valid granule position found, using estimation")
		seconds = math.Floor(float64(len(data)) / 2000.0) // very rough approximation
	}

	// Clamp while still in float64 so the uint32 conversion can never overflow.
	seconds = math.Max(1, math.Min(seconds, maxSeconds))
	duration = uint32(seconds)

	waveform = placeholderWaveform(duration)

	fmt.Printf("Ogg Opus analysis: size=%d bytes, calculated duration=%d sec, waveform=%d bytes\n",
		len(data), duration, len(waveform))

	return duration, waveform, nil
}

// min returns the smaller of x or y.
func min(x, y int) int {
	if x < y {
		return x
	}
	return y
}

// placeholderWaveform generates a synthetic 64-byte waveform for a voice
// message. The shape is a blend of two sine waves plus a little noise, faded
// in and out, centred on 50 and clamped to 0-100.
//
// The output is deterministic for a given duration. It uses a private random
// source seeded with the duration, so the process-wide math/rand generator is
// left untouched.
func placeholderWaveform(duration uint32) []byte {
	const waveformLength = 64
	waveform := make([]byte, waveformLength)

	rng := rand.New(rand.NewSource(int64(duration)))

	// Longer messages get a faster oscillation, capped at 120 seconds.
	baseAmplitude := 35.0
	frequencyFactor := float64(min(int(duration), 120)) / 30.0

	for i := range waveform {
		// Position within the waveform, normalised to [0, 1).
		pos := float64(i) / float64(waveformLength)

		val := baseAmplitude * math.Sin(pos*math.Pi*frequencyFactor*8)
		val += (baseAmplitude / 2) * math.Sin(pos*math.Pi*frequencyFactor*16)

		// Small jitter so the bars do not look machine-perfect.
		val += (rng.Float64() - 0.5) * 15

		// Fade in at the start and out at the end.
		val *= 0.7 + 0.3*math.Sin(pos*math.Pi)

		// Centre around 50 and keep inside 0-100.
		val = math.Max(0, math.Min(val+50, 100))

		waveform[i] = byte(val)
	}

	return waveform
}
def convert_to_opus_ogg(input_file, output_file=None, bitrate="32k", sample_rate=24000):
    """
    Convert an audio file to Opus format in an Ogg container using ffmpeg.

    Args:
        input_file (str): Path to the input audio file
        output_file (str, optional): Path to save the output file. If None, replaces the
            extension of input_file with .ogg
        bitrate (str, optional): Target bitrate for Opus encoding (default: "32k")
        sample_rate (int, optional): Sample rate for output (default: 24000)

    Returns:
        str: Path to the converted file

    Raises:
        FileNotFoundError: If the input file doesn't exist
        RuntimeError: If ffmpeg is not installed or the conversion fails
    """
    if not os.path.isfile(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")

    # If no output file is specified, replace the extension with .ogg
    if output_file is None:
        output_file = os.path.splitext(input_file)[0] + ".ogg"

    # Ensure the output directory exists (exist_ok avoids a check-then-create race)
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    cmd = [
        "ffmpeg",
        "-i", input_file,
        "-c:a", "libopus",
        "-b:a", bitrate,
        "-ar", str(sample_rate),
        "-application", "voip",      # Optimize for voice
        "-vbr", "on",                # Variable bitrate
        "-compression_level", "10",  # Maximum compression
        "-frame_duration", "60",     # 60ms frames (good for voice)
        "-y",                        # Overwrite output file if it exists
        output_file
    ]

    try:
        subprocess.run(
            cmd,
            # Never let ffmpeg read our stdin: when this module runs inside a
            # stdio-based server, stdin carries the protocol stream.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
    except FileNotFoundError as e:
        # subprocess raises this (not CalledProcessError) when the ffmpeg
        # executable itself cannot be found.
        raise RuntimeError(
            "Failed to convert audio. ffmpeg executable not found; you need to install ffmpeg"
        ) from e
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed to convert audio. You likely need to install ffmpeg {e.stderr}") from e

    return output_file


def convert_to_opus_ogg_temp(input_file, bitrate="32k", sample_rate=24000):
    """
    Convert an audio file to Opus format in an Ogg container and store in a temporary file.

    The temporary file is NOT deleted automatically on success; the caller owns it.
    If the conversion fails, the temporary file is removed before the error propagates.

    Args:
        input_file (str): Path to the input audio file
        bitrate (str, optional): Target bitrate for Opus encoding (default: "32k")
        sample_rate (int, optional): Sample rate for output (default: 24000)

    Returns:
        str: Path to the temporary file with the converted audio

    Raises:
        FileNotFoundError: If the input file doesn't exist
        RuntimeError: If ffmpeg is not installed or the conversion fails
    """
    # Reserve a unique .ogg path; close the handle so ffmpeg can write to it.
    temp_file = tempfile.NamedTemporaryFile(suffix=".ogg", delete=False)
    temp_file.close()

    try:
        convert_to_opus_ogg(input_file, temp_file.name, bitrate, sample_rate)
        return temp_file.name
    except Exception:
        # Clean up the temporary file if conversion fails
        if os.path.exists(temp_file.name):
            os.unlink(temp_file.name)
        raise


if __name__ == "__main__":
    # Example usage
    import sys

    if len(sys.argv) < 2:
        print("Usage: python audio.py input_file [output_file]")
        sys.exit(1)

    input_file = sys.argv[1]

    try:
        result = convert_to_opus_ogg_temp(input_file)
        print(f"Successfully converted to: {result}")
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)
@mcp.tool()
def send_file(recipient: str, media_path: str) -> Dict[str, Any]:
    """Send a file such as a picture, raw audio, video or document via WhatsApp to the specified recipient. For group messages use the JID.
    
    Args:
        recipient: The recipient - either a phone number with country code but no + or other symbols,
                 or a JID (e.g., "123456789@s.whatsapp.net" or a group JID like "123456789@g.us")
        media_path: The absolute path to the media file to send (image, video, document)
    
    Returns:
        A dictionary containing success status and a status message
    """
    # Delegate to the bridge client and repackage its (ok, text) pair for MCP.
    delivered, detail = whatsapp_send_file(recipient, media_path)
    reply: Dict[str, Any] = {"success": delivered}
    reply["message"] = detail
    return reply

@mcp.tool()
def send_audio_message(recipient: str, media_path: str) -> Dict[str, Any]:
    """Send any audio file as a WhatsApp audio message to the specified recipient. For group messages use the JID.
    
    Args:
        recipient: The recipient - either a phone number with country code but no + or other symbols,
                 or a JID (e.g., "123456789@s.whatsapp.net" or a group JID like "123456789@g.us")
        media_path: The absolute path to the audio file to send (will be converted to Opus .ogg if it's not a .ogg file)
    
    Returns:
        A dictionary containing success status and a status message
    """
    # Delegate to the bridge client and repackage its (ok, text) pair for MCP.
    delivered, detail = whatsapp_audio_voice_message(recipient, media_path)
    reply: Dict[str, Any] = {"success": delivered}
    reply["message"] = detail
    return reply
def send_file(recipient: str, media_path: str, timeout: Tuple[float, float] = (5.0, 300.0)) -> Tuple[bool, str]:
    """Ask the local WhatsApp bridge to send a media file to a recipient.

    Args:
        recipient: Phone number or JID of the recipient.
        media_path: Path to an existing file. It is converted to an absolute
            path before being sent, because the bridge is a separate process
            and would resolve a relative path against its own working directory.
        timeout: (connect, read) timeout in seconds for the HTTP call to the
            bridge, so a hung bridge cannot block the caller forever.

    Returns:
        Tuple[bool, str]: success flag and a human-readable status message.
        Errors are reported through the tuple; this function does not raise.
    """
    try:
        # Validate input
        if not recipient:
            return False, "Recipient must be provided"

        if not media_path:
            return False, "Media path must be provided"

        if not os.path.isfile(media_path):
            return False, f"Media file not found: {media_path}"

        url = f"{WHATSAPP_API_BASE_URL}/send"
        payload = {
            "recipient": recipient,
            "media_path": os.path.abspath(media_path)
        }

        response = requests.post(url, json=payload, timeout=timeout)

        # Check if the request was successful
        if response.status_code != 200:
            return False, f"Error: HTTP {response.status_code} - {response.text}"

        result = response.json()
        return result.get("success", False), result.get("message", "Unknown response")

    # JSON errors are handled before RequestException: in recent versions of
    # requests the JSON decode error is also a RequestException, which would
    # otherwise make this branch unreachable.
    except json.JSONDecodeError:
        return False, f"Error parsing response: {response.text}"
    except requests.RequestException as e:
        return False, f"Request error: {str(e)}"
    except Exception as e:
        return False, f"Unexpected error: {str(e)}"