added send_file and send_audio_message

This commit is contained in:
Luke Harries
2025-04-05 22:07:43 +01:00
parent e95da254fa
commit 92e3d5eb3e
4 changed files with 542 additions and 21 deletions

View File

@@ -3,8 +3,11 @@ package main
import (
"context"
"database/sql"
"encoding/binary"
"encoding/json"
"fmt"
"math"
"math/rand"
"net/http"
"os"
"os/signal"
@@ -16,6 +19,8 @@ import (
_ "github.com/mattn/go-sqlite3"
"github.com/mdp/qrterminal"
"bytes"
"go.mau.fi/whatsmeow"
waProto "go.mau.fi/whatsmeow/binary/proto"
"go.mau.fi/whatsmeow/store/sqlstore"
@@ -181,10 +186,11 @@ type SendMessageResponse struct {
// SendMessageRequest is the JSON request body for sending a message through
// the bridge's REST API.
type SendMessageRequest struct {
	// Recipient identifies who receives the message. The send handler
	// rejects requests where it is empty.
	Recipient string `json:"recipient"`
	// Message is the text to send. When MediaPath is set it becomes the
	// caption of image, video and document messages.
	Message string `json:"message"`
	// MediaPath is an optional path to a file that the bridge process reads
	// from disk and uploads as media. Either Message or MediaPath must be
	// non-empty.
	MediaPath string `json:"media_path,omitempty"`
}
// Function to send a WhatsApp message
func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message string) (bool, string) {
func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message string, mediaPath string) (bool, string) {
if !client.IsConnected() {
return false, "Not connected to WhatsApp"
}
@@ -210,10 +216,140 @@ func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message str
}
}
// Send the message
_, err = client.SendMessage(context.Background(), recipientJID, &waProto.Message{
Conversation: proto.String(message),
})
msg := &waProto.Message{}
// Check if we have media to send
if mediaPath != "" {
// Read media file
mediaData, err := os.ReadFile(mediaPath)
if err != nil {
return false, fmt.Sprintf("Error reading media file: %v", err)
}
// Determine media type and mime type based on file extension
fileExt := strings.ToLower(mediaPath[strings.LastIndex(mediaPath, ".")+1:])
var mediaType whatsmeow.MediaType
var mimeType string
// Handle different media types
switch fileExt {
// Image types
case "jpg", "jpeg":
mediaType = whatsmeow.MediaImage
mimeType = "image/jpeg"
case "png":
mediaType = whatsmeow.MediaImage
mimeType = "image/png"
case "gif":
mediaType = whatsmeow.MediaImage
mimeType = "image/gif"
case "webp":
mediaType = whatsmeow.MediaImage
mimeType = "image/webp"
// Audio types
case "ogg":
mediaType = whatsmeow.MediaAudio
mimeType = "audio/ogg; codecs=opus"
// Video types
case "mp4":
mediaType = whatsmeow.MediaVideo
mimeType = "video/mp4"
case "avi":
mediaType = whatsmeow.MediaVideo
mimeType = "video/avi"
case "mov":
mediaType = whatsmeow.MediaVideo
mimeType = "video/quicktime"
// Document types (for any other file type)
default:
mediaType = whatsmeow.MediaDocument
mimeType = "application/octet-stream"
}
// Upload media to WhatsApp servers
resp, err := client.Upload(context.Background(), mediaData, mediaType)
if err != nil {
return false, fmt.Sprintf("Error uploading media: %v", err)
}
fmt.Println("Media uploaded", resp)
// Create the appropriate message type based on media type
switch mediaType {
case whatsmeow.MediaImage:
msg.ImageMessage = &waProto.ImageMessage{
Caption: proto.String(message),
Mimetype: proto.String(mimeType),
URL: &resp.URL,
DirectPath: &resp.DirectPath,
MediaKey: resp.MediaKey,
FileEncSHA256: resp.FileEncSHA256,
FileSHA256: resp.FileSHA256,
FileLength: &resp.FileLength,
}
case whatsmeow.MediaAudio:
// Handle ogg audio files
var seconds uint32 = 30 // Default fallback
var waveform []byte = nil
// Try to analyze the ogg file
if strings.Contains(mimeType, "ogg") {
analyzedSeconds, analyzedWaveform, err := analyzeOggOpus(mediaData)
if err == nil {
seconds = analyzedSeconds
waveform = analyzedWaveform
} else {
return false, fmt.Sprintf("Failed to analyze Ogg Opus file: %v", err)
}
} else {
fmt.Printf("Not an Ogg Opus file: %s\n", mimeType)
}
msg.AudioMessage = &waProto.AudioMessage{
Mimetype: proto.String(mimeType),
URL: &resp.URL,
DirectPath: &resp.DirectPath,
MediaKey: resp.MediaKey,
FileEncSHA256: resp.FileEncSHA256,
FileSHA256: resp.FileSHA256,
FileLength: &resp.FileLength,
Seconds: proto.Uint32(seconds),
PTT: proto.Bool(true),
Waveform: waveform,
}
case whatsmeow.MediaVideo:
msg.VideoMessage = &waProto.VideoMessage{
Caption: proto.String(message),
Mimetype: proto.String(mimeType),
URL: &resp.URL,
DirectPath: &resp.DirectPath,
MediaKey: resp.MediaKey,
FileEncSHA256: resp.FileEncSHA256,
FileSHA256: resp.FileSHA256,
FileLength: &resp.FileLength,
}
case whatsmeow.MediaDocument:
msg.DocumentMessage = &waProto.DocumentMessage{
Title: proto.String(mediaPath[strings.LastIndex(mediaPath, "/")+1:]),
Caption: proto.String(message),
Mimetype: proto.String(mimeType),
URL: &resp.URL,
DirectPath: &resp.DirectPath,
MediaKey: resp.MediaKey,
FileEncSHA256: resp.FileEncSHA256,
FileSHA256: resp.FileSHA256,
FileLength: &resp.FileLength,
}
}
} else {
msg.Conversation = proto.String(message)
}
// Send message
_, err = client.SendMessage(context.Background(), recipientJID, msg)
if err != nil {
return false, fmt.Sprintf("Error sending message: %v", err)
@@ -227,7 +363,6 @@ func startRESTServer(client *whatsmeow.Client, port int) {
// Handler for sending messages
http.HandleFunc("/api/send", func(w http.ResponseWriter, r *http.Request) {
// Only allow POST requests
fmt.Println("Received request to send message")
if r.Method != http.MethodPost {
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
return
@@ -241,13 +376,20 @@ func startRESTServer(client *whatsmeow.Client, port int) {
}
// Validate request
if req.Recipient == "" || req.Message == "" {
http.Error(w, "Recipient and message are required", http.StatusBadRequest)
if req.Recipient == "" {
http.Error(w, "Recipient is required", http.StatusBadRequest)
return
}
if req.Message == "" && req.MediaPath == "" {
http.Error(w, "Message or media path is required", http.StatusBadRequest)
return
}
fmt.Println("Received request to send message", req.Message, req.MediaPath)
// Send the message
success, message := sendWhatsAppMessage(client, req.Recipient, req.Message)
success, message := sendWhatsAppMessage(client, req.Recipient, req.Message, req.MediaPath)
fmt.Println("Message sent", success, message)
// Set response headers
w.Header().Set("Content-Type", "application/json")
@@ -693,3 +835,167 @@ func requestHistorySync(client *whatsmeow.Client) {
fmt.Println("History sync requested. Waiting for server response...")
}
}
// analyzeOggOpus estimates the playback duration of an Ogg Opus stream and
// returns a placeholder waveform for it.
//
// It walks the Ogg pages in data, reads the pre-skip value from the OpusHead
// identification header and remembers the last real granule position. The
// duration is (lastGranule - preSkip) / 48000, rounded up: Opus granule
// positions are always counted in 48 kHz samples, regardless of the input
// sample rate recorded in OpusHead, so that field is only logged. When no
// granule position is found the duration falls back to a rough size-based
// estimate. The result is clamped to the range 1..300 seconds.
//
// An error is returned only when data does not start with the "OggS" capture
// pattern. Truncated or otherwise damaged streams are parsed on a best-effort
// basis and never cause an out-of-range access.
func analyzeOggOpus(data []byte) (duration uint32, waveform []byte, err error) {
	const (
		oggMagic       = "OggS"
		opusMagic      = "OpusHead"
		pageHeaderLen  = 27      // fixed part of an Ogg page header, before the segment table
		preSkipOff     = 10      // pre-skip field, relative to the start of "OpusHead"
		inputRateOff   = 12      // input sample rate field, relative to the start of "OpusHead"
		opusHeadMinLen = 16      // bytes needed to read through the input sample rate
		granuleRate    = 48000.0 // Opus granule positions are always in 48 kHz units
		maxSeconds     = 300
	)
	// A granule position with all bits set marks a page on which no packet
	// finishes; it carries no timing information.
	const noGranule uint64 = math.MaxUint64

	if len(data) < len(oggMagic) || string(data[:len(oggMagic)]) != oggMagic {
		return 0, nil, fmt.Errorf("not a valid Ogg file (missing OggS signature)")
	}

	var (
		lastGranule   uint64
		preSkip       uint16
		foundOpusHead bool
	)

	for i := 0; i+pageHeaderLen <= len(data); {
		if string(data[i:i+len(oggMagic)]) != oggMagic {
			// Not on a page boundary: resynchronise one byte at a time.
			i++
			continue
		}

		granulePos := binary.LittleEndian.Uint64(data[i+6 : i+14])
		pageSeqNum := binary.LittleEndian.Uint32(data[i+18 : i+22])
		numSegments := int(data[i+26])

		tableEnd := i + pageHeaderLen + numSegments
		if tableEnd > len(data) {
			break // segment table is cut off
		}

		// The page length is the header plus the sum of all lacing values.
		nextPage := tableEnd
		for _, segLen := range data[i+pageHeaderLen : tableEnd] {
			nextPage += int(segLen)
		}
		// A truncated final page may claim more bytes than are present.
		pageEnd := nextPage
		if pageEnd > len(data) {
			pageEnd = len(data)
		}

		// The identification header lives in the first page(s) of the stream.
		if !foundOpusHead && pageSeqNum <= 1 {
			page := data[i:pageEnd]
			if at := bytes.Index(page, []byte(opusMagic)); at >= 0 && at+opusHeadMinLen <= len(page) {
				// OpusHead layout: Magic(8) Version(1) Channels(1) PreSkip(2) InputSampleRate(4) ...
				preSkip = binary.LittleEndian.Uint16(page[at+preSkipOff:])
				inputRate := binary.LittleEndian.Uint32(page[at+inputRateOff:])
				foundOpusHead = true
				fmt.Printf("Found OpusHead: sampleRate=%d, preSkip=%d\n", inputRate, preSkip)
			}
		}

		if granulePos != 0 && granulePos != noGranule {
			lastGranule = granulePos
		}

		i = nextPage
	}

	if !foundOpusHead {
		fmt.Println("Warning: OpusHead not found, using default values")
	}

	var seconds float64
	if lastGranule > 0 {
		// Guard against underflow when the stream is shorter than its pre-skip.
		var samples uint64
		if lastGranule > uint64(preSkip) {
			samples = lastGranule - uint64(preSkip)
		}
		exact := float64(samples) / granuleRate
		fmt.Printf("Calculated Opus duration from granule: %f seconds (lastGranule=%d)\n",
			exact, lastGranule)
		seconds = math.Ceil(exact)
	} else {
		fmt.Println("Warning: No valid granule position found, using estimation")
		seconds = math.Floor(float64(len(data)) / 2000.0) // very rough approximation
	}

	// Clamp while still in floating point so that an absurd granule position
	// cannot overflow the uint32 conversion.
	switch {
	case seconds < 1:
		duration = 1
	case seconds > maxSeconds:
		duration = maxSeconds
	default:
		duration = uint32(seconds)
	}

	waveform = placeholderWaveform(duration)

	fmt.Printf("Ogg Opus analysis: size=%d bytes, calculated duration=%d sec, waveform=%d bytes\n",
		len(data), duration, len(waveform))

	return duration, waveform, nil
}
// min reports the lesser of its two integer arguments.
func min(x, y int) int {
	if y < x {
		return y
	}
	return x
}
// placeholderWaveform generates a synthetic 64-byte waveform for a WhatsApp
// voice message. It is not derived from the audio: it layers two sine waves,
// a little noise and a fade-in/fade-out envelope so the result looks natural,
// with every sample in the range 0-100.
//
// The output is a pure function of duration: the noise comes from a private
// generator seeded with duration, so equal durations always yield equal
// waveforms and the process-wide math/rand state is left untouched.
func placeholderWaveform(duration uint32) []byte {
	// WhatsApp expects a 64-byte waveform for voice messages
	const waveformLength = 64
	waveform := make([]byte, waveformLength)

	// A local source keeps results reproducible per duration. The deprecated
	// global rand.Seed would reseed shared state and is ignored by default
	// from Go 1.24 onwards.
	rng := rand.New(rand.NewSource(int64(duration)))

	// Base amplitude and frequency - longer messages get faster frequency,
	// capped at the value reached for 120 seconds.
	const baseAmplitude = 35.0
	capped := duration
	if capped > 120 {
		capped = 120
	}
	frequencyFactor := float64(capped) / 30.0

	for i := range waveform {
		// Position in the waveform (normalized 0-1)
		pos := float64(i) / float64(waveformLength)

		// Two sine waves of different frequencies for a more natural look
		val := baseAmplitude * math.Sin(pos*math.Pi*frequencyFactor*8)
		val += (baseAmplitude / 2) * math.Sin(pos*math.Pi*frequencyFactor*16)

		// Some randomness so the pattern is not perfectly regular
		val += (rng.Float64() - 0.5) * 15

		// Fade-in and fade-out envelope
		fadeInOut := math.Sin(pos * math.Pi)
		val *= 0.7 + 0.3*fadeInOut

		// Center around 50 (typical voice baseline)
		val += 50

		// Keep values within WhatsApp's expected range (0-100)
		if val < 0 {
			val = 0
		} else if val > 100 {
			val = 100
		}

		waveform[i] = byte(val)
	}

	return waveform
}

View File

@@ -0,0 +1,110 @@
import os
import subprocess
import tempfile
def convert_to_opus_ogg(input_file, output_file=None, bitrate="32k", sample_rate=24000):
    """
    Convert an audio file to Opus format in an Ogg container using ffmpeg.

    Args:
        input_file (str): Path to the input audio file
        output_file (str, optional): Path to save the output file. If None, replaces the
            extension of input_file with .ogg
        bitrate (str, optional): Target bitrate for Opus encoding (default: "32k")
        sample_rate (int, optional): Sample rate for output (default: 24000)

    Returns:
        str: Path to the converted file

    Raises:
        FileNotFoundError: If the input file doesn't exist
        RuntimeError: If ffmpeg is not installed or the ffmpeg conversion fails
    """
    if not os.path.isfile(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")

    # If no output file is specified, replace the extension with .ogg
    if output_file is None:
        output_file = os.path.splitext(input_file)[0] + ".ogg"

    # Ensure the output directory exists; exist_ok avoids a check-then-create race
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Build the ffmpeg command
    cmd = [
        "ffmpeg",
        "-i", input_file,
        "-c:a", "libopus",
        "-b:a", bitrate,
        "-ar", str(sample_rate),
        "-application", "voip",  # Optimize for voice
        "-vbr", "on",  # Variable bitrate
        "-compression_level", "10",  # Maximum compression
        "-frame_duration", "60",  # 60ms frames (good for voice)
        "-y",  # Overwrite output file if it exists
        output_file,
    ]

    try:
        # Output is captured so that stderr can be reported on failure
        subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )
    except FileNotFoundError as e:
        # subprocess raises FileNotFoundError when the executable itself is
        # missing; surface that as the documented RuntimeError so callers do
        # not mistake it for a missing input file.
        raise RuntimeError(
            "Failed to convert audio: ffmpeg executable not found. "
            "Install ffmpeg and make sure it is on PATH."
        ) from e
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"Failed to convert audio: ffmpeg exited with code {e.returncode}: {e.stderr}"
        ) from e

    return output_file
def convert_to_opus_ogg_temp(input_file, bitrate="32k", sample_rate=24000):
    """
    Convert an audio file to Opus-in-Ogg and write the result to a new temporary file.

    The temporary file is not deleted automatically; the caller owns it once
    this function returns. If the conversion raises, the temporary file is
    removed before the exception propagates.

    Args:
        input_file (str): Path to the input audio file
        bitrate (str, optional): Target bitrate for Opus encoding (default: "32k")
        sample_rate (int, optional): Sample rate for output (default: 24000)

    Returns:
        str: Path to the temporary file with the converted audio

    Raises:
        FileNotFoundError: If the input file doesn't exist
        RuntimeError: If the ffmpeg conversion fails
    """
    # Reserve a unique .ogg path; the handle is closed right away so ffmpeg
    # can write to the file by name.
    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as handle:
        temp_path = handle.name

    try:
        convert_to_opus_ogg(input_file, temp_path, bitrate, sample_rate)
    except Exception:
        # Do not leave an empty or partial file behind on failure
        if os.path.exists(temp_path):
            os.unlink(temp_path)
        raise

    return temp_path
if __name__ == "__main__":
    # Command-line entry point: python audio.py input_file [output_file]
    import sys

    if len(sys.argv) < 2:
        print("Usage: python audio.py input_file [output_file]")
        sys.exit(1)

    input_file = sys.argv[1]
    # The usage text advertises an optional output path, so honour it when
    # given; otherwise convert into a temporary file.
    output_file = sys.argv[2] if len(sys.argv) > 2 else None

    try:
        if output_file is not None:
            result = convert_to_opus_ogg(input_file, output_file)
        else:
            result = convert_to_opus_ogg_temp(input_file)
        print(f"Successfully converted to: {result}")
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)

View File

@@ -9,7 +9,9 @@ from whatsapp import (
get_contact_chats as whatsapp_get_contact_chats,
get_last_interaction as whatsapp_get_last_interaction,
get_message_context as whatsapp_get_message_context,
send_message as whatsapp_send_message
send_message as whatsapp_send_message,
send_file as whatsapp_send_file,
send_audio_message as whatsapp_audio_voice_message
)
# Initialize FastMCP server
@@ -180,6 +182,44 @@ def send_message(
"message": status_message
}
@mcp.tool()
def send_file(recipient: str, media_path: str) -> Dict[str, Any]:
    """Send a file such as a picture, raw audio, video or document via WhatsApp to the specified recipient. For group messages use the JID.

    Args:
        recipient: The recipient - either a phone number with country code but no + or other symbols,
                 or a JID (e.g., "123456789@s.whatsapp.net" or a group JID like "123456789@g.us")
        media_path: The absolute path to the media file to send (image, video, document)

    Returns:
        A dictionary containing success status and a status message
    """
    # Delegate to the WhatsApp client layer and repackage its (ok, text) pair
    # as the dictionary returned to the MCP caller.
    ok, detail = whatsapp_send_file(recipient, media_path)
    result: Dict[str, Any] = {}
    result["success"] = ok
    result["message"] = detail
    return result
@mcp.tool()
def send_audio_message(recipient: str, media_path: str) -> Dict[str, Any]:
    """Send any audio file as a WhatsApp audio message to the specified recipient. For group messages use the JID.

    Args:
        recipient: The recipient - either a phone number with country code but no + or other symbols,
                 or a JID (e.g., "123456789@s.whatsapp.net" or a group JID like "123456789@g.us")
        media_path: The absolute path to the audio file to send (will be converted to Opus .ogg if it's not a .ogg file)

    Returns:
        A dictionary containing success status and a status message
    """
    # Delegate to the WhatsApp client layer and repackage its (ok, text) pair
    # as the dictionary returned to the MCP caller.
    ok, detail = whatsapp_audio_voice_message(recipient, media_path)
    result: Dict[str, Any] = {}
    result["success"] = ok
    result["message"] = detail
    return result
if __name__ == "__main__":
    # When executed as a script, start the MCP server using the stdio transport.
    mcp.run(transport='stdio')

View File

@@ -5,6 +5,7 @@ from typing import Optional, List, Tuple
import os.path
import requests
import json
import audio
MESSAGES_DB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'whatsapp-bridge', 'store', 'messages.db')
WHATSAPP_API_BASE_URL = "http://localhost:8080/api"
@@ -677,16 +678,6 @@ def get_direct_chat_by_contact(sender_phone_number: str) -> Optional[Chat]:
conn.close()
def send_message(recipient: str, message: str) -> Tuple[bool, str]:
"""Send a WhatsApp message to the specified recipient. For group messages use the JID.
Args:
recipient: The recipient - either a phone number with country code but no + or other symbols,
or a JID (e.g., "123456789@s.whatsapp.net" or a group JID like "123456789@g.us").
message: The message text to send
Returns:
Tuple[bool, str]: A tuple containing success status and a status message
"""
try:
# Validate input
if not recipient:
@@ -695,7 +686,81 @@ def send_message(recipient: str, message: str) -> Tuple[bool, str]:
url = f"{WHATSAPP_API_BASE_URL}/send"
payload = {
"recipient": recipient,
"message": message
"message": message,
}
response = requests.post(url, json=payload)
# Check if the request was successful
if response.status_code == 200:
result = response.json()
return result.get("success", False), result.get("message", "Unknown response")
else:
return False, f"Error: HTTP {response.status_code} - {response.text}"
except requests.RequestException as e:
return False, f"Request error: {str(e)}"
except json.JSONDecodeError:
return False, f"Error parsing response: {response.text}"
except Exception as e:
return False, f"Unexpected error: {str(e)}"
def send_file(recipient: str, media_path: str) -> Tuple[bool, str]:
    """Ask the local WhatsApp bridge to send a file to a recipient.

    Posts the recipient and the media path to the bridge's /send endpoint.
    Only the path is transmitted, not the file contents.

    Args:
        recipient: Identifier of the recipient; must be non-empty.
        media_path: Path to an existing file on disk.

    Returns:
        Tuple[bool, str]: A tuple containing success status and a status message.
        Validation, HTTP and parsing failures are reported through this tuple
        instead of being raised.
    """
    try:
        # Validate input before touching the network
        if not recipient:
            return False, "Recipient must be provided"
        if not media_path:
            return False, "Media path must be provided"
        if not os.path.isfile(media_path):
            return False, f"Media file not found: {media_path}"

        response = requests.post(
            f"{WHATSAPP_API_BASE_URL}/send",
            json={"recipient": recipient, "media_path": media_path},
        )

        # Anything other than 200 is reported verbatim to the caller
        if response.status_code != 200:
            return False, f"Error: HTTP {response.status_code} - {response.text}"

        body = response.json()
        return body.get("success", False), body.get("message", "Unknown response")
    except requests.RequestException as e:
        return False, f"Request error: {str(e)}"
    except json.JSONDecodeError:
        return False, f"Error parsing response: {response.text}"
    except Exception as e:
        return False, f"Unexpected error: {str(e)}"
def send_audio_message(recipient: str, media_path: str) -> Tuple[bool, str]:
try:
# Validate input
if not recipient:
return False, "Recipient must be provided"
if not media_path:
return False, "Media path must be provided"
if not os.path.isfile(media_path):
return False, f"Media file not found: {media_path}"
if not media_path.endswith(".ogg"):
try:
media_path = audio.convert_to_opus_ogg_temp(media_path)
except Exception as e:
return False, f"Error converting file to opus ogg. You likely need to install ffmpeg: {str(e)}"
url = f"{WHATSAPP_API_BASE_URL}/send"
payload = {
"recipient": recipient,
"media_path": media_path
}
response = requests.post(url, json=payload)