mirror of
https://github.com/jlengrand/whatsapp-mcp.git
synced 2026-03-10 08:51:23 +00:00
added send_file and send_audio_message
This commit is contained in:
@@ -3,8 +3,11 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
@@ -16,6 +19,8 @@ import (
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"github.com/mdp/qrterminal"
|
||||
|
||||
"bytes"
|
||||
|
||||
"go.mau.fi/whatsmeow"
|
||||
waProto "go.mau.fi/whatsmeow/binary/proto"
|
||||
"go.mau.fi/whatsmeow/store/sqlstore"
|
||||
@@ -181,10 +186,11 @@ type SendMessageResponse struct {
|
||||
// SendMessageRequest describes the JSON body of a send-message request.
type SendMessageRequest struct {
	// Recipient identifies who the message is sent to.
	Recipient string `json:"recipient"`
	// Message is the text to send; when media is attached it is used as the
	// caption for image, video and document messages.
	Message string `json:"message"`
	// MediaPath is an optional path to a local file to upload and send.
	MediaPath string `json:"media_path,omitempty"`
}
|
||||
|
||||
// Function to send a WhatsApp message
|
||||
func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message string) (bool, string) {
|
||||
func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message string, mediaPath string) (bool, string) {
|
||||
if !client.IsConnected() {
|
||||
return false, "Not connected to WhatsApp"
|
||||
}
|
||||
@@ -210,10 +216,140 @@ func sendWhatsAppMessage(client *whatsmeow.Client, recipient string, message str
|
||||
}
|
||||
}
|
||||
|
||||
// Send the message
|
||||
_, err = client.SendMessage(context.Background(), recipientJID, &waProto.Message{
|
||||
Conversation: proto.String(message),
|
||||
})
|
||||
msg := &waProto.Message{}
|
||||
|
||||
// Check if we have media to send
|
||||
if mediaPath != "" {
|
||||
// Read media file
|
||||
mediaData, err := os.ReadFile(mediaPath)
|
||||
if err != nil {
|
||||
return false, fmt.Sprintf("Error reading media file: %v", err)
|
||||
}
|
||||
|
||||
// Determine media type and mime type based on file extension
|
||||
fileExt := strings.ToLower(mediaPath[strings.LastIndex(mediaPath, ".")+1:])
|
||||
var mediaType whatsmeow.MediaType
|
||||
var mimeType string
|
||||
|
||||
// Handle different media types
|
||||
switch fileExt {
|
||||
// Image types
|
||||
case "jpg", "jpeg":
|
||||
mediaType = whatsmeow.MediaImage
|
||||
mimeType = "image/jpeg"
|
||||
case "png":
|
||||
mediaType = whatsmeow.MediaImage
|
||||
mimeType = "image/png"
|
||||
case "gif":
|
||||
mediaType = whatsmeow.MediaImage
|
||||
mimeType = "image/gif"
|
||||
case "webp":
|
||||
mediaType = whatsmeow.MediaImage
|
||||
mimeType = "image/webp"
|
||||
|
||||
// Audio types
|
||||
case "ogg":
|
||||
mediaType = whatsmeow.MediaAudio
|
||||
mimeType = "audio/ogg; codecs=opus"
|
||||
|
||||
// Video types
|
||||
case "mp4":
|
||||
mediaType = whatsmeow.MediaVideo
|
||||
mimeType = "video/mp4"
|
||||
case "avi":
|
||||
mediaType = whatsmeow.MediaVideo
|
||||
mimeType = "video/avi"
|
||||
case "mov":
|
||||
mediaType = whatsmeow.MediaVideo
|
||||
mimeType = "video/quicktime"
|
||||
|
||||
// Document types (for any other file type)
|
||||
default:
|
||||
mediaType = whatsmeow.MediaDocument
|
||||
mimeType = "application/octet-stream"
|
||||
}
|
||||
|
||||
// Upload media to WhatsApp servers
|
||||
resp, err := client.Upload(context.Background(), mediaData, mediaType)
|
||||
if err != nil {
|
||||
return false, fmt.Sprintf("Error uploading media: %v", err)
|
||||
}
|
||||
|
||||
fmt.Println("Media uploaded", resp)
|
||||
|
||||
// Create the appropriate message type based on media type
|
||||
switch mediaType {
|
||||
case whatsmeow.MediaImage:
|
||||
msg.ImageMessage = &waProto.ImageMessage{
|
||||
Caption: proto.String(message),
|
||||
Mimetype: proto.String(mimeType),
|
||||
URL: &resp.URL,
|
||||
DirectPath: &resp.DirectPath,
|
||||
MediaKey: resp.MediaKey,
|
||||
FileEncSHA256: resp.FileEncSHA256,
|
||||
FileSHA256: resp.FileSHA256,
|
||||
FileLength: &resp.FileLength,
|
||||
}
|
||||
case whatsmeow.MediaAudio:
|
||||
// Handle ogg audio files
|
||||
var seconds uint32 = 30 // Default fallback
|
||||
var waveform []byte = nil
|
||||
|
||||
// Try to analyze the ogg file
|
||||
if strings.Contains(mimeType, "ogg") {
|
||||
analyzedSeconds, analyzedWaveform, err := analyzeOggOpus(mediaData)
|
||||
if err == nil {
|
||||
seconds = analyzedSeconds
|
||||
waveform = analyzedWaveform
|
||||
} else {
|
||||
return false, fmt.Sprintf("Failed to analyze Ogg Opus file: %v", err)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("Not an Ogg Opus file: %s\n", mimeType)
|
||||
}
|
||||
|
||||
msg.AudioMessage = &waProto.AudioMessage{
|
||||
Mimetype: proto.String(mimeType),
|
||||
URL: &resp.URL,
|
||||
DirectPath: &resp.DirectPath,
|
||||
MediaKey: resp.MediaKey,
|
||||
FileEncSHA256: resp.FileEncSHA256,
|
||||
FileSHA256: resp.FileSHA256,
|
||||
FileLength: &resp.FileLength,
|
||||
Seconds: proto.Uint32(seconds),
|
||||
PTT: proto.Bool(true),
|
||||
Waveform: waveform,
|
||||
}
|
||||
case whatsmeow.MediaVideo:
|
||||
msg.VideoMessage = &waProto.VideoMessage{
|
||||
Caption: proto.String(message),
|
||||
Mimetype: proto.String(mimeType),
|
||||
URL: &resp.URL,
|
||||
DirectPath: &resp.DirectPath,
|
||||
MediaKey: resp.MediaKey,
|
||||
FileEncSHA256: resp.FileEncSHA256,
|
||||
FileSHA256: resp.FileSHA256,
|
||||
FileLength: &resp.FileLength,
|
||||
}
|
||||
case whatsmeow.MediaDocument:
|
||||
msg.DocumentMessage = &waProto.DocumentMessage{
|
||||
Title: proto.String(mediaPath[strings.LastIndex(mediaPath, "/")+1:]),
|
||||
Caption: proto.String(message),
|
||||
Mimetype: proto.String(mimeType),
|
||||
URL: &resp.URL,
|
||||
DirectPath: &resp.DirectPath,
|
||||
MediaKey: resp.MediaKey,
|
||||
FileEncSHA256: resp.FileEncSHA256,
|
||||
FileSHA256: resp.FileSHA256,
|
||||
FileLength: &resp.FileLength,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
msg.Conversation = proto.String(message)
|
||||
}
|
||||
|
||||
// Send message
|
||||
_, err = client.SendMessage(context.Background(), recipientJID, msg)
|
||||
|
||||
if err != nil {
|
||||
return false, fmt.Sprintf("Error sending message: %v", err)
|
||||
@@ -227,7 +363,6 @@ func startRESTServer(client *whatsmeow.Client, port int) {
|
||||
// Handler for sending messages
|
||||
http.HandleFunc("/api/send", func(w http.ResponseWriter, r *http.Request) {
|
||||
// Only allow POST requests
|
||||
fmt.Println("Received request to send message")
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
@@ -241,13 +376,20 @@ func startRESTServer(client *whatsmeow.Client, port int) {
|
||||
}
|
||||
|
||||
// Validate request
|
||||
if req.Recipient == "" || req.Message == "" {
|
||||
http.Error(w, "Recipient and message are required", http.StatusBadRequest)
|
||||
if req.Recipient == "" {
|
||||
http.Error(w, "Recipient is required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if req.Message == "" && req.MediaPath == "" {
|
||||
http.Error(w, "Message or media path is required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Println("Received request to send message", req.Message, req.MediaPath)
|
||||
|
||||
// Send the message
|
||||
success, message := sendWhatsAppMessage(client, req.Recipient, req.Message)
|
||||
success, message := sendWhatsAppMessage(client, req.Recipient, req.Message, req.MediaPath)
|
||||
fmt.Println("Message sent", success, message)
|
||||
// Set response headers
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -693,3 +835,167 @@ func requestHistorySync(client *whatsmeow.Client) {
|
||||
fmt.Println("History sync requested. Waiting for server response...")
|
||||
}
|
||||
}
|
||||
|
||||
// analyzeOggOpus tries to extract duration and generate a simple waveform from an Ogg Opus file
|
||||
func analyzeOggOpus(data []byte) (duration uint32, waveform []byte, err error) {
|
||||
// Try to detect if this is a valid Ogg file by checking for the "OggS" signature
|
||||
// at the beginning of the file
|
||||
if len(data) < 4 || string(data[0:4]) != "OggS" {
|
||||
return 0, nil, fmt.Errorf("not a valid Ogg file (missing OggS signature)")
|
||||
}
|
||||
|
||||
// Parse Ogg pages to find the last page with a valid granule position
|
||||
var lastGranule uint64
|
||||
var sampleRate uint32 = 48000 // Default Opus sample rate
|
||||
var preSkip uint16 = 0
|
||||
var foundOpusHead bool
|
||||
|
||||
// Scan through the file looking for Ogg pages
|
||||
for i := 0; i < len(data); {
|
||||
// Check if we have enough data to read Ogg page header
|
||||
if i+27 >= len(data) {
|
||||
break
|
||||
}
|
||||
|
||||
// Verify Ogg page signature
|
||||
if string(data[i:i+4]) != "OggS" {
|
||||
// Skip until next potential page
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract header fields
|
||||
granulePos := binary.LittleEndian.Uint64(data[i+6 : i+14])
|
||||
pageSeqNum := binary.LittleEndian.Uint32(data[i+18 : i+22])
|
||||
numSegments := int(data[i+26])
|
||||
|
||||
// Extract segment table
|
||||
if i+27+numSegments >= len(data) {
|
||||
break
|
||||
}
|
||||
segmentTable := data[i+27 : i+27+numSegments]
|
||||
|
||||
// Calculate page size
|
||||
pageSize := 27 + numSegments
|
||||
for _, segLen := range segmentTable {
|
||||
pageSize += int(segLen)
|
||||
}
|
||||
|
||||
// Check if we're looking at an OpusHead packet (should be in first few pages)
|
||||
if !foundOpusHead && pageSeqNum <= 1 {
|
||||
// Look for "OpusHead" marker in this page
|
||||
pageData := data[i : i+pageSize]
|
||||
headPos := bytes.Index(pageData, []byte("OpusHead"))
|
||||
if headPos >= 0 && headPos+12 < len(pageData) {
|
||||
// Found OpusHead, extract sample rate and pre-skip
|
||||
// OpusHead format: Magic(8) + Version(1) + Channels(1) + PreSkip(2) + SampleRate(4) + ...
|
||||
headPos += 8 // Skip "OpusHead" marker
|
||||
// PreSkip is 2 bytes at offset 10
|
||||
if headPos+12 <= len(pageData) {
|
||||
preSkip = binary.LittleEndian.Uint16(pageData[headPos+10 : headPos+12])
|
||||
sampleRate = binary.LittleEndian.Uint32(pageData[headPos+12 : headPos+16])
|
||||
foundOpusHead = true
|
||||
fmt.Printf("Found OpusHead: sampleRate=%d, preSkip=%d\n", sampleRate, preSkip)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Keep track of last valid granule position
|
||||
if granulePos != 0 {
|
||||
lastGranule = granulePos
|
||||
}
|
||||
|
||||
// Move to next page
|
||||
i += pageSize
|
||||
}
|
||||
|
||||
if !foundOpusHead {
|
||||
fmt.Println("Warning: OpusHead not found, using default values")
|
||||
}
|
||||
|
||||
// Calculate duration based on granule position
|
||||
if lastGranule > 0 {
|
||||
// Formula for duration: (lastGranule - preSkip) / sampleRate
|
||||
durationSeconds := float64(lastGranule-uint64(preSkip)) / float64(sampleRate)
|
||||
duration = uint32(math.Ceil(durationSeconds))
|
||||
fmt.Printf("Calculated Opus duration from granule: %f seconds (lastGranule=%d)\n",
|
||||
durationSeconds, lastGranule)
|
||||
} else {
|
||||
// Fallback to rough estimation if granule position not found
|
||||
fmt.Println("Warning: No valid granule position found, using estimation")
|
||||
durationEstimate := float64(len(data)) / 2000.0 // Very rough approximation
|
||||
duration = uint32(durationEstimate)
|
||||
}
|
||||
|
||||
// Make sure we have a reasonable duration (at least 1 second, at most 300 seconds)
|
||||
if duration < 1 {
|
||||
duration = 1
|
||||
} else if duration > 300 {
|
||||
duration = 300
|
||||
}
|
||||
|
||||
// Generate waveform
|
||||
waveform = placeholderWaveform(duration)
|
||||
|
||||
fmt.Printf("Ogg Opus analysis: size=%d bytes, calculated duration=%d sec, waveform=%d bytes\n",
|
||||
len(data), duration, len(waveform))
|
||||
|
||||
return duration, waveform, nil
|
||||
}
|
||||
|
||||
// min reports the lesser of its two integer arguments.
func min(x, y int) int {
	if y < x {
		return y
	}
	return x
}
|
||||
|
||||
// placeholderWaveform generates a synthetic 64-byte waveform for a voice
// message of the given duration (in seconds).
//
// The result is fully determined by duration: the same duration always yields
// the same bytes. Every byte lies in the range 0-100. The shape is built from
// two sine waves whose frequency grows with the duration (capped at 120
// seconds), a small random jitter, and a fade-in/fade-out envelope, centred
// around 50.
func placeholderWaveform(duration uint32) []byte {
	// WhatsApp expects a 64-byte waveform for voice messages
	const waveformLength = 64
	waveform := make([]byte, waveformLength)

	// Use a private generator seeded with the duration. The deprecated global
	// rand.Seed is a no-op by default since Go 1.24, which would make the
	// output non-reproducible, and it would also reseed the process-wide
	// generator used by unrelated code.
	rng := rand.New(rand.NewSource(int64(duration)))

	// Base amplitude and frequency - longer messages get faster frequency,
	// capped at 120 seconds.
	baseAmplitude := 35.0
	cappedDuration := duration
	if cappedDuration > 120 {
		cappedDuration = 120
	}
	frequencyFactor := float64(cappedDuration) / 30.0

	for i := range waveform {
		// Position in the waveform (normalized 0-1)
		pos := float64(i) / float64(waveformLength)

		// Combine two sine waves of different frequencies.
		val := baseAmplitude * math.Sin(pos*math.Pi*frequencyFactor*8)
		val += (baseAmplitude / 2) * math.Sin(pos*math.Pi*frequencyFactor*16)

		// Add a bounded random jitter in [-7.5, 7.5).
		val += (rng.Float64() - 0.5) * 15

		// Fade in and out towards the edges of the waveform.
		fadeInOut := math.Sin(pos * math.Pi)
		val = val * (0.7 + 0.3*fadeInOut)

		// Center around 50
		val = val + 50

		// Clamp to the 0-100 range before truncating to a byte.
		if val < 0 {
			val = 0
		} else if val > 100 {
			val = 100
		}

		waveform[i] = byte(val)
	}

	return waveform
}
|
||||
|
||||
Reference in New Issue
Block a user