Files
OpsLog/internal/audio/engine.go
T
2026-06-04 00:46:35 +02:00

272 lines
7.2 KiB
Go

//go:build windows
package audio
import (
"fmt"
"runtime"
"time"
"unsafe"
"github.com/go-ole/go-ole"
"github.com/moutend/go-wca/pkg/wca"
)
// WASAPI constants used by the capture and render paths in this file.
const (
	// bufferFlagSilent mirrors AUDCLNT_BUFFERFLAGS_SILENT: a capture packet
	// carrying this flag is silent, so zeros are emitted for it.
	bufferFlagSilent = uint32(1)

	// bufferDuration100ns is the requested WASAPI buffer length. It is a
	// REFERENCE_TIME value, i.e. counted in 100-ns units, so ten million
	// units amount to one second.
	bufferDuration100ns = 10 * 1000 * 1000
)
// coInit initialises COM on the calling thread as an apartment-threaded
// apartment. A result of S_FALSE (code 1) is treated as success; any other
// failure from CoInitializeEx is returned unchanged.
func coInit() error {
	// HRESULT value of S_FALSE.
	const sFalse = 0x00000001

	initErr := ole.CoInitializeEx(0, ole.COINIT_APARTMENTTHREADED)
	if initErr == nil {
		return nil
	}
	if oleErr, isOle := initErr.(*ole.OleError); isOle && oleErr.Code() == sFalse {
		return nil
	}
	return initErr
}
// openDevice looks up the IMMDevice for the given data flow. A non-empty id is
// compared against the ids of the active endpoints of that flow; when id is
// empty or matches none of them, the flow's default console endpoint is
// returned instead. The caller owns the returned device and must Release() it.
func openDevice(flow uint32, id string) (*wca.IMMDevice, error) {
	var enumerator *wca.IMMDeviceEnumerator
	createErr := wca.CoCreateInstance(
		wca.CLSID_MMDeviceEnumerator, 0, wca.CLSCTX_ALL,
		wca.IID_IMMDeviceEnumerator, &enumerator,
	)
	if createErr != nil {
		return nil, fmt.Errorf("create enumerator: %w", createErr)
	}
	defer enumerator.Release()

	if id != "" {
		var endpoints *wca.IMMDeviceCollection
		enumErr := enumerator.EnumAudioEndpoints(flow, wca.DEVICE_STATE_ACTIVE, &endpoints)
		if enumErr == nil && endpoints != nil {
			defer endpoints.Release()

			// Errors from GetCount and GetId are not checked here; a lookup
			// that yields no match falls through to the default endpoint.
			var total uint32
			endpoints.GetCount(&total)
			for idx := uint32(0); idx < total; idx++ {
				var candidate *wca.IMMDevice
				itemErr := endpoints.Item(idx, &candidate)
				if itemErr != nil || candidate == nil {
					continue
				}
				var candidateID string
				candidate.GetId(&candidateID)
				if candidateID != id {
					candidate.Release()
					continue
				}
				return candidate, nil // ownership passes to the caller
			}
		}
	}

	var fallback *wca.IMMDevice
	if err := enumerator.GetDefaultAudioEndpoint(flow, wca.EConsole, &fallback); err != nil {
		return nil, fmt.Errorf("no audio endpoint (id %q): %w", id, err)
	}
	return fallback, nil
}
// pcmFormat returns a freshly allocated description of the fixed capture
// format (16 kHz mono 16-bit PCM), built from the package's format constants.
// The device's native mix format may differ; WASAPI's AUTOCONVERTPCM flag
// takes care of resampling into this format.
func pcmFormat() *wca.WAVEFORMATEX {
	const waveFormatPCM = 1 // WAVE_FORMAT_PCM

	format := new(wca.WAVEFORMATEX)
	format.WFormatTag = waveFormatPCM
	format.NChannels = channels
	format.NSamplesPerSec = sampleRate
	format.NAvgBytesPerSec = bytesPerSec
	format.NBlockAlign = blockAlign
	format.WBitsPerSample = bitsPerSample
	format.CbSize = 0
	return format
}
// autoConvert is the stream-flag set handed to IAudioClient.Initialize by both
// the capture and the render path: PCM auto-conversion combined with the
// default-quality sample-rate-converter flag.
const autoConvert = wca.AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY | wca.AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM
// recordPCM records from the given device until the stop channel is closed and
// returns everything captured as 16 kHz mono 16-bit PCM bytes. Whatever was
// captured before a failure is returned together with the error.
func recordPCM(deviceID string, stop <-chan struct{}) ([]byte, error) {
	// Start with room for bytesPerSec*4 bytes so short recordings do not
	// have to regrow the slice.
	recorded := make([]byte, 0, 4*bytesPerSec)

	// collect appends each captured chunk to the running recording.
	collect := func(chunk []byte) {
		recorded = append(recorded, chunk...)
	}

	if err := captureStream(deviceID, stop, collect); err != nil {
		return recorded, err
	}
	return recorded, nil
}
// captureStream opens a capture device and calls onChunk with freshly-captured
// 16 kHz mono 16-bit PCM as it arrives, until stop closes.
//
// deviceID selects the endpoint via openDevice, which falls back to the
// default capture endpoint when the id is empty or not found. Every chunk
// passed to onChunk is a private copy the callback may retain; packets flagged
// silent are delivered as zero-filled chunks of the same length. onChunk is
// called synchronously from the capture loop.
//
// The whole call runs on a COM-initialised, OS-locked thread. It returns nil
// once stop is closed, or a wrapped error when COM setup or any WASAPI call
// fails.
func captureStream(deviceID string, stop <-chan struct{}, onChunk func([]byte)) error {
	// COM initialisation is per thread, so pin this goroutine to its thread
	// for the lifetime of the stream.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	if err := coInit(); err != nil {
		return fmt.Errorf("CoInitialize: %w", err)
	}
	defer ole.CoUninitialize()

	dev, err := openDevice(wca.ECapture, deviceID)
	if err != nil {
		return err
	}
	defer dev.Release()

	var ac *wca.IAudioClient
	if err := dev.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &ac); err != nil {
		return fmt.Errorf("activate capture: %w", err)
	}
	defer ac.Release()

	if err := ac.Initialize(wca.AUDCLNT_SHAREMODE_SHARED, autoConvert,
		wca.REFERENCE_TIME(bufferDuration100ns), 0, pcmFormat(), nil); err != nil {
		return fmt.Errorf("initialize capture: %w", err)
	}

	var acc *wca.IAudioCaptureClient
	if err := ac.GetService(wca.IID_IAudioCaptureClient, &acc); err != nil {
		return fmt.Errorf("get capture service: %w", err)
	}
	defer acc.Release()

	if err := ac.Start(); err != nil {
		return fmt.Errorf("start capture: %w", err)
	}
	defer ac.Stop()

	// Poll cadence while no packet is pending. Waiting on the ticker in a
	// select (rather than sleeping) lets a closed stop channel end the
	// stream immediately.
	idle := time.NewTicker(10 * time.Millisecond)
	defer idle.Stop()

	for {
		select {
		case <-stop:
			return nil
		default:
		}

		var packet uint32
		if err := acc.GetNextPacketSize(&packet); err != nil {
			return fmt.Errorf("query capture packet size: %w", err)
		}
		if packet == 0 {
			select {
			case <-stop:
				return nil
			case <-idle.C:
			}
			continue
		}

		// Drain every packet that is currently queued.
		for packet > 0 {
			var data *byte
			var frames, flags uint32
			var devpos, qpcpos uint64
			if err := acc.GetBuffer(&data, &frames, &flags, &devpos, &qpcpos); err != nil {
				return fmt.Errorf("get capture buffer: %w", err)
			}

			n := int(frames) * blockAlign
			if n > 0 {
				// chunk starts zeroed, so a silent packet (or a nil
				// buffer) is delivered as silence without reading data.
				chunk := make([]byte, n)
				if flags&bufferFlagSilent == 0 && data != nil {
					copy(chunk, unsafe.Slice(data, n))
				}
				onChunk(chunk)
			}

			// A failed release leaves the capture client unusable for the
			// next GetBuffer, so surface it instead of carrying on.
			if err := acc.ReleaseBuffer(frames); err != nil {
				return fmt.Errorf("release capture buffer: %w", err)
			}
			if err := acc.GetNextPacketSize(&packet); err != nil {
				return fmt.Errorf("query capture packet size: %w", err)
			}
		}
	}
}
// playPCM renders raw PCM to a render device, stopping early if the stop
// channel closes.
//
// deviceID selects the endpoint via openDevice, which falls back to the
// default render endpoint when the id is empty or not found. pcm holds the
// samples, described by rate (samples per second), ch (channel count) and bits
// (bits per sample); bytes beyond the last whole frame are not played.
//
// The call blocks until all frames have been submitted and the device buffer
// has drained, or until stop closes. It returns nil on completion, on early
// stop, and for empty input; it returns an error for an unusable format or
// when COM setup or a WASAPI call fails. Runs on a COM-initialised, OS-locked
// thread.
func playPCM(deviceID string, pcm []byte, rate, ch, bits int, stop <-chan struct{}) error {
	if len(pcm) == 0 {
		return nil
	}

	// Reject unusable formats before touching COM or the device.
	frameBytes := ch * bits / 8
	if rate <= 0 || ch <= 0 || bits <= 0 || frameBytes <= 0 {
		return fmt.Errorf("bad audio format")
	}

	// COM initialisation is per thread, so pin this goroutine to its thread
	// for the lifetime of the playback.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
	if err := coInit(); err != nil {
		return fmt.Errorf("CoInitialize: %w", err)
	}
	defer ole.CoUninitialize()

	dev, err := openDevice(wca.ERender, deviceID)
	if err != nil {
		return err
	}
	defer dev.Release()

	var ac *wca.IAudioClient
	if err := dev.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &ac); err != nil {
		return fmt.Errorf("activate render: %w", err)
	}
	defer ac.Release()

	wfx := &wca.WAVEFORMATEX{
		WFormatTag:      1, // WAVE_FORMAT_PCM
		NChannels:       uint16(ch),
		NSamplesPerSec:  uint32(rate),
		NAvgBytesPerSec: uint32(rate * frameBytes),
		NBlockAlign:     uint16(frameBytes),
		WBitsPerSample:  uint16(bits),
		CbSize:          0,
	}
	if err := ac.Initialize(wca.AUDCLNT_SHAREMODE_SHARED, autoConvert,
		wca.REFERENCE_TIME(bufferDuration100ns), 0, wfx, nil); err != nil {
		return fmt.Errorf("initialize render: %w", err)
	}

	var bufFrames uint32
	if err := ac.GetBufferSize(&bufFrames); err != nil {
		return fmt.Errorf("get render buffer size: %w", err)
	}

	var arc *wca.IAudioRenderClient
	if err := ac.GetService(wca.IID_IAudioRenderClient, &arc); err != nil {
		return fmt.Errorf("get render service: %w", err)
	}
	defer arc.Release()

	totalFrames := len(pcm) / frameBytes
	written := 0

	// feed copies up to maxFrames not-yet-written frames into the device
	// buffer and advances written only once the buffer has been committed.
	feed := func(maxFrames int) error {
		if maxFrames <= 0 || written >= totalFrames {
			return nil
		}
		n := totalFrames - written
		if n > maxFrames {
			n = maxFrames
		}
		var data *byte
		if err := arc.GetBuffer(uint32(n), &data); err != nil {
			return fmt.Errorf("get render buffer: %w", err)
		}
		if data == nil {
			// unsafe.Slice would panic on a nil pointer with a non-zero length.
			return fmt.Errorf("get render buffer: nil buffer for %d frames", n)
		}
		copy(unsafe.Slice(data, n*frameBytes), pcm[written*frameBytes:(written+n)*frameBytes])
		if err := arc.ReleaseBuffer(uint32(n), 0); err != nil {
			return fmt.Errorf("release render buffer: %w", err)
		}
		written += n
		return nil
	}

	// pause waits for d and reports true, or reports false as soon as stop
	// closes, so a stop request never has to sit out a full sleep.
	pause := func(d time.Duration) bool {
		timer := time.NewTimer(d)
		defer timer.Stop()
		select {
		case <-stop:
			return false
		case <-timer.C:
			return true
		}
	}

	// Pre-fill before starting to avoid an initial glitch.
	if err := feed(int(bufFrames)); err != nil {
		return err
	}
	if err := ac.Start(); err != nil {
		return fmt.Errorf("start render: %w", err)
	}
	defer ac.Stop()

	for written < totalFrames {
		select {
		case <-stop:
			return nil
		default:
		}

		var padding uint32
		if err := ac.GetCurrentPadding(&padding); err != nil {
			return fmt.Errorf("query render padding: %w", err)
		}
		// Free space is bufFrames - padding; compare first so the unsigned
		// subtraction can never wrap around.
		free := 0
		if padding < bufFrames {
			free = int(bufFrames - padding)
		}
		if err := feed(free); err != nil {
			return err
		}
		if !pause(8 * time.Millisecond) {
			return nil
		}
	}

	// Drain the remaining buffered audio. Every frame has been submitted by
	// now, so a failing padding query just ends the wait without an error.
	for {
		select {
		case <-stop:
			return nil
		default:
		}

		var padding uint32
		if ac.GetCurrentPadding(&padding) != nil || padding == 0 {
			return nil
		}
		if !pause(10 * time.Millisecond) {
			return nil
		}
	}
}