272 lines
7.2 KiB
Go
272 lines
7.2 KiB
Go
//go:build windows
|
|
|
|
package audio
|
|
|
|
import (
|
|
"fmt"
|
|
"runtime"
|
|
"time"
|
|
"unsafe"
|
|
|
|
"github.com/go-ole/go-ole"
|
|
"github.com/moutend/go-wca/pkg/wca"
|
|
)
|
|
|
|
const (
	// bufferFlagSilent mirrors AUDCLNT_BUFFERFLAGS_SILENT (0x2): the capture
	// packet carries no meaningful data, so the consumer emits zeros instead
	// of reading the buffer. The neighbouring bit 0x1 is
	// AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY and must not be treated as
	// silence.
	bufferFlagSilent uint32 = 0x2
	// bufferDuration100ns is the requested WASAPI buffer length: one second,
	// expressed in REFERENCE_TIME units of 100 ns.
	bufferDuration100ns = 10_000_000
)
|
|
|
|
func coInit() error {
|
|
if e := ole.CoInitializeEx(0, ole.COINIT_APARTMENTTHREADED); e != nil {
|
|
if oe, ok := e.(*ole.OleError); !ok || oe.Code() != 0x00000001 { // S_FALSE ok
|
|
return e
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// openDevice resolves an IMMDevice by endpoint id, falling back to the default
|
|
// endpoint for the flow when id is empty or not found. Caller must Release().
|
|
func openDevice(flow uint32, id string) (*wca.IMMDevice, error) {
|
|
var mmde *wca.IMMDeviceEnumerator
|
|
if err := wca.CoCreateInstance(wca.CLSID_MMDeviceEnumerator, 0, wca.CLSCTX_ALL,
|
|
wca.IID_IMMDeviceEnumerator, &mmde); err != nil {
|
|
return nil, fmt.Errorf("create enumerator: %w", err)
|
|
}
|
|
defer mmde.Release()
|
|
|
|
if id != "" {
|
|
var coll *wca.IMMDeviceCollection
|
|
if err := mmde.EnumAudioEndpoints(flow, wca.DEVICE_STATE_ACTIVE, &coll); err == nil && coll != nil {
|
|
defer coll.Release()
|
|
var count uint32
|
|
coll.GetCount(&count)
|
|
for i := uint32(0); i < count; i++ {
|
|
var dev *wca.IMMDevice
|
|
if coll.Item(i, &dev) != nil || dev == nil {
|
|
continue
|
|
}
|
|
var did string
|
|
dev.GetId(&did)
|
|
if did == id {
|
|
return dev, nil // caller owns it
|
|
}
|
|
dev.Release()
|
|
}
|
|
}
|
|
}
|
|
var dev *wca.IMMDevice
|
|
if err := mmde.GetDefaultAudioEndpoint(flow, wca.EConsole, &dev); err != nil {
|
|
return nil, fmt.Errorf("no audio endpoint (id %q): %w", id, err)
|
|
}
|
|
return dev, nil
|
|
}
|
|
|
|
// pcmFormat is the fixed capture format (16 kHz mono 16-bit PCM). WASAPI's
|
|
// AUTOCONVERTPCM resamples from the device's native mix format for us.
|
|
func pcmFormat() *wca.WAVEFORMATEX {
|
|
return &wca.WAVEFORMATEX{
|
|
WFormatTag: 1, // WAVE_FORMAT_PCM
|
|
NChannels: channels,
|
|
NSamplesPerSec: sampleRate,
|
|
NAvgBytesPerSec: bytesPerSec,
|
|
NBlockAlign: blockAlign,
|
|
WBitsPerSample: bitsPerSample,
|
|
CbSize: 0,
|
|
}
|
|
}
|
|
|
|
// autoConvert is the stream-flag set handed to IAudioClient.Initialize by both
// the capture and the render paths: AUTOCONVERTPCM combined with
// SRC_DEFAULT_QUALITY.
const autoConvert = wca.AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY |
	wca.AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM
|
|
|
|
// recordPCM captures from a device into 16 kHz mono 16-bit PCM bytes until the
|
|
// stop channel is closed.
|
|
func recordPCM(deviceID string, stop <-chan struct{}) ([]byte, error) {
|
|
out := make([]byte, 0, bytesPerSec*4)
|
|
err := captureStream(deviceID, stop, func(chunk []byte) { out = append(out, chunk...) })
|
|
return out, err
|
|
}
|
|
|
|
// captureStream opens a device and calls onChunk with freshly-captured 16 kHz
|
|
// mono 16-bit PCM as it arrives, until stop closes. onChunk receives a private
|
|
// copy it may retain. Runs on a COM-initialised, OS-locked thread.
|
|
func captureStream(deviceID string, stop <-chan struct{}, onChunk func([]byte)) error {
|
|
runtime.LockOSThread()
|
|
defer runtime.UnlockOSThread()
|
|
if err := coInit(); err != nil {
|
|
return fmt.Errorf("CoInitialize: %w", err)
|
|
}
|
|
defer ole.CoUninitialize()
|
|
|
|
dev, err := openDevice(wca.ECapture, deviceID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer dev.Release()
|
|
|
|
var ac *wca.IAudioClient
|
|
if err := dev.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &ac); err != nil {
|
|
return fmt.Errorf("activate capture: %w", err)
|
|
}
|
|
defer ac.Release()
|
|
|
|
if err := ac.Initialize(wca.AUDCLNT_SHAREMODE_SHARED, autoConvert,
|
|
wca.REFERENCE_TIME(bufferDuration100ns), 0, pcmFormat(), nil); err != nil {
|
|
return fmt.Errorf("initialize capture: %w", err)
|
|
}
|
|
var acc *wca.IAudioCaptureClient
|
|
if err := ac.GetService(wca.IID_IAudioCaptureClient, &acc); err != nil {
|
|
return fmt.Errorf("get capture service: %w", err)
|
|
}
|
|
defer acc.Release()
|
|
|
|
if err := ac.Start(); err != nil {
|
|
return fmt.Errorf("start capture: %w", err)
|
|
}
|
|
defer ac.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-stop:
|
|
return nil
|
|
default:
|
|
}
|
|
var packet uint32
|
|
if err := acc.GetNextPacketSize(&packet); err != nil {
|
|
return err
|
|
}
|
|
if packet == 0 {
|
|
time.Sleep(10 * time.Millisecond)
|
|
continue
|
|
}
|
|
for packet > 0 {
|
|
var data *byte
|
|
var frames, flags uint32
|
|
var devpos, qpcpos uint64
|
|
if err := acc.GetBuffer(&data, &frames, &flags, &devpos, &qpcpos); err != nil {
|
|
return err
|
|
}
|
|
n := int(frames) * blockAlign
|
|
if n > 0 {
|
|
chunk := make([]byte, n)
|
|
if flags&bufferFlagSilent == 0 && data != nil {
|
|
copy(chunk, unsafe.Slice(data, n))
|
|
}
|
|
onChunk(chunk)
|
|
}
|
|
acc.ReleaseBuffer(frames)
|
|
if err := acc.GetNextPacketSize(&packet); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// playPCM renders raw PCM (with the given format) to a device, stopping early
|
|
// if the stop channel closes. Runs on a COM-initialised, OS-locked thread.
|
|
func playPCM(deviceID string, pcm []byte, rate, ch, bits int, stop <-chan struct{}) error {
|
|
if len(pcm) == 0 {
|
|
return nil
|
|
}
|
|
runtime.LockOSThread()
|
|
defer runtime.UnlockOSThread()
|
|
if err := coInit(); err != nil {
|
|
return fmt.Errorf("CoInitialize: %w", err)
|
|
}
|
|
defer ole.CoUninitialize()
|
|
|
|
dev, err := openDevice(wca.ERender, deviceID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer dev.Release()
|
|
|
|
var ac *wca.IAudioClient
|
|
if err := dev.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &ac); err != nil {
|
|
return fmt.Errorf("activate render: %w", err)
|
|
}
|
|
defer ac.Release()
|
|
|
|
frameBytes := ch * bits / 8
|
|
if frameBytes <= 0 {
|
|
return fmt.Errorf("bad audio format")
|
|
}
|
|
wfx := &wca.WAVEFORMATEX{
|
|
WFormatTag: 1, NChannels: uint16(ch), NSamplesPerSec: uint32(rate),
|
|
NAvgBytesPerSec: uint32(rate * frameBytes), NBlockAlign: uint16(frameBytes),
|
|
WBitsPerSample: uint16(bits), CbSize: 0,
|
|
}
|
|
if err := ac.Initialize(wca.AUDCLNT_SHAREMODE_SHARED, autoConvert,
|
|
wca.REFERENCE_TIME(bufferDuration100ns), 0, wfx, nil); err != nil {
|
|
return fmt.Errorf("initialize render: %w", err)
|
|
}
|
|
var bufFrames uint32
|
|
if err := ac.GetBufferSize(&bufFrames); err != nil {
|
|
return err
|
|
}
|
|
var arc *wca.IAudioRenderClient
|
|
if err := ac.GetService(wca.IID_IAudioRenderClient, &arc); err != nil {
|
|
return fmt.Errorf("get render service: %w", err)
|
|
}
|
|
defer arc.Release()
|
|
|
|
totalFrames := len(pcm) / frameBytes
|
|
written := 0
|
|
feed := func(maxFrames int) error {
|
|
if maxFrames <= 0 || written >= totalFrames {
|
|
return nil
|
|
}
|
|
n := totalFrames - written
|
|
if n > maxFrames {
|
|
n = maxFrames
|
|
}
|
|
var data *byte
|
|
if err := arc.GetBuffer(uint32(n), &data); err != nil {
|
|
return err
|
|
}
|
|
dst := unsafe.Slice(data, n*frameBytes)
|
|
copy(dst, pcm[written*frameBytes:(written+n)*frameBytes])
|
|
arc.ReleaseBuffer(uint32(n), 0)
|
|
written += n
|
|
return nil
|
|
}
|
|
|
|
// Pre-fill before starting to avoid an initial glitch.
|
|
if err := feed(int(bufFrames)); err != nil {
|
|
return err
|
|
}
|
|
if err := ac.Start(); err != nil {
|
|
return fmt.Errorf("start render: %w", err)
|
|
}
|
|
defer ac.Stop()
|
|
|
|
for written < totalFrames {
|
|
select {
|
|
case <-stop:
|
|
return nil
|
|
default:
|
|
}
|
|
var padding uint32
|
|
ac.GetCurrentPadding(&padding)
|
|
if err := feed(int(bufFrames - padding)); err != nil {
|
|
return err
|
|
}
|
|
time.Sleep(8 * time.Millisecond)
|
|
}
|
|
// Drain the remaining buffered audio.
|
|
for {
|
|
select {
|
|
case <-stop:
|
|
return nil
|
|
default:
|
|
}
|
|
var padding uint32
|
|
if ac.GetCurrentPadding(&padding) != nil || padding == 0 {
|
|
return nil
|
|
}
|
|
time.Sleep(10 * time.Millisecond)
|
|
}
|
|
}
|