//go:build windows package audio import ( "fmt" "runtime" "time" "unsafe" "github.com/go-ole/go-ole" "github.com/moutend/go-wca/pkg/wca" ) const ( // AUDCLNT_BUFFERFLAGS_SILENT — the capture packet is silent; emit zeros. bufferFlagSilent uint32 = 0x1 // 1-second WASAPI buffer (REFERENCE_TIME is in 100-ns units). bufferDuration100ns = 10_000_000 ) func coInit() error { if e := ole.CoInitializeEx(0, ole.COINIT_APARTMENTTHREADED); e != nil { if oe, ok := e.(*ole.OleError); !ok || oe.Code() != 0x00000001 { // S_FALSE ok return e } } return nil } // openDevice resolves an IMMDevice by endpoint id, falling back to the default // endpoint for the flow when id is empty or not found. Caller must Release(). func openDevice(flow uint32, id string) (*wca.IMMDevice, error) { var mmde *wca.IMMDeviceEnumerator if err := wca.CoCreateInstance(wca.CLSID_MMDeviceEnumerator, 0, wca.CLSCTX_ALL, wca.IID_IMMDeviceEnumerator, &mmde); err != nil { return nil, fmt.Errorf("create enumerator: %w", err) } defer mmde.Release() if id != "" { var coll *wca.IMMDeviceCollection if err := mmde.EnumAudioEndpoints(flow, wca.DEVICE_STATE_ACTIVE, &coll); err == nil && coll != nil { defer coll.Release() var count uint32 coll.GetCount(&count) for i := uint32(0); i < count; i++ { var dev *wca.IMMDevice if coll.Item(i, &dev) != nil || dev == nil { continue } var did string dev.GetId(&did) if did == id { return dev, nil // caller owns it } dev.Release() } } } var dev *wca.IMMDevice if err := mmde.GetDefaultAudioEndpoint(flow, wca.EConsole, &dev); err != nil { return nil, fmt.Errorf("no audio endpoint (id %q): %w", id, err) } return dev, nil } // pcmFormat is the fixed capture format (16 kHz mono 16-bit PCM). WASAPI's // AUTOCONVERTPCM resamples from the device's native mix format for us. func pcmFormat() *wca.WAVEFORMATEX { return &wca.WAVEFORMATEX{ WFormatTag: 1, // WAVE_FORMAT_PCM NChannels: channels, NSamplesPerSec: sampleRate, NAvgBytesPerSec: bytesPerSec, NBlockAlign: blockAlign, WBitsPerSample: bitsPerSample, CbSize: 0, } } const autoConvert = wca.AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | wca.AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY // recordPCM captures from a device into 16 kHz mono 16-bit PCM bytes until the // stop channel is closed. func recordPCM(deviceID string, stop <-chan struct{}) ([]byte, error) { out := make([]byte, 0, bytesPerSec*4) err := captureStream(deviceID, stop, func(chunk []byte) { out = append(out, chunk...) }) return out, err } // captureStream opens a device and calls onChunk with freshly-captured 16 kHz // mono 16-bit PCM as it arrives, until stop closes. onChunk receives a private // copy it may retain. Runs on a COM-initialised, OS-locked thread. func captureStream(deviceID string, stop <-chan struct{}, onChunk func([]byte)) error { runtime.LockOSThread() defer runtime.UnlockOSThread() if err := coInit(); err != nil { return fmt.Errorf("CoInitialize: %w", err) } defer ole.CoUninitialize() dev, err := openDevice(wca.ECapture, deviceID) if err != nil { return err } defer dev.Release() var ac *wca.IAudioClient if err := dev.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &ac); err != nil { return fmt.Errorf("activate capture: %w", err) } defer ac.Release() if err := ac.Initialize(wca.AUDCLNT_SHAREMODE_SHARED, autoConvert, wca.REFERENCE_TIME(bufferDuration100ns), 0, pcmFormat(), nil); err != nil { return fmt.Errorf("initialize capture: %w", err) } var acc *wca.IAudioCaptureClient if err := ac.GetService(wca.IID_IAudioCaptureClient, &acc); err != nil { return fmt.Errorf("get capture service: %w", err) } defer acc.Release() if err := ac.Start(); err != nil { return fmt.Errorf("start capture: %w", err) } defer ac.Stop() for { select { case <-stop: return nil default: } var packet uint32 if err := acc.GetNextPacketSize(&packet); err != nil { return err } if packet == 0 { time.Sleep(10 * time.Millisecond) continue } for packet > 0 { var data *byte var frames, flags uint32 var devpos, qpcpos uint64 if err := acc.GetBuffer(&data, &frames, &flags, &devpos, &qpcpos); err != nil { return err } n := int(frames) * blockAlign if n > 0 { chunk := make([]byte, n) if flags&bufferFlagSilent == 0 && data != nil { copy(chunk, unsafe.Slice(data, n)) } onChunk(chunk) } acc.ReleaseBuffer(frames) if err := acc.GetNextPacketSize(&packet); err != nil { return err } } } } // playPCM renders raw PCM (with the given format) to a device, stopping early // if the stop channel closes. Runs on a COM-initialised, OS-locked thread. func playPCM(deviceID string, pcm []byte, rate, ch, bits int, stop <-chan struct{}) error { if len(pcm) == 0 { return nil } runtime.LockOSThread() defer runtime.UnlockOSThread() if err := coInit(); err != nil { return fmt.Errorf("CoInitialize: %w", err) } defer ole.CoUninitialize() dev, err := openDevice(wca.ERender, deviceID) if err != nil { return err } defer dev.Release() var ac *wca.IAudioClient if err := dev.Activate(wca.IID_IAudioClient, wca.CLSCTX_ALL, nil, &ac); err != nil { return fmt.Errorf("activate render: %w", err) } defer ac.Release() frameBytes := ch * bits / 8 if frameBytes <= 0 { return fmt.Errorf("bad audio format") } wfx := &wca.WAVEFORMATEX{ WFormatTag: 1, NChannels: uint16(ch), NSamplesPerSec: uint32(rate), NAvgBytesPerSec: uint32(rate * frameBytes), NBlockAlign: uint16(frameBytes), WBitsPerSample: uint16(bits), CbSize: 0, } if err := ac.Initialize(wca.AUDCLNT_SHAREMODE_SHARED, autoConvert, wca.REFERENCE_TIME(bufferDuration100ns), 0, wfx, nil); err != nil { return fmt.Errorf("initialize render: %w", err) } var bufFrames uint32 if err := ac.GetBufferSize(&bufFrames); err != nil { return err } var arc *wca.IAudioRenderClient if err := ac.GetService(wca.IID_IAudioRenderClient, &arc); err != nil { return fmt.Errorf("get render service: %w", err) } defer arc.Release() totalFrames := len(pcm) / frameBytes written := 0 feed := func(maxFrames int) error { if maxFrames <= 0 || written >= totalFrames { return nil } n := totalFrames - written if n > maxFrames { n = maxFrames } var data *byte if err := arc.GetBuffer(uint32(n), &data); err != nil { return err } dst := unsafe.Slice(data, n*frameBytes) copy(dst, pcm[written*frameBytes:(written+n)*frameBytes]) arc.ReleaseBuffer(uint32(n), 0) written += n return nil } // Pre-fill before starting to avoid an initial glitch. if err := feed(int(bufFrames)); err != nil { return err } if err := ac.Start(); err != nil { return fmt.Errorf("start render: %w", err) } defer ac.Stop() for written < totalFrames { select { case <-stop: return nil default: } var padding uint32 ac.GetCurrentPadding(&padding) if err := feed(int(bufFrames - padding)); err != nil { return err } time.Sleep(8 * time.Millisecond) } // Drain the remaining buffered audio. for { select { case <-stop: return nil default: } var padding uint32 if ac.GetCurrentPadding(&padding) != nil || padding == 0 { return nil } time.Sleep(10 * time.Millisecond) } }