音频采集 via DirectShow

这里所说的音频采集是指通过麦克风采集声音数据然后经过编码保存为磁盘上的一个文件。
Windows 上有如下几种常见的实现方式:

DirectShow 简介

DirectShow(有时缩写为 DS 或 DShow),开发代号 Quartz,是微软在 ActiveMovie 和 Video for Windows 的基础上推出的新一代基于 COM 的流媒体处理开发包,与 DirectX 开发包一起发布。DShow 使用一种叫 Filter Graph 的模型来管理整个数据流的处理过程。有了 DShow,我们可以很方便地从支持 WDM 驱动模型的采集卡上捕获数据,并且进行相应的后期处理乃至存储到文件中。这使得多媒体数据库管理系统(MDBMS)中多媒体数据的存取变得更加方便。它广泛地支持各种媒体格式,包括 asf、mpeg、avi、dv、mp3、wav 等,为多媒体流的捕捉和回放提供了强有力的支持。

DirectShow 采集音频

采集流程图

音频采集 via DirectShow

采集代码概览

以下是整个 DirectShow 采集过程的概要代码,略去各个函数的具体实现和资源释放。

// Overview of the capture pipeline: Capture filter -> "AVI Mux" -> "File Writer".
// NOTE(review): every call stores its result in hr, but hr is never checked in
// this snippet, and no interface pointer is released before CoUninitialize().

// Initialize COM, then create the Filter Graph Manager (IGraphBuilder).
CoInitialize(NULL);  
hr = CoCreateInstance(CLSID_FilterGraph, NULL, CLSCTX_INPROC_SERVER, IID_IGraphBuilder, (void**)&pGraph);   

// enumerate the audio input device and pick the first one.   
hr = enumAudioInputFilters((void**)&pAudioInputFilter);   
// Add the capture filter to the graph under the name "Capture".
hr = pGraph->AddFilter(pAudioInputFilter, _T("Capture"));   
// Create the filter registered as CLSID_AviDest, add it as "AVI Mux",
// and connect the capture filter to it.
hr = addFilterByCLSID(pGraph, CLSID_AviDest, _T("AVI Mux"), &pAVIMux);   
hr = connectFilters(pGraph, pAudioInputFilter, pAVIMux);   
// Create the file writer, set its output file name to argv[1] through
// IFileSinkFilter, then connect the mux to it.
hr = addFilterByCLSID(pGraph, CLSID_FileWriter, _T("File Writer"), &pFileWriter);   
hr = pFileWriter->QueryInterface(IID_IFileSinkFilter, (void**)&pSink);   
hr = pSink->SetFileName(argv[1], NULL);   
hr = connectFilters(pGraph, pAVIMux, pFileWriter); 
  
// Obtain IMediaControl from the graph and start running it.
hr = pGraph->QueryInterface(IID_IMediaControl, (void**)&pControl);
hr = pControl->Run();   

char ch = getchar(); // Wait keyboard input and then stop the recording
hr = pControl->Stop();
CoUninitialize();

enumAudioInputFilters 函数

/**
 * Binds the first moniker in the audio input device category to an
 * IBaseFilter and logs the device's friendly name when it can be read.
 *
 * @param gottaFilter  Receives the IBaseFilter pointer of the first audio
 *                     input device. Set to NULL on entry; the caller must
 *                     Release() the returned interface.
 * @return S_OK on success; E_POINTER if gottaFilter is NULL; E_FAIL if no
 *         audio input device is available; otherwise the failing HRESULT
 *         of the underlying COM call.
 */
HRESULT enumAudioInputFilters(void** gottaFilter)
{
    if (gottaFilter == NULL)
        return E_POINTER;
    *gottaFilter = NULL;

    HRESULT hr = E_FAIL;

    // Create the System Device Enumerator.
    CComPtr<ICreateDevEnum> pDevEnum = NULL;
    hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER, IID_ICreateDevEnum, (void**)&pDevEnum);
    RETURN_IF_FAILED(hr);

    // Obtain a class enumerator for the audio input category.
    CComPtr<IEnumMoniker> pEnumCat = NULL;
    hr = pDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEnumCat, 0);
    RETURN_IF_FAILED(hr);
    // CreateClassEnumerator reports an empty category with S_FALSE and a NULL
    // enumerator. S_FALSE is a success code, so it must be rejected explicitly
    // before pEnumCat is dereferenced.
    if (hr != S_OK || pEnumCat == NULL)
        return E_FAIL;

    // Fetch the first moniker in the category.
    CComPtr<IMoniker> pMoniker = NULL;
    ULONG cFetched = 0;
    hr = pEnumCat->Next(1, &pMoniker, &cFetched);
    RETURN_IF_FAILED(hr);
    // Next returns S_FALSE when fewer items than requested were fetched;
    // with a request of 1 that means there is no moniker to bind.
    if (hr != S_OK || pMoniker == NULL)
        return E_FAIL;

    // Best effort: log the device's friendly name. A failure here does not
    // prevent the filter from being created below.
    CComPtr<IPropertyBag> pPropBag = NULL;
    hr = pMoniker->BindToStorage(0, 0, IID_IPropertyBag, (void **)&pPropBag);
    if (SUCCEEDED(hr)) {
        VARIANT varName;
        VariantInit(&varName);
        // IPropertyBag::Read takes a wide-character property name regardless
        // of the project's character set, hence L"..." rather than _T("...").
        hr = pPropBag->Read(L"FriendlyName", &varName, 0);
        if (SUCCEEDED(hr) && varName.vt == VT_BSTR)
            DL_T1("Audio Input Device: %s", varName.bstrVal);
        VariantClear(&varName);
    }

    // Create an instance of the filter; ownership passes to the caller.
    hr = pMoniker->BindToObject(NULL, NULL, IID_IBaseFilter, gottaFilter);
    RETURN_IF_FAILED(hr);

    return S_OK;
}

addFilterByCLSID 函数

/**
 * Creates a filter from its CLSID and adds it to the filter graph.
 *
 * @param pGraph   Pointer to the Filter Graph Manager.
 * @param clsid    CLSID of the filter to create.
 * @param wszName  A name for the filter inside the graph.
 * @param ppF      Receives a pointer to the filter; NULL on failure.
 * @return The HRESULT of CoCreateInstance if creation fails, otherwise the
 *         HRESULT of IGraphBuilder::AddFilter.
 */
HRESULT addFilterByCLSID(
    IGraphBuilder *pGraph,
    const GUID& clsid,
    LPCWSTR wszName,
    IBaseFilter **ppF)
{
    *ppF = NULL;

    IBaseFilter *pCreated = NULL;
    HRESULT hr = CoCreateInstance(clsid, 0, CLSCTX_INPROC_SERVER, IID_IBaseFilter, reinterpret_cast<void**>(&pCreated));
    RETURN_IF_FAILED(hr);

    hr = pGraph->AddFilter(pCreated, wszName);
    if (FAILED(hr)) {
        // The graph did not accept the filter: drop the reference from
        // CoCreateInstance.
        pCreated->Release();
        return hr;
    }

    // Hand the reference obtained from CoCreateInstance to the caller.
    *ppF = pCreated;
    return hr;
}

connectFilters 函数

/**
 * Connects an output pin to the first unconnected input pin of a
 * downstream filter.
 *
 * @param pGraph  Filter Graph Manager used to make the connection.
 * @param pOut    Output pin on the upstream filter.
 * @param pDest   Downstream filter to connect to.
 * @return The HRESULT of getUnconnectedPin if no input pin is found,
 *         otherwise the HRESULT of IGraphBuilder::Connect.
 */
HRESULT connectFilters(IGraphBuilder *pGraph, IPin *pOut, IBaseFilter *pDest)
{
    // Locate a free input pin on the downstream filter.
    CComPtr<IPin> pInputPin;
    HRESULT hr = getUnconnectedPin(pDest, PINDIR_INPUT, &pInputPin);
    RETURN_IF_FAILED(hr);

    // Ask the graph to connect the two pins.
    hr = pGraph->Connect(pOut, pInputPin);
    RETURN_IF_FAILED(hr);
    return hr;
}

/**
 * Connects the first unconnected output pin of pSrc to the first
 * unconnected input pin of pDest.
 *
 * @param pGraph  Filter Graph Manager used to make the connection.
 * @param pSrc    Upstream filter.
 * @param pDest   Downstream filter.
 * @return The HRESULT of getUnconnectedPin if pSrc has no free output pin,
 *         otherwise the HRESULT of the pin-based connectFilters overload.
 */
HRESULT connectFilters(IGraphBuilder *pGraph, IBaseFilter *pSrc, IBaseFilter *pDest)
{
    // Locate a free output pin on the upstream filter.
    CComPtr<IPin> pOutputPin;
    HRESULT hr = getUnconnectedPin(pSrc, PINDIR_OUTPUT, &pOutputPin);
    RETURN_IF_FAILED(hr);

    // Delegate to the pin-to-filter overload for the actual connection.
    hr = connectFilters(pGraph, pOutputPin, pDest);
    RETURN_IF_FAILED(hr);
    return hr;
}

getUnconnectedPin 函数

/**
 * Finds the first pin on a filter that has the requested direction and is
 * not connected to another pin.
 *
 * @param pFilter  Filter whose pins are enumerated.
 * @param PinDir   Direction of the pin to look for.
 * @param ppPin    Receives the matching pin with an outstanding reference
 *                 that the caller must Release(); set to NULL when no pin
 *                 is returned.
 * @return S_OK if a pin was found; E_POINTER if pFilter or ppPin is NULL;
 *         the HRESULT of EnumPins if enumeration cannot start; E_FAIL if no
 *         matching pin exists.
 */
HRESULT getUnconnectedPin(IBaseFilter *pFilter, PIN_DIRECTION PinDir, IPin **ppPin)
{
    if (pFilter == NULL || ppPin == NULL)
        return E_POINTER;
    // Never leave the out-parameter indeterminate on a failure path.
    *ppPin = NULL;

    CComPtr<IEnumPins> pEnum = NULL;
    HRESULT hr = pFilter->EnumPins(&pEnum);
    RETURN_IF_FAILED(hr);

    for (;;) {
        // A fresh CComPtr per iteration releases each rejected pin
        // automatically on every exit path.
        CComPtr<IPin> pPin;
        if (pEnum->Next(1, &pPin, NULL) != S_OK)
            break;

        // Skip pins whose direction cannot be queried instead of reading an
        // uninitialized PIN_DIRECTION value.
        PIN_DIRECTION ThisPinDir;
        if (FAILED(pPin->QueryDirection(&ThisPinDir)) || ThisPinDir != PinDir)
            continue;

        // ConnectedTo succeeds only when the pin already has a peer; such a
        // pin is not the one we want.
        CComPtr<IPin> pPeer;
        if (SUCCEEDED(pPin->ConnectedTo(&pPeer)))
            continue;

        // Unconnected pin of the requested direction: transfer the
        // reference to the caller.
        *ppPin = pPin.Detach();
        return S_OK;
    }

    return E_FAIL; // Did not find a matching pin.
}

GraphEdit

GraphEdit 是一个用于建立和测试 Filter Graph 的可视化工具,包含在 Windows SDK 7.x 中(Bin/graphedt.exe)。通过 GraphEdit,你可以在写程序代码前验证 Filter Graph。你也可以加载一个由程序创建的 Filter Graph,来核实是否创建了正确的 Graph。如果你开发一个定制的 Filter,GraphEdit 提供了一种快速的方式测试它:简单地加载一个带有你定制的 Filter 的 Graph,并且试着运行它。

下图就是上面音频采集程序的 Filter Graph,包含一个 Source Filter,AVI Mux 和 File Writer。
音频采集 via DirectShow
GraphEdit 中包含了丰富的内置或第三方(需注册)的 Filter,可以通过 Graph 菜单下的 Insert Filters… 子菜单打开,如下图所示:
音频采集 via DirectShow

音频采集 via DirectShow
EOF