Android multi-channel recording research

Stock Android only supports 2-channel recording, but there is growing demand for more microphones, e.g. for voice recognition. It is already known that the TDM protocol can carry multi-mic data from the kernel to the HAL — that is, from kernel space to user space. However, the native AudioRecord interface does not support capturing multi-channel recording data at all. How can it be modified to support this natively?

Let's start with the constructor of AudioRecord. Whether or not it works, we must understand why! If we write a recording app, we generally use AudioRecord like this:

// Typical AudioRecord usage in a recording app.
// NOTE(review): channelConfiguration is not declared in this snippet —
// presumably AudioFormat.CHANNEL_IN_STEREO or CHANNEL_IN_MONO; confirm.
int sampleRateInHz = 8000;
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
// Ask the platform for the minimum working buffer size for these parameters.
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz,     channelConfiguration, audioEncodingBits);
mAudioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, 
    sampleRateInHz, channelConfiguration, audioEncodingBits,
                    recordBufferSize);

Let me talk about the last parameter of the AudioRecord constructor, recordBufferSize, starting from:

getMinBufferSize

//AudioRecord.java
// Returns the minimum buffer size, in bytes, needed to create an AudioRecord
// with the given sample rate, channel configuration and encoding (excerpt).
static public int getMinBufferSize(int sampleRateInHz, int channelConfig, int audioFormat) {
        int channelCount = 0;
        ...
        // Derive channelCount from the channelMask.
        // Interesting — there is already a 6-channel case here; it can
        // probably serve as a reference for adding more channels.
        case AudioFormat.CHANNEL_IN_5POINT1:
            channelCount = 6;
        ...
        int size = native_get_min_buff_size(sampleRateInHz, channelCount, audioFormat);
        ...
}

native_get_min_buff_size corresponds to android_media_AudioRecord_get_min_buff_size:

//android_media_AudioRecord.cpp
// JNI backend of getMinBufferSize(): asks the native AudioRecord for the
// minimum frame count, then converts frames -> bytes.
static jint android_media_AudioRecord_get_min_buff_size(JNIEnv *env,  jobject thiz,jint sampleRateInHertz, jint channelCount, jint audioFormat) {
    size_t frameCount = 0;
    audio_format_t format = audioFormatToNative(audioFormat);
    status_t result = AudioRecord::getMinFrameCount(&frameCount,
            sampleRateInHertz,
            format,
            audio_channel_in_mask_from_count(channelCount));
    // NOTE(review): `result` is not checked in this excerpt; the full AOSP
    // source handles the error cases — confirm against the original file.
    // bytes = frames * channels * bytes-per-sample.
    return frameCount * channelCount *      audio_bytes_per_sample(format);
}

The format passed here is AudioFormat.ENCODING_PCM_16BIT, according to audio_bytes_per_sample:

//audio.h
// Byte width of one sample of the given format (excerpt).
static inline size_t audio_bytes_per_sample(audio_format_t format)
{
    ...
    case AUDIO_FORMAT_PCM_16_BIT:
      case AUDIO_FORMAT_IEC61937:
          size = sizeof(int16_t);
    ...
}

audio_bytes_per_sample returns sizeof(signed short) = 2.

// Computes the minimum number of frames the client-side record buffer must
// hold, based on the HAL-reported input buffer size (excerpt).
status_t AudioRecord::getMinFrameCount(   
        size_t* frameCount,
        uint32_t sampleRate,
        audio_format_t format,
        audio_channel_mask_t channelMask)
{
    status_t status = AudioSystem::getInputBufferSize(sampleRate, format, channelMask, &size);
    ...
    // The size is doubled here:
    // We double the size of input buffer for ping pong use of record buffer.
    // Assumes audio_is_linear_pcm(format)
    if ((*frameCount = (size * 2) / (audio_channel_count_from_in_mask(channelMask) *
            audio_bytes_per_sample(format))) == 0) {
        ALOGE("Unsupported configuration: sampleRate %u, format %#x, channelMask %#x",
            sampleRate, format, channelMask);
        return BAD_VALUE;
    }
}

getInputBufferSize directly looks at the hal layer:

//audio_hw.c
// HAL-side capture buffer size: frames for one capture period, times
// bytes-per-sample, times channel count (excerpt).
static size_t get_input_buffer_size(uint32_t sample_rate,
                                    audio_format_t format,
                                    int channel_count,
                                    bool is_low_latency)
{
    ...
        // In this scenario: (8000 * 20) / 1000 frames per period.
        size = (sample_rate * AUDIO_CAPTURE_PERIOD_DURATION_MSEC) / 1000;
        size *= sizeof(short) * channel_count;
    ...
}

size = (8000*20)/1000 * 2 * 2 = 640, so get_input_buffer_size returns 640.

In this scenario, getMinFrameCount gets frameCount = (640 *2) / (2 * 2) = 320

getMinBufferSize will return 320 * 2 * 2 = 1280. After the constructor is called, AudioRecord validates and stores this value via audioBuffSizeCheck (the name says "check", yet the function also stores the value — which seems unreasonable to me).

// Validates the caller-supplied buffer size and stores it for native_setup.
private void audioBuffSizeCheck(int audioBufferSize) throws IllegalArgumentException {                                                                                   
        // NB: this section is only valid with PCM data.
        // To update when supporting compressed formats
        // Only uncompressed PCM is supported.
        int frameSizeInBytes = mChannelCount
            * (AudioFormat.getBytesPerSample(mAudioFormat));
        // Check that the user-supplied value is a whole multiple of frameSizeInBytes.
        if ((audioBufferSize % frameSizeInBytes != 0) || (audioBufferSize < 1)) {
            throw new IllegalArgumentException("Invalid audio buffer size " + audioBufferSize
                    + " (frame size " + frameSizeInBytes + ")");
        }
        // Stored here; used as the buffer size for recorded data.
        mNativeBufferSizeInBytes = audioBufferSize;
    }

Then, pass the value into the native layer by calling native_setup.

//android_media_AudioRecord.cpp
// JNI setup path: converts the byte buffer size back into a frame count and
// hands it to AudioRecord::set() (excerpt; parameter list elided).
static jint
android_media_AudioRecord_setup
{
    ...
    size_t frameSize = channelCount * bytesPerSample;
    // Still the 320 frames discussed above.
    size_t frameCount = buffSizeInBytes / frameSize;
    ...
    const status_t status = lpRecorder->set(
            ...
            frameCount
            ...
            );
}

Then this function will call the AudioRecord set interface.

//AudioRecord.cpp
// Client-side set(): records the requested frame count and opens the record
// track on the AudioFlinger side (excerpt).
status_t AudioRecord::set(/* params omitted */)
{
    ...
    // frameCount requested by the upper layer.
    // mFrameCount is initialized in openRecord_l
    mReqFrameCount = frameCount;
    ...
    size_t frameCount = mReqFrameCount;
    ...
    // temp may be revised by openRecord().
    size_t temp = frameCount;
    ...
    sp<IAudioRecord> record = audioFlinger->openRecord(
                                            ...
                                            &temp,
                                            ...
    );
}

Then set to the AudioFlinger side:

//services/audioflinger/Tracks.cpp
// Server-side track construction: frameCount finally reaches the server proxy.
// NOTE(review): this quotes PlaybackThread::Track (the playback path); the
// capture path uses RecordThread::RecordTrack — verify which one is meant.
AudioFlinger::PlaybackThread::Track::Track(/* params omitted */)
{
    ...
        if (sharedBuffer == 0) {
        mAudioTrackServerProxy = new AudioTrackServerProxy(mCblk, mBuffer, frameCount,mFrameSize, !isExternalTrack(), sampleRate);
    } else {
        mAudioTrackServerProxy = new StaticAudioTrackServerProxy(mCblk, mBuffer, frameCount,mFrameSize);
    }
    ...
}

The subject of this article is the study of multi-channel recording, so let's stop there.

Going back to the code above: to support multi-channel recording, we need to look at the third constructor parameter,

channelConfiguration

The only value ranges are:

// AudioFormat input-channel mask definitions (excerpt): each channel is one
// bit, so masks can be OR-ed together.
public static final int CHANNEL_IN_DEFAULT = 1;
    // These directly match native
    public static final int CHANNEL_IN_LEFT = 0x4;
    public static final int CHANNEL_IN_RIGHT = 0x8;
    public static final int CHANNEL_IN_FRONT = 0x10;
    public static final int CHANNEL_IN_BACK = 0x20;
    public static final int CHANNEL_IN_LEFT_PROCESSED = 0x40;
    public static final int CHANNEL_IN_RIGHT_PROCESSED = 0x80;
    public static final int CHANNEL_IN_FRONT_PROCESSED = 0x100;
    public static final int CHANNEL_IN_BACK_PROCESSED = 0x200;
    public static final int CHANNEL_IN_PRESSURE = 0x400;
    public static final int CHANNEL_IN_X_AXIS = 0x800;
    public static final int CHANNEL_IN_Y_AXIS = 0x1000;
    public static final int CHANNEL_IN_Z_AXIS = 0x2000;
    public static final int CHANNEL_IN_VOICE_UPLINK = 0x4000;
    public static final int CHANNEL_IN_VOICE_DNLINK = 0x8000;
    public static final int CHANNEL_IN_MONO = CHANNEL_IN_FRONT;
    public static final int CHANNEL_IN_STEREO = (CHANNEL_IN_LEFT | CHANNEL_IN_RIGHT);
    /** @hide */
    public static final int CHANNEL_IN_FRONT_BACK = CHANNEL_IN_FRONT | CHANNEL_IN_BACK;
    // CHANNEL_IN_ALL is not yet defined; if added then it should match AUDIO_CHANNEL_IN_ALL

At first I didn't understand why it was defined that way — until I saw what comes next. Read on; it will be explained later.

//AudioRecord.java
// Legacy five-argument constructor.
public AudioRecord(int audioSource, int sampleRateInHz, int channelConfig, int audioFormat,                                                                              
            int bufferSizeInBytes)
    throws IllegalArgumentException {
        // Delegates to another, overloaded constructor.
        this((new AudioAttributes.Builder())
                    .setInternalCapturePreset(audioSource)
                    .build(),
                (new AudioFormat.Builder())                 .setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
                                        true/*allow legacy configurations*/))
                    .setEncoding(audioFormat)
                    .setSampleRate(sampleRateInHz)
                    .build(),
                bufferSizeInBytes,
                AudioManager.AUDIO_SESSION_ID_GENERATE);
    }

Pay attention to this line:

.setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
                                        true/*allow legacy configurations*/))

This performs a compatibility conversion; the end result is the same as before. The key concept here is the bit mask — Android uses this pattern in many places.

// Maps a legacy channelConfig value onto a channel mask. Anything beyond
// mono / stereo / front-back hits the default branch and throws — this is
// exactly where multi-channel configurations get rejected.
private static int getChannelMaskFromLegacyConfig(int inChannelConfig,
            boolean allowLegacyConfig) {
        int mask;
        switch (inChannelConfig) {
        case AudioFormat.CHANNEL_IN_DEFAULT: // AudioFormat.CHANNEL_CONFIGURATION_DEFAULT
        case AudioFormat.CHANNEL_IN_MONO:
        case AudioFormat.CHANNEL_CONFIGURATION_MONO:
            mask = AudioFormat.CHANNEL_IN_MONO;
            break;
        case AudioFormat.CHANNEL_IN_STEREO:
        case AudioFormat.CHANNEL_CONFIGURATION_STEREO:
            mask = AudioFormat.CHANNEL_IN_STEREO;
            break;
        case (AudioFormat.CHANNEL_IN_FRONT | AudioFormat.CHANNEL_IN_BACK):
            mask = inChannelConfig;
            break;
        default:
            throw new IllegalArgumentException("Unsupported channel configuration.");
        }

        if (!allowLegacyConfig && ((inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_MONO)
                || (inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_STEREO))) {
            // only happens with the constructor that uses AudioAttributes and AudioFormat
            throw new IllegalArgumentException("Unsupported deprecated configuration.");
        }

        return mask;
    }

getChannelMaskFromLegacyConfig doesn't handle more than 2 channels at all. Including the hide parameter in AudioFormat:

/** @hide */
    // Existing hidden 5.1 input mask: six channel bits OR-ed together.
    public static final int CHANNEL_IN_5POINT1 = (CHANNEL_IN_LEFT |
            CHANNEL_IN_RIGHT | CHANNEL_IN_FRONT | CHANNEL_IN_BACK |
            CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED);

Seeing this, it looks like a placeholder was reserved, with 5.1-channel support to be implemented in the future. So can we add an analogous definition, for example for 7.1 channels?

/** @hide */
    // Proposed 7.1 input mask: eight channel bits = 0x3fc (decimal 1020).
    public static final int CHANNEL_IN_7POINT1 = (CHANNEL_IN_LEFT |
            CHANNEL_IN_RIGHT | CHANNEL_IN_FRONT | CHANNEL_IN_BACK |
            CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED|
            CHANNEL_IN_FRONT_PROCESSED | CHANNEL_IN_BACK_PROCESSED);

It doesn't feel right though. Correspondingly, getChannelMaskFromLegacyConfig needs to be added, otherwise an IllegalArgumentException is thrown directly:

//AudioRecord.java-getChannelMaskFromLegacyConfig
// New case so the 7.1 configuration is accepted instead of reaching the
// default branch and throwing IllegalArgumentException.
// Fixed: the assignment ended with ':' instead of ';' — that would not compile.
case AudioFormat.CHANNEL_IN_7POINT1:
            mask = AudioFormat.CHANNEL_IN_7POINT1;
            break;

Then, the place to calculate bufferSize should also be modified:

// getMinBufferSize(): map the new mask to its channel count.
case AudioFormat.CHANNEL_IN_7POINT1:
            channelCount = 8;
            break; // without the break, execution would fall through and clobber channelCount

Further down, it seems that it will not be intercepted by the parameter check.

Only when channelCount is enough, frameCount can be correct and bufferSize can be corresponding. Otherwise the data will be messed up. Because the number of channels, format, etc. determine the buffer space required for each frame!

Another place to set channel_count:

//hal/audio_hw.c
// Default capture PCM configuration (excerpt).
// NOTE(review): the full AOSP definition also sets .period_size; this excerpt
// omits it — confirm against the actual HAL source.
struct pcm_config pcm_config_audio_capture = {
    .channels = 2,
    .period_count = AUDIO_CAPTURE_PERIOD_COUNT,
    .format = PCM_FORMAT_S16_LE,
};

The normal recording scene gives a default 2 channels. Then

// Opens a capture stream; the channel count requested by the application
// layer overrides the default pcm_config here (excerpt).
static int adev_open_input_stream(struct audio_hw_device *dev,
                                  audio_io_handle_t handle,
                                  audio_devices_t devices,
                                  // Note this parameter:
                                  struct audio_config *config,
                                  struct audio_stream_in **stream_in,
                                  audio_input_flags_t flags __unused,
                                  const char *address __unused,
                                  audio_source_t source)
{
    ...
    in->config = pcm_config_audio_capture;// still the default of 2 channels at this point
    ...
    // Extract the channel count the application layer configured.
    int channel_count = audio_channel_count_from_in_mask(config->channel_mask);
    // If the app requested something other than 2, override it.
    in->config.channels = channel_count;
    ...
}

After all these changes, compile it all, and then:

// Test snippet after rebuilding with the changes above.
int channelConfiguration = AudioFormat.CHANNEL_IN_7POINT1;
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
int sampleRateInHz = 8000;
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration, audioEncodingBits);
// NOTE(review): LogD is not a framework API — presumably a local wrapper
// around android.util.Log.d; confirm.
LogD("recordBufferSize = " + String.valueOf(recordBufferSize));

recordBufferSize = 5120 (four times the previous stereo 1280)

It shows that the modification is successful! Oh yeah! Of course, after this modification, frameCount is still 320 (5120/(number of channels × number of bytes per sample)). Know why the concept of frame is defined.

Wait a minute, what a pit!

AudioRecord: set(): inputSource 6, sampleRate 8000, format 0x1, channelMask 0x3fc
...
audio_hw_primary: adev_open_input_stream: enter: sample_rate(8000) channel_mask(0xc)

Look at this log: by the time adev_open_input_stream runs, the channelMask passed to AudioRecord::set has changed. After tracing the code, I found the culprit here:

// Resolves an input profile for the requested device/rate/format/mask,
// retrying with progressively relaxed input flags (excerpt).
audio_io_handle_t AudioPolicyManager::getInputForDevice(
    ...
    audio_channel_mask_t channelMask,
    ...)
{
    ...
    audio_channel_mask_t profileChannelMask = channelMask;
    for (;;) {
        // This is the call in question.
        profile = getInputProfile(device,   address,profileSamplingRate, profileFormat,     profileChannelMask,profileFlags);
        if (profile != 0) {
            break; // success
        } else if (profileFlags & AUDIO_INPUT_FLAG_RAW) {
            profileFlags = (audio_input_flags_t) (profileFlags & ~AUDIO_INPUT_FLAG_RAW); // retry
        } else if (profileFlags != AUDIO_INPUT_FLAG_NONE) {
            profileFlags = AUDIO_INPUT_FLAG_NONE; // retry
        } else { // fail
            return input;
        }
    }
    ...
}

Let's take a look at getInputProfile

// Picks the first input IOProfile compatible with the requested capture
// parameters; this is where an unlisted channel mask causes the lookup to fail.
sp<IOProfile> AudioPolicyManager::getInputProfile(audio_devices_t device,const String8& address,uint32_t& samplingRate,audio_format_t& format,audio_channel_mask_t& channelMask,audio_input_flags_t flags)
{
    // Choose an input profile based on the requested capture parameters: select the first available
    // profile supporting all requested parameters.
    for (size_t i = 0; i < mHwModules.size(); i++)
    {
        if (mHwModules[i]->mHandle == 0) {
            continue;
        }   
        for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
        {
            sp<IOProfile> profile = mHwModules[i]->mInputProfiles[j];
            // profile->log();
            // NOTE(review): a ')' was lost in this transcription — the real
            // condition reads isCompatibleProfile(...)) {
            if (profile->isCompatibleProfile(/* many params */) {

                return profile;
            }
        }
        // Honestly, I cannot see how this differs from the loop above?????
        // NOTE(review): in AOSP the two passes call isCompatibleProfile with
        // DIFFERENT arguments (the second pass relaxes the matching criteria);
        // the elided parameter lists hide that difference — confirm against
        // AudioPolicyManager.cpp.
        for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
        {
            sp<IOProfile> profile = mHwModules[i]->mInputProfiles[j];
            // profile->log();
            if (profile->isCompatibleProfile(/* same params as above */) {
                                              
                return profile;
            }   
        }   
    }   
    return NULL;
}

Based on the capture parameter of the request, an input profile is selected, and the first available one is selected.

After reading the relevant code for a while, I almost vomited. However, the basic idea is clear: in audio_policy_configuration.xml (introduced in Android O), or audio_policy.conf on older releases, add 8-channel (7.1) support. For example:

<!-- audio_policy_configuration.xml excerpt (truncated: the closing </mixPort>
     and surrounding elements are omitted). AUDIO_CHANNEL_IN_8 is appended to
     channelMasks so an 8-channel input profile can be matched. -->
<mixPort name="primary input" role="sink">
                    <profile name="" format="AUDIO_FORMAT_PCM_16_BIT"
                             samplingRates="8000,11025,12000,16000,22050,24000,32000,44100,48000"
                             channelMasks="AUDIO_CHANNEL_IN_MONO,AUDIO_CHANNEL_IN_STEREO,AUDIO_CHANNEL_IN_FRONT_BACK,AUDIO_CHANNEL_IN_8"/>

Correspondingly, audio-base.h should also be changed.

//audio-base.h
// This value must match the Java-side definition: 0x3fc (= 1020).
AUDIO_CHANNEL_IN_8 = 1020u

Later, I found that when reading in, you need to add it here, otherwise, it will not be recognized

//libmedia/TypeConverter.cpp
// String<->enum table used when parsing channelMasks from the policy XML;
// without an entry here, AUDIO_CHANNEL_IN_8 would not be recognized.
template <>
const InputChannelConverter::Table InputChannelConverter::mTable[] = {
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_STEREO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_FRONT_BACK),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_6),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_UPLINK_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_DNLINK_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_CALL_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_8),
    TERMINATOR
};

because:

//Serializer.cpp
// Deserializes one <profile> XML element: format, channel masks and sampling
// rates are read from attributes and turned into an AudioProfile.
status_t AudioProfileTraits::deserialize(_xmlDoc */*doc*/, const _xmlNode *root, PtrElement &profile,
                                         PtrSerializingCtx /*serializingContext*/)
{
    string samplingRates = getXmlAttribute(root, Attributes::samplingRates);
    string format = getXmlAttribute(root, Attributes::format);
    string channels = getXmlAttribute(root, Attributes::channelMasks);
    profile = new Element(formatFromString(format, gDynamicFormat),
                          // The channel masks are parsed here.
                          channelMasksFromString(channels, ","),
                          samplingRatesFromString(samplingRates, ","));
    
    profile->setDynamicFormat(profile->getFormat() == gDynamicFormat);
    profile->setDynamicChannels(profile->getChannels().isEmpty());
    profile->setDynamicRate(profile->getSampleRates().isEmpty());
    
    return NO_ERROR;
}

After this change, 8-channel recording is supported. Of course, if the tinyalsa implementation does not support it, pcm_open will probably fail — but that's another topic.

Guess you like

Origin blog.csdn.net/bberdong/article/details/84668331