背景
avcodec: split mp2 encoder into float and fixed
This makes the USE_FLOATS == 0 available to the end user
More float optimizations can easily be added as well now
common code should be factored out into a common file once all
fixed point & floating point optimizations are done, this is to
avoid having to move code back and forth between files.
前言
MP2 文件格式基于 1152 个采样间隔的连续数字帧,具有四种可能的格式:
• 单声道格式
• 立体声格式
• 强度编码联合立体声格式(立体声无关)
• 双通道(不相关)
MPEG Audio Layer II (MP2) 是 MP3 标准的核心算法。 MPEG-1 Audio Layer 2 编码是从 MUSICAM 音频编解码器获得的。
特别注意:MP2音频帧的采样个数是1152
格式说明
数据
ff f5 c8 c4 ff ff ff ff ff ff ff ff ff ed 51 d5 72 f7 51 5d 4c 93 4e 3b 03 cf c6 fc 11 41 04 11 41 24 92 3d 3c 75 49 1c 72 d3 35 55 5b 4d 75 d5 6d 95 d9 5e
帧是MPEG-1处理的最小单元,一帧处理1152个PCM的样值,对于16KHz的采样率,一帧对应声音样本时间1152/16000=0.072s=72ms。
帧头(32 bit)
同步字(12bit)
1111 1111 1111 (转为16进制:FF F)
ID(1 bit)
这1位标志用来识别音频编码算法,如下(例子中是0):
0: ISO/IEC 13818-3 [11] or MPEG-2 Audio extension to lower sampling frequencies;
1: ISO/IEC 11172-3 [3].
Layer(2 bit)
这2位指示用了哪一层,对应如下(例子中是10):
Code
Layer
11
not used in DAB
10
Layer II
01
not used in DAB
00
reserved
保护位(1bit)
这一位比较重要,表明该音频是否有CRC校验:如果是0,说明有CRC校验;如果是1,则说明没有CRC校验。例子中是1(第二个字节f5的低4位为0101,即ID=0、Layer=10、保护位=1)。
比特率指示(4bit)
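指示该音频的比特率,如下(只对应48KHz采样率,24KHz的表这里没有给出)。示例音频可以看到这里是16进制的c,对应二进制1100;按MPEG-1(ID=1)的Layer II比特率表,该值对应256kbit/s。但示例帧头的ID为0(MPEG-2低采样率扩展),应查低采样率的比特率表,1100对应128kbit/s,与下文ffprobe输出的128 kb/s一致。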
FFmpeg解码
相关代码
mpegaudiodec_template.c
该文件被
mpegaudiodec_fixed.c
mpegaudiodec_float.c
通过 #include 的方式包含进项目,因此在 VS 中可能搜索不到该文件。
MP2定点解码器
#if CONFIG_MP2_DECODER
/*
 * Codec descriptor registered under the name "mp2".
 * The advertised sample formats are the signed 16-bit ones, with the planar
 * variant listed first.
 */
AVCodec ff_mp2_decoder = {
    /* identification */
    .name           = "mp2",
    .long_name      = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
    .id             = AV_CODEC_ID_MP2,
    .type           = AVMEDIA_TYPE_AUDIO,
    .capabilities   = AV_CODEC_CAP_DR1,
    .priv_data_size = sizeof(MPADecodeContext),
    /* callbacks */
    .init           = decode_init,
    .decode         = decode_frame,
    .flush          = flush,
    /* supported output formats; AV_SAMPLE_FMT_NONE ends the list */
    .sample_fmts    = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_S16P,
        AV_SAMPLE_FMT_S16,
        AV_SAMPLE_FMT_NONE,
    },
};
#endif
输出采样格式
mpegaudiodec_fixed.c
#define OUT_FMT AV_SAMPLE_FMT_S16
#define OUT_FMT_P AV_SAMPLE_FMT_S16P
MP2浮点解码器
/*
 * Codec descriptor registered under the name "mp2float".
 * It shares AV_CODEC_ID_MP2 with the "mp2" entry but advertises 32-bit float
 * sample formats, with the planar variant listed first.
 */
AVCodec ff_mp2float_decoder = {
    /* identification */
    .name           = "mp2float",
    .long_name      = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
    .id             = AV_CODEC_ID_MP2,
    .type           = AVMEDIA_TYPE_AUDIO,
    .capabilities   = AV_CODEC_CAP_DR1,
    .priv_data_size = sizeof(MPADecodeContext),
    /* callbacks */
    .init           = decode_init,
    .decode         = decode_frame,
    .flush          = flush,
    .close          = decode_close,
    /* supported output formats; AV_SAMPLE_FMT_NONE ends the list */
    .sample_fmts    = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP,
        AV_SAMPLE_FMT_FLT,
        AV_SAMPLE_FMT_NONE,
    },
};
输出采样格式
mpegaudiodec_float.c
#define OUT_FMT AV_SAMPLE_FMT_FLT
#define OUT_FMT_P AV_SAMPLE_FMT_FLTP
音频头解析
检查4个字节的音频头,过程为:
/**
 * Fast plausibility check of a 32-bit MPEG audio frame header, used when
 * resynchronising.
 *
 * The header is rejected when any of the following holds:
 *  - the top 11 bits (sync pattern) are not all ones,
 *  - the 2-bit layer field (bits 18..17) is zero,
 *  - the 4-bit bitrate field (bits 15..12) is all ones,
 *  - the 2-bit sampling-frequency field (bits 11..10) is all ones.
 *
 * @param header header word to examine
 * @return 0 if the header passes every check, -1 otherwise
 */
static inline int ff_mpa_check_header(uint32_t header)
{
    const uint32_t sync_mask    = 0xffe00000;
    const uint32_t layer_mask   = 3u << 17;
    const uint32_t bitrate_mask = 0xfu << 12;
    const uint32_t freq_mask    = 3u << 10;

    const int sync_ok    = (header & sync_mask)    == sync_mask;
    const int layer_ok   = (header & layer_mask)   != 0;
    const int bitrate_ok = (header & bitrate_mask) != bitrate_mask;
    const int freq_ok    = (header & freq_mask)    != freq_mask;

    return (sync_ok && layer_ok && bitrate_ok && freq_ok) ? 0 : -1;
}
/**
 * Codec init callback: sets up the private MPADecodeContext and selects the
 * output sample format.
 *
 * @param avctx codec context; its priv_data is used as the MPADecodeContext
 * @return 0 on success; AVERROR(ENOMEM) when the float DSP context cannot be
 *         allocated (only compiled in when USE_FLOATS is set)
 */
static av_cold int decode_init(AVCodecContext * avctx)
{
    static int tables_ready = 0;
    MPADecodeContext *ctx = avctx->priv_data;

    /* decode_init_static() is run on the first call only. */
    if (!tables_ready) {
        decode_init_static();
        tables_ready = 1;
    }

    ctx->avctx = avctx;

#if USE_FLOATS
    ctx->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
    if (!ctx->fdsp)
        return AVERROR(ENOMEM);
#endif

    ff_mpadsp_init(&ctx->mpadsp);

    /*
     * The sample format is decided here: OUT_FMT is used only when the caller
     * requested exactly that format and the codec is not MP3ON4; in every
     * other case OUT_FMT_P is used.
     */
    avctx->sample_fmt = (avctx->request_sample_fmt == OUT_FMT &&
                         avctx->codec_id != AV_CODEC_ID_MP3ON4)
                            ? OUT_FMT
                            : OUT_FMT_P;

    ctx->err_recognition = avctx->err_recognition;

    if (avctx->codec_id == AV_CODEC_ID_MP3ADU) {
        ctx->adu_mode = 1;
    }

    return 0;
}
指定音频帧的采样个数
static int mp_decode_frame(MPADecodeContext *s, OUT_INT **samples,
const uint8_t *buf, int buf_size)
{
int i, nb_frames, ch, ret;
OUT_INT *samples_ptr;
init_get_bits(&s->gb, buf + HEADER_SIZE, (buf_size - HEADER_SIZE) * 8);
/* skip error protection field */
if (s->error_protection)
skip_bits(&s->gb, 16);
switch(s->layer) {
case 1:
s->avctx->frame_size = 384;
nb_frames = mp_decode_layer1(s);
break;
case 2:
s->avctx->frame_size = 1152;
nb_frames = mp_decode_layer2(s);
break;
ffprobe探测音频采样位数错误问题记录
F:\>ffprobe haikangnvr.mp2
ffprobe version git-2020-08-16-5df9724 Copyright (c) 2007-2020 the FFmpeg developers
built with gcc 10.2.1 (GCC) 20200805
configuration: --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --enable-libsvtav1 --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf
libavutil 56. 58.100 / 56. 58.100
libavcodec 58.100.100 / 58.100.100
libavformat 58. 51.100 / 58. 51.100
libavdevice 58. 11.101 / 58. 11.101
libavfilter 7. 87.100 / 7. 87.100
libswscale 5. 8.100 / 5. 8.100
libswresample 3. 8.100 / 3. 8.100
libpostproc 55. 8.100 / 55. 8.100
[mp3 @ 000001ed53840340] Estimating duration from bitrate, this may be inaccurate
Input #0, mp3, from 'haikangnvr.mp2':
Duration: 00:00:58.82, start: 0.000000, bitrate: 128 kb/s
Stream #0:0: Audio: mp2, 16000 Hz, mono, fltp, 128 kb/s
解惑
实际上fltp(AV_SAMPLE_FMT_FLTP)是不正确的,最后调用的MP2解码器会强制默认为s16p(AV_SAMPLE_FMT_S16P)采样格式进行解码
参考
/*
 * Per-sample-format descriptor table, indexed by the AV_SAMPLE_FMT_* value.
 * Each entry holds the short name, the bit count, a planar flag, and the
 * alternate form: the planar counterpart of a packed format, or the packed
 * counterpart of a planar one.
 */
static const SampleFmtInfo sample_fmt_info[AV_SAMPLE_FMT_NB] = {
    /* packed formats (planar = 0) */
    [AV_SAMPLE_FMT_U8]   = { .name = "u8",   .bits =  8, .planar = 0, .altform = AV_SAMPLE_FMT_U8P  },
    [AV_SAMPLE_FMT_S16]  = { .name = "s16",  .bits = 16, .planar = 0, .altform = AV_SAMPLE_FMT_S16P },
    [AV_SAMPLE_FMT_S32]  = { .name = "s32",  .bits = 32, .planar = 0, .altform = AV_SAMPLE_FMT_S32P },
    [AV_SAMPLE_FMT_S64]  = { .name = "s64",  .bits = 64, .planar = 0, .altform = AV_SAMPLE_FMT_S64P },
    [AV_SAMPLE_FMT_FLT]  = { .name = "flt",  .bits = 32, .planar = 0, .altform = AV_SAMPLE_FMT_FLTP },
    [AV_SAMPLE_FMT_DBL]  = { .name = "dbl",  .bits = 64, .planar = 0, .altform = AV_SAMPLE_FMT_DBLP },
    /* planar formats (planar = 1) */
    [AV_SAMPLE_FMT_U8P]  = { .name = "u8p",  .bits =  8, .planar = 1, .altform = AV_SAMPLE_FMT_U8   },
    [AV_SAMPLE_FMT_S16P] = { .name = "s16p", .bits = 16, .planar = 1, .altform = AV_SAMPLE_FMT_S16  },
    [AV_SAMPLE_FMT_S32P] = { .name = "s32p", .bits = 32, .planar = 1, .altform = AV_SAMPLE_FMT_S32  },
    [AV_SAMPLE_FMT_S64P] = { .name = "s64p", .bits = 64, .planar = 1, .altform = AV_SAMPLE_FMT_S64  },
    [AV_SAMPLE_FMT_FLTP] = { .name = "fltp", .bits = 32, .planar = 1, .altform = AV_SAMPLE_FMT_FLT  },
    [AV_SAMPLE_FMT_DBLP] = { .name = "dblp", .bits = 64, .planar = 1, .altform = AV_SAMPLE_FMT_DBL  },
};
基础信息