"Audio/video format" is an ambiguous phrase. The formats we all know, such as FLV, MP4, and MOV, are container formats; think of them as boxes. Inside sit the encoded audio and video streams, and those streams have codecs of their own, such as AAC, H.264, or H.265. This article walks through taking the encoded audio data obtained from a live stream, decoding it, and playing it back through the HTML5 audio API.
The codecs involved are:
1. Speex
2. AAC
3. MP3
Each of these codecs has an open-source decoder, but they are all C libraries, so to run in the browser they have to be compiled to JavaScript with Emscripten.
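As an illustration of how such a library is surfaced to JavaScript, here is a minimal sketch of an exported entry point. The function name `decodeAudio` and the build flags are my own example, not the project's actual setup:

```cpp
#include <emscripten/emscripten.h>
#include <cstdint>

// Hypothetical exported entry point: JavaScript writes a packet into the
// Emscripten heap, calls this function, and reads the decoded PCM back out.
// The real project dispatches to whichever decoder was selected at compile
// time (USE_SPEEX / USE_AAC / USE_MP3).
extern "C" EMSCRIPTEN_KEEPALIVE
int decodeAudio(uint8_t *input, int length, uint8_t *output) {
    // ...dispatch to speex_decode_int / faacDecDecode / libmad...
    return 0; // number of PCM bytes written to output
}

// A build line might look like this (assumed flags, adjust to the project):
// emcc decoder.cpp -DUSE_AAC -O2 \
//   -sEXPORTED_FUNCTIONS=_decodeAudio,_malloc,_free -o decoder.js
```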
The decoder headers are pulled in behind compile-time switches:

```cpp
#ifdef USE_SPEEX
#include <speex/speex.h>
#endif
#ifdef USE_AAC
#include "aacDecoder/include/neaacdec.h"
// #include "libfdk-aac/libAACdec/include/aacdecoder_lib.h"
#endif
#ifdef USE_MP3
#include "libmad/mad.h"
// #include "libid3tag/tag.h"
#endif
```
```cpp
int bufferLength;
int bufferFilled;
u8 *outputBuffer;
#ifdef USE_AAC
faacDecHandle faacHandle;
#endif
#ifdef USE_SPEEX
i16 *audioOutput;
void *speexState;
SpeexBits speexBits;
#endif
#ifdef USE_MP3
MP3Decoder mp3Decoder;
#endif
```
bufferLength specifies the size of the buffer, bufferFilled marks the not-yet-consumed data in the buffer, and outputBuffer holds the decoded data. MP3Decoder is a class I wrote myself; it needs the following members:
```cpp
mad_stream inputStream;
mad_frame frame;
mad_synth synth;
```
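To make the roles of bufferLength and bufferFilled concrete, here is a minimal sketch (my own assumption, not the project's actual code) of how decoded bytes might accumulate and get flushed:

```cpp
// Assumed helper: decoded PCM accumulates in outputBuffer, and
// bufferFilled tracks how much of it is currently occupied.
void appendDecoded(const u8 *pcm, int bytes) {
    memcpy(outputBuffer + bufferFilled, pcm, bytes);
    bufferFilled += bytes;
    if (bufferFilled >= bufferLength) {
        // a full buffer is ready: hand it to the JavaScript side, then reset
        bufferFilled = 0;
    }
}
```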
The buffers and decoder handles are allocated and initialized like so:

```cpp
outputBuffer = (u8 *)malloc(bufferLength);
#ifdef USE_SPEEX
audioOutput = (i16 *)malloc(640);
auto mode = speex_lib_get_mode(SPEEX_MODEID_WB);
speexState = speex_decoder_init(mode);
speex_bits_init(&speexBits);
#endif
#ifdef USE_AAC
faacHandle = faacDecOpen();
#endif
```
MP3 initialization:
```cpp
mad_stream_init(&inputStream);
mad_frame_init(&frame);
mad_synth_init(&synth);
```
The input object contains the raw audio data after protocol depacketization (in the form carried by RTMP packets or the FLV format). The buffer size is yours to choose, but it must follow these rules:
aac: a multiple of 1024 (one AAC frame plays for 1024 × 1000 / 44100 ≈ 23.22 ms)

speex: a multiple of 320 (320 × 1000 / 16000 = 20 ms)

mp3: a multiple of 576 (with two channels, 1152 × 1000 / 44100 ≈ 26.12 ms)
From these figures you can estimate the audio latency a given buffer size introduces, which then has to be kept in sync with the video latency; a sketch of the calculation follows.
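For instance, a helper like this (the name is mine, purely illustrative) turns a buffer size into the latency it adds:

```cpp
// Latency in milliseconds introduced by buffering `samplesPerChannel`
// PCM samples at the given sample rate.
double bufferLatencyMs(int samplesPerChannel, int sampleRate) {
    return samplesPerChannel * 1000.0 / sampleRate;
}

// e.g. buffering 4 AAC frames: bufferLatencyMs(4 * 1024, 44100) ≈ 92.88 ms
```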
The decoding itself:

```cpp
#ifdef USE_SPEEX
if (input.length() <= 11) {
    memset(output, 0, 640);
} else {
    speex_bits_read_from(&speexBits, (const char *)input, 52);
    speex_decode_int(speexState, &speexBits, audioOutput);
    memcpy(output, audioOutput, 640);
}
return 640;
#endif
#ifdef USE_AAC
// 0 = AAC sequence header, 1 = AAC raw
if (input.readB<1, u8>()) {
    faacDecFrameInfo frame_info;
    auto pcm_data = faacDecDecode(faacHandle, &frame_info,
                                  (unsigned char *)input.point(), input.length());
    if (frame_info.error > 0) {
        emscripten_log(1, "!!%s\n", NeAACDecGetErrorMessage(frame_info.error));
    } else {
        int samplesBytes = frame_info.samples << 1; // 16-bit samples to bytes
        memcpy(output, pcm_data, samplesBytes);
        return samplesBytes;
    }
} else {
    // sequence header: (re)configure and initialize the decoder
    unsigned long samplerate;
    unsigned char channels;
    auto config = faacDecGetCurrentConfiguration(faacHandle);
    config->defObjectType = LTP;
    faacDecSetConfiguration(faacHandle, config);
    faacDecInit2(faacHandle, (unsigned char *)input.point(), 4, &samplerate, &channels);
    emscripten_log(0, "aac samplerate:%d channels:%d", samplerate, channels);
}
#endif
```
MP3 is more involved, so I won't paste the code here. The main issue is that libmad's API cannot be called directly as provided, because MP3 data in a live stream is framed differently from an MP3 file. If this article gets popular, I'll cover that in detail.
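For reference, the standard libmad flow for file-style data looks roughly like the sketch below (adapted from libmad's minimad example; per the above it still needs reworking for live-stream framing, and error handling plus partial-frame buffering are omitted):

```cpp
#include "libmad/mad.h"

// Standard fixed-point-to-16-bit conversion from libmad's minimad example.
static inline short scale(mad_fixed_t sample) {
    sample += (1L << (MAD_F_FRACBITS - 16));           // round
    if (sample >= MAD_F_ONE) sample = MAD_F_ONE - 1;   // clip
    else if (sample < -MAD_F_ONE) sample = -MAD_F_ONE;
    return sample >> (MAD_F_FRACBITS + 1 - 16);        // quantize to 16 bits
}

// Decode one chunk of MP3 data into interleaved 16-bit PCM; returns the
// number of PCM bytes produced.
int decodeMp3Chunk(MP3Decoder &d, const unsigned char *data,
                   unsigned long len, short *out) {
    int written = 0;
    mad_stream_buffer(&d.inputStream, data, len);
    while (mad_frame_decode(&d.frame, &d.inputStream) == 0) {
        mad_synth_frame(&d.synth, &d.frame);
        for (unsigned i = 0; i < d.synth.pcm.length; i++) {
            out[written++] = scale(d.synth.pcm.samples[0][i]);     // left
            if (d.synth.pcm.channels == 2)
                out[written++] = scale(d.synth.pcm.samples[1][i]); // right
        }
    }
    return written * (int)sizeof(short);
}
```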
Cleanup mirrors initialization:

```cpp
#ifdef USE_AAC
faacDecClose(faacHandle);
#endif
#ifdef USE_SPEEX
speex_decoder_destroy(speexState);
speex_bits_destroy(&speexBits);
free(audioOutput);
#endif
free(outputBuffer);
```
And for MP3:
```cpp
mad_synth_finish(&synth);
mad_frame_finish(&frame);
mad_stream_finish(&inputStream); // pairs with mad_stream_init above
```
On the playback side, the decoded PCM is handed to the Web Audio API. First create the AudioContext:

```javascript
window.AudioContext = window.AudioContext || window.webkitAudioContext;
var context = new window.AudioContext();
```
```javascript
var audioBuffers = [];
var audioBuffer = context.createBuffer(channels, frameCount, samplerate);
```
```javascript
var playNextBuffer = function() {
    isPlaying = false;
    if (audioBuffers.length) {
        playAudio(audioBuffers.shift());
    }
    if (audioBuffers.length > 1) audioBuffers.shift();
    //console.log(audioBuffers.length)
};
var copyAudioOutputArray = resampled ? function(target) {
    for (var i = 0; i < allFrameCount; i++) {
        var j = i << 1;
        target[j] = target[j + 1] = audioOutputArray[i] / 32768;
    }
} : function(target) {
    for (var i = 0; i < allFrameCount; i++) {
        target[i] = audioOutputArray[i] / 32768;
    }
};
var copyToCtxBuffer = channels > 1 ? function(fromBuffer) {
    for (var channel = 0; channel < channels; channel++) {
        var nowBuffering = audioBuffer.getChannelData(channel);
        if (fromBuffer) {
            for (var i = 0; i < frameCount; i++) {
                nowBuffering[i] = fromBuffer[i * (channel + 1)];
            }
        } else {
            for (var i = 0; i < frameCount; i++) {
                nowBuffering[i] = audioOutputArray[i * (channel + 1)] / 32768;
            }
        }
    }
} : function(fromBuffer) {
    var nowBuffering = audioBuffer.getChannelData(0);
    if (fromBuffer) nowBuffering.set(fromBuffer);
    else copyAudioOutputArray(nowBuffering);
};
var playAudio = function(fromBuffer) {
    if (isPlaying) {
        var buffer = new Float32Array(resampled ? allFrameCount * 2 : allFrameCount);
        copyAudioOutputArray(buffer);
        audioBuffers.push(buffer);
        return;
    }
    isPlaying = true;
    copyToCtxBuffer(fromBuffer);
    var source = context.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(context.destination);
    source.onended = playNextBuffer;
    //setTimeout(playNextBuffer, audioBufferTime-audioBuffers.length*200);
    source.start();
};
```
Here playNextBuffer pulls the next chunk of data out of the queue, copyAudioOutputArray converts the integer samples to floating point, and copyToCtxBuffer copies the audio data into the buffer that is actually played. Each of them handles both the mono and stereo cases.
```javascript
var resampled = samplerate < 22050;
```
For data sampled below 22050 Hz we duplicate every sample to simulate a 22050 Hz stream, because HTML5 audio only accepts rates of 22050 Hz and above; doubling the sample count while doubling the declared rate leaves pitch and duration unchanged.