# 音频格式转换
接口分析
//1 申请重采样context(SwrContext),注意它和视频转换用的 SwsContext 是不同的
struct SwrContext *swr_alloc(void);
//2设置音频重采样的格式信息
//播放速度可以通过样本率大小的改变来调节,但是声音会失真
//out_ch_layout 输出的layout(5.1声道,立体声,左右声道等)
//out_sample_fmt 输出的样本格式(s16 s24的 float的etc.), float格式一般是不能直接播放需要转换成s16/s24
//out_sample_rate 输出的样本率(一般用原始的样本率就行了)
//in_ch_layout 输入的layout
//log_offset 和 log_ctx是日志 直接传0
struct SwrContext *swr_alloc_set_opts(struct SwrContext *s,
int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
int64_t in_ch_layout, enum AVSampleFormat in_sample_fmt, int in_sample_rate,
int log_offset, void *log_ctx);
//初始化
int swr_init(struct SwrContext *s)
//释放空间
void swr_free(struct SwrContext **s)
//对<每一帧>音频数据来重采样(resample)
//out传入一个指针数组,空间要自己申请,这个是输出,也就是重采样后的数据
//out_count (nb_samples) 单通道样本的数量,只是单通道的样本数量,不是字节数;换算成字节数时双通道要x2,如果是s16再x2(字节数 = nb_samples * 每个样本的字节数 * channels,例如 s16 双声道: 1024 * 2 * 2;float 格式每个样本是4字节: 1024 * 4 * 2)
//in 可以用avframe解码出来的data直接放里面就行
//in_count 单通道的样本数量 nb_samples
int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
const uint8_t **in , int in_count);
### 代码
#include <iostream>
#include <thread>
extern "C" {
#include "libavutil/avutil.h"
#include "libavcodec/avcodec.h "
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
}
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "swscale.lib")
#pragma comment(lib, "swresample.lib")
#pragma warning(disable:4996)
using namespace std;
// Block the calling thread for `ms` milliseconds (relies on C++11 <thread>/<chrono>).
void Xsleep(int ms)
{
    std::this_thread::sleep_for(std::chrono::milliseconds(ms));
}
// Convert an AVRational to a double. A zero denominator yields 0 rather than
// performing a division by zero.
static double r2d(AVRational in)
{
    if (in.den == 0)
    {
        return 0;
    }
    const double numerator = (double)in.num;
    const double denominator = (double)in.den;
    return numerator / denominator;
}
int main()
{
//avcodec_configuration();
//初始化封装库
av_register_all();
avformat_network_init();
//注册解码器
avcodec_register_all();
const char *path = "zxy10.mp4";
AVFormatContext *ic = NULL;
int ret = avformat_open_input(&ic, path, NULL, NULL);
if (ret != 0)
{
char buf[1024] = { 0 };
av_strerror(ret, buf, sizeof(buf) - 1);
av_log(NULL, AV_LOG_ERROR, "open file erro %s\n:", buf);
getchar();
return -1;
}
cout << "open file :" << ic->filename << " success" << endl;
avformat_find_stream_info(ic, NULL);
cout << "file time is " << ic->duration / AV_TIME_BASE << endl;
av_dump_format(ic, 0, path, 0);
int audioStream = 0; //记录音频和视频的编号
int videostream = 0;
#if 0
{
//获取音视频
for (int i = 0; i < ic->nb_streams; i++)
{
AVStream *as = ic->streams[i];
if (as->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) //视屏
{
videostream = i;
cout << "视频信息 stream :" << videostream << endl;
cout << "height :" << as->codecpar->height << endl;
cout << "weith :" << as->codecpar->width << endl;
cout << "codec_id :" << as->codecpar->codec_id << endl;
cout << "format :" << as->codecpar->format << endl;
cout << "frame_size :" << as->codecpar->frame_size << endl;
cout << "fps : " << r2d(as->avg_frame_rate) << endl;
//AVPixelFormat;
}
else if (as->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)//音频
{
audioStream = i;
cout << endl;
cout << "音频信息,stream " << audioStream << endl;
cout << "codec_id :" << as->codecpar->codec_id << endl;
cout << "channels :" << as->codecpar->channels << endl;
cout << "sample_rate :" << as->codecpar->sample_rate << endl;
cout << "format :" << as->codecpar->format << endl;
cout << "frame_size :" << as->codecpar->frame_size << endl;
cout << "fps : " << r2d(as->avg_frame_rate) << endl;
//AVSampleFormat;
}
}
}
#endif
//下面方法可以替换上面的
videostream = av_find_best_stream(ic, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (videostream < 0)
{
char buf[1024] = { 0 };
av_strerror(videostream, buf, sizeof(buf) - 1);
av_log(NULL, AV_LOG_ERROR, "Not find Video Stream : %s\n:", buf);
if (ic)
{
avformat_close_input(&ic);
}
getchar();
return -1;
}
else
{
cout << endl;
cout << "视频信息 stream :" << videostream << endl;
cout << "height :" << ic->streams[videostream]->codecpar->height << endl;
cout << "weith :" << ic->streams[videostream]->codecpar->width << endl;
cout << "codec_id :" << ic->streams[videostream]->codecpar->codec_id << endl;
cout << "format :" << ic->streams[videostream]->codecpar->format << endl;
cout << "frame_size :" << ic->streams[videostream]->codecpar->frame_size << endl;
cout << "fps : " << r2d(ic->streams[videostream]->avg_frame_rate) << endl;
//AVPixelFormat;
}
//获取视频流
audioStream = av_find_best_stream(ic, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
if (audioStream < 0)
{
char buf[1024] = { 0 };
av_strerror(audioStream, buf, sizeof(buf) - 1);
av_log(NULL, AV_LOG_ERROR, "Not find Audio Stream : %s\n:", buf);
if (ic)
{
avformat_close_input(&ic);
}
getchar();
return -1;
}
else
{
cout << endl;
cout << "音频信息,stream " << audioStream << endl;
cout << "codec_id :" << ic->streams[audioStream]->codecpar->codec_id << endl;
cout << "channels :" << ic->streams[audioStream]->codecpar->channels << endl;
cout << "sample_rate :" << ic->streams[audioStream]->codecpar->sample_rate << endl;
cout << "format :" << ic->streams[audioStream]->codecpar->format << endl;
cout << "frame_size :" << ic->streams[audioStream]->codecpar->frame_size << endl;
cout << "fps : " << r2d(ic->streams[audioStream]->avg_frame_rate) << endl;
}
打开视频解码器/
//找到视频解码器,不要在av_find_best_stream里去找解码器,那样耦合性太强了
AVCodec * vcodec = avcodec_find_decoder(ic->streams[videostream]->codecpar->codec_id);
if (!vcodec)
{
cout << "can not find AVcodec :" << ic->streams[videostream]->codecpar->codec_id << endl;
cin.get();
return -1;
}
cout << "find avcodec successed : id is" << ic->streams[videostream]->codecpar->codec_id << endl;
//创建解码器上下 记得释放~
AVCodecContext * vc = avcodec_alloc_context3(vcodec);
//复制(配置)解码器上下文参数
avcodec_parameters_to_context(vc, ic->streams[videostream]->codecpar);
vc->thread_count = 16; //手动指定线程数
//打开解码器上下文(分配空间给context) 因为vocdec已经被创建了,所以第二个参数传入0
ret = avcodec_open2(vc, 0, 0);
if (ret != 0)
{
char buf[1024] = { 0 };
av_strerror(ret, buf, sizeof(buf) - 1);
av_log(NULL, AV_LOG_ERROR, "avcodec_open2 %s\n:", buf);
getchar();
return -1;
}
cout << "video avcodec_open2 success!" << endl;
///打开视频解码器end//
打开音频解码器/
//找到音频解码器,不要在av_find_best_stream里去找解码器,那样耦合性太强了
AVCodec * acodec = avcodec_find_decoder(ic->streams[audioStream]->codecpar->codec_id);
if (!acodec)
{
cout << "can not find AVcodec :" << ic->streams[audioStream]->codecpar->codec_id << endl;
cin.get();
return -1;
}
cout << "find avcodec successed : id is" << ic->streams[audioStream]->codecpar->codec_id << endl;
//创建解码器上下 记得释放~
AVCodecContext * ac = avcodec_alloc_context3(acodec);
//复制(配置)解码器上下文参数
avcodec_parameters_to_context(ac, ic->streams[audioStream]->codecpar);
//vc->thread_count = 16; //手动指定线程数
//打开解码器上下文(分配空间给context) 因为vocdec已经被创建了,所以第二个参数传入0
ret = avcodec_open2(ac, 0, 0);
if (ret != 0)
{
char buf[1024] = { 0 };
av_strerror(ret, buf, sizeof(buf) - 1);
av_log(NULL, AV_LOG_ERROR, "avcodec_open2 %s\n:", buf);
getchar();
return -1;
}
cout << "audio avcodec_open2 success!" << endl;
///打开音频解码器end//
AVPacket * pkt = av_packet_alloc(); //解封用
AVFrame * frame = av_frame_alloc(); //解码用 frame 存放的数据可能是很大的yvu 比 pkt存放的数据大很多
//像素格式与尺寸转换的context
SwsContext * vctx = NULL ; //(s - scale)
unsigned char * rgb = NULL; //存放图像格式需要用usigned int , 不然有可能因为反转导致图像花屏
uint8_t * pcm = NULL; //这个空间大小 根据swr_alloc_set_opts 设置的参数来确定 : nb_samples*format*channels
//音频重采样
SwrContext * actx = swr_alloc();
//上下文初始化
actx = swr_alloc_set_opts(
actx,
av_get_default_channel_layout(2), //输出格式 设置默认两声道 ,还有什么2.1声道
AV_SAMPLE_FMT_S16, //输出样本格式 (没一个音频的样本格式,这里设成16位两个字节的)
ac->sample_rate, //输出采样率(一秒钟的音频样本数量)
av_get_default_channel_layout(ac->channels), //输入格式
ac->sample_fmt, //输入样本格式
ac->sample_rate, //输入采样率
0, 0 //日志没用到直接设置成0
);
ret = swr_init(actx);
if (0 != ret)
{
char buf[1024] = { 0 };
av_strerror(ret, buf, sizeof(buf) - 1);
av_log(NULL, AV_LOG_ERROR, "swr_init failed %s\n:", buf);
getchar();
return -1;
}
for (;;)
{
int ret = av_read_frame(ic, pkt);
if (ret != 0) //有可能是文件读完了;也有可能是出错了
{
//{
// //seek到3s的位置
// int ms = 3000;
// long long pos = (double)ms / (double)(r2d(ic->streams[pkt->stream_index]->time_base) * 1000);
// av_seek_frame(ic, videostream, pos, AVSEEK_FLAG_BACKWARD | AVSEEK_FLAG_FRAME);
//}
break; //失败不需要怎么样,直接跳出循环
}
//cout << "pkt->size : " << pkt->size << endl;
显示的时间
//cout << " pkt->pts :" << pkt->pts << endl;
显示的ms
//cout << (double)r2d(ic->streams[pkt->stream_index]->time_base) << endl;
//cout << "pkt->pts ms :" << (pkt->pts) * ((double)r2d(ic->streams[pkt->stream_index]->time_base) * 1000) << endl;
//cout << "pkt->dts :" << pkt->dts << endl;
AVCodecContext * cc = NULL;
if (pkt->stream_index == audioStream)
{
cc = ac;
cout << "音频:" << endl;
}
else if (pkt->stream_index == videostream)
{
cout << "图片:" << endl;
cc = vc; //如果既不是音频也不是视频那么我们应该退出,这里暂不处理
}
//先释放引用后(av_packet_unref )在进行解码操作
//发送packet到解码线程进行解码,这个函数是非阻塞的
//avcodec_send_packet发送到缓冲队列当中,当解码到最后缓冲当中的数据取出来需要调用avcodec_send_packet(cc,NULL)来获取,并且
//avcodec_receive_frame数次,这里代码没写
ret = avcodec_send_packet(cc,pkt);
//释放引用计数;为零 清空pkt.buff 指向的空间,这个需要avcodec_send_packet后清除引用计数,不然解码会出错,放到后面又有可能内存泄漏;
av_packet_unref(pkt);
if (ret != 0) //
{
char buf[1024] = { 0 };
av_strerror(ret, buf, sizeof(buf) - 1);
av_log(NULL, AV_LOG_ERROR, "avcodec_send_packet %s\n:", buf);
continue; //这个错误了不要退出 传入下一个pakcet,打印也在release版本要屏蔽
}
//receive 解码后的数据也就是AVFrame,非阻塞
//一次send可能有多次的receive,一个packet包当中可能会有多帧率数据,尤其在音频当中 所以这里要做个一个循环多次进行receive
//avcodec_receive_frame 和 avcodec_send_packet 在不用的线程中进行有自己的互斥机制
for (;;)
{
ret = avcodec_receive_frame(cc, frame);
if (ret != 0)
{
break;
}
//打印frame
//视频: 0 ; 768(宽度)
//音频:8(AV_SAMPLE_FMT_FLTP,< float, planar> = 4字节); 8192 (nb_samples*format*channels = 1024 * 4 * 2)
cout << "frame->format : " << frame->format << "linesize : "<<frame->linesize[0] <<endl; // format :AVPixelFormat ,AVSampleFormat
//视频格式转换
if (cc == vc)
{
vctx = sws_getCachedContext(
vctx, //传入null会重新创建
frame->width, frame->height, //源的宽和高
(AVPixelFormat)frame->format, //输入的格式 (比如说是 YUV420p)
frame->width, frame->height, //输出的宽高
AV_PIX_FMT_RGBA, //输出的格式
SWS_BILINEAR, //转换用的算法
0, 0, 0);
if (vctx == NULL)
{
cout << "sws_getCachedContext erro " << endl;
getchar();
}
if (vctx)
{
if (NULL == rgb)
{
rgb = new unsigned char[frame->width * frame->height * 4]; //这里不考虑对齐的问题,如果图像有问题那么考虑可能是这里出错了
}
uint8_t * data[2] = {0};
data[0] = rgb; //如果是一个平面格式那么就需要定义data[4],如果定义出错会直接挂掉
int lines[2] = { 0 };
lines[0] = frame->width * 4; //每一行的字节数
//开始格式转换,这步开销很大,很吃cpu; 这一部分应该抽取出去用显卡做
ret = sws_scale(vctx,
frame->data, //输入数据
frame->linesize, //输入行大小
0, //切片,传入0即可
frame->height, //输入高度
data,
lines
);
cout << "sws_scale : " << ret<< endl; //返回值是个高度;
}
}
else //音频重采样,该出不严谨,可能还有字幕
{
uint8_t * data[2] = { 0 };
if (pcm ==NULL)
{
pcm = new uint8_t[frame->nb_samples*2*2]; //swr_alloc_set_opts里面输出的样本格式(AV_SAMPLE_FMT_S16)和通道数来确定
}
data[0] = pcm;
ret = swr_convert(actx,
data, frame->nb_samples,//输出 nb_samples如果修改了就会导致声音失真
(const uint8_t **)frame->data, frame->nb_samples//输入
);
cout << "swr_convert :" << ret <<endl;
}
}
//Xsleep(500); //sleep 500ms
}
av_frame_free(&frame);
av_packet_free(&pkt);
if (ic)
{
avformat_close_input(&ic);
}
#ifdef _WIN32
//cout << "windows app" << endl;
#ifdef _WIN64
//cout << "win64" << endl;
#else
//cout << "win32" << endl;
#endif // _WIN64
#else
//cout << " linux" << endl;
#endif //_WIN32
cin.get();
return 0;
}