实验一 利用 FFmpeg 进行视音频信息提取 | 音视频技术

一、实验目的

1、深入掌握视音频的基本参数信息
2、掌握ffmpeg编译环境配置
3、掌握和熟悉提取视音频文件的基本方法

二、实验要求

1、对ffmpeg的编译环境进行配置;
2、对一个视频文件,提取基本信息(例如,封装格式,码流,视频编码方式,音频编码方式,分辨率,帧率,时长等等),并输出为txt文档。结果与MediaInfo的信息对比,并截图;
3、对该视频文件,提取视频信息,保存为yuv格式。结果利用yuv播放器播放并截图;
4、对该视频文件,提取音频信息,保存为wav格式。结果利用 Adobe Audition 播放并截图。

1、环境配置

项目文件夹下

获取视频信息java_#define

获取视频信息java_#define_02


获取视频信息java_ide_03

获取视频信息java_音视频_04

2、提取基本信息并且对比存储

获取视频信息java_音视频_05


获取视频信息java_ide_06


3、对该视频文件,提取视频信息,保存为yuv格式。结果利用yuv播放器播放并截图

获取视频信息java_ide_07


获取视频信息java_#define_08


4、对该视频文件,提取音频信息,保存为wav格式。结果利用 Adobe Audition 播放并截图

获取视频信息java_ide_09

三、实验代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>

#define __STDC_CONSTANT_MACROS

#ifdef _WIN32
 //Windows
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
};
#else
 //Linux...
#ifdef __cplusplus
extern "C"
{
#endif
#include <libavformat/avformat.h>
#ifdef __cplusplus
};
#endif
#endif


//'1': Use H.264 Bitstream Filter 
#define USE_H264BSF 1
#define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio  

typedef struct WAVE_HEADER { /* RIFF chunk descriptor: 12 bytes on disk */
	char       fccID[4];   /* "RIFF" */
	uint32_t   dwSize;     /* file size - 8 (8 = fccID + dwSize) */
	char       fccType[4]; /* "WAVE" */
}WAVE_HEADER;              /* uint32_t (not unsigned long, which is 8 bytes
                              on LP64 Linux) keeps sizeof == SIZE_WAV_HEADER */

typedef struct WAVE_FMT {    /* "fmt " sub-chunk: 24 bytes on disk */
	char       fccSub1ID[4];   /* "fmt " -- note the trailing space */
	uint32_t   dwSub1Size;     /* 16 = payload bytes after this field */
	uint16_t   wFormatTag;     /* audio format; 1 = uncompressed PCM */
	uint16_t   numChannels;    /* 1 = mono, 2 = stereo, ... */
	uint32_t   sampleRate;     /* samples per second, e.g. 8000, 44100 */
	uint32_t   byteRate;       /* sampleRate * numChannels * bitsPerSample/8 */
	uint16_t   wBlockAlign;    /* numChannels * bitsPerSample/8 */
	uint16_t   uiBitsPerSample;/* bits per sample: 8, 16, 32, ... */
}WAVE_FMT;                   /* fixed-width members (not unsigned long/short)
                                keep sizeof == SIZE_WAV_FMT on LP64 too */

typedef struct WAVE_DATA { /* "data" sub-chunk header: 8 bytes on disk */
	char       fccSub2ID[4]; /* "data" */
	uint32_t   dwSub2Size;   /* payload bytes = numSamples * numChannels * bitsPerSample/8 */
}WAVE_DATA;                /* uint32_t keeps sizeof == SIZE_WAV_DATA on LP64 */

/* total wav file size = dwSize + 8 = 44 + dwSub2Size  =>  dwSize = dwSub2Size + 36 */

#define SIZE_WAV_HEADER 12
#define SIZE_WAV_FMT 24
#define SIZE_WAV_DATA 8


/*
numSamples:单个通道的总采样点数的总和值
return wav的文件头部指针
*/
char* set_wav_parm(int numSamples) {
	WAVE_HEADER* wavHEADER;
	WAVE_FMT* wavFMT;
	WAVE_DATA* wavDATA;

	wavHEADER = (WAVE_HEADER*)malloc(SIZE_WAV_HEADER);
	char* p = wavHEADER->fccID;
	wavFMT = (WAVE_FMT*)(p + SIZE_WAV_HEADER);
	wavDATA = (WAVE_DATA*)(p + SIZE_WAV_HEADER + SIZE_WAV_FMT);

	//head
	memcpy(wavHEADER->fccID, "RIFF", 4);
	memcpy(wavHEADER->fccType, "WAVE", 4);

	//fmt
	memcpy(wavFMT->fccSub1ID, "fmt ", 4);

	wavFMT->dwSub1Size = 16;
	wavFMT->wFormatTag = 1;
	wavFMT->numChannels = 2;
	wavFMT->sampleRate = 44100;
	wavFMT->uiBitsPerSample = 16;
	wavFMT->byteRate = (wavFMT->sampleRate * wavFMT->numChannels * wavFMT->uiBitsPerSample) / 8;
	wavFMT->wBlockAlign = (wavFMT->numChannels * wavFMT->uiBitsPerSample) / 8;

	//data
	memcpy(wavDATA->fccSub2ID, "data", 4);
	wavDATA->dwSub2Size = (numSamples * wavFMT->numChannels * wavFMT->uiBitsPerSample) / 8;

	wavHEADER->dwSize = wavDATA->dwSub2Size + 36;

	return p;


}

int main(int argc, char* argv[])
{
	AVFormatContext* ifmt_ctx = NULL;
	AVCodecContext* ifmt_CodecCtx = NULL;
	AVCodec* pCodec;
	AVFrame* pFrame, * pFrameYUV;
	uint8_t* out_buffer;
	AVPacket pkt;
	int ret, i, got_picture;
	int videoindex = -1, audioindex = -1;
	struct SwsContext* img_convert_ctx;
	const char* in_filename = "Titanic.ts";//Input file URL
	const char* out_filename_v = "Titanic.h264";//Output file URL
	const char* out_filename_yuv = "Titanic.yuv";
	const char* out_filename_a = "Titanic.mp3";

	av_register_all();
	avcodec_register_all();
	avformat_network_init();
	ifmt_ctx = avformat_alloc_context();
	//Input
	if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
		printf("Could not open input file.");
		return -1;
	}
	if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
		printf("Failed to retrieve input stream information");
		return -1;
	}

	videoindex = -1;
	for (i = 0; i < ifmt_ctx->nb_streams; i++) {
		if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
			videoindex = i;
		}
		else if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
			audioindex = i;
		}
	}
	//Dump Format------------------
	printf("\nInput Video===========================\n");
	av_dump_format(ifmt_ctx, 0, in_filename, 0);
	printf("\n======================================\n");

	FILE* fp_audio = fopen(out_filename_a, "wb+");
	FILE* fp_video = fopen(out_filename_v, "wb+");
	FILE* fp_video_yuv = fopen(out_filename_yuv, "wb+");
	ifmt_CodecCtx = ifmt_ctx->streams[videoindex]->codec;

	//查找解码器
	pCodec = avcodec_find_decoder(ifmt_CodecCtx->codec_id);
	if (pCodec == NULL) {
		printf("Codec not found.\n");
		return -1;
	}

	//打开解码器
	if (avcodec_open2(ifmt_CodecCtx, pCodec, NULL) < 0) {
		printf("Could not open codec.\n");
		return -1;
	}

	
	//文件流形式输入到output.txt文本里
	FILE* fp = fopen("output.txt", "wb+");
	fprintf(fp, "封装格式:%s\n", ifmt_ctx->iformat->name);
	fprintf(fp, "宽高:%d * %d\n", ifmt_ctx->streams[videoindex]->codec->width, ifmt_ctx->streams[videoindex]->codec->height);//videoindex是从上面for循环遍历得到的视频流的索引,一般来说视频流索引为0,音频为1
	fprintf(fp, "时长: %d秒\n", ifmt_ctx->duration / 1000000);
	fprintf(fp, "码率: %d\n", ifmt_ctx->bit_rate);
	fprintf(fp, "封装格式的名称: %s\n", ifmt_ctx->iformat->name);
	fprintf(fp, "封装格式的长名称: %s\n", ifmt_ctx->iformat->long_name);
	fprintf(fp, "封装格式的扩展名: %s\n", ifmt_ctx->iformat->extensions);
	fprintf(fp, "输入视频的AVStream个数: %d\n", ifmt_ctx->nb_streams);
	fprintf(fp, "输入视频的AVStream序号: %d\n", ifmt_ctx->streams[videoindex]->id);
	fprintf(fp, "输入视频的AVStream的时基: %d\n", ifmt_ctx->streams[videoindex]->time_base);
	fprintf(fp, "输入视频的AVStream的帧率: %d\n", ifmt_ctx->streams[videoindex]->r_frame_rate);
	fprintf(fp, "视频像素格式: %s\n", ifmt_ctx->streams[videoindex]->codec->pix_fmt);
	fprintf(fp, "编解码器名称: %s\n", ifmt_ctx->streams[videoindex]->codec->codec->name);
	fprintf(fp, "编解码器长名称: %s\n", ifmt_ctx->streams[videoindex]->codec->codec->long_name);
	fprintf(fp, "编解码器类型: %s\n", ifmt_ctx->streams[videoindex]->codec->codec->type);
	fprintf(fp, "编解码器ID: %d\n", ifmt_ctx->streams[videoindex]->codec->codec->id);
	fprintf(fp, "音频采样率: %d\n", ifmt_ctx->streams[audioindex]->codec->sample_rate);
	fprintf(fp, "音频声道数: %d\n", ifmt_ctx->streams[audioindex]->codec->channels);
	fprintf(fp, "音频采样格式: %d\n", ifmt_ctx->streams[audioindex]->codec->sample_fmt);
	fclose(fp);

	/*
	FIX: H.264 in some container format (FLV, MP4, MKV etc.) need
	"h264_mp4toannexb" bitstream filter (BSF)
	  *Add SPS,PPS in front of IDR frame
	  *Add start code ("0,0,0,1") in front of NALU
	H.264 in some container (MPEG2TS) don't need this BSF.
	*/
	pFrame = av_frame_alloc();
	pFrameYUV = av_frame_alloc();
	out_buffer = (uint8_t*)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, ifmt_CodecCtx->width, ifmt_CodecCtx->height));
	avpicture_fill((AVPicture*)pFrameYUV, out_buffer, PIX_FMT_YUV420P, ifmt_CodecCtx->width, ifmt_CodecCtx->height);
	img_convert_ctx = sws_getContext(ifmt_CodecCtx->width, ifmt_CodecCtx->height, ifmt_CodecCtx->pix_fmt,
		ifmt_CodecCtx->width, ifmt_CodecCtx->height, PIX_FMT_YUV420P, 4, NULL, NULL, NULL);
	

#if USE_H264BSF
	AVBitStreamFilterContext* h264bsfc = av_bitstream_filter_init("h264_mp4toannexb");
#endif
	int frame_cnt = 0;
	while (av_read_frame(ifmt_ctx, &pkt) >= 0) {
		if (pkt.stream_index == videoindex) {
#if USE_H264BSF
			av_bitstream_filter_filter(h264bsfc, ifmt_ctx->streams[videoindex]->codec, NULL, &pkt.data, &pkt.size, pkt.data, pkt.size, 0);
#endif
			printf("Write Video Packet. size:%d\tpts:%lld\n", pkt.size, pkt.pts);
			fwrite(pkt.data, 1, pkt.size, fp_video);
			//解码一帧压缩数据
			ret = avcodec_decode_video2(ifmt_CodecCtx, pFrame, &got_picture, &pkt);//YUV
			if (got_picture) {
				sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, ifmt_CodecCtx->height,
					pFrameYUV->data, pFrameYUV->linesize);
				printf("Decoded frame index: %d\n", frame_cnt);

				fwrite(pFrameYUV->data[0], 1, ifmt_CodecCtx->width * ifmt_CodecCtx->height, fp_video_yuv);
				fwrite(pFrameYUV->data[1], 1, ifmt_CodecCtx->width * ifmt_CodecCtx->height / 4, fp_video_yuv);
				fwrite(pFrameYUV->data[2], 1, ifmt_CodecCtx->width * ifmt_CodecCtx->height / 4, fp_video_yuv);

				frame_cnt++;

			}
		}
		else if (pkt.stream_index == audioindex) {
			/*
			AAC in some container format (FLV, MP4, MKV etc.) need to add 7 Bytes
			ADTS Header in front of AVPacket data manually.
			Other Audio Codec (MP3...) works well.
			*/
			printf("Write Audio Packet. size:%d\tpts:%lld\n", pkt.size, pkt.pts);
			fwrite(pkt.data, 1, pkt.size, fp_audio);
		}
		av_free_packet(&pkt);
	}

	const char* input_file = "Titanic.mp3";
	av_register_all();
	avformat_network_init();
	AVFormatContext* pFormatCtx = avformat_alloc_context();

	if (avformat_open_input(&pFormatCtx, input_file, 0, 0) != 0) {
		printf("Couldn't open input stream.\n");
		return -1;
	}

	if (avformat_find_stream_info(pFormatCtx, 0) < 0) {
		printf("Couldn't find stream information.\n");
		return -1;
	}
	//av_dump_format(pFormatCtx, 0, input_file, false);
	//获取音频流索引位置
	int j = 0, audio_stream_idx = -1;
	for (; j < pFormatCtx->nb_streams; j++) {
		if (pFormatCtx->streams[j]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
			audio_stream_idx = j;
			break;
		}
	}
	if (audio_stream_idx == -1) {
		printf("Didn't find a audio stream.\n");
		return -1;
	}

	AVCodecContext* codecCtx = pFormatCtx->streams[audio_stream_idx]->codec;
	AVCodec* codec = avcodec_find_decoder(codecCtx->codec_id);
	if (codec == NULL) {
		printf("Codec not found.\n");
		return -1;
	}

	if (avcodec_open2(codecCtx, codec, NULL) < 0) {
		printf("Could not open codec.\n");
		return -1;
	}

	AVSampleFormat in_sample_fmt = codecCtx->sample_fmt;//输入的采样格式
	int in_sample_rate = codecCtx->sample_rate;//输入的采样率
	int channels = codecCtx->channels;


	printf("采样率:%d ,声道数:%d\n", in_sample_rate, channels);

	//重采样设置参数
	AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16; //输出采样格式16bit PCM
	//输出采样率
	int out_sample_rate = 44100;
	//输出的声道布局(立体声)
	uint64_t out_ch_layout = AV_CH_LAYOUT_STEREO;
	SwrContext* swrCtx = swr_alloc();
	swrCtx = swr_alloc_set_opts(swrCtx,
		out_ch_layout, out_sample_fmt, out_sample_rate,
		av_get_default_channel_layout(codecCtx->channels), in_sample_fmt, in_sample_rate,
		0, NULL);
	/*
	av_opt_set_int(swrCtx, "in_channel_layout", AV_CH_LAYOUT_MONO, 0);
	av_opt_set_int(swrCtx, "in_sample_rate", in_sample_rate, 0);
	av_opt_set_sample_fmt(swrCtx, "in_sample_fmt", in_sample_fmt, 0);
	av_opt_set_int(swrCtx, "out_channel_layout", out_ch_layout, 0);
	av_opt_set_int(swrCtx, "out_sample_rate", out_sample_rate, 0);
	av_opt_set_sample_fmt(swrCtx, "out_sample_fmt", out_sample_fmt, 0);*/
	swr_init(swrCtx);

	//输出的声道个数
	int out_channel_nb = av_get_channel_layout_nb_channels(out_ch_layout);

	printf("输出pcm: 采样率:%d , 声道数:%d\n", out_sample_rate, out_channel_nb);

	const char* output_wav = "Titanic.wav";
	FILE* fp_wav = fopen(output_wav, "wb+");

	//16bit 44100 PCM 数据
	uint8_t* out_buffer1 = (uint8_t*)av_malloc(MAX_AUDIO_FRAME_SIZE * 2);
	int got_frame = 0, framecnt = 0;
	AVPacket* packet = (AVPacket*)av_malloc(sizeof(AVPacket));
	av_init_packet(packet);
	AVFrame* frame = av_frame_alloc();

	int totalsamples = 0;
	fseek(fp_wav, SIZE_WAV_HEADER + SIZE_WAV_FMT + SIZE_WAV_DATA, 1);
	while (av_read_frame(pFormatCtx, packet) >= 0) {

		if (packet->stream_index == audio_stream_idx) {
			//解码
			avcodec_decode_audio4(codecCtx, frame, &got_frame, packet);
			if (got_frame > 0) {
				//解码得到的Frame数据,转成PCM
				swr_convert(swrCtx, &out_buffer1, MAX_AUDIO_FRAME_SIZE, (const uint8_t**)frame->data, frame->nb_samples);
				//printf("index:%5d\t pts:%lld\t packet size:%d\n", framecnt, packet->pts, packet->size);
				//Write PCM

				totalsamples += frame->nb_samples;
				//音频文件字节大小= 采用率*时长*通道数*采样位数/8
				//计算一帧音频帧占用的字节数  通道数 * 采样点数* 采样位数/8 
				int out_buffer_size = av_samples_get_buffer_size(NULL, out_channel_nb,
					frame->nb_samples, out_sample_fmt, 1);
				//frame->nb_samples 当前帧的一个通道的采样点数(经测试,可能有的帧值不等) 和codecCtx->frame_size(音频帧的一个通道的采样点数,是固定值)  
				//虽然两者都是表示音频的采样点数,但是我的理解,这里用frame->nb_samples更加科学。
				fwrite(out_buffer1, 1, out_buffer_size, fp_wav);

				framecnt++;

			}
		}

		av_free_packet(packet);
	}
	swr_free(&swrCtx);
	av_frame_free(&frame);
	av_free(out_buffer1);

	char* p = set_wav_parm(totalsamples);
	rewind(fp_wav);//文件指针回到头部
	fwrite(p, 1, SIZE_WAV_HEADER + SIZE_WAV_FMT + SIZE_WAV_DATA, fp_wav);//size取1个字节没问题,可能跟p是char类型有关,取大了,报错
	fclose(fp_wav);
	printf("success");
	avcodec_close(codecCtx);
	avformat_close_input(&pFormatCtx);


#if USE_H264BSF
	av_bitstream_filter_close(h264bsfc);
#endif

	fclose(fp_video);
	fclose(fp_video_yuv);
	fclose(fp_audio);
	avformat_close_input(&ifmt_ctx);

	if (ret < 0){
		printf("Error occurred.\n");
		return -1;
	}

	
	return 0;
}