python如何使用FFmpeg 提取音频 ffmpeg提取视频和音频文件

转载

网猴儿 2024-08-03 19:42:56

文章标签 ffmpeg pcm 音频编码解码数据重采样 文章分类 Python 后端开发

概念

这里有几个重要的概念，有必要先理解一下：

音频重采样
音频都有一个固定的采样率，一般是44100Hz。如果需要改变这个采样率（比如改为8000Hz），就必须进行重采样操作。
音频双通道
双通道的音频数据在存储的时候，需要按交错格式存储，很多文章把它形容为：LRLRLRLR…。一开始我没理解过来，其实L就是左声道，R就是右声道，也就是说存储的时候要先存一个左声道采样，然后再存一个右声道采样，如此交替。

处理流程

由于需要异步处理音频提取，并在界面展示进度，所以大致流程是这样的：

开启等待处理线程
输入文件，通过ffmpeg读取音频帧数据，解码音频数据帧，进行数据重采样处理，输出写入文件
异步通知当前进度，展示到进度条。

关键代码

如下代码都是基于Qt5，使用C++写的。

头文件定义

#include <QThread>
#include <QMutex>
#include <QWaitCondition>
extern "C" {
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libswresample/swresample.h>
}

/**
 * Worker thread that extracts the audio track of a media file as raw PCM.
 *
 * The constructor starts the thread, which then sleeps on a wait condition.
 * doExtract() stores the job parameters, prepares the FFmpeg decoder and
 * resampler via setup() and wakes the thread; the thread runs process(),
 * which writes the resampled samples to the destination file and reports
 * progress through the procgress() signal.
 *
 * Every data member carries a default value so that no member is ever read
 * while indeterminate, regardless of what the constructor assigns.
 */
class AudioPCMExtractor : public QThread
{
    Q_OBJECT
public:
    explicit AudioPCMExtractor(QObject *parent = nullptr);
    ~AudioPCMExtractor();

    /// Queues one extraction job: decode `src`, resample to `chan` channels
    /// at `rate` Hz and write the PCM data to `dst`.
    void doExtract(int chan, int rate, QString src, QString dst);

    /// Asks the worker loop in run() to terminate.
    void stop();

signals:
    /// Emitted from process() with the extraction progress in percent.
    void procgress(int p);

protected:
    /// Thread body: waits for a job, runs process(), repeats until stopped.
    void run() override;

private:
    void setup();   // opens the input and builds decoder + resampler
    int setupOut(); // NOTE(review): not defined in this excerpt
    void process(); // decode -> resample -> write loop
    void release(); // frees everything setup() created

    // Non-zero while the worker loop should keep running. Atomic because it
    // is written by stop() and read by run(), which execute on different
    // threads when stop() is called from outside the worker.
    QAtomicInt running{1};
    QWaitCondition wait_cond_; // the worker sleeps on this between jobs
    QMutex wait_lock_;         // mutex paired with wait_cond_
    QAtomicInt queued{0};      // non-zero while a job is pending or running

    int outChannel = 0;                             // channel count after resampling
    AVSampleFormat outFormat = AV_SAMPLE_FMT_S16P;  // sample format after resampling
    int outSampleRate = 0;                          // sample rate after resampling
    QString outFileFormat;

    QString src_filePath; // input media file
    QString dst_filePath; // output PCM file

    AVFormatContext *pAVFormatContext = nullptr; // demuxer context of the input
    AVCodecContext *pAVCodecContext = nullptr;   // decoder context of the audio stream
    AVFrame *pAVFrame = nullptr;                 // reusable decoded frame
    AVPacket *pkt = nullptr;                     // reusable compressed packet
    const AVCodec *pAVCodec = nullptr;           // decoder found for the audio stream (never modified here)
    SwrContext *pSwrContext = nullptr;           // resampler / sample-format converter
    int audio_index = -1;                        // index of the selected audio stream
    float duration = 0;                          // audio stream duration, in stream time-base units
    int64_t bit_rate = 0;                        // bit rate copied from the decoder context

    // Output-side state; presumably used by setupOut(), which is not part of
    // this excerpt.
    AVFormatContext *pAVFormatContext_out = nullptr;
    AVCodecContext *pAVCodecContext_out = nullptr;
    AVStream *pAVStream_out = nullptr;
    AVCodec *pAVCodec_out = nullptr;
};

开启异步线程，进行任务等待。初始化对象之后，线程就进入死循环的等待状态。

/**
 * Creates the extractor and immediately starts its worker thread.
 *
 * Every scalar and pointer member is given a defined value here (in
 * declaration order) so that setup(), process() and release() never read an
 * indeterminate value, even when doExtract() has not been called yet.
 *
 * @param parent Optional QObject parent, forwarded to QThread.
 */
AudioPCMExtractor::AudioPCMExtractor(QObject *parent)
    : QThread(parent),
      running(true),
      queued(false),
      outChannel(0),
      outFormat(AV_SAMPLE_FMT_S16P), // default output: 16-bit signed samples, planar layout
      outSampleRate(0),
      pAVFormatContext(nullptr),
      pAVCodecContext(nullptr),
      pAVFrame(nullptr),
      pkt(nullptr),
      pAVCodec(nullptr),
      pSwrContext(nullptr),
      audio_index(-1), // no audio stream selected yet
      duration(0),
      bit_rate(0),
      pAVFormatContext_out(nullptr),
      pAVCodecContext_out(nullptr),
      pAVStream_out(nullptr),
      pAVCodec_out(nullptr)
{
    // Start the worker last, once all state it may look at is initialised.
    start(QThread::NormalPriority);
}

void AudioPCMExtractor::stop()
{
    running = false;
    wait_cond_.wakeAll();
}

void AudioPCMExtractor::run()
{
    wait_lock_.lock();
    while (running)
    {
    	//等待
        wait_cond_.wait(&wait_lock_);
        if (!running)
        {
            break;
        }

        process();

        queued = false;
    }

    wait_lock_.unlock();
}

外部调用接口：告诉当前需要进行提取的文件路径，以及提取后的保存文件路径，并通过FFmpeg打开对应的解码器等。

void AudioPCMExtractor::doExtract(int chan, int rate, QString src, QString dst)
{
    if (queued)
    {
        return;
    }

    queued = true;
    src_filePath = src;
    dst_filePath = dst;
    outChannel = chan;
    outSampleRate = rate;
    setup();
    wait_cond_.wakeAll();
}

void AudioPCMExtractor::setup()
{
    int ret = 0;
    char errors[1024] = {0};

    //打开媒体，并读取头部信息
    int errCode = avformat_open_input(&pAVFormatContext, src_filePath.toUtf8().data(), nullptr, nullptr);
    if (errCode != 0)
    {
        av_strerror(errCode, errors, 1024);
        qCritical() << "Could not open 1" << src_filePath << "-" << errors;
        return;
    }

    //找到音频流
    audio_index = av_find_best_stream(pAVFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_index < 0)
    {
        av_strerror(errCode, errors, 1024);
        qCritical() << "Could not open 2" << src_filePath << "-" << errors;
        avformat_free_context(pAVFormatContext);
        return;
    }
    AVStream *in_stream = pAVFormatContext->streams[audio_index];
    duration = in_stream->duration;

    //找到对应的解码器
    pAVCodec = avcodec_find_decoder(in_stream->codecpar->codec_id);
    if (pAVCodec == nullptr)
    {
        qDebug() << "avcodec_find_decoder fail";
        avformat_free_context(pAVFormatContext);
        return;
    }
    pAVCodecContext = avcodec_alloc_context3(pAVCodec);
    avcodec_parameters_to_context(pAVCodecContext, in_stream->codecpar);
    bit_rate = pAVCodecContext->bit_rate;

    //打开解码器
    ret = avcodec_open2(pAVCodecContext, pAVCodec, NULL);
    if (ret != 0)
    {
        qDebug() << "avcodec_open2 fail";
        avformat_free_context(pAVFormatContext);
        return;
    }
    pSwrContext = swr_alloc_set_opts(nullptr, // 输入为空，则会分配
                                     av_get_default_channel_layout(outChannel),
                                     outFormat,     // 输出的采样频率
                                     outSampleRate, // 输出的格式
                                     av_get_default_channel_layout(pAVCodecContext->channels),
                                     pAVCodecContext->sample_fmt,  // 输入的格式
                                     pAVCodecContext->sample_rate, // 输入的采样率
                                     0,
                                     nullptr);
    swr_init(pSwrContext);

    pkt = av_packet_alloc();
    av_init_packet(pkt);
    pAVFrame = av_frame_alloc();
}

唤醒线程，并读取数据帧，解码帧数据，并进行重采样，保存音频数据到文件

void AudioPCMExtractor::process()
{
    QFile file(dst_filePath);
    file.open(QIODevice::WriteOnly | QIODevice::Truncate);

    //获取单次采样的字节数
    int numBytes = av_get_bytes_per_sample(outFormat);

   //申请通道数据内存
    uint8_t *outData[outChannel];
    for (int i = 0; i < outChannel; i++)
    {
        outData[i] = (uint8_t *)av_malloc(192000);
    }

    float pcount = 0;
    int lastPersent = 0;
    //读取音频帧数据
    while (av_read_frame(pAVFormatContext, pkt) >= 0)
    {
        if (pkt->stream_index == audio_index)
        {

            //解码音频
            int ret = Decode(pAVCodecContext, pAVFrame, pkt);
            if (ret < 0)
            {
                break;
            }

            //重采样应输出采样数
            int dstNbSamples = av_rescale_rnd(pAVFrame->nb_samples,
                                              outSampleRate,
                                              pAVCodecContext->sample_rate,
                                              AV_ROUND_ZERO);
            //进行转码，并返回实际采样数
            int out_samples = swr_convert(pSwrContext,
                                          outData,
                                          dstNbSamples,
                                          (const uint8_t **)pAVFrame->data,
                                          pAVFrame->nb_samples);

            if (out_samples > 0)
            {
                //多声道数据合并
                for (int index = 0; index < out_samples; index++)
                {
                    for (int channel = 0; channel < outChannel; channel++)
                        file.write((const char *)outData[channel] + numBytes * index, numBytes);
                }
            }

            pcount += pkt->duration;
        }

        int persent = qFloor(pcount * 100 / duration);
        if (persent > lastPersent && persent <= 100)
        {
            lastPersent = persent;

			//发送信号，通知当前提取进度
            emit procgress(persent);
        }

        av_packet_unref(pkt);
    }

    file.close();

    for (int i = 0; i < outChannel; i++)
        av_free(outData[i]);
    release();
    qDebug() << "extract finish";
}

//释放资源
void AudioPCMExtractor::release()
{
    if (pkt)
        av_packet_free(&pkt);
    if (pSwrContext)
        swr_free(&pSwrContext);
    if (pAVFrame)
        av_frame_free(&pAVFrame);
    if (pAVCodecContext)
        avcodec_close(pAVCodecContext);
    if (pAVFormatContext)
        avformat_free_context(pAVFormatContext);

    pkt = nullptr;
    pSwrContext = nullptr;
    pAVFrame = nullptr;
    pAVCodecContext = nullptr;
    pAVFormatContext = nullptr;
}

音频解码

/**
 * Decodes at most one audio frame.
 *
 * A frame the decoder already has ready is returned first; otherwise `pkt`
 * is submitted exactly once and the decoder is asked again.
 *
 * @param pAVCodecContext Opened decoder context.
 * @param pAVFrame        Receives the decoded frame; it is cleared on entry.
 * @param pkt             Packet to decode; a blank packet (no data) switches
 *                        the decoder to draining mode.
 * @return 0 on success, AVERROR_EOF once a draining decoder is empty, or
 *         another negative AVERROR code on failure. On a 0 return
 *         pAVFrame->nb_samples is 0 when the decoder accepted the packet but
 *         needs more input before it can output a frame.
 *
 * NOTE(review): when a ready frame is returned without `pkt` having been
 * submitted, the caller gets no indication that the packet is still
 * unconsumed — confirm the inputs never carry more than one frame per
 * packet, or drain with a receive loop in the caller.
 */
int Decode(AVCodecContext *pAVCodecContext, AVFrame* pAVFrame, AVPacket *pkt){
    av_frame_unref(pAVFrame);

    int ret = avcodec_receive_frame(pAVCodecContext, pAVFrame);
    if (ret == AVERROR(EAGAIN)) {
        // The decoder wants input: hand over the packet once.
        ret = avcodec_send_packet(pAVCodecContext, pkt);
        if (ret < 0) {
            qCritical() << "Failed to send packet to decoder." << ret;
            return ret;
        }

        ret = avcodec_receive_frame(pAVCodecContext, pAVFrame);
        if (ret == AVERROR(EAGAIN)) {
            // Still nothing to output: the decoder needs further packets.
            // Submitting the same packet again would feed it duplicate
            // data, so report "no frame yet" instead of looping.
            return 0;
        }
    }

    if(ret < 0 && ret != AVERROR_EOF){
        qDebug() << "Failed to receive packet from decoder." << ret;
    }

    return ret;
}

存储的时候要特别注意这段数据合并的代码，也就是LRLRLRLR的交错存储格式。重采样输出的是平面格式（AV_SAMPLE_FMT_S16P），每个声道各占一个缓冲区，因此写文件时需要按采样逐个交错合并：

// Merge the per-channel buffers into one interleaved stream: for every
// sample index, write that sample of each channel in turn, which yields the
// LRLRLR... layout for two channels.
for (int index = 0; index < out_samples; index++)
 {
     for (int channel = 0; channel < outChannel; channel++)
         // numBytes is both the byte offset step inside a channel buffer and
         // the number of bytes written per call.
         file.write((const char *)outData[channel] + numBytes * index, numBytes);
 }

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。