Added backend support for audio buffers (PipeWire)

2025-08-09 19:02:10 -04:00
parent 91ac5771c4
commit 22758910c3
6 changed files with 484 additions and 119 deletions
@@ -0,0 +1,16 @@
 #include "AudioImageProvider.h"
 // Registers this provider with the base class as a Pixmap-type image provider.
 AudioImageProvider::AudioImageProvider()
    : QQuickImageProvider(QQuickImageProvider::Pixmap)
 {
 }
 /**
  * Returns the most recent audio texture held by AudioModel.
  *
  * @param id            Ignored; the latest frame is always returned.
  * @param size          Optional out-parameter that receives the size of the returned pixmap.
  * @param requestedSize Ignored; the texture is produced at a fixed size by AudioModel.
  * @return The latest frame as reported by AudioModel::frame().
  */
 QPixmap AudioImageProvider::requestPixmap(const QString &id, QSize *size, const QSize &requestedSize)
 {
    Q_UNUSED(id) // id is useless here. we always want to return the latest frame from AudioModel
    Q_UNUSED(requestedSize) // requested size is useless too. texture must always be 512x2

    // Fetch the frame exactly once so the reported size and the returned
    // pixmap always describe the same frame, even if the capture side
    // publishes a new one in between.
    QPixmap latest = AudioModel::frame();

    if(size)
        *size = latest.size();

    return latest;
 }
@@ -0,0 +1,17 @@
 #ifndef AUDIOIMAGEPROVIDER_H
 #define AUDIOIMAGEPROVIDER_H
 #include <QObject>
 #include <QPixmap>
 #include <QQuickImageProvider>
 #include "AudioModel.h"
 /**
  * Image provider that hands the current AudioModel frame to QML.
  */
 class AudioImageProvider : public QQuickImageProvider
 {
 public:
    /// Constructs the provider.
    explicit AudioImageProvider();

    /// Returns the current audio frame; optionally reports its size via @p size.
    QPixmap requestPixmap(const QString &id,
                          QSize *size,
                          const QSize &requestedSize) override;
 };
 #endif
@@ -1,72 +1,363 @@
 /* PipeWire */
 /* SPDX-FileCopyrightText: Copyright © 2022 Wim Taymans */
 /* SPDX-License-Identifier: MIT */
 /*
 [title]
 Audio capture using \ref pw_stream "pw_stream".
 [title]
 */
 #include <stdio.h>
 #include <math.h>
 #include <fftw3.h>
 #include "AudioModel.h"
-#ifdef AUDIOMODEL_H
+AudioModel::AudioModel(QObject *parent) : QObject(parent)
 /**
  * Sets up the PipeWire capture pipeline that feeds the audio texture.
  *
  * The constructor resets the shared capture state, creates the worker QThread
  * whose started() signal triggers startCaptureAsync, creates the PipeWire main
  * loop and an "audio-capture" stream, and connects that stream asking for
  * 32-bit float samples. The first instance constructed is published through
  * m_instance.
  *
  * @param parent Optional QObject parent; it is also used as the parent of the
  *               worker thread.
  */
 AudioModel::AudioModel(QObject *parent)
    : QObject(parent), m_deviceString(QString())
 {
    m_impl_data = { nullptr, nullptr, 0, 1, {}, {}};
    m_impl_data.samples.reserve(4096);
    // The smoothing history starts out as 2048 zeros.
    m_impl_data.smoothed.fill(0, 2048);

    // Publish the instance before any stream setup so it is set even when the
    // setup below bails out early.
    if(!m_instance)
        m_instance = this;

    // NOTE(review): Qt documents that an object with a parent cannot be moved
    // to another thread — confirm this is only ever constructed without one.
    m_thread = new QThread(parent);
    moveToThread(m_thread);
    connect(m_thread, &QThread::started, this, &AudioModel::startCaptureAsync);

    pw_init(nullptr, nullptr);

    /* make a main loop. If you already have another main loop, you can add
         * the fd of this pipewire mainloop to it. */
    m_impl_data.loop = pw_main_loop_new(NULL);

    // NOTE(review): these handlers are installed from library code; confirm
    // that taking over SIGINT/SIGTERM is acceptable for the host process.
    pw_loop_add_signal(pw_main_loop_get_loop(m_impl_data.loop), SIGINT, do_quit, &m_impl_data);
    pw_loop_add_signal(pw_main_loop_get_loop(m_impl_data.loop), SIGTERM, do_quit, &m_impl_data);

    /* Create a simple stream, the simple stream manages the core and remote
         * objects for you if you don't need to deal with them.
         *
         * If you plan to autoconnect your stream, you need to provide at least
         * media, category and role properties.
         *
         * Pass your events and a user_data pointer as the last arguments. This
         * will inform you about the stream state. The most important event
         * you need to listen to is the process event where you need to consume
         * the data.
         */
    struct pw_properties *props = pw_properties_new(PW_KEY_MEDIA_TYPE, "Audio",
                                                    PW_KEY_MEDIA_CATEGORY, "Capture",
                                                    PW_KEY_MEDIA_ROLE, "Music",
                                                    NULL);

    /* capture from the sink monitor ports */
    pw_properties_set(props, PW_KEY_STREAM_CAPTURE_SINK, "true");

    m_impl_data.stream = pw_stream_new_simple(
        pw_main_loop_get_loop(m_impl_data.loop),
        "audio-capture",
        props,
        &stream_events,
        &m_impl_data);

    // Without a stream there is nothing to connect; the destructor already
    // tolerates a null stream.
    if (!m_impl_data.stream)
    {
        fprintf(stderr, "AudioModel: failed to create the PipeWire capture stream\n");
        return;
    }

    struct spa_audio_info_raw info = SPA_AUDIO_INFO_RAW_INIT(
                                        .format = SPA_AUDIO_FORMAT_F32,
                                        .rate = 44100,
                                        .channels = 2
                                    );

    /* Make one parameter with the supported formats. The SPA_PARAM_EnumFormat
         * id means that this is a format enumeration (of 1 value): 32-bit
         * float samples, 44100 Hz, 2 channels. */
    uint8_t buffer[1024];
    struct spa_pod_builder b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer));
    const struct spa_pod *params[1];
    params[0] = spa_format_audio_raw_build(&b, SPA_PARAM_EnumFormat, &info);

    /* Now connect this stream. We ask that our process function is
         * called in a realtime thread. */
    const int result = pw_stream_connect(m_impl_data.stream,
                      PW_DIRECTION_INPUT,
                      PW_ID_ANY,
                      static_cast<pw_stream_flags>(PW_STREAM_FLAG_AUTOCONNECT |
                          PW_STREAM_FLAG_MAP_BUFFERS |
                          PW_STREAM_FLAG_RT_PROCESS),
                      params, 1);

    // A negative result means the connection request itself failed.
    if (result < 0)
        fprintf(stderr, "AudioModel: failed to connect the PipeWire capture stream (error %d)\n", result);
 }
 AudioModel::~AudioModel()
 {
-    if (m_recorder) 
+    stopCapture();
    if (m_impl_data.stream)
    {
-        m_recorder->stop();
+        pw_stream_disconnect(m_impl_data.stream);
-        delete m_recorder;
+        pw_stream_destroy(m_impl_data.stream);
    }
    if (m_impl_data.loop)
        pw_main_loop_destroy(m_impl_data.loop);
    if(m_thread)
    {
        if(m_thread->isRunning())
            m_thread->quit();
        m_thread->deleteLater();
    }
-    if (m_audioInput) 
+    pw_deinit();
        delete m_audioInput;
    if (m_captureSession)
        delete m_captureSession;
 }
-QByteArray AudioModel::frame() const
+void AudioModel::startCapture()
 {
-    // This function should return the current audio frame.
+    if(m_thread->isRunning())
    // For now, we return an empty QByteArray.
    return QByteArray();
 }
 /// Returns the audio device string currently stored in the model.
 QString AudioModel::device() const
 {
    const QString &current = m_deviceString;
    return current;
 }
 /// Lists the id of every audio input reported by QMediaDevices.
 QStringList AudioModel::availableDevices() const
 {
    const auto inputs = QMediaDevices::audioInputs();

    QStringList names;
    // Each device id is converted from Latin-1 into a QString.
    for (auto it = inputs.cbegin(); it != inputs.cend(); ++it)
        names << QString::fromLatin1(it->id());

    return names;
 }
 void AudioModel::setDeviceName(const QString &device)
 {
    if (m_deviceString == device)
        return;
-    m_deviceString = device;
+    m_thread->start(QThread::NormalPriority);
    // if (m_audioInput) 
    // {
    //     m_audioInput->setDevice(QAudioInput(device));
    //     getAudioFrame();
    // }
 }
-void AudioModel::getAudioFrame()
+void AudioModel::stopCapture()
 {
-    // This function should be implemented to retrieve the audio frame
+    m_running = false;
-    // from the audio input device and emit the frameChanged signal.
+    pw_main_loop_quit(m_impl_data.loop);
    // For now, we will just emit the signal to indicate that the frame is ready.
    Q_EMIT frameChanged();
 }
-#endif
+void AudioModel::startCaptureAsync()
 {
    // Runs the PipeWire main loop stored in the shared capture state. The
    // constructor connects this slot to the worker thread's started() signal.
    pw_main_loop_run(m_impl_data.loop);
 }
 /**
  * Returns a copy of the most recently published audio texture.
  *
  * @return The current frame, or a null QPixmap when no AudioModel instance
  *         has been created yet.
  */
 QPixmap AudioModel::frame()
 {
    // No instance yet means there is no frame to hand out.
    if (!m_instance)
        return QPixmap();

    // on_process replaces m_frame while holding m_mutex, so take the same
    // lock while copying it out.
    QMutexLocker locker(&m_mutex);
    return m_instance->m_frame;
 }
 /* Be notified when the stream param changes. We're only looking at the
 * format changes.
 */
 void AudioModel::on_stream_param_changed(void *_data, uint32_t id, const struct spa_pod *param)
 {
    struct impl *data = reinterpret_cast<impl*>(_data);
    /* NULL means to clear the format */
    if (param == NULL || id != SPA_PARAM_Format)
        return;
    if (spa_format_parse(param, &data->format.media_type, &data->format.media_subtype) < 0)
        return;
    /* only accept raw audio */
    if (data->format.media_type != SPA_MEDIA_TYPE_audio ||
        data->format.media_subtype != SPA_MEDIA_SUBTYPE_raw)
        return;
    /* call a helper function to parse the format for us. */
    spa_format_audio_raw_parse(param, &data->format.info.raw);
    fprintf(stdout, "capturing rate:%d channels:%d\n", data->format.info.raw.rate, data->format.info.raw.channels);
 }
 /* our data processing function is in general:
 *
 *  struct pw_buffer *b;
 *  b = pw_stream_dequeue_buffer(stream);
 *
 *  .. consume stuff in the buffer ...
 *
 *  pw_stream_queue_buffer(stream, b);
 */
 void AudioModel::on_process(void *userdata)
 {
    struct impl *data = reinterpret_cast<impl*>(userdata);
    struct pw_buffer *b;
    struct spa_buffer *buf;
    float *samples, max;
    uint32_t c, n, n_channels, n_samples, peak;
    if ((b = pw_stream_dequeue_buffer(data->stream)) == NULL) {
        pw_log_warn("out of buffers: %m");
        return;
    }
    buf = b->buffer;
    if ((samples = reinterpret_cast<float*>(buf->datas[0].data)) == NULL)
        return;
    n_channels = data->format.info.raw.channels;
    n_samples = buf->datas[0].chunk->size / sizeof(float);
    // convert channels to mono
    for(int index = 0; index < n_samples; index += n_channels)
    {
        float average = 0;
        for(int channel = 0; channel < n_channels; channel++)
            average += samples[index + channel];
        average /= n_channels;
        if(index > 0)
            data->samples.push_back(average);
    }
    /**
     * To convert the captured samples to an audio texture we need to:
     *
     * Take 2048 samples of audio data as an array of floating point data
     * 1. Calculate wave data
     * 2. Multiply it with Blackman window
     * 3. Convert samples into complex numbers (imaginary parts are all zeros)
     * 4. Apply the Fourier transform with fftSize = 2048, as a result we get 1024 FFT bins
     * 5. Convert complex result into real values using cabs() function
     * 6. Divide each value by fftSize
     * 7. Apply smoothing by using previously calculated spectrum values
     * 8. Convert resulting values to dB: dB = 20 * log10(v)
     * 9. Convert floating point dB spectrum into 8-bit values:
     * 10. Write 8-bit values into texture
     */
    // 1
    if(data->samples.length() >= 2048)
    {
        QVector<qreal> rawSamples = data->samples.mid(0, 2048);
        data->samples.remove(0, 2048);
        int N = 2048;
        auto window = createBlackmanWindow(N);
        std::vector<double> windowedSamples(N);
        QVector<int> waveData;
        for (int i = 0; i < N; ++i) {
            waveData.push_back(static_cast<int>(std::clamp(static_cast<int>(128 * rawSamples[i] + 1) * 2, 0, 255)));
            windowedSamples[i] = rawSamples[i] * window[i];
        }
        // Step 2: Convert to complex
        std::vector<std::complex<double>> complexSamples(N);
        for (int i = 0; i < N; ++i) {
            complexSamples[i] = std::complex<double>(windowedSamples[i], 0.0);
        }
        // Step 3: Apply FFTW3 transformation
        fftw_plan plan = fftw_plan_dft_1d(N,
                                          reinterpret_cast<fftw_complex*>(complexSamples.data()),
                                          reinterpret_cast<fftw_complex*>(complexSamples.data()),
                                          FFTW_FORWARD, FFTW_ESTIMATE);
        fftw_execute(plan);
        fftw_destroy_plan(plan);
        // Step 4: Convert back to floats and divide by N
        std::vector<float> magnitude(N);
        for (int i = 0; i < N; ++i) {
            double real = complexSamples[i].real();
            double imag = complexSamples[i].imag();
            magnitude[i] = static_cast<float>(std::sqrt(real * real + imag * imag) / N);
        }
        // Step 5: Apply smoothing
        auto smoothed = smoothData(magnitude, 3); // Using window size of 3
        // Step 6: Convert to decibels
        std::vector<float> dbValues(smoothed.size());
        const float minDb = -100.0f; // Minimum dB value for clamping
        const float reference = 1.0f; // Reference amplitude
        for (size_t i = 0; i < smoothed.size(); ++i) {
            if (smoothed[i] > 0) {
                dbValues[i] = 20.0f * std::log10(smoothed[i] / reference);
            } else {
                dbValues[i] = minDb;
            }
        }
        // Step 7: Clamp to 8-bit values for red channel
        std::vector<uint8_t> redChannel(dbValues.size());
        for (size_t i = 0; i < dbValues.size(); ++i) {
            // Clamp between -100dB and 0dB, then map to 0-255 range
            float clamped = std::max(minDb, std::min(0.0f, dbValues[i]));
            redChannel[i] = static_cast<uint8_t>((clamped + 100.0f) * 2.55f);
        }
        QPixmap audioTexture(512,2);
        QPainter painter(&audioTexture);
        painter.fillRect(QRect(0,0,512,2), QColor::fromRgb(0,0,0));
        //we can only paint the lower half of the spectrum
        for(int index = 0; index < 512; ++index)
        {
            //paint the pixels
            painter.setPen(QPen(QColor::fromRgb(redChannel[index], 0, 0), 1));
            painter.drawPoint(index, 0);
            painter.setPen(QPen(QColor::fromRgb(waveData[index], 0, 0), 1));
            painter.drawPoint(index, 1);
        }
        painter.end();
        if(m_mutex.tryLock(1))
        {
            m_instance->m_frame = audioTexture;
            m_mutex.unlock();
        }
    }
    pw_stream_queue_buffer(data->stream, b);
 }
 // Blackman window function
 /**
  * Builds a Blackman window using the coefficients 0.42, 0.5 and 0.08.
  *
  * @param size Number of window coefficients to generate.
  * @return The window coefficients. An empty vector is returned for
  *         size <= 0, and a single coefficient of 1.0 for size == 1 (the
  *         general formula would divide by zero there).
  */
 std::vector<double> AudioModel::createBlackmanWindow(int size) {
    // A non-positive size cannot be turned into a vector length.
    if (size <= 0)
        return {};

    // With a single point, size - 1 is 0 and the formula below evaluates
    // 0 / 0, i.e. NaN.
    if (size == 1)
        return {1.0};

    std::vector<double> window(size);
    const double a0 = 0.42;
    const double a1 = 0.5;
    const double a2 = 0.08;

    for (int i = 0; i < size; ++i) {
        window[i] = a0 - a1 * std::cos(2.0 * M_PI * i / (size - 1)) +
                    a2 * std::cos(4.0 * M_PI * i / (size - 1));
    }

    return window;
 }
 // Simple smoothing function using moving average
 /**
  * Moving-average smoothing.
  *
  * Each output element is the mean of the input elements that lie within
  * windowSize / 2 positions of it; positions outside the input are left out
  * of the mean.
  *
  * @param data       Values to smooth.
  * @param windowSize Width of the averaging window.
  * @return A vector of the same length as data holding the averaged values.
  */
 std::vector<float> AudioModel::smoothData(const std::vector<float>& data, int windowSize) {
    const int total = static_cast<int>(data.size());
    const int radius = windowSize / 2;
    std::vector<float> result(data.size());

    for (size_t i = 0; i < data.size(); ++i) {
        const int center = static_cast<int>(i);

        // Clip the averaging range to the valid index range of the input.
        const int lowest = (center - radius < 0) ? 0 : center - radius;
        const int highest = (center + radius > total - 1) ? total - 1 : center + radius;

        float accumulated = 0.0f;
        for (int k = lowest; k <= highest; ++k)
            accumulated += data[k];

        const int used = highest - lowest + 1;
        if (used > 0)
            result[i] = accumulated / used;
        else
            result[i] = 0.0f;
    }

    return result;
 }
 void AudioModel::do_quit(void *userdata, int signal_number)
 {
    Q_UNUSED(signal_number)
    struct impl *data = reinterpret_cast<impl*>(userdata);
    pw_main_loop_quit(data->loop);
 }
@@ -3,6 +3,18 @@
 *  Copyright (C) 2025 @DigitalArtifex | github.com/DigitalArtifex
 *
 *  AudioModel.h
 * 
 *  This is pretty much just a reimplementation of the audiocapture example
 *  from the PipeWire docs.
 * 
 *  NOTICE:
 *  The spectrum data is currently out of spec according to the documentation
 *  https://webaudio.github.io/web-audio-api/#smoothing-over-time
 * 
 *  The described smoothing method was resulting in inconsistent data. This
 *  is likely due to a poor implementation. A linear smoothing algo seems to work
 *  (at least visually). Will need to revisit the temporal implementation if 
 *  things do not work as expected.
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
@@ -22,85 +34,102 @@
 #define AUDIOMODEL_H
 #include "Komplex_global.h"
-#include <QObject>
+#include <QObject> 
 #include <QString>
 #include <QFile>
 #include <QJsonDocument>
 #include <QJsonObject>
 #include <QJsonArray>
 #include <QJsonParseError>
-#include <QAudioDevice>
+#include <QThread>
-#include <QMediaDevices>
+#include <QtEndian>
-#include <QAudioInput>
+#include <QPixmap>
-#include <QMediaCaptureSession>
+#include <QQmlEngine>
-#include <QMediaRecorder>
+#include <QJSValue>
 #include <QVector>
 #include <QPainter>
 #include <QBrush>
 #include <QPen>
 #include <QThread>
 #include <QMutex>
 #include <QtConcurrent/QtConcurrent>
 #include <QtQml/qqmlregistration.h>
 #include <complex>
 #include <pipewire/pipewire.h>
 #include <spa/param/audio/raw.h>
 #include <spa/pod/pod.h>
 #include <spa/pod/builder.h>
 #include <spa/param/format-types.h>
 #include <spa/param/buffers.h>
 #include <spa/param/audio/format-utils.h>
-    class KOMPLEX_EXPORT AudioModel : public QObject
+class KOMPLEX_EXPORT AudioModel : public QObject
 {
    Q_OBJECT
    QML_SINGLETON
    QML_NAMED_ELEMENT(AudioModel)
 public:
    AudioModel(QObject *parent = nullptr);
    ~AudioModel();
    /**!
     * @brief frame
     * This function returns the current audio frame as a QPixmap.
     * It is expected to be called after the frameChanged signal is emitted, if using from CPP
     *
     * If it is being used from QML, it will need to be resolved from the AudioTexture Image Provider (image:/audio/frame#.jpg).
     * See AudioImageProvider for more details.
     * 
     * @return QPixmap containing the current audio frame.
     */
    static QPixmap frame();
    // Q_INVOKABLE bool init();
    Q_INVOKABLE static void startCapture();
    Q_INVOKABLE static void stopCapture();
 private Q_SLOTS:
    static void startCaptureAsync();
 private:
    static std::vector<double> createBlackmanWindow(int size);
    static std::vector<float> smoothData(const std::vector<float>& data, int windowSize = 5);
    struct impl
    {
-        Q_OBJECT
+        pw_main_loop *loop;
-        QML_ELEMENT
+        pw_stream *stream;
    public:
        explicit AudioModel(QObject *parent = nullptr);
        ~AudioModel();
-        /**!
+        spa_audio_info format;
-         * @brief frame
+        unsigned move:1;
         * This function returns the current audio frame as a QString.
         * It is expected to be called periodically to update the audio frame for the shader.
         * 
         * @return QString containing the current audio frame.
         */
        QByteArray frame() const;
-        /**!
+        QVector<qreal> samples; // we need at least 2048 samples
-         * @brief device
+        QVector<qreal> smoothed; // we're supposed to save for smoothing, but I couldn't get this method to work
-         * This function returns the currently set audio device name.
+        qreal last;
         * 
         * @return QString containing the name of the audio device.
         */
        QString device() const;
        /**!
         * @brief availableDevices
         * This function returns a list of available audio devices on the system.
         * 
         * @return QStringList containing the names of available audio devices.
         */
        QStringList availableDevices() const;
        /**!
         * @brief setDeviceName
         * This function sets the audio device to be used for capturing audio frames.
         * 
         * @param device The name of the audio device to set.
         */
        Q_INVOKABLE void setDeviceName(const QString &device);
        /**!
         * @brief getAudioFrame
         * This function retrieves the current audio frame from the specified audio device.
         * It is expected to be called periodically to update the audio frame for the shader.
         * 
         * It is an asynchronous function and will emit the frameChanged signal when the audio frame is ready.
         */
        Q_INVOKABLE void getAudioFrame();
    Q_SIGNALS:
        void frameChanged();
    private:
        QString m_deviceString;
        QMediaCaptureSession *m_captureSession = nullptr;
        QAudioInput *m_audioInput = nullptr;
        QMediaRecorder *m_recorder = nullptr;
        Q_PROPERTY(QByteArray frame READ frame NOTIFY frameChanged)
        Q_PROPERTY(QString device READ device WRITE setDeviceName NOTIFY frameChanged)
    };
    inline static AudioModel *m_instance = nullptr;
    inline static QThread *m_thread = nullptr;
    inline static QMutex m_mutex;
    QPixmap m_frame;
    inline static impl m_impl_data;
    inline static bool m_running = false;
    static void on_process(void *user_data);
    static void do_quit(void *user_data, int signal_number);
    static void on_stream_param_changed(void *_data, uint32_t id, const struct spa_pod *param);
    inline static const struct pw_stream_events stream_events = {
        .version = PW_VERSION_STREAM_EVENTS,
        .param_changed = on_stream_param_changed,
        .process = on_process,
    };
 };
 Q_DECLARE_METATYPE(AudioModel)
@@ -12,6 +12,8 @@ add_library(
        plugin.cpp
        ShaderPackModel.cpp
        AudioModel.cpp
        AudioImageProvider.cpp
        AudioImageProvider.h
 ) 
 qt_add_qml_module(
@@ -20,7 +22,7 @@ qt_add_qml_module(
        ${QMLPLUGIN_URI}
    VERSION
        1.0
-    PLUGIN_TARGET  
+    PLUGIN_TARGET
        ${PROJECT_NAME}
    CLASS_NAME
        KomplexPlugin
@@ -28,7 +30,8 @@ qt_add_qml_module(
        plugin.cpp
        ShaderPackModel.cpp
        AudioModel.cpp
-    NO_GENERATE_PLUGIN_SOURCE 
+        AudioImageProvider.cpp
    NO_GENERATE_PLUGIN_SOURCE
 )
 target_link_libraries(
@@ -43,6 +46,8 @@ target_link_libraries(
        KF6::CoreAddons
        KF6::I18n
        KF6::Package
        PipeWire::PipeWire
        fftw3
 )
 target_compile_definitions(
@@ -3,6 +3,7 @@
 #include <QQmlExtensionPlugin>
 #include "AudioModel.h"
 #include "AudioImageProvider.h"
 #include "ShaderPackModel.h"
 #include "Komplex_global.h"
@@ -18,6 +19,12 @@ public:
        qmlRegisterType<AudioModel>(uri, 1, 0, "AudioModel");
        qmlRegisterType<ShaderPackModel>(uri, 1, 0, "ShaderPackModel");
    }
    // Registers the audio texture image provider with the QML engine under
    // the "audiotexture" id.
    void initializeEngine(QQmlEngine *engine, const char *uri) override
    {
        const QLatin1String expectedUri("com.github.digitalartifex.komplex");
        Q_ASSERT(QLatin1String(uri) == expectedUri);

        const QString providerId = QString::fromLatin1("audiotexture");
        engine->addImageProvider(providerId, new AudioImageProvider);
    }
 };
 #include "plugin.moc"