Added backend support for audio buffers (PipeWire)

This commit is contained in:
Digital Artifex
2025-08-09 19:02:10 -04:00
parent 91ac5771c4
commit 22758910c3
6 changed files with 484 additions and 119 deletions

View File

@@ -0,0 +1,16 @@
#include "AudioImageProvider.h"
// Registers this provider as one that serves pixmaps (as opposed to images or
// textures), so the engine routes requests to requestPixmap().
AudioImageProvider::AudioImageProvider()
    : QQuickImageProvider(QQuickImageProvider::Pixmap)
{
}
/**
 * Serves the most recent audio texture held by AudioModel.
 *
 * @param id            Ignored: the provider always returns the latest frame.
 * @param size          If non-null, receives the size of the returned pixmap.
 * @param requestedSize Ignored: the texture is produced at a fixed size (512x2).
 * @return The pixmap returned by AudioModel::frame().
 */
QPixmap AudioImageProvider::requestPixmap(const QString &id, QSize *size, const QSize &requestedSize)
{
    Q_UNUSED(id)
    Q_UNUSED(requestedSize)

    // Fetch the frame exactly once. Asking AudioModel twice (once for the size,
    // once for the pixels) could report the size of one frame and return
    // another if the capture side publishes a new frame in between.
    const QPixmap latest = AudioModel::frame();

    if (size)
        *size = latest.size();

    return latest;
}

View File

@@ -0,0 +1,17 @@
#ifndef AUDIOIMAGEPROVIDER_H
#define AUDIOIMAGEPROVIDER_H
#include <QObject>
#include <QPixmap>
#include <QQuickImageProvider>
#include "AudioModel.h"
/**
 * QML image provider that exposes the audio texture generated by AudioModel.
 *
 * Every request is answered with the latest frame, regardless of the
 * requested id or size.
 */
class AudioImageProvider : public QQuickImageProvider
{
public:
    /// Creates a provider that serves QPixmap data.
    explicit AudioImageProvider();

    /**
     * Returns the latest audio frame.
     *
     * @param id            Unused.
     * @param size          Optional out-parameter receiving the frame size.
     * @param requestedSize Unused.
     */
    QPixmap requestPixmap(const QString &id, QSize *size, const QSize &requestedSize) override;
};
#endif

View File

@@ -1,72 +1,363 @@
/* PipeWire */
/* SPDX-FileCopyrightText: Copyright © 2022 Wim Taymans */
/* SPDX-License-Identifier: MIT */
/*
[title]
Audio capture using \ref pw_stream "pw_stream".
[title]
*/
#include <stdio.h>
#include <math.h>
#include <fftw3.h>
#include "AudioModel.h" #include "AudioModel.h"
#ifdef AUDIOMODEL_H AudioModel::AudioModel(QObject *parent) : QObject(parent)
AudioModel::AudioModel(QObject *parent)
: QObject(parent), m_deviceString(QString())
{ {
m_impl_data = { nullptr, nullptr, 0, 1, {}, {}};
m_impl_data.samples.reserve(4096);
m_impl_data.smoothed.reserve(2048);
//fill the smoothed data buffer with 0s
for(int i = 0; i < 2048; ++i)
m_impl_data.smoothed.insert(i, 0);
m_thread = new QThread(parent);
moveToThread(m_thread);
connect(m_thread, &QThread::started, this, &AudioModel::startCaptureAsync);
const struct spa_pod *params[1];
uint8_t buffer[1024];
struct pw_properties *props;
struct spa_pod_builder b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer));
pw_init(nullptr, nullptr);
/* make a main loop. If you already have another main loop, you can add
* the fd of this pipewire mainloop to it. */
m_impl_data.loop = pw_main_loop_new(NULL);
pw_loop_add_signal(pw_main_loop_get_loop(m_impl_data.loop), SIGINT, do_quit, &m_impl_data);
pw_loop_add_signal(pw_main_loop_get_loop(m_impl_data.loop), SIGTERM, do_quit, &m_impl_data);
/* Create a simple stream, the simple stream manages the core and remote
* objects for you if you don't need to deal with them.
*
* If you plan to autoconnect your stream, you need to provide at least
* media, category and role properties.
*
* Pass your events and a user_data pointer as the last arguments. This
* will inform you about the stream state. The most important event
* you need to listen to is the process event where you need to produce
* the data.
*/
props = pw_properties_new(PW_KEY_MEDIA_TYPE, "Audio",
PW_KEY_MEDIA_CATEGORY, "Capture",
PW_KEY_MEDIA_ROLE, "Music",
NULL);
/* capture from the sink monitor ports; remove the next line to capture from a regular source instead */
pw_properties_set(props, PW_KEY_STREAM_CAPTURE_SINK, "true");
m_impl_data.stream = pw_stream_new_simple(
pw_main_loop_get_loop(m_impl_data.loop),
"audio-capture",
props,
&stream_events,
&m_impl_data);
struct spa_audio_info_raw info = SPA_AUDIO_INFO_RAW_INIT(
.format = SPA_AUDIO_FORMAT_F32,
.rate = 44100,
.channels = 2
);
/* Make one parameter with the supported formats. The SPA_PARAM_EnumFormat
* id means that this is a format enumeration (of 1 value).
* Format, rate and channels are all fixed here (F32, 44100 Hz, 2 channels)
* instead of being left empty to accept the native graph settings. */
params[0] = spa_format_audio_raw_build(&b, SPA_PARAM_EnumFormat, &info);
/* Now connect this stream. We ask that our process function is
* called in a realtime thread. */
pw_stream_connect(m_impl_data.stream,
PW_DIRECTION_INPUT,
PW_ID_ANY,
static_cast<pw_stream_flags>(PW_STREAM_FLAG_AUTOCONNECT |
PW_STREAM_FLAG_MAP_BUFFERS |
PW_STREAM_FLAG_RT_PROCESS),
params, 1);
if(!m_instance)
m_instance = this;
} }
AudioModel::~AudioModel() AudioModel::~AudioModel()
{ {
if (m_recorder) stopCapture();
if (m_impl_data.stream)
{ {
m_recorder->stop(); pw_stream_disconnect(m_impl_data.stream);
delete m_recorder; pw_stream_destroy(m_impl_data.stream);
}
if (m_impl_data.loop)
pw_main_loop_destroy(m_impl_data.loop);
if(m_thread)
{
if(m_thread->isRunning())
m_thread->quit();
m_thread->deleteLater();
} }
if (m_audioInput) pw_deinit();
delete m_audioInput;
if (m_captureSession)
delete m_captureSession;
} }
QByteArray AudioModel::frame() const void AudioModel::startCapture()
{ {
// This function should return the current audio frame. if(m_thread->isRunning())
// For now, we return an empty QByteArray.
return QByteArray();
}
QString AudioModel::device() const
{
return m_deviceString;
}
QStringList AudioModel::availableDevices() const
{
QStringList devices;
// Assuming QAudioDeviceInfo is used to get available audio devices
for (const auto &device : QMediaDevices::audioInputs())
{
devices.append(QString::fromLatin1(device.id()));
}
return devices;
}
void AudioModel::setDeviceName(const QString &device)
{
if (m_deviceString == device)
return; return;
m_deviceString = device; m_thread->start(QThread::NormalPriority);
// if (m_audioInput)
// {
// m_audioInput->setDevice(QAudioInput(device));
// getAudioFrame();
// }
} }
void AudioModel::getAudioFrame() void AudioModel::stopCapture()
{ {
// This function should be implemented to retrieve the audio frame m_running = false;
// from the audio input device and emit the frameChanged signal. pw_main_loop_quit(m_impl_data.loop);
// For now, we will just emit the signal to indicate that the frame is ready.
Q_EMIT frameChanged();
} }
#endif void AudioModel::startCaptureAsync()
{
pw_main_loop_run(m_impl_data.loop);
}
/**
 * Returns a copy of the most recently published audio texture.
 *
 * The frame is written by on_process() under m_mutex, so it is read under the
 * same mutex here. If no AudioModel instance has been created yet, a null
 * QPixmap is returned instead of dereferencing a null instance pointer.
 *
 * @return The latest frame, or a null QPixmap when no instance exists.
 */
QPixmap AudioModel::frame()
{
    QMutexLocker locker(&m_mutex);

    if (!m_instance)
        return QPixmap();

    return m_instance->m_frame;
}
/*
 * Stream "param_changed" callback. Only format changes are of interest: when a
 * raw audio format is negotiated it is parsed into the impl's format field so
 * that on_process() knows the channel count of the incoming buffers.
 *
 * _data  - the impl structure registered with the stream
 * id     - id of the parameter that changed
 * param  - the new parameter, or NULL when the format is being cleared
 */
void AudioModel::on_stream_param_changed(void *_data, uint32_t id, const struct spa_pod *param)
{
    struct impl *data = static_cast<impl*>(_data);

    /* NULL means to clear the format */
    if (param == nullptr || id != SPA_PARAM_Format)
        return;

    if (spa_format_parse(param, &data->format.media_type, &data->format.media_subtype) < 0)
        return;

    /* only accept raw audio */
    if (data->format.media_type != SPA_MEDIA_TYPE_audio ||
        data->format.media_subtype != SPA_MEDIA_SUBTYPE_raw)
        return;

    /* Parse the raw format. A failure must not be reported as a successful
     * negotiation, since the channel count is used to walk the sample data. */
    if (spa_format_audio_raw_parse(param, &data->format.info.raw) < 0) {
        fprintf(stderr, "failed to parse raw audio format\n");
        return;
    }

    /* rate and channels are unsigned 32-bit values */
    fprintf(stdout, "capturing rate:%u channels:%u\n",
            data->format.info.raw.rate, data->format.info.raw.channels);
}
/* our data processing function is in general:
*
* struct pw_buffer *b;
* b = pw_stream_dequeue_buffer(stream);
*
* .. consume stuff in the buffer ...
*
* pw_stream_queue_buffer(stream, b);
*/
void AudioModel::on_process(void *userdata)
{
struct impl *data = reinterpret_cast<impl*>(userdata);
struct pw_buffer *b;
struct spa_buffer *buf;
float *samples, max;
uint32_t c, n, n_channels, n_samples, peak;
if ((b = pw_stream_dequeue_buffer(data->stream)) == NULL) {
pw_log_warn("out of buffers: %m");
return;
}
buf = b->buffer;
if ((samples = reinterpret_cast<float*>(buf->datas[0].data)) == NULL)
return;
n_channels = data->format.info.raw.channels;
n_samples = buf->datas[0].chunk->size / sizeof(float);
// convert channels to mono
for(int index = 0; index < n_samples; index += n_channels)
{
float average = 0;
for(int channel = 0; channel < n_channels; channel++)
average += samples[index + channel];
average /= n_channels;
if(index > 0)
data->samples.push_back(average);
}
/**
* To convert the captured samples to an audio texture we need to:
*
* Take 2048 samples of audio data as an array of floating point data
* 1. Calculate wave data
* 2. Multiply it with Blackman window
* 3. Convert samples into complex numbers (imaginary parts are all zeros)
* 4. Apply the Fourier transform with fftSize = 2048, as a result we get 1024 FFT bins
* 5. Convert complex result into real values using cabs() function
* 6. Divide each value by fftSize
* 7. Apply smoothing by using previously calculated spectrum values
* 8. Convert resulting values to dB: dB = 20 * log10(v)
* 9. Convert floating point dB spectrum into 8-bit values:
* 10. Write 8-bit values into texture
*/
// 1
if(data->samples.length() >= 2048)
{
QVector<qreal> rawSamples = data->samples.mid(0, 2048);
data->samples.remove(0, 2048);
int N = 2048;
auto window = createBlackmanWindow(N);
std::vector<double> windowedSamples(N);
QVector<int> waveData;
for (int i = 0; i < N; ++i) {
waveData.push_back(static_cast<int>(std::clamp(static_cast<int>(128 * rawSamples[i] + 1) * 2, 0, 255)));
windowedSamples[i] = rawSamples[i] * window[i];
}
// Step 2: Convert to complex
std::vector<std::complex<double>> complexSamples(N);
for (int i = 0; i < N; ++i) {
complexSamples[i] = std::complex<double>(windowedSamples[i], 0.0);
}
// Step 3: Apply FFTW3 transformation
fftw_plan plan = fftw_plan_dft_1d(N,
reinterpret_cast<fftw_complex*>(complexSamples.data()),
reinterpret_cast<fftw_complex*>(complexSamples.data()),
FFTW_FORWARD, FFTW_ESTIMATE);
fftw_execute(plan);
fftw_destroy_plan(plan);
// Step 4: Convert back to floats and divide by N
std::vector<float> magnitude(N);
for (int i = 0; i < N; ++i) {
double real = complexSamples[i].real();
double imag = complexSamples[i].imag();
magnitude[i] = static_cast<float>(std::sqrt(real * real + imag * imag) / N);
}
// Step 5: Apply smoothing
auto smoothed = smoothData(magnitude, 3); // Using window size of 3
// Step 6: Convert to decibels
std::vector<float> dbValues(smoothed.size());
const float minDb = -100.0f; // Minimum dB value for clamping
const float reference = 1.0f; // Reference amplitude
for (size_t i = 0; i < smoothed.size(); ++i) {
if (smoothed[i] > 0) {
dbValues[i] = 20.0f * std::log10(smoothed[i] / reference);
} else {
dbValues[i] = minDb;
}
}
// Step 7: Clamp to 8-bit values for red channel
std::vector<uint8_t> redChannel(dbValues.size());
for (size_t i = 0; i < dbValues.size(); ++i) {
// Clamp between -100dB and 0dB, then map to 0-255 range
float clamped = std::max(minDb, std::min(0.0f, dbValues[i]));
redChannel[i] = static_cast<uint8_t>((clamped + 100.0f) * 2.55f);
}
QPixmap audioTexture(512,2);
QPainter painter(&audioTexture);
painter.fillRect(QRect(0,0,512,2), QColor::fromRgb(0,0,0));
//we can only paint the lower half of the spectrum
for(int index = 0; index < 512; ++index)
{
//paint the pixels
painter.setPen(QPen(QColor::fromRgb(redChannel[index], 0, 0), 1));
painter.drawPoint(index, 0);
painter.setPen(QPen(QColor::fromRgb(waveData[index], 0, 0), 1));
painter.drawPoint(index, 1);
}
painter.end();
if(m_mutex.tryLock(1))
{
m_instance->m_frame = audioTexture;
m_mutex.unlock();
}
}
pw_stream_queue_buffer(data->stream, b);
}
/**
 * Builds a Blackman window:
 *   w[i] = 0.42 - 0.5 * cos(2*pi*i / (size-1)) + 0.08 * cos(4*pi*i / (size-1))
 *
 * @param size Number of coefficients to generate.
 * @return The window coefficients. An empty vector is returned for size <= 0,
 *         and a single coefficient of 1.0 for size == 1 (the formula would
 *         otherwise divide by zero).
 */
std::vector<double> AudioModel::createBlackmanWindow(int size) {
    if (size <= 0)
        return {};
    if (size == 1)
        return {1.0};

    const double a0 = 0.42;
    const double a1 = 0.5;
    const double a2 = 0.08;
    const double lastIndex = size - 1;

    std::vector<double> window(static_cast<std::size_t>(size));
    for (int i = 0; i < size; ++i) {
        window[i] = a0 - a1 * std::cos(2.0 * M_PI * i / lastIndex) +
                    a2 * std::cos(4.0 * M_PI * i / lastIndex);
    }
    return window;
}
/**
 * Centred moving average.
 *
 * Each output element is the mean of the input elements that lie within
 * windowSize/2 positions on either side of it; positions that fall outside
 * the input are left out of the mean rather than padded.
 *
 * @param data       Values to smooth.
 * @param windowSize Width of the averaging window.
 * @return A vector of the same length as data. An element whose window
 *         contains no input positions is 0.
 */
std::vector<float> AudioModel::smoothData(const std::vector<float>& data, int windowSize) {
    const int total = static_cast<int>(data.size());
    const int firstOffset = -windowSize / 2;
    const int lastOffset = windowSize / 2;

    std::vector<float> result;
    result.reserve(data.size());

    for (int center = 0; center < total; ++center) {
        // Clip the window to the valid index range.
        const int lo = std::max(center + firstOffset, 0);
        const int hi = std::min(center + lastOffset, total - 1);

        float acc = 0.0f;
        for (int k = lo; k <= hi; ++k)
            acc += data[k];

        const int used = hi - lo + 1;
        result.push_back(used > 0 ? acc / used : 0.0f);
    }

    return result;
}
/*
 * Signal callback (registered for SIGINT and SIGTERM): asks the PipeWire main
 * loop stored in the impl structure to quit.
 *
 * userdata      - the impl structure passed when the signal source was added
 * signal_number - unused
 */
void AudioModel::do_quit(void *userdata, int signal_number)
{
    Q_UNUSED(signal_number)

    pw_main_loop_quit(reinterpret_cast<impl*>(userdata)->loop);
}

View File

@@ -3,6 +3,18 @@
* Copyright (C) 2025 @DigitalArtifex | github.com/DigitalArtifex * Copyright (C) 2025 @DigitalArtifex | github.com/DigitalArtifex
* *
* AudioModel.h * AudioModel.h
*
* This is pretty much just a reimplementation of the audiocapture example
* from the PipeWire docs.
*
* NOTICE:
* The spectrum data is currently out of spec according to the documentation
* https://webaudio.github.io/web-audio-api/#smoothing-over-time
*
* The described smoothing method was producing inconsistent data. This
* is likely due to a poor implementation. A linear smoothing algorithm seems
* to work (at least visually). The temporal implementation will need to be
* revisited if things do not work as expected.
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -22,85 +34,102 @@
#define AUDIOMODEL_H #define AUDIOMODEL_H
#include "Komplex_global.h" #include "Komplex_global.h"
#include <QObject> #include <QObject>
#include <QString> #include <QString>
#include <QFile> #include <QFile>
#include <QJsonDocument> #include <QJsonDocument>
#include <QJsonObject> #include <QJsonObject>
#include <QJsonArray> #include <QJsonArray>
#include <QJsonParseError> #include <QJsonParseError>
#include <QAudioDevice> #include <QThread>
#include <QMediaDevices> #include <QtEndian>
#include <QAudioInput> #include <QPixmap>
#include <QMediaCaptureSession> #include <QQmlEngine>
#include <QMediaRecorder> #include <QJSValue>
#include <QVector>
#include <QPainter>
#include <QBrush>
#include <QPen>
#include <QThread>
#include <QMutex>
#include <QtConcurrent/QtConcurrent>
#include <QtQml/qqmlregistration.h> #include <QtQml/qqmlregistration.h>
#include <complex>
#include <pipewire/pipewire.h>
#include <spa/param/audio/raw.h>
#include <spa/pod/pod.h>
#include <spa/pod/builder.h>
#include <spa/param/format-types.h>
#include <spa/param/buffers.h>
#include <spa/param/audio/format-utils.h>
class KOMPLEX_EXPORT AudioModel : public QObject class KOMPLEX_EXPORT AudioModel : public QObject
{
Q_OBJECT
QML_SINGLETON
QML_NAMED_ELEMENT(AudioModel)
public:
AudioModel(QObject *parent = nullptr);
~AudioModel();
/**!
* @brief frame
* This function returns the current audio frame as a QPixmap.
* It is expected to be called after the frameChanged signal is emitted, if using from CPP
*
* If it is being used from QML, it needs to be resolved through the "audiotexture" image provider (e.g. image://audiotexture/frame1; the id part is ignored by the provider).
* See AudioImageProvider for more details.
*
* @return QPixmap containing the current audio frame.
*/
static QPixmap frame();
// Q_INVOKABLE bool init();
Q_INVOKABLE static void startCapture();
Q_INVOKABLE static void stopCapture();
private Q_SLOTS:
static void startCaptureAsync();
private:
static std::vector<double> createBlackmanWindow(int size);
static std::vector<float> smoothData(const std::vector<float>& data, int windowSize = 5);
struct impl
{ {
Q_OBJECT pw_main_loop *loop;
QML_ELEMENT pw_stream *stream;
public:
explicit AudioModel(QObject *parent = nullptr);
~AudioModel();
/**! spa_audio_info format;
* @brief frame unsigned move:1;
* This function returns the current audio frame as a QString.
* It is expected to be called periodically to update the audio frame for the shader.
*
* @return QString containing the current audio frame.
*/
QByteArray frame() const;
/**! QVector<qreal> samples; // we need at least 2048 samples
* @brief device QVector<qreal> smoothed; // we're supposed to save for smoothing, but I couldn't get this method to work
* This function returns the currently set audio device name. qreal last;
*
* @return QString containing the name of the audio device.
*/
QString device() const;
/**!
* @brief availableDevices
* This function returns a list of available audio devices on the system.
*
* @return QStringList containing the names of available audio devices.
*/
QStringList availableDevices() const;
/**!
* @brief setDeviceName
* This function sets the audio device to be used for capturing audio frames.
*
* @param device The name of the audio device to set.
*/
Q_INVOKABLE void setDeviceName(const QString &device);
/**!
* @brief getAudioFrame
* This function retrieves the current audio frame from the specified audio device.
* It is expected to be called periodically to update the audio frame for the shader.
*
* It is an asynchronous fuction and will emit the frameChanged signal when the audio frame is ready.
*/
Q_INVOKABLE void getAudioFrame();
Q_SIGNALS:
void frameChanged();
private:
QString m_deviceString;
QMediaCaptureSession *m_captureSession = nullptr;
QAudioInput *m_audioInput = nullptr;
QMediaRecorder *m_recorder = nullptr;
Q_PROPERTY(QByteArray frame READ frame NOTIFY frameChanged)
Q_PROPERTY(QString device READ device WRITE setDeviceName NOTIFY frameChanged)
}; };
inline static AudioModel *m_instance = nullptr;
inline static QThread *m_thread = nullptr;
inline static QMutex m_mutex;
QPixmap m_frame;
inline static impl m_impl_data;
inline static bool m_running = false;
static void on_process(void *user_data);
static void do_quit(void *user_data, int signal_number);
static void on_stream_param_changed(void *_data, uint32_t id, const struct spa_pod *param);
inline static const struct pw_stream_events stream_events = {
.version = PW_VERSION_STREAM_EVENTS,
.param_changed = on_stream_param_changed,
.process = on_process,
};
};
Q_DECLARE_METATYPE(AudioModel) Q_DECLARE_METATYPE(AudioModel)

View File

@@ -12,6 +12,8 @@ add_library(
plugin.cpp plugin.cpp
ShaderPackModel.cpp ShaderPackModel.cpp
AudioModel.cpp AudioModel.cpp
AudioImageProvider.cpp
AudioImageProvider.h
) )
qt_add_qml_module( qt_add_qml_module(
@@ -20,7 +22,7 @@ qt_add_qml_module(
${QMLPLUGIN_URI} ${QMLPLUGIN_URI}
VERSION VERSION
1.0 1.0
PLUGIN_TARGET PLUGIN_TARGET
${PROJECT_NAME} ${PROJECT_NAME}
CLASS_NAME CLASS_NAME
KomplexPlugin KomplexPlugin
@@ -28,7 +30,8 @@ qt_add_qml_module(
plugin.cpp plugin.cpp
ShaderPackModel.cpp ShaderPackModel.cpp
AudioModel.cpp AudioModel.cpp
NO_GENERATE_PLUGIN_SOURCE AudioImageProvider.cpp
NO_GENERATE_PLUGIN_SOURCE
) )
target_link_libraries( target_link_libraries(
@@ -43,6 +46,8 @@ target_link_libraries(
KF6::CoreAddons KF6::CoreAddons
KF6::I18n KF6::I18n
KF6::Package KF6::Package
PipeWire::PipeWire
fftw3
) )
target_compile_definitions( target_compile_definitions(

View File

@@ -3,6 +3,7 @@
#include <QQmlExtensionPlugin> #include <QQmlExtensionPlugin>
#include "AudioModel.h" #include "AudioModel.h"
#include "AudioImageProvider.h"
#include "ShaderPackModel.h" #include "ShaderPackModel.h"
#include "Komplex_global.h" #include "Komplex_global.h"
@@ -18,6 +19,12 @@ public:
qmlRegisterType<AudioModel>(uri, 1, 0, "AudioModel"); qmlRegisterType<AudioModel>(uri, 1, 0, "AudioModel");
qmlRegisterType<ShaderPackModel>(uri, 1, 0, "ShaderPackModel"); qmlRegisterType<ShaderPackModel>(uri, 1, 0, "ShaderPackModel");
} }
void initializeEngine(QQmlEngine *engine, const char *uri) override
{
Q_ASSERT(QLatin1String(uri) == QLatin1String("com.github.digitalartifex.komplex"));
engine->addImageProvider(QString::fromLatin1("audiotexture"), new AudioImageProvider);
}
}; };
#include "plugin.moc" #include "plugin.moc"