diff --git a/plugin/AudioImageProvider.cpp b/plugin/AudioImageProvider.cpp new file mode 100644 index 0000000..3910ac2 --- /dev/null +++ b/plugin/AudioImageProvider.cpp @@ -0,0 +1,16 @@ +#include "AudioImageProvider.h" + +AudioImageProvider::AudioImageProvider() + : QQuickImageProvider(QQuickImageProvider::Pixmap) {} + +QPixmap AudioImageProvider::requestPixmap(const QString &id, QSize *size, const QSize &requestedSize) +{ + Q_UNUSED(id) // id is useless here. we always want to return the latest frame from AudioModel + Q_UNUSED(requestedSize) // requested size is useless too. texture must always be 512x2 + + if(size) + *size = AudioModel::frame().size(); + + //return the latest frame + return AudioModel::frame(); +} \ No newline at end of file diff --git a/plugin/AudioImageProvider.h b/plugin/AudioImageProvider.h new file mode 100644 index 0000000..2ce4322 --- /dev/null +++ b/plugin/AudioImageProvider.h @@ -0,0 +1,17 @@ +#ifndef AUDIOIMAGEPROVIDER_H +#define AUDIOIMAGEPROVIDER_H +#include +#include +#include + +#include "AudioModel.h" + +class AudioImageProvider : public QQuickImageProvider +{ + public: + explicit AudioImageProvider(); + + QPixmap requestPixmap(const QString &id, QSize *size, const QSize &requestedSize) override; +}; + +#endif \ No newline at end of file diff --git a/plugin/AudioModel.cpp b/plugin/AudioModel.cpp index 83b5f60..3a3af8a 100644 --- a/plugin/AudioModel.cpp +++ b/plugin/AudioModel.cpp @@ -1,72 +1,363 @@ +/* PipeWire */ +/* SPDX-FileCopyrightText: Copyright © 2022 Wim Taymans */ +/* SPDX-License-Identifier: MIT */ + +/* + [title] + Audio capture using \ref pw_stream "pw_stream". 
+ [title] + */ + +#include +#include +#include + #include "AudioModel.h" -#ifdef AUDIOMODEL_H - -AudioModel::AudioModel(QObject *parent) - : QObject(parent), m_deviceString(QString()) +AudioModel::AudioModel(QObject *parent) : QObject(parent) { + m_impl_data = { nullptr, nullptr, 0, 1, {}, {}}; + m_impl_data.samples.reserve(4096); + m_impl_data.smoothed.reserve(2048); + + //fill the smoothed data buffer with 0s + for(int i = 0; i < 2048; ++i) + m_impl_data.smoothed.insert(i, 0); + + m_thread = new QThread(parent); + + moveToThread(m_thread); + connect(m_thread, &QThread::started, this, &AudioModel::startCaptureAsync); + + const struct spa_pod *params[1]; + uint8_t buffer[1024]; + struct pw_properties *props; + struct spa_pod_builder b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer)); + + pw_init(nullptr, nullptr); + + /* make a main loop. If you already have another main loop, you can add + * the fd of this pipewire mainloop to it. */ + m_impl_data.loop = pw_main_loop_new(NULL); + + pw_loop_add_signal(pw_main_loop_get_loop(m_impl_data.loop), SIGINT, do_quit, &m_impl_data); + pw_loop_add_signal(pw_main_loop_get_loop(m_impl_data.loop), SIGTERM, do_quit, &m_impl_data); + + /* Create a simple stream, the simple stream manages the core and remote + * objects for you if you don't need to deal with them. + * + * If you plan to autoconnect your stream, you need to provide at least + * media, category and role properties. + * + * Pass your events and a user_data pointer as the last arguments. This + * will inform you about the stream state. The most important event + * you need to listen to is the process event where you need to produce + * the data. 
+ */ + props = pw_properties_new(PW_KEY_MEDIA_TYPE, "Audio", + PW_KEY_MEDIA_CATEGORY, "Capture", + PW_KEY_MEDIA_ROLE, "Music", + NULL); + + /* capture from the sink monitor ports instead of a source device */ + pw_properties_set(props, PW_KEY_STREAM_CAPTURE_SINK, "true"); + + m_impl_data.stream = pw_stream_new_simple( + pw_main_loop_get_loop(m_impl_data.loop), + "audio-capture", + props, + &stream_events, + &m_impl_data); + + struct spa_audio_info_raw info = SPA_AUDIO_INFO_RAW_INIT( + .format = SPA_AUDIO_FORMAT_F32, + .rate = 44100, + .channels = 2 + ); + + /* Make one parameter with the supported formats. The SPA_PARAM_EnumFormat + * id means that this is a format enumeration (of 1 value). + * We request 44100 Hz stereo F32 here rather than leaving the rate and + * channels empty to accept the native graph values. */ + params[0] = spa_format_audio_raw_build(&b, SPA_PARAM_EnumFormat, &info); + + /* Now connect this stream. We ask that our process function is + * called in a realtime thread. */ + pw_stream_connect(m_impl_data.stream, + PW_DIRECTION_INPUT, + PW_ID_ANY, + static_cast(PW_STREAM_FLAG_AUTOCONNECT | + PW_STREAM_FLAG_MAP_BUFFERS | + PW_STREAM_FLAG_RT_PROCESS), + params, 1); + + if(!m_instance) + m_instance = this; } AudioModel::~AudioModel() { - if (m_recorder) + stopCapture(); + + if (m_impl_data.stream) { - m_recorder->stop(); - delete m_recorder; + pw_stream_disconnect(m_impl_data.stream); + pw_stream_destroy(m_impl_data.stream); + } + if (m_impl_data.loop) + pw_main_loop_destroy(m_impl_data.loop); + + if(m_thread) + { + if(m_thread->isRunning()) + m_thread->quit(); + + m_thread->deleteLater(); } - if (m_audioInput) - delete m_audioInput; - - if (m_captureSession) - delete m_captureSession; + pw_deinit(); } -QByteArray AudioModel::frame() const +void AudioModel::startCapture() { - // This function should return the current audio frame. - // For now, we return an empty QByteArray. 
- return QByteArray(); -} - -QString AudioModel::device() const -{ - return m_deviceString; -} - -QStringList AudioModel::availableDevices() const -{ - QStringList devices; - - // Assuming QAudioDeviceInfo is used to get available audio devices - for (const auto &device : QMediaDevices::audioInputs()) - { - devices.append(QString::fromLatin1(device.id())); - } - - return devices; -} - -void AudioModel::setDeviceName(const QString &device) -{ - if (m_deviceString == device) + if(m_thread->isRunning()) return; - m_deviceString = device; - - // if (m_audioInput) - // { - // m_audioInput->setDevice(QAudioInput(device)); - // getAudioFrame(); - // } + m_thread->start(QThread::NormalPriority); } -void AudioModel::getAudioFrame() +void AudioModel::stopCapture() { - // This function should be implemented to retrieve the audio frame - // from the audio input device and emit the frameChanged signal. - // For now, we will just emit the signal to indicate that the frame is ready. - Q_EMIT frameChanged(); + m_running = false; + pw_main_loop_quit(m_impl_data.loop); } -#endif \ No newline at end of file +void AudioModel::startCaptureAsync() +{ + pw_main_loop_run(m_impl_data.loop); +} + +QPixmap AudioModel::frame() +{ + return m_instance->m_frame; +} + +/* Be notified when the stream param changes. We're only looking at the + * format changes. + */ +void AudioModel::on_stream_param_changed(void *_data, uint32_t id, const struct spa_pod *param) +{ + struct impl *data = reinterpret_cast(_data); + + /* NULL means to clear the format */ + if (param == NULL || id != SPA_PARAM_Format) + return; + + if (spa_format_parse(param, &data->format.media_type, &data->format.media_subtype) < 0) + return; + + /* only accept raw audio */ + if (data->format.media_type != SPA_MEDIA_TYPE_audio || + data->format.media_subtype != SPA_MEDIA_SUBTYPE_raw) + return; + + /* call a helper function to parse the format for us. 
*/ + spa_format_audio_raw_parse(param, &data->format.info.raw); + + fprintf(stdout, "capturing rate:%d channels:%d\n", data->format.info.raw.rate, data->format.info.raw.channels); +} + +/* our data processing function is in general: + * + * struct pw_buffer *b; + * b = pw_stream_dequeue_buffer(stream); + * + * .. consume stuff in the buffer ... + * + * pw_stream_queue_buffer(stream, b); + */ +void AudioModel::on_process(void *userdata) +{ + struct impl *data = reinterpret_cast(userdata); + struct pw_buffer *b; + struct spa_buffer *buf; + + float *samples, max; + uint32_t c, n, n_channels, n_samples, peak; + + if ((b = pw_stream_dequeue_buffer(data->stream)) == NULL) { + pw_log_warn("out of buffers: %m"); + return; + } + + buf = b->buffer; + if ((samples = reinterpret_cast(buf->datas[0].data)) == NULL) + return; + + n_channels = data->format.info.raw.channels; + n_samples = buf->datas[0].chunk->size / sizeof(float); + + // convert channels to mono + for(int index = 0; index < n_samples; index += n_channels) + { + float average = 0; + + for(int channel = 0; channel < n_channels; channel++) + average += samples[index + channel]; + + average /= n_channels; + + if(index > 0) + data->samples.push_back(average); + } + + /** + * To convert the captured samples to an audio texture we need to: + * + * Take 2048 samples of audio data as an array of floating point data + * 1. Calculate wave data + * 2. Multiply it with Blackman window + * 3. Convert samples into complex numbers (imaginary parts are all zeros) + * 4. Apply the Fourier transform with fftSize = 2048, as a result we get 1024 FFT bins + * 5. Convert complex result into real values using cabs() function + * 6. Divide each value by fftSize + * 7. Apply smoothing by using previously calculated spectrum values + * 8. Convert resulting values to dB: dB = 20 * log10(v) + * 9. Convert floating point dB spectrum into 8-bit values: + * 10. 
Write 8-bit values into texture + */ + + // 1 + if(data->samples.length() >= 2048) + { + QVector rawSamples = data->samples.mid(0, 2048); + data->samples.remove(0, 2048); + + int N = 2048; + auto window = createBlackmanWindow(N); + std::vector windowedSamples(N); + + QVector waveData; + + for (int i = 0; i < N; ++i) { + waveData.push_back(static_cast(std::clamp(static_cast(128 * rawSamples[i] + 1) * 2, 0, 255))); + windowedSamples[i] = rawSamples[i] * window[i]; + } + + // Step 2: Convert to complex + std::vector> complexSamples(N); + for (int i = 0; i < N; ++i) { + complexSamples[i] = std::complex(windowedSamples[i], 0.0); + } + + // Step 3: Apply FFTW3 transformation + fftw_plan plan = fftw_plan_dft_1d(N, + reinterpret_cast(complexSamples.data()), + reinterpret_cast(complexSamples.data()), + FFTW_FORWARD, FFTW_ESTIMATE); + + fftw_execute(plan); + fftw_destroy_plan(plan); + + // Step 4: Convert back to floats and divide by N + std::vector magnitude(N); + for (int i = 0; i < N; ++i) { + double real = complexSamples[i].real(); + double imag = complexSamples[i].imag(); + magnitude[i] = static_cast(std::sqrt(real * real + imag * imag) / N); + } + + // Step 5: Apply smoothing + auto smoothed = smoothData(magnitude, 3); // Using window size of 3 + + // Step 6: Convert to decibels + std::vector dbValues(smoothed.size()); + const float minDb = -100.0f; // Minimum dB value for clamping + const float reference = 1.0f; // Reference amplitude + + for (size_t i = 0; i < smoothed.size(); ++i) { + if (smoothed[i] > 0) { + dbValues[i] = 20.0f * std::log10(smoothed[i] / reference); + } else { + dbValues[i] = minDb; + } + } + + // Step 7: Clamp to 8-bit values for red channel + std::vector redChannel(dbValues.size()); + + for (size_t i = 0; i < dbValues.size(); ++i) { + // Clamp between -100dB and 0dB, then map to 0-255 range + float clamped = std::max(minDb, std::min(0.0f, dbValues[i])); + redChannel[i] = static_cast((clamped + 100.0f) * 2.55f); + } + + QPixmap 
audioTexture(512,2); + QPainter painter(&audioTexture); + painter.fillRect(QRect(0,0,512,2), QColor::fromRgb(0,0,0)); + + //we can only paint the lower half of the spectrum + for(int index = 0; index < 512; ++index) + { + //paint the pixels + painter.setPen(QPen(QColor::fromRgb(redChannel[index], 0, 0), 1)); + painter.drawPoint(index, 0); + painter.setPen(QPen(QColor::fromRgb(waveData[index], 0, 0), 1)); + painter.drawPoint(index, 1); + } + + painter.end(); + + if(m_mutex.tryLock(1)) + { + m_instance->m_frame = audioTexture; + m_mutex.unlock(); + } + } + + pw_stream_queue_buffer(data->stream, b); +} + +// Blackman window function +std::vector AudioModel::createBlackmanWindow(int size) { + std::vector window(size); + const double a0 = 0.42; + const double a1 = 0.5; + const double a2 = 0.08; + + for (int i = 0; i < size; ++i) { + window[i] = a0 - a1 * std::cos(2.0 * M_PI * i / (size - 1)) + + a2 * std::cos(4.0 * M_PI * i / (size - 1)); + } + return window; +} + +// Simple smoothing function using moving average +std::vector AudioModel::smoothData(const std::vector& data, int windowSize) { + std::vector smoothed(data.size()); + + for (size_t i = 0; i < data.size(); ++i) { + float sum = 0.0f; + int count = 0; + + for (int j = -windowSize/2; j <= windowSize/2; ++j) { + int idx = i + j; + if (idx >= 0 && idx < static_cast(data.size())) { + sum += data[idx]; + count++; + } + } + + smoothed[i] = count > 0 ? sum / count : 0.0f; + } + + return smoothed; +} + +void AudioModel::do_quit(void *userdata, int signal_number) +{ + Q_UNUSED(signal_number) + + struct impl *data = reinterpret_cast(userdata); + pw_main_loop_quit(data->loop); +} diff --git a/plugin/AudioModel.h b/plugin/AudioModel.h index 9f57e7e..b0e795e 100644 --- a/plugin/AudioModel.h +++ b/plugin/AudioModel.h @@ -3,6 +3,18 @@ * Copyright (C) 2025 @DigitalArtifex | github.com/DigitalArtifex * * AudioModel.h + * + * This is pretty much just a reimplementation of the audiocapture example + * from the PipeWire docs. 
+ * + * NOTICE: + * The spectrum data is currently out of spec according to the documentation + * https://webaudio.github.io/web-audio-api/#smoothing-over-time + * + * The described smoothing method was resulting in inconsistent data. This + * is likely due to a poor implementation. A linear smoothing algo seems to work + * (at least visually). Will need to revisit the temporal implementation if + * things do not work as expected. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,85 +34,102 @@ #define AUDIOMODEL_H #include "Komplex_global.h" -#include +#include #include #include #include #include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include - class KOMPLEX_EXPORT AudioModel : public QObject +class KOMPLEX_EXPORT AudioModel : public QObject +{ + Q_OBJECT + QML_SINGLETON + QML_NAMED_ELEMENT(AudioModel) + +public: + AudioModel(QObject *parent = nullptr); + ~AudioModel(); + + /**! + * @brief frame + * This function returns the current audio frame as a QPixmap. + * It is expected to be called after the frameChanged signal is emitted, if using from CPP + * + * If it is being used from QML, it will need to be resolved from the "audiotexture" image provider (image://audiotexture/frame#.jpg). + * See AudioImageProvider for more details. + * + * @return QPixmap containing the current audio frame. 
+ */ + static QPixmap frame(); + + // Q_INVOKABLE bool init(); + Q_INVOKABLE static void startCapture(); + Q_INVOKABLE static void stopCapture(); + +private Q_SLOTS: + static void startCaptureAsync(); + +private: + static std::vector createBlackmanWindow(int size); + static std::vector smoothData(const std::vector& data, int windowSize = 5); + + struct impl { - Q_OBJECT - QML_ELEMENT - public: - explicit AudioModel(QObject *parent = nullptr); - ~AudioModel(); + pw_main_loop *loop; + pw_stream *stream; - /**! - * @brief frame - * This function returns the current audio frame as a QString. - * It is expected to be called periodically to update the audio frame for the shader. - * - * @return QString containing the current audio frame. - */ - QByteArray frame() const; + spa_audio_info format; + unsigned move:1; - /**! - * @brief device - * This function returns the currently set audio device name. - * - * @return QString containing the name of the audio device. - */ - QString device() const; - - /**! - * @brief availableDevices - * This function returns a list of available audio devices on the system. - * - * @return QStringList containing the names of available audio devices. - */ - QStringList availableDevices() const; - - /**! - * @brief setDeviceName - * This function sets the audio device to be used for capturing audio frames. - * - * @param device The name of the audio device to set. - */ - Q_INVOKABLE void setDeviceName(const QString &device); - - /**! - * @brief getAudioFrame - * This function retrieves the current audio frame from the specified audio device. - * It is expected to be called periodically to update the audio frame for the shader. - * - * It is an asynchronous fuction and will emit the frameChanged signal when the audio frame is ready. 
- */ - Q_INVOKABLE void getAudioFrame(); - - Q_SIGNALS: - void frameChanged(); - - private: - QString m_deviceString; - - QMediaCaptureSession *m_captureSession = nullptr; - QAudioInput *m_audioInput = nullptr; - QMediaRecorder *m_recorder = nullptr; - - Q_PROPERTY(QByteArray frame READ frame NOTIFY frameChanged) - Q_PROPERTY(QString device READ device WRITE setDeviceName NOTIFY frameChanged) + QVector samples; // we need at least 2048 samples + QVector smoothed; // we're supposed to save for smoothing, but I couldn't get this method to work + qreal last; }; + inline static AudioModel *m_instance = nullptr; + inline static QThread *m_thread = nullptr; + inline static QMutex m_mutex; + + QPixmap m_frame; + + inline static impl m_impl_data; + inline static bool m_running = false; + + static void on_process(void *user_data); + static void do_quit(void *user_data, int signal_number); + + static void on_stream_param_changed(void *_data, uint32_t id, const struct spa_pod *param); + + inline static const struct pw_stream_events stream_events = { + .version = PW_VERSION_STREAM_EVENTS, + .param_changed = on_stream_param_changed, + .process = on_process, + }; +}; Q_DECLARE_METATYPE(AudioModel) diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 39cbb05..4e819dc 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -12,6 +12,8 @@ add_library( plugin.cpp ShaderPackModel.cpp AudioModel.cpp + AudioImageProvider.cpp + AudioImageProvider.h ) qt_add_qml_module( @@ -20,7 +22,7 @@ qt_add_qml_module( ${QMLPLUGIN_URI} VERSION 1.0 - PLUGIN_TARGET + PLUGIN_TARGET ${PROJECT_NAME} CLASS_NAME KomplexPlugin @@ -28,7 +30,8 @@ qt_add_qml_module( plugin.cpp ShaderPackModel.cpp AudioModel.cpp - NO_GENERATE_PLUGIN_SOURCE + AudioImageProvider.cpp + NO_GENERATE_PLUGIN_SOURCE ) target_link_libraries( @@ -43,6 +46,8 @@ target_link_libraries( KF6::CoreAddons KF6::I18n KF6::Package + PipeWire::PipeWire + fftw3 ) target_compile_definitions( diff --git a/plugin/plugin.cpp 
b/plugin/plugin.cpp index 5a0d60c..d792aa0 100644 --- a/plugin/plugin.cpp +++ b/plugin/plugin.cpp @@ -3,6 +3,7 @@ #include #include "AudioModel.h" +#include "AudioImageProvider.h" #include "ShaderPackModel.h" #include "Komplex_global.h" @@ -18,6 +19,12 @@ public: qmlRegisterType(uri, 1, 0, "AudioModel"); qmlRegisterType(uri, 1, 0, "ShaderPackModel"); } + + void initializeEngine(QQmlEngine *engine, const char *uri) override + { + Q_ASSERT(QLatin1String(uri) == QLatin1String("com.github.digitalartifex.komplex")); + engine->addImageProvider(QString::fromLatin1("audiotexture"), new AudioImageProvider); + } }; #include "plugin.moc" \ No newline at end of file