From bb1afc12d298a9ab4da2f62288e6fa1868cc276f Mon Sep 17 00:00:00 2001
From: WolverinDEV <git@did.science>
Date: Sun, 9 Feb 2020 14:53:39 +0100
Subject: [PATCH] Added the ability to resample the audio quality for the
 input/output device

---
 .../serverconnection/src/audio/AudioInput.cpp |  76 ++++++++--
 .../serverconnection/src/audio/AudioInput.h   | 108 +++++++-------
 .../src/audio/AudioMerger.cpp                 |   3 +-
 .../src/audio/AudioOutput.cpp                 | 135 +++++++++++++-----
 .../serverconnection/src/audio/AudioOutput.h  |  11 +-
 .../src/audio/AudioResampler.h                |  17 ++-
 .../src/audio/driver/SoundIO.h                |   6 +-
 .../src/audio/driver/SoundIOPlayback.cpp      |  10 +-
 .../src/audio/js/AudioOutputStream.cpp        |   2 +-
 .../src/connection/audio/VoiceClient.cpp      |  89 ++++++++----
 .../src/connection/audio/VoiceClient.h        |   3 +
 .../src/connection/audio/VoiceConnection.cpp  |   1 +
 12 files changed, 314 insertions(+), 147 deletions(-)
diff --git a/native/serverconnection/src/audio/AudioInput.cpp b/native/serverconnection/src/audio/AudioInput.cpp
index 8944907..cd81615 100644
--- a/native/serverconnection/src/audio/AudioInput.cpp
+++ b/native/serverconnection/src/audio/AudioInput.cpp
@@ -1,8 +1,9 @@
 #include <cstring>
 #include <string>
 #include <misc/spin_lock.h>
-#include "AudioInput.h"
-#include "AudioReframer.h"
+#include "./AudioInput.h"
+#include "./AudioReframer.h"
+#include "./AudioResampler.h"
 #include "../logger.h"
 
 using namespace std;
@@ -186,9 +187,12 @@ void AudioConsumer::process_data(const void *buffer, size_t samples) {
 AudioInput::AudioInput(size_t channels, size_t rate) : _channel_count(channels), _sample_rate(rate) {}
 AudioInput::~AudioInput() {
 	this->close_device();
-	lock_guard lock(this->consumers_lock);
-	for(const auto& consumer : this->_consumers)
-		consumer->handle = nullptr;
+    {
+        lock_guard lock(this->consumers_lock);
+        for(const auto& consumer : this->_consumers)
+            consumer->handle = nullptr;
+    }
+	free(this->resample_buffer);
 }
 
 void AudioInput::set_device(const std::shared_ptr<AudioDevice> &device) {
@@ -206,6 +210,7 @@ void AudioInput::close_device() {
         this->input_recorder->stop_if_possible();
         this->input_recorder.reset();
     }
+    this->_resampler = nullptr;
     this->input_device = nullptr;
 }
 
@@ -223,6 +228,10 @@ bool AudioInput::record(std::string& error) {
         return false;
     }
 
+    if(this->input_recorder->sample_rate() != this->sample_rate()) {
+        this->_resampler = std::make_unique<AudioResampler>(this->input_recorder->sample_rate(), this->sample_rate(), this->_channel_count);
+    }
+
     this->input_recorder->register_consumer(this);
     if(!this->input_recorder->start(error)) {
         this->input_recorder->remove_consumer(this);
@@ -264,18 +273,59 @@ void AudioInput::delete_consumer(const std::shared_ptr<AudioConsumer> &source) {
 	source->handle = nullptr;
 }
 
-void AudioInput::consume(const void *input, size_t frameCount, size_t /* channels */) {
-	if(this->_volume != 1 && false) {
-		auto ptr = (float*) input;
-		auto left = frameCount * this->_channel_count;
-		while(left-- > 0)
-			*(ptr++) *= this->_volume;
+void AudioInput::consume(const void *input, size_t frameCount, size_t channels) {
+    if(channels != this->_channel_count) {
+        log_critical(category::audio, tr("Channel count miss match (input)! Fixme!"));
+        return;
+    }
+
+	if(this->_resampler) {
+	    const auto expected_size = this->_resampler->estimated_output_size(frameCount);
+	    const auto expected_byte_size = expected_size * this->_channel_count * sizeof(float);
+
+	    if(this->resample_buffer_size < expected_byte_size) {
+	        free(this->resample_buffer);
+	        this->resample_buffer = malloc(expected_byte_size);
+	        this->resample_buffer_size = expected_byte_size;
+	    }
+
+	    auto result = this->_resampler->process(this->resample_buffer, input, frameCount);
+	    if(result < 0) {
+	        log_error(category::audio, tr("Failed to resample input audio: {}"), result);
+	        return;
+	    }
+
+	    frameCount = (size_t) result;
+	    input = this->resample_buffer;
+
+        if(this->_volume != 1) {
+            auto ptr = (float*) input;
+            auto left = frameCount * this->_channel_count;
+            while(left-- > 0)
+                *(ptr++) *= this->_volume;
+        }
+	} else if(this->_volume != 1) {
+        const auto byte_size = frameCount * this->_channel_count * sizeof(float);
+        if(this->resample_buffer_size < byte_size) {
+            free(this->resample_buffer);
+            this->resample_buffer = malloc(byte_size);
+            this->resample_buffer_size = byte_size;
+        }
+
+        memcpy(this->resample_buffer, input, byte_size);
+        input = this->resample_buffer;
+
+
+        auto ptr = (float*) input;
+        auto left = frameCount * this->_channel_count;
+        while(left-- > 0)
+            *(ptr++) *= this->_volume;
 	}
 
 	auto begin = chrono::system_clock::now();
-	for(const auto& consumer : this->consumers()) {
+	for(const auto& consumer : this->consumers())
 		consumer->process_data(input, frameCount);
-	}
+
 	auto end = chrono::system_clock::now();
 	auto ms = chrono::duration_cast<chrono::milliseconds>(end - begin).count();
 	if(ms > 5) {
diff --git a/native/serverconnection/src/audio/AudioInput.h b/native/serverconnection/src/audio/AudioInput.h
index aebc383..716318f 100644
--- a/native/serverconnection/src/audio/AudioInput.h
+++ b/native/serverconnection/src/audio/AudioInput.h
@@ -10,73 +10,77 @@
 #include "driver/AudioDriver.h"
 
 class AudioInputSource;
-namespace tc {
-	namespace audio {
-		class AudioInput;
-		class Reframer;
+namespace tc::audio {
+    class AudioInput;
+    class Reframer;
+    class AudioResampler;
 
-		class AudioConsumer {
-				friend class AudioInput;
-			public:
-				AudioInput* handle;
+    class AudioConsumer {
+            friend class AudioInput;
+        public:
+            AudioInput* handle;
 
-				size_t const channel_count = 0;
-				size_t const sample_rate = 0;
+            size_t const channel_count = 0;
+            size_t const sample_rate = 0;
 
-				size_t const frame_size = 0;
+            size_t const frame_size = 0;
 
-				spin_lock on_read_lock; /* locked to access the function */
-				std::function<void(const void* /* buffer */, size_t /* samples */)> on_read;
-			private:
-				AudioConsumer(AudioInput* handle, size_t channel_count, size_t sample_rate, size_t frame_size);
+            spin_lock on_read_lock; /* locked to access the function */
+            std::function<void(const void* /* buffer */, size_t /* samples */)> on_read;
+        private:
+            AudioConsumer(AudioInput* handle, size_t channel_count, size_t sample_rate, size_t frame_size);
 
-				std::unique_ptr<Reframer> reframer;
+            std::unique_ptr<Reframer> reframer;
 
-				void process_data(const void* /* buffer */, size_t /* samples */);
-				void handle_framed_data(const void* /* buffer */, size_t /* samples */);
-		};
+            void process_data(const void* /* buffer */, size_t /* samples */);
+            void handle_framed_data(const void* /* buffer */, size_t /* samples */);
+    };
 
-	    class AudioInput : public AudioDeviceRecord::Consumer {
-		        friend class ::AudioInputSource;
-			public:
-				AudioInput(size_t /* channels */, size_t /* rate */);
-				virtual ~AudioInput();
+    class AudioInput : public AudioDeviceRecord::Consumer {
+            friend class ::AudioInputSource;
+        public:
+            AudioInput(size_t /* channels */, size_t /* rate */);
+            virtual ~AudioInput();
 
-                void set_device(const std::shared_ptr<AudioDevice>& /* device */);
-                [[nodiscard]] std::shared_ptr<AudioDevice> current_device() const { return this->input_device; }
-                void close_device();
+            void set_device(const std::shared_ptr<AudioDevice>& /* device */);
+            [[nodiscard]] std::shared_ptr<AudioDevice> current_device() const { return this->input_device; }
+            void close_device();
 
-                [[nodiscard]] bool record(std::string& /* error */);
-                [[nodiscard]] bool recording();
-				void stop();
+            [[nodiscard]] bool record(std::string& /* error */);
+            [[nodiscard]] bool recording();
+            void stop();
 
-				std::deque<std::shared_ptr<AudioConsumer>> consumers() {
-					std::lock_guard lock(this->consumers_lock);
-					return this->_consumers;
-				}
+            std::deque<std::shared_ptr<AudioConsumer>> consumers() {
+                std::lock_guard lock(this->consumers_lock);
+                return this->_consumers;
+            }
 
-				std::shared_ptr<AudioConsumer> create_consumer(size_t /* frame size */);
-				void delete_consumer(const std::shared_ptr<AudioConsumer>& /* source */);
+            std::shared_ptr<AudioConsumer> create_consumer(size_t /* frame size */);
+            void delete_consumer(const std::shared_ptr<AudioConsumer>& /* source */);
 
-				inline size_t channel_count() { return this->_channel_count; }
-				inline size_t sample_rate() { return this->_sample_rate; }
+            inline size_t channel_count() { return this->_channel_count; }
+            inline size_t sample_rate() { return this->_sample_rate; }
 
-				inline float volume() { return this->_volume; }
-				inline void set_volume(float value) { this->_volume = value; }
-			private:
-				void consume(const void *, size_t, size_t) override;
+            inline float volume() { return this->_volume; }
+            inline void set_volume(float value) { this->_volume = value; }
+        private:
+            void consume(const void *, size_t, size_t) override;
 
-				size_t const _channel_count;
-				size_t const _sample_rate;
+            size_t const _channel_count;
+            size_t const _sample_rate;
 
-				std::mutex consumers_lock;
-				std::deque<std::shared_ptr<AudioConsumer>> _consumers;
+            std::mutex consumers_lock;
+            std::deque<std::shared_ptr<AudioConsumer>> _consumers;
+            std::unique_ptr<AudioResampler> _resampler{nullptr};
 
-				std::recursive_mutex input_source_lock;
-                std::shared_ptr<AudioDevice> input_device{};
-				float _volume = 1.f;
+            std::recursive_mutex input_source_lock;
+            std::shared_ptr<AudioDevice> input_device{};
 
-                std::shared_ptr<AudioDeviceRecord> input_recorder{};
-        };
-	}
+            void* resample_buffer{nullptr}; /* scratch buffer consume() fills with the resampled and/or volume-adjusted samples; released in the destructor */
+            size_t resample_buffer_size{0}; /* currently allocated size of resample_buffer in bytes */
+
+            float _volume = 1.f;
+
+            std::shared_ptr<AudioDeviceRecord> input_recorder{};
+    };
 }
\ No newline at end of file
diff --git a/native/serverconnection/src/audio/AudioMerger.cpp b/native/serverconnection/src/audio/AudioMerger.cpp
index f075ff3..c018580 100644
--- a/native/serverconnection/src/audio/AudioMerger.cpp
+++ b/native/serverconnection/src/audio/AudioMerger.cpp
@@ -47,7 +47,8 @@ bool merge::merge_n_sources(void *dest, void **srcs, size_t src_length, size_t c
 		if(src_length == 0)
 			return false;
 	}
-	memcpy(dest, srcs[0], channels * samples * 4);
+	if(srcs[0] != dest)
+	    memcpy(dest, srcs[0], channels * samples * 4);
 	srcs++;
 	src_length--;
 
diff --git a/native/serverconnection/src/audio/AudioOutput.cpp b/native/serverconnection/src/audio/AudioOutput.cpp
index 7be078d..60ee5eb 100644
--- a/native/serverconnection/src/audio/AudioOutput.cpp
+++ b/native/serverconnection/src/audio/AudioOutput.cpp
@@ -42,9 +42,10 @@ ssize_t AudioOutputSource::pop_samples(void *buffer, size_t samples) {
 
 
     if(auto fn = this->on_underflow; fn)
-        if(fn())
+        if(fn(samples - written))
             goto load_buffer;
 
+    if(buffer) memset(buffer, 0, (samples - written) * sizeof(float) * this->channel_count); /* buffer may be nullptr (a muted output pops without a target). NOTE(review): confirm buffer already points past the written samples */
     this->buffering = true;
 	if(this->on_read)
 		this->on_read();
@@ -158,29 +159,52 @@ void AudioOutput::delete_source(const std::shared_ptr<tc::audio::AudioOutputSour
 }
 
 void AudioOutput::cleanup_buffers() {
-	lock_guard buffer_lock(this->buffer_lock);
-	if(this->source_buffer)
-		free(this->source_buffer);
-	if(this->source_merge_buffer)
-		free(this->source_merge_buffer);
+    free(this->source_buffer);
+    free(this->source_merge_buffer);
+    free(this->resample_overhead_buffer);
 
 	this->source_merge_buffer = nullptr;
 	this->source_buffer = nullptr;
+	this->resample_overhead_buffer = nullptr;
+
 	this->source_merge_buffer_length = 0;
 	this->source_buffer_length = 0;
+    this->resample_overhead_buffer_length = 0;
+    this->resample_overhead_samples = 0;
 }
 
-void AudioOutput::fill_buffer(void *output, size_t frameCount, size_t channels) {
-    const auto local_frame_count = this->_resampler ? this->_resampler->
-	lock_guard buffer_lock(this->buffer_lock);
+void AudioOutput::fill_buffer(void *output, size_t out_frame_count, size_t channels) {
+    if(channels != this->_channel_count) {
+        log_critical(category::audio, tr("Channel count miss match (output)! Fixme!"));
+        return;
+    }
+    const auto local_frame_count = this->_resampler ? this->_resampler->input_size(out_frame_count) : out_frame_count;
+
+    if(this->resample_overhead_samples > 0) { /* first hand out the frames left over from the previous resample pass */
+        const auto samples_to_write = this->resample_overhead_samples > out_frame_count ? out_frame_count : this->resample_overhead_samples;
+        const auto byte_length = samples_to_write * sizeof(float) * channels;
+
+        memcpy(output, this->resample_overhead_buffer, byte_length);
+        if(samples_to_write == out_frame_count) {
+            this->resample_overhead_samples -= samples_to_write;
+            memmove(this->resample_overhead_buffer, (char*) this->resample_overhead_buffer + byte_length, this->resample_overhead_samples * this->_channel_count * sizeof(float)); /* source and destination share one buffer and may overlap, so memcpy would be undefined behaviour */
+            return;
+        } else {
+            this->resample_overhead_samples = 0;
+            output = (char*) output + byte_length;
+            out_frame_count -= samples_to_write;
+        }
+    }
+
     if(this->_volume <= 0) {
         for(auto& source : this->_sources)
-            source->pop_samples(nullptr, frameCount);
-        memset(output, 0, sizeof(frameCount) * channels * sizeof(float));
+            source->pop_samples(nullptr, local_frame_count); /* discard the source-rate frames this callback would have consumed */
+        memset(output, 0, out_frame_count * channels * sizeof(float)); /* output holds out_frame_count device-rate frames; local_frame_count is the pre-resample count */
         return;
     }
 
-	size_t buffer_length = frameCount * 4 * this->_channel_count;
+	const size_t local_buffer_length = local_frame_count * 4 * this->_channel_count;
+    const size_t out_buffer_length = out_frame_count * 4 * this->_channel_count;
 	size_t sources = 0;
 	size_t actual_sources = 0;
 
@@ -191,68 +215,103 @@ void AudioOutput::fill_buffer(void *output, size_t frameCount, size_t channels)
 
 		if(sources > 0) {
 			 /* allocate the required space */
-            auto source_buffer_length = buffer_length * sources;
-            auto source_merge_buffer_length = sizeof(void*) * sources;
+            const auto required_source_buffer_length = (out_buffer_length > local_buffer_length ? out_buffer_length : local_buffer_length) * sources; /* ensure enough space for later resample */
+            const auto required_source_merge_buffer_length = sizeof(void*) * sources;
 
-            //TODO: Move this out of the loop?
             {
 
-                if(this->source_buffer_length < source_buffer_length || !this->source_buffer) {
+                if(this->source_buffer_length < required_source_buffer_length || !this->source_buffer) {
                     if(this->source_buffer)
                         free(this->source_buffer);
-                    this->source_buffer = malloc(source_buffer_length);
-                    this->source_buffer_length = source_buffer_length;
+                    this->source_buffer = malloc(required_source_buffer_length);
+                    this->source_buffer_length = required_source_buffer_length;
                 }
-                if(this->source_merge_buffer_length < source_merge_buffer_length || !this->source_merge_buffer) {
+                if(this->source_merge_buffer_length < required_source_merge_buffer_length || !this->source_merge_buffer) {
                     if (this->source_merge_buffer)
                         free(this->source_merge_buffer);
-                    this->source_merge_buffer = (void **) malloc(source_merge_buffer_length);
-                    this->source_merge_buffer_length = source_merge_buffer_length;
+                    this->source_merge_buffer = (void **) malloc(required_source_merge_buffer_length);
+                    this->source_merge_buffer_length = required_source_merge_buffer_length;
                 }
             }
 
 			for(size_t index = 0; index < sources; index++) {
 				auto& source = this->_sources[index];
 
-                this->source_merge_buffer[index] = (char*) this->source_buffer + (buffer_length * index);
-                auto written_frames = this->_sources[index]->pop_samples(this->source_merge_buffer[index], frameCount);
-                if(written_frames != frameCount) {
+                this->source_merge_buffer[index] = (char*) this->source_buffer + (local_buffer_length * index);
+                auto written_frames = this->_sources[index]->pop_samples(this->source_merge_buffer[index], local_frame_count);
+                if(written_frames != local_frame_count) {
                     if(written_frames <= 0) {
                         this->source_merge_buffer[index] = nullptr;
                         actual_sources--;
                     } else {
                         /* fill up the rest with silence (0) */
                         auto written = written_frames * this->_channel_count * 4;
-                        memset((char*) this->source_merge_buffer[index] + written, 0, (frameCount - written_frames) * this->_channel_count * 4);
+                        memset((char*) this->source_merge_buffer[index] + written, 0, (local_frame_count - written_frames) * this->_channel_count * 4);
                     }
                 }
 			}
-		}
+		} else
+		    goto clear_buffer_exit;
 	}
 
 	if(actual_sources > 0) {
-		if(!merge::merge_n_sources(output, this->source_merge_buffer, sources, this->_channel_count, frameCount))
-			log_warn(category::audio, tr("failed to merge buffers!"));
+	    if(local_frame_count == out_frame_count) {
+            if(!merge::merge_n_sources(output, this->source_merge_buffer, sources, this->_channel_count, local_frame_count))
+                log_warn(category::audio, tr("failed to merge buffers!"));
+	    } else {
+            if(!merge::merge_n_sources(this->source_buffer, this->source_merge_buffer, sources, this->_channel_count, local_frame_count))
+                log_warn(category::audio, tr("failed to merge buffers!"));
 
+            /* this->source_buffer could hold the amount of resampled data (checked above) */
+            const auto resample_result = this->_resampler->process(this->source_buffer, this->source_buffer, local_frame_count);
+            const size_t resampled_samples = resample_result < 0 ? 0 : (size_t) resample_result; /* a negative result is a resampler error: treat it as "no samples" so silence is played instead of indexing with a huge unsigned value */
+            if(resampled_samples > out_frame_count) { /* more frames than requested: keep the surplus for the next callback */
+                const auto diff_length = resampled_samples - out_frame_count;
+                const auto overhead_buffer_offset = this->resample_overhead_samples * sizeof(float) * this->_channel_count;
+                const auto diff_byte_length = diff_length * sizeof(float) * this->_channel_count;
+
+                if(this->resample_overhead_buffer_length < diff_byte_length + overhead_buffer_offset) {
+                    this->resample_overhead_buffer_length = diff_byte_length + overhead_buffer_offset;
+                    auto new_buffer = malloc(this->resample_overhead_buffer_length);
+                    if(this->resample_overhead_buffer)
+                        memcpy(new_buffer, this->resample_overhead_buffer, overhead_buffer_offset);
+                    free(this->resample_overhead_buffer);
+                    this->resample_overhead_buffer = new_buffer;
+                }
+                memcpy(
+                        (char*) this->resample_overhead_buffer + overhead_buffer_offset,
+                        (char*) this->source_buffer + out_frame_count * sizeof(float) * this->_channel_count,
+                        diff_byte_length
+                );
+                this->resample_overhead_samples += diff_length;
+            } else if(resampled_samples < out_frame_count) {
+                log_warn(category::audio, tr("Resampled samples does not match requested sampeles: {} <> {}"), resample_result, out_frame_count);
+                memset((char*) this->source_buffer + resampled_samples * sizeof(float) * this->_channel_count, 0, (out_frame_count - resampled_samples) * sizeof(float) * this->_channel_count); /* pad the missing tail with silence instead of stale merge data */
+            }
+            memcpy(output, this->source_buffer, out_frame_count * sizeof(float) * this->_channel_count);
+	    }
+
+	    /* lets apply the volume */
 		auto volume = this->_volume;
 		if(volume != 1) {
-            auto float_length = this->_channel_count * frameCount;
+            auto float_length = this->_channel_count * out_frame_count;
             auto data = (float*) output;
             while(float_length-- > 0)
                 *data++ *= volume;
 		}
 
 	} else {
-		memset(output, 0, this->_channel_count * sizeof(float) * frameCount);
+        clear_buffer_exit:
+		memset(output, 0, this->_channel_count * sizeof(float) * out_frame_count);
 	}
 }
 
-void AudioOutput::set_device(const std::shared_ptr<AudioDevice> &device) {
+void AudioOutput::set_device(const std::shared_ptr<AudioDevice> &new_device) {
     lock_guard lock(this->device_lock);
-    if(this->device == device) return;
+    if(this->device == new_device) return;
 
     this->close_device();
-    this->device = device;
+    this->device = new_device;
 }
 
 void AudioOutput::close_device() {
@@ -263,6 +322,7 @@ void AudioOutput::close_device() {
         this->_playback.reset();
     }
 
+    this->_resampler = nullptr;
     this->device = nullptr;
 }
 
@@ -280,6 +340,15 @@ bool AudioOutput::playback(std::string& error) {
         return false;
 	}
 
+    if(this->_playback->sample_rate() != this->sample_rate()) {
+        this->_resampler = std::make_unique<AudioResampler>(this->sample_rate(), this->_playback->sample_rate(), this->channel_count());
+        if(!this->_resampler->valid()) {
+            error = "failed to allocate a resampler";
+            this->_playback = nullptr;
+            return false;
+        }
+    }
+
 	this->_playback->register_source(this);
     return this->_playback->start(error);
 }
\ No newline at end of file
diff --git a/native/serverconnection/src/audio/AudioOutput.h b/native/serverconnection/src/audio/AudioOutput.h
index 48b1ba9..1fa4382 100644
--- a/native/serverconnection/src/audio/AudioOutput.h
+++ b/native/serverconnection/src/audio/AudioOutput.h
@@ -15,6 +15,7 @@
 
 namespace tc::audio {
 		class AudioOutput;
+        class AudioResampler;
 
 		namespace overflow_strategy {
 			enum value {
@@ -50,7 +51,7 @@ namespace tc::audio {
 				overflow_strategy::value overflow_strategy = overflow_strategy::discard_buffer_half;
 
 				/* if it returns true then the it means that the buffer has been refilled, we have to test again */
-				std::function<bool()> on_underflow;
+				std::function<bool(size_t /* sample count */)> on_underflow;
 				std::function<void(size_t /* sample count */)> on_overflow;
 				std::function<void()> on_read; /* will be invoked after sample read, e.g. for buffer fullup */
 
@@ -90,7 +91,7 @@ namespace tc::audio {
 				inline float volume() { return this->_volume; }
 				inline void set_volume(float value) { this->_volume = value; }
 			private:
-				void fill_buffer(void *, size_t frames, size_t channels) override;
+				void fill_buffer(void *, size_t out_frame_count, size_t channels) override;
 
 				size_t const _channel_count;
 				size_t const _sample_rate;
@@ -103,10 +104,14 @@ namespace tc::audio {
                 std::shared_ptr<AudioDevicePlayback> _playback{nullptr};
                 std::unique_ptr<AudioResampler> _resampler{nullptr};
 
-				std::mutex buffer_lock; /* not required, but why not. Usually only used within audio_callback! */
+                /* only access these buffers within the audio loop! */
 				void* source_buffer = nullptr;
 				void** source_merge_buffer = nullptr;
 
+                void* resample_overhead_buffer{nullptr}; /* resampled frames which exceeded the last fill_buffer request; handed out first on the next call */
+                size_t resample_overhead_buffer_length{0}; /* allocated size of resample_overhead_buffer in bytes */
+                size_t resample_overhead_samples{0}; /* number of frames (samples per channel) currently stored in resample_overhead_buffer */
+
 				size_t source_buffer_length = 0;
 				size_t source_merge_buffer_length = 0;
 				void cleanup_buffers();
diff --git a/native/serverconnection/src/audio/AudioResampler.h b/native/serverconnection/src/audio/AudioResampler.h
index cef4f55..72fa2e9 100644
--- a/native/serverconnection/src/audio/AudioResampler.h
+++ b/native/serverconnection/src/audio/AudioResampler.h
@@ -17,18 +17,21 @@ namespace tc {
 				AudioResampler(size_t /* input rate */, size_t /* output rate */, size_t /* channels */);
 				virtual ~AudioResampler();
 
-				inline size_t channels() { return this->_channels; }
-				inline size_t input_rate() { return this->_input_rate; }
-				inline size_t output_rate() { return this->_output_rate; }
+                [[nodiscard]] inline size_t channels() { return this->_channels; }
+                [[nodiscard]] inline size_t input_rate() { return this->_input_rate; }
+                [[nodiscard]] inline size_t output_rate() { return this->_output_rate; }
 
-				inline long double io_ratio() { return (long double) this->_output_rate / (long double) this->_input_rate; }
-				inline size_t estimated_output_size(size_t input_length) {
+                [[nodiscard]] inline long double io_ratio() { return (long double) this->_output_rate / (long double) this->_input_rate; }
+                [[nodiscard]] inline size_t estimated_output_size(size_t input_length) {
 					return (size_t) lroundl(this->io_ratio() * input_length) + 1;
 				}
+                [[nodiscard]] inline size_t input_size(size_t output_length) { /* input sample count corresponding to output_length output samples, rounded to the nearest integer */
+                    return (size_t) lroundl((long double) this->_input_rate / (long double) this->_output_rate * output_length);
+                }
 
-				inline bool valid() { return this->io_ratio() == 1 || this->soxr_handle != nullptr; }
+                [[nodiscard]] inline bool valid() { return this->io_ratio() == 1 || this->soxr_handle != nullptr; }
 
-				ssize_t process(void* /* output */, const void* /* input */, size_t /* input length */);
+                [[nodiscard]] ssize_t process(void* /* output */, const void* /* input */, size_t /* input length */);
 			private:
 				size_t const _channels = 0;
 				size_t const _input_rate = 0;
diff --git a/native/serverconnection/src/audio/driver/SoundIO.h b/native/serverconnection/src/audio/driver/SoundIO.h
index a3e7993..de2548b 100644
--- a/native/serverconnection/src/audio/driver/SoundIO.h
+++ b/native/serverconnection/src/audio/driver/SoundIO.h
@@ -31,12 +31,12 @@ namespace tc::audio {
 
     class SoundIOPlayback : public AudioDevicePlayback {
         public:
-            constexpr static auto kChunkTime{0.02};
+            constexpr static auto kChunkTime{0.01};
 
             explicit SoundIOPlayback(struct ::SoundIoDevice* /* handle */);
             virtual ~SoundIOPlayback();
 
-            size_t sample_rate() const override;
+            [[nodiscard]] size_t sample_rate() const override;
         protected:
             bool impl_start(std::string& /* error */) override;
             void impl_stop() override;
@@ -60,7 +60,7 @@ namespace tc::audio {
             explicit SoundIORecord(struct ::SoundIoDevice* /* handle */);
             virtual ~SoundIORecord();
 
-            size_t sample_rate() const override;
+            [[nodiscard]] size_t sample_rate() const override;
         protected:
             bool impl_start(std::string& /* error */) override;
             void impl_stop() override;
diff --git a/native/serverconnection/src/audio/driver/SoundIOPlayback.cpp b/native/serverconnection/src/audio/driver/SoundIOPlayback.cpp
index aca6800..f717a16 100644
--- a/native/serverconnection/src/audio/driver/SoundIOPlayback.cpp
+++ b/native/serverconnection/src/audio/driver/SoundIOPlayback.cpp
@@ -140,6 +140,8 @@ void SoundIOPlayback::write_callback(int frame_count_min, int frame_count_max) {
         if(frame_count_max == 0) return;
     }
 
+#ifdef WIN32
+    //TODO: Test for WASAPI & Shared mode
     {
         double latency{};
         if(auto err = soundio_outstream_get_latency(this->stream, &latency); err) {
@@ -148,15 +150,17 @@ void SoundIOPlayback::write_callback(int frame_count_min, int frame_count_max) {
         }
         if(latency > max_latency) return;
     }
+#endif
 
     while(frames_left > 0) {
         int frame_count{frames_left};
         auto buffered = soundio_ring_buffer_fill_count(this->buffer) / (sizeof(float) * layout->channel_count);
         if(frame_count > buffered) {
             if(buffered == 0) {
-                const auto length = sizeof(float) * frame_count * layout->channel_count;
-                this->fill_buffer(soundio_ring_buffer_write_ptr(this->buffer), frame_count, layout->channel_count);
-                soundio_ring_buffer_advance_write_ptr(this->buffer, length);
+                const auto fill_sample_count = (soundio_ring_buffer_free_count(this->buffer) / sizeof(float) / layout->channel_count); /* whole frames fitting into the free ring buffer space, using the real channel count instead of an assumed 2 */
+                this->fill_buffer(soundio_ring_buffer_write_ptr(this->buffer), fill_sample_count, layout->channel_count);
+                soundio_ring_buffer_advance_write_ptr(this->buffer, fill_sample_count * sizeof(float) * layout->channel_count); /* advance by the byte size of the frames requested above */
+                buffered += fill_sample_count;
             } else
                 frame_count = buffered;
         }
diff --git a/native/serverconnection/src/audio/js/AudioOutputStream.cpp b/native/serverconnection/src/audio/js/AudioOutputStream.cpp
index 97b254f..88b49f5 100644
--- a/native/serverconnection/src/audio/js/AudioOutputStream.cpp
+++ b/native/serverconnection/src/audio/js/AudioOutputStream.cpp
@@ -92,7 +92,7 @@ void AudioOutputStreamWrapper::do_wrap(const v8::Local<v8::Object> &obj) {
 		});
 
 		this->_own_handle->on_overflow = [&](size_t){ this->call_overflow(); };
-		this->_own_handle->on_underflow = [&]{ this->call_underflow(); return false; };
+		this->_own_handle->on_underflow = [&](size_t){ this->call_underflow(); return false; };
 	}
 }
 
diff --git a/native/serverconnection/src/connection/audio/VoiceClient.cpp b/native/serverconnection/src/connection/audio/VoiceClient.cpp
index c573fd7..57dfffd 100644
--- a/native/serverconnection/src/connection/audio/VoiceClient.cpp
+++ b/native/serverconnection/src/connection/audio/VoiceClient.cpp
@@ -209,34 +209,6 @@ VoiceClientWrap::VoiceClientWrap(const std::shared_ptr<VoiceClient>& client) : _
 VoiceClientWrap::~VoiceClientWrap() {}
 
 VoiceClient::VoiceClient(const std::shared_ptr<VoiceConnection>&, uint16_t client_id) : _client_id(client_id) {
-	this->output_source = global_audio_output->create_source();
-	this->output_source->overflow_strategy = audio::overflow_strategy::ignore;
-	this->output_source->max_buffered_samples = (size_t) ceil(this->output_source->sample_rate * 0.5);
-	this->output_source->min_buffered_samples = (size_t) ceil(this->output_source->sample_rate * 0.04);
-
-	this->output_source->on_underflow = [&]{
-		if(this->_state == state::stopping)
-			this->set_state(state::stopped);
-		else if(this->_state != state::stopped) {
-			if(this->_last_received_packet + chrono::seconds(1) < chrono::system_clock::now()) {
-				this->set_state(state::stopped);
-				log_warn(category::audio, tr("Client {} has a audio buffer underflow and not received any data for one second. Stopping replay."), this->_client_id);
-			} else {
-				if(this->_state != state::buffering) {
-					log_warn(category::audio, tr("Client {} has a audio buffer underflow. Buffer again."), this->_client_id);
-					this->set_state(state::buffering);
-				}
-
-				audio::decode_event_loop->schedule(static_pointer_cast<event::EventEntry>(this->ref()));
-			}
-		}
-
-		return false;
-	};
-	this->output_source->on_overflow = [&](size_t count){
-		log_warn(category::audio, tr("Client {} has a audio buffer overflow of {}."), this->_client_id, count);
-	};
-
 	this->execute_lock_timeout = std::chrono::microseconds{500};
 }
 
@@ -248,8 +220,49 @@ VoiceClient::~VoiceClient() {
 	}
 
 	this->cancel_replay(); /* cleanup all buffers */
-	this->output_source->on_underflow = nullptr; /* to ensure */
-	global_audio_output->delete_source(this->output_source);
+	if(this->output_source) {
+        this->output_source->on_underflow = nullptr; /* to ensure */
+        global_audio_output->delete_source(this->output_source);
+	}
+}
+
+void VoiceClient::initialize() { /* sets up the output source and its callbacks via the audio::initialize callback */
+    auto weak_this = this->_ref; /* weak reference: the pending callback must not keep the client alive */
+
+    audio::initialize([weak_this]{
+        auto client = weak_this.lock();
+        if(!client) return; /* client has been destroyed before the callback ran */
+
+        assert(global_audio_output);
+        client->output_source = global_audio_output->create_source();
+        client->output_source->overflow_strategy = audio::overflow_strategy::ignore;
+        client->output_source->max_buffered_samples = (size_t) ceil(client->output_source->sample_rate * 0.5);
+        client->output_source->min_buffered_samples = (size_t) ceil(client->output_source->sample_rate * 0.04);
+
+        const auto client_ptr = &*client;
+        client->output_source->on_underflow = [client_ptr](size_t sample_count){ /* this callback will never be called when the client has been deallocated */
+            if(client_ptr->_state == state::stopping)
+                client_ptr->set_state(state::stopped);
+            else if(client_ptr->_state != state::stopped) {
+                if(client_ptr->_last_received_packet + chrono::seconds(1) < chrono::system_clock::now()) {
+                    client_ptr->set_state(state::stopped);
+                    log_warn(category::audio, tr("Client {} has a audio buffer underflow for {} samples and not received any data for one second. Stopping replay."), client_ptr->_client_id, sample_count);
+                } else {
+                    if(client_ptr->_state != state::buffering) {
+                        log_warn(category::audio, tr("Client {} has a audio buffer underflow for {} samples. Buffer again."), client_ptr->_client_id, sample_count);
+                        client_ptr->set_state(state::buffering);
+                    }
+
+                    audio::decode_event_loop->schedule(static_pointer_cast<event::EventEntry>(client_ptr->ref()));
+                }
+            }
+
+            return false;
+        };
+        client->output_source->on_overflow = [client_id = client->_client_id](size_t count){ /* copy the id: `client` is a local of this callback and must not be captured by reference */
+            log_warn(category::audio, tr("Client {} has a audio buffer overflow of {}."), client_id, count);
+        };
+    });
 }
 
 void VoiceClient::initialize_js_object() {
@@ -313,6 +326,11 @@ void VoiceClient::process_packet(uint16_t packet_id, const pipes::buffer_view& b
 		return;
 	}
 
+	if(!this->output_source) {
+	    /* audio hasn't been initialized yet */
+	    return;
+	}
+
 	auto& codec_data = this->codec[codec];
 	if(codec_data.state == AudioCodec::State::UNINITIALIZED)
 		this->initialize_code(codec);
@@ -374,7 +392,7 @@ void VoiceClient::process_packet(uint16_t packet_id, const pipes::buffer_view& b
 void VoiceClient::cancel_replay() {
 	log_trace(category::voice_connection, tr("Cancel replay for client {}"), this->_client_id);
 
-	this->output_source->clear();
+	if(output_source) this->output_source->clear();
 	this->set_state(state::stopped);
 	audio::decode_event_loop->cancel(static_pointer_cast<event::EventEntry>(this->ref()));
 
@@ -393,6 +411,11 @@ void VoiceClient::cancel_replay() {
 }
 
 void VoiceClient::event_execute(const std::chrono::system_clock::time_point &scheduled) {
+    if(!this->output_source) {
+        /* Audio hasn't been initialized yet. This also means there is no audio to be processed */
+        return;
+    }
+
 	static auto max_time = chrono::milliseconds(10);
 	auto reschedule{false};
 	string error;
@@ -555,6 +578,8 @@ void VoiceClient::event_execute(const std::chrono::system_clock::time_point &sch
 }
 
 void VoiceClient::initialize_code(const codec::value &audio_codec) {
+    assert(this->output_source);
+
 	string error;
 
 	auto& codec_data = this->codec[audio_codec];
@@ -590,6 +615,8 @@ void VoiceClient::initialize_code(const codec::value &audio_codec) {
 }
 
 std::shared_ptr<audio::SampleBuffer> VoiceClient::decode_buffer(const codec::value &audio_codec, const pipes::buffer_view &buffer) {
+    assert(this->output_source);
+
 	auto& codec_data = this->codec[audio_codec];
 	if(codec_data.state != AudioCodec::State::INITIALIZED_SUCCESSFULLY) {
 		log_trace(category::audio, tr("Cant decode auto buffer of codec {} because codec isn't successfully initialized (state: {})"), audio_codec, (int) codec_data.state);
diff --git a/native/serverconnection/src/connection/audio/VoiceClient.h b/native/serverconnection/src/connection/audio/VoiceClient.h
index 979c0e5..4a1cda6 100644
--- a/native/serverconnection/src/connection/audio/VoiceClient.h
+++ b/native/serverconnection/src/connection/audio/VoiceClient.h
@@ -69,6 +69,8 @@ namespace tc {
 				VoiceClient(const std::shared_ptr<VoiceConnection>& /* connection */, uint16_t /* client id */);
 				virtual ~VoiceClient();
 
+				void initialize();
+
 				inline uint16_t client_id() { return this->_client_id; }
 
 				void initialize_js_object();
@@ -140,6 +142,7 @@ namespace tc {
 				std::array<AudioCodec, codec::MAX + 1> codec{};
 				void initialize_code(const codec::value& /* codec */);
 
+				/* might be null (if audio hasn't been initialized) */
 				std::shared_ptr<audio::AudioOutputSource> output_source;
 
 				std::weak_ptr<VoiceClient> _ref;
diff --git a/native/serverconnection/src/connection/audio/VoiceConnection.cpp b/native/serverconnection/src/connection/audio/VoiceConnection.cpp
index e01b1c9..47bd55f 100644
--- a/native/serverconnection/src/connection/audio/VoiceConnection.cpp
+++ b/native/serverconnection/src/connection/audio/VoiceConnection.cpp
@@ -328,6 +328,7 @@ std::shared_ptr<VoiceClient> VoiceConnection::register_client(uint16_t client_id
 
 	client = make_shared<VoiceClient>(this->ref(), client_id);
 	client->_ref = client;
+    client->initialize();
 	this->_clients.push_back(client);
 	return client;
 }