TeaSpeak-Client/native/serverconnection/src/connection/audio/AudioSender.cpp

270 lines
9.7 KiB
C++

#include "AudioSender.h"
#include "VoiceConnection.h"
#include "../ServerConnection.h"
#include "../../audio/AudioEventLoop.h"
#include "../../audio/AudioMerger.h"
#include "../../audio/AudioReframer.h"
using namespace std;
using namespace tc;
using namespace tc::audio;
using namespace tc::audio::codec;
using namespace tc::connection;
VoiceSender::VoiceSender(tc::connection::VoiceConnection *handle) : handle{handle} {}
VoiceSender::~VoiceSender() {
/* Note: We can't be within the event loop since if we were we would have a shared reference*/
audio::encode_event_loop->cancel(dynamic_pointer_cast<event::EventEntry>(this->_ref.lock()));
{
lock_guard buffer_lock{this->raw_audio_buffer_mutex};
while(this->raw_audio_buffers_head) {
auto buffer = std::exchange(this->raw_audio_buffers_head, this->raw_audio_buffers_head->next);
buffer->~AudioFrame();
::free(this->raw_audio_buffers_head);
}
this->raw_audio_buffers_tail = &this->raw_audio_buffers_head;
}
}
void VoiceSender::set_voice_send_enabled(bool flag) {
this->voice_send_enabled = flag;
}
void VoiceSender::send_data(const float *data, size_t samples, size_t rate, size_t channels) {
if(!this->voice_send_enabled) {
log_warn(category::voice_connection, tr("Dropping raw audio frame because voice sending has been disabled!"));
return;
}
/* aligned for float values */
const auto aligned_frame_size{((sizeof(AudioFrame) + 3) / sizeof(float)) * sizeof(float)};
auto frame = (AudioFrame*) malloc(aligned_frame_size + samples * channels * sizeof(float));
new (frame) AudioFrame{};
frame->sample_count = samples;
frame->sample_rate = rate;
frame->channels = channels;
frame->buffer = (float*) frame + aligned_frame_size / sizeof(float);
memcpy(frame->buffer, data, samples * channels * sizeof(float));
frame->timestamp = chrono::system_clock::now();
{
lock_guard buffer_lock(this->raw_audio_buffer_mutex);
*this->raw_audio_buffers_tail = frame;
this->raw_audio_buffers_tail = &frame->next;
}
audio::encode_event_loop->schedule(dynamic_pointer_cast<event::EventEntry>(this->_ref.lock()));
}
void VoiceSender::send_stop() {
auto frame = (AudioFrame*) malloc(sizeof(AudioFrame));
new (frame) AudioFrame{};
frame->timestamp = chrono::system_clock::now();
{
lock_guard buffer_lock{this->raw_audio_buffer_mutex};
*this->raw_audio_buffers_tail = frame;
this->raw_audio_buffers_tail = &frame->next;
}
audio::encode_event_loop->schedule(dynamic_pointer_cast<event::EventEntry>(this->_ref.lock()));
}
void VoiceSender::finalize() {
auto execute_lock = this->execute_lock(true);
this->handle = nullptr;
}
void VoiceSender::event_execute(const std::chrono::system_clock::time_point &point) {
static auto max_time = chrono::milliseconds(10);
bool reschedule = false;
auto now = chrono::system_clock::now();
while(true) {
std::unique_lock buffer_lock{this->raw_audio_buffer_mutex};
if(!this->raw_audio_buffers_head) {
break;
}
auto next_buffer = std::exchange(this->raw_audio_buffers_head, this->raw_audio_buffers_head->next);
if(!this->raw_audio_buffers_head) {
assert(this->raw_audio_buffers_tail == &next_buffer->next);
this->raw_audio_buffers_tail = &this->raw_audio_buffers_head;
}
buffer_lock.unlock();
//TODO: Drop too old buffers!
if(this->handle) {
this->encode_raw_frame(next_buffer);
}
next_buffer->~AudioFrame();
::free(next_buffer);
if(chrono::system_clock::now() - now > max_time) {
reschedule = true;
break;
}
}
if(reschedule) {
log_warn(category::voice_connection, tr("Audio data decode will take longer than {} us. Enqueueing for later"), chrono::duration_cast<chrono::microseconds>(max_time).count());
audio::decode_event_loop->schedule(dynamic_pointer_cast<event::EventEntry>(this->_ref.lock()));
}
}
constexpr static auto kTempBufferMaxSampleCount{1024 * 8};
void VoiceSender::encode_raw_frame(const AudioFrame* frame) {
if(frame->sample_rate == 0) {
/* Audio sequence end */
this->audio_sequence_no = 0;
auto codec_protocol_id = audio::codec::audio_codec_to_protocol_id(this->current_codec);
if(codec_protocol_id.has_value()) {
this->flush_current_codec();
if(this->codec_encoder) {
this->codec_encoder->reset_sequence();
}
auto server = this->handle->handle();
server->send_voice_data(nullptr, 0, *codec_protocol_id, false);
}
return;
}
if(this->current_codec != this->target_codec_) {
auto codec_protocol_id = audio::codec::audio_codec_to_protocol_id(this->target_codec_);
if(!codec_protocol_id.has_value()) {
/* we can't send it so no need to initialize it */
return;
}
this->flush_current_codec();
this->audio_sequence_no = 0;
this->codec_resampler = nullptr;
this->codec_reframer = nullptr;
this->codec_encoder = nullptr;
this->current_codec = this->target_codec_;
if(!audio::codec::audio_encode_supported(this->current_codec)) {
log_warn(category::voice_connection, tr("Audio sender set to codec where encoding is not supported. Do not send any audio data."));
return;
}
this->codec_encoder = audio::codec::create_audio_encoder(this->current_codec);
if(!this->codec_encoder) {
log_error(category::voice_connection, tr("Failed to allocate new audio encoder for codec {}"), (uint32_t) this->target_codec_);
return;
}
std::string error{};
if(!this->codec_encoder->initialize(error)) {
log_error(category::voice_connection, tr("Failed to initialize auto encoder (codec {}) {}"), (uint32_t) this->target_codec_, error);
this->codec_encoder = nullptr;
return;
}
}
if(!this->codec_encoder) {
/* Codec failed to initialize */
return;
}
const auto codec_channel_count = this->codec_encoder->channel_count();
const auto codec_sample_rate = this->codec_encoder->sample_rate();
float temp_buffer[kTempBufferMaxSampleCount];
size_t current_sample_count{frame->sample_count};
float* current_sample_buffer;
if(frame->channels != codec_channel_count) {
assert(kTempBufferMaxSampleCount >= frame->sample_count * codec_channel_count);
if(!audio::merge::merge_channels_interleaved(temp_buffer, codec_channel_count, frame->buffer, frame->channels, frame->sample_count)) {
log_warn(category::voice_connection, tr("Failed to merge channels to output stream channel count! Dropping local voice packet"));
return;
}
current_sample_buffer = temp_buffer;
} else {
current_sample_buffer = frame->buffer;
}
if(frame->sample_rate != codec_sample_rate) {
if(!this->codec_resampler || this->codec_resampler->input_rate() != frame->sample_rate) {
this->codec_resampler = std::make_unique<audio::AudioResampler>(frame->sample_rate, codec_sample_rate, codec_channel_count);
}
size_t resampled_sample_count{this->codec_resampler->estimated_output_size(frame->sample_count)};
assert(kTempBufferMaxSampleCount >= resampled_sample_count * codec_channel_count);
if(!this->codec_resampler->process(temp_buffer, current_sample_buffer, frame->sample_count, resampled_sample_count)) {
log_error(category::voice_connection, tr("Failed to resample buffer. Dropping audio frame"));
return;
}
current_sample_buffer = temp_buffer;
current_sample_count = resampled_sample_count;
}
if(!this->codec_reframer) {
this->codec_reframer = std::make_unique<audio::AudioReframer>(codec_channel_count, (size_t) (0.02 * codec_sample_rate));
this->codec_reframer->on_frame = [&](const float* sample_buffer) {
assert(this->codec_reframer);
this->handle_network_frame(sample_buffer, this->codec_reframer->target_size(), false);
};
this->codec_reframer->on_flush = [&](const float* sample_buffer, size_t sample_count) {
this->handle_network_frame(sample_buffer, sample_count, true);
};
}
this->codec_reframer->process(current_sample_buffer, current_sample_count);
}
constexpr static auto kMaxPacketSize{1500};
void VoiceSender::handle_network_frame(const float *sample_buffer, size_t sample_count, bool is_flush) {
assert(this->codec_encoder);
auto codec_protocol_id = audio::codec::audio_codec_to_protocol_id(this->current_codec);
if(!codec_protocol_id.has_value()) {
return;
}
//log_trace(category::voice_connection, tr("Encoding audio chunk of {}/{} aka {}ms with codec {}"),
// sample_count, this->codec_encoder->sample_rate(), sample_count * 1000 / this->codec_encoder->sample_rate(), *this->current_codec_);
char packet_buffer[kMaxPacketSize];
size_t packet_size{kMaxPacketSize};
EncoderBufferInfo buffer_info{};
buffer_info.flush_encoder = is_flush;
buffer_info.sample_count = sample_count;
buffer_info.head_sequence = this->audio_sequence_no++ < 5;
std::string error{};
if(!this->codec_encoder->encode(error, packet_buffer, packet_size, buffer_info, sample_buffer)) {
log_error(category::voice_connection, tr("Failed to encode voice: {}"), error);
return;
}
if(!packet_size) {
/* No audio packet created */
return;
}
auto server = this->handle->handle();
server->send_voice_data(packet_buffer, packet_size, *codec_protocol_id, buffer_info.head_sequence);
}
void VoiceSender::flush_current_codec() {
if(!this->codec_reframer) {
return;
}
this->codec_reframer->flush();
}