mirror of
https://github.com/f4exb/sdrangel.git
synced 2026-03-11 18:49:35 -04:00
881 lines
36 KiB
C++
881 lines
36 KiB
C++
///////////////////////////////////////////////////////////////////////////////////
|
|
// Copyright (C) 2023 Jon Beniston, M7RCE <jon@beniston.com> //
|
|
// //
|
|
// This program is free software; you can redistribute it and/or modify //
|
|
// it under the terms of the GNU General Public License as published by //
|
|
// the Free Software Foundation as version 3 of the License, or //
|
|
// (at your option) any later version. //
|
|
// //
|
|
// This program is distributed in the hope that it will be useful, //
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
|
|
// GNU General Public License V3 for more details. //
|
|
// //
|
|
// You should have received a copy of the GNU General Public License //
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>. //
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include <QDebug>
|
|
|
|
#include <complex.h>
|
|
#include <cmath>
|
|
|
|
#ifndef M_PI
|
|
#define M_PI 3.14159265358979323846
|
|
#endif
|
|
|
|
#include "dsp/dspengine.h"
|
|
#include "dsp/fftfactory.h"
|
|
#include "util/db.h"
|
|
|
|
#include "freqscanner.h"
|
|
#include "freqscannersink.h"
|
|
|
|
FreqScannerSink::FreqScannerSink() :
|
|
m_channel(nullptr),
|
|
m_channelSampleRate(48000),
|
|
m_channelFrequencyOffset(0),
|
|
m_scannerSampleRate(33320),
|
|
m_centerFrequency(0),
|
|
m_messageQueueToChannel(nullptr),
|
|
m_fftSequence(-1),
|
|
m_fft(nullptr),
|
|
m_fftCounter(0),
|
|
m_fftSize(1024),
|
|
m_binsPerChannel(16),
|
|
m_averageCount(0),
|
|
m_cepstrumSequenceInverse(-1),
|
|
m_cepstrumSequenceForward(-1),
|
|
m_cepstrumFFTInverse(nullptr),
|
|
m_cepstrumFFTForward(nullptr),
|
|
m_cepstrumSize(0)
|
|
{
|
|
applySettings(m_settings, QStringList(), true);
|
|
applyChannelSettings(m_channelSampleRate, m_channelFrequencyOffset, 16, 4, true);
|
|
}
|
|
|
|
FreqScannerSink::~FreqScannerSink()
|
|
{
|
|
FFTFactory* fftFactory = DSPEngine::instance()->getFFTFactory();
|
|
|
|
if (m_fftSequence >= 0) {
|
|
fftFactory->releaseEngine(m_fftSize, false, m_fftSequence);
|
|
}
|
|
|
|
if (m_cepstrumSequenceInverse >= 0) {
|
|
fftFactory->releaseEngine(m_cepstrumSize, true, m_cepstrumSequenceInverse);
|
|
}
|
|
|
|
if (m_cepstrumSequenceForward >= 0) {
|
|
fftFactory->releaseEngine(m_cepstrumSize, false, m_cepstrumSequenceForward);
|
|
}
|
|
}
|
|
|
|
void FreqScannerSink::feed(const SampleVector::const_iterator& begin, const SampleVector::const_iterator& end)
|
|
{
|
|
Complex ci;
|
|
|
|
for (SampleVector::const_iterator it = begin; it != end; ++it)
|
|
{
|
|
Complex c(it->real(), it->imag());
|
|
c *= m_nco.nextIQ();
|
|
|
|
if (m_interpolatorDistance < 1.0f) // interpolate
|
|
{
|
|
while (!m_interpolator.interpolate(&m_interpolatorDistanceRemain, c, &ci))
|
|
{
|
|
processOneSample(ci);
|
|
m_interpolatorDistanceRemain += m_interpolatorDistance;
|
|
}
|
|
}
|
|
else // decimate (and filter)
|
|
{
|
|
if (m_interpolator.decimate(&m_interpolatorDistanceRemain, c, &ci))
|
|
{
|
|
processOneSample(ci);
|
|
m_interpolatorDistanceRemain += m_interpolatorDistance;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void FreqScannerSink::processOneSample(Complex &ci)
|
|
{
|
|
ci /= SDR_RX_SCALEF;
|
|
|
|
m_fft->in()[m_fftCounter] = ci;
|
|
m_fftCounter++;
|
|
if (m_fftCounter == m_fftSize)
|
|
{
|
|
// Apply windowing function
|
|
m_fftWindow.apply(m_fft->in());
|
|
|
|
// Perform FFT
|
|
m_fft->transform();
|
|
|
|
// Accumulate voice activity levels on individual FFT (before averaging)
|
|
// This captures sharp formant structure better than averaged spectrum
|
|
int freqCount = m_settings.m_frequencySettings.size();
|
|
if (m_voiceLevelSum.size() != freqCount) {
|
|
m_voiceLevelSum.resize(freqCount);
|
|
m_voiceLevelCount.resize(freqCount);
|
|
m_voiceLevelSum.fill(0.0);
|
|
m_voiceLevelCount.fill(0);
|
|
}
|
|
|
|
for (int i = 0; i < freqCount; i++)
|
|
{
|
|
if (m_settings.m_frequencySettings[i].m_enabled)
|
|
{
|
|
qint64 frequency = m_settings.m_frequencySettings[i].m_frequency;
|
|
qint64 startFrequency = m_centerFrequency - m_scannerSampleRate / 2;
|
|
qint64 diff = frequency - startFrequency;
|
|
float binBW = m_scannerSampleRate / (float)m_fftSize;
|
|
// qDebug() << "FreqScannerSink::processOneSample:"
|
|
// << "startFrequency" << startFrequency
|
|
// << "m_scannerSampleRate" << m_scannerSampleRate
|
|
// << "m_centerFrequency" << m_centerFrequency
|
|
// << "m_fftSize" << m_fftSize
|
|
// << "frequency" << frequency
|
|
// << "diff" << diff
|
|
// << "binBW" << binBW
|
|
// << "m_binsPerChannel" << m_binsPerChannel;
|
|
|
|
// avoid spectrum edges where there may be aliasing from half-band filters
|
|
if ((diff >= m_scannerSampleRate / 8) && (diff < m_scannerSampleRate * 7 / 8))
|
|
{
|
|
int bin = std::round(diff / binBW);
|
|
int channelBins;
|
|
if (m_settings.m_frequencySettings[i].m_channelBandwidth.isEmpty()) {
|
|
channelBins = m_binsPerChannel;
|
|
} else {
|
|
int channelBW = m_settings.getChannelBandwidth(&m_settings.m_frequencySettings[i]);
|
|
channelBins = m_fftSize / (m_scannerSampleRate / (float)channelBW);
|
|
}
|
|
|
|
Real voiceLevel = 0.0;
|
|
if (m_settings.m_voiceSquelchType == FreqScannerSettings::VoiceLsb) {
|
|
voiceLevel = voiceActivityLevel(frequency, bin, channelBins, true);
|
|
} else if (m_settings.m_voiceSquelchType == FreqScannerSettings::VoiceUsb) {
|
|
voiceLevel = voiceActivityLevel(frequency, bin, channelBins, false);
|
|
}
|
|
|
|
if (voiceLevel > 0.0) {
|
|
m_voiceLevelSum[i] += voiceLevel;
|
|
m_voiceLevelCount[i]++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Reorder (so negative frequencies are first) and average
|
|
int halfSize = m_fftSize / 2;
|
|
for (int i = 0; i < halfSize; i++) {
|
|
m_fftAverage.storeAndGetAvg(m_magSq[i], magSq(i + halfSize), i);
|
|
}
|
|
for (int i = 0; i < halfSize; i++) {
|
|
m_fftAverage.storeAndGetAvg(m_magSq[i + halfSize], magSq(i), i + halfSize);
|
|
}
|
|
|
|
if (m_fftAverage.nextAverage())
|
|
{
|
|
// Send results to channel
|
|
if (getMessageQueueToChannel() && (m_settings.m_channelBandwidth != 0) && (m_binsPerChannel != 0))
|
|
{
|
|
FreqScanner::MsgScanResult* msg = FreqScanner::MsgScanResult::create(m_fftStartTime);
|
|
QList<FreqScanner::MsgScanResult::ScanResult>& results = msg->getScanResults();
|
|
|
|
for (int i = 0; i < m_settings.m_frequencySettings.size(); i++)
|
|
{
|
|
if (m_settings.m_frequencySettings[i].m_enabled)
|
|
{
|
|
qint64 frequency = m_settings.m_frequencySettings[i].m_frequency;
|
|
qint64 startFrequency = m_centerFrequency - m_scannerSampleRate / 2;
|
|
qint64 diff = frequency - startFrequency;
|
|
float binBW = m_scannerSampleRate / (float)m_fftSize;
|
|
|
|
// Ignore results in upper and lower 12.5%, as there may be aliasing here from half-band filters
|
|
if ((diff >= m_scannerSampleRate / 8) && (diff < m_scannerSampleRate * 7 / 8))
|
|
{
|
|
int bin = std::round(diff / binBW); // Bin corresponding to the frequency
|
|
int channelBins; // Number of bins in the channel containing the frequency.
|
|
// This is either the default (m_binsPerChannel)
|
|
// or calculated based on the channel bandwidth if specified in settings for this frequency
|
|
|
|
if (m_settings.m_frequencySettings[i].m_channelBandwidth.isEmpty())
|
|
{
|
|
channelBins = m_binsPerChannel;
|
|
}
|
|
else
|
|
{
|
|
int channelBW = m_settings.getChannelBandwidth(&m_settings.m_frequencySettings[i]);
|
|
channelBins = m_fftSize / (m_scannerSampleRate / (float)channelBW);
|
|
}
|
|
|
|
// Calculate power at that frequency
|
|
Real power;
|
|
if (m_settings.m_measurement == FreqScannerSettings::PEAK) {
|
|
power = peakPower(bin, channelBins);
|
|
} else {
|
|
power = totalPower(bin, channelBins);
|
|
}
|
|
|
|
// Use averaged voice activity level from individual FFTs
|
|
Real voiceLevel = 0.0;
|
|
if ((m_settings.m_voiceSquelchType == FreqScannerSettings::VoiceLsb ||
|
|
m_settings.m_voiceSquelchType == FreqScannerSettings::VoiceUsb) &&
|
|
m_voiceLevelCount[i] > 0)
|
|
{
|
|
voiceLevel = m_voiceLevelSum[i] / m_voiceLevelCount[i];
|
|
if (voiceLevel > m_settings.m_voiceSquelchThreshold) {
|
|
qDebug() << "FreqScannerSink::processOneSample: frequency"
|
|
<< frequency + (m_settings.m_voiceSquelchType == FreqScannerSettings::VoiceLsb ? 1500 : -1500)
|
|
<< "voiceLevel" << voiceLevel << "count" << m_voiceLevelCount[i];
|
|
}
|
|
}
|
|
|
|
//qDebug() << "startFrequency:" << startFrequency << "m_scannerSampleRate:" << m_scannerSampleRate << "m_centerFrequency:" << m_centerFrequency << "frequency" << frequency << "bin" << bin << "power" << power << "voiceLevel" << voiceLevel;
|
|
FreqScanner::MsgScanResult::ScanResult result = {frequency, power, voiceLevel};
|
|
results.append(result);
|
|
}
|
|
}
|
|
}
|
|
getMessageQueueToChannel()->push(msg);
|
|
}
|
|
m_averageCount = 0;
|
|
m_fftStartTime = QDateTime::currentDateTime();
|
|
|
|
// Reset voice level accumulators for next averaging period
|
|
m_voiceLevelSum.fill(0.0);
|
|
m_voiceLevelCount.fill(0);
|
|
}
|
|
m_fftCounter = 0;
|
|
}
|
|
}
|
|
|
|
// Calculate total power in a channel containing the specified bin (i.e. sums adjacent bins in the same channel)
|
|
Real FreqScannerSink::totalPower(int bin, int channelBins) const
|
|
{
|
|
// Skip bin between halfway between channels
|
|
// Then skip first and last bins, to avoid spectral leakage (particularly at DC)
|
|
int startBin = bin - channelBins / 2 + 1 + 1;
|
|
Real magSqSum = 0.0f;
|
|
for (int i = 0; i < channelBins - 2 - 1; i++) {
|
|
int idx = startBin + i;
|
|
if ((idx < 0) || (idx >= m_fftSize)) {
|
|
continue;
|
|
}
|
|
magSqSum += m_magSq[idx];
|
|
}
|
|
Real db = CalcDb::dbPower(magSqSum);
|
|
return db;
|
|
}
|
|
|
|
// Calculate peak power in a channel containing the specified bin
|
|
Real FreqScannerSink::peakPower(int bin, int channelBins) const
|
|
{
|
|
// Skip bin between halfway between channels
|
|
// Then skip first and last bins, to avoid spectral leakage (particularly at DC)
|
|
int startBin = bin - channelBins/2 + 1 + 1;
|
|
Real maxMagSq = std::numeric_limits<Real>::min();
|
|
for (int i = 0; i < channelBins - 2 - 1; i++)
|
|
{
|
|
int idx = startBin + i;
|
|
if ((idx < 0) || (idx >= m_fftSize)) {
|
|
continue;
|
|
}
|
|
//qDebug() << "idx:" << idx << "power:" << CalcDb::dbPower(m_magSq[idx]);
|
|
maxMagSq = std::max(maxMagSq, m_magSq[idx]);
|
|
}
|
|
Real db = CalcDb::dbPower(maxMagSq);
|
|
return db;
|
|
}
|
|
|
|
Real FreqScannerSink::magSq(int bin) const
|
|
{
|
|
Complex c = m_fft->out()[bin];
|
|
Real v = c.real() * c.real() + c.imag() * c.imag();
|
|
Real magsq = v / (m_fftSize * m_fftSize);
|
|
return magsq;
|
|
}
|
|
|
|
// Compute magSq from raw FFT output with reordering (negative frequencies first)
|
|
Real FreqScannerSink::magSqFromRawFFT(int bin) const
|
|
{
|
|
// m_magSq is reordered: negative freqs first, then positive
|
|
// m_fft->out() is in standard FFT order: DC, positive freqs, negative freqs
|
|
int halfSize = m_fftSize / 2;
|
|
int fftBin;
|
|
if (bin < halfSize) {
|
|
// Negative frequencies: map to second half of FFT output
|
|
fftBin = bin + halfSize;
|
|
} else {
|
|
// Positive frequencies: map to first half of FFT output
|
|
fftBin = bin - halfSize;
|
|
}
|
|
return magSq(fftBin);
|
|
}
|
|
|
|
// Compute formant envelope using cepstral liftering
|
|
// This separates vocal tract resonances (formants) from pitch harmonics
|
|
// Method: log spectrum → IFFT → lifter → FFT → exp
|
|
void FreqScannerSink::getFormantEnvelope(int startBin, int endBin, QVector<Real>& envelope, Real *pitchHz)
|
|
{
|
|
if (pitchHz) {
|
|
*pitchHz = 0.0;
|
|
}
|
|
|
|
if (startBin < 0 || endBin >= m_fftSize || startBin > endBin) {
|
|
envelope.clear();
|
|
return;
|
|
}
|
|
|
|
int numBins = endBin - startBin + 1;
|
|
envelope.resize(numBins);
|
|
|
|
// Check if cepstral FFT engines are available and large enough
|
|
if (!m_cepstrumFFTInverse || !m_cepstrumFFTForward || numBins > m_cepstrumSize) {
|
|
// qDebug() << "FreqScannerSink::getFormantEnvelope: Cepstral FFT not available or too small"
|
|
// << "numBins:" << numBins << "cepstrumSize:" << m_cepstrumSize;
|
|
// Fallback: return simple log/exp without cepstral processing
|
|
for (int i = 0; i < numBins; i++) {
|
|
Real magSq = magSqFromRawFFT(startBin + i);
|
|
envelope[i] = std::sqrt(std::max(magSq, (Real)1e-12));
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Step 1: Compute log magnitude spectrum
|
|
QVector<Real> logMag(numBins);
|
|
Real minLog = -10.0; // Floor to avoid log(0)
|
|
Real sumMagSq = 0.0;
|
|
|
|
for (int i = 0; i < numBins; i++)
|
|
{
|
|
Real magSq = magSqFromRawFFT(startBin + i);
|
|
sumMagSq += magSq;
|
|
Real mag = std::sqrt(std::max(magSq, (Real)1e-12));
|
|
logMag[i] = std::log(mag);
|
|
if (logMag[i] < minLog) {
|
|
logMag[i] = minLog;
|
|
}
|
|
}
|
|
|
|
// qDebug() << "FreqScannerSink::getFormantEnvelope: Input spectrum check:"
|
|
// << "numBins:" << numBins
|
|
// << "sumMagSq:" << sumMagSq
|
|
// << "avgMagSq:" << (sumMagSq / numBins)
|
|
// << "first 5 logMag:" << logMag.mid(0, std::min(5, numBins));
|
|
|
|
// Step 2: Apply cepstral liftering for better source-filter separation
|
|
// Cepstral analysis separates:
|
|
// - Voice pitch (high quefrency peak)
|
|
// - Formants (low quefrency components)
|
|
// Liftering = low-pass filtering in quefrency domain
|
|
|
|
// Copy log magnitude to inverse FFT input (real data, symmetric spectrum)
|
|
// For real cepstrum, we need a symmetric spectrum:
|
|
// [DC, positive freqs, Nyquist, negative freqs (mirror)]
|
|
|
|
// DC component
|
|
m_cepstrumFFTInverse->in()[0] = Complex(logMag[0], 0.0);
|
|
|
|
// Positive frequencies
|
|
int halfBins = std::min(numBins, m_cepstrumSize / 2);
|
|
for (int i = 1; i < halfBins; i++) {
|
|
m_cepstrumFFTInverse->in()[i] = Complex(logMag[i], 0.0);
|
|
}
|
|
|
|
// Nyquist (if we have space)
|
|
if (m_cepstrumSize > halfBins) {
|
|
m_cepstrumFFTInverse->in()[halfBins] = Complex(numBins > halfBins ? logMag[halfBins] : logMag[halfBins-1], 0.0);
|
|
}
|
|
|
|
// Negative frequencies (mirror of positive)
|
|
for (int i = 1; i < halfBins; i++) {
|
|
m_cepstrumFFTInverse->in()[m_cepstrumSize - i] = Complex(logMag[i], 0.0);
|
|
}
|
|
|
|
// Zero-pad the middle if needed
|
|
for (int i = halfBins + 1; i < m_cepstrumSize - halfBins; i++) {
|
|
m_cepstrumFFTInverse->in()[i] = Complex(0.0, 0.0);
|
|
}
|
|
|
|
// Step 3: IFFT to get cepstrum (quefrency domain)
|
|
m_cepstrumFFTInverse->transform();
|
|
|
|
// Step 4: Estimate pitch from the cepstrum before liftering.
|
|
// Pitch appears as a peak at higher quefrencies (around 3-14 ms).
|
|
float binBW = m_scannerSampleRate / (float)m_fftSize;
|
|
float quefrencyResolution = 1.0f / (m_cepstrumSize * binBW); // seconds per bin in quefrency
|
|
|
|
if (pitchHz) {
|
|
const float minPitchHz = 70.0f;
|
|
const float maxPitchHz = 300.0f;
|
|
const float minQuefrency = 1.0f / maxPitchHz;
|
|
const float maxQuefrency = 1.0f / minPitchHz;
|
|
const int minBin = std::max(1, (int)std::ceil(minQuefrency / quefrencyResolution));
|
|
const int maxBin = std::min(m_cepstrumSize / 2, (int)std::floor(maxQuefrency / quefrencyResolution));
|
|
Real maxVal = 0.0;
|
|
int maxIdx = -1;
|
|
|
|
for (int i = minBin; i <= maxBin; i++) {
|
|
Real val = std::abs(m_cepstrumFFTInverse->out()[i].real());
|
|
if (val > maxVal) {
|
|
maxVal = val;
|
|
maxIdx = i;
|
|
}
|
|
}
|
|
|
|
if (maxIdx > 0) {
|
|
*pitchHz = 1.0f / (maxIdx * quefrencyResolution);
|
|
}
|
|
}
|
|
|
|
// Step 5: Apply lifter (low-pass filter in quefrency domain)
|
|
// Lifter cutoff: keep low quefrencies (formant envelope), remove high quefrencies (pitch harmonics)
|
|
// Typical pitch periods: 3-10 ms (100-330 Hz F0)
|
|
// We want to remove quefrencies corresponding to pitch harmonics
|
|
|
|
// Lifter cutoff in seconds (quefrency): keep components below this
|
|
// Use much lower cutoff - we only need to keep very low quefrencies for formant envelope
|
|
// Most formant information is in the first few quefrency bins
|
|
float lifterCutoffQuefrency = 0.002f; // 2 ms (reduced from 8 ms)
|
|
int lifterCutoffBin = std::max(1, (int)(lifterCutoffQuefrency / quefrencyResolution));
|
|
|
|
// Cap the lifter cutoff to reasonable maximum (1/4 of cepstrum size)
|
|
lifterCutoffBin = std::min(lifterCutoffBin, m_cepstrumSize / 4);
|
|
|
|
// Apply lifter: keep low quefrencies, zero out high quefrencies
|
|
// Use smooth transition (raised cosine) to reduce artifacts
|
|
int transitionBins = std::max(1, lifterCutoffBin / 4);
|
|
|
|
for (int i = 0; i < m_cepstrumSize; i++) {
|
|
Real lifterWeight = 1.0;
|
|
|
|
if (i > lifterCutoffBin + transitionBins) {
|
|
lifterWeight = 0.0; // Zero out high quefrencies
|
|
} else if (i > lifterCutoffBin) {
|
|
// Smooth transition using raised cosine
|
|
float t = (float)(i - lifterCutoffBin) / transitionBins;
|
|
lifterWeight = 0.5 * (1.0 + std::cos(M_PI * t));
|
|
}
|
|
// else: lifterWeight = 1.0 (keep low quefrencies)
|
|
|
|
m_cepstrumFFTForward->in()[i] = m_cepstrumFFTInverse->out()[i] * lifterWeight;
|
|
}
|
|
|
|
// qDebug() << "FreqScannerSink::getFormantEnvelope cepstral lifter:"
|
|
// << "cutoff bin:" << lifterCutoffBin
|
|
// << "(" << (lifterCutoffBin * quefrencyResolution * 1000.0) << "ms)"
|
|
// << "quefrency resolution:" << (quefrencyResolution * 1000.0) << "ms/bin";
|
|
|
|
// Step 6: FFT back to frequency domain (smoothed log spectrum)
|
|
m_cepstrumFFTForward->transform();
|
|
|
|
// Debug: check forward FFT output
|
|
// qDebug() << "FreqScannerSink::getFormantEnvelope: Forward FFT output check:"
|
|
// << "first 5 real:" << m_cepstrumFFTForward->out()[0].real()
|
|
// << m_cepstrumFFTForward->out()[1].real()
|
|
// << m_cepstrumFFTForward->out()[2].real()
|
|
// << m_cepstrumFFTForward->out()[3].real()
|
|
// << m_cepstrumFFTForward->out()[4].real();
|
|
|
|
// Step 7: Convert back to linear magnitude
|
|
// Take real part of FFT output and exponentiate
|
|
// IMPORTANT: Normalize by FFT size since FFT engines don't auto-normalize
|
|
Real normalization = 1.0 / m_cepstrumSize;
|
|
|
|
for (int i = 0; i < numBins; i++)
|
|
{
|
|
Real smoothedLog = m_cepstrumFFTForward->out()[i].real() * normalization;
|
|
envelope[i] = std::exp(smoothedLog);
|
|
}
|
|
|
|
// Debug: print first few envelope values
|
|
// qDebug() << "FreqScannerSink::getFormantEnvelope (cepstral) first 10 envelope values:"
|
|
// << envelope.mid(0, std::min(10, numBins));
|
|
}
|
|
|
|
void FreqScannerSink::applyChannelSettings(int channelSampleRate, int channelFrequencyOffset, int scannerSampleRate, int fftSize, int binsPerChannel, bool force)
|
|
{
|
|
qDebug() << "FreqScannerSink::applyChannelSettings:"
|
|
<< " channelSampleRate: " << channelSampleRate
|
|
<< " channelFrequencyOffset: " << channelFrequencyOffset
|
|
<< " scannerSampleRate: " << scannerSampleRate
|
|
<< " fftSize: " << fftSize
|
|
<< " binsPerChannel: " << binsPerChannel;
|
|
|
|
if ((m_channelFrequencyOffset != channelFrequencyOffset) ||
|
|
(m_channelSampleRate != channelSampleRate) || force)
|
|
{
|
|
m_nco.setFreq(-channelFrequencyOffset, channelSampleRate);
|
|
}
|
|
|
|
if ((m_channelSampleRate != channelSampleRate) || (m_scannerSampleRate != scannerSampleRate) || force)
|
|
{
|
|
m_interpolator.create(16, channelSampleRate, scannerSampleRate / 2.2); // Filter potential aliasing resulting from half-band filters
|
|
m_interpolatorDistance = (Real) channelSampleRate / (Real)scannerSampleRate;
|
|
m_interpolatorDistanceRemain = m_interpolatorDistance;
|
|
}
|
|
|
|
if ((m_fftSize != fftSize) || force)
|
|
{
|
|
FFTFactory* fftFactory = DSPEngine::instance()->getFFTFactory();
|
|
if (m_fftSequence >= 0) {
|
|
fftFactory->releaseEngine(m_fftSize, false, m_fftSequence);
|
|
}
|
|
m_fftSequence = fftFactory->getEngine(fftSize, false, &m_fft);
|
|
m_fftCounter = 0;
|
|
m_fftStartTime = QDateTime::currentDateTime();
|
|
m_fftWindow.create(FFTWindow::Hanning, fftSize);
|
|
|
|
int averages = m_settings.m_scanTime * scannerSampleRate / 2 / fftSize;
|
|
m_fftAverage.resize(fftSize, averages);
|
|
m_magSq.resize(fftSize);
|
|
|
|
// Resize voice level accumulators to match frequency count
|
|
int freqCount = m_settings.m_frequencySettings.size();
|
|
m_voiceLevelSum.resize(freqCount);
|
|
m_voiceLevelCount.resize(freqCount);
|
|
m_voiceLevelSum.fill(0.0);
|
|
m_voiceLevelCount.fill(0);
|
|
|
|
// Allocate cepstral FFT engines for formant detection
|
|
// Size needs to be power-of-2 and >= channel bins (typically ~100-200 bins)
|
|
// Use conservative size to handle most SSB channels
|
|
int maxChannelBins = std::max(binsPerChannel * 2, 256);
|
|
int cepstrumSize = 1;
|
|
while (cepstrumSize < maxChannelBins) {
|
|
cepstrumSize <<= 1;
|
|
}
|
|
|
|
if (m_cepstrumSize != cepstrumSize) {
|
|
// Release old engines if size changed
|
|
if (m_cepstrumSequenceInverse >= 0) {
|
|
fftFactory->releaseEngine(m_cepstrumSize, true, m_cepstrumSequenceInverse);
|
|
}
|
|
if (m_cepstrumSequenceForward >= 0) {
|
|
fftFactory->releaseEngine(m_cepstrumSize, false, m_cepstrumSequenceForward);
|
|
}
|
|
|
|
// Allocate new engines
|
|
m_cepstrumSequenceInverse = fftFactory->getEngine(cepstrumSize, true, &m_cepstrumFFTInverse);
|
|
m_cepstrumSequenceForward = fftFactory->getEngine(cepstrumSize, false, &m_cepstrumFFTForward);
|
|
m_cepstrumSize = cepstrumSize;
|
|
|
|
qDebug() << "FreqScannerSink::applyChannelSettings: Allocated cepstral FFT engines, size:" << cepstrumSize;
|
|
}
|
|
}
|
|
|
|
m_channelSampleRate = channelSampleRate;
|
|
m_channelFrequencyOffset = channelFrequencyOffset;
|
|
m_scannerSampleRate = scannerSampleRate;
|
|
m_fftSize = fftSize;
|
|
m_binsPerChannel = binsPerChannel;
|
|
}
|
|
|
|
void FreqScannerSink::applySettings(const FreqScannerSettings& settings, const QStringList& settingsKeys, bool force)
|
|
{
|
|
qDebug() << "FreqScannerSink::applySettings:"
|
|
<< settings.getDebugString(settingsKeys, force)
|
|
<< " force: " << force;
|
|
|
|
if (settingsKeys.contains("scanTime") || force)
|
|
{
|
|
int averages = settings.m_scanTime * m_scannerSampleRate / 2 / m_fftSize;
|
|
m_fftAverage.resize(m_fftSize, averages);
|
|
}
|
|
|
|
if (force) {
|
|
m_settings = settings;
|
|
} else {
|
|
m_settings.applySettings(settingsKeys, settings);
|
|
}
|
|
}
|
|
|
|
// Voice activity detection for SSB signals
|
|
// Detects voice by looking for formant-like structure using spectral smoothing
|
|
// Returns a value from 0.0 (no voice) to 1.0 (strong voice signature)
|
|
Real FreqScannerSink::voiceActivityLevel(qint64 freq, int bin, int channelBins, bool isLSB)
|
|
{
|
|
// Voice band in SSB is typically 100-3000 Hz from carrier
|
|
// We look for 2-4 formant peaks in the smoothed spectral envelope
|
|
|
|
int startBin = bin - channelBins / 2 + 1;
|
|
int endBin = startBin + channelBins - 1;
|
|
|
|
if (startBin < 0 || endBin >= m_fftSize) {
|
|
return 0.0;
|
|
}
|
|
|
|
// Calculate bin bandwidth in Hz
|
|
float binBW = m_scannerSampleRate / (float)m_fftSize;
|
|
|
|
// For LSB, spectrum is reversed - flip the search direction
|
|
int carrierBin = isLSB ? endBin : startBin;
|
|
|
|
// Get formant envelope using spectral smoothing
|
|
// This separates vocal tract resonances from pitch harmonics
|
|
QVector<Real> formantEnvelope;
|
|
Real pitchHz = 0.0;
|
|
getFormantEnvelope(startBin, endBin, formantEnvelope, &pitchHz);
|
|
|
|
if (formantEnvelope.isEmpty()) {
|
|
qDebug() << "FreqScannerSink::voiceActivityLevel formantEnvelope is empty!";
|
|
return 0.0;
|
|
}
|
|
|
|
// Calculate noise floor from formant envelope
|
|
Real noiseFloor = 0.0;
|
|
Real maxEnv = 0.0;
|
|
for (int i = 0; i < formantEnvelope.size(); i++) {
|
|
noiseFloor += formantEnvelope[i];
|
|
maxEnv = std::max(maxEnv, formantEnvelope[i]);
|
|
}
|
|
noiseFloor = formantEnvelope.size() > 0 ? noiseFloor / formantEnvelope.size() : 1e-12;
|
|
|
|
// For voice, we need reasonably high peaks relative to noise
|
|
// Use 4 dB (2.5x) above average as threshold for peak detection
|
|
// Additional validation: peak-to-noise ratio
|
|
// If max is not sufficiently higher than average, signal quality is poor
|
|
float peakToNoiseRatio = maxEnv / noiseFloor;
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: noiseFloor:" << noiseFloor
|
|
// << "maxEnv:" << maxEnv << "peak-to-noise ratio:" << peakToNoiseRatio;
|
|
|
|
// Lower threshold: require at least 1.2x peak-to-noise ratio (only ~1.6dB)
|
|
// After heavy smoothing, formants appear as gentle bumps, not sharp peaks
|
|
if (peakToNoiseRatio < 1.2) {
|
|
// Signal is essentially all noise
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: Rejected - insufficient signal (peak-to-noise:" << peakToNoiseRatio << ")";
|
|
return 0.0;
|
|
}
|
|
|
|
// For peak detection, use much lower threshold since smoothed spectral peaks are gentle
|
|
// Use 1.1x noise floor to catch formant peaks in the smoothed envelope
|
|
Real threshold = noiseFloor * 1.1;
|
|
|
|
// Find formant peaks in the smoothed envelope
|
|
QVector<int> formantBins;
|
|
QVector<Real> formantMags;
|
|
|
|
// Simple peak detection in formant envelope
|
|
for (int i = 1; i < formantEnvelope.size() - 1; i++)
|
|
{
|
|
Real prev = formantEnvelope[i - 1];
|
|
Real curr = formantEnvelope[i];
|
|
Real next = formantEnvelope[i + 1];
|
|
|
|
// Local maximum above threshold
|
|
if (curr > prev && curr > next && curr > threshold)
|
|
{
|
|
int absBin = startBin + i;
|
|
float freqOffset = std::abs(absBin - carrierBin) * binBW;
|
|
|
|
// Only include peaks within SSB voice bandwidth (100-3000 Hz from carrier)
|
|
if (freqOffset >= 100.0 && freqOffset <= 3000.0)
|
|
{
|
|
formantBins.append(absBin);
|
|
formantMags.append(curr);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Voice requires 2-4 formants
|
|
if (formantBins.size() < 2) {
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: Not enough peaks detected" << formantBins.size()
|
|
// << "threshold:" << threshold << "envelope size:" << formantEnvelope.size();
|
|
return 0.0;
|
|
}
|
|
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: Detected" << formantBins.size() << "peaks above threshold";
|
|
|
|
// Sort formants by frequency offset (not bin order) to handle LSB reversal
|
|
QVector<float> formantFreqs;
|
|
QVector<int> formantIndices;
|
|
|
|
for (int i = 0; i < formantBins.size(); i++)
|
|
{
|
|
float freqOffset = std::abs(formantBins[i] - carrierBin) * binBW;
|
|
formantFreqs.append(freqOffset);
|
|
formantIndices.append(i);
|
|
}
|
|
|
|
// Simple insertion sort by frequency
|
|
for (int i = 1; i < formantFreqs.size(); i++)
|
|
{
|
|
for (int j = i; j > 0 && formantFreqs[j] < formantFreqs[j - 1]; j--)
|
|
{
|
|
std::swap(formantFreqs[j], formantFreqs[j - 1]);
|
|
std::swap(formantIndices[j], formantIndices[j - 1]);
|
|
}
|
|
}
|
|
|
|
// Merge peaks that are too close together (within 400 Hz)
|
|
// Real formants have minimum separation of 400-500 Hz in SSB voice
|
|
// So anything closer is ripple within a single formant
|
|
// We keep the peak with highest magnitude and remove others
|
|
QVector<float> mergedFormantFreqs;
|
|
QVector<int> mergedFormantIndices;
|
|
|
|
const float minFormantSpacing = 400.0; // Hz - minimum spacing between real formants
|
|
|
|
for (int i = 0; i < formantFreqs.size(); i++)
|
|
{
|
|
if (i == 0 || formantFreqs[i] - mergedFormantFreqs.back() >= minFormantSpacing)
|
|
{
|
|
// This is a new formant (far enough from previous)
|
|
mergedFormantFreqs.append(formantFreqs[i]);
|
|
mergedFormantIndices.append(formantIndices[i]);
|
|
}
|
|
else
|
|
{
|
|
// This peak is too close to the previous one - merge by keeping highest magnitude
|
|
int prevIdx = mergedFormantIndices.back();
|
|
int currIdx = formantIndices[i];
|
|
if (formantMags[currIdx] > formantMags[prevIdx])
|
|
{
|
|
// Replace with higher magnitude peak
|
|
mergedFormantFreqs.back() = formantFreqs[i];
|
|
mergedFormantIndices.back() = currIdx;
|
|
}
|
|
}
|
|
}
|
|
|
|
formantFreqs = mergedFormantFreqs;
|
|
formantIndices = mergedFormantIndices;
|
|
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: Merged to" << formantFreqs.size() << "formants:" << formantFreqs;
|
|
|
|
// After merging, we should still have at least 2 formants for voice
|
|
if (formantFreqs.size() < 2) {
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: Not enough formants after merging";
|
|
return 0.0;
|
|
}
|
|
|
|
// Check formant spacing (voice formants should be 400-1500 Hz apart)
|
|
// F1-F2 spacing is typically 600-1200 Hz
|
|
bool goodSpacing = false;
|
|
for (int i = 0; i < formantFreqs.size() - 1; i++)
|
|
{
|
|
float spacing = formantFreqs[i + 1] - formantFreqs[i];
|
|
if (spacing >= 400.0 && spacing <= 1500.0) {
|
|
goodSpacing = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!goodSpacing) {
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: Bad formant spacing" << formantFreqs;
|
|
return 0.0; // Formants too close or too far apart
|
|
}
|
|
|
|
// Check for F1 formant in expected range (300-1000 Hz from carrier)
|
|
// This is critical for voice detection
|
|
// F1 should be the lowest frequency formant
|
|
bool hasF1 = false;
|
|
Real f1Mag = 0.0;
|
|
int f1Idx = -1;
|
|
|
|
if (formantFreqs.size() > 0 && formantFreqs[0] >= 300.0 && formantFreqs[0] <= 1000.0)
|
|
{
|
|
hasF1 = true;
|
|
f1Idx = formantIndices[0];
|
|
f1Mag = formantMags[f1Idx];
|
|
}
|
|
|
|
if (!hasF1) {
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: No valid F1 formant, lowest freq:" << (formantFreqs.size() > 0 ? formantFreqs[0] : -1);
|
|
return 0.0; // No F1 formant - not voice or mistuned
|
|
}
|
|
|
|
// Check for F2 formant in expected range (900-2500 Hz from carrier)
|
|
// F2 should be higher frequency than F1 AND 400-1500 Hz away from F1
|
|
bool hasF2 = false;
|
|
Real f2Mag = 0.0;
|
|
int f2Idx = -1;
|
|
float f2Freq = 0.0;
|
|
|
|
for (int i = 1; i < formantFreqs.size(); i++)
|
|
{
|
|
float freqOffset = formantFreqs[i];
|
|
float f1ToF2Spacing = freqOffset - formantFreqs[0];
|
|
|
|
// F2 must be: in range, higher than F1, and properly spaced from F1
|
|
if (freqOffset >= 900.0 && freqOffset <= 2500.0 &&
|
|
f1ToF2Spacing >= 400.0 && f1ToF2Spacing <= 1500.0)
|
|
{
|
|
hasF2 = true;
|
|
f2Idx = formantIndices[i];
|
|
f2Freq = freqOffset;
|
|
f2Mag = formantMags[f2Idx];
|
|
break; // Take the first valid F2
|
|
}
|
|
}
|
|
|
|
if (!hasF2) {
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel: No valid F2 formant, candidates not in range/spacing";
|
|
return 0.0; // No F2 formant - not voice or mistuned
|
|
}
|
|
|
|
// Calculate voice activity score based on formant characteristics
|
|
// Voice is indicated by presence of F1 and F2 formants - this is the primary voice signature
|
|
// Harmonics are less reliable in SSB due to spectral properties and noise
|
|
float score = 0.0;
|
|
|
|
// Base score from number of formants (2-4 formants typical for voice)
|
|
// Formants are the most reliable voice indicator
|
|
// Use merged formant count, not raw peak count
|
|
float formantScore = std::min(formantFreqs.size() / 3.0f, 1.0f);
|
|
score += formantScore * 0.6; // 60% weight
|
|
|
|
// Score from formant magnitude (strong formants = strong voice)
|
|
// Higher magnitudes indicate clearer voice detection
|
|
// Make this threshold appropriately high to prefer strong signals
|
|
float magnitudeScore = std::min((float)(f1Mag + f2Mag) / (float)(noiseFloor * 6.0), 1.0f);
|
|
score += magnitudeScore * 0.4; // 40% weight
|
|
|
|
// Apply a soft pitch-based weighting. Pitch helps detect detuning, but should not gate voice.
|
|
const float minPitchHz = 70.0f;
|
|
const float maxPitchHz = 300.0f;
|
|
const float softMinPitchHz = 50.0f;
|
|
const float softMaxPitchHz = 400.0f;
|
|
float pitchScore = 0.7f;
|
|
|
|
if (pitchHz > 0.0f) {
|
|
if (pitchHz >= minPitchHz && pitchHz <= maxPitchHz) {
|
|
pitchScore = 1.0f;
|
|
} else if (pitchHz >= softMinPitchHz && pitchHz <= softMaxPitchHz) {
|
|
if (pitchHz < minPitchHz) {
|
|
pitchScore = 0.7f + 0.3f * (pitchHz - softMinPitchHz) / (minPitchHz - softMinPitchHz);
|
|
} else {
|
|
pitchScore = 0.7f + 0.3f * (softMaxPitchHz - pitchHz) / (softMaxPitchHz - maxPitchHz);
|
|
}
|
|
} else {
|
|
pitchScore = 0.6f;
|
|
}
|
|
}
|
|
|
|
score *= pitchScore;
|
|
|
|
// Clamp to [0, 1]
|
|
score = std::max(0.0f, std::min(score, 1.0f));
|
|
|
|
// if (score > m_settings.m_voiceSquelchThreshold) {
|
|
// qDebug() << "FreqScannerSink::voiceActivityLevel:"
|
|
// << "freq:" << freq + (isLSB ? 1500 : -1500) << "Hz"
|
|
// << "formants:" << formantFreqs.size() << "(merged from" << formantBins.size() << "peaks)"
|
|
// << "F1:" << (hasF1 ? QString::number(formantFreqs[0], 'f', 0) : "none") << "Hz"
|
|
// << "F2:" << (hasF2 ? QString::number(f2Freq, 'f', 0) : "none") << "Hz"
|
|
// << "spacing:" << (hasF2 ? QString::number(f2Freq - formantFreqs[0], 'f', 0) : "none") << "Hz"
|
|
// << "peak-to-noise:" << (maxEnv / noiseFloor)
|
|
// << "noiseFloor:" << noiseFloor << "threshold:" << threshold
|
|
// << "f1Mag:" << f1Mag << "f2Mag:" << f2Mag
|
|
// << "pitchHz:" << pitchHz << "pitchScore:" << pitchScore
|
|
// << "score:" << score;
|
|
// }
|
|
|
|
return score;
|
|
}
|
|
|