DATV: soft LDPC (1)

f4exb 2021-03-02 06:02:38 +01:00
parent 811f1462aa
commit dc8f3ee9d9
29 changed files with 14815 additions and 23 deletions

View File

@@ -18,6 +18,7 @@ set(datv_SOURCES
leansdr/framework.cpp
leansdr/math.cpp
leansdr/sdr.cpp
ldpctool/tables_handler.cpp
datvdemodgui.ui
)
@@ -41,6 +42,10 @@ set(datv_HEADERS
leansdr/framework.h
leansdr/math.h
leansdr/sdr.h
ldpctool/ldpc.h
ldpctool/dvb_s2_tables.h
ldpctool/dvb_s2x_tables.h
ldpctool/dvb_t2_tables.h
)
include_directories(

View File

@@ -420,6 +420,11 @@ void DATVDemodSink::CleanUpDATVFramework(bool blnRelease)
delete (leansdr::s2_fecdec<bool, leansdr::hard_sb>*) r_fecdec;
}
if(r_fecdecsoft != nullptr)
{
delete (leansdr::s2_fecdec_soft<leansdr::llr_t,leansdr::llr_sb>*) r_fecdecsoft;
}
if(p_deframer != nullptr)
{
delete (leansdr::s2_deframer*) p_deframer;
@@ -533,6 +538,7 @@ void DATVDemodSink::CleanUpDATVFramework(bool blnRelease)
p_bbframes = nullptr;
p_s2_deinterleaver = nullptr;
r_fecdec = nullptr;
r_fecdecsoft = nullptr;
p_deframer = nullptr;
r_scope_symbols_dvbs2 = nullptr;
}
@@ -1118,32 +1124,55 @@ void DATVDemodSink::InitDATVS2Framework()
p_bbframes = new leansdr::pipebuf<leansdr::bbframe>(m_objScheduler, "BB frames", BUF_FRAMES);
p_fecframes = new leansdr::pipebuf< leansdr::fecframe<leansdr::hard_sb> >(m_objScheduler, "FEC frames", BUF_FRAMES);
// p_fecframes = new leansdr::pipebuf< leansdr::fecframe<leansdr::hard_sb> >(m_objScheduler, "FEC frames", BUF_FRAMES);
p_s2_deinterleaver = new leansdr::s2_deinterleaver<leansdr::llr_ss,leansdr::hard_sb>(
m_objScheduler,
*(leansdr::pipebuf< leansdr::plslot<leansdr::llr_ss> > *) p_slots_dvbs2,
*(leansdr::pipebuf< leansdr::fecframe<leansdr::hard_sb> > * ) p_fecframes
);
// p_s2_deinterleaver = new leansdr::s2_deinterleaver<leansdr::llr_ss,leansdr::hard_sb>(
// m_objScheduler,
// *(leansdr::pipebuf< leansdr::plslot<leansdr::llr_ss> > *) p_slots_dvbs2,
// *(leansdr::pipebuf< leansdr::fecframe<leansdr::hard_sb> > * ) p_fecframes
// );
p_vbitcount= new leansdr::pipebuf<int>(m_objScheduler, "Bits processed", BUF_S2PACKETS);
p_verrcount = new leansdr::pipebuf<int>(m_objScheduler, "Bits corrected", BUF_S2PACKETS);
r_fecdec = new leansdr::s2_fecdec<bool, leansdr::hard_sb>(
m_objScheduler, *(leansdr::pipebuf< leansdr::fecframe<leansdr::hard_sb> > * ) p_fecframes,
*(leansdr::pipebuf<leansdr::bbframe> *) p_bbframes,
p_vbitcount,
p_verrcount
);
leansdr::s2_fecdec<bool, leansdr::hard_sb> *fecdec = (leansdr::s2_fecdec<bool, leansdr::hard_sb> * ) r_fecdec;
fecdec->bitflips=m_settings.m_maxBitflips;
/*
fecdec->bitflips = cfg.ldpc_bf; //int TODO
if ( ! cfg.ldpc_bf )
fprintf(stderr, "Warning: No LDPC error correction selected.\n")
*/
if (m_settings.m_softLDPC)
{
// External LDPC decoder mode.
// Deinterleave into soft bits.
p_fecframes = new leansdr::pipebuf<leansdr::fecframe<leansdr::llr_sb> >(m_objScheduler, "FEC frames", BUF_FRAMES);
p_s2_deinterleaver = new leansdr::s2_deinterleaver<leansdr::llr_ss, leansdr::llr_sb>(
m_objScheduler,
*(leansdr::pipebuf< leansdr::plslot<leansdr::llr_ss> > *) p_slots_dvbs2,
*(leansdr::pipebuf< leansdr::fecframe<leansdr::llr_sb> > * ) p_fecframes
);
r_fecdecsoft = new leansdr::s2_fecdec_soft<leansdr::llr_t,leansdr::llr_sb>(
m_objScheduler, *(leansdr::pipebuf< leansdr::fecframe<leansdr::llr_sb> > * ) p_fecframes,
*(leansdr::pipebuf<leansdr::bbframe> *) p_bbframes,
m_modcodModulation < 0 ? 0 : m_modcodModulation,
true, 5,
p_vbitcount,
p_verrcount
);
}
else
{
// Bit-flipping mode.
// Deinterleave into hard bits.
p_fecframes = new leansdr::pipebuf< leansdr::fecframe<leansdr::hard_sb> >(m_objScheduler, "FEC frames", BUF_FRAMES);
p_s2_deinterleaver = new leansdr::s2_deinterleaver<leansdr::llr_ss,leansdr::hard_sb>(
m_objScheduler,
*(leansdr::pipebuf< leansdr::plslot<leansdr::llr_ss> > *) p_slots_dvbs2,
*(leansdr::pipebuf< leansdr::fecframe<leansdr::hard_sb> > * ) p_fecframes
);
r_fecdec = new leansdr::s2_fecdec<bool, leansdr::hard_sb>(
m_objScheduler, *(leansdr::pipebuf< leansdr::fecframe<leansdr::hard_sb> > * ) p_fecframes,
*(leansdr::pipebuf<leansdr::bbframe> *) p_bbframes,
p_vbitcount,
p_verrcount
);
leansdr::s2_fecdec<bool, leansdr::hard_sb> *fecdec = (leansdr::s2_fecdec<bool, leansdr::hard_sb> * ) r_fecdec;
fecdec->bitflips=m_settings.m_maxBitflips;
}
// Deframe BB frames to TS packets
p_lock = new leansdr::pipebuf<int> (m_objScheduler, "lock", BUF_SLOW);
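Note on the if/else added above: with m_settings.m_softLDPC enabled, the deinterleaver output type changes from packed hard bits (fecframe<hard_sb>) to soft LLR containers (fecframe<llr_sb>), and the frames go to the new s2_fecdec_soft stage instead of the bit-flipping s2_fecdec. The soft values are log-likelihood ratios,

    LLR(b) = ln( P(b = 0 | r) / P(b = 1 | r) )

where r is the demodulated sample: the sign carries the hard decision and the magnitude carries the confidence that the bit-flipping path discards. The exact sign convention and the remaining s2_fecdec_soft constructor arguments are internal to leansdr/ldpctool and are not restated here.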

View File

@@ -224,6 +224,7 @@ private:
void *p_bbframes;
void *p_s2_deinterleaver;
void *r_fecdec;
void *r_fecdecsoft;
void *p_deframer;
//DECIMATION

View File

@@ -0,0 +1,416 @@
/*
SIMD-ified LDPC algorithms
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef ALGORITHMS_HH
#define ALGORITHMS_HH
#include "generic.h"
#include "exclusive_reduce.h"
namespace ldpctool {
template <typename VALUE, int WIDTH>
struct SelfCorrectedUpdate<SIMD<VALUE, WIDTH>>
{
typedef SIMD<VALUE, WIDTH> TYPE;
static void update(TYPE *a, TYPE b)
{
*a = vreinterpret<TYPE>(vand(vmask(b), vorr(vceqz(*a), veor(vcgtz(*a), vcltz(b)))));
}
};
template <typename VALUE, int WIDTH, typename UPDATE>
struct MinSumAlgorithm<SIMD<VALUE, WIDTH>, UPDATE>
{
typedef SIMD<VALUE, WIDTH> TYPE;
static TYPE zero()
{
return vzero<TYPE>();
}
static TYPE one()
{
return vdup<TYPE>(1);
}
static TYPE min(TYPE a, TYPE b)
{
return vmin(a, b);
}
static TYPE sign(TYPE a, TYPE b)
{
return vsign(a, b);
}
static void finalp(TYPE *links, int cnt)
{
TYPE mags[cnt], mins[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = vabs(links[i]);
CODE::exclusive_reduce(mags, mins, cnt, min);
TYPE signs[cnt];
CODE::exclusive_reduce(links, signs, cnt, sign);
for (int i = 0; i < cnt; ++i)
links[i] = sign(mins[i], signs[i]);
}
static TYPE add(TYPE a, TYPE b)
{
return vadd(a, b);
}
static TYPE sub(TYPE a, TYPE b)
{
return vsub(a, b);
}
static bool bad(TYPE v, int blocks)
{
auto tmp = vcgtz(v);
for (int i = 0; i < blocks; ++i)
if (!tmp.v[i])
return true;
return false;
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
template <int WIDTH, typename UPDATE>
struct MinSumAlgorithm<SIMD<int8_t, WIDTH>, UPDATE>
{
typedef int8_t VALUE;
typedef SIMD<VALUE, WIDTH> TYPE;
static TYPE zero()
{
return vzero<TYPE>();
}
static TYPE one()
{
return vdup<TYPE>(1);
}
static TYPE sign(TYPE a, TYPE b)
{
return vsign(a, b);
}
static TYPE eor(TYPE a, TYPE b)
{
return vreinterpret<TYPE>(veor(vmask(a), vmask(b)));
}
static TYPE orr(TYPE a, TYPE b)
{
return vreinterpret<TYPE>(vorr(vmask(a), vmask(b)));
}
static TYPE other(TYPE a, TYPE b, TYPE c)
{
return vreinterpret<TYPE>(vbsl(vceq(a, b), vmask(c), vmask(b)));
}
static void finalp(TYPE *links, int cnt)
{
TYPE mags[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = vqabs(links[i]);
TYPE mins[2];
mins[0] = vmin(mags[0], mags[1]);
mins[1] = vmax(mags[0], mags[1]);
for (int i = 2; i < cnt; ++i) {
mins[1] = vmin(mins[1], vmax(mins[0], mags[i]));
mins[0] = vmin(mins[0], mags[i]);
}
TYPE signs = links[0];
for (int i = 1; i < cnt; ++i)
signs = eor(signs, links[i]);
for (int i = 0; i < cnt; ++i)
links[i] = sign(other(mags[i], mins[0], mins[1]), orr(eor(signs, links[i]), vdup<TYPE>(127)));
}
static TYPE add(TYPE a, TYPE b)
{
return vqadd(a, b);
}
static TYPE sub(TYPE a, TYPE b)
{
return vqsub(a, b);
}
static bool bad(TYPE v, int blocks)
{
auto tmp = vcgtz(v);
for (int i = 0; i < blocks; ++i)
if (!tmp.v[i])
return true;
return false;
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, vmin(vmax(b, vdup<TYPE>(-32)), vdup<TYPE>(31)));
}
};
template <typename VALUE, int WIDTH, typename UPDATE, int FACTOR>
struct OffsetMinSumAlgorithm<SIMD<VALUE, WIDTH>, UPDATE, FACTOR>
{
typedef SIMD<VALUE, WIDTH> TYPE;
static TYPE zero()
{
return vzero<TYPE>();
}
static TYPE one()
{
return vdup<TYPE>(1);
}
static TYPE min(TYPE a, TYPE b)
{
return vmin(a, b);
}
static TYPE sign(TYPE a, TYPE b)
{
return vsign(a, b);
}
static void finalp(TYPE *links, int cnt)
{
TYPE beta = vdup<TYPE>(0.5 * FACTOR);
TYPE mags[cnt], mins[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = vmax(vsub(vabs(links[i]), beta), vzero<TYPE>());
CODE::exclusive_reduce(mags, mins, cnt, min);
TYPE signs[cnt];
CODE::exclusive_reduce(links, signs, cnt, sign);
for (int i = 0; i < cnt; ++i)
links[i] = sign(mins[i], signs[i]);
}
static TYPE add(TYPE a, TYPE b)
{
return vadd(a, b);
}
static TYPE sub(TYPE a, TYPE b)
{
return vsub(a, b);
}
static bool bad(TYPE v, int blocks)
{
auto tmp = vcgtz(v);
for (int i = 0; i < blocks; ++i)
if (!tmp.v[i])
return true;
return false;
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
template <int WIDTH, typename UPDATE, int FACTOR>
struct OffsetMinSumAlgorithm<SIMD<int8_t, WIDTH>, UPDATE, FACTOR>
{
typedef int8_t VALUE;
typedef SIMD<VALUE, WIDTH> TYPE;
static TYPE zero()
{
return vzero<TYPE>();
}
static TYPE one()
{
return vdup<TYPE>(1);
}
static TYPE sign(TYPE a, TYPE b)
{
return vsign(a, b);
}
static TYPE eor(TYPE a, TYPE b)
{
return vreinterpret<TYPE>(veor(vmask(a), vmask(b)));
}
static TYPE orr(TYPE a, TYPE b)
{
return vreinterpret<TYPE>(vorr(vmask(a), vmask(b)));
}
static TYPE other(TYPE a, TYPE b, TYPE c)
{
return vreinterpret<TYPE>(vbsl(vceq(a, b), vmask(c), vmask(b)));
}
static void finalp(TYPE *links, int cnt)
{
auto beta = vunsigned(vdup<TYPE>(std::nearbyint(0.5 * FACTOR)));
TYPE mags[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = vsigned(vqsub(vunsigned(vqabs(links[i])), beta));
TYPE mins[2];
mins[0] = vmin(mags[0], mags[1]);
mins[1] = vmax(mags[0], mags[1]);
for (int i = 2; i < cnt; ++i) {
mins[1] = vmin(mins[1], vmax(mins[0], mags[i]));
mins[0] = vmin(mins[0], mags[i]);
}
TYPE signs = links[0];
for (int i = 1; i < cnt; ++i)
signs = eor(signs, links[i]);
for (int i = 0; i < cnt; ++i)
links[i] = sign(other(mags[i], mins[0], mins[1]), orr(eor(signs, links[i]), vdup<TYPE>(127)));
}
static TYPE add(TYPE a, TYPE b)
{
return vqadd(a, b);
}
static TYPE sub(TYPE a, TYPE b)
{
return vqsub(a, b);
}
static bool bad(TYPE v, int blocks)
{
auto tmp = vcgtz(v);
for (int i = 0; i < blocks; ++i)
if (!tmp.v[i])
return true;
return false;
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, vmin(vmax(b, vdup<TYPE>(-32)), vdup<TYPE>(31)));
}
};
template <typename VALUE, int WIDTH, typename UPDATE, int FACTOR>
struct MinSumCAlgorithm<SIMD<VALUE, WIDTH>, UPDATE, FACTOR>
{
typedef SIMD<VALUE, WIDTH> TYPE;
static TYPE zero()
{
return vzero<TYPE>();
}
static TYPE one()
{
return vdup<TYPE>(1);
}
static TYPE sign(TYPE a, TYPE b)
{
return vsign(a, b);
}
static TYPE correction_factor(TYPE a, TYPE b)
{
TYPE apb = vabs(vadd(a, b));
TYPE apb2 = vadd(apb, apb);
TYPE amb = vabs(vsub(a, b));
TYPE amb2 = vadd(amb, amb);
TYPE factor2 = vdup<TYPE>(FACTOR * 2);
auto pc = vmask(vdup<TYPE>(VALUE(FACTOR) / VALUE(2)));
auto nc = vmask(vdup<TYPE>(-VALUE(FACTOR) / VALUE(2)));
pc = vand(pc, vand(vcgt(factor2, apb), vcgt(amb, apb2)));
nc = vand(nc, vand(vcgt(factor2, amb), vcgt(apb, amb2)));
return vreinterpret<TYPE>(vorr(pc, nc));
}
static TYPE minc(TYPE a, TYPE b)
{
TYPE m = vmin(vabs(a), vabs(b));
TYPE x = vsign(vsign(m, a), b);
x = vadd(x, correction_factor(a, b));
return x;
}
static void finalp(TYPE *links, int cnt)
{
TYPE tmp[cnt];
CODE::exclusive_reduce(links, tmp, cnt, minc);
for (int i = 0; i < cnt; ++i)
links[i] = tmp[i];
}
static TYPE add(TYPE a, TYPE b)
{
return vadd(a, b);
}
static TYPE sub(TYPE a, TYPE b)
{
return vsub(a, b);
}
static bool bad(TYPE v, int blocks)
{
auto tmp = vcgtz(v);
for (int i = 0; i < blocks; ++i)
if (!tmp.v[i])
return true;
return false;
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
template <int WIDTH, typename UPDATE, int FACTOR>
struct MinSumCAlgorithm<SIMD<int8_t, WIDTH>, UPDATE, FACTOR>
{
typedef int8_t VALUE;
typedef SIMD<VALUE, WIDTH> TYPE;
static TYPE zero()
{
return vzero<TYPE>();
}
static TYPE one()
{
return vdup<TYPE>(1);
}
static TYPE sign(TYPE a, TYPE b)
{
return vsign(a, b);
}
static TYPE correction_factor(TYPE a, TYPE b)
{
TYPE apb = vqabs(vqadd(a, b));
TYPE apb2 = vqadd(apb, apb);
TYPE amb = vqabs(vqsub(a, b));
TYPE amb2 = vqadd(amb, amb);
TYPE factor2 = vdup<TYPE>(FACTOR * 2);
auto pc = vmask(vdup<TYPE>(VALUE(FACTOR) / VALUE(2)));
auto nc = vmask(vdup<TYPE>(-VALUE(FACTOR) / VALUE(2)));
pc = vand(pc, vand(vcgt(factor2, apb), vcgt(amb, apb2)));
nc = vand(nc, vand(vcgt(factor2, amb), vcgt(apb, amb2)));
return vreinterpret<TYPE>(vorr(pc, nc));
}
static TYPE minc(TYPE a, TYPE b)
{
TYPE m = vmin(vqabs(a), vqabs(b));
TYPE x = vsign(vsign(m, a), b);
x = vqadd(x, correction_factor(a, b));
return x;
}
static void finalp(TYPE *links, int cnt)
{
TYPE *tmp = new TYPE[cnt];
CODE::exclusive_reduce(links, tmp, cnt, minc);
for (int i = 0; i < cnt; ++i)
links[i] = tmp[i];
delete[] tmp;
}
static TYPE add(TYPE a, TYPE b)
{
return vqadd(a, b);
}
static TYPE sub(TYPE a, TYPE b)
{
return vqsub(a, b);
}
static bool bad(TYPE v, int blocks)
{
auto tmp = vcgtz(v);
for (int i = 0; i < blocks; ++i)
if (!tmp.v[i])
return true;
return false;
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, vmin(vmax(b, vdup<TYPE>(-32)), vdup<TYPE>(31)));
}
};
} // namespace ldpctool
#endif
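The int8_t specializations of finalp() above avoid a full exclusive reduction by keeping only the two smallest magnitudes: every link gets the overall minimum, except the link holding that minimum, which gets the second smallest (ties work out because the two values are then equal). A scalar sketch of that trick, ignoring the sign handling and assuming cnt >= 2 as check nodes always have degree at least 2 (illustrative only, not part of the plugin):

    #include <algorithm>
    #include <cstdlib>
    #include <utility>

    // out[i] = min over j != i of |in[j]|, using only the two smallest magnitudes.
    void exclusive_min_abs(const int *in, int *out, int cnt)
    {
        int m0 = std::abs(in[0]), m1 = std::abs(in[1]);
        if (m1 < m0)
            std::swap(m0, m1);                   // m0 smallest, m1 second smallest so far
        for (int i = 2; i < cnt; ++i) {
            int m = std::abs(in[i]);
            m1 = std::min(m1, std::max(m0, m));  // same update as mins[0]/mins[1] above
            m0 = std::min(m0, m);
        }
        for (int i = 0; i < cnt; ++i)
            out[i] = (std::abs(in[i]) == m0) ? m1 : m0;  // what other(mags[i], mins[0], mins[1]) selects
    }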

View File

@@ -0,0 +1,979 @@
/*
Intel AVX2 acceleration
Copyright 2018 Ahmet Inan <inan@aicodix.de>
*/
#ifndef AVX2_HH
#define AVX2_HH
#include <immintrin.h>
#include "simd.h"
namespace ldpctool {
template <>
union SIMD<float, 8>
{
static const int SIZE = 8;
typedef float value_type;
typedef uint32_t uint_type;
__m256 m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<double, 4>
{
static const int SIZE = 4;
typedef double value_type;
typedef uint64_t uint_type;
__m256d m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int8_t, 32>
{
static const int SIZE = 32;
typedef int8_t value_type;
typedef uint8_t uint_type;
__m256i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int16_t, 16>
{
static const int SIZE = 16;
typedef int16_t value_type;
typedef uint16_t uint_type;
__m256i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int32_t, 8>
{
static const int SIZE = 8;
typedef int32_t value_type;
typedef uint32_t uint_type;
__m256i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int64_t, 4>
{
static const int SIZE = 4;
typedef int64_t value_type;
typedef uint64_t uint_type;
__m256i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint8_t, 32>
{
static const int SIZE = 32;
typedef uint8_t value_type;
typedef uint8_t uint_type;
__m256i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint16_t, 16>
{
static const int SIZE = 16;
typedef uint16_t value_type;
typedef uint16_t uint_type;
__m256i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint32_t, 8>
{
static const int SIZE = 8;
typedef uint32_t value_type;
typedef uint32_t uint_type;
__m256i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint64_t, 4>
{
static const int SIZE = 4;
typedef uint64_t value_type;
typedef uint64_t uint_type;
__m256i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
inline SIMD<float, 8> vreinterpret(SIMD<uint32_t, 8> a)
{
SIMD<float, 8> tmp;
tmp.m = (__m256)a.m;
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vreinterpret(SIMD<float, 8> a)
{
SIMD<uint32_t, 8> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<double, 4> vreinterpret(SIMD<uint64_t, 4> a)
{
SIMD<double, 4> tmp;
tmp.m = (__m256d)a.m;
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vreinterpret(SIMD<double, 4> a)
{
SIMD<uint64_t, 4> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vreinterpret(SIMD<int8_t, 32> a)
{
SIMD<uint8_t, 32> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<int8_t, 32> vreinterpret(SIMD<uint8_t, 32> a)
{
SIMD<int8_t, 32> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vreinterpret(SIMD<int16_t, 16> a)
{
SIMD<uint16_t, 16> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<int16_t, 16> vreinterpret(SIMD<uint16_t, 16> a)
{
SIMD<int16_t, 16> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vreinterpret(SIMD<int32_t, 8> a)
{
SIMD<uint32_t, 8> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<int32_t, 8> vreinterpret(SIMD<uint32_t, 8> a)
{
SIMD<int32_t, 8> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vreinterpret(SIMD<int64_t, 4> a)
{
SIMD<uint64_t, 4> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<int64_t, 4> vreinterpret(SIMD<uint64_t, 4> a)
{
SIMD<int64_t, 4> tmp;
tmp.m = (__m256i)a.m;
return tmp;
}
template <>
inline SIMD<float, 8> vdup<SIMD<float, 8>>(float a)
{
SIMD<float, 8> tmp;
tmp.m = _mm256_set1_ps(a);
return tmp;
}
template <>
inline SIMD<double, 4> vdup<SIMD<double, 4>>(double a)
{
SIMD<double, 4> tmp;
tmp.m = _mm256_set1_pd(a);
return tmp;
}
template <>
inline SIMD<int8_t, 32> vdup<SIMD<int8_t, 32>>(int8_t a)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_set1_epi8(a);
return tmp;
}
template <>
inline SIMD<int16_t, 16> vdup<SIMD<int16_t, 16>>(int16_t a)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_set1_epi16(a);
return tmp;
}
template <>
inline SIMD<int32_t, 8> vdup<SIMD<int32_t, 8>>(int32_t a)
{
SIMD<int32_t, 8> tmp;
tmp.m = _mm256_set1_epi32(a);
return tmp;
}
template <>
inline SIMD<int64_t, 4> vdup<SIMD<int64_t, 4>>(int64_t a)
{
SIMD<int64_t, 4> tmp;
tmp.m = _mm256_set1_epi64x(a);
return tmp;
}
template <>
inline SIMD<float, 8> vzero()
{
SIMD<float, 8> tmp;
tmp.m = _mm256_setzero_ps();
return tmp;
}
template <>
inline SIMD<double, 4> vzero()
{
SIMD<double, 4> tmp;
tmp.m = _mm256_setzero_pd();
return tmp;
}
template <>
inline SIMD<int8_t, 32> vzero()
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_setzero_si256();
return tmp;
}
template <>
inline SIMD<int16_t, 16> vzero()
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_setzero_si256();
return tmp;
}
template <>
inline SIMD<int32_t, 8> vzero()
{
SIMD<int32_t, 8> tmp;
tmp.m = _mm256_setzero_si256();
return tmp;
}
template <>
inline SIMD<int64_t, 4> vzero()
{
SIMD<int64_t, 4> tmp;
tmp.m = _mm256_setzero_si256();
return tmp;
}
template <>
inline SIMD<float, 8> vadd(SIMD<float, 8> a, SIMD<float, 8> b)
{
SIMD<float, 8> tmp;
tmp.m = _mm256_add_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<double, 4> vadd(SIMD<double, 4> a, SIMD<double, 4> b)
{
SIMD<double, 4> tmp;
tmp.m = _mm256_add_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 32> vadd(SIMD<int8_t, 32> a, SIMD<int8_t, 32> b)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_add_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 16> vadd(SIMD<int16_t, 16> a, SIMD<int16_t, 16> b)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_add_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 8> vadd(SIMD<int32_t, 8> a, SIMD<int32_t, 8> b)
{
SIMD<int32_t, 8> tmp;
tmp.m = _mm256_add_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int64_t, 4> vadd(SIMD<int64_t, 4> a, SIMD<int64_t, 4> b)
{
SIMD<int64_t, 4> tmp;
tmp.m = _mm256_add_epi64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 32> vqadd(SIMD<int8_t, 32> a, SIMD<int8_t, 32> b)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_adds_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 16> vqadd(SIMD<int16_t, 16> a, SIMD<int16_t, 16> b)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_adds_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<float, 8> vsub(SIMD<float, 8> a, SIMD<float, 8> b)
{
SIMD<float, 8> tmp;
tmp.m = _mm256_sub_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<double, 4> vsub(SIMD<double, 4> a, SIMD<double, 4> b)
{
SIMD<double, 4> tmp;
tmp.m = _mm256_sub_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 32> vsub(SIMD<int8_t, 32> a, SIMD<int8_t, 32> b)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_sub_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 16> vsub(SIMD<int16_t, 16> a, SIMD<int16_t, 16> b)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_sub_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 8> vsub(SIMD<int32_t, 8> a, SIMD<int32_t, 8> b)
{
SIMD<int32_t, 8> tmp;
tmp.m = _mm256_sub_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int64_t, 4> vsub(SIMD<int64_t, 4> a, SIMD<int64_t, 4> b)
{
SIMD<int64_t, 4> tmp;
tmp.m = _mm256_sub_epi64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 32> vqsub(SIMD<int8_t, 32> a, SIMD<int8_t, 32> b)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_subs_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 16> vqsub(SIMD<int16_t, 16> a, SIMD<int16_t, 16> b)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_subs_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vqsub(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_subs_epu8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vqsub(SIMD<uint16_t, 16> a, SIMD<uint16_t, 16> b)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_subs_epu16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<float, 8> vabs(SIMD<float, 8> a)
{
SIMD<float, 8> tmp;
tmp.m = _mm256_andnot_ps(_mm256_set1_ps(-0.f), a.m);
return tmp;
}
template <>
inline SIMD<double, 4> vabs(SIMD<double, 4> a)
{
SIMD<double, 4> tmp;
tmp.m = _mm256_andnot_pd(_mm256_set1_pd(-0.), a.m);
return tmp;
}
template <>
inline SIMD<int8_t, 32> vqabs(SIMD<int8_t, 32> a)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_abs_epi8(_mm256_max_epi8(a.m, _mm256_set1_epi8(-INT8_MAX)));
return tmp;
}
template <>
inline SIMD<int16_t, 16> vqabs(SIMD<int16_t, 16> a)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_abs_epi16(_mm256_max_epi16(a.m, _mm256_set1_epi16(-INT16_MAX)));
return tmp;
}
template <>
inline SIMD<int32_t, 8> vqabs(SIMD<int32_t, 8> a)
{
SIMD<int32_t, 8> tmp;
tmp.m = _mm256_abs_epi32(_mm256_max_epi32(a.m, _mm256_set1_epi32(-INT32_MAX)));
return tmp;
}
template <>
inline SIMD<float, 8> vsign(SIMD<float, 8> a, SIMD<float, 8> b)
{
SIMD<float, 8> tmp;
tmp.m = _mm256_andnot_ps(
_mm256_cmp_ps(b.m, _mm256_setzero_ps(), _CMP_EQ_OQ),
_mm256_xor_ps(a.m, _mm256_and_ps(_mm256_set1_ps(-0.f), b.m)));
return tmp;
}
template <>
inline SIMD<double, 4> vsign(SIMD<double, 4> a, SIMD<double, 4> b)
{
SIMD<double, 4> tmp;
tmp.m = _mm256_andnot_pd(
_mm256_cmp_pd(b.m, _mm256_setzero_pd(), _CMP_EQ_OQ),
_mm256_xor_pd(a.m, _mm256_and_pd(_mm256_set1_pd(-0.), b.m)));
return tmp;
}
template <>
inline SIMD<int8_t, 32> vsign(SIMD<int8_t, 32> a, SIMD<int8_t, 32> b)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_sign_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 16> vsign(SIMD<int16_t, 16> a, SIMD<int16_t, 16> b)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_sign_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 8> vsign(SIMD<int32_t, 8> a, SIMD<int32_t, 8> b)
{
SIMD<int32_t, 8> tmp;
tmp.m = _mm256_sign_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vorr(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_or_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vorr(SIMD<uint16_t, 16> a, SIMD<uint16_t, 16> b)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_or_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vorr(SIMD<uint32_t, 8> a, SIMD<uint32_t, 8> b)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_or_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vorr(SIMD<uint64_t, 4> a, SIMD<uint64_t, 4> b)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_or_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vand(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_and_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vand(SIMD<uint16_t, 16> a, SIMD<uint16_t, 16> b)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_and_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vand(SIMD<uint32_t, 8> a, SIMD<uint32_t, 8> b)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_and_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vand(SIMD<uint64_t, 4> a, SIMD<uint64_t, 4> b)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_and_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> veor(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_xor_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 16> veor(SIMD<uint16_t, 16> a, SIMD<uint16_t, 16> b)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_xor_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 8> veor(SIMD<uint32_t, 8> a, SIMD<uint32_t, 8> b)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_xor_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> veor(SIMD<uint64_t, 4> a, SIMD<uint64_t, 4> b)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_xor_si256(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vbic(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_andnot_si256(b.m, a.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vbic(SIMD<uint16_t, 16> a, SIMD<uint16_t, 16> b)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_andnot_si256(b.m, a.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vbic(SIMD<uint32_t, 8> a, SIMD<uint32_t, 8> b)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_andnot_si256(b.m, a.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vbic(SIMD<uint64_t, 4> a, SIMD<uint64_t, 4> b)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_andnot_si256(b.m, a.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vbsl(SIMD<uint8_t, 32> a, SIMD<uint8_t, 32> b, SIMD<uint8_t, 32> c)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_or_si256(_mm256_and_si256(a.m, b.m), _mm256_andnot_si256(a.m, c.m));
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vbsl(SIMD<uint16_t, 16> a, SIMD<uint16_t, 16> b, SIMD<uint16_t, 16> c)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_or_si256(_mm256_and_si256(a.m, b.m), _mm256_andnot_si256(a.m, c.m));
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vbsl(SIMD<uint32_t, 8> a, SIMD<uint32_t, 8> b, SIMD<uint32_t, 8> c)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_or_si256(_mm256_and_si256(a.m, b.m), _mm256_andnot_si256(a.m, c.m));
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vbsl(SIMD<uint64_t, 4> a, SIMD<uint64_t, 4> b, SIMD<uint64_t, 4> c)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_or_si256(_mm256_and_si256(a.m, b.m), _mm256_andnot_si256(a.m, c.m));
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vceqz(SIMD<float, 8> a)
{
SIMD<uint32_t, 8> tmp;
tmp.m = (__m256i)_mm256_cmp_ps(a.m, _mm256_setzero_ps(), _CMP_EQ_OQ);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vceqz(SIMD<double, 4> a)
{
SIMD<uint64_t, 4> tmp;
tmp.m = (__m256i)_mm256_cmp_pd(a.m, _mm256_setzero_pd(), _CMP_EQ_OQ);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vceqz(SIMD<int8_t, 32> a)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_cmpeq_epi8(a.m, _mm256_setzero_si256());
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vceqz(SIMD<int16_t, 16> a)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_cmpeq_epi16(a.m, _mm256_setzero_si256());
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vceqz(SIMD<int32_t, 8> a)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_cmpeq_epi32(a.m, _mm256_setzero_si256());
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vceqz(SIMD<int64_t, 4> a)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_cmpeq_epi64(a.m, _mm256_setzero_si256());
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vceq(SIMD<float, 8> a, SIMD<float, 8> b)
{
SIMD<uint32_t, 8> tmp;
tmp.m = (__m256i)_mm256_cmp_ps(a.m, b.m, _CMP_EQ_OQ);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vceq(SIMD<double, 4> a, SIMD<double, 4> b)
{
SIMD<uint64_t, 4> tmp;
tmp.m = (__m256i)_mm256_cmp_pd(a.m, b.m, _CMP_EQ_OQ);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vceq(SIMD<int8_t, 32> a, SIMD<int8_t, 32> b)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_cmpeq_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vceq(SIMD<int16_t, 16> a, SIMD<int16_t, 16> b)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_cmpeq_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vceq(SIMD<int32_t, 8> a, SIMD<int32_t, 8> b)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_cmpeq_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vceq(SIMD<int64_t, 4> a, SIMD<int64_t, 4> b)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_cmpeq_epi64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vcgtz(SIMD<float, 8> a)
{
SIMD<uint32_t, 8> tmp;
tmp.m = (__m256i)_mm256_cmp_ps(a.m, _mm256_setzero_ps(), _CMP_GT_OQ);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vcgtz(SIMD<double, 4> a)
{
SIMD<uint64_t, 4> tmp;
tmp.m = (__m256i)_mm256_cmp_pd(a.m, _mm256_setzero_pd(), _CMP_GT_OQ);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vcgtz(SIMD<int8_t, 32> a)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_cmpgt_epi8(a.m, _mm256_setzero_si256());
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vcgtz(SIMD<int16_t, 16> a)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_cmpgt_epi16(a.m, _mm256_setzero_si256());
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vcgtz(SIMD<int32_t, 8> a)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_cmpgt_epi32(a.m, _mm256_setzero_si256());
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vcgtz(SIMD<int64_t, 4> a)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_cmpgt_epi64(a.m, _mm256_setzero_si256());
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vcltz(SIMD<float, 8> a)
{
SIMD<uint32_t, 8> tmp;
tmp.m = (__m256i)_mm256_cmp_ps(a.m, _mm256_setzero_ps(), _CMP_LT_OQ);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vcltz(SIMD<double, 4> a)
{
SIMD<uint64_t, 4> tmp;
tmp.m = (__m256i)_mm256_cmp_pd(a.m, _mm256_setzero_pd(), _CMP_LT_OQ);
return tmp;
}
template <>
inline SIMD<uint8_t, 32> vcltz(SIMD<int8_t, 32> a)
{
SIMD<uint8_t, 32> tmp;
tmp.m = _mm256_cmpgt_epi8(_mm256_setzero_si256(), a.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 16> vcltz(SIMD<int16_t, 16> a)
{
SIMD<uint16_t, 16> tmp;
tmp.m = _mm256_cmpgt_epi16(_mm256_setzero_si256(), a.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 8> vcltz(SIMD<int32_t, 8> a)
{
SIMD<uint32_t, 8> tmp;
tmp.m = _mm256_cmpgt_epi32(_mm256_setzero_si256(), a.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 4> vcltz(SIMD<int64_t, 4> a)
{
SIMD<uint64_t, 4> tmp;
tmp.m = _mm256_cmpgt_epi64(_mm256_setzero_si256(), a.m);
return tmp;
}
template <>
inline SIMD<float, 8> vmin(SIMD<float, 8> a, SIMD<float, 8> b)
{
SIMD<float, 8> tmp;
tmp.m = _mm256_min_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<double, 4> vmin(SIMD<double, 4> a, SIMD<double, 4> b)
{
SIMD<double, 4> tmp;
tmp.m = _mm256_min_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 32> vmin(SIMD<int8_t, 32> a, SIMD<int8_t, 32> b)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_min_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 16> vmin(SIMD<int16_t, 16> a, SIMD<int16_t, 16> b)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_min_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 8> vmin(SIMD<int32_t, 8> a, SIMD<int32_t, 8> b)
{
SIMD<int32_t, 8> tmp;
tmp.m = _mm256_min_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<float, 8> vmax(SIMD<float, 8> a, SIMD<float, 8> b)
{
SIMD<float, 8> tmp;
tmp.m = _mm256_max_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<double, 4> vmax(SIMD<double, 4> a, SIMD<double, 4> b)
{
SIMD<double, 4> tmp;
tmp.m = _mm256_max_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 32> vmax(SIMD<int8_t, 32> a, SIMD<int8_t, 32> b)
{
SIMD<int8_t, 32> tmp;
tmp.m = _mm256_max_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 16> vmax(SIMD<int16_t, 16> a, SIMD<int16_t, 16> b)
{
SIMD<int16_t, 16> tmp;
tmp.m = _mm256_max_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 8> vmax(SIMD<int32_t, 8> a, SIMD<int32_t, 8> b)
{
SIMD<int32_t, 8> tmp;
tmp.m = _mm256_max_epi32(a.m, b.m);
return tmp;
}
} // namespace ldpctool
#endif
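Each specialization above wraps one 256-bit register together with plain v[] / u[] lane arrays, so the decoder can mix intrinsics with per-lane scalar access through the same object. A minimal standalone sketch of that usage (illustrative only; it assumes the generic vadd/vabs function templates are declared in the simd.h header this file includes, and is compiled with -mavx2):

    #include <algorithm>
    #include "avx2.h"

    using namespace ldpctool;

    // Add two blocks of 8 floats and return the smallest |sum| lane.
    float min_abs_sum(const float *a, const float *b)
    {
        SIMD<float, 8> x, y;
        for (int i = 0; i < SIMD<float, 8>::SIZE; ++i) {
            x.v[i] = a[i];                    // fill lanes through the union
            y.v[i] = b[i];
        }
        SIMD<float, 8> s = vabs(vadd(x, y));  // _mm256_add_ps / _mm256_andnot_ps under the hood
        float r = s.v[0];
        for (int i = 1; i < SIMD<float, 8>::SIZE; ++i)
            r = std::min(r, s.v[i]);
        return r;
    }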

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,69 @@
/*
LDPC SISO encoder
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef ENCODER_HH
#define ENCODER_HH
#include "ldpc.h"
namespace ldpctool {
template <typename TYPE>
class LDPCEncoder
{
LDPCInterface *ldpc;
int N, K, R;
bool initialized;
TYPE one()
{
return 1;
}
TYPE sign(TYPE a, TYPE b)
{
return b < TYPE(0) ? -a : b > TYPE(0) ? a : TYPE(0);
}
public:
LDPCEncoder() : initialized(false)
{
}
void init(LDPCInterface *it)
{
if (initialized)
delete ldpc;
initialized = true;
ldpc = it->clone();
N = ldpc->code_len();
K = ldpc->data_len();
R = N - K;
}
void operator()(TYPE *data, TYPE *parity)
{
for (int i = 0; i < R; ++i)
parity[i] = one();
ldpc->first_bit();
for (int j = 0; j < K; ++j) {
int *acc_pos = ldpc->acc_pos();
int bit_deg = ldpc->bit_deg();
for (int n = 0; n < bit_deg; ++n) {
int i = acc_pos[n];
parity[i] = sign(parity[i], data[j]);
}
ldpc->next_bit();
}
for (int i = 1; i < R; ++i)
parity[i] = sign(parity[i], parity[i-1]);
}
~LDPCEncoder()
{
if (initialized)
delete ldpc;
}
};
} // namespace ldpctool
#endif
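The encoder works in the bipolar domain: with the usual mapping bit 0 -> +1 and bit 1 -> -1 (the algebra is symmetric under the opposite choice), sign(a, b) multiplies the signs of a and b, so the loops above are exactly the XOR parity accumulation of the DVB-S2 IRA code followed by the running accumulation parity[i] ^= parity[i-1]. A tiny scalar check of that sign/XOR equivalence (illustrative only):

    #include <cassert>

    static int to_bipolar(int bit) { return bit ? -1 : +1; }   // 0 -> +1, 1 -> -1
    static int from_bipolar(int v) { return v < 0 ? 1 : 0; }

    int main()
    {
        for (int a = 0; a <= 1; ++a)
            for (int b = 0; b <= 1; ++b)
                // sign(x, y) with x, y in {+1, -1} is just the product of the two values
                assert(from_bipolar(to_bipolar(a) * to_bipolar(b)) == (a ^ b));
        return 0;
    }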

View File

@@ -0,0 +1,82 @@
/*
LDPC SISO encoder v2
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef ENCODER_HH
#define ENCODER_HH
#include "ldpc.h"
namespace ldpctool {
template <typename TYPE>
class LDPCEncoder
{
uint16_t *pos;
uint8_t *cnc;
int R, CNL;
bool initialized;
TYPE one()
{
return 1;
}
TYPE sign(TYPE a, TYPE b)
{
return b < TYPE(0) ? -a : b > TYPE(0) ? a : TYPE(0);
}
public:
LDPCEncoder() : initialized(false)
{
}
void init(LDPCInterface *it)
{
if (initialized) {
delete[] pos;
delete[] cnc;
}
initialized = true;
LDPCInterface *ldpc = it->clone();
int N = ldpc->code_len();
int K = ldpc->data_len();
R = N - K;
CNL = ldpc->links_max_cn() - 2;
pos = new uint16_t[R * CNL];
cnc = new uint8_t[R];
for (int i = 0; i < R; ++i)
cnc[i] = 0;
ldpc->first_bit();
for (int j = 0; j < K; ++j) {
int *acc_pos = ldpc->acc_pos();
int bit_deg = ldpc->bit_deg();
for (int n = 0; n < bit_deg; ++n) {
int i = acc_pos[n];
pos[CNL*i+cnc[i]++] = j;
}
ldpc->next_bit();
}
delete ldpc;
}
void operator()(TYPE *data, TYPE *parity)
{
TYPE tmp = one();
for (int i = 0; i < R; ++i) {
for (int j = 0; j < cnc[i]; ++j)
tmp = sign(tmp, data[pos[CNL*i+j]]);
parity[i] = tmp;
}
}
~LDPCEncoder()
{
if (initialized) {
delete[] pos;
delete[] cnc;
}
}
};
} // namespace ldpctool
#endif
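This second encoder trades setup cost for speed: init() walks the code description once and records, for every parity index i, the data positions pos[CNL*i .. CNL*i + cnc[i] - 1] that feed it, so operator() becomes a single pass over precomputed indices with no further LDPCInterface calls. Both encoder variants expose the same interface; a minimal usage sketch (how the LDPCInterface instance is obtained is left out here, it comes from the DVB-S2/S2X/T2 tables added elsewhere in this commit, and the encoder header name is not shown on this page):

    using namespace ldpctool;

    void encode_frame(LDPCInterface *table, float *data /* K bipolar bits */, float *parity /* R bipolar bits */)
    {
        LDPCEncoder<float> encoder;
        encoder.init(table);    // scan the code description once
        encoder(data, parity);  // fill parity from data
    }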

View File

@@ -0,0 +1,34 @@
/*
Reduce N times while excluding ith input element
Copyright 2018 Ahmet Inan <inan@aicodix.de>
*/
#ifndef EXCLUSIVE_REDUCE_HH
#define EXCLUSIVE_REDUCE_HH
namespace ldpctool {
namespace CODE {
template <typename TYPE, typename OPERATOR>
void exclusive_reduce(const TYPE *in, TYPE *out, int N, OPERATOR op)
{
TYPE pre = in[0];
for (int i = 1; i < N-1; ++i) {
out[i] = pre;
pre = op(pre, in[i]);
}
out[N-1] = pre;
TYPE suf = in[N-1];
for (int i = N-2; i > 0; --i) {
out[i] = op(out[i], suf);
suf = op(suf, in[i]);
}
out[0] = suf;
}
} // namespace CODE
} // namespace ldpctool
#endif
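exclusive_reduce computes, for every position i, the reduction of all inputs except in[i], using two linear passes (a forward prefix pass and a backward suffix pass) instead of N separate reductions. For example, reducing {3, 1, 2} with min gives {1, 2, 1}. A self-contained check (illustrative only):

    #include <algorithm>
    #include <cassert>
    #include "exclusive_reduce.h"

    int main()
    {
        const int in[3] = {3, 1, 2};
        int out[3];
        ldpctool::CODE::exclusive_reduce(in, out, 3, [](int a, int b) { return std::min(a, b); });
        assert(out[0] == 1 && out[1] == 2 && out[2] == 1);   // min of the other two, each time
        return 0;
    }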

View File

@@ -0,0 +1,174 @@
/*
LDPC SISO flooding decoder
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef FLOODING_DECODER_HH
#define FLOODING_DECODER_HH
#include <stdlib.h>
#include "exclusive_reduce.h"
#include "ldpc.h"
namespace ldpctool {
template <typename TYPE, typename ALG>
class LDPCDecoder
{
void *aligned_buffer;
TYPE *bnl, *bnv, *cnl, *cnv;
uint8_t *cnc;
LDPCInterface *ldpc;
ALG alg;
int N, K, R, CNL, LT;
bool initialized;
void bit_node_init(TYPE *data, TYPE *parity)
{
TYPE *bl = bnl;
for (int i = 0; i < R-1; ++i) {
bnv[i] = parity[i];
*bl++ = parity[i];
*bl++ = parity[i];
}
bnv[R-1] = parity[R-1];
*bl++ = parity[R-1];
ldpc->first_bit();
for (int j = 0; j < K; ++j) {
bnv[j+R] = data[j];
int bit_deg = ldpc->bit_deg();
for (int n = 0; n < bit_deg; ++n)
*bl++ = data[j];
ldpc->next_bit();
}
}
void check_node_update()
{
TYPE *bl = bnl;
cnv[0] = alg.sign(alg.one(), bnv[0]);
cnl[0] = *bl++;
cnc[0] = 1;
for (int i = 1; i < R; ++i) {
cnv[i] = alg.sign(alg.sign(alg.one(), bnv[i-1]), bnv[i]);
cnl[CNL*i] = *bl++;
cnl[CNL*i+1] = *bl++;
cnc[i] = 2;
}
ldpc->first_bit();
for (int j = 0; j < K; ++j) {
int *acc_pos = ldpc->acc_pos();
int bit_deg = ldpc->bit_deg();
for (int n = 0; n < bit_deg; ++n) {
int i = acc_pos[n];
cnv[i] = alg.sign(cnv[i], bnv[j+R]);
cnl[CNL*i+cnc[i]++] = *bl++;
}
ldpc->next_bit();
}
for (int i = 0; i < R; ++i)
alg.finalp(cnl+CNL*i, cnc[i]);
}
void bit_node_update(TYPE *data, TYPE *parity)
{
TYPE *bl = bnl;
bnv[0] = alg.add(parity[0], alg.add(cnl[0], cnl[CNL]));
alg.update(bl++, alg.add(parity[0], cnl[CNL]));
alg.update(bl++, alg.add(parity[0], cnl[0]));
cnc[0] = 1;
for (int i = 1; i < R-1; ++i) {
bnv[i] = alg.add(parity[i], alg.add(cnl[CNL*i+1], cnl[CNL*(i+1)]));
alg.update(bl++, alg.add(parity[i], cnl[CNL*(i+1)]));
alg.update(bl++, alg.add(parity[i], cnl[CNL*i+1]));
cnc[i] = 2;
}
bnv[R-1] = alg.add(parity[R-1], cnl[CNL*(R-1)+1]);
alg.update(bl++, parity[R-1]);
cnc[R-1] = 2;
ldpc->first_bit();
for (int j = 0; j < K; ++j) {
int *acc_pos = ldpc->acc_pos();
int bit_deg = ldpc->bit_deg();
TYPE inp[bit_deg];
for (int n = 0; n < bit_deg; ++n) {
int i = acc_pos[n];
inp[n] = cnl[CNL*i+cnc[i]++];
}
TYPE out[bit_deg];
CODE::exclusive_reduce(inp, out, bit_deg, alg.add);
bnv[j+R] = alg.add(data[j], alg.add(out[0], inp[0]));
for (int n = 0; n < bit_deg; ++n)
alg.update(bl++, alg.add(data[j], out[n]));
ldpc->next_bit();
}
}
bool hard_decision(int blocks)
{
for (int i = 0; i < R; ++i)
if (alg.bad(cnv[i], blocks))
return true;
return false;
}
void update_user(TYPE *data, TYPE *parity)
{
for (int i = 0; i < R; ++i)
parity[i] = bnv[i];
for (int i = 0; i < K; ++i)
data[i] = bnv[i+R];
}
public:
LDPCDecoder() : initialized(false)
{
}
void init(LDPCInterface *it)
{
if (initialized) {
free(aligned_buffer);
delete[] cnc;
delete ldpc;
}
initialized = true;
ldpc = it->clone();
N = ldpc->code_len();
K = ldpc->data_len();
R = N - K;
CNL = ldpc->links_max_cn();
LT = ldpc->links_total();
int num = LT + N + R * CNL + R;
aligned_buffer = aligned_alloc(sizeof(TYPE), sizeof(TYPE) * num);
TYPE *ptr = reinterpret_cast<TYPE *>(aligned_buffer);
bnl = ptr; ptr += LT;
bnv = ptr; ptr += N;
cnl = ptr; ptr += R * CNL;
cnv = ptr; ptr += R;
cnc = new uint8_t[R];
}
int operator()(TYPE *data, TYPE *parity, int trials = 50, int blocks = 1)
{
bit_node_init(data, parity);
check_node_update();
if (!hard_decision(blocks))
return trials;
--trials;
bit_node_update(data, parity);
check_node_update();
while (hard_decision(blocks) && --trials >= 0) {
bit_node_update(data, parity);
check_node_update();
}
update_user(data, parity);
return trials;
}
~LDPCDecoder()
{
if (initialized) {
free(aligned_buffer);
delete[] cnc;
delete ldpc;
}
}
};
} // namespace ldpctool
#endif
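The flooding decoder is used in three steps: pick a value type and one of the algorithm structs (generic.h for scalar types, algorithms.h plus a SIMD backend for vectorized ones), init() it with an LDPCInterface describing the code, then call it on the K data and R parity soft bits in place. The return value is the number of trials left; a negative value means it did not converge. A rough scalar sketch (illustrative only; obtaining the LDPCInterface from the tables handler is left out):

    #include "ldpc.h"
    #include "generic.h"
    // plus the flooding decoder header added above

    using namespace ldpctool;

    typedef float code_type;
    typedef NormalUpdate<code_type> update_type;
    typedef MinSumAlgorithm<code_type, update_type> algorithm_type;

    int decode_frame(LDPCInterface *table, code_type *data /* K LLRs */, code_type *parity /* R LLRs */)
    {
        LDPCDecoder<code_type, algorithm_type> decoder;
        decoder.init(table);               // in real use, init once per code, not once per frame
        return decoder(data, parity, 25);  // up to 25 flooding iterations; negative means no convergence
    }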

View File

@@ -0,0 +1,704 @@
/*
Generic LDPC algorithms
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef GENERIC_HH
#define GENERIC_HH
#include "exclusive_reduce.h"
namespace ldpctool {
template <typename TYPE>
struct NormalUpdate
{
static void update(TYPE *a, TYPE b)
{
*a = b;
}
};
template <typename TYPE>
struct SelfCorrectedUpdate
{
static void update(TYPE *a, TYPE b)
{
*a = (*a == TYPE(0) || (*a < TYPE(0)) == (b < TYPE(0))) ? b : TYPE(0);
}
};
template <typename TYPE, typename UPDATE>
struct MinSumAlgorithm
{
static TYPE zero()
{
return 0;
}
static TYPE one()
{
return 1;
}
static TYPE min(TYPE a, TYPE b)
{
return std::min(a, b);
}
static TYPE sign(TYPE a, TYPE b)
{
return b < TYPE(0) ? -a : b > TYPE(0) ? a : TYPE(0);
}
static void finalp(TYPE *links, int cnt)
{
TYPE mags[cnt], mins[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = std::abs(links[i]);
CODE::exclusive_reduce(mags, mins, cnt, min);
TYPE signs[cnt];
CODE::exclusive_reduce(links, signs, cnt, sign);
for (int i = 0; i < cnt; ++i)
links[i] = sign(mins[i], signs[i]);
}
static TYPE add(TYPE a, TYPE b)
{
return a + b;
}
static TYPE sub(TYPE a, TYPE b)
{
return a - b;
}
static bool bad(TYPE v, int)
{
return v <= TYPE(0);
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
template <typename UPDATE>
struct MinSumAlgorithm<float, UPDATE>
{
static float zero()
{
return 0.f;
}
static float one()
{
return 1.f;
}
static float min(float a, float b)
{
return std::min(a, b);
}
static int xor_(int a, int b)
{
return a ^ b;
}
static void finalp(float *links, int cnt)
{
int mask = 0x80000000;
float mags[cnt], mins[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = std::abs(links[i]);
CODE::exclusive_reduce(mags, mins, cnt, min);
int signs[cnt];
CODE::exclusive_reduce(reinterpret_cast<int *>(links), signs, cnt, xor_);
for (int i = 0; i < cnt; ++i)
signs[i] &= mask;
for (int i = 0; i < cnt; ++i)
reinterpret_cast<int *>(links)[i] = signs[i] | reinterpret_cast<int *>(mins)[i];
}
static float sign(float a, float b)
{
return b < 0.f ? -a : b > 0.f ? a : 0.f;
}
static float add(float a, float b)
{
return a + b;
}
static float sub(float a, float b)
{
return a - b;
}
static bool bad(float v, int)
{
return v <= 0.f;
}
static void update(float *a, float b)
{
UPDATE::update(a, b);
}
};
template <typename UPDATE>
struct MinSumAlgorithm<int8_t, UPDATE>
{
static int8_t zero()
{
return 0;
}
static int8_t one()
{
return 1;
}
static int8_t add(int8_t a, int8_t b)
{
int16_t x = int16_t(a) + int16_t(b);
x = std::min<int16_t>(std::max<int16_t>(x, -128), 127);
return x;
}
static int8_t sub(int8_t a, int8_t b)
{
int16_t x = int16_t(a) - int16_t(b);
x = std::min<int16_t>(std::max<int16_t>(x, -128), 127);
return x;
}
static int8_t min(int8_t a, int8_t b)
{
return std::min(a, b);
}
static int8_t xor_(int8_t a, int8_t b)
{
return a ^ b;
}
static int8_t sqabs(int8_t a)
{
return std::abs(std::max<int8_t>(a, -127));
}
static int8_t sign(int8_t a, int8_t b)
{
return b < 0 ? -a : b > 0 ? a : 0;
}
static void finalp(int8_t *links, int cnt)
{
int8_t mags[cnt], mins[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = sqabs(links[i]);
CODE::exclusive_reduce(mags, mins, cnt, min);
int8_t signs[cnt];
CODE::exclusive_reduce(links, signs, cnt, xor_);
for (int i = 0; i < cnt; ++i)
signs[i] |= 127;
for (int i = 0; i < cnt; ++i)
links[i] = sign(mins[i], signs[i]);
}
static bool bad(int8_t v, int)
{
return v <= 0;
}
static void update(int8_t *a, int8_t b)
{
UPDATE::update(a, std::min<int8_t>(std::max<int8_t>(b, -32), 31));
}
};
template <typename TYPE, typename UPDATE, int FACTOR>
struct OffsetMinSumAlgorithm
{
static TYPE zero()
{
return 0;
}
static TYPE one()
{
return 1;
}
static TYPE min(TYPE a, TYPE b)
{
return std::min(a, b);
}
static TYPE sign(TYPE a, TYPE b)
{
return b < TYPE(0) ? -a : b > TYPE(0) ? a : TYPE(0);
}
static void finalp(TYPE *links, int cnt)
{
TYPE beta = 0.5 * FACTOR;
TYPE mags[cnt], mins[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = std::max(std::abs(links[i]) - beta, TYPE(0));
CODE::exclusive_reduce(mags, mins, cnt, min);
TYPE signs[cnt];
CODE::exclusive_reduce(links, signs, cnt, sign);
for (int i = 0; i < cnt; ++i)
links[i] = sign(mins[i], signs[i]);
}
static TYPE add(TYPE a, TYPE b)
{
return a + b;
}
static TYPE sub(TYPE a, TYPE b)
{
return a - b;
}
static bool bad(TYPE v, int)
{
return v <= TYPE(0);
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
template <typename UPDATE, int FACTOR>
struct OffsetMinSumAlgorithm<int8_t, UPDATE, FACTOR>
{
static int8_t zero()
{
return 0;
}
static int8_t one()
{
return 1;
}
static int8_t add(int8_t a, int8_t b)
{
int16_t x = int16_t(a) + int16_t(b);
x = std::min<int16_t>(std::max<int16_t>(x, -128), 127);
return x;
}
static int8_t sub(int8_t a, int8_t b)
{
int16_t x = int16_t(a) - int16_t(b);
x = std::min<int16_t>(std::max<int16_t>(x, -128), 127);
return x;
}
static uint8_t subu(uint8_t a, uint8_t b)
{
int16_t x = int16_t(a) - int16_t(b);
x = std::max<int16_t>(x, 0);
return x;
}
static int8_t min(int8_t a, int8_t b)
{
return std::min(a, b);
}
static int8_t xor_(int8_t a, int8_t b)
{
return a ^ b;
}
static int8_t sqabs(int8_t a)
{
return std::abs(std::max<int8_t>(a, -127));
}
static int8_t sign(int8_t a, int8_t b)
{
return b < 0 ? -a : b > 0 ? a : 0;
}
static void finalp(int8_t *links, int cnt)
{
int8_t beta = std::nearbyint(0.5 * FACTOR);
int8_t mags[cnt], mins[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = subu(sqabs(links[i]), beta);
CODE::exclusive_reduce(mags, mins, cnt, min);
int8_t signs[cnt];
CODE::exclusive_reduce(links, signs, cnt, xor_);
for (int i = 0; i < cnt; ++i)
signs[i] |= 127;
for (int i = 0; i < cnt; ++i)
links[i] = sign(mins[i], signs[i]);
}
static bool bad(int8_t v, int)
{
return v <= 0;
}
static void update(int8_t *a, int8_t b)
{
UPDATE::update(a, std::min<int8_t>(std::max<int8_t>(b, -32), 31));
}
};
template <typename TYPE, typename UPDATE, int FACTOR>
struct MinSumCAlgorithm
{
static TYPE zero()
{
return 0;
}
static TYPE one()
{
return 1;
}
static TYPE correction_factor(TYPE a, TYPE b)
{
if (1) {
TYPE c = TYPE(FACTOR) / TYPE(2);
TYPE apb = std::abs(a + b);
TYPE amb = std::abs(a - b);
if (apb < TYPE(2) && amb > TYPE(2) * apb)
return c;
if (amb < TYPE(2) && apb > TYPE(2) * amb)
return -c;
return 0;
}
return std::log(TYPE(1)+std::exp(-std::abs(a+b))) - std::log(TYPE(1)+std::exp(-std::abs(a-b)));
}
static TYPE sign(TYPE a, TYPE b)
{
return b < TYPE(0) ? -a : b > TYPE(0) ? a : TYPE(0);
}
static TYPE min(TYPE a, TYPE b)
{
TYPE m = std::min(std::abs(a), std::abs(b));
TYPE x = sign(sign(m, a), b);
x += correction_factor(a, b);
return x;
}
static void finalp(TYPE *links, int cnt)
{
TYPE tmp[cnt];
CODE::exclusive_reduce(links, tmp, cnt, min);
for (int i = 0; i < cnt; ++i)
links[i] = tmp[i];
}
static TYPE add(TYPE a, TYPE b)
{
return a + b;
}
static TYPE sub(TYPE a, TYPE b)
{
return a - b;
}
static bool bad(TYPE v, int)
{
return v <= TYPE(0);
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
template <typename UPDATE, int FACTOR>
struct MinSumCAlgorithm<float, UPDATE, FACTOR>
{
static float zero()
{
return 0.f;
}
static float one()
{
return 1.f;
}
static float correction_factor(float a, float b)
{
float c = 0.5f;
float apb = std::abs(a + b);
float amb = std::abs(a - b);
if (apb < 2.f && amb > 2.f * apb)
return c;
if (amb < 2.f && apb > 2.f * amb)
return -c;
return 0;
}
static float min(float a, float b)
{
int mask = 0x80000000;
float m = std::min(std::abs(a), std::abs(b));
int tmp = (mask & (*reinterpret_cast<int *>(&a) ^ *reinterpret_cast<int *>(&b))) | *reinterpret_cast<int *>(&m);
float x = *reinterpret_cast<float *>(&tmp);
x += correction_factor(a, b);
return x;
}
static void finalp(float *links, int cnt)
{
float tmp[cnt];
CODE::exclusive_reduce(links, tmp, cnt, min);
for (int i = 0; i < cnt; ++i)
links[i] = tmp[i];
}
static float sign(float a, float b)
{
return b < 0.f ? -a : b > 0.f ? a : 0.f;
}
static float add(float a, float b)
{
return a + b;
}
static float sub(float a, float b)
{
return a - b;
}
static bool bad(float v, int)
{
return v <= 0.f;
}
static void update(float *a, float b)
{
UPDATE::update(a, b);
}
};
template <typename UPDATE, int FACTOR>
struct MinSumCAlgorithm<int8_t, UPDATE, FACTOR>
{
static int8_t zero()
{
return 0;
}
static int8_t one()
{
return 1;
}
static int8_t add(int8_t a, int8_t b)
{
int16_t x = int16_t(a) + int16_t(b);
x = std::min<int16_t>(std::max<int16_t>(x, -128), 127);
return x;
}
static uint8_t addu(uint8_t a, uint8_t b)
{
int16_t x = int16_t(a) + int16_t(b);
x = std::min<int16_t>(x, 255);
return x;
}
static int8_t sub(int8_t a, int8_t b)
{
int16_t x = int16_t(a) - int16_t(b);
x = std::min<int16_t>(std::max<int16_t>(x, -128), 127);
return x;
}
static uint8_t subu(uint8_t a, uint8_t b)
{
int16_t x = int16_t(a) - int16_t(b);
x = std::max<int16_t>(x, 0);
return x;
}
static uint8_t abs(int8_t a)
{
return std::abs<int16_t>(a);
}
static int8_t sqabs(int8_t a)
{
return std::abs(std::max<int8_t>(a, -127));
}
static int8_t sign(int8_t a, int8_t b)
{
return b < 0 ? -a : b > 0 ? a : 0;
}
static int8_t correction_factor(int8_t a, int8_t b)
{
uint8_t factor2 = FACTOR * 2;
uint8_t c = FACTOR / 2;
uint8_t apb = abs(add(a, b));
uint8_t apb2 = addu(apb, apb);
uint8_t amb = abs(sub(a, b));
uint8_t amb2 = addu(amb, amb);
if (subu(factor2, apb) && subu(amb, apb2))
return c;
if (subu(factor2, amb) && subu(apb, amb2))
return -c;
return 0;
}
static int8_t min(int8_t a, int8_t b)
{
int8_t m = std::min(sqabs(a), sqabs(b));
int8_t x = sign(sign(m, a), b);
x = add(x, correction_factor(a, b));
return x;
}
static void finalp(int8_t *links, int cnt)
{
int8_t tmp[cnt];
CODE::exclusive_reduce(links, tmp, cnt, min);
for (int i = 0; i < cnt; ++i)
links[i] = tmp[i];
}
static bool bad(int8_t v, int)
{
return v <= 0;
}
static void update(int8_t *a, int8_t b)
{
UPDATE::update(a, std::min<int8_t>(std::max<int8_t>(b, -32), 31));
}
};
template <typename TYPE, typename UPDATE>
struct LogDomainSPA
{
static TYPE zero()
{
return 0;
}
static TYPE one()
{
return 1;
}
static TYPE phi(TYPE x)
{
x = std::min(std::max(x, TYPE(0.000001)), TYPE(14.5));
return std::log(std::exp(x)+TYPE(1)) - std::log(std::exp(x)-TYPE(1));
}
static TYPE add(TYPE a, TYPE b)
{
return a + b;
}
static TYPE sub(TYPE a, TYPE b)
{
return a - b;
}
static TYPE sign(TYPE a, TYPE b)
{
return b < TYPE(0) ? -a : b > TYPE(0) ? a : TYPE(0);
}
static void finalp(TYPE *links, int cnt)
{
TYPE mags[cnt], sums[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = phi(std::abs(links[i]));
CODE::exclusive_reduce(mags, sums, cnt, add);
TYPE signs[cnt];
CODE::exclusive_reduce(links, signs, cnt, sign);
for (int i = 0; i < cnt; ++i)
links[i] = sign(phi(sums[i]), signs[i]);
}
static bool bad(TYPE v, int)
{
return v <= TYPE(0);
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
template <typename TYPE, typename UPDATE, int LAMBDA>
struct LambdaMinAlgorithm
{
static TYPE zero()
{
return 0;
}
static TYPE one()
{
return 1;
}
static TYPE phi(TYPE x)
{
x = std::min(std::max(x, TYPE(0.000001)), TYPE(14.5));
return std::log(std::exp(x)+TYPE(1)) - std::log(std::exp(x)-TYPE(1));
}
static TYPE add(TYPE a, TYPE b)
{
return a + b;
}
static TYPE sub(TYPE a, TYPE b)
{
return a - b;
}
static TYPE sign(TYPE a, TYPE b)
{
return b < TYPE(0) ? -a : b > TYPE(0) ? a : TYPE(0);
}
static void finalp(TYPE *links, int cnt)
{
typedef std::pair<TYPE, int> Pair;
Pair mags[cnt];
for (int i = 0; i < cnt; ++i)
mags[i] = Pair(std::abs(links[i]), i);
std::nth_element(mags, mags+LAMBDA, mags+cnt, [](Pair a, Pair b){ return a.first < b.first; });
TYPE sums[cnt];
for (int i = 0; i < cnt; ++i) {
int j = 0;
if (i == mags[0].second)
++j;
sums[i] = phi(mags[j].first);
for (int l = 1; l < LAMBDA; ++l) {
++j;
if (i == mags[j].second)
++j;
sums[i] += phi(mags[j].first);
}
}
TYPE signs[cnt];
CODE::exclusive_reduce(links, signs, cnt, sign);
for (int i = 0; i < cnt; ++i)
links[i] = sign(phi(sums[i]), signs[i]);
}
static bool bad(TYPE v, int)
{
return v <= TYPE(0);
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
template <typename TYPE, typename UPDATE>
struct SumProductAlgorithm
{
static TYPE zero()
{
return 0;
}
static TYPE one()
{
return 1;
}
static TYPE prep(TYPE x)
{
return std::tanh(TYPE(0.5) * x);
}
static TYPE postp(TYPE x)
{
return TYPE(2) * std::atanh(x);
}
static TYPE mul(TYPE a, TYPE b)
{
return a * b;
}
static TYPE sign(TYPE a, TYPE b)
{
return b < TYPE(0) ? -a : b > TYPE(0) ? a : TYPE(0);
}
static void finalp(TYPE *links, int cnt)
{
TYPE in[cnt], out[cnt];
for (int i = 0; i < cnt; ++i)
in[i] = prep(links[i]);
CODE::exclusive_reduce(in, out, cnt, mul);
for (int i = 0; i < cnt; ++i)
links[i] = postp(out[i]);
}
static TYPE add(TYPE a, TYPE b)
{
return a + b;
}
static TYPE sub(TYPE a, TYPE b)
{
return a - b;
}
static bool bad(TYPE v, int)
{
return v <= TYPE(0);
}
static void update(TYPE *a, TYPE b)
{
UPDATE::update(a, b);
}
};
} // namespace ldpctool
#endif
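For reference, the check-node update that the finalp() variants above implement is the min-sum approximation of belief propagation: with incoming messages lambda_j on a check node, the outgoing message towards edge i is

    m(c -> i) = [ product over j != i of sgn(lambda_j) ] * [ min over j != i of |lambda_j| ]

OffsetMinSumAlgorithm first subtracts beta = FACTOR/2 from every magnitude (clamping at zero), MinSumCAlgorithm adds a correction term approximating the exact log-domain value log(1 + exp(-|a+b|)) - log(1 + exp(-|a-b|)), LogDomainSPA and SumProductAlgorithm compute the exact update through phi() and tanh()/atanh() respectively, LambdaMinAlgorithm keeps only the LAMBDA smallest magnitudes in the phi() sum, and SelfCorrectedUpdate erases (sets to zero) any bit-to-check message whose sign flipped since the previous iteration.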

View File

@@ -0,0 +1,363 @@
/*
Bit interleaver
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef INTERLEAVER_HH
#define INTERLEAVER_HH
namespace ldpctool {
template <typename TYPE>
struct Interleaver
{
virtual void fwd(TYPE *) = 0;
virtual void bwd(TYPE *) = 0;
virtual ~Interleaver() = default;
};
template <typename TYPE>
struct ITL0 : public Interleaver<TYPE>
{
void fwd(TYPE *){}
void bwd(TYPE *){}
};
template <typename TYPE, typename PITL, typename MUX>
struct BITL : public Interleaver<TYPE>
{
static const int N = PITL::N;
static const int COLS = MUX::N;
static const int ROWS = N / COLS;
TYPE tmp[N];
void fwd(TYPE *io)
{
PITL::fwd(tmp, io);
for (int row = 0; row < ROWS; ++row)
MUX::fwd(io+COLS*row, tmp+row, ROWS);
}
void bwd(TYPE *io)
{
for (int row = 0; row < ROWS; ++row)
MUX::bwd(tmp+row, io+COLS*row, ROWS);
PITL::bwd(io, tmp);
}
};
template <typename TYPE, int NUM>
struct PITL0
{
static const int N = NUM;
static void fwd(TYPE *out, TYPE *in)
{
for (int n = 0; n < N; ++n)
out[n] = in[n];
}
static void bwd(TYPE *out, TYPE *in)
{
for (int n = 0; n < N; ++n)
out[n] = in[n];
}
};
template <typename TYPE, int NUM, int Q>
struct PITL
{
static const int N = NUM;
static const int M = 360;
static const int K = N - M * Q;
static void fwd(TYPE *out, TYPE *in)
{
for (int k = 0; k < K; ++k)
out[k] = in[k];
for (int q = 0; q < Q; ++q)
for (int m = 0; m < M; ++m)
out[K+M*q+m] = in[K+Q*m+q];
}
static void bwd(TYPE *out, TYPE *in)
{
for (int k = 0; k < K; ++k)
out[k] = in[k];
for (int q = 0; q < Q; ++q)
for (int m = 0; m < M; ++m)
out[K+Q*m+q] = in[K+M*q+m];
}
};
template <typename TYPE, int NUM, int Q, typename CT>
struct PCTITL
{
static const int N = NUM;
static const int COLS = CT::N;
static const int ROWS = N / COLS;
static void fwd(TYPE *out, TYPE *in)
{
PITL<TYPE, N, Q>::fwd(out, in);
for (int n = 0; n < N; ++n)
in[n] = out[n];
for (int row = 0; row < ROWS; ++row)
CT::fwd(out+COLS*row, in, ROWS, row);
}
static void bwd(TYPE *out, TYPE *in)
{
for (int row = 0; row < ROWS; ++row)
CT::bwd(out, in+COLS*row, ROWS, row);
for (int n = 0; n < N; ++n)
in[n] = out[n];
PITL<TYPE, N, Q>::bwd(out, in);
}
};
template <typename TYPE>
struct MUX0
{
static const int N = 1;
static void fwd(TYPE *out, TYPE *in, int)
{
out[0] = in[0];
}
static void bwd(TYPE *out, TYPE *in, int)
{
out[0] = in[0];
}
};
template <typename TYPE, int E0, int E1, int E2>
struct MUX3
{
static const int N = 3;
static void fwd(TYPE *out, TYPE *in, int S)
{
out[E0] = in[0*S];
out[E1] = in[1*S];
out[E2] = in[2*S];
}
static void bwd(TYPE *out, TYPE *in, int S)
{
out[0*S] = in[E0];
out[1*S] = in[E1];
out[2*S] = in[E2];
}
};
template <typename TYPE, int E0, int E1, int E2, int E3, int E4, int E5, int E6, int E7>
struct MUX8
{
static const int N = 8;
static void fwd(TYPE *out, TYPE *in, int S)
{
out[E0] = in[0*S];
out[E1] = in[1*S];
out[E2] = in[2*S];
out[E3] = in[3*S];
out[E4] = in[4*S];
out[E5] = in[5*S];
out[E6] = in[6*S];
out[E7] = in[7*S];
}
static void bwd(TYPE *out, TYPE *in, int S)
{
out[0*S] = in[E0];
out[1*S] = in[E1];
out[2*S] = in[E2];
out[3*S] = in[E3];
out[4*S] = in[E4];
out[5*S] = in[E5];
out[6*S] = in[E6];
out[7*S] = in[E7];
}
};
template <typename TYPE, int E0, int E1, int E2, int E3, int E4, int E5, int E6, int E7, int E8, int E9, int E10, int E11>
struct MUX12
{
static const int N = 12;
static void fwd(TYPE *out, TYPE *in, int S)
{
out[E0] = in[0*S];
out[E1] = in[1*S];
out[E2] = in[2*S];
out[E3] = in[3*S];
out[E4] = in[4*S];
out[E5] = in[5*S];
out[E6] = in[6*S];
out[E7] = in[7*S];
out[E8] = in[8*S];
out[E9] = in[9*S];
out[E10] = in[10*S];
out[E11] = in[11*S];
}
static void bwd(TYPE *out, TYPE *in, int S)
{
out[0*S] = in[E0];
out[1*S] = in[E1];
out[2*S] = in[E2];
out[3*S] = in[E3];
out[4*S] = in[E4];
out[5*S] = in[E5];
out[6*S] = in[E6];
out[7*S] = in[E7];
out[8*S] = in[E8];
out[9*S] = in[E9];
out[10*S] = in[E10];
out[11*S] = in[E11];
}
};
template <typename TYPE, int E0, int E1, int E2, int E3, int E4, int E5, int E6, int E7, int E8, int E9, int E10, int E11, int E12, int E13, int E14, int E15>
struct MUX16
{
static const int N = 16;
static void fwd(TYPE *out, TYPE *in, int S)
{
out[E0] = in[0*S];
out[E1] = in[1*S];
out[E2] = in[2*S];
out[E3] = in[3*S];
out[E4] = in[4*S];
out[E5] = in[5*S];
out[E6] = in[6*S];
out[E7] = in[7*S];
out[E8] = in[8*S];
out[E9] = in[9*S];
out[E10] = in[10*S];
out[E11] = in[11*S];
out[E12] = in[12*S];
out[E13] = in[13*S];
out[E14] = in[14*S];
out[E15] = in[15*S];
}
static void bwd(TYPE *out, TYPE *in, int S)
{
out[0*S] = in[E0];
out[1*S] = in[E1];
out[2*S] = in[E2];
out[3*S] = in[E3];
out[4*S] = in[E4];
out[5*S] = in[E5];
out[6*S] = in[E6];
out[7*S] = in[E7];
out[8*S] = in[E8];
out[9*S] = in[E9];
out[10*S] = in[E10];
out[11*S] = in[E11];
out[12*S] = in[E12];
out[13*S] = in[E13];
out[14*S] = in[E14];
out[15*S] = in[E15];
}
};
template <typename TYPE, int T0, int T1, int T2, int T3, int T4, int T5, int T6, int T7>
struct CT8
{
static const int N = 8;
static void fwd(TYPE *out, TYPE *in, int S, int R)
{
out[0] = in[0*S+(R+S-T0)%S];
out[1] = in[1*S+(R+S-T1)%S];
out[2] = in[2*S+(R+S-T2)%S];
out[3] = in[3*S+(R+S-T3)%S];
out[4] = in[4*S+(R+S-T4)%S];
out[5] = in[5*S+(R+S-T5)%S];
out[6] = in[6*S+(R+S-T6)%S];
out[7] = in[7*S+(R+S-T7)%S];
}
static void bwd(TYPE *out, TYPE *in, int S, int R)
{
out[0*S+(R+S-T0)%S] = in[0];
out[1*S+(R+S-T1)%S] = in[1];
out[2*S+(R+S-T2)%S] = in[2];
out[3*S+(R+S-T3)%S] = in[3];
out[4*S+(R+S-T4)%S] = in[4];
out[5*S+(R+S-T5)%S] = in[5];
out[6*S+(R+S-T6)%S] = in[6];
out[7*S+(R+S-T7)%S] = in[7];
}
};
template <typename TYPE, int T0, int T1, int T2, int T3, int T4, int T5, int T6, int T7, int T8, int T9, int T10, int T11>
struct CT12
{
static const int N = 12;
static void fwd(TYPE *out, TYPE *in, int S, int R)
{
out[0] = in[0*S+(R+S-T0)%S];
out[1] = in[1*S+(R+S-T1)%S];
out[2] = in[2*S+(R+S-T2)%S];
out[3] = in[3*S+(R+S-T3)%S];
out[4] = in[4*S+(R+S-T4)%S];
out[5] = in[5*S+(R+S-T5)%S];
out[6] = in[6*S+(R+S-T6)%S];
out[7] = in[7*S+(R+S-T7)%S];
out[8] = in[8*S+(R+S-T8)%S];
out[9] = in[9*S+(R+S-T9)%S];
out[10] = in[10*S+(R+S-T10)%S];
out[11] = in[11*S+(R+S-T11)%S];
}
static void bwd(TYPE *out, TYPE *in, int S, int R)
{
out[0*S+(R+S-T0)%S] = in[0];
out[1*S+(R+S-T1)%S] = in[1];
out[2*S+(R+S-T2)%S] = in[2];
out[3*S+(R+S-T3)%S] = in[3];
out[4*S+(R+S-T4)%S] = in[4];
out[5*S+(R+S-T5)%S] = in[5];
out[6*S+(R+S-T6)%S] = in[6];
out[7*S+(R+S-T7)%S] = in[7];
out[8*S+(R+S-T8)%S] = in[8];
out[9*S+(R+S-T9)%S] = in[9];
out[10*S+(R+S-T10)%S] = in[10];
out[11*S+(R+S-T11)%S] = in[11];
}
};
template <typename TYPE, int T0, int T1, int T2, int T3, int T4, int T5, int T6, int T7, int T8, int T9, int T10, int T11, int T12, int T13, int T14, int T15>
struct CT16
{
static const int N = 16;
static void fwd(TYPE *out, TYPE *in, int S, int R)
{
out[0] = in[0*S+(R+S-T0)%S];
out[1] = in[1*S+(R+S-T1)%S];
out[2] = in[2*S+(R+S-T2)%S];
out[3] = in[3*S+(R+S-T3)%S];
out[4] = in[4*S+(R+S-T4)%S];
out[5] = in[5*S+(R+S-T5)%S];
out[6] = in[6*S+(R+S-T6)%S];
out[7] = in[7*S+(R+S-T7)%S];
out[8] = in[8*S+(R+S-T8)%S];
out[9] = in[9*S+(R+S-T9)%S];
out[10] = in[10*S+(R+S-T10)%S];
out[11] = in[11*S+(R+S-T11)%S];
out[12] = in[12*S+(R+S-T12)%S];
out[13] = in[13*S+(R+S-T13)%S];
out[14] = in[14*S+(R+S-T14)%S];
out[15] = in[15*S+(R+S-T15)%S];
}
static void bwd(TYPE *out, TYPE *in, int S, int R)
{
out[0*S+(R+S-T0)%S] = in[0];
out[1*S+(R+S-T1)%S] = in[1];
out[2*S+(R+S-T2)%S] = in[2];
out[3*S+(R+S-T3)%S] = in[3];
out[4*S+(R+S-T4)%S] = in[4];
out[5*S+(R+S-T5)%S] = in[5];
out[6*S+(R+S-T6)%S] = in[6];
out[7*S+(R+S-T7)%S] = in[7];
out[8*S+(R+S-T8)%S] = in[8];
out[9*S+(R+S-T9)%S] = in[9];
out[10*S+(R+S-T10)%S] = in[10];
out[11*S+(R+S-T11)%S] = in[11];
out[12*S+(R+S-T12)%S] = in[12];
out[13*S+(R+S-T13)%S] = in[13];
out[14*S+(R+S-T14)%S] = in[14];
out[15*S+(R+S-T15)%S] = in[15];
}
};
} // namespace ldpctool
#endif
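
The MUX and CT templates above are the raw column permutations and column twists; the handler in the next file composes them with the parity interleavers into complete DVB bit interleavers. A minimal stand-alone sketch (hypothetical test code, not part of this commit) of what MUX3&lt;TYPE, 2, 1, 0&gt;, the "_210" order selected for the DVB-S2 8PSK B5/C5 entries below, does to three bit streams of length S:

#include <cstdio>
#include "testbench.h"
#include "interleaver.h"
using namespace ldpctool;

int main()
{
    // Three streams of length S = 2, stored stream after stream: in[k + 2*s] is bit k of stream s.
    int in[6] = {10, 11, 20, 21, 30, 31};
    int out[6] = {0};
    typedef MUX3<int, 2, 1, 0> _210;
    for (int k = 0; k < 2; ++k)
        _210::fwd(out + 3 * k, in + k, 2);              // each output triple holds bit k of streams 2, 1, 0
    std::printf("%d %d %d\n", out[0], out[1], out[2]);  // prints: 30 20 10
    return 0;
}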

View File

@ -0,0 +1,221 @@
/*
LDPC interleavers handler
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#include <iostream>
#include <cstring>
#include "testbench.h"
#include "interleaver.h"
namespace ldpctool {
Interleaver<code_type> *create_interleaver(char *modulation, char *standard, char prefix, int number)
{
if (!strcmp(standard, "S2")) {
if (!strcmp(modulation, "8PSK")) {
typedef MUX3<code_type, 0, 1, 2> _012;
typedef MUX3<code_type, 2, 1, 0> _210;
if (prefix == 'B') {
switch (number) {
case 5:
return new BITL<code_type, PITL0<code_type, 64800>, _210>();
default:
return new BITL<code_type, PITL0<code_type, 64800>, _012>();
}
}
if (prefix == 'C') {
switch (number) {
case 5:
return new BITL<code_type, PITL0<code_type, 16200>, _210>();
default:
return new BITL<code_type, PITL0<code_type, 16200>, _012>();
}
}
}
}
if (!strcmp(standard, "S2X")) {
if (!strcmp(modulation, "8PSK")) {
typedef MUX3<code_type, 0, 1, 2> _012;
typedef MUX3<code_type, 1, 0, 2> _102;
if (prefix == 'B') {
switch (number) {
case 7:
case 8:
case 9:
return new BITL<code_type, PITL0<code_type, 64800>, _102>();
default:
return new BITL<code_type, PITL0<code_type, 64800>, _012>();
}
}
if (prefix == 'C') {
switch (number) {
case 4:
case 5:
case 6:
case 7:
return new BITL<code_type, PITL0<code_type, 16200>, _102>();
default:
return new BITL<code_type, PITL0<code_type, 16200>, _012>();
}
}
}
}
if (!strcmp(standard, "T2")) {
if (!strcmp(modulation, "QPSK")) {
if (prefix == 'B') {
switch (number) {
case 8:
return new BITL<code_type, PITL<code_type, 16200, 30>, MUX0<code_type>>();
case 9:
return new BITL<code_type, PITL<code_type, 16200, 27>, MUX0<code_type>>();
}
}
}
if (!strcmp(modulation, "QAM16")) {
typedef MUX8<code_type, 7, 1, 4, 2, 5, 3, 6, 0> _71425360;
if (prefix == 'A') {
typedef MUX8<code_type, 0, 5, 1, 2, 4, 7, 3, 6> _05124736;
typedef CT8<code_type, 0, 0, 2, 4, 4, 5, 7, 7> CT;
switch (number) {
case 1:
return new BITL<code_type, PCTITL<code_type, 64800, 90, CT>, _71425360>();
case 2:
return new BITL<code_type, PCTITL<code_type, 64800, 72, CT>, _05124736>();
case 3:
return new BITL<code_type, PCTITL<code_type, 64800, 60, CT>, _71425360>();
case 4:
return new BITL<code_type, PCTITL<code_type, 64800, 45, CT>, _71425360>();
case 5:
return new BITL<code_type, PCTITL<code_type, 64800, 36, CT>, _71425360>();
case 6:
return new BITL<code_type, PCTITL<code_type, 64800, 30, CT>, _71425360>();
}
}
if (prefix == 'B') {
typedef MUX8<code_type, 6, 0, 3, 4, 5, 2, 1, 7> _60345217;
typedef MUX8<code_type, 7, 5, 4, 0, 3, 1, 2, 6> _75403126;
typedef CT8<code_type, 0, 0, 0, 1, 7, 20, 20, 21> CT;
switch (number) {
case 1:
return new BITL<code_type, PCTITL<code_type, 16200, 36, CT>, _71425360>();
case 2:
return new BITL<code_type, PCTITL<code_type, 16200, 25, CT>, _71425360>();
case 3:
return new BITL<code_type, PCTITL<code_type, 16200, 18, CT>, _71425360>();
case 4:
return new BITL<code_type, PCTITL<code_type, 16200, 15, CT>, _71425360>();
case 5:
return new BITL<code_type, PCTITL<code_type, 16200, 12, CT>, _71425360>();
case 6:
return new BITL<code_type, PCTITL<code_type, 16200, 10, CT>, _71425360>();
case 7:
return new BITL<code_type, PCTITL<code_type, 16200, 8, CT>, _71425360>();
case 8:
return new BITL<code_type, PCTITL<code_type, 16200, 30, CT>, _60345217>();
case 9:
return new BITL<code_type, PCTITL<code_type, 16200, 27, CT>, _75403126>();
}
}
}
if (!strcmp(modulation, "QAM64")) {
typedef MUX12<code_type, 11, 7, 3, 10, 6, 2, 9, 5, 1, 8, 4, 0> _11731062951840;
if (prefix == 'A') {
typedef MUX12<code_type, 2, 7, 6, 9, 0, 3, 1, 8, 4, 11, 5, 10> _27690318411510;
typedef CT12<code_type, 0, 0, 2, 2, 3, 4, 4, 5, 5, 7, 8, 9> CT;
switch (number) {
case 1:
return new BITL<code_type, PCTITL<code_type, 64800, 90, CT>, _11731062951840>();
case 2:
return new BITL<code_type, PCTITL<code_type, 64800, 72, CT>, _27690318411510>();
case 3:
return new BITL<code_type, PCTITL<code_type, 64800, 60, CT>, _11731062951840>();
case 4:
return new BITL<code_type, PCTITL<code_type, 64800, 45, CT>, _11731062951840>();
case 5:
return new BITL<code_type, PCTITL<code_type, 64800, 36, CT>, _11731062951840>();
case 6:
return new BITL<code_type, PCTITL<code_type, 64800, 30, CT>, _11731062951840>();
}
}
if (prefix == 'B') {
typedef MUX12<code_type, 4, 2, 0, 5, 6, 1, 3, 7, 8, 9, 10, 11> _42056137891011;
typedef MUX12<code_type, 4, 0, 1, 6, 2, 3, 5, 8, 7, 10, 9, 11> _40162358710911;
typedef CT12<code_type, 0, 0, 0, 2, 2, 2, 3, 3, 3, 6, 7, 7> CT;
switch (number) {
case 1:
return new BITL<code_type, PCTITL<code_type, 16200, 36, CT>, _11731062951840>();
case 2:
return new BITL<code_type, PCTITL<code_type, 16200, 25, CT>, _11731062951840>();
case 3:
return new BITL<code_type, PCTITL<code_type, 16200, 18, CT>, _11731062951840>();
case 4:
return new BITL<code_type, PCTITL<code_type, 16200, 15, CT>, _11731062951840>();
case 5:
return new BITL<code_type, PCTITL<code_type, 16200, 12, CT>, _11731062951840>();
case 6:
return new BITL<code_type, PCTITL<code_type, 16200, 10, CT>, _11731062951840>();
case 7:
return new BITL<code_type, PCTITL<code_type, 16200, 8, CT>, _11731062951840>();
case 8:
return new BITL<code_type, PCTITL<code_type, 16200, 30, CT>, _42056137891011>();
case 9:
return new BITL<code_type, PCTITL<code_type, 16200, 27, CT>, _40162358710911>();
}
}
}
if (!strcmp(modulation, "QAM256")) {
if (prefix == 'A') {
typedef MUX16<code_type, 15, 1, 13, 3, 8, 11, 9, 5, 10, 6, 4, 7, 12, 2, 14, 0> _1511338119510647122140;
typedef MUX16<code_type, 2, 11, 3, 4, 0, 9, 1, 8, 10, 13, 7, 14, 6, 15, 5, 12> _2113409181013714615512;
typedef MUX16<code_type, 7, 2, 9, 0, 4, 6, 13, 3, 14, 10, 15, 5, 8, 12, 11, 1> _7290461331410155812111;
typedef CT16<code_type, 0, 2, 2, 2, 2, 3, 7, 15, 16, 20, 22, 22, 27, 27, 28, 32> CT;
switch (number) {
case 1:
return new BITL<code_type, PCTITL<code_type, 64800, 90, CT>, _1511338119510647122140>();
case 2:
return new BITL<code_type, PCTITL<code_type, 64800, 72, CT>, _2113409181013714615512>();
case 3:
return new BITL<code_type, PCTITL<code_type, 64800, 60, CT>, _7290461331410155812111>();
case 4:
return new BITL<code_type, PCTITL<code_type, 64800, 45, CT>, _1511338119510647122140>();
case 5:
return new BITL<code_type, PCTITL<code_type, 64800, 36, CT>, _1511338119510647122140>();
case 6:
return new BITL<code_type, PCTITL<code_type, 64800, 30, CT>, _1511338119510647122140>();
}
}
if (prefix == 'B') {
typedef MUX8<code_type, 7, 3, 1, 5, 2, 6, 4, 0> _73152640;
typedef MUX8<code_type, 4, 0, 1, 2, 5, 3, 6, 7> _40125367;
typedef MUX8<code_type, 4, 0, 5, 1, 2, 3, 6, 7> _40512367;
typedef CT8<code_type, 0, 0, 0, 1, 7, 20, 20, 21> CT;
switch (number) {
case 1:
return new BITL<code_type, PCTITL<code_type, 16200, 36, CT>, _73152640>();
case 2:
return new BITL<code_type, PCTITL<code_type, 16200, 25, CT>, _73152640>();
case 3:
return new BITL<code_type, PCTITL<code_type, 16200, 18, CT>, _73152640>();
case 4:
return new BITL<code_type, PCTITL<code_type, 16200, 15, CT>, _73152640>();
case 5:
return new BITL<code_type, PCTITL<code_type, 16200, 12, CT>, _73152640>();
case 6:
return new BITL<code_type, PCTITL<code_type, 16200, 10, CT>, _73152640>();
case 7:
return new BITL<code_type, PCTITL<code_type, 16200, 8, CT>, _73152640>();
case 8:
return new BITL<code_type, PCTITL<code_type, 16200, 30, CT>, _40125367>();
case 9:
return new BITL<code_type, PCTITL<code_type, 16200, 27, CT>, _40512367>();
}
}
}
}
std::cerr << "using noop interleaver." << std::endl;
return new ITL0<code_type>();
}
} // namespace ldpctool
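
For reference, a hypothetical caller of the handler above (not part of this commit). With standard "S2", modulation "8PSK", prefix 'C' and table number 5 the selection logic resolves to BITL&lt;code_type, PITL0&lt;code_type, 16200&gt;, MUX3&lt;code_type, 2, 1, 0&gt;&gt;; any combination it does not recognize falls through to the warning and the no-op ITL0:

#include "testbench.h"
#include "interleaver.h"

namespace ldpctool {
// hand-written declaration; the commit ships no separate header for this factory
Interleaver<code_type> *create_interleaver(char *modulation, char *standard, char prefix, int number);
}
using namespace ldpctool;

int main()
{
    Interleaver<code_type> *itl = create_interleaver((char *)"8PSK", (char *)"S2", 'C', 5);
    // ... push FEC frames through itl here ...
    delete itl;   // assumes the Interleaver base declares a virtual destructor
    return 0;
}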

View File

@ -0,0 +1,154 @@
/*
LDPC SISO layered decoder
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef LAYERED_DECODER_HH
#define LAYERED_DECODER_HH
#include <stdlib.h>
#include "ldpc.h"
namespace ldpctool {
template <typename TYPE, typename ALG>
class LDPCDecoder
{
TYPE *bnl, *pty, *inp, *out;
uint16_t *pos;
uint8_t *cnc;
ALG alg;
int M, N, K, R, q, CNL, LT;
bool initialized;
void reset()
{
for (int i = 0; i < LT; ++i)
bnl[i] = alg.zero();
}
bool bad(TYPE *data, TYPE *parity, int blocks)
{
for (int i = 0; i < q; ++i) {
int cnt = cnc[i];
for (int j = 0; j < M; ++j) {
TYPE cnv = alg.sign(alg.one(), parity[M*i+j]);
if (i)
cnv = alg.sign(cnv, parity[M*(i-1)+j]);
else if (j)
cnv = alg.sign(cnv, parity[j+(q-1)*M-1]);
for (int c = 0; c < cnt; ++c)
cnv = alg.sign(cnv, data[pos[CNL*(M*i+j)+c]]);
if (alg.bad(cnv, blocks))
return true;
}
}
return false;
}
void update(TYPE *data, TYPE *parity)
{
TYPE *bl = bnl;
for (int i = 0; i < q; ++i) {
int cnt = cnc[i];
for (int j = 0; j < M; ++j) {
int deg = cnt + 2 - !(i|j);
for (int c = 0; c < cnt; ++c)
inp[c] = out[c] = alg.sub(data[pos[CNL*(M*i+j)+c]], bl[c]);
inp[cnt] = out[cnt] = alg.sub(parity[M*i+j], bl[cnt]);
if (i)
inp[cnt+1] = out[cnt+1] = alg.sub(parity[M*(i-1)+j], bl[cnt+1]);
else if (j)
inp[cnt+1] = out[cnt+1] = alg.sub(parity[j+(q-1)*M-1], bl[cnt+1]);
alg.finalp(out, deg);
for (int d = 0; d < deg; ++d)
alg.update(bl+d, out[d]);
for (int c = 0; c < cnt; ++c)
data[pos[CNL*(M*i+j)+c]] = alg.add(inp[c], bl[c]);
parity[M*i+j] = alg.add(inp[cnt], bl[cnt]);
if (i)
parity[M*(i-1)+j] = alg.add(inp[cnt+1], bl[cnt+1]);
else if (j)
parity[j+(q-1)*M-1] = alg.add(inp[cnt+1], bl[cnt+1]);
bl += deg;
}
}
}
public:
LDPCDecoder() : initialized(false)
{
}
void init(LDPCInterface *it)
{
if (initialized) {
free(bnl);
free(pty);
delete[] cnc;
delete[] pos;
delete[] inp;
delete[] out;
}
initialized = true;
LDPCInterface *ldpc = it->clone();
N = ldpc->code_len();
K = ldpc->data_len();
M = ldpc->group_len();
R = N - K;
q = R / M;
CNL = ldpc->links_max_cn() - 2;
pos = new uint16_t[R * CNL];
cnc = new uint8_t[R];
inp = new TYPE[N];
out = new TYPE[N];
for (int i = 0; i < R; ++i)
cnc[i] = 0;
ldpc->first_bit();
for (int j = 0; j < K; ++j) {
int *acc_pos = ldpc->acc_pos();
int bit_deg = ldpc->bit_deg();
for (int n = 0; n < bit_deg; ++n) {
int i = acc_pos[n];
pos[CNL*i+cnc[i]++] = j;
}
ldpc->next_bit();
}
LT = ldpc->links_total();
delete ldpc;
bnl = reinterpret_cast<TYPE *>(aligned_alloc(sizeof(TYPE), sizeof(TYPE) * LT));
pty = reinterpret_cast<TYPE *>(aligned_alloc(sizeof(TYPE), sizeof(TYPE) * R));
uint16_t *tmp = new uint16_t[R * CNL];
for (int i = 0; i < q; ++i)
for (int j = 0; j < M; ++j)
for (int c = 0; c < CNL; ++c)
tmp[CNL*(M*i+j)+c] = pos[CNL*(q*j+i)+c];
delete[] pos;
pos = tmp;
}
int operator()(TYPE *data, TYPE *parity, int trials = 25, int blocks = 1)
{
reset();
for (int i = 0; i < q; ++i)
for (int j = 0; j < M; ++j)
pty[M*i+j] = parity[q*j+i];
while (bad(data, pty, blocks) && --trials >= 0)
update(data, pty);
for (int i = 0; i < q; ++i)
for (int j = 0; j < M; ++j)
parity[q*j+i] = pty[M*i+j];
return trials;
}
~LDPCDecoder()
{
if (initialized) {
free(bnl);
free(pty);
delete[] cnc;
delete[] pos;
delete[] inp;
delete[] out;
}
}
};
} // namespace ldpctool
#endif
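
Distilled from the testbench later in this commit, the decoder life cycle is: init() once with an LDPCInterface describing the code, then call operator() on a batch of LLR frames laid out as data_len() systematic values followed by the parity values. A hedged sketch, reusing the simd_type, SIMD_WIDTH and FACTOR aliases and the algorithm typedefs that the testbench takes from testbench.h and algorithms.h:

#include "testbench.h"
#include "algorithms.h"
#include "layered_decoder.h"
using namespace ldpctool;

// Decode one full SIMD batch in place; returns the remaining trial budget
// (negative means the decoder never converged within max_trials).
int decode_one_batch(LDPCInterface *ldpc, simd_type *llr, int max_trials)
{
    typedef NormalUpdate<simd_type> update_type;
    typedef MinSumCAlgorithm<simd_type, update_type, FACTOR> algorithm_type;
    LDPCDecoder<simd_type, algorithm_type> decode;
    decode.init(ldpc);   // builds the check-node tables; clones ldpc internally
    return decode(llr, llr + ldpc->data_len(), max_trials, SIMD_WIDTH);
}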

View File

@ -0,0 +1,119 @@
/*
LDPC table iterator
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef LDPC_HH
#define LDPC_HH
namespace ldpctool {
struct LDPCInterface
{
virtual LDPCInterface *clone() = 0;
virtual int code_len() = 0;
virtual int data_len() = 0;
virtual int group_len() = 0;
virtual int links_total() = 0;
virtual int links_max_cn() = 0;
virtual int bit_deg() = 0;
virtual int *acc_pos() = 0;
virtual void first_bit() = 0;
virtual void next_bit() = 0;
virtual ~LDPCInterface() = default;
static const char *mc_tabnames[2][32];
};
template <typename TABLE>
class LDPC : public LDPCInterface
{
static const int M = TABLE::M;
static const int N = TABLE::N;
static const int K = TABLE::K;
static const int R = N-K;
static const int q = R/M;
int acc_pos_[TABLE::DEG_MAX];
const int *row_ptr;
int bit_deg_;
int grp_num;
int grp_len;
int grp_cnt;
int row_cnt;
void next_group()
{
if (grp_cnt >= grp_len) {
grp_len = TABLE::LEN[grp_num];
bit_deg_ = TABLE::DEG[grp_num];
grp_cnt = 0;
++grp_num;
}
for (int i = 0; i < bit_deg_; ++i)
acc_pos_[i] = row_ptr[i];
row_ptr += bit_deg_;
++grp_cnt;
}
public:
LDPCInterface *clone()
{
return new LDPC<TABLE>();
}
int code_len()
{
return N;
}
int data_len()
{
return K;
}
int group_len()
{
return M;
}
int links_total()
{
return TABLE::LINKS_TOTAL;
}
int links_max_cn()
{
return TABLE::LINKS_MAX_CN;
}
int bit_deg()
{
return bit_deg_;
}
int *acc_pos()
{
return acc_pos_;
}
void next_bit()
{
if (++row_cnt < M) {
for (int i = 0; i < bit_deg_; ++i)
acc_pos_[i] += q;
for (int i = 0; i < bit_deg_; ++i)
acc_pos_[i] %= R;
} else {
next_group();
row_cnt = 0;
}
}
void first_bit()
{
grp_num = 0;
grp_len = 0;
grp_cnt = 0;
row_cnt = 0;
row_ptr = TABLE::POS;
next_group();
}
};
LDPCInterface *create_ldpc(char *standard, char prefix, int number);
} // namespace ldpctool
#endif
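
The iterator walks the parity-check table one bit column at a time: first_bit() rewinds, next_bit() advances, and acc_pos()[0..bit_deg()-1] lists the check nodes the current bit feeds, exactly as the layered decoder's init() consumes it. A small hedged sketch (hypothetical helper, not part of the commit) that simply counts the edges attached to the systematic columns:

#include "testbench.h"
#include "ldpc.h"
using namespace ldpctool;

long count_data_edges(LDPCInterface *ldpc)
{
    long edges = 0;
    ldpc->first_bit();
    for (int j = 0; j < ldpc->data_len(); ++j) {
        edges += ldpc->bit_deg();   // acc_pos() would give the check-node indices themselves
        ldpc->next_bit();
    }
    return edges;
}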

View File

@ -0,0 +1,201 @@
/*
LDPC testbench
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
Transformed into external decoder for third-party applications
Copyright 2019 <pabr@pabr.org>
*/
#include <stdlib.h>
#include <unistd.h>
#include <iostream>
#include <iomanip>
#include <random>
#include <cmath>
#include <cassert>
#include <chrono>
#include <cstring>
#include <algorithm>
#include <functional>
#include "testbench.h"
#include "algorithms.h"
#if 0
#include "flooding_decoder.h"
static const int DEFAULT_TRIALS = 50;
#else
#include "layered_decoder.h"
static const int DEFAULT_TRIALS = 25;
#endif
ldpctool::LDPCInterface *create_ldpc(char *standard, char prefix, int number);
void fail(const char *msg)
{
std::cerr << "** plugin: " << msg << std::endl;
exit(1);
}
void fatal(const char *msg)
{
fprintf(stderr, "** plugin: ");
perror(msg);
exit(1);
}
void usage(const char *name, FILE *f, int c, const char *info = NULL)
{
fprintf(f, "Usage: %s [--standard DVB-S2] --modcod INT [--trials INT] [--shortframes] < FECFRAMES.int8 > FECFRAMES.int8\n", name);
if (info)
fprintf(f, "** Error while processing '%s'\n", info);
exit(c);
}
int main(int argc, char **argv)
{
const char *standard = "DVB-S2";
int modcod = -1;
int max_trials = DEFAULT_TRIALS;
bool shortframes = false;
for (int i = 1; i < argc; ++i)
{
if (!strcmp(argv[i], "--standard") && i + 1 < argc)
standard = argv[++i];
else if (!strcmp(argv[i], "--modcod") && i + 1 < argc)
modcod = atoi(argv[++i]);
else if (!strcmp(argv[i], "--trials") && i + 1 < argc)
max_trials = atoi(argv[++i]);
else if (!strcmp(argv[i], "--shortframes"))
shortframes = true;
else if (!strcmp(argv[i], "-h"))
usage(argv[0], stdout, 0);
else
usage(argv[0], stderr, 1, argv[i]);
}
if (strcmp(standard, "DVB-S2"))
fail("Only DVB-S2 is supported.");
if (modcod < 0 || modcod > 31)
usage(argv[0], stderr, 1);
typedef ldpctool::NormalUpdate<ldpctool::simd_type> update_type;
//typedef SelfCorrectedUpdate<simd_type> update_type;
//typedef MinSumAlgorithm<simd_type, update_type> algorithm_type;
//typedef OffsetMinSumAlgorithm<simd_type, update_type, FACTOR> algorithm_type;
typedef ldpctool::MinSumCAlgorithm<ldpctool::simd_type, update_type, ldpctool::FACTOR> algorithm_type;
//typedef LogDomainSPA<simd_type, update_type> algorithm_type;
//typedef LambdaMinAlgorithm<simd_type, update_type, 3> algorithm_type;
//typedef SumProductAlgorithm<simd_type, update_type> algorithm_type;
ldpctool::LDPCDecoder<ldpctool::simd_type, algorithm_type> decode;
// DVB-S2 MODCOD definitions
static const char *mc_tabnames[2][32] = { // [shortframes][modcod]
{// Normal frames
0, "B1", "B2", "B3", "B4", "B5", "B6", "B7",
"B8", "B9", "B10", "B11", "B5", "B6", "B7", "B9",
"B10", "B11", "B6", "B7", "B8", "B9", "B10", "B11",
"B7", "B8", "B8", "B10", "B11", 0, 0, 0},
{// Short frames
0, "C1", "C2", "C3", "C4", "C5", "C6", "C7",
"C8", "C9", "C10", 0, "C5", "C6", "C7", "C9",
"C10", 0, "C6", "C7", "C8", "C9", "C10", 0,
"C7", "C8", "C8", "C10", 0, 0, 0, 0}};
const char *tabname = mc_tabnames[shortframes][modcod];
if (!tabname)
fail("unsupported modcod");
ldpctool::LDPCInterface *ldpc = create_ldpc((char *)"S2", tabname[0], atoi(tabname + 1));
if (!ldpc)
{
std::cerr << "no such table!" << std::endl;
return -1;
}
const int CODE_LEN = ldpc->code_len();
const int DATA_LEN = ldpc->data_len();
decode.init(ldpc);
int BLOCKS = 32;
ldpctool::code_type *code = new ldpctool::code_type[BLOCKS * CODE_LEN];
void *aligned_buffer = aligned_alloc(sizeof(ldpctool::simd_type), sizeof(ldpctool::simd_type) * CODE_LEN);
ldpctool::simd_type *simd = reinterpret_cast<ldpctool::simd_type *>(aligned_buffer);
// Expect LLR values in int8_t format.
if (sizeof(ldpctool::code_type) != 1)
fail("Bug: Unsupported code_type");
while (true)
{
ssize_t iosize = BLOCKS * CODE_LEN * sizeof(*code);
for (ssize_t pos = 0; pos < iosize;)
{
int nr = read(0, code + pos, iosize - pos);
if (!nr)
exit(0);
if (nr < 0)
fatal("read");
pos += nr;
}
int iterations = 0;
int num_decodes = 0;
for (int j = 0; j < BLOCKS; j += ldpctool::SIMD_WIDTH)
{
int blocks = j + ldpctool::SIMD_WIDTH > BLOCKS ? BLOCKS - j : ldpctool::SIMD_WIDTH;
for (int n = 0; n < blocks; ++n)
for (int i = 0; i < CODE_LEN; ++i)
reinterpret_cast<ldpctool::code_type *>(simd + i)[n] = code[(j + n) * CODE_LEN + i];
int trials = max_trials;
int count = decode(simd, simd + DATA_LEN, trials, blocks);
++num_decodes;
for (int n = 0; n < blocks; ++n)
for (int i = 0; i < CODE_LEN; ++i)
code[(j + n) * CODE_LEN + i] = reinterpret_cast<ldpctool::code_type *>(simd + i)[n];
if (count < 0) {
iterations += blocks * trials;
// std::cerr << "decoder failed at converging to a code word in " << trials << " trials" << std::endl;
} else {
iterations += blocks * (trials - count);
}
}
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
assert(!std::isnan(code[i]));
for (ssize_t pos = 0; pos < iosize;)
{
ssize_t nw = write(1, code + pos, iosize - pos);
if (!nw)
exit(0);
if (nw < 0)
fatal("write");
pos += nw;
}
} // main loop
delete ldpc;
free(aligned_buffer);
delete[] code;
return 0;
}
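
The only non-obvious step in the main loop above is the transpose between frame order and SIMD-lane order: bit i of frame j+n is stored in lane n of SIMD word i, so a single decode() call advances SIMD_WIDTH frames in lock-step. A toy scalar version of that packing, with W and L as stand-ins for SIMD_WIDTH and CODE_LEN:

#include <cstdint>
#include <cstdio>

int main()
{
    const int W = 4, L = 3;
    int8_t code[W * L], simd[L * W];
    for (int n = 0; n < W; ++n)                    // W frames of L LLRs each
        for (int i = 0; i < L; ++i)
            code[n * L + i] = int8_t(10 * n + i);  // frame n, bit i
    for (int n = 0; n < W; ++n)                    // pack: lane n of word i <- frame n, bit i
        for (int i = 0; i < L; ++i)
            simd[i * W + n] = code[n * L + i];
    std::printf("%d %d %d %d\n", simd[0], simd[1], simd[2], simd[3]);  // prints: 0 10 20 30
    return 0;
}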

View File

@ -0,0 +1,64 @@
/*
LDPC modulations handler
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#include <cstring>
#include <cmath>
#include <algorithm>
#include "psk.h"
#include "qam.h"
#include "modulation.h"
#include "testbench.h"
using namespace ldpctool;
template <typename TYPE, typename CODE>
constexpr typename TYPE::value_type PhaseShiftKeying<4, TYPE, CODE>::rcp_sqrt_2;
template <typename TYPE, typename CODE>
constexpr TYPE PhaseShiftKeying<8, TYPE, CODE>::rot_acw;
template <typename TYPE, typename CODE>
constexpr TYPE PhaseShiftKeying<8, TYPE, CODE>::rot_cw;
template <typename TYPE, typename CODE>
constexpr typename TYPE::value_type QuadratureAmplitudeModulation<16, TYPE, CODE>::AMP;
template <typename TYPE, typename CODE>
constexpr typename TYPE::value_type QuadratureAmplitudeModulation<64, TYPE, CODE>::AMP;
template <typename TYPE, typename CODE>
constexpr typename TYPE::value_type QuadratureAmplitudeModulation<256, TYPE, CODE>::AMP;
template <typename TYPE, typename CODE>
constexpr typename TYPE::value_type QuadratureAmplitudeModulation<1024, TYPE, CODE>::AMP;
template <int LEN>
ModulationInterface<complex_type, code_type> *create_modulation(char *name)
{
if (!strcmp(name, "BPSK"))
return new Modulation<PhaseShiftKeying<2, complex_type, code_type>, LEN>();
if (!strcmp(name, "QPSK"))
return new Modulation<PhaseShiftKeying<4, complex_type, code_type>, LEN / 2>();
if (!strcmp(name, "8PSK"))
return new Modulation<PhaseShiftKeying<8, complex_type, code_type>, LEN / 3>();
if (!strcmp(name, "QAM16"))
return new Modulation<QuadratureAmplitudeModulation<16, complex_type, code_type>, LEN / 4>();
if (!strcmp(name, "QAM64"))
return new Modulation<QuadratureAmplitudeModulation<64, complex_type, code_type>, LEN / 6>();
if (!strcmp(name, "QAM256"))
return new Modulation<QuadratureAmplitudeModulation<256, complex_type, code_type>, LEN / 8>();
if (!strcmp(name, "QAM1024"))
return new Modulation<QuadratureAmplitudeModulation<1024, complex_type, code_type>, LEN / 10>();
return 0;
}
ModulationInterface<complex_type, code_type> *create_modulation(char *name, int len)
{
switch (len) {
case 16200:
return create_modulation<16200>(name);
case 32400:
return create_modulation<32400>(name);
case 64800:
return create_modulation<64800>(name);
}
return 0;
}
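
A hypothetical caller of the factory above (not part of this commit): for a DVB-S2 normal frame with QPSK it returns a Modulation&lt;PhaseShiftKeying&lt;4, ...&gt;, 32400&gt; whose softN() demaps all 32400 symbols into 64800 LLRs in one call. complex_type and code_type are the testbench.h aliases, assumed here to be a float complex and int8_t:

#include "testbench.h"
#include "modulation.h"
using namespace ldpctool;

// hand-written declaration; the factory is defined above with no separate header
ModulationInterface<complex_type, code_type> *create_modulation(char *name, int len);

void demap_qpsk_normal_frame(complex_type *symbols /* 32400 */, code_type *llr /* 64800 */)
{
    ModulationInterface<complex_type, code_type> *mod = create_modulation((char *)"QPSK", 64800);
    mod->softN(llr, symbols, 8.0f);  // last argument is the LLR scaling ("precision") used by quantize()
    delete mod;                      // ModulationInterface declares a virtual default destructor
}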

View File

@ -0,0 +1,78 @@
/*
Modulation interface
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef MODULATION_HH
#define MODULATION_HH
namespace ldpctool {
template <typename TYPE, typename CODE>
struct ModulationInterface
{
typedef TYPE complex_type;
typedef typename TYPE::value_type value_type;
typedef CODE code_type;
virtual int bits() = 0;
virtual void hardN(code_type *, complex_type *) = 0;
virtual void softN(code_type *, complex_type *, value_type) = 0;
virtual void mapN(complex_type *, code_type *) = 0;
virtual void hard(code_type *, complex_type) = 0;
virtual void soft(code_type *, complex_type, value_type) = 0;
virtual complex_type map(code_type *) = 0;
virtual ~ModulationInterface() = default;
};
template <typename MOD, int NUM>
struct Modulation : public ModulationInterface<typename MOD::complex_type, typename MOD::code_type>
{
typedef typename MOD::complex_type complex_type;
typedef typename MOD::value_type value_type;
typedef typename MOD::code_type code_type;
int bits()
{
return MOD::BITS;
}
void hardN(code_type *b, complex_type *c)
{
for (int i = 0; i < NUM; ++i)
MOD::hard(b + i * MOD::BITS, c[i]);
}
void softN(code_type *b, complex_type *c, value_type precision)
{
for (int i = 0; i < NUM; ++i)
MOD::soft(b + i * MOD::BITS, c[i], precision);
}
void mapN(complex_type *c, code_type *b)
{
for (int i = 0; i < NUM; ++i)
c[i] = MOD::map(b + i * MOD::BITS);
}
void hard(code_type *b, complex_type c)
{
MOD::hard(b, c);
}
void soft(code_type *b, complex_type c, value_type precision)
{
MOD::soft(b, c, precision);
}
complex_type map(code_type *b)
{
return MOD::map(b);
}
};
} // namespace ldpctool
#endif

View File

@ -0,0 +1,849 @@
/*
ARM NEON acceleration
Copyright 2018 Ahmet Inan <inan@aicodix.de>
*/
#ifndef NEON_HH
#define NEON_HH
#include <cstdint>
#include <arm_neon.h>
#include "simd.h"
namespace ldpctool {
template <>
union SIMD<float, 4>
{
static const int SIZE = 4;
typedef float value_type;
typedef uint32_t uint_type;
float32x4_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int8_t, 16>
{
static const int SIZE = 16;
typedef int8_t value_type;
typedef uint8_t uint_type;
int8x16_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int16_t, 8>
{
static const int SIZE = 8;
typedef int16_t value_type;
typedef uint16_t uint_type;
int16x8_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int32_t, 4>
{
static const int SIZE = 4;
typedef int32_t value_type;
typedef uint32_t uint_type;
int32x4_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int64_t, 2>
{
static const int SIZE = 2;
typedef int64_t value_type;
typedef uint64_t uint_type;
int64x2_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint8_t, 16>
{
static const int SIZE = 16;
typedef uint8_t value_type;
typedef uint8_t uint_type;
uint8x16_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint16_t, 8>
{
static const int SIZE = 8;
typedef uint16_t value_type;
typedef uint16_t uint_type;
uint16x8_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint32_t, 4>
{
static const int SIZE = 4;
typedef uint32_t value_type;
typedef uint32_t uint_type;
uint32x4_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint64_t, 2>
{
static const int SIZE = 2;
typedef uint64_t value_type;
typedef uint64_t uint_type;
uint64x2_t m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
inline SIMD<float, 4> vreinterpret(SIMD<uint32_t, 4> a)
{
SIMD<float, 4> tmp;
tmp.m = (float32x4_t)a.m;
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vreinterpret(SIMD<float, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = (uint32x4_t)a.m;
return tmp;
}
template <>
inline SIMD<int8_t, 16> vreinterpret(SIMD<uint8_t, 16> a)
{
SIMD<int8_t, 16> tmp;
tmp.m = (int8x16_t)a.m;
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vreinterpret(SIMD<int8_t, 16> a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = (uint8x16_t)a.m;
return tmp;
}
template <>
inline SIMD<int16_t, 8> vreinterpret(SIMD<uint16_t, 8> a)
{
SIMD<int16_t, 8> tmp;
tmp.m = (int16x8_t)a.m;
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vreinterpret(SIMD<int16_t, 8> a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = (uint16x8_t)a.m;
return tmp;
}
template <>
inline SIMD<int32_t, 4> vreinterpret(SIMD<uint32_t, 4> a)
{
SIMD<int32_t, 4> tmp;
tmp.m = (int32x4_t)a.m;
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vreinterpret(SIMD<int32_t, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = (uint32x4_t)a.m;
return tmp;
}
template <>
inline SIMD<float, 4> vdup(float a)
{
SIMD<float, 4> tmp;
tmp.m = vdupq_n_f32(a);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vdup(int8_t a)
{
SIMD<int8_t, 16> tmp;
tmp.m = vdupq_n_s8(a);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vdup(int16_t a)
{
SIMD<int16_t, 8> tmp;
tmp.m = vdupq_n_s16(a);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vdup(int32_t a)
{
SIMD<int32_t, 4> tmp;
tmp.m = vdupq_n_s32(a);
return tmp;
}
template <>
inline SIMD<int64_t, 2> vdup(int64_t a)
{
SIMD<int64_t, 2> tmp;
tmp.m = vdupq_n_s64(a);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vdup(uint8_t a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vdupq_n_u8(a);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vdup(uint16_t a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vdupq_n_u16(a);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vdup(uint32_t a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vdupq_n_u32(a);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vdup(uint64_t a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = vdupq_n_u64(a);
return tmp;
}
template <>
inline SIMD<float, 4> vzero()
{
SIMD<float, 4> tmp;
tmp.m = (float32x4_t)veorq_u32((uint32x4_t)tmp.m, (uint32x4_t)tmp.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vzero()
{
SIMD<int8_t, 16> tmp;
tmp.m = veorq_s8(tmp.m, tmp.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vzero()
{
SIMD<int16_t, 8> tmp;
tmp.m = veorq_s16(tmp.m, tmp.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vzero()
{
SIMD<int32_t, 4> tmp;
tmp.m = veorq_s32(tmp.m, tmp.m);
return tmp;
}
template <>
inline SIMD<int64_t, 2> vzero()
{
SIMD<int64_t, 2> tmp;
tmp.m = veorq_s64(tmp.m, tmp.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vzero()
{
SIMD<uint8_t, 16> tmp;
tmp.m = veorq_u8(tmp.m, tmp.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vzero()
{
SIMD<uint16_t, 8> tmp;
tmp.m = veorq_u16(tmp.m, tmp.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vzero()
{
SIMD<uint32_t, 4> tmp;
tmp.m = veorq_u32(tmp.m, tmp.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vzero()
{
SIMD<uint64_t, 2> tmp;
tmp.m = veorq_u64(tmp.m, tmp.m);
return tmp;
}
template <>
inline SIMD<float, 4> vadd(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = vaddq_f32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vadd(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = vaddq_s8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vadd(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = vaddq_s16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vadd(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = vaddq_s32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int64_t, 2> vadd(SIMD<int64_t, 2> a, SIMD<int64_t, 2> b)
{
SIMD<int64_t, 2> tmp;
tmp.m = vaddq_s64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vqadd(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = vqaddq_s8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vqadd(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = vqaddq_s16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<float, 4> vsub(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = vsubq_f32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vsub(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = vsubq_s8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vsub(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = vsubq_s16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vsub(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = vsubq_s32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int64_t, 2> vsub(SIMD<int64_t, 2> a, SIMD<int64_t, 2> b)
{
SIMD<int64_t, 2> tmp;
tmp.m = vsubq_s64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vqsub(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = vqsubq_s8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vqsub(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = vqsubq_s16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vqsub(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vqsubq_u8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vqsub(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vqsubq_u16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<float, 4> vabs(SIMD<float, 4> a)
{
SIMD<float, 4> tmp;
tmp.m = vabsq_f32(a.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vqabs(SIMD<int8_t, 16> a)
{
SIMD<int8_t, 16> tmp;
tmp.m = vqabsq_s8(a.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vqabs(SIMD<int16_t, 8> a)
{
SIMD<int16_t, 8> tmp;
tmp.m = vqabsq_s16(a.m);
return tmp;
}
template <>
inline SIMD<float, 4> vsign(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = (float32x4_t)vbicq_u32(
veorq_u32((uint32x4_t)a.m, vandq_u32((uint32x4_t)vdupq_n_f32(-0.f), (uint32x4_t)b.m)),
vceqq_f32(b.m, vdupq_n_f32(0.f)));
return tmp;
}
template <>
inline SIMD<int8_t, 16> vsign(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = (int8x16_t)vorrq_u8(
vandq_u8(vcgtq_s8(vdupq_n_s8(0), b.m), (uint8x16_t)vnegq_s8(a.m)),
vandq_u8(vcgtq_s8(b.m, vdupq_n_s8(0)), (uint8x16_t)a.m));
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vorr(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vorrq_u8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vorr(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vorrq_u16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vorr(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vorrq_u32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vorr(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = vorrq_u64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vand(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vandq_u8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vand(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vandq_u16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vand(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vandq_u32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vand(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = vandq_u64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> veor(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = veorq_u8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> veor(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = veorq_u16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> veor(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = veorq_u32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> veor(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = veorq_u64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vbic(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vbicq_u8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vbic(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vbicq_u16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vbic(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vbicq_u32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vbic(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = vbicq_u64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vbsl(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b, SIMD<uint8_t, 16> c)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vbslq_u8(a.m, b.m, c.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vbsl(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b, SIMD<uint16_t, 8> c)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vbslq_u16(a.m, b.m, c.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vbsl(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b, SIMD<uint32_t, 4> c)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vbslq_u32(a.m, b.m, c.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vbsl(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b, SIMD<uint64_t, 2> c)
{
SIMD<uint64_t, 2> tmp;
tmp.m = vbslq_u64(a.m, b.m, c.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vceqz(SIMD<float, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vceqq_f32(a.m, vdupq_n_f32(0.f));
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vceqz(SIMD<int8_t, 16> a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vceqq_s8(a.m, vdupq_n_s8(0));
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vceqz(SIMD<int16_t, 8> a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vceqq_s16(a.m, vdupq_n_s16(0));
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vceqz(SIMD<int32_t, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vceqq_s32(a.m, vdupq_n_s32(0));
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vceq(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vceqq_f32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vceq(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vceqq_s8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vceq(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vceqq_s16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vceq(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vceqq_s32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vcgtz(SIMD<float, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vcgtq_f32(a.m, vdupq_n_f32(0.f));
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vcgtz(SIMD<int8_t, 16> a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vcgtq_s8(a.m, vdupq_n_s8(0));
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vcgtz(SIMD<int16_t, 8> a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vcgtq_s16(a.m, vdupq_n_s16(0));
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vcgtz(SIMD<int32_t, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vcgtq_s32(a.m, vdupq_n_s32(0));
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vcltz(SIMD<float, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vcltq_f32(a.m, vdupq_n_f32(0.f));
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vcltz(SIMD<int8_t, 16> a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = vcltq_s8(a.m, vdupq_n_s8(0));
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vcltz(SIMD<int16_t, 8> a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = vcltq_s16(a.m, vdupq_n_s16(0));
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vcltz(SIMD<int32_t, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = vcltq_s32(a.m, vdupq_n_s32(0));
return tmp;
}
template <>
inline SIMD<float, 4> vmin(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = vminq_f32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vmin(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = vminq_s8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vmin(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = vminq_s16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vmin(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = vminq_s32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<float, 4> vmax(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = vmaxq_f32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vmax(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = vmaxq_s8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vmax(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = vmaxq_s16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vmax(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = vmaxq_s32(a.m, b.m);
return tmp;
}
} // namespace ldpctool
#endif
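
Each union above simply pairs the raw NEON register (.m) with per-lane arrays (.v/.u), so scalar code can fill lanes while the wrappers operate on whole registers. A minimal sketch, buildable only on NEON targets and assuming the generic vqadd/vsign entry points declared in simd.h deduce the SIMD type from their arguments:

#include <cstdio>
#include "neon.h"
using namespace ldpctool;

int main()
{
    SIMD<int8_t, 16> a, b, s, t;
    for (int i = 0; i < 16; ++i) { a.v[i] = 100; b.v[i] = 100; }
    s = vqadd(a, b);   // saturating add: every lane clamps at 127 instead of wrapping
    t = vsign(s, b);   // copy s with the sign of b (b > 0 here, so t equals s)
    std::printf("%d %d\n", (int)s.v[0], (int)t.v[0]);  // prints: 127 127
    return 0;
}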

View File

@ -0,0 +1,153 @@
/*
Phase-shift keying
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef PSK_HH
#define PSK_HH
namespace ldpctool {
template <int NUM, typename TYPE, typename CODE>
struct PhaseShiftKeying;
template <typename TYPE, typename CODE>
struct PhaseShiftKeying<2, TYPE, CODE>
{
static const int NUM = 2;
static const int BITS = 1;
typedef TYPE complex_type;
typedef typename TYPE::value_type value_type;
typedef CODE code_type;
static constexpr value_type DIST = 2;
static code_type quantize(value_type precision, value_type value)
{
value *= DIST * precision;
if (std::is_integral<code_type>::value)
value = std::nearbyint(value);
if (std::is_same<code_type, int8_t>::value)
value = std::min<value_type>(std::max<value_type>(value, -128), 127);
return value;
}
static void hard(code_type *b, complex_type c)
{
b[0] = c.real() < value_type(0) ? code_type(-1) : code_type(1);
}
static void soft(code_type *b, complex_type c, value_type precision)
{
b[0] = quantize(precision, c.real());
}
static complex_type map(code_type *b)
{
return complex_type(b[0], 0);
}
};
template <typename TYPE, typename CODE>
struct PhaseShiftKeying<4, TYPE, CODE>
{
static const int NUM = 4;
static const int BITS = 2;
typedef TYPE complex_type;
typedef typename TYPE::value_type value_type;
typedef CODE code_type;
// 1/sqrt(2)
static constexpr value_type rcp_sqrt_2 = 0.70710678118654752440;
static constexpr value_type DIST = 2 * rcp_sqrt_2;
static code_type quantize(value_type precision, value_type value)
{
value *= DIST * precision;
if (std::is_integral<code_type>::value)
value = std::nearbyint(value);
if (std::is_same<code_type, int8_t>::value)
value = std::min<value_type>(std::max<value_type>(value, -128), 127);
return value;
}
static void hard(code_type *b, complex_type c)
{
b[0] = c.real() < value_type(0) ? code_type(-1) : code_type(1);
b[1] = c.imag() < value_type(0) ? code_type(-1) : code_type(1);
}
static void soft(code_type *b, complex_type c, value_type precision)
{
b[0] = quantize(precision, c.real());
b[1] = quantize(precision, c.imag());
}
static complex_type map(code_type *b)
{
return rcp_sqrt_2 * complex_type(b[0], b[1]);
}
};
template <typename TYPE, typename CODE>
struct PhaseShiftKeying<8, TYPE, CODE>
{
static const int NUM = 8;
static const int BITS = 3;
typedef TYPE complex_type;
typedef typename TYPE::value_type value_type;
typedef CODE code_type;
// cos(pi/8)
static constexpr value_type cos_pi_8 = 0.92387953251128675613;
// sin(pi/8)
static constexpr value_type sin_pi_8 = 0.38268343236508977173;
// 1/sqrt(2)
static constexpr value_type rcp_sqrt_2 = 0.70710678118654752440;
static constexpr value_type DIST = 2 * sin_pi_8;
static constexpr complex_type rot_cw = complex_type(cos_pi_8, -sin_pi_8);
static constexpr complex_type rot_acw = complex_type(cos_pi_8, sin_pi_8);
static code_type quantize(value_type precision, value_type value)
{
value *= DIST * precision;
if (std::is_integral<code_type>::value)
value = std::nearbyint(value);
if (std::is_same<code_type, int8_t>::value)
value = std::min<value_type>(std::max<value_type>(value, -128), 127);
return value;
}
static void hard(code_type *b, complex_type c)
{
c *= rot_cw;
b[1] = c.real() < value_type(0) ? code_type(-1) : code_type(1);
b[2] = c.imag() < value_type(0) ? code_type(-1) : code_type(1);
b[0] = std::abs(c.real()) < std::abs(c.imag()) ? code_type(-1) : code_type(1);
}
static void soft(code_type *b, complex_type c, value_type precision)
{
c *= rot_cw;
b[1] = quantize(precision, c.real());
b[2] = quantize(precision, c.imag());
b[0] = quantize(precision, rcp_sqrt_2 * (std::abs(c.real()) - std::abs(c.imag())));
}
static complex_type map(code_type *b)
{
value_type real = cos_pi_8;
value_type imag = sin_pi_8;
if (b[0] < code_type(0))
std::swap(real, imag);
return complex_type(real * b[1], imag * b[2]) * rot_acw;
}
};
} // namespace ldpctool
#endif
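
A stand-alone sketch of the 8PSK demapper (hypothetical test code; std::complex&lt;float&gt; and int8_t stand in for the complex_type/code_type aliases from testbench.h). The symbol is first rotated by rot_cw so the decision boundaries align with the axes; b[1] and b[2] come from the rotated I and Q, and b[0] from the |I| - |Q| comparison. Build as C++17, or repeat the out-of-class constexpr member definitions that this commit places in modulation_handler.cpp:

#include <algorithm>
#include <cmath>
#include <complex>
#include <cstdint>
#include <cstdio>
#include <type_traits>
#include <utility>
#include "psk.h"

int main()
{
    typedef ldpctool::PhaseShiftKeying<8, std::complex<float>, int8_t> MOD;
    int8_t bits[MOD::BITS] = { 1, -1, 1 };       // hard bits are +/-1
    std::complex<float> sym = MOD::map(bits);    // ideal constellation point for those bits
    int8_t llr[MOD::BITS];
    MOD::soft(llr, sym, 16.0f);                  // precision scales the int8 LLR magnitudes
    std::printf("%d %d %d\n", llr[0], llr[1], llr[2]);  // each LLR carries the sign of bits[]
    return 0;
}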

View File

@ -0,0 +1,258 @@
/*
Quadrature amplitude modulation
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#ifndef QAM_HH
#define QAM_HH
namespace ldpctool {
template <int NUM, typename TYPE, typename CODE>
struct QuadratureAmplitudeModulation;
template <typename TYPE, typename CODE>
struct QuadratureAmplitudeModulation<16, TYPE, CODE>
{
static const int NUM = 16;
static const int BITS = 4;
typedef TYPE complex_type;
typedef typename TYPE::value_type value_type;
typedef CODE code_type;
static constexpr value_type FAC = 1.0540925533894596;
static constexpr value_type RCP = 3 * FAC;
static constexpr value_type AMP = 1 / RCP;
static constexpr value_type DIST = 2 * AMP;
static constexpr value_type amp(int i)
{
return AMP * i;
}
static code_type quantize(value_type precision, value_type value)
{
value *= DIST * precision;
if (std::is_integral<code_type>::value)
value = std::nearbyint(value);
if (std::is_same<code_type, int8_t>::value)
value = std::min<value_type>(std::max<value_type>(value, -128), 127);
return value;
}
static void hard(code_type *b, complex_type c)
{
b[0] = c.real() < amp(0) ? code_type(-1) : code_type(1);
b[1] = c.imag() < amp(0) ? code_type(-1) : code_type(1);
b[2] = std::abs(c.real()) < amp(2) ? code_type(-1) : code_type(1);
b[3] = std::abs(c.imag()) < amp(2) ? code_type(-1) : code_type(1);
}
static void soft(code_type *b, complex_type c, value_type precision)
{
b[0] = quantize(precision, c.real());
b[1] = quantize(precision, c.imag());
b[2] = quantize(precision, std::abs(c.real())-amp(2));
b[3] = quantize(precision, std::abs(c.imag())-amp(2));
}
static complex_type map(code_type *b)
{
return AMP * complex_type(
b[0]*(b[2]+value_type(2)),
b[1]*(b[3]+value_type(2))
);
}
};
template <typename TYPE, typename CODE>
struct QuadratureAmplitudeModulation<64, TYPE, CODE>
{
static const int NUM = 64;
static const int BITS = 6;
typedef TYPE complex_type;
typedef typename TYPE::value_type value_type;
typedef CODE code_type;
static constexpr value_type FAC = 0.9258200997725516;
static constexpr value_type RCP = 7 * FAC;
static constexpr value_type AMP = 1 / RCP;
static constexpr value_type DIST = 2 * AMP;
static constexpr value_type amp(int i)
{
return AMP * i;
}
static code_type quantize(value_type precision, value_type value)
{
value *= DIST * precision;
if (std::is_integral<code_type>::value)
value = std::nearbyint(value);
if (std::is_same<code_type, int8_t>::value)
value = std::min<value_type>(std::max<value_type>(value, -128), 127);
return value;
}
static void hard(code_type *b, complex_type c)
{
b[0] = c.real() < amp(0) ? code_type(-1) : code_type(1);
b[1] = c.imag() < amp(0) ? code_type(-1) : code_type(1);
b[2] = std::abs(c.real()) < amp(4) ? code_type(-1) : code_type(1);
b[3] = std::abs(c.imag()) < amp(4) ? code_type(-1) : code_type(1);
b[4] = std::abs(std::abs(c.real())-amp(4)) < amp(2) ? code_type(-1) : code_type(1);
b[5] = std::abs(std::abs(c.imag())-amp(4)) < amp(2) ? code_type(-1) : code_type(1);
}
static void soft(code_type *b, complex_type c, value_type precision)
{
b[0] = quantize(precision, c.real());
b[1] = quantize(precision, c.imag());
b[2] = quantize(precision, std::abs(c.real())-amp(4));
b[3] = quantize(precision, std::abs(c.imag())-amp(4));
b[4] = quantize(precision, std::abs(std::abs(c.real())-amp(4))-amp(2));
b[5] = quantize(precision, std::abs(std::abs(c.imag())-amp(4))-amp(2));
}
static complex_type map(code_type *b)
{
return AMP * complex_type(
b[0]*(b[2]*(b[4]+value_type(2))+value_type(4)),
b[1]*(b[3]*(b[5]+value_type(2))+value_type(4))
);
}
};
template <typename TYPE, typename CODE>
struct QuadratureAmplitudeModulation<256, TYPE, CODE>
{
static const int NUM = 256;
static const int BITS = 8;
typedef TYPE complex_type;
typedef typename TYPE::value_type value_type;
typedef CODE code_type;
static constexpr value_type FAC = 0.8692269873603529;
static constexpr value_type RCP = 15 * FAC;
static constexpr value_type AMP = 1 / RCP;
static constexpr value_type DIST = 2 * AMP;
static constexpr value_type amp(int i)
{
return AMP * i;
}
static code_type quantize(value_type precision, value_type value)
{
value *= DIST * precision;
if (std::is_integral<code_type>::value)
value = std::nearbyint(value);
if (std::is_same<code_type, int8_t>::value)
value = std::min<value_type>(std::max<value_type>(value, -128), 127);
return value;
}
static void hard(code_type *b, complex_type c)
{
b[0] = c.real() < amp(0) ? code_type(-1) : code_type(1);
b[1] = c.imag() < amp(0) ? code_type(-1) : code_type(1);
b[2] = std::abs(c.real()) < amp(8) ? code_type(-1) : code_type(1);
b[3] = std::abs(c.imag()) < amp(8) ? code_type(-1) : code_type(1);
b[4] = std::abs(std::abs(c.real())-amp(8)) < amp(4) ? code_type(-1) : code_type(1);
b[5] = std::abs(std::abs(c.imag())-amp(8)) < amp(4) ? code_type(-1) : code_type(1);
b[6] = std::abs(std::abs(std::abs(c.real())-amp(8))-amp(4)) < amp(2) ? code_type(-1) : code_type(1);
b[7] = std::abs(std::abs(std::abs(c.imag())-amp(8))-amp(4)) < amp(2) ? code_type(-1) : code_type(1);
}
static void soft(code_type *b, complex_type c, value_type precision)
{
b[0] = quantize(precision, c.real());
b[1] = quantize(precision, c.imag());
b[2] = quantize(precision, std::abs(c.real())-amp(8));
b[3] = quantize(precision, std::abs(c.imag())-amp(8));
b[4] = quantize(precision, std::abs(std::abs(c.real())-amp(8))-amp(4));
b[5] = quantize(precision, std::abs(std::abs(c.imag())-amp(8))-amp(4));
b[6] = quantize(precision, std::abs(std::abs(std::abs(c.real())-amp(8))-amp(4))-amp(2));
b[7] = quantize(precision, std::abs(std::abs(std::abs(c.imag())-amp(8))-amp(4))-amp(2));
}
static complex_type map(code_type *b)
{
return AMP * complex_type(
b[0]*(b[2]*(b[4]*(b[6]+value_type(2))+value_type(4))+value_type(8)),
b[1]*(b[3]*(b[5]*(b[7]+value_type(2))+value_type(4))+value_type(8))
);
}
};
template <typename TYPE, typename CODE>
struct QuadratureAmplitudeModulation<1024, TYPE, CODE>
{
static const int NUM = 1024;
static const int BITS = 10;
typedef TYPE complex_type;
typedef typename TYPE::value_type value_type;
typedef CODE code_type;
static constexpr value_type FAC = 0.8424235391742344;
static constexpr value_type RCP = 31 * FAC;
static constexpr value_type AMP = 1 / RCP;
static constexpr value_type DIST = 2 * AMP;
static constexpr value_type amp(int i)
{
return AMP * i;
}
static code_type quantize(value_type precision, value_type value)
{
value *= DIST * precision;
if (std::is_integral<code_type>::value)
value = std::nearbyint(value);
if (std::is_same<code_type, int8_t>::value)
value = std::min<value_type>(std::max<value_type>(value, -128), 127);
return value;
}
static void hard(code_type *b, complex_type c)
{
b[0] = c.real() < amp(0) ? code_type(-1) : code_type(1);
b[1] = c.imag() < amp(0) ? code_type(-1) : code_type(1);
b[2] = std::abs(c.real()) < amp(16) ? code_type(-1) : code_type(1);
b[3] = std::abs(c.imag()) < amp(16) ? code_type(-1) : code_type(1);
b[4] = std::abs(std::abs(c.real())-amp(16)) < amp(8) ? code_type(-1) : code_type(1);
b[5] = std::abs(std::abs(c.imag())-amp(16)) < amp(8) ? code_type(-1) : code_type(1);
b[6] = std::abs(std::abs(std::abs(c.real())-amp(16))-amp(8)) < amp(4) ? code_type(-1) : code_type(1);
b[7] = std::abs(std::abs(std::abs(c.imag())-amp(16))-amp(8)) < amp(4) ? code_type(-1) : code_type(1);
b[8] = std::abs(std::abs(std::abs(std::abs(c.real())-amp(16))-amp(8))-amp(4)) < amp(2) ? code_type(-1) : code_type(1);
b[9] = std::abs(std::abs(std::abs(std::abs(c.imag())-amp(16))-amp(8))-amp(4)) < amp(2) ? code_type(-1) : code_type(1);
}
static void soft(code_type *b, complex_type c, value_type precision)
{
b[0] = quantize(precision, c.real());
b[1] = quantize(precision, c.imag());
b[2] = quantize(precision, std::abs(c.real())-amp(16));
b[3] = quantize(precision, std::abs(c.imag())-amp(16));
b[4] = quantize(precision, std::abs(std::abs(c.real())-amp(16))-amp(8));
b[5] = quantize(precision, std::abs(std::abs(c.imag())-amp(16))-amp(8));
b[6] = quantize(precision, std::abs(std::abs(std::abs(c.real())-amp(16))-amp(8))-amp(4));
b[7] = quantize(precision, std::abs(std::abs(std::abs(c.imag())-amp(16))-amp(8))-amp(4));
b[8] = quantize(precision, std::abs(std::abs(std::abs(std::abs(c.real())-amp(16))-amp(8))-amp(4))-amp(2));
b[9] = quantize(precision, std::abs(std::abs(std::abs(std::abs(c.imag())-amp(16))-amp(8))-amp(4))-amp(2));
}
static complex_type map(code_type *b)
{
return AMP * complex_type(
b[0]*(b[2]*(b[4]*(b[6]*(b[8]+value_type(2))+value_type(4))+value_type(8))+value_type(16)),
b[1]*(b[3]*(b[5]*(b[7]*(b[9]+value_type(2))+value_type(4))+value_type(8))+value_type(16))
);
}
};
} // namespace ldpctool
#endif
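
The QAM demappers follow the same pattern: the first two bits carry the I and Q signs, and each further pair compares folded magnitudes against amp() thresholds. A hedged QAM16 round trip under the same stand-in types as the PSK sketch (again built as C++17, or with the out-of-class constexpr definitions from modulation_handler.cpp):

#include <algorithm>
#include <cmath>
#include <complex>
#include <cstdint>
#include <cstdio>
#include <type_traits>
#include "qam.h"

int main()
{
    typedef ldpctool::QuadratureAmplitudeModulation<16, std::complex<float>, int8_t> MOD;
    int8_t bits[MOD::BITS] = { 1, -1, -1, 1 };
    std::complex<float> sym = MOD::map(bits);  // one of the 16 unit-average-power points
    int8_t hard[MOD::BITS];
    MOD::hard(hard, sym);                      // recovers bits[] exactly on a noiseless symbol
    std::printf("%d %d %d %d\n", hard[0], hard[1], hard[2], hard[3]);  // prints: 1 -1 -1 1
    return 0;
}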

File diff suppressed because it is too large

View File

@ -0,0 +1,981 @@
/*
Intel SSE4.1 acceleration
Copyright 2018 Ahmet Inan <inan@aicodix.de>
*/
#ifndef SSE4_1_HH
#define SSE4_1_HH
#include <cstdint>
#include <smmintrin.h>
#include "simd.h"
namespace ldpctool {
template <>
union SIMD<float, 4>
{
static const int SIZE = 4;
typedef float value_type;
typedef uint32_t uint_type;
__m128 m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<double, 2>
{
static const int SIZE = 2;
typedef double value_type;
typedef uint64_t uint_type;
__m128d m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int8_t, 16>
{
static const int SIZE = 16;
typedef int8_t value_type;
typedef uint8_t uint_type;
__m128i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int16_t, 8>
{
static const int SIZE = 8;
typedef int16_t value_type;
typedef uint16_t uint_type;
__m128i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int32_t, 4>
{
static const int SIZE = 4;
typedef int32_t value_type;
typedef uint32_t uint_type;
__m128i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<int64_t, 2>
{
static const int SIZE = 2;
typedef int64_t value_type;
typedef uint64_t uint_type;
__m128i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint8_t, 16>
{
static const int SIZE = 16;
typedef uint8_t value_type;
typedef uint8_t uint_type;
__m128i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint16_t, 8>
{
static const int SIZE = 8;
typedef uint16_t value_type;
typedef uint16_t uint_type;
__m128i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint32_t, 4>
{
static const int SIZE = 4;
typedef uint32_t value_type;
typedef uint32_t uint_type;
__m128i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
union SIMD<uint64_t, 2>
{
static const int SIZE = 2;
typedef uint64_t value_type;
typedef uint64_t uint_type;
__m128i m;
value_type v[SIZE];
uint_type u[SIZE];
};
template <>
inline SIMD<float, 4> vreinterpret(SIMD<uint32_t, 4> a)
{
SIMD<float, 4> tmp;
tmp.m = (__m128)a.m;
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vreinterpret(SIMD<float, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<double, 2> vreinterpret(SIMD<uint64_t, 2> a)
{
SIMD<double, 2> tmp;
tmp.m = (__m128d)a.m;
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vreinterpret(SIMD<double, 2> a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vreinterpret(SIMD<int8_t, 16> a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<int8_t, 16> vreinterpret(SIMD<uint8_t, 16> a)
{
SIMD<int8_t, 16> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vreinterpret(SIMD<int16_t, 8> a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<int16_t, 8> vreinterpret(SIMD<uint16_t, 8> a)
{
SIMD<int16_t, 8> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vreinterpret(SIMD<int32_t, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<int32_t, 4> vreinterpret(SIMD<uint32_t, 4> a)
{
SIMD<int32_t, 4> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vreinterpret(SIMD<int64_t, 2> a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<int64_t, 2> vreinterpret(SIMD<uint64_t, 2> a)
{
SIMD<int64_t, 2> tmp;
tmp.m = (__m128i)a.m;
return tmp;
}
template <>
inline SIMD<float, 4> vdup<SIMD<float, 4>>(float a)
{
SIMD<float, 4> tmp;
tmp.m = _mm_set1_ps(a);
return tmp;
}
template <>
inline SIMD<double, 2> vdup<SIMD<double, 2>>(double a)
{
SIMD<double, 2> tmp;
tmp.m = _mm_set1_pd(a);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vdup<SIMD<int8_t, 16>>(int8_t a)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_set1_epi8(a);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vdup<SIMD<int16_t, 8>>(int16_t a)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_set1_epi16(a);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vdup<SIMD<int32_t, 4>>(int32_t a)
{
SIMD<int32_t, 4> tmp;
tmp.m = _mm_set1_epi32(a);
return tmp;
}
template <>
inline SIMD<int64_t, 2> vdup<SIMD<int64_t, 2>>(int64_t a)
{
SIMD<int64_t, 2> tmp;
tmp.m = _mm_set1_epi64x(a);
return tmp;
}
template <>
inline SIMD<float, 4> vzero()
{
SIMD<float, 4> tmp;
tmp.m = _mm_setzero_ps();
return tmp;
}
template <>
inline SIMD<double, 2> vzero()
{
SIMD<double, 2> tmp;
tmp.m = _mm_setzero_pd();
return tmp;
}
template <>
inline SIMD<int8_t, 16> vzero()
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_setzero_si128();
return tmp;
}
template <>
inline SIMD<int16_t, 8> vzero()
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_setzero_si128();
return tmp;
}
template <>
inline SIMD<int32_t, 4> vzero()
{
SIMD<int32_t, 4> tmp;
tmp.m = _mm_setzero_si128();
return tmp;
}
template <>
inline SIMD<int64_t, 2> vzero()
{
SIMD<int64_t, 2> tmp;
tmp.m = _mm_setzero_si128();
return tmp;
}
template <>
inline SIMD<float, 4> vadd(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = _mm_add_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<double, 2> vadd(SIMD<double, 2> a, SIMD<double, 2> b)
{
SIMD<double, 2> tmp;
tmp.m = _mm_add_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vadd(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_add_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vadd(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_add_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vadd(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = _mm_add_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int64_t, 2> vadd(SIMD<int64_t, 2> a, SIMD<int64_t, 2> b)
{
SIMD<int64_t, 2> tmp;
tmp.m = _mm_add_epi64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vqadd(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_adds_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vqadd(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_adds_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<float, 4> vsub(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = _mm_sub_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<double, 2> vsub(SIMD<double, 2> a, SIMD<double, 2> b)
{
SIMD<double, 2> tmp;
tmp.m = _mm_sub_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vsub(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_sub_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vsub(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_sub_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vsub(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = _mm_sub_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int64_t, 2> vsub(SIMD<int64_t, 2> a, SIMD<int64_t, 2> b)
{
SIMD<int64_t, 2> tmp;
tmp.m = _mm_sub_epi64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vqsub(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_subs_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vqsub(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_subs_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vqsub(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_subs_epu8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vqsub(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_subs_epu16(a.m, b.m);
return tmp;
}
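// Floating-point absolute value: AND-NOT with -0.0 (only the sign bit set) clears the IEEE-754 sign bit.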
template <>
inline SIMD<float, 4> vabs(SIMD<float, 4> a)
{
SIMD<float, 4> tmp;
tmp.m = _mm_andnot_ps(_mm_set1_ps(-0.f), a.m);
return tmp;
}
template <>
inline SIMD<double, 2> vabs(SIMD<double, 2> a)
{
SIMD<double, 2> tmp;
tmp.m = _mm_andnot_pd(_mm_set1_pd(-0.), a.m);
return tmp;
}
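// Saturating integer absolute value: inputs are clamped to -INT_MAX first so that abs(INT_MIN) cannot wrap back to a negative value.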
template <>
inline SIMD<int8_t, 16> vqabs(SIMD<int8_t, 16> a)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_abs_epi8(_mm_max_epi8(a.m, _mm_set1_epi8(-INT8_MAX)));
return tmp;
}
template <>
inline SIMD<int16_t, 8> vqabs(SIMD<int16_t, 8> a)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_abs_epi16(_mm_max_epi16(a.m, _mm_set1_epi16(-INT16_MAX)));
return tmp;
}
template <>
inline SIMD<int32_t, 4> vqabs(SIMD<int32_t, 4> a)
{
SIMD<int32_t, 4> tmp;
tmp.m = _mm_abs_epi32(_mm_max_epi32(a.m, _mm_set1_epi32(-INT32_MAX)));
return tmp;
}
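// vsign(a, b): copy a with its sign flipped where b is negative and force zero where b is zero (the SSSE3 _mm_sign_* semantics, emulated here for floating point).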
template <>
inline SIMD<float, 4> vsign(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = _mm_andnot_ps(
_mm_cmpeq_ps(b.m, _mm_setzero_ps()),
_mm_xor_ps(a.m, _mm_and_ps(_mm_set1_ps(-0.f), b.m)));
return tmp;
}
template <>
inline SIMD<double, 2> vsign(SIMD<double, 2> a, SIMD<double, 2> b)
{
SIMD<double, 2> tmp;
tmp.m = _mm_andnot_pd(
_mm_cmpeq_pd(b.m, _mm_setzero_pd()),
_mm_xor_pd(a.m, _mm_and_pd(_mm_set1_pd(-0.), b.m)));
return tmp;
}
template <>
inline SIMD<int8_t, 16> vsign(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_sign_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vsign(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_sign_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vsign(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = _mm_sign_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vorr(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_or_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vorr(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_or_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vorr(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_or_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vorr(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_or_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vand(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_and_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vand(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_and_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vand(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_and_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vand(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_and_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> veor(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_xor_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> veor(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_xor_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> veor(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_xor_si128(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> veor(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_xor_si128(a.m, b.m);
return tmp;
}
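// BIC (bit clear): computes a AND NOT b. _mm_andnot_si128 complements its first operand, hence the swapped argument order.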
template <>
inline SIMD<uint8_t, 16> vbic(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_andnot_si128(b.m, a.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vbic(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_andnot_si128(b.m, a.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vbic(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_andnot_si128(b.m, a.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vbic(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_andnot_si128(b.m, a.m);
return tmp;
}
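// BSL (bitwise select): for each bit, take b where the mask a is set and c where it is clear.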
template <>
inline SIMD<uint8_t, 16> vbsl(SIMD<uint8_t, 16> a, SIMD<uint8_t, 16> b, SIMD<uint8_t, 16> c)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_or_si128(_mm_and_si128(a.m, b.m), _mm_andnot_si128(a.m, c.m));
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vbsl(SIMD<uint16_t, 8> a, SIMD<uint16_t, 8> b, SIMD<uint16_t, 8> c)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_or_si128(_mm_and_si128(a.m, b.m), _mm_andnot_si128(a.m, c.m));
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vbsl(SIMD<uint32_t, 4> a, SIMD<uint32_t, 4> b, SIMD<uint32_t, 4> c)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_or_si128(_mm_and_si128(a.m, b.m), _mm_andnot_si128(a.m, c.m));
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vbsl(SIMD<uint64_t, 2> a, SIMD<uint64_t, 2> b, SIMD<uint64_t, 2> c)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_or_si128(_mm_and_si128(a.m, b.m), _mm_andnot_si128(a.m, c.m));
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vceqz(SIMD<float, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = (__m128i)_mm_cmpeq_ps(a.m, _mm_setzero_ps());
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vceqz(SIMD<double, 2> a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = (__m128i)_mm_cmpeq_pd(a.m, _mm_setzero_pd());
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vceqz(SIMD<int8_t, 16> a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_cmpeq_epi8(a.m, _mm_setzero_si128());
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vceqz(SIMD<int16_t, 8> a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_cmpeq_epi16(a.m, _mm_setzero_si128());
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vceqz(SIMD<int32_t, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_cmpeq_epi32(a.m, _mm_setzero_si128());
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vceqz(SIMD<int64_t, 2> a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_cmpeq_epi64(a.m, _mm_setzero_si128());
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vceq(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = (__m128i)_mm_cmpeq_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vceq(SIMD<double, 2> a, SIMD<double, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = (__m128i)_mm_cmpeq_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vceq(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_cmpeq_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vceq(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_cmpeq_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vceq(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_cmpeq_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vceq(SIMD<int64_t, 2> a, SIMD<int64_t, 2> b)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_cmpeq_epi64(a.m, b.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vcgtz(SIMD<float, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = (__m128i)_mm_cmpgt_ps(a.m, _mm_setzero_ps());
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vcgtz(SIMD<double, 2> a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = (__m128i)_mm_cmpgt_pd(a.m, _mm_setzero_pd());
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vcgtz(SIMD<int8_t, 16> a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_cmpgt_epi8(a.m, _mm_setzero_si128());
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vcgtz(SIMD<int16_t, 8> a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_cmpgt_epi16(a.m, _mm_setzero_si128());
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vcgtz(SIMD<int32_t, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_cmpgt_epi32(a.m, _mm_setzero_si128());
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vcgtz(SIMD<int64_t, 2> a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_cmpgt_epi64(a.m, _mm_setzero_si128());
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vcltz(SIMD<float, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = (__m128i)_mm_cmplt_ps(a.m, _mm_setzero_ps());
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vcltz(SIMD<double, 2> a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = (__m128i)_mm_cmplt_pd(a.m, _mm_setzero_pd());
return tmp;
}
template <>
inline SIMD<uint8_t, 16> vcltz(SIMD<int8_t, 16> a)
{
SIMD<uint8_t, 16> tmp;
tmp.m = _mm_cmpgt_epi8(_mm_setzero_si128(), a.m);
return tmp;
}
template <>
inline SIMD<uint16_t, 8> vcltz(SIMD<int16_t, 8> a)
{
SIMD<uint16_t, 8> tmp;
tmp.m = _mm_cmpgt_epi16(_mm_setzero_si128(), a.m);
return tmp;
}
template <>
inline SIMD<uint32_t, 4> vcltz(SIMD<int32_t, 4> a)
{
SIMD<uint32_t, 4> tmp;
tmp.m = _mm_cmpgt_epi32(_mm_setzero_si128(), a.m);
return tmp;
}
template <>
inline SIMD<uint64_t, 2> vcltz(SIMD<int64_t, 2> a)
{
SIMD<uint64_t, 2> tmp;
tmp.m = _mm_cmpgt_epi64(_mm_setzero_si128(), a.m);
return tmp;
}
template <>
inline SIMD<float, 4> vmin(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = _mm_min_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<double, 2> vmin(SIMD<double, 2> a, SIMD<double, 2> b)
{
SIMD<double, 2> tmp;
tmp.m = _mm_min_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vmin(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_min_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vmin(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_min_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vmin(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = _mm_min_epi32(a.m, b.m);
return tmp;
}
template <>
inline SIMD<float, 4> vmax(SIMD<float, 4> a, SIMD<float, 4> b)
{
SIMD<float, 4> tmp;
tmp.m = _mm_max_ps(a.m, b.m);
return tmp;
}
template <>
inline SIMD<double, 2> vmax(SIMD<double, 2> a, SIMD<double, 2> b)
{
SIMD<double, 2> tmp;
tmp.m = _mm_max_pd(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int8_t, 16> vmax(SIMD<int8_t, 16> a, SIMD<int8_t, 16> b)
{
SIMD<int8_t, 16> tmp;
tmp.m = _mm_max_epi8(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int16_t, 8> vmax(SIMD<int16_t, 8> a, SIMD<int16_t, 8> b)
{
SIMD<int16_t, 8> tmp;
tmp.m = _mm_max_epi16(a.m, b.m);
return tmp;
}
template <>
inline SIMD<int32_t, 4> vmax(SIMD<int32_t, 4> a, SIMD<int32_t, 4> b)
{
SIMD<int32_t, 4> tmp;
tmp.m = _mm_max_epi32(a.m, b.m);
return tmp;
}
} // namespace ldpctool
#endif

View File

@ -0,0 +1,486 @@
/*
LDPC tables handler
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#include <cstring>
#include <cstdint>
#include "ldpc.h"
#include "dvb_s2_tables.h"
#include "dvb_s2x_tables.h"
#include "dvb_t2_tables.h"
namespace ldpctool {
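// Maps the DVB-S2 MODCOD index (0..31) to the LDPC parity table name: normal frames (64800 bits) use the B tables,
// short frames (16200 bits) the C tables; 0 marks MODCODs with no applicable table.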
const char *LDPCInterface::mc_tabnames[2][32] = { // [shortframes][modcod]
{// Normal frames
0, "B1", "B2", "B3", "B4", "B5", "B6", "B7",
"B8", "B9", "B10", "B11", "B5", "B6", "B7", "B9",
"B10", "B11", "B6", "B7", "B8", "B9", "B10", "B11",
"B7", "B8", "B8", "B10", "B11", 0, 0, 0
},
{// Short frames
0, "C1", "C2", "C3", "C4", "C5", "C6", "C7",
"C8", "C9", "C10", 0, "C5", "C6", "C7", "C9",
"C10", 0, "C6", "C7", "C8", "C9", "C10", 0,
"C7", "C8", "C8", "C10", 0, 0, 0, 0
}
};
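// Factory: returns the LDPC code for a given standard ("S2", "S2X", "T2") and table identifier,
// e.g. create_ldpc((char *)"S2", 'B', 11) for normal-frame table B11; returns 0 if the combination is unknown.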
LDPCInterface *create_ldpc(char *standard, char prefix, int number)
{
if (!strcmp(standard, "S2")) {
if (prefix == 'B') {
switch (number) {
case 1:
return new LDPC<DVB_S2_TABLE_B1>();
case 2:
return new LDPC<DVB_S2_TABLE_B2>();
case 3:
return new LDPC<DVB_S2_TABLE_B3>();
case 4:
return new LDPC<DVB_S2_TABLE_B4>();
case 5:
return new LDPC<DVB_S2_TABLE_B5>();
case 6:
return new LDPC<DVB_S2_TABLE_B6>();
case 7:
return new LDPC<DVB_S2_TABLE_B7>();
case 8:
return new LDPC<DVB_S2_TABLE_B8>();
case 9:
return new LDPC<DVB_S2_TABLE_B9>();
case 10:
return new LDPC<DVB_S2_TABLE_B10>();
case 11:
return new LDPC<DVB_S2_TABLE_B11>();
}
}
if (prefix == 'C') {
switch (number) {
case 1:
return new LDPC<DVB_S2_TABLE_C1>();
case 2:
return new LDPC<DVB_S2_TABLE_C2>();
case 3:
return new LDPC<DVB_S2_TABLE_C3>();
case 4:
return new LDPC<DVB_S2_TABLE_C4>();
case 5:
return new LDPC<DVB_S2_TABLE_C5>();
case 6:
return new LDPC<DVB_S2_TABLE_C6>();
case 7:
return new LDPC<DVB_S2_TABLE_C7>();
case 8:
return new LDPC<DVB_S2_TABLE_C8>();
case 9:
return new LDPC<DVB_S2_TABLE_C9>();
case 10:
return new LDPC<DVB_S2_TABLE_C10>();
}
}
}
if (!strcmp(standard, "S2X")) {
if (prefix == 'B') {
switch (number) {
case 1:
return new LDPC<DVB_S2X_TABLE_B1>();
case 2:
return new LDPC<DVB_S2X_TABLE_B2>();
case 3:
return new LDPC<DVB_S2X_TABLE_B3>();
case 4:
return new LDPC<DVB_S2X_TABLE_B4>();
case 5:
return new LDPC<DVB_S2X_TABLE_B5>();
case 6:
return new LDPC<DVB_S2X_TABLE_B6>();
case 7:
return new LDPC<DVB_S2X_TABLE_B7>();
case 8:
return new LDPC<DVB_S2X_TABLE_B8>();
case 9:
return new LDPC<DVB_S2X_TABLE_B9>();
case 10:
return new LDPC<DVB_S2X_TABLE_B10>();
case 11:
return new LDPC<DVB_S2X_TABLE_B11>();
case 12:
return new LDPC<DVB_S2X_TABLE_B12>();
case 13:
return new LDPC<DVB_S2X_TABLE_B13>();
case 14:
return new LDPC<DVB_S2X_TABLE_B14>();
case 15:
return new LDPC<DVB_S2X_TABLE_B15>();
case 16:
return new LDPC<DVB_S2X_TABLE_B16>();
case 17:
return new LDPC<DVB_S2X_TABLE_B17>();
case 18:
return new LDPC<DVB_S2X_TABLE_B18>();
case 19:
return new LDPC<DVB_S2X_TABLE_B19>();
case 20:
return new LDPC<DVB_S2X_TABLE_B20>();
case 21:
return new LDPC<DVB_S2X_TABLE_B21>();
case 22:
return new LDPC<DVB_S2X_TABLE_B22>();
case 23:
return new LDPC<DVB_S2X_TABLE_B23>();
case 24:
return new LDPC<DVB_S2X_TABLE_B24>();
}
}
if (prefix == 'C') {
switch (number) {
case 1:
return new LDPC<DVB_S2X_TABLE_C1>();
case 2:
return new LDPC<DVB_S2X_TABLE_C2>();
case 3:
return new LDPC<DVB_S2X_TABLE_C3>();
case 4:
return new LDPC<DVB_S2X_TABLE_C4>();
case 5:
return new LDPC<DVB_S2X_TABLE_C5>();
case 6:
return new LDPC<DVB_S2X_TABLE_C6>();
case 7:
return new LDPC<DVB_S2X_TABLE_C7>();
case 8:
return new LDPC<DVB_S2X_TABLE_C8>();
case 9:
return new LDPC<DVB_S2X_TABLE_C9>();
case 10:
return new LDPC<DVB_S2X_TABLE_C10>();
}
}
}
if (!strcmp(standard, "T2")) {
if (prefix == 'A') {
switch (number) {
case 1:
return new LDPC<DVB_T2_TABLE_A1>();
case 2:
return new LDPC<DVB_T2_TABLE_A2>();
case 3:
return new LDPC<DVB_T2_TABLE_A3>();
case 4:
return new LDPC<DVB_T2_TABLE_A4>();
case 5:
return new LDPC<DVB_T2_TABLE_A5>();
case 6:
return new LDPC<DVB_T2_TABLE_A6>();
}
}
if (prefix == 'B') {
switch (number) {
case 1:
return new LDPC<DVB_T2_TABLE_B1>();
case 2:
return new LDPC<DVB_T2_TABLE_B2>();
case 3:
return new LDPC<DVB_T2_TABLE_B3>();
case 4:
return new LDPC<DVB_T2_TABLE_B4>();
case 5:
return new LDPC<DVB_T2_TABLE_B5>();
case 6:
return new LDPC<DVB_T2_TABLE_B6>();
case 7:
return new LDPC<DVB_T2_TABLE_B7>();
case 8:
return new LDPC<DVB_T2_TABLE_B8>();
case 9:
return new LDPC<DVB_T2_TABLE_B9>();
}
}
}
return 0;
}
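// Out-of-line definitions of the constexpr static member arrays (required before C++17 because the arrays are ODR-used by the decoder).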
constexpr int DVB_S2_TABLE_B1::DEG[];
constexpr int DVB_S2_TABLE_B1::LEN[];
constexpr int DVB_S2_TABLE_B1::POS[];
constexpr int DVB_S2_TABLE_B2::DEG[];
constexpr int DVB_S2_TABLE_B2::LEN[];
constexpr int DVB_S2_TABLE_B2::POS[];
constexpr int DVB_S2_TABLE_B3::DEG[];
constexpr int DVB_S2_TABLE_B3::LEN[];
constexpr int DVB_S2_TABLE_B3::POS[];
constexpr int DVB_S2_TABLE_B4::DEG[];
constexpr int DVB_S2_TABLE_B4::LEN[];
constexpr int DVB_S2_TABLE_B4::POS[];
constexpr int DVB_S2_TABLE_B5::DEG[];
constexpr int DVB_S2_TABLE_B5::LEN[];
constexpr int DVB_S2_TABLE_B5::POS[];
constexpr int DVB_S2_TABLE_B6::DEG[];
constexpr int DVB_S2_TABLE_B6::LEN[];
constexpr int DVB_S2_TABLE_B6::POS[];
constexpr int DVB_S2_TABLE_B7::DEG[];
constexpr int DVB_S2_TABLE_B7::LEN[];
constexpr int DVB_S2_TABLE_B7::POS[];
constexpr int DVB_S2_TABLE_B8::DEG[];
constexpr int DVB_S2_TABLE_B8::LEN[];
constexpr int DVB_S2_TABLE_B8::POS[];
constexpr int DVB_S2_TABLE_B9::DEG[];
constexpr int DVB_S2_TABLE_B9::LEN[];
constexpr int DVB_S2_TABLE_B9::POS[];
constexpr int DVB_S2_TABLE_B10::DEG[];
constexpr int DVB_S2_TABLE_B10::LEN[];
constexpr int DVB_S2_TABLE_B10::POS[];
constexpr int DVB_S2_TABLE_B11::DEG[];
constexpr int DVB_S2_TABLE_B11::LEN[];
constexpr int DVB_S2_TABLE_B11::POS[];
constexpr int DVB_S2_TABLE_C1::DEG[];
constexpr int DVB_S2_TABLE_C1::LEN[];
constexpr int DVB_S2_TABLE_C1::POS[];
constexpr int DVB_S2_TABLE_C2::DEG[];
constexpr int DVB_S2_TABLE_C2::LEN[];
constexpr int DVB_S2_TABLE_C2::POS[];
constexpr int DVB_S2_TABLE_C3::DEG[];
constexpr int DVB_S2_TABLE_C3::LEN[];
constexpr int DVB_S2_TABLE_C3::POS[];
constexpr int DVB_S2_TABLE_C4::DEG[];
constexpr int DVB_S2_TABLE_C4::LEN[];
constexpr int DVB_S2_TABLE_C4::POS[];
constexpr int DVB_S2_TABLE_C5::DEG[];
constexpr int DVB_S2_TABLE_C5::LEN[];
constexpr int DVB_S2_TABLE_C5::POS[];
constexpr int DVB_S2_TABLE_C6::DEG[];
constexpr int DVB_S2_TABLE_C6::LEN[];
constexpr int DVB_S2_TABLE_C6::POS[];
constexpr int DVB_S2_TABLE_C7::DEG[];
constexpr int DVB_S2_TABLE_C7::LEN[];
constexpr int DVB_S2_TABLE_C7::POS[];
constexpr int DVB_S2_TABLE_C8::DEG[];
constexpr int DVB_S2_TABLE_C8::LEN[];
constexpr int DVB_S2_TABLE_C8::POS[];
constexpr int DVB_S2_TABLE_C9::DEG[];
constexpr int DVB_S2_TABLE_C9::LEN[];
constexpr int DVB_S2_TABLE_C9::POS[];
constexpr int DVB_S2_TABLE_C10::DEG[];
constexpr int DVB_S2_TABLE_C10::LEN[];
constexpr int DVB_S2_TABLE_C10::POS[];
constexpr int DVB_S2X_TABLE_B1::DEG[];
constexpr int DVB_S2X_TABLE_B1::LEN[];
constexpr int DVB_S2X_TABLE_B1::POS[];
constexpr int DVB_S2X_TABLE_B2::DEG[];
constexpr int DVB_S2X_TABLE_B2::LEN[];
constexpr int DVB_S2X_TABLE_B2::POS[];
constexpr int DVB_S2X_TABLE_B3::DEG[];
constexpr int DVB_S2X_TABLE_B3::LEN[];
constexpr int DVB_S2X_TABLE_B3::POS[];
constexpr int DVB_S2X_TABLE_B4::DEG[];
constexpr int DVB_S2X_TABLE_B4::LEN[];
constexpr int DVB_S2X_TABLE_B4::POS[];
constexpr int DVB_S2X_TABLE_B5::DEG[];
constexpr int DVB_S2X_TABLE_B5::LEN[];
constexpr int DVB_S2X_TABLE_B5::POS[];
constexpr int DVB_S2X_TABLE_B6::DEG[];
constexpr int DVB_S2X_TABLE_B6::LEN[];
constexpr int DVB_S2X_TABLE_B6::POS[];
constexpr int DVB_S2X_TABLE_B7::DEG[];
constexpr int DVB_S2X_TABLE_B7::LEN[];
constexpr int DVB_S2X_TABLE_B7::POS[];
constexpr int DVB_S2X_TABLE_B8::DEG[];
constexpr int DVB_S2X_TABLE_B8::LEN[];
constexpr int DVB_S2X_TABLE_B8::POS[];
constexpr int DVB_S2X_TABLE_B9::DEG[];
constexpr int DVB_S2X_TABLE_B9::LEN[];
constexpr int DVB_S2X_TABLE_B9::POS[];
constexpr int DVB_S2X_TABLE_B10::DEG[];
constexpr int DVB_S2X_TABLE_B10::LEN[];
constexpr int DVB_S2X_TABLE_B10::POS[];
constexpr int DVB_S2X_TABLE_B11::DEG[];
constexpr int DVB_S2X_TABLE_B11::LEN[];
constexpr int DVB_S2X_TABLE_B11::POS[];
constexpr int DVB_S2X_TABLE_B12::DEG[];
constexpr int DVB_S2X_TABLE_B12::LEN[];
constexpr int DVB_S2X_TABLE_B12::POS[];
constexpr int DVB_S2X_TABLE_B13::DEG[];
constexpr int DVB_S2X_TABLE_B13::LEN[];
constexpr int DVB_S2X_TABLE_B13::POS[];
constexpr int DVB_S2X_TABLE_B14::DEG[];
constexpr int DVB_S2X_TABLE_B14::LEN[];
constexpr int DVB_S2X_TABLE_B14::POS[];
constexpr int DVB_S2X_TABLE_B15::DEG[];
constexpr int DVB_S2X_TABLE_B15::LEN[];
constexpr int DVB_S2X_TABLE_B15::POS[];
constexpr int DVB_S2X_TABLE_B16::DEG[];
constexpr int DVB_S2X_TABLE_B16::LEN[];
constexpr int DVB_S2X_TABLE_B16::POS[];
constexpr int DVB_S2X_TABLE_B17::DEG[];
constexpr int DVB_S2X_TABLE_B17::LEN[];
constexpr int DVB_S2X_TABLE_B17::POS[];
constexpr int DVB_S2X_TABLE_B18::DEG[];
constexpr int DVB_S2X_TABLE_B18::LEN[];
constexpr int DVB_S2X_TABLE_B18::POS[];
constexpr int DVB_S2X_TABLE_B19::DEG[];
constexpr int DVB_S2X_TABLE_B19::LEN[];
constexpr int DVB_S2X_TABLE_B19::POS[];
constexpr int DVB_S2X_TABLE_B20::DEG[];
constexpr int DVB_S2X_TABLE_B20::LEN[];
constexpr int DVB_S2X_TABLE_B20::POS[];
constexpr int DVB_S2X_TABLE_B21::DEG[];
constexpr int DVB_S2X_TABLE_B21::LEN[];
constexpr int DVB_S2X_TABLE_B21::POS[];
constexpr int DVB_S2X_TABLE_B22::DEG[];
constexpr int DVB_S2X_TABLE_B22::LEN[];
constexpr int DVB_S2X_TABLE_B22::POS[];
constexpr int DVB_S2X_TABLE_B23::DEG[];
constexpr int DVB_S2X_TABLE_B23::LEN[];
constexpr int DVB_S2X_TABLE_B23::POS[];
constexpr int DVB_S2X_TABLE_B24::DEG[];
constexpr int DVB_S2X_TABLE_B24::LEN[];
constexpr int DVB_S2X_TABLE_B24::POS[];
constexpr int DVB_S2X_TABLE_C1::DEG[];
constexpr int DVB_S2X_TABLE_C1::LEN[];
constexpr int DVB_S2X_TABLE_C1::POS[];
constexpr int DVB_S2X_TABLE_C2::DEG[];
constexpr int DVB_S2X_TABLE_C2::LEN[];
constexpr int DVB_S2X_TABLE_C2::POS[];
constexpr int DVB_S2X_TABLE_C3::DEG[];
constexpr int DVB_S2X_TABLE_C3::LEN[];
constexpr int DVB_S2X_TABLE_C3::POS[];
constexpr int DVB_S2X_TABLE_C4::DEG[];
constexpr int DVB_S2X_TABLE_C4::LEN[];
constexpr int DVB_S2X_TABLE_C4::POS[];
constexpr int DVB_S2X_TABLE_C5::DEG[];
constexpr int DVB_S2X_TABLE_C5::LEN[];
constexpr int DVB_S2X_TABLE_C5::POS[];
constexpr int DVB_S2X_TABLE_C6::DEG[];
constexpr int DVB_S2X_TABLE_C6::LEN[];
constexpr int DVB_S2X_TABLE_C6::POS[];
constexpr int DVB_S2X_TABLE_C7::DEG[];
constexpr int DVB_S2X_TABLE_C7::LEN[];
constexpr int DVB_S2X_TABLE_C7::POS[];
constexpr int DVB_S2X_TABLE_C8::DEG[];
constexpr int DVB_S2X_TABLE_C8::LEN[];
constexpr int DVB_S2X_TABLE_C8::POS[];
constexpr int DVB_S2X_TABLE_C9::DEG[];
constexpr int DVB_S2X_TABLE_C9::LEN[];
constexpr int DVB_S2X_TABLE_C9::POS[];
constexpr int DVB_S2X_TABLE_C10::DEG[];
constexpr int DVB_S2X_TABLE_C10::LEN[];
constexpr int DVB_S2X_TABLE_C10::POS[];
constexpr int DVB_T2_TABLE_A1::DEG[];
constexpr int DVB_T2_TABLE_A1::LEN[];
constexpr int DVB_T2_TABLE_A1::POS[];
constexpr int DVB_T2_TABLE_A2::DEG[];
constexpr int DVB_T2_TABLE_A2::LEN[];
constexpr int DVB_T2_TABLE_A2::POS[];
constexpr int DVB_T2_TABLE_A3::DEG[];
constexpr int DVB_T2_TABLE_A3::LEN[];
constexpr int DVB_T2_TABLE_A3::POS[];
constexpr int DVB_T2_TABLE_A4::DEG[];
constexpr int DVB_T2_TABLE_A4::LEN[];
constexpr int DVB_T2_TABLE_A4::POS[];
constexpr int DVB_T2_TABLE_A5::DEG[];
constexpr int DVB_T2_TABLE_A5::LEN[];
constexpr int DVB_T2_TABLE_A5::POS[];
constexpr int DVB_T2_TABLE_A6::DEG[];
constexpr int DVB_T2_TABLE_A6::LEN[];
constexpr int DVB_T2_TABLE_A6::POS[];
constexpr int DVB_T2_TABLE_B1::DEG[];
constexpr int DVB_T2_TABLE_B1::LEN[];
constexpr int DVB_T2_TABLE_B1::POS[];
constexpr int DVB_T2_TABLE_B2::DEG[];
constexpr int DVB_T2_TABLE_B2::LEN[];
constexpr int DVB_T2_TABLE_B2::POS[];
constexpr int DVB_T2_TABLE_B3::DEG[];
constexpr int DVB_T2_TABLE_B3::LEN[];
constexpr int DVB_T2_TABLE_B3::POS[];
constexpr int DVB_T2_TABLE_B4::DEG[];
constexpr int DVB_T2_TABLE_B4::LEN[];
constexpr int DVB_T2_TABLE_B4::POS[];
constexpr int DVB_T2_TABLE_B5::DEG[];
constexpr int DVB_T2_TABLE_B5::LEN[];
constexpr int DVB_T2_TABLE_B5::POS[];
constexpr int DVB_T2_TABLE_B6::DEG[];
constexpr int DVB_T2_TABLE_B6::LEN[];
constexpr int DVB_T2_TABLE_B6::POS[];
constexpr int DVB_T2_TABLE_B7::DEG[];
constexpr int DVB_T2_TABLE_B7::LEN[];
constexpr int DVB_T2_TABLE_B7::POS[];
constexpr int DVB_T2_TABLE_B8::DEG[];
constexpr int DVB_T2_TABLE_B8::LEN[];
constexpr int DVB_T2_TABLE_B8::POS[];
constexpr int DVB_T2_TABLE_B9::DEG[];
constexpr int DVB_T2_TABLE_B9::LEN[];
constexpr int DVB_T2_TABLE_B9::POS[];
} // namespace ldpctool

View File

@ -0,0 +1,246 @@
/*
LDPC testbench
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#include <stdlib.h>
#include <iostream>
#include <iomanip>
#include <random>
#include <cmath>
#include <cassert>
#include <chrono>
#include <cstring>
#include <algorithm>
#include <functional>
#include "testbench.h"
#include "encoder.h"
#include "algorithms.h"
#include "interleaver.h"
#include "modulation.h"
#if 0
#include "flooding_decoder.h"
static const int TRIALS = 50;
#else
#include "layered_decoder.h"
static const int TRIALS = 25;
#endif
namespace ldpctool {
LDPCInterface *create_ldpc(char *standard, char prefix, int number);
Interleaver<code_type> *create_interleaver(char *modulation, char *standard, char prefix, int number);
ModulationInterface<complex_type, code_type> *create_modulation(char *name, int len);
int main(int argc, char **argv)
{
if (argc != 6)
return -1;
typedef NormalUpdate<simd_type> update_type;
//typedef SelfCorrectedUpdate<simd_type> update_type;
//typedef MinSumAlgorithm<simd_type, update_type> algorithm_type;
typedef OffsetMinSumAlgorithm<simd_type, update_type, FACTOR> algorithm_type;
//typedef MinSumCAlgorithm<simd_type, update_type, FACTOR> algorithm_type;
//typedef LogDomainSPA<simd_type, update_type> algorithm_type;
//typedef LambdaMinAlgorithm<simd_type, update_type, 3> algorithm_type;
//typedef SumProductAlgorithm<simd_type, update_type> algorithm_type;
LDPCEncoder<code_type> encode;
LDPCDecoder<simd_type, algorithm_type> decode;
LDPCInterface *ldpc = create_ldpc(argv[2], argv[3][0], atoi(argv[3]+1));
if (!ldpc) {
std::cerr << "no such table!" << std::endl;
return -1;
}
const int CODE_LEN = ldpc->code_len();
const int DATA_LEN = ldpc->data_len();
std::cerr << "testing LDPC(" << CODE_LEN << ", " << DATA_LEN << ") code." << std::endl;
encode.init(ldpc);
decode.init(ldpc);
ModulationInterface<complex_type, code_type> *mod = create_modulation(argv[4], CODE_LEN);
if (!mod) {
std::cerr << "no such modulation!" << std::endl;
return -1;
}
const int MOD_BITS = mod->bits();
assert(CODE_LEN % MOD_BITS == 0);
const int SYMBOLS = CODE_LEN / MOD_BITS;
Interleaver<code_type> *itl = create_interleaver(argv[4], argv[2], argv[3][0], atoi(argv[3]+1));
assert(itl);
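// Convert Es/N0 in dB to the per-dimension AWGN standard deviation: sigma_n = sqrt(Es / (2 * 10^(SNR/10))) for complex noise.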
value_type SNR = atof(argv[1]);
//value_type mean_signal = 0;
value_type sigma_signal = 1;
value_type mean_noise = 0;
value_type sigma_noise = std::sqrt(sigma_signal * sigma_signal / (2 * std::pow(10, SNR / 10)));
std::cerr << SNR << " Es/N0 => AWGN with standard deviation of " << sigma_noise << " and mean " << mean_noise << std::endl;
value_type code_rate = (value_type)DATA_LEN / (value_type)CODE_LEN;
value_type spectral_efficiency = code_rate * MOD_BITS;
value_type EbN0 = 10 * std::log10(sigma_signal * sigma_signal / (spectral_efficiency * 2 * sigma_noise * sigma_noise));
std::cerr << EbN0 << " Eb/N0, using spectral efficiency of " << spectral_efficiency << " from " << code_rate << " code rate and " << MOD_BITS << " bits per symbol." << std::endl;
std::random_device rd;
std::default_random_engine generator(rd());
typedef std::uniform_int_distribution<int> uniform;
typedef std::normal_distribution<value_type> normal;
auto data = std::bind(uniform(0, 1), generator);
auto awgn = std::bind(normal(mean_noise, sigma_noise), generator);
int BLOCKS = atoi(argv[5]);
if (BLOCKS < 1)
return -1;
void *aligned_buffer = aligned_alloc(sizeof(simd_type), sizeof(simd_type) * CODE_LEN);
simd_type *simd = reinterpret_cast<simd_type *>(aligned_buffer);
code_type *code = new code_type[BLOCKS * CODE_LEN];
code_type *orig = new code_type[BLOCKS * CODE_LEN];
code_type *noisy = new code_type[BLOCKS * CODE_LEN];
complex_type *symb = new complex_type[BLOCKS * SYMBOLS];
for (int j = 0; j < BLOCKS; ++j)
for (int i = 0; i < DATA_LEN; ++i)
code[j * CODE_LEN + i] = 1 - 2 * data();
for (int j = 0; j < BLOCKS; ++j)
encode(code + j * CODE_LEN, code + j * CODE_LEN + DATA_LEN);
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
orig[i] = code[i];
for (int i = 0; i < BLOCKS; ++i)
itl->fwd(code + i * CODE_LEN);
for (int j = 0; j < BLOCKS; ++j)
mod->mapN(symb + j * SYMBOLS, code + j * CODE_LEN);
for (int i = 0; i < BLOCKS * SYMBOLS; ++i)
symb[i] += complex_type(awgn(), awgn());
if (1) {
code_type tmp[MOD_BITS];
value_type sp = 0, np = 0;
for (int i = 0; i < SYMBOLS; ++i) {
mod->hard(tmp, symb[i]);
complex_type s = mod->map(tmp);
complex_type e = symb[i] - s;
sp += std::norm(s);
np += std::norm(e);
}
value_type snr = 10 * std::log10(sp / np);
sigma_signal = std::sqrt(sp / SYMBOLS);
sigma_noise = std::sqrt(np / (2 * sp));
std::cerr << snr << " Es/N0, stddev " << sigma_noise << " of noise and " << sigma_signal << " of signal estimated via hard decision." << std::endl;
}
// $LLR=log(\frac{p(x=+1|y)}{p(x=-1|y)})$
// $p(x|\mu,\sigma)=\frac{1}{\sqrt{2\pi}\sigma}e^{-\frac{(x-\mu)^2}{2\sigma^2}}$
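// Soft-demapper scale: LLR magnitudes grow as 1/sigma^2; FACTOR rescales them to fit the (possibly int8) code_type, see testbench.h.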
value_type precision = FACTOR / (sigma_noise * sigma_noise);
for (int j = 0; j < BLOCKS; ++j)
mod->softN(code + j * CODE_LEN, symb + j * SYMBOLS, precision);
for (int i = 0; i < BLOCKS; ++i)
itl->bwd(code + i * CODE_LEN);
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
noisy[i] = code[i];
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
assert(!std::isnan(code[i]));
int iterations = 0;
int num_decodes = 0;
auto start = std::chrono::system_clock::now();
for (int j = 0; j < BLOCKS; j += SIMD_WIDTH) {
int blocks = j + SIMD_WIDTH > BLOCKS ? BLOCKS - j : SIMD_WIDTH;
for (int n = 0; n < blocks; ++n)
for (int i = 0; i < CODE_LEN; ++i)
reinterpret_cast<code_type *>(simd+i)[n] = code[(j+n)*CODE_LEN+i];
int trials = TRIALS;
int count = decode(simd, simd + DATA_LEN, trials, blocks);
++num_decodes;
for (int n = 0; n < blocks; ++n)
for (int i = 0; i < CODE_LEN; ++i)
code[(j+n)*CODE_LEN+i] = reinterpret_cast<code_type *>(simd+i)[n];
if (count < 0) {
iterations += blocks * trials;
std::cerr << "decoder failed at converging to a code word!" << std::endl;
} else {
iterations += blocks * (trials - count);
std::cerr << trials - count << " iterations were needed." << std::endl;
}
}
auto end = std::chrono::system_clock::now();
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
int kbs = (BLOCKS * DATA_LEN + msec.count() / 2) / msec.count();
std::cerr << kbs << " kilobit per second." << std::endl;
float avg_iter = (float)iterations / (float)BLOCKS;
std::cerr << avg_iter << " average iterations per block." << std::endl;
float avg_msec = (float)msec.count() / (float)num_decodes;
std::cerr << avg_msec << " average milliseconds per decode." << std::endl;
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
assert(!std::isnan(code[i]));
int awgn_errors = 0;
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
awgn_errors += noisy[i] * orig[i] < 0;
int quantization_erasures = 0;
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
quantization_erasures += !noisy[i];
int uncorrected_errors = 0;
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
uncorrected_errors += code[i] * orig[i] <= 0;
int decoder_errors = 0;
for (int i = 0; i < BLOCKS * CODE_LEN; ++i)
decoder_errors += code[i] * orig[i] <= 0 && orig[i] * noisy[i] > 0;
float bit_error_rate = (float)uncorrected_errors / (float)(BLOCKS * CODE_LEN);
if (1) {
for (int i = 0; i < CODE_LEN; ++i)
code[i] = code[i] < 0 ? -1 : 1;
itl->fwd(code);
value_type sp = 0, np = 0;
for (int i = 0; i < SYMBOLS; ++i) {
complex_type s = mod->map(code + i * MOD_BITS);
complex_type e = symb[i] - s;
sp += std::norm(s);
np += std::norm(e);
}
value_type snr = 10 * std::log10(sp / np);
sigma_signal = std::sqrt(sp / SYMBOLS);
sigma_noise = std::sqrt(np / (2 * sp));
std::cerr << snr << " Es/N0, stddev " << sigma_noise << " of noise and " << sigma_signal << " of signal estimated from corrected symbols." << std::endl;
}
std::cerr << awgn_errors << " errors caused by AWGN." << std::endl;
std::cerr << quantization_erasures << " erasures caused by quantization." << std::endl;
std::cerr << decoder_errors << " errors caused by decoder." << std::endl;
std::cerr << uncorrected_errors << " errors uncorrected." << std::endl;
std::cerr << bit_error_rate << " bit error rate." << std::endl;
if (0) {
std::cout << SNR << " " << bit_error_rate << " " << avg_iter << " " << EbN0 << std::endl;
}
delete ldpc;
delete mod;
delete itl;
free(aligned_buffer);
delete[] code;
delete[] orig;
delete[] noisy;
delete[] symb;
return 0;
}
} // namespace ldpctool

View File

@ -0,0 +1,38 @@
/*
LDPC testbench
Copyright 2018 Ahmet Inan <xdsopl@gmail.com>
*/
#include <cstdint>
#include <complex>
#include "simd.h"
namespace ldpctool {
#ifdef __AVX2__
const int SIZEOF_SIMD = 32;
#else
const int SIZEOF_SIMD = 16;
#endif
typedef float value_type;
typedef std::complex<value_type> complex_type;
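// code_type is the soft-metric storage type: quantized int8_t LLRs with FACTOR = 2, or plain float with FACTOR = 1.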
#if 1
typedef int8_t code_type;
const int FACTOR = 2;
#else
typedef float code_type;
const int FACTOR = 1;
#endif
#if 0
const int SIMD_WIDTH = 1;
typedef code_type simd_type;
#else
const int SIMD_WIDTH = SIZEOF_SIMD / sizeof(code_type);
typedef SIMD<code_type, SIMD_WIDTH> simd_type;
#endif
} // namespace ldpctool

View File

@ -26,15 +26,19 @@
#include "leansdr/softword.h"
*/
#include <stdlib.h>
#include "bch.h"
#include "crc.h"
#include "dvb.h"
#include "softword.h"
#include "ldpc.h"
#include "sdr.h"
#include "ldpctool/layered_decoder.h"
#include "ldpctool/testbench.h"
#include "ldpctool/algorithms.h"
namespace leansdr
{
@ -2169,6 +2173,143 @@ struct s2_fecdec : runnable
pipewriter<int> *bitcount, *errcount;
}; // s2_fecdec
// Soft LDPC decoder
// Uses the internal ldpctool decoder instead of the external LDPC decoder process
template <typename SOFTBIT, typename SOFTBYTE>
struct s2_fecdec_soft : runnable
{
s2_fecdec_soft(scheduler *sch,
pipebuf<fecframe<SOFTBYTE>> &_in,
pipebuf<bbframe> &_out,
int _modcod,
bool _shortframes = true,
int _max_trials = 25,
pipebuf<int> *_bitcount = nullptr,
pipebuf<int> *_errcount = nullptr)
: runnable(sch, "S2 fecdec soft"),
in(_in), out(_out),
modcod(_modcod < 0 ? 0 : _modcod > 31 ? 31 : _modcod),
shortframes(_shortframes ? 1 : 0),
max_trials(_max_trials),
bitcount(opt_writer(_bitcount, 1)),
errcount(opt_writer(_errcount, 1))
{
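// Resolve the DVB-S2 LDPC table for this MODCOD and frame size, initialize the layered decoder and allocate the decode buffers.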
tabname = ldpctool::LDPCInterface::mc_tabnames[shortframes][modcod];
ldpc = ldpctool::create_ldpc((char *)"S2", tabname[0], atoi(tabname + 1));
CODE_LEN = ldpc->code_len();
DATA_LEN = ldpc->data_len();
decode.init(ldpc);
code = new ldpctool::code_type[BLOCKS * CODE_LEN];
aligned_buffer = aligned_alloc(sizeof(ldpctool::simd_type), sizeof(ldpctool::simd_type) * CODE_LEN);
simd = reinterpret_cast<ldpctool::simd_type *>(aligned_buffer);
}
~s2_fecdec_soft()
{
delete[] code;
}
void run()
{
while (in.readable() >= 1 && out.writable() >= 1 &&
opt_writable(bitcount, 1) && opt_writable(errcount, 1))
{
// input
fecframe<SOFTBYTE> *pin = in.rd();
size_t iosize = (pin->pls.framebits() / 8) * sizeof(SOFTBYTE);
int8_t *ibytes = reinterpret_cast<int8_t*>(pin->bytes);
int8_t *icode = reinterpret_cast<int8_t*>(code);
std::copy(ibytes, ibytes + iosize, icode); // write/read
// process
int iterations = 0;
int num_decodes = 0;
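// Transpose the soft bits into SIMD lanes (one block per lane), run up to max_trials decoder iterations per batch, then transpose back.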
for (int j = 0; j < BLOCKS; j += ldpctool::SIMD_WIDTH)
{
int blocks = j + ldpctool::SIMD_WIDTH > BLOCKS ? BLOCKS - j : ldpctool::SIMD_WIDTH;
for (int n = 0; n < blocks; ++n)
{
for (int i = 0; i < CODE_LEN; ++i) {
reinterpret_cast<ldpctool::code_type *>(simd + i)[n] = code[(j + n) * CODE_LEN + i];
}
}
int trials = max_trials;
int count = decode(simd, simd + DATA_LEN, trials, blocks);
++num_decodes;
for (int n = 0; n < blocks; ++n)
{
for (int i = 0; i < CODE_LEN; ++i) {
code[(j + n) * CODE_LEN + i] = reinterpret_cast<ldpctool::code_type *>(simd + i)[n];
}
}
if (count < 0) {
iterations += blocks * trials;
} else {
iterations += blocks * (trials - count);
}
}
// output
int8_t *ildpc = reinterpret_cast<int8_t*>(ldpc_buf);
std::copy(icode, icode + iosize, ildpc); // write/read
// Decode BCH.
const modcod_info *mcinfo = check_modcod(modcod);
const fec_info *fi = &fec_infos[pin->pls.sf ? 1 : 0][mcinfo->rate];
uint8_t *hardbytes = softbytes_harden(ldpc_buf, fi->kldpc / 8, bch_buf);
size_t cwbytes = fi->kldpc / 8;
bch_interface *bch = s2bch.bchs[pin->pls.sf ? 1 : 0][mcinfo->rate];
int ncorr = bch->decode(hardbytes, cwbytes);
bool corrupted = (ncorr < 0);
// Report VBER
opt_write(bitcount, fi->Kbch);
opt_write(errcount, (ncorr >= 0) ? ncorr : fi->Kbch);
if (!corrupted)
{
// Descramble and output
bbframe *pout = out.wr();
pout->pls = pin->pls;
bbscrambling.transform(hardbytes, fi->Kbch / 8, pout->bytes);
out.written(1);
}
if (sch->debug)
fprintf(stderr, "%c", corrupted ? '!' : ncorr ? '.' : '_');
in.read(1);
}
}
private:
pipereader<fecframe<SOFTBYTE>> in;
pipewriter<bbframe> out;
int modcod;
int shortframes;
int max_trials;
pipewriter<int> *bitcount, *errcount;
static const int BLOCKS = 32;
int CODE_LEN;
int DATA_LEN;
const char *tabname;
ldpctool::LDPCInterface *ldpc;
ldpctool::code_type *code;
void *aligned_buffer;
ldpctool::simd_type *simd;
typedef ldpctool::NormalUpdate<ldpctool::simd_type> update_type;
typedef ldpctool::MinSumCAlgorithm<ldpctool::simd_type, update_type, ldpctool::FACTOR> algorithm_type;
ldpctool::LDPCDecoder<ldpctool::simd_type, algorithm_type> decode;
SOFTBYTE ldpc_buf[64800 / 8];
uint8_t bch_buf[64800 / 8]; // Temp storage for hardening before BCH
s2_bch_engines s2bch;
s2_bbscrambling bbscrambling;
};
// External LDPC decoder
// Spawns a user-specified command and exchanges FEC frames over stdin/stdout.