diff --git a/lib/ftrsd/ftrsd_paper/ftrsd.lyx b/lib/ftrsd/ftrsd_paper/ftrsd.lyx index fdbbde499..9a11fb165 100644 --- a/lib/ftrsd/ftrsd_paper/ftrsd.lyx +++ b/lib/ftrsd/ftrsd_paper/ftrsd.lyx @@ -3,7 +3,25 @@ \begin_document \begin_header \textclass paper +\begin_preamble +\usepackage{ragged2e} +\exhyphenpenalty=10000\hyphenpenalty=10000 + +\fancyhf{} +\fancyhead[L]{Franke \& Taylor: {\it Open Source Soft-Decision Decoder \ldots}} +\fancyhead[R]{\thepage} +\makeatletter +\let\ps@plain\ps@fancy % Plain page style = fancy page style +\makeatother + +\usepackage{nomencl} + +\renewcommand{\nomname}{Sidebar: Glossary of Specialized Terms} +\end_preamble \use_default_options true +\begin_modules +boxedfloat +\end_modules \maintain_unincluded_children false \language english \language_package default @@ -47,7 +65,7 @@ \use_indices false \paperorientation portrait \suppress_date false -\justification true +\justification false \use_refstyle 1 \index Index \shortcut idx @@ -59,12 +77,12 @@ \bottommargin 1in \secnumdepth 3 \tocdepth 3 -\paragraph_separation indent -\paragraph_indentation default +\paragraph_separation skip +\defskip bigskip \quotes_language english \papercolumns 1 \papersides 1 -\paperpagestyle default +\paperpagestyle fancy \tracking_changes false \output_changes false \html_math_output 0 @@ -78,6 +96,13 @@ Open Source Soft-Decision Decoder for the JT65 (63,12) Reed-Solomon Code \end_layout +\begin_layout SubTitle + +\emph on +Under-the-hood description of the JT65 decoding procedure, including a wholly + new algorithm for its powerful error-correcting code. +\end_layout + \begin_layout Author Steven J. Franke, K9AN and Joseph H. 
@@ -95,7 +120,19 @@ Background and Motivation \end_layout \begin_layout Standard -The JT65 protocol has revolutionized amateur-radio weak-signal communication +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +RaggedRight +\end_layout + +\end_inset + + The JT65 protocol has revolutionized amateur-radio weak-signal communication by enabling operators with small or compromise antennas and relatively low-power transmitters to communicate over propagation paths not usable with traditional technologies. @@ -117,15 +154,120 @@ key "jt65_protocol" , where the scattered return signals are always weak. It was soon found that JT65 also enables worldwide communication on the HF bands with low power, modest antennas, and efficient spectral usage. - At least several thousand amateurs now use JT65 on a regular basis, making - contacts on all bands from 160 meters through microwaves. + Thousands of amateurs now use JT65 on a regular basis, making contacts + on all bands from 160 meters through microwaves. +\end_layout + +\begin_layout Standard +JT65 uses timed transmitting and receiving sequences one minute long. + Messages are short and structured so as to streamline minimal exchanges + between two amateur operators over potentially difficult radio paths. + Most messages contain two callsigns and a grid locator, signal report, + acknowledgment, or sign-off; one of the tokens ``CQ'', ``QRZ'', or ``DE'' + may be substituted for the first callsign. + Alternatively, a message may contain up to 13 Latin characters of arbitrary + text. + All messages are efficiently compressed into exactly 72 bits of digital + information. + It should be obvious that the JT65 protocol is intended for the basic purpose + of completing legitimate, documented two-way contacts, but not for extended + conversations. 
+ Full details of the message structure and encoding procedure are presented + in reference +\begin_inset CommandInset citation +LatexCommand cite +key "jt65_protocol" + +\end_inset + +. + For a concise description of the overall process of transmitting and receiving + a JT65 message, see the accompanying sidebar. \end_layout \begin_layout Standard A major reason for the success and popularity of JT65 is its use of a strong - error-correction code: a short block-length, low-rate Reed-Solomon code - based on a 64-symbol alphabet. - Until now, nearly all programs implementing JT65 have used the patented + error-correction code. + Before transmission, each 72-bit message is divided into 12 six-bit +\emph on +symbols +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Symbol: }" +description "The information carried in one signalling interval, usually an integral number of bits. JT65 uses 6-bit symbols." + +\end_inset + + +\emph default + and augmented with 51 additional symbols of error-correcting information. + These 51 +\emph on +parity symbols +\emph default + are computed according to information-theory rules that maximize the probabilit +y of correctly decoding the message, even if many symbols are received incorrect +ly. + The JT65 code is properly described as a short block-length, low-rate Reed-Solo +mon code based on a 64-symbol +\emph on +alphabet +\emph default + +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Alphabet: }" +description "A sequence of possible symbol values used for signaling. JT65 uses a 64-character alphabet, values in the range 0 to 63." + +\end_inset + +. + Characters in this alphabet are mapped onto 64 different frequencies for + transmission. + +\end_layout + +\begin_layout Standard +Reed Solomon codes are widely used to ensure reliability in data transmission + and storage. 
+ In hardware implementations, decoding is generally accomplished with an + algorithm such as the Berlekamp-Massey (BM) algorithm, based on +\emph on +hard decisions +\emph default + +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Hard decision: }" +description "Received symbols are assigned definite values by the demodulator." + +\end_inset + + for each of the symbol values received. + +\emph on +Soft decisions +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Soft decision: }" +description "Received symbols are assigned tentative values (most probable, second most probable, etc.) and quality indicators." + +\end_inset + + +\emph default + are potentially more powerful, however. + For each received JT65 symbol we can estimate not only the value most likely + to be correct, but also the second, third, etc., most likely. + Most importantly, we can also estimate the probability that each of those + possible values is the correct one. + Decoders that make use of such information are called +\emph on +soft-decision decoders. +\end_layout + +\begin_layout Standard +Until now, nearly all programs implementing JT65 have used the patented Kötter-Vardy (KV) algebraic soft-decision decoder \begin_inset CommandInset citation LatexCommand cite @@ -137,42 +279,47 @@ key "kv2001" use only in amateur radio applications. Since 2001 the KV decoder has been considered the best available soft-decision decoder for Reed Solomon codes. - \end_layout \begin_layout Standard We describe here a new open-source alternative called the Franke-Taylor (FT, or K9AN-K1JT) soft-decision decoding algorithm. - It is conceptually simple, built around the well-known Berlekamp-Massey - errors-and-erasures algorithm, and in this application it performs even - better than the KV decoder. 
- The FT algorithm is implemented in the popular program + It is conceptually simple, built on top of the BM hard-decision decoder, + and in this application it performs even better than the KV decoder. + The FT algorithm is implemented in the popular programs +\emph on +WSJT +\emph default +, +\emph on +MAP65 +\emph default +, and \emph on WSJT-X \emph default -, widely used for amateur weak-signal communication with JT65 and other +, widely used for amateur weak-signal communication using JT65 and other specialized digital modes. - The program is freely available and licensed under the GNU General Public - License + These programs are open-source, freely available \begin_inset CommandInset citation LatexCommand cite key "wsjt" \end_inset -. +, and licensed under the GNU General Public License. \end_layout \begin_layout Standard -The JT65 protocol specifies transmissions that normally start one second - into a UTC minute and last for 46.8 seconds. +The JT65 protocol specifies transmissions that start one second into a UTC + minute and last for 46.8 seconds. Receiving software therefore has up to several seconds to decode a message before the start of the next minute, when the operator sends a reply. With today's personal computers, this relatively long available time encourages experimentation with decoders of high computational complexity. - As a result, on a typical fading channel the FT algorithm can extend the - decoding threshold by many dB over the hard-decision Berlekamp-Massey decoder, - and by a meaningful amount over the KV decoder. + As a result, the FT algorithm can extend the decoding threshold on a typical + fading channel by many dB over the hard-decision BM decoder, and by a meaningfu +l amount over the KV decoder. In addition to its excellent performance, the new algorithm has other desirable properties, not least of which is its conceptual simplicity. 
Decoding performance and computational complexity scale in a convenient @@ -191,12 +338,15 @@ The remainder of this paper is organized as follows. and their error-correcting capabilities. Section 3 provides statistical motivation for the FT algorithm, and Section 4 describes the algorithm in full detail. - (Material in these two sections is important because it documents our approach - and underlines its fundamental technical contributions. - It is heavier in formal mathematics than common in these pages, however; - some readers may choose to skim or skip these sections and proceed to the - results more quickly. - Many readers will benefit by reviewing the original paper on the JT65 protocol + Material in these two sections is important because it documents our approach + and underlines its fundamental technical contribution. + It is heavier in formal mathematics than common in +\emph on +QEX +\emph default +; for this reason, some readers may choose to skip or skim sections 3 and + 4 and proceed more quickly to the results. + Most readers will benefit by reviewing the original paper on the JT65 protocol \begin_inset CommandInset citation LatexCommand cite @@ -204,19 +354,16 @@ key "jt65_protocol" \end_inset -.) A procedure for -\begin_inset Quotes eld -\end_inset - -hinted decoding -\begin_inset Quotes erd -\end_inset - - --- determining which one, if any, of a list of likely messages matches +. + A procedure for +\emph on +hinted decoding +\emph default +--- determining which one, if any, of a list of likely messages matches the one that was received --- is outlined in Section 5. 
- Finally, in Section 6 we present performance measurements of the FT algorithm - and make explicit comparisons to the BM and KV decoders familiar to users - of programs + Finally, in Section 6 we present performance measurements of the FT and + hinted decoding algorithms and make explicit comparisons to the BM and + KV decoders familiar to users of older versions of \emph on WSJT \emph default @@ -229,8 +376,9 @@ MAP65 WSJT-X \emph default . - Section 7 summarizes some early on-the-air experiences with the new decoder. - + Section 7 summarizes some on-the-air experiences with the new decoder. + You may refer to the sidebar Glossary for brief definitions of some specialized + terms. \end_layout \begin_layout Section @@ -246,48 +394,96 @@ JT65 Messages and Reed Solomon Codes \begin_layout Standard JT65 message frames consist of a short compressed 72-bit message encoded for transmission with a Reed-Solomon code. - Reed-Solomon codes are block codes characterized by + Reed-Solomon codes are +\emph on +block codes +\emph default + +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Block code: }" +description "An error-correcting code that treats data in blocks of fixed size." + +\end_inset + + characterized by \begin_inset Formula $n$ \end_inset -, the length of their codewords, +, the length of their +\emph on +codewords +\emph default +; +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Codeword:}" +description "For the JT65 code, a vector of 63 symbol values each in the range 0 to 63." + +\end_inset + + \begin_inset Formula $k$ \end_inset -, the number of message symbols conveyed by the codeword, and the number - of possible values for each symbol in the codewords. +, the number of message symbols conveyed by the codeword; and the transmission + alphabet or number of possible values for each symbol in the codewords.
The codeword length and the number of message symbols are specified with the notation \begin_inset Formula $(n,k)$ \end_inset . - JT65 uses a (63,12) Reed-Solomon code with 64 possible values for each - symbol. + JT65 uses a (63,12) Reed-Solomon code with an alphabet of 64 possible values + for each symbol. Each of the 12 message symbols represents \begin_inset Formula $\log_{2}64=6$ \end_inset message bits. - The source-encoded messages conveyed by a 63-symbol JT65 frame thus consist - of 72 information bits. - The JT65 code is systematic, which means that the 12 message symbols are - embedded in the codeword without modification and another 51 parity symbols - derived from the message symbols are added to form a codeword of 63 symbols. + The source-encoded +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Source encoding: }" +description "Compression of a message to use a minimum number of bits. JT65 source-encodes all messages to 72 bits." + +\end_inset + + messages conveyed by a 63-symbol JT65 frame thus consist of 72 information + bits. + The JT65 code is +\emph on +systematic +\emph default +, which means that the 12 message symbols are embedded in the codeword without + modification and another 51 parity symbols derived from the message symbols + are added to form a codeword of 63 symbols. \end_layout \begin_layout Standard -In coding theory the concept of Hamming distance is used as a measure of - -\begin_inset Quotes eld +In coding theory the concept of +\emph on +Hamming distance +\emph default + +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Hamming distance: }" +description "The Hamming distance between two codewords, or between a received word and a codeword, is equal to the number of symbol positions in which they differ."
+ \end_inset -distance -\begin_inset Quotes erd + is used as a measure of the lack of agreement between different codewords, + or between a received word +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Received word: }" +description "A vector of symbol values, possibly accompanied by soft information on individual reliabilities." + \end_inset - between different codewords, or between a received word and a codeword. + and a codeword. Hamming distance is the number of code symbols that differ in two words being compared. Reed-Solomon codes have minimum Hamming distance @@ -314,15 +510,28 @@ With \begin_inset Formula $d=52$ \end_inset -, which means that any particular codeword differs from all other codewords - in at least 52 of the 63 symbol positions. +. + With 72 information bits in each message, JT65 can transmit any one of +\begin_inset Formula $2^{72}$ +\end_inset + + possible messages. + The codeword for any message differs from every other codeword in at least + 52 of the 63 symbol positions. \end_layout \begin_layout Standard -Given a received word containing some incorrect symbols (errors), the received - word can be decoded into the correct codeword using a deterministic, algebraic - algorithm provided that no more than +A received word containing some incorrect symbols (errors) can be decoded + into the correct codeword using a deterministic +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Deterministic algorithm: }" +description "A series of computational steps that for the same input always produces the same output." + +\end_inset + +, algebraic algorithm provided that no more than \begin_inset Formula $t$ \end_inset @@ -340,8 +549,8 @@ For the JT65 code , so it is always possible to decode a received word having 25 or fewer symbol errors. - Any one of several well-known algebraic algorithms, such as the widely - used Berlekamp-Massey (BM) algorithm, can carry out the decoding. 
+ Any one of several well-known algebraic algorithms, such as the BM algorithm, + can carry out this hard-decision decoding. Two steps are necessarily involved in this process. We must (1) determine which symbols were received incorrectly, and (2) find the correct value of the incorrect symbols. @@ -366,13 +575,18 @@ For the JT65 code The FT algorithm creates lists of symbols suspected of being incorrect and sends them to the BM decoder. Symbols flagged in this way are called -\begin_inset Quotes eld -\end_inset - -erasures. -\begin_inset Quotes erd +\emph on +erasures +\emph default + +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Erasure: }" +description "A received symbol may be ``erased'' when confidence in its value is so low that it is unlikely to provide useful information. " + \end_inset +. With perfect erasure information up to \begin_inset Formula $n-k=51$ \end_inset @@ -405,39 +619,24 @@ If \end_inset , the decoder is said to be an -\begin_inset Quotes eld -\end_inset - +\emph on errors-only -\begin_inset Quotes erd -\end_inset - +\emph default decoder. If \begin_inset Formula $0X$ -\end_inset - -. + number of incorrect symbols. Correspondingly, the FT algorithm works best when the probability of erasing a symbol is somewhat larger than the probability that the symbol is incorrect. For the JT65 code we found empirically that good decoding performance is @@ -1029,10 +1226,22 @@ t educated guesses to select symbols for erasure. \begin_inset Formula $d_{s}$ \end_inset -, the soft distance between the received word and the codeword: +, the +\emph on +soft distance +\begin_inset CommandInset nomenclature +LatexCommand nomenclature +symbol "{\\bf Soft distance: }" +description "The soft distance between a received word and a codeword is a measure of how greatly they differ, taking into account available soft information on symbol values." 
+ +\end_inset + + +\emph default + between the received word and the codeword: \begin_inset Formula \begin{equation} -d_{s}=\sum_{j=1}^{n}\alpha_{j}\,(1+p_{1,\, j}).\label{eq:soft_distance} +d_{s}=\sum_{j=1}^{n}\alpha_{j}\,(1+p_{1,\,j}).\label{eq:soft_distance} \end{equation} \end_inset @@ -1050,7 +1259,7 @@ Here \end_inset if the received symbol and codeword symbol are different, and -\begin_inset Formula $p_{1,\, j}$ +\begin_inset Formula $p_{1,\,j}$ \end_inset is the fractional power associated with received symbol @@ -1071,7 +1280,7 @@ In practice we find that \begin_inset Formula $d_{s}$ \end_inset - can reliably indentify the correct codeword if the signal-to-noise ratio + can reliably identify the correct codeword if the signal-to-noise ratio for individual symbols is greater than about 4 in linear power units. We also find that significantly weaker signals can be decoded by using soft-symbol information beyond that contained in @@ -1094,7 +1303,7 @@ In practice we find that \begin_layout Standard \begin_inset Formula \begin{equation} -u=\frac{1}{n}\sum_{j=1}^{n}S(c_{j},\, j).\label{eq:u-metric} +u=\frac{1}{n}\sum_{j=1}^{n}S(c_{j},\,j).\label{eq:u-metric} \end{equation} \end_inset @@ -1127,7 +1336,7 @@ The correct JT65 codeword produces a value for bins containing noise only. Thus, if the spectral array -\begin_inset Formula $S(i,\, j)$ +\begin_inset Formula $S(i,\,j)$ \end_inset has been normalized so that the average value of the noise-only bins is @@ -1267,16 +1476,12 @@ If \end_inset population, again several standard deviations above the mean. 
- -\end_layout - -\begin_layout Standard -If the signal-to-noise ratio + If the signal-to-noise ratio \begin_inset Formula $y$ \end_inset - is too small for decoding to be possible, or for some other reason the - correct codeword is never presented as a candidate, the ratio + is too small for decoding to be possible, or the correct codeword is never + presented as a candidate, the ratio \begin_inset Formula $r=u_{2}/u_{1}$ \end_inset @@ -1311,7 +1516,7 @@ reference "sec:Theory,-Simulation,-and" \end_inset that maximizes the probability of correct decodes while ensuring a low - rate of false decodes. + rate of false positives. \end_layout \begin_layout Standard @@ -1325,12 +1530,12 @@ As with all decoding algorithms that generate a list of possible codewords, \begin_inset Formula $d_{s}$ \end_inset - are less than specified limits -\begin_inset Formula $X_{0}$ + obey specified criteria +\begin_inset Formula $X