more merging

2013-01-20 00:08:14 +01:00 · 2013-01-20 00:08:14 +01:00 · f0d3f40577
commit f0d3f40577
parent 49f20a82e8
28 changed files with 702 additions and 2747 deletions
--- a/src/fe.c
+++ b/src/fe.c
@ -693,3 +693,658 @@ void fe_mul(fe h, const fe f, const fe g) {
    h[8] = (int32_t) h8;
    h[9] = (int32_t) h9;
 }
+
+
+/*
+h = -f
+
+Negates a field element limb by limb: each of the ten int32_t limbs of f is
+negated independently, with no carrying or reduction.
+
+Parameters:
+   h - output; receives the ten negated limbs
+   f - input; read-only
+
+All ten limbs of f are copied into locals before anything is stored to h,
+so h may be the same array as f.
+
+Preconditions:
+   |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+
+Postconditions:
+   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+*/
+
+void fe_neg(fe h, const fe f) {
+    int32_t f0 = f[0]; /* read every input limb first (see aliasing note above) */
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int32_t h0 = -f0; /* magnitude is unchanged, so the input bounds carry over */
+    int32_t h1 = -f1;
+    int32_t h2 = -f2;
+    int32_t h3 = -f3;
+    int32_t h4 = -f4;
+    int32_t h5 = -f5;
+    int32_t h6 = -f6;
+    int32_t h7 = -f7;
+    int32_t h8 = -f8;
+    int32_t h9 = -f9;
+    h[0] = h0;
+    h[1] = h1;
+    h[2] = h2;
+    h[3] = h3;
+    h[4] = h4;
+    h[5] = h5;
+    h[6] = h6;
+    h[7] = h7;
+    h[8] = h8;
+    h[9] = h9;
+}
+
+
+void fe_pow22523(fe out, const fe z) { /* out = z^(2^252 - 3), assuming fe_mul(h, f, g) yields h = f*g (its body is not shown here); exponents are traced step by step below */
+    fe t0;
+    fe t1;
+    fe t2;
+    int i;
+    fe_sq(t0, z); /* t0 = z^2 */
+
+    for (i = 1; i < 1; ++i) { /* bound is 1, so this loop body never runs */
+        fe_sq(t0, t0);
+    }
+
+    fe_sq(t1, t0); /* t1 = z^4 */
+
+    for (i = 1; i < 2; ++i) { /* runs once: t1 = z^8 */
+        fe_sq(t1, t1);
+    }
+
+    fe_mul(t1, z, t1); /* t1 = z^9 */
+    fe_mul(t0, t0, t1); /* t0 = z^11 */
+    fe_sq(t0, t0); /* t0 = z^22 */
+
+    for (i = 1; i < 1; ++i) { /* bound is 1, so this loop body never runs */
+        fe_sq(t0, t0);
+    }
+
+    fe_mul(t0, t1, t0); /* t0 = z^31 = z^(2^5 - 1) */
+    fe_sq(t1, t0); /* first of 5 squarings of t0 */
+
+    for (i = 1; i < 5; ++i) { /* 4 more: t1 = z^(2^10 - 2^5) */
+        fe_sq(t1, t1);
+    }
+
+    fe_mul(t0, t1, t0); /* t0 = z^(2^10 - 1) */
+    fe_sq(t1, t0); /* first of 10 squarings of t0 */
+
+    for (i = 1; i < 10; ++i) { /* 9 more: t1 = z^(2^20 - 2^10) */
+        fe_sq(t1, t1);
+    }
+
+    fe_mul(t1, t1, t0); /* t1 = z^(2^20 - 1) */
+    fe_sq(t2, t1); /* first of 20 squarings of t1 */
+
+    for (i = 1; i < 20; ++i) { /* 19 more: t2 = z^(2^40 - 2^20) */
+        fe_sq(t2, t2);
+    }
+
+    fe_mul(t1, t2, t1); /* t1 = z^(2^40 - 1) */
+    fe_sq(t1, t1); /* first of 10 squarings */
+
+    for (i = 1; i < 10; ++i) { /* 9 more: t1 = z^(2^50 - 2^10) */
+        fe_sq(t1, t1);
+    }
+
+    fe_mul(t0, t1, t0); /* t0 = z^(2^50 - 1) */
+    fe_sq(t1, t0); /* first of 50 squarings of t0 */
+
+    for (i = 1; i < 50; ++i) { /* 49 more: t1 = z^(2^100 - 2^50) */
+        fe_sq(t1, t1);
+    }
+
+    fe_mul(t1, t1, t0); /* t1 = z^(2^100 - 1) */
+    fe_sq(t2, t1); /* first of 100 squarings of t1 */
+
+    for (i = 1; i < 100; ++i) { /* 99 more: t2 = z^(2^200 - 2^100) */
+        fe_sq(t2, t2);
+    }
+
+    fe_mul(t1, t2, t1); /* t1 = z^(2^200 - 1) */
+    fe_sq(t1, t1); /* first of 50 squarings */
+
+    for (i = 1; i < 50; ++i) { /* 49 more: t1 = z^(2^250 - 2^50) */
+        fe_sq(t1, t1);
+    }
+
+    fe_mul(t0, t1, t0); /* t0 = z^(2^250 - 1) */
+    fe_sq(t0, t0); /* first of 2 squarings */
+
+    for (i = 1; i < 2; ++i) { /* runs once: t0 = z^(2^252 - 4) */
+        fe_sq(t0, t0);
+    }
+
+    fe_mul(out, t0, z); /* out = z^(2^252 - 3), i.e. z^((p - 5)/8) for p = 2^255 - 19 */
+    return;
+}
+
+
+/*
+h = f * f
+Can overlap h with f.
+
+Squares a field element held as ten signed limbs. Judging by the carry
+shifts below, the limbs alternate between 26 and 25 bits (h0 is carried at
+bit 26, h1 at bit 25, and so on).
+
+Parameters:
+   h - output limbs; every limb of f is copied into a local first, so h
+       may be the same array as f
+   f - input limbs; read-only
+
+Outline:
+   1. Load f0..f9 and precompute the doubled limbs (fN_2) and the limbs
+      scaled by 19 or 38 (fN_19, fN_38).
+   2. Form each distinct limb product once, in 64 bits. The numeric suffix
+      of a product name is the total constant applied to it, e.g.
+      f1f9_76 = 76*f1*f9 and f2f2 = f2*f2.
+   3. Sum the products into the ten 64-bit accumulators h0..h9.
+   4. Run the interleaved carry chain. Each carry adds half of the limb
+      radix before shifting, so it rounds to nearest and leaves a signed
+      remainder. The carry out of h9 is folded back into h0 multiplied by
+      19 (presumably because 2^255 is congruent to 19 modulo 2^255-19).
+   5. Store the ten limbs as int32_t.
+
+Review notes:
+   The carryN values can be negative. "carryN << k" on a negative signed
+   value is undefined behavior in ISO C, and ">>" on a negative value is
+   implementation-defined; the code relies on two's-complement arithmetic
+   shifts.
+
+Preconditions:
+   |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+
+Postconditions:
+   |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+*/
+
+/*
+See the discussion of implementation strategy that accompanies fe_mul
+(this comment originally pointed at fe_mul.c; fe_mul is defined earlier in
+this file).
+*/
+
+void fe_sq(fe h, const fe f) {
+    int32_t f0 = f[0];
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int32_t f0_2 = 2 * f0; /* doubled limbs: each cross product fi*fj (i != j) occurs twice in a square */
+    int32_t f1_2 = 2 * f1;
+    int32_t f2_2 = 2 * f2;
+    int32_t f3_2 = 2 * f3;
+    int32_t f4_2 = 2 * f4;
+    int32_t f5_2 = 2 * f5;
+    int32_t f6_2 = 2 * f6;
+    int32_t f7_2 = 2 * f7;
+    int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
+    int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
+    int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
+    int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
+    int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
+    int64_t f0f0    = f0   * (int64_t) f0; /* the cast widens each multiplication to 64 bits */
+    int64_t f0f1_2  = f0_2 * (int64_t) f1;
+    int64_t f0f2_2  = f0_2 * (int64_t) f2;
+    int64_t f0f3_2  = f0_2 * (int64_t) f3;
+    int64_t f0f4_2  = f0_2 * (int64_t) f4;
+    int64_t f0f5_2  = f0_2 * (int64_t) f5;
+    int64_t f0f6_2  = f0_2 * (int64_t) f6;
+    int64_t f0f7_2  = f0_2 * (int64_t) f7;
+    int64_t f0f8_2  = f0_2 * (int64_t) f8;
+    int64_t f0f9_2  = f0_2 * (int64_t) f9;
+    int64_t f1f1_2  = f1_2 * (int64_t) f1;
+    int64_t f1f2_2  = f1_2 * (int64_t) f2;
+    int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
+    int64_t f1f4_2  = f1_2 * (int64_t) f4;
+    int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
+    int64_t f1f6_2  = f1_2 * (int64_t) f6;
+    int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
+    int64_t f1f8_2  = f1_2 * (int64_t) f8;
+    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
+    int64_t f2f2    = f2   * (int64_t) f2;
+    int64_t f2f3_2  = f2_2 * (int64_t) f3;
+    int64_t f2f4_2  = f2_2 * (int64_t) f4;
+    int64_t f2f5_2  = f2_2 * (int64_t) f5;
+    int64_t f2f6_2  = f2_2 * (int64_t) f6;
+    int64_t f2f7_2  = f2_2 * (int64_t) f7;
+    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
+    int64_t f2f9_38 = f2   * (int64_t) f9_38;
+    int64_t f3f3_2  = f3_2 * (int64_t) f3;
+    int64_t f3f4_2  = f3_2 * (int64_t) f4;
+    int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
+    int64_t f3f6_2  = f3_2 * (int64_t) f6;
+    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
+    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
+    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
+    int64_t f4f4    = f4   * (int64_t) f4;
+    int64_t f4f5_2  = f4_2 * (int64_t) f5;
+    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
+    int64_t f4f7_38 = f4   * (int64_t) f7_38;
+    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
+    int64_t f4f9_38 = f4   * (int64_t) f9_38;
+    int64_t f5f5_38 = f5   * (int64_t) f5_38;
+    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
+    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
+    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
+    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
+    int64_t f6f6_19 = f6   * (int64_t) f6_19;
+    int64_t f6f7_38 = f6   * (int64_t) f7_38;
+    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
+    int64_t f6f9_38 = f6   * (int64_t) f9_38;
+    int64_t f7f7_38 = f7   * (int64_t) f7_38;
+    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
+    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
+    int64_t f8f8_19 = f8   * (int64_t) f8_19;
+    int64_t f8f9_38 = f8   * (int64_t) f9_38;
+    int64_t f9f9_38 = f9   * (int64_t) f9_38;
+    int64_t h0 = f0f0  + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; /* hK collects the products whose limb indices sum to K or K+10 */
+    int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
+    int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
+    int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
+    int64_t h4 = f0f4_2 + f1f3_4 + f2f2   + f5f9_76 + f6f8_38 + f7f7_38;
+    int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
+    int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
+    int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
+    int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4   + f9f9_38;
+    int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+    carry0 = (h0 + (int64_t) (1 << 25)) >> 26; /* adding 2^25 before the shift rounds the carry to nearest */
+    h1 += carry0;
+    h0 -= carry0 << 26;
+    carry4 = (h4 + (int64_t) (1 << 25)) >> 26; /* two chains (starting at h0 and at h4) are interleaved */
+    h5 += carry4;
+    h4 -= carry4 << 26;
+    carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
+    h2 += carry1;
+    h1 -= carry1 << 25;
+    carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
+    h6 += carry5;
+    h5 -= carry5 << 25;
+    carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
+    h3 += carry2;
+    h2 -= carry2 << 26;
+    carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
+    h7 += carry6;
+    h6 -= carry6 << 26;
+    carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
+    h4 += carry3;
+    h3 -= carry3 << 25;
+    carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
+    h8 += carry7;
+    h7 -= carry7 << 25;
+    carry4 = (h4 + (int64_t) (1 << 25)) >> 26; /* h4 is carried a second time because carry3 was just added to it */
+    h5 += carry4;
+    h4 -= carry4 << 26;
+    carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
+    h9 += carry8;
+    h8 -= carry8 << 26;
+    carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
+    h0 += carry9 * 19; /* the carry out of the top limb wraps around into h0 with weight 19 */
+    h9 -= carry9 << 25;
+    carry0 = (h0 + (int64_t) (1 << 25)) >> 26; /* h0 is carried again after absorbing 19*carry9 */
+    h1 += carry0;
+    h0 -= carry0 << 26;
+    h[0] = (int32_t) h0;
+    h[1] = (int32_t) h1;
+    h[2] = (int32_t) h2;
+    h[3] = (int32_t) h3;
+    h[4] = (int32_t) h4;
+    h[5] = (int32_t) h5;
+    h[6] = (int32_t) h6;
+    h[7] = (int32_t) h7;
+    h[8] = (int32_t) h8;
+    h[9] = (int32_t) h9;
+}
+
+
+/*
+h = 2 * f * f
+Can overlap h with f.
+
+Computes twice the square of a field element held as ten signed limbs.
+The body is the same as fe_sq except that the ten 64-bit accumulators are
+doubled (hN += hN) before the carry chain runs.
+
+Parameters:
+   h - output limbs; every limb of f is copied into a local first, so h
+       may be the same array as f
+   f - input limbs; read-only
+
+Outline:
+   1. Load f0..f9 and precompute the doubled limbs (fN_2) and the limbs
+      scaled by 19 or 38 (fN_19, fN_38).
+   2. Form each distinct limb product once, in 64 bits. The numeric suffix
+      of a product name is the total constant applied to it, e.g.
+      f1f9_76 = 76*f1*f9 and f2f2 = f2*f2.
+   3. Sum the products into the accumulators h0..h9, then double each one.
+   4. Run the interleaved carry chain. Each carry adds half of the limb
+      radix before shifting, so it rounds to nearest and leaves a signed
+      remainder. The carry out of h9 is folded back into h0 multiplied by
+      19 (presumably because 2^255 is congruent to 19 modulo 2^255-19).
+   5. Store the ten limbs as int32_t.
+
+Review notes:
+   The carryN values can be negative. "carryN << k" on a negative signed
+   value is undefined behavior in ISO C, and ">>" on a negative value is
+   implementation-defined; the code relies on two's-complement arithmetic
+   shifts.
+
+Preconditions:
+   |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+
+Postconditions:
+   |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+*/
+
+/*
+See the discussion of implementation strategy that accompanies fe_mul
+(this comment originally pointed at fe_mul.c; fe_mul is defined earlier in
+this file).
+*/
+
+void fe_sq2(fe h, const fe f) {
+    int32_t f0 = f[0];
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int32_t f0_2 = 2 * f0; /* doubled limbs: each cross product fi*fj (i != j) occurs twice in a square */
+    int32_t f1_2 = 2 * f1;
+    int32_t f2_2 = 2 * f2;
+    int32_t f3_2 = 2 * f3;
+    int32_t f4_2 = 2 * f4;
+    int32_t f5_2 = 2 * f5;
+    int32_t f6_2 = 2 * f6;
+    int32_t f7_2 = 2 * f7;
+    int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
+    int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
+    int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
+    int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
+    int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
+    int64_t f0f0    = f0   * (int64_t) f0; /* the cast widens each multiplication to 64 bits */
+    int64_t f0f1_2  = f0_2 * (int64_t) f1;
+    int64_t f0f2_2  = f0_2 * (int64_t) f2;
+    int64_t f0f3_2  = f0_2 * (int64_t) f3;
+    int64_t f0f4_2  = f0_2 * (int64_t) f4;
+    int64_t f0f5_2  = f0_2 * (int64_t) f5;
+    int64_t f0f6_2  = f0_2 * (int64_t) f6;
+    int64_t f0f7_2  = f0_2 * (int64_t) f7;
+    int64_t f0f8_2  = f0_2 * (int64_t) f8;
+    int64_t f0f9_2  = f0_2 * (int64_t) f9;
+    int64_t f1f1_2  = f1_2 * (int64_t) f1;
+    int64_t f1f2_2  = f1_2 * (int64_t) f2;
+    int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
+    int64_t f1f4_2  = f1_2 * (int64_t) f4;
+    int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
+    int64_t f1f6_2  = f1_2 * (int64_t) f6;
+    int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
+    int64_t f1f8_2  = f1_2 * (int64_t) f8;
+    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
+    int64_t f2f2    = f2   * (int64_t) f2;
+    int64_t f2f3_2  = f2_2 * (int64_t) f3;
+    int64_t f2f4_2  = f2_2 * (int64_t) f4;
+    int64_t f2f5_2  = f2_2 * (int64_t) f5;
+    int64_t f2f6_2  = f2_2 * (int64_t) f6;
+    int64_t f2f7_2  = f2_2 * (int64_t) f7;
+    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
+    int64_t f2f9_38 = f2   * (int64_t) f9_38;
+    int64_t f3f3_2  = f3_2 * (int64_t) f3;
+    int64_t f3f4_2  = f3_2 * (int64_t) f4;
+    int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
+    int64_t f3f6_2  = f3_2 * (int64_t) f6;
+    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
+    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
+    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
+    int64_t f4f4    = f4   * (int64_t) f4;
+    int64_t f4f5_2  = f4_2 * (int64_t) f5;
+    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
+    int64_t f4f7_38 = f4   * (int64_t) f7_38;
+    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
+    int64_t f4f9_38 = f4   * (int64_t) f9_38;
+    int64_t f5f5_38 = f5   * (int64_t) f5_38;
+    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
+    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
+    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
+    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
+    int64_t f6f6_19 = f6   * (int64_t) f6_19;
+    int64_t f6f7_38 = f6   * (int64_t) f7_38;
+    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
+    int64_t f6f9_38 = f6   * (int64_t) f9_38;
+    int64_t f7f7_38 = f7   * (int64_t) f7_38;
+    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
+    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
+    int64_t f8f8_19 = f8   * (int64_t) f8_19;
+    int64_t f8f9_38 = f8   * (int64_t) f9_38;
+    int64_t f9f9_38 = f9   * (int64_t) f9_38;
+    int64_t h0 = f0f0  + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38; /* hK collects the products whose limb indices sum to K or K+10 */
+    int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
+    int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
+    int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
+    int64_t h4 = f0f4_2 + f1f3_4 + f2f2   + f5f9_76 + f6f8_38 + f7f7_38;
+    int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
+    int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
+    int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
+    int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4   + f9f9_38;
+    int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
+    int64_t carry0;
+    int64_t carry1;
+    int64_t carry2;
+    int64_t carry3;
+    int64_t carry4;
+    int64_t carry5;
+    int64_t carry6;
+    int64_t carry7;
+    int64_t carry8;
+    int64_t carry9;
+    h0 += h0; /* double every accumulator: this is the only difference from fe_sq */
+    h1 += h1;
+    h2 += h2;
+    h3 += h3;
+    h4 += h4;
+    h5 += h5;
+    h6 += h6;
+    h7 += h7;
+    h8 += h8;
+    h9 += h9;
+    carry0 = (h0 + (int64_t) (1 << 25)) >> 26; /* adding 2^25 before the shift rounds the carry to nearest */
+    h1 += carry0;
+    h0 -= carry0 << 26;
+    carry4 = (h4 + (int64_t) (1 << 25)) >> 26; /* two chains (starting at h0 and at h4) are interleaved */
+    h5 += carry4;
+    h4 -= carry4 << 26;
+    carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
+    h2 += carry1;
+    h1 -= carry1 << 25;
+    carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
+    h6 += carry5;
+    h5 -= carry5 << 25;
+    carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
+    h3 += carry2;
+    h2 -= carry2 << 26;
+    carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
+    h7 += carry6;
+    h6 -= carry6 << 26;
+    carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
+    h4 += carry3;
+    h3 -= carry3 << 25;
+    carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
+    h8 += carry7;
+    h7 -= carry7 << 25;
+    carry4 = (h4 + (int64_t) (1 << 25)) >> 26; /* h4 is carried a second time because carry3 was just added to it */
+    h5 += carry4;
+    h4 -= carry4 << 26;
+    carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
+    h9 += carry8;
+    h8 -= carry8 << 26;
+    carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
+    h0 += carry9 * 19; /* the carry out of the top limb wraps around into h0 with weight 19 */
+    h9 -= carry9 << 25;
+    carry0 = (h0 + (int64_t) (1 << 25)) >> 26; /* h0 is carried again after absorbing 19*carry9 */
+    h1 += carry0;
+    h0 -= carry0 << 26;
+    h[0] = (int32_t) h0;
+    h[1] = (int32_t) h1;
+    h[2] = (int32_t) h2;
+    h[3] = (int32_t) h3;
+    h[4] = (int32_t) h4;
+    h[5] = (int32_t) h5;
+    h[6] = (int32_t) h6;
+    h[7] = (int32_t) h7;
+    h[8] = (int32_t) h8;
+    h[9] = (int32_t) h9;
+}
+
+
+/*
+h = f - g
+Can overlap h with f or g.
+
+Subtracts two field elements limb by limb: each of the ten int32_t limbs
+of g is subtracted from the matching limb of f, with no carrying or
+reduction. That is why the output bound below is twice the input bound.
+
+Parameters:
+   h - output; receives the ten limb differences
+   f - minuend; read-only
+   g - subtrahend; read-only
+
+All limbs of f and g are copied into locals before anything is stored to
+h, which is what makes the overlap above safe.
+
+Preconditions:
+   |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+   |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+
+Postconditions:
+   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+*/
+
+void fe_sub(fe h, const fe f, const fe g) {
+    int32_t f0 = f[0]; /* read both operands completely before writing h */
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int32_t g0 = g[0];
+    int32_t g1 = g[1];
+    int32_t g2 = g[2];
+    int32_t g3 = g[3];
+    int32_t g4 = g[4];
+    int32_t g5 = g[5];
+    int32_t g6 = g[6];
+    int32_t g7 = g[7];
+    int32_t g8 = g[8];
+    int32_t g9 = g[9];
+    int32_t h0 = f0 - g0; /* plain 32-bit differences; the preconditions keep them in range */
+    int32_t h1 = f1 - g1;
+    int32_t h2 = f2 - g2;
+    int32_t h3 = f3 - g3;
+    int32_t h4 = f4 - g4;
+    int32_t h5 = f5 - g5;
+    int32_t h6 = f6 - g6;
+    int32_t h7 = f7 - g7;
+    int32_t h8 = f8 - g8;
+    int32_t h9 = f9 - g9;
+    h[0] = h0;
+    h[1] = h1;
+    h[2] = h2;
+    h[3] = h3;
+    h[4] = h4;
+    h[5] = h5;
+    h[6] = h6;
+    h[7] = h7;
+    h[8] = h8;
+    h[9] = h9;
+}
+
+
+
+/*
+Serializes the field element h into the 32 bytes s[0..31], least
+significant byte first, after reducing it to the representative between
+0 and p-1 (see the "Goal" comments in the body).
+
+Parameters:
+   s - output buffer; exactly 32 bytes are written
+   h - input limbs; read-only (the limbs are copied into locals)
+
+Steps:
+   1. Estimate q = floor(h/p) by rippling a carry through all ten limbs,
+      starting from 19*h9 rounded at bit 25 (justified by the proof below).
+   2. Add 19*q to h0 and carry through every limb with plain floor shifts,
+      discarding the carry out of h9; this subtracts p*q from h.
+   3. Pack the limbs, which are now 26,25,26,25,... bits wide, into bytes.
+
+Review notes:
+   The carryN values can be negative. "carryN << k" on a negative signed
+   value is undefined behavior in ISO C, and ">>" on a negative value is
+   implementation-defined; the code relies on two's-complement arithmetic
+   shifts.
+
+Preconditions:
+  |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+
+Write p=2^255-19; q=floor(h/p).
+Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
+
+Proof:
+  Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
+  Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
+
+  Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
+  Then 0<y<1.
+
+  Write r=h-pq.
+  Have 0<=r<=p-1=2^255-20.
+  Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
+
+  Write x=r+19(2^-255)r+y.
+  Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
+
+  Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
+  so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
+*/
+
+void fe_tobytes(unsigned char *s, const fe h) {
+    int32_t h0 = h[0];
+    int32_t h1 = h[1];
+    int32_t h2 = h[2];
+    int32_t h3 = h[3];
+    int32_t h4 = h[4];
+    int32_t h5 = h[5];
+    int32_t h6 = h[6];
+    int32_t h7 = h[7];
+    int32_t h8 = h[8];
+    int32_t h9 = h[9];
+    int32_t q;
+    int32_t carry0;
+    int32_t carry1;
+    int32_t carry2;
+    int32_t carry3;
+    int32_t carry4;
+    int32_t carry5;
+    int32_t carry6;
+    int32_t carry7;
+    int32_t carry8;
+    int32_t carry9;
+    q = (19 * h9 + (((int32_t) 1) << 24)) >> 25; /* the 19 2^(-25) h9 + 2^(-1) term of the basic claim */
+    q = (h0 + q) >> 26; /* ripple that term through the limbs as a carry, low to high */
+    q = (h1 + q) >> 25;
+    q = (h2 + q) >> 26;
+    q = (h3 + q) >> 25;
+    q = (h4 + q) >> 26;
+    q = (h5 + q) >> 25;
+    q = (h6 + q) >> 26;
+    q = (h7 + q) >> 25;
+    q = (h8 + q) >> 26;
+    q = (h9 + q) >> 25; /* the carry out of the top limb is the quotient estimate q */
+    /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
+    h0 += 19 * q;
+    /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
+    carry0 = h0 >> 26; /* floor carries (no rounding term), unlike fe_sq */
+    h1 += carry0;
+    h0 -= carry0 << 26;
+    carry1 = h1 >> 25;
+    h2 += carry1;
+    h1 -= carry1 << 25;
+    carry2 = h2 >> 26;
+    h3 += carry2;
+    h2 -= carry2 << 26;
+    carry3 = h3 >> 25;
+    h4 += carry3;
+    h3 -= carry3 << 25;
+    carry4 = h4 >> 26;
+    h5 += carry4;
+    h4 -= carry4 << 26;
+    carry5 = h5 >> 25;
+    h6 += carry5;
+    h5 -= carry5 << 25;
+    carry6 = h6 >> 26;
+    h7 += carry6;
+    h6 -= carry6 << 26;
+    carry7 = h7 >> 25;
+    h8 += carry7;
+    h7 -= carry7 << 25;
+    carry8 = h8 >> 26;
+    h9 += carry8;
+    h8 -= carry8 << 26;
+    carry9 = h9 >> 25;
+    h9 -= carry9 << 25; /* carry9 is dropped rather than propagated; see the comment below */
+    /* h10 = carry9 */
+    /*
+    Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
+    Have h0+...+2^230 h9 between 0 and 2^255-1;
+    evidently 2^255 h10-2^255 q = 0.
+    Goal: Output h0+...+2^230 h9.
+    */
+    s[0] = (unsigned char) (h0 >> 0); /* h0: 26 bits, bytes 0-2 plus the low 2 bits of byte 3 */
+    s[1] = (unsigned char) (h0 >> 8);
+    s[2] = (unsigned char) (h0 >> 16);
+    s[3] = (unsigned char) ((h0 >> 24) | (h1 << 2)); /* top 2 bits of h0, low 6 bits of h1 */
+    s[4] = (unsigned char) (h1 >> 6);
+    s[5] = (unsigned char) (h1 >> 14);
+    s[6] = (unsigned char) ((h1 >> 22) | (h2 << 3)); /* top 3 bits of h1, low 5 bits of h2 */
+    s[7] = (unsigned char) (h2 >> 5);
+    s[8] = (unsigned char) (h2 >> 13);
+    s[9] = (unsigned char) ((h2 >> 21) | (h3 << 5)); /* top 5 bits of h2, low 3 bits of h3 */
+    s[10] = (unsigned char) (h3 >> 3);
+    s[11] = (unsigned char) (h3 >> 11);
+    s[12] = (unsigned char) ((h3 >> 19) | (h4 << 6)); /* top 6 bits of h3, low 2 bits of h4 */
+    s[13] = (unsigned char) (h4 >> 2);
+    s[14] = (unsigned char) (h4 >> 10);
+    s[15] = (unsigned char) (h4 >> 18); /* h0..h4 fill bytes 0-15 exactly (26+25+26+25+26 = 128 bits) */
+    s[16] = (unsigned char) (h5 >> 0); /* h5..h9 repeat the same layout in bytes 16-31 */
+    s[17] = (unsigned char) (h5 >> 8);
+    s[18] = (unsigned char) (h5 >> 16);
+    s[19] = (unsigned char) ((h5 >> 24) | (h6 << 1)); /* top bit of h5 (25 bits wide), low 7 bits of h6 */
+    s[20] = (unsigned char) (h6 >> 7);
+    s[21] = (unsigned char) (h6 >> 15);
+    s[22] = (unsigned char) ((h6 >> 23) | (h7 << 3)); /* top 3 bits of h6, low 5 bits of h7 */
+    s[23] = (unsigned char) (h7 >> 5);
+    s[24] = (unsigned char) (h7 >> 13);
+    s[25] = (unsigned char) ((h7 >> 21) | (h8 << 4)); /* top 4 bits of h7, low 4 bits of h8 */
+    s[26] = (unsigned char) (h8 >> 4);
+    s[27] = (unsigned char) (h8 >> 12);
+    s[28] = (unsigned char) ((h8 >> 20) | (h9 << 6)); /* top 6 bits of h8, low 2 bits of h9 */
+    s[29] = (unsigned char) (h9 >> 2);
+    s[30] = (unsigned char) (h9 >> 10);
+    s[31] = (unsigned char) (h9 >> 18); /* only 7 bits of h9 remain, so the top bit of s[31] is 0 */
+}
--- a/src/fe.h
+++ b/src/fe.h
@ -20,35 +20,20 @@ void fe_0(fe h);
 void fe_1(fe h);

 void fe_frombytes(fe h, const unsigned char *s);
+void fe_tobytes(unsigned char *s, const fe h);

 void fe_copy(fe h, const fe f);
 int fe_isnegative(const fe f);
 int fe_isnonzero(const fe f);
 void fe_cmov(fe f, const fe g, unsigned int b);

+void fe_neg(fe h, const fe f);
 void fe_add(fe h, const fe f, const fe g);
 void fe_invert(fe out, const fe z);
+void fe_sq(fe h, const fe f);
+void fe_sq2(fe h, const fe f);
 void fe_mul(fe h, const fe f, const fe g);
-
-
-
-
-
-
-
-
-
-
-void fe_tobytes(unsigned char *,const fe);
-
-extern int fe_isnonzero(const fe);
-extern void fe_cswap(fe,fe,unsigned int);
-
-extern void fe_sub(fe,const fe,const fe);
-extern void fe_neg(fe,const fe);
-extern void fe_sq(fe,const fe);
-extern void fe_sq2(fe,const fe);
-extern void fe_mul121666(fe,const fe);
-extern void fe_pow22523(fe,const fe);
+void fe_pow22523(fe out, const fe z);
+void fe_sub(fe h, const fe f, const fe g);

 #endif
--- a/src/fe_neg.c
+++ b/src/fe_neg.c
@ -1,44 +0,0 @@
-#include "fe.h"
-
-/*
-h = -f
-
-Preconditions:
-   |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-
-Postconditions:
-   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-*/
-
-void fe_neg(fe h, const fe f) {
-    int32_t f0 = f[0];
-    int32_t f1 = f[1];
-    int32_t f2 = f[2];
-    int32_t f3 = f[3];
-    int32_t f4 = f[4];
-    int32_t f5 = f[5];
-    int32_t f6 = f[6];
-    int32_t f7 = f[7];
-    int32_t f8 = f[8];
-    int32_t f9 = f[9];
-    int32_t h0 = -f0;
-    int32_t h1 = -f1;
-    int32_t h2 = -f2;
-    int32_t h3 = -f3;
-    int32_t h4 = -f4;
-    int32_t h5 = -f5;
-    int32_t h6 = -f6;
-    int32_t h7 = -f7;
-    int32_t h8 = -f8;
-    int32_t h9 = -f9;
-    h[0] = h0;
-    h[1] = h1;
-    h[2] = h2;
-    h[3] = h3;
-    h[4] = h4;
-    h[5] = h5;
-    h[6] = h6;
-    h[7] = h7;
-    h[8] = h8;
-    h[9] = h9;
-}
--- a/src/fe_pow22523.c
+++ b/src/fe_pow22523.c
@ -1,86 +0,0 @@
-#include "fe.h"
-
-void fe_pow22523(fe out, const fe z) {
-    fe t0;
-    fe t1;
-    fe t2;
-    int i;
-    fe_sq(t0, z);
-
-    for (i = 1; i < 1; ++i) {
-        fe_sq(t0, t0);
-    }
-
-    fe_sq(t1, t0);
-
-    for (i = 1; i < 2; ++i) {
-        fe_sq(t1, t1);
-    }
-
-    fe_mul(t1, z, t1);
-    fe_mul(t0, t0, t1);
-    fe_sq(t0, t0);
-
-    for (i = 1; i < 1; ++i) {
-        fe_sq(t0, t0);
-    }
-
-    fe_mul(t0, t1, t0);
-    fe_sq(t1, t0);
-
-    for (i = 1; i < 5; ++i) {
-        fe_sq(t1, t1);
-    }
-
-    fe_mul(t0, t1, t0);
-    fe_sq(t1, t0);
-
-    for (i = 1; i < 10; ++i) {
-        fe_sq(t1, t1);
-    }
-
-    fe_mul(t1, t1, t0);
-    fe_sq(t2, t1);
-
-    for (i = 1; i < 20; ++i) {
-        fe_sq(t2, t2);
-    }
-
-    fe_mul(t1, t2, t1);
-    fe_sq(t1, t1);
-
-    for (i = 1; i < 10; ++i) {
-        fe_sq(t1, t1);
-    }
-
-    fe_mul(t0, t1, t0);
-    fe_sq(t1, t0);
-
-    for (i = 1; i < 50; ++i) {
-        fe_sq(t1, t1);
-    }
-
-    fe_mul(t1, t1, t0);
-    fe_sq(t2, t1);
-
-    for (i = 1; i < 100; ++i) {
-        fe_sq(t2, t2);
-    }
-
-    fe_mul(t1, t2, t1);
-    fe_sq(t1, t1);
-
-    for (i = 1; i < 50; ++i) {
-        fe_sq(t1, t1);
-    }
-
-    fe_mul(t0, t1, t0);
-    fe_sq(t0, t0);
-
-    for (i = 1; i < 2; ++i) {
-        fe_sq(t0, t0);
-    }
-
-    fe_mul(out, t0, z);
-    return;
-}
--- a/src/fe_sq.c
+++ b/src/fe_sq.c
@ -1,164 +0,0 @@
-#include "fe.h"
-#include "pstdint.h"
-
-/*
-h = f * f
-Can overlap h with f.
-
-Preconditions:
-   |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
-
-Postconditions:
-   |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
-*/
-
-/*
-See fe_mul.c for discussion of implementation strategy.
-*/
-
-void fe_sq(fe h, const fe f) {
-    int32_t f0 = f[0];
-    int32_t f1 = f[1];
-    int32_t f2 = f[2];
-    int32_t f3 = f[3];
-    int32_t f4 = f[4];
-    int32_t f5 = f[5];
-    int32_t f6 = f[6];
-    int32_t f7 = f[7];
-    int32_t f8 = f[8];
-    int32_t f9 = f[9];
-    int32_t f0_2 = 2 * f0;
-    int32_t f1_2 = 2 * f1;
-    int32_t f2_2 = 2 * f2;
-    int32_t f3_2 = 2 * f3;
-    int32_t f4_2 = 2 * f4;
-    int32_t f5_2 = 2 * f5;
-    int32_t f6_2 = 2 * f6;
-    int32_t f7_2 = 2 * f7;
-    int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
-    int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
-    int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
-    int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
-    int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
-    int64_t f0f0    = f0   * (int64_t) f0;
-    int64_t f0f1_2  = f0_2 * (int64_t) f1;
-    int64_t f0f2_2  = f0_2 * (int64_t) f2;
-    int64_t f0f3_2  = f0_2 * (int64_t) f3;
-    int64_t f0f4_2  = f0_2 * (int64_t) f4;
-    int64_t f0f5_2  = f0_2 * (int64_t) f5;
-    int64_t f0f6_2  = f0_2 * (int64_t) f6;
-    int64_t f0f7_2  = f0_2 * (int64_t) f7;
-    int64_t f0f8_2  = f0_2 * (int64_t) f8;
-    int64_t f0f9_2  = f0_2 * (int64_t) f9;
-    int64_t f1f1_2  = f1_2 * (int64_t) f1;
-    int64_t f1f2_2  = f1_2 * (int64_t) f2;
-    int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
-    int64_t f1f4_2  = f1_2 * (int64_t) f4;
-    int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
-    int64_t f1f6_2  = f1_2 * (int64_t) f6;
-    int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
-    int64_t f1f8_2  = f1_2 * (int64_t) f8;
-    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
-    int64_t f2f2    = f2   * (int64_t) f2;
-    int64_t f2f3_2  = f2_2 * (int64_t) f3;
-    int64_t f2f4_2  = f2_2 * (int64_t) f4;
-    int64_t f2f5_2  = f2_2 * (int64_t) f5;
-    int64_t f2f6_2  = f2_2 * (int64_t) f6;
-    int64_t f2f7_2  = f2_2 * (int64_t) f7;
-    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
-    int64_t f2f9_38 = f2   * (int64_t) f9_38;
-    int64_t f3f3_2  = f3_2 * (int64_t) f3;
-    int64_t f3f4_2  = f3_2 * (int64_t) f4;
-    int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
-    int64_t f3f6_2  = f3_2 * (int64_t) f6;
-    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
-    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
-    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
-    int64_t f4f4    = f4   * (int64_t) f4;
-    int64_t f4f5_2  = f4_2 * (int64_t) f5;
-    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
-    int64_t f4f7_38 = f4   * (int64_t) f7_38;
-    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
-    int64_t f4f9_38 = f4   * (int64_t) f9_38;
-    int64_t f5f5_38 = f5   * (int64_t) f5_38;
-    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
-    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
-    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
-    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
-    int64_t f6f6_19 = f6   * (int64_t) f6_19;
-    int64_t f6f7_38 = f6   * (int64_t) f7_38;
-    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
-    int64_t f6f9_38 = f6   * (int64_t) f9_38;
-    int64_t f7f7_38 = f7   * (int64_t) f7_38;
-    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
-    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
-    int64_t f8f8_19 = f8   * (int64_t) f8_19;
-    int64_t f8f9_38 = f8   * (int64_t) f9_38;
-    int64_t f9f9_38 = f9   * (int64_t) f9_38;
-    int64_t h0 = f0f0  + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
-    int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
-    int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
-    int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
-    int64_t h4 = f0f4_2 + f1f3_4 + f2f2   + f5f9_76 + f6f8_38 + f7f7_38;
-    int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
-    int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
-    int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
-    int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4   + f9f9_38;
-    int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
-    int64_t carry0;
-    int64_t carry1;
-    int64_t carry2;
-    int64_t carry3;
-    int64_t carry4;
-    int64_t carry5;
-    int64_t carry6;
-    int64_t carry7;
-    int64_t carry8;
-    int64_t carry9;
-    carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
-    h1 += carry0;
-    h0 -= carry0 << 26;
-    carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
-    h5 += carry4;
-    h4 -= carry4 << 26;
-    carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
-    h2 += carry1;
-    h1 -= carry1 << 25;
-    carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
-    h6 += carry5;
-    h5 -= carry5 << 25;
-    carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
-    h3 += carry2;
-    h2 -= carry2 << 26;
-    carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
-    h7 += carry6;
-    h6 -= carry6 << 26;
-    carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
-    h4 += carry3;
-    h3 -= carry3 << 25;
-    carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
-    h8 += carry7;
-    h7 -= carry7 << 25;
-    carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
-    h5 += carry4;
-    h4 -= carry4 << 26;
-    carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
-    h9 += carry8;
-    h8 -= carry8 << 26;
-    carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
-    h0 += carry9 * 19;
-    h9 -= carry9 << 25;
-    carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
-    h1 += carry0;
-    h0 -= carry0 << 26;
-    h[0] = (int32_t) h0;
-    h[1] = (int32_t) h1;
-    h[2] = (int32_t) h2;
-    h[3] = (int32_t) h3;
-    h[4] = (int32_t) h4;
-    h[5] = (int32_t) h5;
-    h[6] = (int32_t) h6;
-    h[7] = (int32_t) h7;
-    h[8] = (int32_t) h8;
-    h[9] = (int32_t) h9;
-}
--- a/src/fe_sq2.c
+++ b/src/fe_sq2.c
@ -1,174 +0,0 @@
-#include "fe.h"
-#include "pstdint.h"
-
-/*
-h = 2 * f * f
-Can overlap h with f.
-
-Preconditions:
-   |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
-
-Postconditions:
-   |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
-*/
-
-/*
-See fe_mul.c for discussion of implementation strategy.
-*/
-
-void fe_sq2(fe h, const fe f) {
-    int32_t f0 = f[0];
-    int32_t f1 = f[1];
-    int32_t f2 = f[2];
-    int32_t f3 = f[3];
-    int32_t f4 = f[4];
-    int32_t f5 = f[5];
-    int32_t f6 = f[6];
-    int32_t f7 = f[7];
-    int32_t f8 = f[8];
-    int32_t f9 = f[9];
-    int32_t f0_2 = 2 * f0;
-    int32_t f1_2 = 2 * f1;
-    int32_t f2_2 = 2 * f2;
-    int32_t f3_2 = 2 * f3;
-    int32_t f4_2 = 2 * f4;
-    int32_t f5_2 = 2 * f5;
-    int32_t f6_2 = 2 * f6;
-    int32_t f7_2 = 2 * f7;
-    int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
-    int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
-    int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
-    int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
-    int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
-    int64_t f0f0    = f0   * (int64_t) f0;
-    int64_t f0f1_2  = f0_2 * (int64_t) f1;
-    int64_t f0f2_2  = f0_2 * (int64_t) f2;
-    int64_t f0f3_2  = f0_2 * (int64_t) f3;
-    int64_t f0f4_2  = f0_2 * (int64_t) f4;
-    int64_t f0f5_2  = f0_2 * (int64_t) f5;
-    int64_t f0f6_2  = f0_2 * (int64_t) f6;
-    int64_t f0f7_2  = f0_2 * (int64_t) f7;
-    int64_t f0f8_2  = f0_2 * (int64_t) f8;
-    int64_t f0f9_2  = f0_2 * (int64_t) f9;
-    int64_t f1f1_2  = f1_2 * (int64_t) f1;
-    int64_t f1f2_2  = f1_2 * (int64_t) f2;
-    int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
-    int64_t f1f4_2  = f1_2 * (int64_t) f4;
-    int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
-    int64_t f1f6_2  = f1_2 * (int64_t) f6;
-    int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
-    int64_t f1f8_2  = f1_2 * (int64_t) f8;
-    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
-    int64_t f2f2    = f2   * (int64_t) f2;
-    int64_t f2f3_2  = f2_2 * (int64_t) f3;
-    int64_t f2f4_2  = f2_2 * (int64_t) f4;
-    int64_t f2f5_2  = f2_2 * (int64_t) f5;
-    int64_t f2f6_2  = f2_2 * (int64_t) f6;
-    int64_t f2f7_2  = f2_2 * (int64_t) f7;
-    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
-    int64_t f2f9_38 = f2   * (int64_t) f9_38;
-    int64_t f3f3_2  = f3_2 * (int64_t) f3;
-    int64_t f3f4_2  = f3_2 * (int64_t) f4;
-    int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
-    int64_t f3f6_2  = f3_2 * (int64_t) f6;
-    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
-    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
-    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
-    int64_t f4f4    = f4   * (int64_t) f4;
-    int64_t f4f5_2  = f4_2 * (int64_t) f5;
-    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
-    int64_t f4f7_38 = f4   * (int64_t) f7_38;
-    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
-    int64_t f4f9_38 = f4   * (int64_t) f9_38;
-    int64_t f5f5_38 = f5   * (int64_t) f5_38;
-    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
-    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
-    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
-    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
-    int64_t f6f6_19 = f6   * (int64_t) f6_19;
-    int64_t f6f7_38 = f6   * (int64_t) f7_38;
-    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
-    int64_t f6f9_38 = f6   * (int64_t) f9_38;
-    int64_t f7f7_38 = f7   * (int64_t) f7_38;
-    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
-    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
-    int64_t f8f8_19 = f8   * (int64_t) f8_19;
-    int64_t f8f9_38 = f8   * (int64_t) f9_38;
-    int64_t f9f9_38 = f9   * (int64_t) f9_38;
-    int64_t h0 = f0f0  + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
-    int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
-    int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
-    int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
-    int64_t h4 = f0f4_2 + f1f3_4 + f2f2   + f5f9_76 + f6f8_38 + f7f7_38;
-    int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
-    int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
-    int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
-    int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4   + f9f9_38;
-    int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
-    int64_t carry0;
-    int64_t carry1;
-    int64_t carry2;
-    int64_t carry3;
-    int64_t carry4;
-    int64_t carry5;
-    int64_t carry6;
-    int64_t carry7;
-    int64_t carry8;
-    int64_t carry9;
-    h0 += h0;
-    h1 += h1;
-    h2 += h2;
-    h3 += h3;
-    h4 += h4;
-    h5 += h5;
-    h6 += h6;
-    h7 += h7;
-    h8 += h8;
-    h9 += h9;
-    carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
-    h1 += carry0;
-    h0 -= carry0 << 26;
-    carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
-    h5 += carry4;
-    h4 -= carry4 << 26;
-    carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
-    h2 += carry1;
-    h1 -= carry1 << 25;
-    carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
-    h6 += carry5;
-    h5 -= carry5 << 25;
-    carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
-    h3 += carry2;
-    h2 -= carry2 << 26;
-    carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
-    h7 += carry6;
-    h6 -= carry6 << 26;
-    carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
-    h4 += carry3;
-    h3 -= carry3 << 25;
-    carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
-    h8 += carry7;
-    h7 -= carry7 << 25;
-    carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
-    h5 += carry4;
-    h4 -= carry4 << 26;
-    carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
-    h9 += carry8;
-    h8 -= carry8 << 26;
-    carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
-    h0 += carry9 * 19;
-    h9 -= carry9 << 25;
-    carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
-    h1 += carry0;
-    h0 -= carry0 << 26;
-    h[0] = (int32_t) h0;
-    h[1] = (int32_t) h1;
-    h[2] = (int32_t) h2;
-    h[3] = (int32_t) h3;
-    h[4] = (int32_t) h4;
-    h[5] = (int32_t) h5;
-    h[6] = (int32_t) h6;
-    h[7] = (int32_t) h7;
-    h[8] = (int32_t) h8;
-    h[9] = (int32_t) h9;
-}
--- a/src/fe_sub.c
+++ b/src/fe_sub.c
@ -1,56 +0,0 @@
-#include "fe.h"
-
-/*
-h = f - g
-Can overlap h with f or g.
-
-Preconditions:
-   |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-   |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-
-Postconditions:
-   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-*/
-
-void fe_sub(fe h, const fe f, const fe g) {
-    int32_t f0 = f[0];
-    int32_t f1 = f[1];
-    int32_t f2 = f[2];
-    int32_t f3 = f[3];
-    int32_t f4 = f[4];
-    int32_t f5 = f[5];
-    int32_t f6 = f[6];
-    int32_t f7 = f[7];
-    int32_t f8 = f[8];
-    int32_t f9 = f[9];
-    int32_t g0 = g[0];
-    int32_t g1 = g[1];
-    int32_t g2 = g[2];
-    int32_t g3 = g[3];
-    int32_t g4 = g[4];
-    int32_t g5 = g[5];
-    int32_t g6 = g[6];
-    int32_t g7 = g[7];
-    int32_t g8 = g[8];
-    int32_t g9 = g[9];
-    int32_t h0 = f0 - g0;
-    int32_t h1 = f1 - g1;
-    int32_t h2 = f2 - g2;
-    int32_t h3 = f3 - g3;
-    int32_t h4 = f4 - g4;
-    int32_t h5 = f5 - g5;
-    int32_t h6 = f6 - g6;
-    int32_t h7 = f7 - g7;
-    int32_t h8 = f8 - g8;
-    int32_t h9 = f9 - g9;
-    h[0] = h0;
-    h[1] = h1;
-    h[2] = h2;
-    h[3] = h3;
-    h[4] = h4;
-    h[5] = h5;
-    h[6] = h6;
-    h[7] = h7;
-    h[8] = h8;
-    h[9] = h9;
-}
--- a/src/fe_tobytes.c
+++ b/src/fe_tobytes.c
@ -1,132 +0,0 @@
-#include "fe.h"
-
-/*
-Preconditions:
-  |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-
-Write p=2^255-19; q=floor(h/p).
-Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
-
-Proof:
-  Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
-  Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
-
-  Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
-  Then 0<y<1.
-
-  Write r=h-pq.
-  Have 0<=r<=p-1=2^255-20.
-  Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
-
-  Write x=r+19(2^-255)r+y.
-  Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
-
-  Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
-  so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
-*/
-
-void fe_tobytes(unsigned char *s, const fe h) {
-    int32_t h0 = h[0];
-    int32_t h1 = h[1];
-    int32_t h2 = h[2];
-    int32_t h3 = h[3];
-    int32_t h4 = h[4];
-    int32_t h5 = h[5];
-    int32_t h6 = h[6];
-    int32_t h7 = h[7];
-    int32_t h8 = h[8];
-    int32_t h9 = h[9];
-    int32_t q;
-    int32_t carry0;
-    int32_t carry1;
-    int32_t carry2;
-    int32_t carry3;
-    int32_t carry4;
-    int32_t carry5;
-    int32_t carry6;
-    int32_t carry7;
-    int32_t carry8;
-    int32_t carry9;
-    q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
-    q = (h0 + q) >> 26;
-    q = (h1 + q) >> 25;
-    q = (h2 + q) >> 26;
-    q = (h3 + q) >> 25;
-    q = (h4 + q) >> 26;
-    q = (h5 + q) >> 25;
-    q = (h6 + q) >> 26;
-    q = (h7 + q) >> 25;
-    q = (h8 + q) >> 26;
-    q = (h9 + q) >> 25;
-    /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
-    h0 += 19 * q;
-    /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
-    carry0 = h0 >> 26;
-    h1 += carry0;
-    h0 -= carry0 << 26;
-    carry1 = h1 >> 25;
-    h2 += carry1;
-    h1 -= carry1 << 25;
-    carry2 = h2 >> 26;
-    h3 += carry2;
-    h2 -= carry2 << 26;
-    carry3 = h3 >> 25;
-    h4 += carry3;
-    h3 -= carry3 << 25;
-    carry4 = h4 >> 26;
-    h5 += carry4;
-    h4 -= carry4 << 26;
-    carry5 = h5 >> 25;
-    h6 += carry5;
-    h5 -= carry5 << 25;
-    carry6 = h6 >> 26;
-    h7 += carry6;
-    h6 -= carry6 << 26;
-    carry7 = h7 >> 25;
-    h8 += carry7;
-    h7 -= carry7 << 25;
-    carry8 = h8 >> 26;
-    h9 += carry8;
-    h8 -= carry8 << 26;
-    carry9 = h9 >> 25;
-    h9 -= carry9 << 25;
-    /* h10 = carry9 */
-    /*
-    Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
-    Have h0+...+2^230 h9 between 0 and 2^255-1;
-    evidently 2^255 h10-2^255 q = 0.
-    Goal: Output h0+...+2^230 h9.
-    */
-    s[0] = (unsigned char) (h0 >> 0);
-    s[1] = (unsigned char) (h0 >> 8);
-    s[2] = (unsigned char) (h0 >> 16);
-    s[3] = (unsigned char) ((h0 >> 24) | (h1 << 2));
-    s[4] = (unsigned char) (h1 >> 6);
-    s[5] = (unsigned char) (h1 >> 14);
-    s[6] = (unsigned char) ((h1 >> 22) | (h2 << 3));
-    s[7] = (unsigned char) (h2 >> 5);
-    s[8] = (unsigned char) (h2 >> 13);
-    s[9] = (unsigned char) ((h2 >> 21) | (h3 << 5));
-    s[10] = (unsigned char) (h3 >> 3);
-    s[11] = (unsigned char) (h3 >> 11);
-    s[12] = (unsigned char) ((h3 >> 19) | (h4 << 6));
-    s[13] = (unsigned char) (h4 >> 2);
-    s[14] = (unsigned char) (h4 >> 10);
-    s[15] = (unsigned char) (h4 >> 18);
-    s[16] = (unsigned char) (h5 >> 0);
-    s[17] = (unsigned char) (h5 >> 8);
-    s[18] = (unsigned char) (h5 >> 16);
-    s[19] = (unsigned char) ((h5 >> 24) | (h6 << 1));
-    s[20] = (unsigned char) (h6 >> 7);
-    s[21] = (unsigned char) (h6 >> 15);
-    s[22] = (unsigned char) ((h6 >> 23) | (h7 << 3));
-    s[23] = (unsigned char) (h7 >> 5);
-    s[24] = (unsigned char) (h7 >> 13);
-    s[25] = (unsigned char) ((h7 >> 21) | (h8 << 4));
-    s[26] = (unsigned char) (h8 >> 4);
-    s[27] = (unsigned char) (h8 >> 12);
-    s[28] = (unsigned char) ((h8 >> 20) | (h9 << 6));
-    s[29] = (unsigned char) (h9 >> 2);
-    s[30] = (unsigned char) (h9 >> 10);
-    s[31] = (unsigned char) (h9 >> 18);
-}
--- a/src/ge.h
+++ b/src/ge.h
@ -50,25 +50,24 @@ typedef struct {
  fe T2d;
 } ge_cached;

-extern void ge_tobytes(unsigned char *,const ge_p2 *);
-extern void ge_p3_tobytes(unsigned char *,const ge_p3 *);
-extern int ge_frombytes_negate_vartime(ge_p3 *,const unsigned char *);
+void ge_p3_tobytes(unsigned char *s, const ge_p3 *h);
+void ge_tobytes(unsigned char *s, const ge_p2 *h);
+int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s);

-extern void ge_p2_0(ge_p2 *);
-extern void ge_p3_0(ge_p3 *);
-extern void ge_precomp_0(ge_precomp *);
-extern void ge_p3_to_p2(ge_p2 *,const ge_p3 *);
-extern void ge_p3_to_cached(ge_cached *,const ge_p3 *);
-extern void ge_p1p1_to_p2(ge_p2 *,const ge_p1p1 *);
-extern void ge_p1p1_to_p3(ge_p3 *,const ge_p1p1 *);
-extern void ge_p2_dbl(ge_p1p1 *,const ge_p2 *);
-extern void ge_p3_dbl(ge_p1p1 *,const ge_p3 *);
+void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
+void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
+void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b);
+void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
+void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
+void ge_scalarmult_base(ge_p3 *h, const unsigned char *a);

-extern void ge_madd(ge_p1p1 *,const ge_p3 *,const ge_precomp *);
-extern void ge_msub(ge_p1p1 *,const ge_p3 *,const ge_precomp *);
-extern void ge_add(ge_p1p1 *,const ge_p3 *,const ge_cached *);
-extern void ge_sub(ge_p1p1 *,const ge_p3 *,const ge_cached *);
-extern void ge_scalarmult_base(ge_p3 *,const unsigned char *);
-extern void ge_double_scalarmult_vartime(ge_p2 *,const unsigned char *,const ge_p3 *,const unsigned char *);
+void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
+void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
+void ge_p2_0(ge_p2 *h);
+void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
+void ge_p3_0(ge_p3 *h);
+void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p);
+void ge_p3_to_cached(ge_cached *r, const ge_p3 *p);
+void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p);

 #endif
--- a/src/ge_add.c
+++ b/src/ge_add.c
@ -1,74 +0,0 @@
-#include "ge.h"
-
-/*
-r = p + q
-*/
-
-void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
-    fe t0;
-    /* qhasm: enter ge_add */
-    /* qhasm: fe X1 */
-    /* qhasm: fe Y1 */
-    /* qhasm: fe Z1 */
-    /* qhasm: fe Z2 */
-    /* qhasm: fe T1 */
-    /* qhasm: fe ZZ */
-    /* qhasm: fe YpX2 */
-    /* qhasm: fe YmX2 */
-    /* qhasm: fe T2d2 */
-    /* qhasm: fe X3 */
-    /* qhasm: fe Y3 */
-    /* qhasm: fe Z3 */
-    /* qhasm: fe T3 */
-    /* qhasm: fe YpX1 */
-    /* qhasm: fe YmX1 */
-    /* qhasm: fe A */
-    /* qhasm: fe B */
-    /* qhasm: fe C */
-    /* qhasm: fe D */
-    /* qhasm: YpX1 = Y1+X1 */
-    /* asm 1: fe_add(>YpX1=fe#1,<Y1=fe#12,<X1=fe#11); */
-    /* asm 2: fe_add(>YpX1=r->X,<Y1=p->Y,<X1=p->X); */
-    fe_add(r->X, p->Y, p->X);
-    /* qhasm: YmX1 = Y1-X1 */
-    /* asm 1: fe_sub(>YmX1=fe#2,<Y1=fe#12,<X1=fe#11); */
-    /* asm 2: fe_sub(>YmX1=r->Y,<Y1=p->Y,<X1=p->X); */
-    fe_sub(r->Y, p->Y, p->X);
-    /* qhasm: A = YpX1*YpX2 */
-    /* asm 1: fe_mul(>A=fe#3,<YpX1=fe#1,<YpX2=fe#15); */
-    /* asm 2: fe_mul(>A=r->Z,<YpX1=r->X,<YpX2=q->YplusX); */
-    fe_mul(r->Z, r->X, q->YplusX);
-    /* qhasm: B = YmX1*YmX2 */
-    /* asm 1: fe_mul(>B=fe#2,<YmX1=fe#2,<YmX2=fe#16); */
-    /* asm 2: fe_mul(>B=r->Y,<YmX1=r->Y,<YmX2=q->YminusX); */
-    fe_mul(r->Y, r->Y, q->YminusX);
-    /* qhasm: C = T2d2*T1 */
-    /* asm 1: fe_mul(>C=fe#4,<T2d2=fe#18,<T1=fe#14); */
-    /* asm 2: fe_mul(>C=r->T,<T2d2=q->T2d,<T1=p->T); */
-    fe_mul(r->T, q->T2d, p->T);
-    /* qhasm: ZZ = Z1*Z2 */
-    /* asm 1: fe_mul(>ZZ=fe#1,<Z1=fe#13,<Z2=fe#17); */
-    /* asm 2: fe_mul(>ZZ=r->X,<Z1=p->Z,<Z2=q->Z); */
-    fe_mul(r->X, p->Z, q->Z);
-    /* qhasm: D = 2*ZZ */
-    /* asm 1: fe_add(>D=fe#5,<ZZ=fe#1,<ZZ=fe#1); */
-    /* asm 2: fe_add(>D=t0,<ZZ=r->X,<ZZ=r->X); */
-    fe_add(t0, r->X, r->X);
-    /* qhasm: X3 = A-B */
-    /* asm 1: fe_sub(>X3=fe#1,<A=fe#3,<B=fe#2); */
-    /* asm 2: fe_sub(>X3=r->X,<A=r->Z,<B=r->Y); */
-    fe_sub(r->X, r->Z, r->Y);
-    /* qhasm: Y3 = A+B */
-    /* asm 1: fe_add(>Y3=fe#2,<A=fe#3,<B=fe#2); */
-    /* asm 2: fe_add(>Y3=r->Y,<A=r->Z,<B=r->Y); */
-    fe_add(r->Y, r->Z, r->Y);
-    /* qhasm: Z3 = D+C */
-    /* asm 1: fe_add(>Z3=fe#3,<D=fe#5,<C=fe#4); */
-    /* asm 2: fe_add(>Z3=r->Z,<D=t0,<C=r->T); */
-    fe_add(r->Z, t0, r->T);
-    /* qhasm: T3 = D-C */
-    /* asm 1: fe_sub(>T3=fe#4,<D=fe#5,<C=fe#4); */
-    /* asm 2: fe_sub(>T3=r->T,<D=t0,<C=r->T); */
-    fe_sub(r->T, t0, r->T);
-    /* qhasm: return */
-}
--- a/src/ge_double_scalarmult.c
+++ b/src/ge_double_scalarmult.c
@ -1,151 +0,0 @@
-#include "ge.h"
-
-static void slide(signed char *r, const unsigned char *a) {
-    int i;
-    int b;
-    int k;
-
-    for (i = 0; i < 256; ++i) {
-        r[i] = 1 & (a[i >> 3] >> (i & 7));
-    }
-
-    for (i = 0; i < 256; ++i)
-        if (r[i]) {
-            for (b = 1; b <= 6 && i + b < 256; ++b) {
-                if (r[i + b]) {
-                    if (r[i] + (r[i + b] << b) <= 15) {
-                        r[i] += r[i + b] << b;
-                        r[i + b] = 0;
-                    } else if (r[i] - (r[i + b] << b) >= -15) {
-                        r[i] -= r[i + b] << b;
-
-                        for (k = i + b; k < 256; ++k) {
-                            if (!r[k]) {
-                                r[k] = 1;
-                                break;
-                            }
-
-                            r[k] = 0;
-                        }
-                    } else {
-                        break;
-                    }
-                }
-            }
-        }
-}
-
-static ge_precomp Bi[8] = {
-    {
-        { 25967493, -14356035, 29566456, 3660896, -12694345, 4014787, 27544626, -11754271, -6079156, 2047605 },
-        { -12545711, 934262, -2722910, 3049990, -727428, 9406986, 12720692, 5043384, 19500929, -15469378 },
-        { -8738181, 4489570, 9688441, -14785194, 10184609, -12363380, 29287919, 11864899, -24514362, -4438546 },
-    },
-    {
-        { 15636291, -9688557, 24204773, -7912398, 616977, -16685262, 27787600, -14772189, 28944400, -1550024 },
-        { 16568933, 4717097, -11556148, -1102322, 15682896, -11807043, 16354577, -11775962, 7689662, 11199574 },
-        { 30464156, -5976125, -11779434, -15670865, 23220365, 15915852, 7512774, 10017326, -17749093, -9920357 },
-    },
-    {
-        { 10861363, 11473154, 27284546, 1981175, -30064349, 12577861, 32867885, 14515107, -15438304, 10819380 },
-        { 4708026, 6336745, 20377586, 9066809, -11272109, 6594696, -25653668, 12483688, -12668491, 5581306 },
-        { 19563160, 16186464, -29386857, 4097519, 10237984, -4348115, 28542350, 13850243, -23678021, -15815942 },
-    },
-    {
-        { 5153746, 9909285, 1723747, -2777874, 30523605, 5516873, 19480852, 5230134, -23952439, -15175766 },
-        { -30269007, -3463509, 7665486, 10083793, 28475525, 1649722, 20654025, 16520125, 30598449, 7715701 },
-        { 28881845, 14381568, 9657904, 3680757, -20181635, 7843316, -31400660, 1370708, 29794553, -1409300 },
-    },
-    {
-        { -22518993, -6692182, 14201702, -8745502, -23510406, 8844726, 18474211, -1361450, -13062696, 13821877 },
-        { -6455177, -7839871, 3374702, -4740862, -27098617, -10571707, 31655028, -7212327, 18853322, -14220951 },
-        { 4566830, -12963868, -28974889, -12240689, -7602672, -2830569, -8514358, -10431137, 2207753, -3209784 },
-    },
-    {
-        { -25154831, -4185821, 29681144, 7868801, -6854661, -9423865, -12437364, -663000, -31111463, -16132436 },
-        { 25576264, -2703214, 7349804, -11814844, 16472782, 9300885, 3844789, 15725684, 171356, 6466918 },
-        { 23103977, 13316479, 9739013, -16149481, 817875, -15038942, 8965339, -14088058, -30714912, 16193877 },
-    },
-    {
-        { -33521811, 3180713, -2394130, 14003687, -16903474, -16270840, 17238398, 4729455, -18074513, 9256800 },
-        { -25182317, -4174131, 32336398, 5036987, -21236817, 11360617, 22616405, 9761698, -19827198, 630305 },
-        { -13720693, 2639453, -24237460, -7406481, 9494427, -5774029, -6554551, -15960994, -2449256, -14291300 },
-    },
-    {
-        { -3151181, -5046075, 9282714, 6866145, -31907062, -863023, -18940575, 15033784, 25105118, -7894876 },
-        { -24326370, 15950226, -31801215, -14592823, -11662737, -5090925, 1573892, -2625887, 2198790, -15804619 },
-        { -3099351, 10324967, -2241613, 7453183, -5446979, -2735503, -13812022, -16236442, -32461234, -12290683 },
-    },
-} ;
-
-/*
-r = a * A + b * B
-where a = a[0]+256*a[1]+...+256^31 a[31].
-and b = b[0]+256*b[1]+...+256^31 b[31].
-B is the Ed25519 base point (x,4/5) with x positive.
-*/
-
-void ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b) {
-    signed char aslide[256];
-    signed char bslide[256];
-    ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
-    ge_p1p1 t;
-    ge_p3 u;
-    ge_p3 A2;
-    int i;
-    slide(aslide, a);
-    slide(bslide, b);
-    ge_p3_to_cached(&Ai[0], A);
-    ge_p3_dbl(&t, A);
-    ge_p1p1_to_p3(&A2, &t);
-    ge_add(&t, &A2, &Ai[0]);
-    ge_p1p1_to_p3(&u, &t);
-    ge_p3_to_cached(&Ai[1], &u);
-    ge_add(&t, &A2, &Ai[1]);
-    ge_p1p1_to_p3(&u, &t);
-    ge_p3_to_cached(&Ai[2], &u);
-    ge_add(&t, &A2, &Ai[2]);
-    ge_p1p1_to_p3(&u, &t);
-    ge_p3_to_cached(&Ai[3], &u);
-    ge_add(&t, &A2, &Ai[3]);
-    ge_p1p1_to_p3(&u, &t);
-    ge_p3_to_cached(&Ai[4], &u);
-    ge_add(&t, &A2, &Ai[4]);
-    ge_p1p1_to_p3(&u, &t);
-    ge_p3_to_cached(&Ai[5], &u);
-    ge_add(&t, &A2, &Ai[5]);
-    ge_p1p1_to_p3(&u, &t);
-    ge_p3_to_cached(&Ai[6], &u);
-    ge_add(&t, &A2, &Ai[6]);
-    ge_p1p1_to_p3(&u, &t);
-    ge_p3_to_cached(&Ai[7], &u);
-    ge_p2_0(r);
-
-    for (i = 255; i >= 0; --i) {
-        if (aslide[i] || bslide[i]) {
-            break;
-        }
-    }
-
-    for (; i >= 0; --i) {
-        ge_p2_dbl(&t, r);
-
-        if (aslide[i] > 0) {
-            ge_p1p1_to_p3(&u, &t);
-            ge_add(&t, &u, &Ai[aslide[i] / 2]);
-        } else if (aslide[i] < 0) {
-            ge_p1p1_to_p3(&u, &t);
-            ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
-        }
-
-        if (bslide[i] > 0) {
-            ge_p1p1_to_p3(&u, &t);
-            ge_madd(&t, &u, &Bi[bslide[i] / 2]);
-        } else if (bslide[i] < 0) {
-            ge_p1p1_to_p3(&u, &t);
-            ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
-        }
-
-        ge_p1p1_to_p2(r, &t);
-    }
-}
--- a/src/ge_frombytes.c
+++ b/src/ge_frombytes.c
@ -1,53 +0,0 @@
-#include "ge.h"
-
-static const fe d = {
-    -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116
-
-} ;
-
-static const fe sqrtm1 = {
-    -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482
-
-} ;
-
-int ge_frombytes_negate_vartime(ge_p3 *h, const unsigned char *s) {
-    fe u;
-    fe v;
-    fe v3;
-    fe vxx;
-    fe check;
-    fe_frombytes(h->Y, s);
-    fe_1(h->Z);
-    fe_sq(u, h->Y);
-    fe_mul(v, u, d);
-    fe_sub(u, u, h->Z);     /* u = y^2-1 */
-    fe_add(v, v, h->Z);     /* v = dy^2+1 */
-    fe_sq(v3, v);
-    fe_mul(v3, v3, v);      /* v3 = v^3 */
-    fe_sq(h->X, v3);
-    fe_mul(h->X, h->X, v);
-    fe_mul(h->X, h->X, u);  /* x = uv^7 */
-    fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
-    fe_mul(h->X, h->X, v3);
-    fe_mul(h->X, h->X, u);  /* x = uv^3(uv^7)^((q-5)/8) */
-    fe_sq(vxx, h->X);
-    fe_mul(vxx, vxx, v);
-    fe_sub(check, vxx, u);  /* vx^2-u */
-
-    if (fe_isnonzero(check)) {
-        fe_add(check, vxx, u); /* vx^2+u */
-
-        if (fe_isnonzero(check)) {
-            return -1;
-        }
-
-        fe_mul(h->X, h->X, sqrtm1);
-    }
-
-    if (fe_isnegative(h->X) == (s[31] >> 7)) {
-        fe_neg(h->X, h->X);
-    }
-
-    fe_mul(h->T, h->X, h->Y);
-    return 0;
-}
--- a/src/ge_madd.c
+++ b/src/ge_madd.c
@ -1,68 +0,0 @@
-#include "ge.h"
-
-/*
-r = p + q
-*/
-
-void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
-    fe t0;
-    /* qhasm: enter ge_madd */
-    /* qhasm: fe X1 */
-    /* qhasm: fe Y1 */
-    /* qhasm: fe Z1 */
-    /* qhasm: fe T1 */
-    /* qhasm: fe ypx2 */
-    /* qhasm: fe ymx2 */
-    /* qhasm: fe xy2d2 */
-    /* qhasm: fe X3 */
-    /* qhasm: fe Y3 */
-    /* qhasm: fe Z3 */
-    /* qhasm: fe T3 */
-    /* qhasm: fe YpX1 */
-    /* qhasm: fe YmX1 */
-    /* qhasm: fe A */
-    /* qhasm: fe B */
-    /* qhasm: fe C */
-    /* qhasm: fe D */
-    /* qhasm: YpX1 = Y1+X1 */
-    /* asm 1: fe_add(>YpX1=fe#1,<Y1=fe#12,<X1=fe#11); */
-    /* asm 2: fe_add(>YpX1=r->X,<Y1=p->Y,<X1=p->X); */
-    fe_add(r->X, p->Y, p->X);
-    /* qhasm: YmX1 = Y1-X1 */
-    /* asm 1: fe_sub(>YmX1=fe#2,<Y1=fe#12,<X1=fe#11); */
-    /* asm 2: fe_sub(>YmX1=r->Y,<Y1=p->Y,<X1=p->X); */
-    fe_sub(r->Y, p->Y, p->X);
-    /* qhasm: A = YpX1*ypx2 */
-    /* asm 1: fe_mul(>A=fe#3,<YpX1=fe#1,<ypx2=fe#15); */
-    /* asm 2: fe_mul(>A=r->Z,<YpX1=r->X,<ypx2=q->yplusx); */
-    fe_mul(r->Z, r->X, q->yplusx);
-    /* qhasm: B = YmX1*ymx2 */
-    /* asm 1: fe_mul(>B=fe#2,<YmX1=fe#2,<ymx2=fe#16); */
-    /* asm 2: fe_mul(>B=r->Y,<YmX1=r->Y,<ymx2=q->yminusx); */
-    fe_mul(r->Y, r->Y, q->yminusx);
-    /* qhasm: C = xy2d2*T1 */
-    /* asm 1: fe_mul(>C=fe#4,<xy2d2=fe#17,<T1=fe#14); */
-    /* asm 2: fe_mul(>C=r->T,<xy2d2=q->xy2d,<T1=p->T); */
-    fe_mul(r->T, q->xy2d, p->T);
-    /* qhasm: D = 2*Z1 */
-    /* asm 1: fe_add(>D=fe#5,<Z1=fe#13,<Z1=fe#13); */
-    /* asm 2: fe_add(>D=t0,<Z1=p->Z,<Z1=p->Z); */
-    fe_add(t0, p->Z, p->Z);
-    /* qhasm: X3 = A-B */
-    /* asm 1: fe_sub(>X3=fe#1,<A=fe#3,<B=fe#2); */
-    /* asm 2: fe_sub(>X3=r->X,<A=r->Z,<B=r->Y); */
-    fe_sub(r->X, r->Z, r->Y);
-    /* qhasm: Y3 = A+B */
-    /* asm 1: fe_add(>Y3=fe#2,<A=fe#3,<B=fe#2); */
-    /* asm 2: fe_add(>Y3=r->Y,<A=r->Z,<B=r->Y); */
-    fe_add(r->Y, r->Z, r->Y);
-    /* qhasm: Z3 = D+C */
-    /* asm 1: fe_add(>Z3=fe#3,<D=fe#5,<C=fe#4); */
-    /* asm 2: fe_add(>Z3=r->Z,<D=t0,<C=r->T); */
-    fe_add(r->Z, t0, r->T);
-    /* qhasm: T3 = D-C */
-    /* asm 1: fe_sub(>T3=fe#4,<D=fe#5,<C=fe#4); */
-    /* asm 2: fe_sub(>T3=r->T,<D=t0,<C=r->T); */
-    fe_sub(r->T, t0, r->T);
-    /* qhasm: return */
-}
--- a/src/ge_msub.c
+++ b/src/ge_msub.c
@ -1,68 +0,0 @@
-#include "ge.h"
-
-/*
-r = p - q
-*/
-
-void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
-    fe t0;
-    /* qhasm: enter ge_msub */
-    /* qhasm: fe X1 */
-    /* qhasm: fe Y1 */
-    /* qhasm: fe Z1 */
-    /* qhasm: fe T1 */
-    /* qhasm: fe ypx2 */
-    /* qhasm: fe ymx2 */
-    /* qhasm: fe xy2d2 */
-    /* qhasm: fe X3 */
-    /* qhasm: fe Y3 */
-    /* qhasm: fe Z3 */
-    /* qhasm: fe T3 */
-    /* qhasm: fe YpX1 */
-    /* qhasm: fe YmX1 */
-    /* qhasm: fe A */
-    /* qhasm: fe B */
-    /* qhasm: fe C */
-    /* qhasm: fe D */
-    /* qhasm: YpX1 = Y1+X1 */
-    /* asm 1: fe_add(>YpX1=fe#1,<Y1=fe#12,<X1=fe#11); */
-    /* asm 2: fe_add(>YpX1=r->X,<Y1=p->Y,<X1=p->X); */
-    fe_add(r->X, p->Y, p->X);
-    /* qhasm: YmX1 = Y1-X1 */
-    /* asm 1: fe_sub(>YmX1=fe#2,<Y1=fe#12,<X1=fe#11); */
-    /* asm 2: fe_sub(>YmX1=r->Y,<Y1=p->Y,<X1=p->X); */
-    fe_sub(r->Y, p->Y, p->X);
-    /* qhasm: A = YpX1*ymx2 */
-    /* asm 1: fe_mul(>A=fe#3,<YpX1=fe#1,<ymx2=fe#16); */
-    /* asm 2: fe_mul(>A=r->Z,<YpX1=r->X,<ymx2=q->yminusx); */
-    fe_mul(r->Z, r->X, q->yminusx);
-    /* qhasm: B = YmX1*ypx2 */
-    /* asm 1: fe_mul(>B=fe#2,<YmX1=fe#2,<ypx2=fe#15); */
-    /* asm 2: fe_mul(>B=r->Y,<YmX1=r->Y,<ypx2=q->yplusx); */
-    fe_mul(r->Y, r->Y, q->yplusx);
-    /* qhasm: C = xy2d2*T1 */
-    /* asm 1: fe_mul(>C=fe#4,<xy2d2=fe#17,<T1=fe#14); */
-    /* asm 2: fe_mul(>C=r->T,<xy2d2=q->xy2d,<T1=p->T); */
-    fe_mul(r->T, q->xy2d, p->T);
-    /* qhasm: D = 2*Z1 */
-    /* asm 1: fe_add(>D=fe#5,<Z1=fe#13,<Z1=fe#13); */
-    /* asm 2: fe_add(>D=t0,<Z1=p->Z,<Z1=p->Z); */
-    fe_add(t0, p->Z, p->Z);
-    /* qhasm: X3 = A-B */
-    /* asm 1: fe_sub(>X3=fe#1,<A=fe#3,<B=fe#2); */
-    /* asm 2: fe_sub(>X3=r->X,<A=r->Z,<B=r->Y); */
-    fe_sub(r->X, r->Z, r->Y);
-    /* qhasm: Y3 = A+B */
-    /* asm 1: fe_add(>Y3=fe#2,<A=fe#3,<B=fe#2); */
-    /* asm 2: fe_add(>Y3=r->Y,<A=r->Z,<B=r->Y); */
-    fe_add(r->Y, r->Z, r->Y);
-    /* qhasm: Z3 = D-C */
-    /* asm 1: fe_sub(>Z3=fe#3,<D=fe#5,<C=fe#4); */
-    /* asm 2: fe_sub(>Z3=r->Z,<D=t0,<C=r->T); */
-    fe_sub(r->Z, t0, r->T);
-    /* qhasm: T3 = D+C */
-    /* asm 1: fe_add(>T3=fe#4,<D=fe#5,<C=fe#4); */
-    /* asm 2: fe_add(>T3=r->T,<D=t0,<C=r->T); */
-    fe_add(r->T, t0, r->T);
-    /* qhasm: return */
-}
--- a/src/ge_p1p1_to_p2.c
+++ b/src/ge_p1p1_to_p2.c
@ -1,11 +0,0 @@
-#include "ge.h"
-
-/*
-r = p
-*/
-
-extern void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
-    fe_mul(r->X, p->X, p->T);
-    fe_mul(r->Y, p->Y, p->Z);
-    fe_mul(r->Z, p->Z, p->T);
-}
--- a/src/ge_p1p1_to_p3.c
+++ b/src/ge_p1p1_to_p3.c
@ -1,12 +0,0 @@
-#include "ge.h"
-
-/*
-r = p
-*/
-
-extern void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
-    fe_mul(r->X, p->X, p->T);
-    fe_mul(r->Y, p->Y, p->Z);
-    fe_mul(r->Z, p->Z, p->T);
-    fe_mul(r->T, p->X, p->Y);
-}
--- a/src/ge_p2_0.c
+++ b/src/ge_p2_0.c
@ -1,7 +0,0 @@
-#include "ge.h"
-
-void ge_p2_0(ge_p2 *h) {
-    fe_0(h->X);
-    fe_1(h->Y);
-    fe_1(h->Z);
-}
--- a/src/ge_p2_dbl.c
+++ b/src/ge_p2_dbl.c
@ -1,59 +0,0 @@
-#include "ge.h"
-
-/*
-r = 2 * p
-*/
-
-void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
-    fe t0;
-    /* qhasm: enter ge_p2_dbl */
-    /* qhasm: fe X1 */
-    /* qhasm: fe Y1 */
-    /* qhasm: fe Z1 */
-    /* qhasm: fe A */
-    /* qhasm: fe AA */
-    /* qhasm: fe XX */
-    /* qhasm: fe YY */
-    /* qhasm: fe B */
-    /* qhasm: fe X3 */
-    /* qhasm: fe Y3 */
-    /* qhasm: fe Z3 */
-    /* qhasm: fe T3 */
-    /* qhasm: XX=X1^2 */
-    /* asm 1: fe_sq(>XX=fe#1,<X1=fe#11); */
-    /* asm 2: fe_sq(>XX=r->X,<X1=p->X); */
-    fe_sq(r->X, p->X);
-    /* qhasm: YY=Y1^2 */
-    /* asm 1: fe_sq(>YY=fe#3,<Y1=fe#12); */
-    /* asm 2: fe_sq(>YY=r->Z,<Y1=p->Y); */
-    fe_sq(r->Z, p->Y);
-    /* qhasm: B=2*Z1^2 */
-    /* asm 1: fe_sq2(>B=fe#4,<Z1=fe#13); */
-    /* asm 2: fe_sq2(>B=r->T,<Z1=p->Z); */
-    fe_sq2(r->T, p->Z);
-    /* qhasm: A=X1+Y1 */
-    /* asm 1: fe_add(>A=fe#2,<X1=fe#11,<Y1=fe#12); */
-    /* asm 2: fe_add(>A=r->Y,<X1=p->X,<Y1=p->Y); */
-    fe_add(r->Y, p->X, p->Y);
-    /* qhasm: AA=A^2 */
-    /* asm 1: fe_sq(>AA=fe#5,<A=fe#2); */
-    /* asm 2: fe_sq(>AA=t0,<A=r->Y); */
-    fe_sq(t0, r->Y);
-    /* qhasm: Y3=YY+XX */
-    /* asm 1: fe_add(>Y3=fe#2,<YY=fe#3,<XX=fe#1); */
-    /* asm 2: fe_add(>Y3=r->Y,<YY=r->Z,<XX=r->X); */
-    fe_add(r->Y, r->Z, r->X);
-    /* qhasm: Z3=YY-XX */
-    /* asm 1: fe_sub(>Z3=fe#3,<YY=fe#3,<XX=fe#1); */
-    /* asm 2: fe_sub(>Z3=r->Z,<YY=r->Z,<XX=r->X); */
-    fe_sub(r->Z, r->Z, r->X);
-    /* qhasm: X3=AA-Y3 */
-    /* asm 1: fe_sub(>X3=fe#1,<AA=fe#5,<Y3=fe#2); */
-    /* asm 2: fe_sub(>X3=r->X,<AA=t0,<Y3=r->Y); */
-    fe_sub(r->X, t0, r->Y);
-    /* qhasm: T3=B-Z3 */
-    /* asm 1: fe_sub(>T3=fe#4,<B=fe#4,<Z3=fe#3); */
-    /* asm 2: fe_sub(>T3=r->T,<B=r->T,<Z3=r->Z); */
-    fe_sub(r->T, r->T, r->Z);
-    /* qhasm: return */
-}
--- a/src/ge_p3_0.c
+++ b/src/ge_p3_0.c
@ -1,8 +0,0 @@
-#include "ge.h"
-
-void ge_p3_0(ge_p3 *h) {
-    fe_0(h->X);
-    fe_1(h->Y);
-    fe_1(h->Z);
-    fe_0(h->T);
-}
--- a/src/ge_p3_dbl.c
+++ b/src/ge_p3_dbl.c
@ -1,11 +0,0 @@
-#include "ge.h"
-
-/*
-r = 2 * p
-*/
-
-void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
-    ge_p2 q;
-    ge_p3_to_p2(&q, p);
-    ge_p2_dbl(r, &q);
-}
--- a/src/ge_p3_to_cached.c
+++ b/src/ge_p3_to_cached.c
@ -1,16 +0,0 @@
-#include "ge.h"
-
-/*
-r = p
-*/
-
-static const fe d2 = {
-    -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199
-} ;
-
-extern void ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
-    fe_add(r->YplusX, p->Y, p->X);
-    fe_sub(r->YminusX, p->Y, p->X);
-    fe_copy(r->Z, p->Z);
-    fe_mul(r->T2d, p->T, d2);
-}
--- a/src/ge_p3_to_p2.c
+++ b/src/ge_p3_to_p2.c
@ -1,11 +0,0 @@
-#include "ge.h"
-
-/*
-r = p
-*/
-
-extern void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
-    fe_copy(r->X, p->X);
-    fe_copy(r->Y, p->Y);
-    fe_copy(r->Z, p->Z);
-}
--- a/src/ge_p3_tobytes.c
+++ b/src/ge_p3_tobytes.c
@ -1,12 +0,0 @@
-#include "ge.h"
-
-void ge_p3_tobytes(unsigned char *s, const ge_p3 *h) {
-    fe recip;
-    fe x;
-    fe y;
-    fe_invert(recip, h->Z);
-    fe_mul(x, h->X, recip);
-    fe_mul(y, h->Y, recip);
-    fe_tobytes(s, y);
-    s[31] ^= fe_isnegative(x) << 7;
-}
--- a/src/ge_precomp_0.c
+++ b/src/ge_precomp_0.c
@ -1,7 +0,0 @@
-#include "ge.h"
-
-void ge_precomp_0(ge_precomp *h) {
-    fe_1(h->yplusx);
-    fe_1(h->yminusx);
-    fe_0(h->xy2d);
-}
--- a/src/ge_scalarmult_base.c
+++ b/src/ge_scalarmult_base.c
--- a/src/ge_sub.c
+++ b/src/ge_sub.c
@ -1,21 +0,0 @@
-#include "ge.h"
-
-/*
-r = p - q
-*/
-
-void ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
-    fe t0;
-    
-    fe_add(r->X, p->Y, p->X);
-    fe_sub(r->Y, p->Y, p->X);
-    fe_mul(r->Z, r->X, q->YminusX);
-    fe_mul(r->Y, r->Y, q->YplusX);
-    fe_mul(r->T, q->T2d, p->T);
-    fe_mul(r->X, p->Z, q->Z);
-    fe_add(t0, r->X, r->X);
-    fe_sub(r->X, r->Z, r->Y);
-    fe_add(r->Y, r->Z, r->Y);
-    fe_sub(r->Z, t0, r->T);
-    fe_add(r->T, t0, r->T);
-}
--- a/src/ge_tobytes.c
+++ b/src/ge_tobytes.c
@ -1,12 +0,0 @@
-#include "ge.h"
-
-void ge_tobytes(unsigned char *s, const ge_p2 *h) {
-    fe recip;
-    fe x;
-    fe y;
-    fe_invert(recip, h->Z);
-    fe_mul(x, h->X, recip);
-    fe_mul(y, h->Y, recip);
-    fe_tobytes(s, y);
-    s[31] ^= fe_isnegative(x) << 7;
-}
--- a/src/test.c
+++ b/src/test.c
@ -3,6 +3,7 @@
 #include <string.h>
 #include "ed25519.h"
 #include <time.h>
+
 char msg[] = "Hello World";

 int main(int argc, char *argv[]) {
@ -10,6 +11,11 @@ int main(int argc, char *argv[]) {
    unsigned char *sigmsg;
    FILE *f;
    int ret;
+	clock_t start;
+	clock_t end;
+	int i;
+	double millis;
+
    ed25519_create_seed(seed);
    f = fopen("seed.txt", "wb");
    fwrite(seed, 32, 1, f);
@ -45,5 +51,23 @@ int main(int argc, char *argv[]) {
        printf("good: detected simple corruption\n");
    }

+    start = clock();
+    for (i = 0; i < 10000; ++i) {
+        ed25519_sign(sigmsg, (unsigned char *)msg, strlen(msg), sk);
+    }
+    end = clock();
+
+    millis = ((double) ((end - start) * 1000)) / CLOCKS_PER_SEC / i * 1000;
+    printf("Sign time in %fus per signature\n", millis);
+
+    start = clock();
+    for (i = 0; i < 10000; ++i) {
+        ed25519_verify(sigmsg, "Hello World", strlen(msg), vk);
+    }
+    end = clock();
+
+    millis = ((double) ((end - start) * 1000)) / CLOCKS_PER_SEC / i * 1000;
+    printf("Verify time in %fus per signature\n", millis);
+
    return 0;
 }