https://github.com/vesselinux/yaarx
Raw File
Tip revision: fb5f6dfc302214da6182b6055737410a8246d78d authored by Vesselin Velichkov on 17 January 2022, 09:01:16 UTC
added best Pr for Speck32 rounds 8,9,10
Tip revision: fb5f6df
dump.c

/* --- */

void test_h_is_group()
{
  assert(WORD_SIZE <= 5);
#if(WORD_SIZE <= 5)
  uint64_t N = ALL_WORDS;
  std::vector<WORD_T> G(N);
  std::iota (std::begin(G), std::end(G), 0);

  for(WORD_T a = 0; a < N; a++) {
	 for(WORD_T b = 0; b < N; b++) {
		WORD_T c = a & b;//H(a, b);
		// printf("%X %X %X\n", a, b, c);
		// closed-ness
		bool b_closed = (std::find(G.begin(), G.end(), c) != G.end());
		assert(b_closed);
		// associativity
		WORD_T x = a & (b & c);  // H(a, H(b, c));
		WORD_T y = (a & b) & c; // H(H(a, b), c);
		bool b_assoc = (x == y);
		assert(b_assoc);
		// identity
		WORD_T e = 0xffffffff & MASK;
		bool b_id = ((c & e) == c);
		assert(b_id);
	 }
  } 
  // inverse
  WORD_T e = 0xffffffff & MASK;
  for(WORD_T a = 1; a < N; a++) {
	 bool b_inverse = false;
	 uint32_t i = 0;
	 while((!b_inverse) && (i < N)) {
		b_inverse = ((a & G[i]) == e);
		printf("%2d: a %X %X %X\n", i, a, G[i], (a & G[i]));
		i++;
	 }
	 //	 printf("[%s:%d] a %X %X %X\n", __FILE__, __LINE__, a, G[i-1], (a & G[i-1]));
	 assert(b_inverse);
  }
#endif // #if(WORD_SIZE <= 5)
}

/* --- */

void test_xdp_h_sort()
{
  assert(WORD_SIZE <= 10);
#if(WORD_SIZE <= 10)
  std::vector<differential_3d_t> diff_vec;
  uint64_t N = ALL_WORDS;
  for(WORD_T da = 0; da < N; da++) {
	 for(WORD_T db = 0; db < N; db++) {
		for(WORD_T dc = 0; dc < N; dc++) {
#if 0 // XDP_H
		  uint32_t w = xdp_h(da, db, dc, WORD_SIZE);
		  double p = std::pow(2, -(int)w);
#endif // #if 0 // XDP_H
#if 0 // XDP_ADD
		  double p = xdp_add_lm(da, db, dc);
#endif // #if 0 // XDP_ADD
#if 1 // ADP_H
		  double p = adp_h_exper(da, db, dc, WORD_SIZE);
#endif // #if 1 // ADP_H
		  differential_3d_t diff;
		  diff.dx = da;
		  diff.dy = db;
		  diff.dz = dc;
		  diff.p = p;
		  diff_vec.push_back(diff);
		}
	 }
  }
  uint32_t cnt_nz = 0;
#if 0
  WORD_T mask_nomsb = ~(1U << (WORD_SIZE - 1));
#endif // #if 0
  std::sort(diff_vec.begin(), diff_vec.end(), sort_comp_diff_3d_p);
  for(std::vector<differential_3d_t>::iterator vec_iter = diff_vec.begin(); vec_iter != diff_vec.end(); vec_iter++) {
	 differential_3d_t diff = *vec_iter;
	 double p = diff.p;
	 WORD_T da = diff.dx;
	 WORD_T db = diff.dy;
	 WORD_T dc = diff.dz;
#if 0
	 uint32_t hwa = hamming_weight(da & mask_nomsb);
	 uint32_t hwb = hamming_weight(db & mask_nomsb);
	 uint32_t hwc = hamming_weight(dc & mask_nomsb);
#endif // #if 0
	 if(p > 0.0) {
		cnt_nz++;
#if 0
		printf("%10d HW %2d %2d %2d ", cnt_nz, hwa, hwb, hwc);
		printf(" %2X %2X %2X | %2.0f %2d\n", da, db, dc, log2(p), hwa + hwb);
#endif // #if 0
		print_binary(da); printf(" ");
		print_binary(db); printf(" ");
		print_binary(dc); printf(" ");
		printf(" %2X %2X %2X %2.0f\n", da, db, dc, log2(p));
	 }
  }
  double ratio_nz = ((double)cnt_nz / (double)diff_vec.size()) * 100.0;
  printf("[%s:%d] nonzero %7d 2^%4.2f zero %7d 2^%4.2f all %7d 2^%4.2f | nz/all %4.2f%%\n", __FILE__, __LINE__, 
			cnt_nz, log2(cnt_nz), 
			(uint32_t)diff_vec.size() - cnt_nz, log2(diff_vec.size() - cnt_nz), 
			(uint32_t)diff_vec.size(), log2(diff_vec.size()), ratio_nz);
  printf("[%s:%d] Test OK.\n", __FILE__, __LINE__);
#endif // #if(WORD_SIZE <= X)
}


/* --- */

/*
[./tests/norx-lwc-search-tests.cc:470]  INPUT:
8080808          0                0                0
0                0                0                0
0                0                0                0
0                0                0                0
[./tests/norx-lwc-search-tests.cc:472] OUTPUT:
0                A0A0A0A          4E4E4E4E         4040404
C0C0C0C          8080808          C4C4C4C4         8080808
8A8A8A8A         4040404          8A8A8A8A         A0A0A0A
7A7A7A7A         3E3E3E3E         90909090         3E3E3E3E
[./tests/norx-lwc-search-tests.cc:475] b_symmetric_in 1 (1) b_symmetric_out 1 (15)
[./tests/norx-lwc-search-tests.cc:483] log2p_trail 2^-1484 min_wt 4308

*/
/* --- */

/*
 * DEBUG excerpt cut out of a larger search loop: b_good, rconst, da, db, dd,
 * de and ngood are defined by enclosing code that is not part of this excerpt.
 *
 * For each of the 4 rotation constants (only while b_good is set) it evaluates
 * xdp_quarter_round on 12 argument orders of (da, db, dd, de): each of the 6
 * unordered pairs is placed first, and the remaining two differences follow
 * in both orders. w_max accumulates the largest value seen over all constants
 * and orders; every value is asserted to differ from INF.
 */
#if 1 // DEBUG
				uint32_t w_max = 0;
				for(uint32_t i = 0; i < 4; i++) { // rconst
				  if(b_good) {
					 uint32_t w[12] = {0};

					 w[0] = xdp_quarter_round(rconst[i], da, db, dd, de, WORD_SIZE);
					 w[1] = xdp_quarter_round(rconst[i], da, db, de, dd, WORD_SIZE);
					 w[2] = xdp_quarter_round(rconst[i], da, dd, db, de, WORD_SIZE);
					 w[3] = xdp_quarter_round(rconst[i], da, dd, de, db, WORD_SIZE);
					 w[4] = xdp_quarter_round(rconst[i], da, de, db, dd, WORD_SIZE);
					 w[5] = xdp_quarter_round(rconst[i], da, de, dd, db, WORD_SIZE);
					 w[6] = xdp_quarter_round(rconst[i], db, dd, da, de, WORD_SIZE);
					 w[7] = xdp_quarter_round(rconst[i], db, dd, de, da, WORD_SIZE);
					 w[8] = xdp_quarter_round(rconst[i], db, de, da, dd, WORD_SIZE);
					 w[9] = xdp_quarter_round(rconst[i], db, de, dd, da, WORD_SIZE);
					 w[10] = xdp_quarter_round(rconst[i], dd, de, da, db, WORD_SIZE);
					 w[11] = xdp_quarter_round(rconst[i], dd, de, db, da, WORD_SIZE);

					 // running maximum over all 12 values (and over all rconst, since
					 // w_max is declared outside the loop)
					 uint32_t j = 0;
					 while(j < 12) {
						w_max = std::max(w_max, w[j]);
						assert(w[j] != INF);
						j++;
					 }
				  }
				}
				// the w_max <= 3 filter is disabled: every tuple is printed
				//				if(w_max <= 3) {
				  printf("%4d %X %X %X %X w_max %2d\n", ngood, da, db, dd, de, w_max);
				  //				}
#endif // #if 1 // DEBUG

/* --- */

				  /*
				   * Excerpt: w, da, db, dd, de and dx are defined by enclosing code that
				   * is not part of this excerpt.
				   * Fills w[0..11] with xdp_h for four difference triples
				   * {da,db,dx}, {da,db,dd}, {da,db,de}, {dd,dx,de}; each triple (p,q,r) is
				   * evaluated in the three orders (p,q,r), (p,r,q) and (r,q,p).
				   */
				  w[ 0] = xdp_h(da, db, dx, WORD_SIZE);
				  w[ 1] = xdp_h(da, dx, db, WORD_SIZE);
				  w[ 2] = xdp_h(dx, db, da, WORD_SIZE);

				  w[ 3] = xdp_h(da, db, dd, WORD_SIZE);
				  w[ 4] = xdp_h(da, dd, db, WORD_SIZE);
				  w[ 5] = xdp_h(dd, db, da, WORD_SIZE);

				  w[ 6] = xdp_h(da, db, de, WORD_SIZE);
				  w[ 7] = xdp_h(da, de, db, WORD_SIZE);
				  w[ 8] = xdp_h(de, db, da, WORD_SIZE);

				  w[ 9] = xdp_h(dd, dx, de, WORD_SIZE);
				  w[10] = xdp_h(dd, de, dx, WORD_SIZE);
				  w[11] = xdp_h(de, dx, dd, WORD_SIZE);



/* --- */

/*
 * DEBUG excerpt cut out of a larger search loop: rconst, da, db, dd, de and
 * ngood are defined by enclosing code that is not part of this excerpt.
 *
 * For each of the 4 rotation constants it forms dx = LROT(de, rconst[i]) ^ dd
 * and evaluates xdp_h on (da, db, dx) in three argument orders, asserting that
 * none of the results equals INF. Tuples with w_max <= 1 are printed.
 *
 * NOTE(review): w_max is assigned from std::max(w1, w2) on every iteration, so
 * after the loop it only holds the maximum for the last rotation constant, not
 * the maximum over all four -- confirm whether that is intended.
 */
#if 1 // DEBUG
				uint32_t w_max = 0;
				for(uint32_t i = 0; i < 4; i++) { // rconst
				  WORD_T dx = LROT(de, rconst[i]) ^ dd;
				  uint32_t w1 = xdp_h(da, db, dx, WORD_SIZE);
				  uint32_t w2 = xdp_h(da, dx, db, WORD_SIZE);
				  uint32_t w3 = xdp_h(dx, db, da, WORD_SIZE);
				  assert((w1 != INF) && (w2 != INF) && (w3 != INF));

				  // overwrites (does not accumulate) the value from the previous constant
				  w_max = std::max(w1, w2);
				  w_max = std::max(w_max, w3);

				  // printf("R[%2d] %2d %X %X %X %X w %2d %2d %2d\n", i, rconst[i], da, db, dd, de, w1, w2, w3);

				}
				if(w_max <= 1) {
				  printf("%11d 2^%f %X %X %X %X w_max %2d\n", ngood, log2(ngood), da, db, dd, de, w_max);
				}
#endif // #if 1 // DEBUG


/* --- */

void test_invariant_diffs()
{
  assert(WORD_SIZE <= 10);
#if(WORD_SIZE <= 10)
  uint32_t rconst[4] = {R0, R1, R2, R3};
  WORD_T N = ALL_WORDS;
  uint32_t ngood = 0;
  for(WORD_T da = 0; da < N; da++) {
	 for(WORD_T db = 0; db < N; db++) {
		for(WORD_T de = 0; de < N; de++) {
		  bool b_good = true;
		  for(uint32_t i = 0; i < 4; i++) { // rconst
			 if(b_good) {
				WORD_T dx = LROT(de, rconst[i]);

				uint32_t w1 = xdp_h(da, db, dx, WORD_SIZE);
				uint32_t w2 = xdp_h(da, dx, db, WORD_SIZE);
				uint32_t w3 = xdp_h(dx, db, da, WORD_SIZE);

				if((w1 == INF) || (w2 == INF) || (w3 == INF)) {
				  b_good = false;
				}
			 }
		  }
		  if(b_good) {
			 ngood++;
#if 1 // DEBUG
			 uint32_t w_max = 0;
			 for(uint32_t i = 0; i < 4; i++) { // rconst
				WORD_T dx = LROT(de, rconst[i]);
				uint32_t w1 = xdp_h(da, db, dx, WORD_SIZE);
				uint32_t w2 = xdp_h(da, dx, db, WORD_SIZE);
				uint32_t w3 = xdp_h(dx, db, da, WORD_SIZE);
				assert((w1 != INF) && (w2 != INF) && (w3 != INF));

				w_max = std::max(w1, w2);
				w_max = std::max(w_max, w3);

			 }
			 if(w_max <= 1) {
				printf("%4d %X %X %X w_max %2d\n", ngood, da, db, de, w_max);
			 }
#endif // #if 1 // DEBUG
		  }
		} 
	 } 
  } 
#endif // #if(WORD_SIZE <= 5)
}

/* --- */

void test_invariant_diffs()
{
  assert(WORD_SIZE <= 10);
#if(WORD_SIZE <= 10)
  uint32_t rconst = 1;
  WORD_T N = ALL_WORDS;
  for(WORD_T da = 0; da < N; da++) {
	 for(WORD_T db = 0; db < N; db++) {
		for(WORD_T i = 0; i < N; i++) {

		  WORD_T dc = RROT(i, rconst);

		  uint32_t w1 = xdp_h(da, db, dc, WORD_SIZE);
		  uint32_t w2 = xdp_h(da, dc, db, WORD_SIZE);
		  uint32_t w3 = xdp_h(db, dc, da, WORD_SIZE);

		  uint32_t w_max = std::max(w1, w2);
		  w_max = std::max(w_max, w3);

		  if((w1 != INF) && (w2 != INF) && (w3 != INF)) {
			 if(w_max <= 1) {
				printf("%X %X %X %2d %2d %2d max %2d\n", da, db, dc, w1, w2, w3, w_max);
			 }
		  }

		} 
	 } 
  } 
#endif // #if(WORD_SIZE <= 5)
}

/* --- */

void test_invariant_sets()
{
  assert(WORD_SIZE <= 16);
#if(WORD_SIZE <= 16)
  WORD_T N = ALL_WORDS;
  std::set<WORD_T> U;
  for(WORD_T x = 0; x < N; x++) {
	 printf("[%s:%d] Process x = %X\n", __FILE__, __LINE__, x);
	 bool b_found = (U.find(x) != U.end());
	 if(!b_found) {
		U.insert(x);
		printf("[%s:%d] U insert %X\n", __FILE__, __LINE__, x);
	 }
	 std::set<WORD_T> V = U;
	 while(!V.empty()) {
		WORD_T y = *(V.begin());
		V.erase(y);
		printf("[%s:%d] V erase %X\n", __FILE__, __LINE__, y);
		WORD_T z = x ^ RROT(y, 1);//H(x, y);
		bool b_found = (U.find(z) != U.end());
		printf("[%s:%d] Is %X in U? %d\n", __FILE__, __LINE__, z, b_found);
		if(!b_found) {
		  U.insert(z);
		  V.insert(z);
		  printf("[%s:%d] U insert %X\n", __FILE__, __LINE__, z);
		  printf("[%s:%d] V insert %X\n", __FILE__, __LINE__, z);
		}
	 }
	 //		WORD_T c = H(a, b);
  } 
  uint32_t i=0;
  for (std::set<WORD_T>::iterator it = U.begin(); it != U.end(); it++, i++) {
	 printf("U[%2d] %X\n", i, *it);
  }
#endif // #if(WORD_SIZE <= 5)
}


/* --- */
/**
 * Compare two implementations of norx_gfun_linear_encrypt
 */
void test_gfun_linear_encryp()
{
  // rows in the generator matrix bits
#if ZERO_CAPACITY  // 0-capacity
  int G_nrows = (NORX_LWCS_NWORDS * WORD_SIZE) - (4 * WORD_SIZE); 
#else // normal
  int G_nrows = (NORX_LWCS_NWORDS * WORD_SIZE);
#endif
  // create an empty generator matrix
  CodeMatrix oGenerator;
  // use the build function to create the generator matrix
  oGenerator.Build(&norx_lwcs_build_function, G_nrows);
  oGenerator.PrintMatrix("norx-lwcs-matrix.cm"); // save to file

  // create an empty generator matrix
  CodeMatrix oGenerator_compact;
  // use the build function to create the generator matrix
  oGenerator.Build(&norx_lwcs_build_function_compact, G_nrows);
  oGenerator.PrintMatrix("norx-lwcs-matrix-compact.cm"); // save to file

}


/* --- */
/**
 * Apply \p nsteps steps of the linearized NORX G function to the state stored
 * in the first NORX_LWCS_NWORDS words of \p matrix and append the state after
 * every step to \p matrix.
 *
 * Steps come in groups of four: steps 1-4 call G_lin_parallel_col(0..3),
 * steps 5-8 call G_lin_parallel_dia(0..3), steps 9-12 are column steps again
 * and steps 13-16 diagonal steps (4 steps = 0.5 round, 8 steps = 1 round,
 * 16 steps = 2 rounds). After step k (1-based) the state is written with
 * norx_lwcs_copy_state_to_matrix at offset k * NORX_LWCS_NWORDS.
 *
 * This replaces the former hand-unrolled switch (one case per step count)
 * with a single loop issuing exactly the same sequence of calls.
 *
 * \param matrix codeword buffer; the input state is read from its first
 *        NORX_LWCS_NWORDS words. The caller must provide room for
 *        NORX_LWCS_NWORDS * (nsteps + 1) words.
 * \param nsteps number of G steps, 1..16. For larger values a message is
 *        printed and \p matrix is left untouched, as before.
 */
void norx_gfun_linear_encrypt(WORD_T * matrix, const uint32_t nsteps)
{
  assert(nsteps > 0);
  assert(NORX_LWCS_NWORDS <= 16); // the local state below holds 16 words

  // largest step count supported by the original implementation (2 rounds)
  const uint32_t max_nsteps = 16;
  if(nsteps > max_nsteps) {
    printf("[%s:%d] Invalid number of G rounds %d. Terminating...\n", __FILE__, __LINE__, nsteps);
    return;
  }

  // working copy of the input state
  WORD_T S[16] = {0};
  for(uint32_t j = 0; j < NORX_LWCS_NWORDS; j++) {
    S[j] = matrix[j];
  }

  // 'offset' is the number of words from the beginning of the codeword;
  // the first NORX_LWCS_NWORDS words hold the input state
  uint32_t offset = NORX_LWCS_NWORDS;

  for(uint32_t step = 0; step < nsteps; step++) {
    const uint32_t istep = step % 4;                    // step index within the half-round
    const bool b_column = (((step / 4) % 2) == 0);      // even half-rounds: columns, odd: diagonals
    if(b_column) {
      G_lin_parallel_col(istep, S);
    } else {
      G_lin_parallel_dia(istep, S);
    }
    norx_lwcs_copy_state_to_matrix(S, matrix, offset);
    offset += NORX_LWCS_NWORDS;
  }
}

/**
 * Construct one row of the generator matrix for the low-weight codeword
 * search (LWCS).
 *
 * The codeword consists of NORX_LWCS_NWORDS input words followed by
 * NORX_LWCS_NWORDS words for each of the NORX_LWCS_NSTEPS steps, as written by
 * norx_gfun_linear_encrypt. The input is set to a single-bit vector selected
 * by \p i: word i / WORD_SIZE receives RROT(1, i + 1).
 *
 * The whole buffer is zeroed up front so that no uninitialised word can reach
 * the codeword if norx_gfun_linear_encrypt rejects the step count.
 *
 * NOTE(review): the rotation amount (i + 1) is not reduced modulo WORD_SIZE
 * here; whether RROT tolerates amounts >= WORD_SIZE cannot be seen from this
 * file -- confirm against the macro definition.
 *
 * \param i index of the input bit to set (row index of the generator matrix);
 *        must address a bit inside the NORX_LWCS_NWORDS input words.
 * \return the codeword as a sequence of 32-bit words.
 *
 * \see rc5_lwcs_build_function
 */
CodeWord norx_lwcs_build_function(uint64_t & i)
{
  assert(WORD_SIZE == 32);
  // the unit-vector word is written at index i / WORD_SIZE: keep it inside the input block
  assert(i < ((uint64_t)NORX_LWCS_NWORDS * WORD_SIZE));

  // input state followed by the state after each of the NORX_LWCS_NSTEPS steps
  WORD_T m[NORX_LWCS_NWORDS + (NORX_LWCS_NWORDS * NORX_LWCS_NSTEPS)];
  const uint32_t m_len = (uint32_t)(sizeof(m) / sizeof(m[0]));
  int r = NORX_LWCS_NSTEPS;

  CodeWord oCodeWord;
  uint32_t unitv = 1;

  // clear the complete codeword buffer, not only the input words
  for(uint32_t j = 0; j < m_len; j++) {
    m[j] = 0;
  }

  // create i-th unit vector for the input
  unitv = RROT(unitv , (i+1));

  // set input to i-th unit vector
  m[i/WORD_SIZE] = unitv;

  // call the linearized function
  norx_gfun_linear_encrypt(m, r);

  // add message to the code
  for(uint32_t j = 0; j < m_len; j++) {
    oCodeWord.Push32(m[j]);
  }

  return oCodeWord;
}

/* --- */
/*
 * Excerpt (not inside any function in this file): prints H(a, b) for all 16
 * pairs (a, b) where a and b are 8-bit patterns made of one 2-bit value
 * repeated four times (at bit offsets 0, 2, 4 and 6).
 */
for(WORD_T x = 0; x < 4; x++) {
  // replicate the 2-bit value x into four adjacent 2-bit fields
  WORD_T a = ((x << 6) | (x << 4) | (x << 2) | (x << 0));
  for(WORD_T y = 0; y < 4; y++) {
	 // same replication for y
	 WORD_T b = ((y << 6) | (y << 4) | (y << 2) | (y << 0));
	 WORD_T c = H(a, b);
	 printf("%2X %2X %2X\n", a, b, c);
  }
 }


/* --- */

double norx_xdp_gfun_exper(const WORD_T da, const WORD_T db, const WORD_T dc, const WORD_T dd)
{
  double prob = 0.0;
  uint32_t cnt = 0;
  assert(WORD_SIZE <= 7);
#if(WORD_SIZE <= 5)
  uint64_t N = std::pow(ALL_WORDS, 8);
  for(WORD_T i = 0; i < N; i++) {
	 for(WORD_T j = 0; j < N; j++) {
		for(WORD_T k = 0; k < N; k++) {
		  for(WORD_T l = 0; l < N; l++) {
			 WORD_T a = i;
			 WORD_T b = j;
			 WORD_T c = k;
			 WORD_T d = l;
			 WORD_T aa = a ^ da;
			 WORD_T bb = b ^ db;
			 WORD_T cc = c ^ dc;
			 WORD_T dd = d ^ dd;
			 gfun(a, b, c, d);
			 gfun(aa, bb, cc, dd);
		  }
		}
	 }
  }
#endif // #if(WORD_SIZE <= 5)
  return prob;
}

/* --- */

/**
 * Heuristic value of a single output bit of H, computed from two input bits.
 *
 * \param a first input bit (0 or 1)
 * \param b second input bit (0 or 1)
 * \return a ^ b, unless HEURISTIC_BIT_VAL_ZERO is set to true at compile
 *         time, in which case the result is always 0.
 */
uint32_t heuristic_set_bit(uint32_t a, uint32_t b)
{
  // both arguments must be single bits
  assert(a <= 1);
  assert(b <= 1);
#if (HEURISTIC_BIT_VAL_ZERO == false)
  return (a ^ b);
#else
  return 0;
#endif // #if (HEURISTIC_BIT_VAL_ZERO == false)
}

/* --- */

/**
 * Heuristic bit value for two input bits.
 *
 * Earlier drafts returned x ^ y only in about half of the calls (chosen with
 * random()); that randomisation is no longer part of the code, so the result
 * is deterministic: x ^ y, or always 0 when HEURISTIC_BIT_VAL_ZERO is set to
 * true at compile time.
 *
 * \param x first input bit (0 or 1)
 * \param y second input bit (0 or 1)
 */
uint32_t heuristic_set_bit(uint32_t x, uint32_t y)
{
  assert(x <= 1);
  assert(y <= 1);
#if (HEURISTIC_BIT_VAL_ZERO == false)
  const uint32_t bit = (x ^ y);
#else
  const uint32_t bit = 0;
#endif // #if (HEURISTIC_BIT_VAL_ZERO == false)
  return bit;
}

/* --- */

/*
 * The quarter-round (compiled out by the #if 0 below).
 *
 * Updates its four arguments in place, in this order:
 *   A = H(A, B); D ^= A; D = RROT(D, R0);
 *   C = H(C, D); B ^= C; B = RROT(B, R1);
 *   A = H(A, B); D ^= A; D = RROT(D, R2);
 *   C = H(C, D); B ^= C; B = RROT(B, R3);
 * Each argument is expanded several times, so it must be a side-effect-free
 * lvalue. The do { ... } while (0) wrapper makes the macro usable as a single
 * statement.
 */
#if 0
#define G(A, B, C, D)                               \
do                                                  \
{                                                   \
    (A) = H(A, B); (D) ^= (A); (D) = RROT((D), R0); \
    (C) = H(C, D); (B) ^= (C); (B) = RROT((B), R1); \
    (A) = H(A, B); (D) ^= (A); (D) = RROT((D), R2); \
    (C) = H(C, D); (B) ^= (C); (B) = RROT((B), R3); \
} while (0)
#endif

/* --- */

/* 
#if 0 // Siwei trail WORD_SIZE 32
#define NORX_TRAIL_LEN 16
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0xA4480000, 0x04480900, 0xC0000604, 0xA0060D44, 0x0},
{0x24080048, 0x04180340, 0x04000200, 0x20000308, 0x0},
{0x40080010, 0x42080010, 0x86080008, 0x82000884, 0x0},
{0xC0020001, 0x40020E01, 0xC008020A, 0x8A0209C0, 0x0},
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0xA0000900, 0x04480900, 0xC0000604, 0x44000604, 0x0},
{0x20000308, 0x04180340, 0x04000200, 0x00000000, 0x0},
{0x82000000, 0x42080010, 0x86080008, 0x84000008, 0x0},
{0x80000200, 0x40020E01, 0xC008020A, 0xC00A020B, 0x0},
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0xA0000900, 0xA0000900, 0x04000C00, 0x44000604, 0x0},
{0x20000308, 0x28000300, 0x04000200, 0x00000000, 0x0},
{0x82000000, 0x02080000, 0x02080000, 0x84000008, 0x0},
{0x80000200, 0x40080001, 0x00020401, 0xC00A020B, 0x0},
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x40000200, 0xA0000900, 0x04000C00, 0x04040400, 0x0},
{0x08000408, 0x28000300, 0x04000200, 0x04080800, 0x0},
{0x80080000, 0x02080000, 0x02080000, 0x00080408, 0x0},
{0xC0080201, 0x40080001, 0x00020401, 0x000A0002, 0x0},
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x40000200, 0x40301A00, 0x06100C08, 0x000A0002, 0x0},
{0x08000408, 0x08301810, 0x000C0401, 0x04040400, 0x0},
{0x80080000, 0x80080800, 0x00041000, 0x04080800, 0x0},
{0xC0080201, 0x40083201, 0x08180E00, 0x00080408, 0x0},
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00101800, 0x40301A00, 0x06100C08, 0x02001A18, 0x0},
{0x00101408, 0x08301810, 0x000C0401, 0x08041410, 0x0},
{0x00100800, 0x80080800, 0x00041000, 0x00041800, 0x0},
{0x00101400, 0x40083201, 0x08180E00, 0x08001810, 0x0},
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00101800, 0x00080400, 0x00101A00, 0x02001A18, 0x0},
{0x00101408, 0x00200400, 0x08101811, 0x08041410, 0x0},
{0x00100800, 0x00100000, 0x00080800, 0x00041800, 0x0},
{0x00101400, 0x02280404, 0x00281210, 0x08001810, 0x0},
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00081C00, 0x00080400, 0x00101A00, 0x06180208, 0x0},
{0x00101818, 0x00200400, 0x08101811, 0x0C080814, 0x0},
{0x00001800, 0x00100000, 0x00080800, 0x00000004, 0x0},
{0x06081804, 0x02280404, 0x00281210, 0x00140E08, 0x0},
}, // T.state[ 7].w =   1
// T.w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00081C00, 0x04080818, 0x00080804, 0x0C080814, 0x0},
{0x00101818, 0x04001010, 0x002C0008, 0x00000004, 0x0},
{0x00001800, 0x0810080A, 0x02080C08, 0x00140E08, 0x0},
{0x06081804, 0x00301008, 0x04280005, 0x06180208, 0x0},
}, // T.state[ 4].w =   1
{ // R[ 9] abcde
{0x0C000418, 0x04080818, 0x00080804, 0x0C00080C, 0x0},
{0x04100808, 0x04001010, 0x002C0008, 0x0C041008, 0x0},
{0x0810000E, 0x0810080A, 0x02080C08, 0x0608040E, 0x0},
{0x0E18080C, 0x00301008, 0x04280005, 0x0408000A, 0x0},
}, // T.state[ 5].w =   0
{ // R[10] abcde
{0x0C000418, 0x00000001, 0x04080018, 0x0C00080C, 0x0},
{0x04100808, 0x00010400, 0x0C201010, 0x0C041008, 0x0},
{0x0810000E, 0x00000202, 0x0800180A, 0x0608040E, 0x0},
{0x0E18080C, 0x01E00002, 0x00300007, 0x0408000A, 0x0},
}, // T.state[ 6].w =   1
{ // R[11] abcde
{0x0C000C09, 0x00000001, 0x04080018, 0x04050000, 0x0},
{0x04111C18, 0x00010400, 0x0C201010, 0x0C100815, 0x0},
{0x08100214, 0x00000202, 0x0800180A, 0x061A0E18, 0x0},
{0x04080812, 0x01E00002, 0x00300007, 0x08180000, 0x0},
}, // T.state[ 7].w =   1
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x0C000C09, 0x1022180A, 0x063A0A02, 0x08180000, 0x0},
{0x04111C18, 0x0C741000, 0x0868000B, 0x04050000, 0x0},
{0x08100214, 0x13100012, 0x00050008, 0x0C100815, 0x0},
{0x04080812, 0x000A0012, 0x08100805, 0x061A0E18, 0x0},
}, // T.state[ 4].w =   1
{ // R[13] abcde
{0x1C220405, 0x1022180A, 0x063A0A02, 0x05143A04, 0x0},
{0x18850C08, 0x0C741000, 0x0868000B, 0x081C800C, 0x0},
{0x0F000206, 0x13100012, 0x00050008, 0x1303100A, 0x0},
{0x0C120800, 0x000A0012, 0x08100805, 0x180A0806, 0x0},
}, // T.state[ 5].w =   0
{ // R[14] abcde
{0x1C220405, 0x01022603, 0x01120002, 0x05143A04, 0x0},
{0x18850C08, 0x00618C12, 0x00148003, 0x081C800C, 0x0},
{0x0F000206, 0x02000202, 0x13001002, 0x1303100A, 0x0},
{0x0C120800, 0x03200002, 0x000A100B, 0x180A0806, 0x0},
}, // T.state[ 6].w =   1
{ // R[15] abcde
{0x05242A06, 0x01022603, 0x01120002, 0x10020030, 0x0},
{0x082D800A, 0x00618C12, 0x00148003, 0x00060031, 0x0},
{0x03000008, 0x02000202, 0x13001002, 0x10021003, 0x0},
{0x09120806, 0x03200002, 0x000A100B, 0x00001118, 0x0},
}, // T.state[ 7].w =   1
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1


 */

/* 

Start time: 1471596959766774
[./tests/norx-best-diff-search-tests.cc:3819] Tests, WORD_SIZE  = 8, MASK = FF
[./tests/norx-best-diff-search-tests.cc:3820] Rotations: R0  1 R1  3 R2  5 R3  7
[./tests/norx-best-diff-search-tests.cc:3821] Attack scenario: INIT_N 0 INIT_NK 0 RATE 0 FULL 1 NONE 0
--- Heuristic search parameters:
FIND_ALL_TRAILS 1
SET_TIME_LIMIT 1
TIME_LIMIT_SECONDS 600
BRANCH_FACTOR_PERCENTAGE 75
MAX_TRIES 64 = 0x40
BOUND_DECREASE_STEP 100
[./tests/norx-best-diff-search-tests.cc:3436] Enter test_norx_diff_trail_heuristic_search_time_limit()
[./tests/norx-best-diff-search-tests.cc:2692] Update bound: 100 -> 1
#if 1 // WORD_SIZE 8 nrounds 2 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 3
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   0
// T.w =   1
};
#endif // #if 1 // WORD_SIZE 8 nrounds 2 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 2 rounds:
B[ 0]  0
B[ 1]  1

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 101 -> 4
#if 1 // WORD_SIZE 8 nrounds 3 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 4
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
// T.w =   4
};
#endif // #if 1 // WORD_SIZE 8 nrounds 3 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 4 -> 2
#if 1 // WORD_SIZE 8 nrounds 3 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 4
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
// T.w =   2
};
#endif // #if 1 // WORD_SIZE 8 nrounds 3 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 3 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 102 -> 9
#if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000084, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000029, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000084, 0x00000000}, // L[3]
}, // T.state[ 4].w =   4
// T.w =   9
};
#endif // #if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 9 -> 8
#if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x0000000C}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000038, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000001, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000084, 0x00000000}, // L[3]
}, // T.state[ 4].w =   3
// T.w =   8
};
#endif // #if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 8 -> 7
#if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000020, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000001, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
// T.w =   7
};
#endif // #if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 7 -> 6
#if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
// T.w =   6
};
#endif // #if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 6 -> 5
#if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
// T.w =   5
};
#endif // #if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 5 -> 4
#if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x00000084}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000084, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000009, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
// T.w =   4
};
#endif // #if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 4 -> 3
#if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
// T.w =   3
};
#endif // #if 1 // WORD_SIZE 8 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 4 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 103 -> 10
#if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 6
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000084, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x0000002B}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x000000B0}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000042, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000084, 0x00000000, 0x00000000}, // L[1]
{0x0000002B, 0x00000029, 0x00000000, 0x00000095, 0x00000000}, // L[2]
{0x000000B0, 0x00000000, 0x00000000, 0x00000058, 0x00000000}, // L[3]
}, // T.state[ 5].w =   5
// T.w =  10
};
#endif // #if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 10 -> 9
#if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 6
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000021}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000021, 0x00000021, 0x00000000, 0x00000090, 0x00000000}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000000}, // L[3]
}, // T.state[ 5].w =   4
// T.w =   9
};
#endif // #if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 9 -> 8
#if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 6
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000014, 0x000000A1, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000014, 0x00000090, 0x00000084}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000084, 0x00000090, 0x00000000}, // L[3]
}, // T.state[ 2].w =   3
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000084, 0x00000084, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000042, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
// T.w =   8
};
#endif // #if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 8 -> 7
#if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 6
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000019, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000000}, // L[1]
{0x00000009, 0x00000019, 0x00000000, 0x00000084, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   3
// T.w =   7
};
#endif // #if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 7 -> 4
#if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 6
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
// T.w =   4
};
#endif // #if 1 // WORD_SIZE 8 nrounds 5 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 5 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3
B[ 4]  4

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 104 -> 28
#if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 7
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x000000B0}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x000000B0, 0x00000010, 0x00000000, 0x00000085, 0x0000008F}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008F, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x0000003F, 0x00000000, 0x00000000, 0x00000049}, // L[2]
{0x000000B0, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x000000C2, 0x00000042}, // L[0]
{0x00000000, 0x00000000, 0x0000008F, 0x00000000, 0x00000085}, // L[1]
{0x00000049, 0x0000003F, 0x00000000, 0x000000A4, 0x000000A4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000048}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000000}, // L[0]
{0x00000000, 0x000000B0, 0x00000085, 0x00000000, 0x00000000}, // L[1]
{0x00000049, 0x00000073, 0x000000A4, 0x000000A4, 0x00000000}, // L[2]
{0x00000090, 0x00000009, 0x00000048, 0x00000048, 0x00000000}, // L[3]
}, // T.state[ 6].w =  13
// T.w =  28
};
#endif // #if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 28 -> 15
#if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 7
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000021}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000021, 0x00000021, 0x00000000, 0x00000090, 0x000000B0}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000021, 0x00000032, 0x000000B0, 0x00000090, 0x00000000}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000000}, // L[3]
}, // T.state[ 6].w =   6
// T.w =  15
};
#endif // #if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 15 -> 14
#if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 7
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000020, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000020, 0x00000020, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000001}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000001, 0x00000000, 0x00000003}, // L[2]
{0x00000080, 0x00000002, 0x00000000, 0x00000000, 0x00000082}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000003, 0x00000001, 0x00000001, 0x00000081, 0x00000080}, // L[2]
{0x00000082, 0x00000002, 0x00000000, 0x00000041, 0x00000041}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000003, 0x00000030, 0x00000080, 0x00000081, 0x00000000}, // L[2]
{0x00000082, 0x00000068, 0x00000041, 0x00000041, 0x00000000}, // L[3]
}, // T.state[ 6].w =   5
// T.w =  14
};
#endif // #if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 14 -> 11
#if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 7
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000090, 0x000000A1, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000090, 0x00000090, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000090, 0x00000090}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000048}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000000}, // L[2]
{0x00000090, 0x00000009, 0x00000048, 0x00000048, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
// T.w =  11
};
#endif // #if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 11 -> 9
#if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 7
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000010, 0x00000020, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000010, 0x00000010, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000010, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   2
// T.w =   9
};
#endif // #if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 9 -> 8
#if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 7
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000084, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000084, 0x00000080, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000004, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[3]
}, // T.state[ 6].w =   2
// T.w =   8
};
#endif // #if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 8 -> 5
#if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 7
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
// T.w =   5
};
#endif // #if 1 // WORD_SIZE 8 nrounds 6 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 6 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3
B[ 4]  4
B[ 5]  5

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 105 -> 30
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000030}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000000, 0x00000081, 0x00000081}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000081, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000023, 0x00000000, 0x00000000, 0x00000061}, // L[1]
{0x00000030, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000081, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000081, 0x00000000, 0x00000083}, // L[0]
{0x00000061, 0x00000023, 0x00000000, 0x000000B0, 0x000000D0}, // L[1]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C0, 0x00000040}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000070, 0x00000083, 0x00000000, 0x00000010}, // L[0]
{0x00000061, 0x0000007E, 0x000000D0, 0x000000B0, 0x00000001}, // L[1]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000011}, // L[2]
{0x00000000, 0x00000008, 0x00000040, 0x000000C0, 0x00000008}, // L[3]
}, // T.state[ 6].w =   8
{ // R[ 7] abcde
{0x00000010, 0x00000070, 0x00000083, 0x00000080, 0x00000000}, // L[0]
{0x00000001, 0x0000007E, 0x000000D0, 0x0000008D, 0x00000000}, // L[1]
{0x00000011, 0x00000001, 0x00000008, 0x000000C8, 0x00000000}, // L[2]
{0x00000008, 0x00000008, 0x00000040, 0x00000046, 0x00000000}, // L[3]
}, // T.state[ 7].w =  13
// T.w =  30
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 30 -> 29
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000021, 0x00000004, 0x00000000, 0x00000023}, // L[1]
{0x00000010, 0x00000008, 0x00000000, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000080, 0x00000002, 0x00000082}, // L[0]
{0x00000023, 0x00000021, 0x00000004, 0x00000091, 0x00000095}, // L[1]
{0x00000008, 0x00000008, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000050, 0x00000082, 0x00000002, 0x00000050}, // L[0]
{0x00000023, 0x00000096, 0x00000095, 0x00000091, 0x00000091}, // L[1]
{0x00000008, 0x00000081, 0x00000004, 0x00000004, 0x00000089}, // L[2]
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   8
{ // R[ 7] abcde
{0x00000050, 0x00000050, 0x00000082, 0x00000092, 0x00000000}, // L[0]
{0x00000091, 0x00000096, 0x00000095, 0x00000000, 0x00000000}, // L[1]
{0x00000089, 0x00000081, 0x00000004, 0x0000006C, 0x00000000}, // L[2]
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000000}, // L[3]
}, // T.state[ 7].w =  12
// T.w =  29
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 29 -> 28
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000021, 0x00000004, 0x00000000, 0x00000023}, // L[1]
{0x00000010, 0x00000008, 0x00000000, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000080, 0x00000002, 0x00000082}, // L[0]
{0x00000023, 0x00000021, 0x00000004, 0x00000091, 0x000000B5}, // L[1]
{0x00000008, 0x00000008, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000050, 0x00000082, 0x00000002, 0x00000070}, // L[0]
{0x00000023, 0x00000092, 0x000000B5, 0x00000091, 0x00000093}, // L[1]
{0x00000008, 0x00000081, 0x00000004, 0x00000004, 0x0000008B}, // L[2]
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000018}, // L[3]
}, // T.state[ 6].w =   8
{ // R[ 7] abcde
{0x00000070, 0x00000050, 0x00000082, 0x00000093, 0x00000000}, // L[0]
{0x00000093, 0x00000092, 0x000000B5, 0x00000010, 0x00000000}, // L[1]
{0x0000008B, 0x00000081, 0x00000004, 0x0000007C, 0x00000000}, // L[2]
{0x00000018, 0x00000008, 0x00000040, 0x000000C2, 0x00000000}, // L[3]
}, // T.state[ 7].w =  11
// T.w =  28
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 28 -> 22
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000020, 0x00000080, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x00000001, 0x00000000, 0x00000000, 0x00000011}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000020, 0x00000020, 0x00000080, 0x00000010, 0x00000090}, // L[0]
{0x00000011, 0x00000001, 0x00000000, 0x00000088, 0x00000088}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000020, 0x00000016, 0x00000090, 0x00000010, 0x0000001A}, // L[0]
{0x00000011, 0x00000031, 0x00000088, 0x00000088, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x0000001A, 0x00000016, 0x00000090, 0x00000050, 0x00000000}, // L[0]
{0x00000000, 0x00000031, 0x00000088, 0x00000044, 0x00000000}, // L[1]
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   8
// T.w =  22
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 22 -> 21
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x00000040}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000018, 0x00000040, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000008, 0x00000018, 0x00000040, 0x00000040, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000030, 0x00000000, 0x00000000, 0x00000070}, // L[1]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000070, 0x00000030, 0x00000000, 0x00000038, 0x00000018}, // L[1]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000070, 0x00000005, 0x00000018, 0x00000038, 0x00000017}, // L[1]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[2]
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x00000004}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000017, 0x00000005, 0x00000018, 0x00000079, 0x00000000}, // L[1]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000000}, // L[2]
{0x00000004, 0x00000004, 0x00000020, 0x00000021, 0x00000000}, // L[3]
}, // T.state[ 7].w =   8
// T.w =  21
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 21 -> 20
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x00000040}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000018, 0x00000040, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000008, 0x00000018, 0x00000040, 0x00000040, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000030, 0x00000000, 0x00000000, 0x00000070}, // L[1]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000070, 0x00000030, 0x00000000, 0x00000038, 0x00000038}, // L[1]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000070, 0x00000001, 0x00000038, 0x00000038, 0x00000011}, // L[1]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[2]
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x0000000C}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000011, 0x00000001, 0x00000038, 0x00000049, 0x00000000}, // L[1]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000000}, // L[2]
{0x0000000C, 0x00000004, 0x00000020, 0x00000061, 0x00000000}, // L[3]
}, // T.state[ 7].w =   7
// T.w =  20
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 20 -> 16
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x000000C0, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000008, 0x00000008, 0x000000C0, 0x00000040, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[1]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000000, 0x00000008, 0x00000008}, // L[1]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000003, 0x00000008, 0x00000008, 0x00000035}, // L[1]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[2]
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x0000000C}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000035, 0x00000003, 0x00000008, 0x000000E9, 0x00000000}, // L[1]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000000}, // L[2]
{0x0000000C, 0x00000004, 0x00000020, 0x00000061, 0x00000000}, // L[3]
}, // T.state[ 7].w =   6
// T.w =  16
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 16 -> 15
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x000000C0, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000008, 0x00000008, 0x000000C0, 0x00000040, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[1]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000000, 0x00000008, 0x00000018}, // L[1]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000001, 0x00000018, 0x00000008, 0x00000011}, // L[1]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[2]
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x00000004}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000011, 0x00000001, 0x00000018, 0x000000C8, 0x00000000}, // L[1]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000000}, // L[2]
{0x00000004, 0x00000004, 0x00000020, 0x00000021, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
// T.w =  15
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 15 -> 13
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000010, 0x00000020, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000010, 0x00000010, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[1]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000030}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000030}, // L[1]
{0x00000030, 0x00000000, 0x00000000, 0x00000018, 0x00000008}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000020, 0x00000002, 0x00000030, 0x00000010, 0x00000022}, // L[1]
{0x00000030, 0x00000001, 0x00000008, 0x00000018, 0x00000011}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000022, 0x00000002, 0x00000030, 0x00000091, 0x00000000}, // L[1]
{0x00000011, 0x00000001, 0x00000008, 0x00000048, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
// T.w =  13
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 13 -> 8
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x000000B0, 0x00000020, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x000000B0, 0x00000010, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000010, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   3
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000000}, // L[3]
}, // T.state[ 7].w =   2
// T.w =   8
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 8 -> 7
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000060, 0x00000020, 0x00000024, 0x00000008, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000020, 0x00000024, 0x00000004, 0x00000020}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   2
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000020, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000020, 0x00000020, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000010, 0x00000010}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000002, 0x00000010, 0x00000010, 0x00000002}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000002, 0x00000002, 0x00000010, 0x00000090, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   1
// T.w =   7
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 7 -> 6
#if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000002, 0x00000002, 0x00000042, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000002, 0x00000042, 0x00000040, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000001}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000020, 0x00000001, 0x00000001, 0x00000020}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000020, 0x00000020, 0x00000001, 0x00000009, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   1
// T.w =   6
};
#endif // #if 1 // WORD_SIZE 8 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 7 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3
B[ 4]  4
B[ 5]  5
B[ 6]  6

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 106 -> 23
#if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 9
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000081}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000010, 0x00000081, 0x00000081, 0x00000031}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000031, 0x00000010, 0x00000081, 0x00000085, 0x00000004}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000004, 0x00000046, 0x00000000}, // L[0]
{0x00000010, 0x00000028, 0x00000002, 0x00000000, 0x00000000}, // L[1]
{0x00000031, 0x00000014, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000085, 0x00000000}, // L[3]
}, // T.state[ 8].w =   7
// T.w =  23
};
#endif // #if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 23 -> 19
#if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 9
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000010, 0x00000020, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000010, 0x00000010, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000010, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000060}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000020, 0x00000000, 0x00000030, 0x00000030}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000002, 0x00000030, 0x00000030, 0x00000022}, // L[2]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000011}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000022, 0x00000002, 0x00000030, 0x00000090, 0x000000A0}, // L[2]
{0x00000011, 0x00000001, 0x00000008, 0x000000C8, 0x00000040}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x000000A0, 0x000000C8, 0x00000000}, // L[0]
{0x00000000, 0x00000045, 0x00000040, 0x00000000, 0x00000000}, // L[1]
{0x00000022, 0x00000082, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000011, 0x00000000, 0x00000000, 0x00000090, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
// T.w =  19
};
#endif // #if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 19 -> 17
#if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 9
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000010, 0x000000A1, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000010, 0x00000090, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000090, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   2
{ // R[ 7] abcde
{0x00000018, 0x00000008, 0x00000040, 0x000000C2, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000018, 0x00000000, 0x00000000, 0x00000046, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000002, 0x000000C2, 0x00000000}, // L[1]
{0x00000000, 0x00000014, 0x00000002, 0x00000000, 0x00000000}, // L[2]
{0x00000088, 0x00000014, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
// T.w =  17
};
#endif // #if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 17 -> 9
#if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 9
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000090, 0x00000020, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000090, 0x00000010, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000010, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000018, 0x00000008, 0x00000040, 0x000000C2, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   1
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000018, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x000000C2, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000014, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 8].w =   3
// T.w =   9
};
#endif // #if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 9 -> 8
#if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 9
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000020, 0x00000020, 0x00000024, 0x00000008, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000020, 0x00000024, 0x00000004, 0x00000020}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000020, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000020, 0x00000020, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000010, 0x00000010}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00000002, 0x00000010, 0x00000010, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000002, 0x00000002, 0x00000010, 0x00000090, 0x00000080}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   1
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000090, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000005, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 8].w =   2
// T.w =   8
};
#endif // #if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 8 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3
B[ 4]  4
B[ 5]  5
B[ 6]  6
B[ 7]  8

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 108 -> 81
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000007}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x0000002E, 0x0000008C, 0x00000000, 0x00000022}, // L[1]
{0x00000030, 0x00000019, 0x00000000, 0x00000000, 0x00000029}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000085, 0x00000080}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000009}, // L[0]
{0x00000022, 0x0000002E, 0x0000008C, 0x00000011, 0x00000097}, // L[1]
{0x00000029, 0x00000019, 0x00000000, 0x00000094, 0x00000094}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000082, 0x00000082}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000021, 0x00000009, 0x00000002, 0x00000021}, // L[0]
{0x00000022, 0x00000037, 0x00000097, 0x00000011, 0x00000019}, // L[1]
{0x00000029, 0x000000B1, 0x00000094, 0x00000094, 0x00000088}, // L[2]
{0x00000080, 0x00000050, 0x00000082, 0x00000082, 0x00000050}, // L[3]
}, // T.state[ 6].w =  13
{ // R[ 7] abcde
{0x00000021, 0x00000021, 0x00000009, 0x00000019, 0x00000000}, // L[0]
{0x00000019, 0x00000037, 0x00000097, 0x00000040, 0x00000071}, // L[1]
{0x00000088, 0x000000B1, 0x00000094, 0x000000E0, 0x00000094}, // L[2]
{0x00000050, 0x00000050, 0x00000082, 0x00000096, 0x00000018}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000021, 0x0000008C, 0x00000094, 0x00000096, 0x000000E5}, // L[0]
{0x00000019, 0x0000004A, 0x00000018, 0x00000019, 0x00000051}, // L[1]
{0x00000088, 0x00000090, 0x00000000, 0x00000040, 0x00000018}, // L[2]
{0x00000050, 0x00000042, 0x00000071, 0x000000E0, 0x00000016}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x000000E5, 0x0000008C, 0x00000094, 0x000000B9, 0x00000000}, // L[0]
{0x00000051, 0x0000004A, 0x00000018, 0x00000024, 0x00000000}, // L[1]
{0x00000018, 0x00000090, 0x00000000, 0x0000002C, 0x00000000}, // L[2]
{0x00000016, 0x00000042, 0x00000071, 0x0000007B, 0x00000000}, // L[3]
}, // T.state[ 9].w =  16
// T.w =  81
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 81 -> 80
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000007}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x0000002E, 0x0000008C, 0x00000000, 0x00000022}, // L[1]
{0x00000030, 0x00000019, 0x00000000, 0x00000000, 0x00000029}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000085, 0x00000080}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000009}, // L[0]
{0x00000022, 0x0000002E, 0x0000008C, 0x00000011, 0x00000097}, // L[1]
{0x00000029, 0x00000019, 0x00000000, 0x00000094, 0x00000094}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000082, 0x00000082}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000021, 0x00000009, 0x00000002, 0x00000021}, // L[0]
{0x00000022, 0x00000037, 0x00000097, 0x00000011, 0x00000019}, // L[1]
{0x00000029, 0x000000B1, 0x00000094, 0x00000094, 0x00000088}, // L[2]
{0x00000080, 0x00000050, 0x00000082, 0x00000082, 0x00000050}, // L[3]
}, // T.state[ 6].w =  13
{ // R[ 7] abcde
{0x00000021, 0x00000021, 0x00000009, 0x00000019, 0x00000000}, // L[0]
{0x00000019, 0x00000037, 0x00000097, 0x00000040, 0x00000079}, // L[1]
{0x00000088, 0x000000B1, 0x00000094, 0x000000E0, 0x000000B4}, // L[2]
{0x00000050, 0x00000050, 0x00000082, 0x00000096, 0x00000010}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000021, 0x0000009C, 0x000000B4, 0x00000096, 0x00000085}, // L[0]
{0x00000019, 0x0000000A, 0x00000010, 0x00000019, 0x00000001}, // L[1]
{0x00000088, 0x00000080, 0x00000000, 0x00000040, 0x00000008}, // L[2]
{0x00000050, 0x00000042, 0x00000079, 0x000000E0, 0x00000012}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x00000085, 0x0000009C, 0x000000B4, 0x00000089, 0x00000000}, // L[0]
{0x00000001, 0x0000000A, 0x00000010, 0x0000000C, 0x00000000}, // L[1]
{0x00000008, 0x00000080, 0x00000000, 0x00000024, 0x00000000}, // L[2]
{0x00000012, 0x00000042, 0x00000079, 0x00000079, 0x00000000}, // L[3]
}, // T.state[ 9].w =  15
// T.w =  80
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 80 -> 79
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000007}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x0000002E, 0x0000008C, 0x00000000, 0x00000022}, // L[1]
{0x00000030, 0x00000019, 0x00000000, 0x00000000, 0x00000029}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000085, 0x00000080}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000009}, // L[0]
{0x00000022, 0x0000002E, 0x0000008C, 0x00000011, 0x00000097}, // L[1]
{0x00000029, 0x00000019, 0x00000000, 0x00000094, 0x00000094}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000082, 0x00000082}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000021, 0x00000009, 0x00000002, 0x00000021}, // L[0]
{0x00000022, 0x00000037, 0x00000097, 0x00000011, 0x00000019}, // L[1]
{0x00000029, 0x000000B1, 0x00000094, 0x00000094, 0x00000088}, // L[2]
{0x00000080, 0x00000050, 0x00000082, 0x00000082, 0x00000050}, // L[3]
}, // T.state[ 6].w =  13
{ // R[ 7] abcde
{0x00000021, 0x00000021, 0x00000009, 0x00000019, 0x00000000}, // L[0]
{0x00000019, 0x00000037, 0x00000097, 0x00000040, 0x000000F5}, // L[1]
{0x00000088, 0x000000B1, 0x00000094, 0x000000E0, 0x00000034}, // L[2]
{0x00000050, 0x00000050, 0x00000082, 0x00000096, 0x00000018}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000021, 0x00000085, 0x00000034, 0x00000096, 0x000000A4}, // L[0]
{0x00000019, 0x0000000B, 0x00000018, 0x00000019, 0x00000000}, // L[1]
{0x00000088, 0x00000090, 0x00000000, 0x00000040, 0x00000018}, // L[2]
{0x00000050, 0x00000042, 0x000000F5, 0x000000E0, 0x00000012}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x000000A4, 0x00000085, 0x00000034, 0x00000019, 0x00000000}, // L[0]
{0x00000000, 0x0000000B, 0x00000018, 0x0000008C, 0x00000000}, // L[1]
{0x00000018, 0x00000090, 0x00000000, 0x0000002C, 0x00000000}, // L[2]
{0x00000012, 0x00000042, 0x000000F5, 0x00000079, 0x00000000}, // L[3]
}, // T.state[ 9].w =  14
// T.w =  79
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 79 -> 78
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000007}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x0000002E, 0x0000008C, 0x00000000, 0x00000022}, // L[1]
{0x00000030, 0x00000019, 0x00000000, 0x00000000, 0x00000029}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000085, 0x00000080}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000009}, // L[0]
{0x00000022, 0x0000002E, 0x0000008C, 0x00000011, 0x00000097}, // L[1]
{0x00000029, 0x00000019, 0x00000000, 0x00000094, 0x00000094}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000082, 0x00000082}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000021, 0x00000009, 0x00000002, 0x00000021}, // L[0]
{0x00000022, 0x00000037, 0x00000097, 0x00000011, 0x00000015}, // L[1]
{0x00000029, 0x000000B1, 0x00000094, 0x00000094, 0x00000088}, // L[2]
{0x00000080, 0x00000050, 0x00000082, 0x00000082, 0x000000D0}, // L[3]
}, // T.state[ 6].w =  13
{ // R[ 7] abcde
{0x00000021, 0x00000021, 0x00000009, 0x00000019, 0x00000000}, // L[0]
{0x00000015, 0x00000037, 0x00000097, 0x00000020, 0x000000B5}, // L[1]
{0x00000088, 0x000000B1, 0x00000094, 0x000000E0, 0x00000034}, // L[2]
{0x000000D0, 0x00000050, 0x00000082, 0x00000092, 0x00000010}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000021, 0x00000005, 0x00000034, 0x00000092, 0x00000024}, // L[0]
{0x00000015, 0x0000000B, 0x00000010, 0x00000019, 0x00000008}, // L[1]
{0x00000088, 0x00000080, 0x00000000, 0x00000020, 0x00000008}, // L[2]
{0x000000D0, 0x00000042, 0x000000B5, 0x000000E0, 0x00000016}, // L[3]
}, // T.state[ 8].w =  17
{ // R[ 9] abcde
{0x00000024, 0x00000005, 0x00000034, 0x0000005B, 0x00000000}, // L[0]
{0x00000008, 0x0000000B, 0x00000010, 0x00000088, 0x00000000}, // L[1]
{0x00000008, 0x00000080, 0x00000000, 0x00000014, 0x00000000}, // L[2]
{0x00000016, 0x00000042, 0x000000B5, 0x0000007B, 0x00000000}, // L[3]
}, // T.state[ 9].w =  14
// T.w =  78
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 78 -> 77
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000007}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x0000002E, 0x0000008C, 0x00000000, 0x00000022}, // L[1]
{0x00000030, 0x00000019, 0x00000000, 0x00000000, 0x00000029}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000085, 0x00000080}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000009}, // L[0]
{0x00000022, 0x0000002E, 0x0000008C, 0x00000011, 0x00000097}, // L[1]
{0x00000029, 0x00000019, 0x00000000, 0x00000094, 0x00000094}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000082, 0x00000082}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000021, 0x00000009, 0x00000002, 0x00000021}, // L[0]
{0x00000022, 0x00000037, 0x00000097, 0x00000011, 0x00000053}, // L[1]
{0x00000029, 0x000000B1, 0x00000094, 0x00000094, 0x00000088}, // L[2]
{0x00000080, 0x00000050, 0x00000082, 0x00000082, 0x00000050}, // L[3]
}, // T.state[ 6].w =  13
{ // R[ 7] abcde
{0x00000021, 0x00000021, 0x00000009, 0x00000019, 0x00000000}, // L[0]
{0x00000053, 0x00000037, 0x00000097, 0x00000012, 0x000000A5}, // L[1]
{0x00000088, 0x000000B1, 0x00000094, 0x000000E0, 0x0000001C}, // L[2]
{0x00000050, 0x00000050, 0x00000082, 0x00000096, 0x00000010}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000021, 0x00000025, 0x0000001C, 0x00000096, 0x00000004}, // L[0]
{0x00000053, 0x0000005B, 0x00000010, 0x00000019, 0x00000008}, // L[1]
{0x00000088, 0x00000080, 0x00000000, 0x00000012, 0x00000008}, // L[2]
{0x00000050, 0x00000042, 0x000000A5, 0x000000E0, 0x00000012}, // L[3]
}, // T.state[ 8].w =  17
{ // R[ 9] abcde
{0x00000004, 0x00000025, 0x0000001C, 0x00000049, 0x00000000}, // L[0]
{0x00000008, 0x0000005B, 0x00000010, 0x00000088, 0x00000000}, // L[1]
{0x00000008, 0x00000080, 0x00000000, 0x0000000D, 0x00000000}, // L[2]
{0x00000012, 0x00000042, 0x000000A5, 0x00000079, 0x00000000}, // L[3]
}, // T.state[ 9].w =  13
// T.w =  77
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 77 -> 76
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000007}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x0000002E, 0x0000008C, 0x00000000, 0x00000022}, // L[1]
{0x00000030, 0x00000019, 0x00000000, 0x00000000, 0x00000029}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000085, 0x00000080}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000009}, // L[0]
{0x00000022, 0x0000002E, 0x0000008C, 0x00000011, 0x00000097}, // L[1]
{0x00000029, 0x00000019, 0x00000000, 0x00000094, 0x00000094}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000082, 0x00000082}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000021, 0x00000009, 0x00000002, 0x00000021}, // L[0]
{0x00000022, 0x00000037, 0x00000097, 0x00000011, 0x00000053}, // L[1]
{0x00000029, 0x000000B1, 0x00000094, 0x00000094, 0x00000088}, // L[2]
{0x00000080, 0x00000050, 0x00000082, 0x00000082, 0x00000050}, // L[3]
}, // T.state[ 6].w =  13
{ // R[ 7] abcde
{0x00000021, 0x00000021, 0x00000009, 0x00000019, 0x00000000}, // L[0]
{0x00000053, 0x00000037, 0x00000097, 0x00000012, 0x000000A7}, // L[1]
{0x00000088, 0x000000B1, 0x00000094, 0x000000E0, 0x0000001C}, // L[2]
{0x00000050, 0x00000050, 0x00000082, 0x00000096, 0x00000014}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000021, 0x00000021, 0x0000001C, 0x00000096, 0x00000000}, // L[0]
{0x00000053, 0x0000005B, 0x00000014, 0x00000019, 0x0000000C}, // L[1]
{0x00000088, 0x00000088, 0x00000000, 0x00000012, 0x00000000}, // L[2]
{0x00000050, 0x00000042, 0x000000A7, 0x000000E0, 0x00000012}, // L[3]
}, // T.state[ 8].w =  17
{ // R[ 9] abcde
{0x00000000, 0x00000021, 0x0000001C, 0x0000004B, 0x00000000}, // L[0]
{0x0000000C, 0x0000005B, 0x00000014, 0x0000008A, 0x00000000}, // L[1]
{0x00000000, 0x00000088, 0x00000000, 0x00000009, 0x00000000}, // L[2]
{0x00000012, 0x00000042, 0x000000A7, 0x00000079, 0x00000000}, // L[3]
}, // T.state[ 9].w =  12
// T.w =  76
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 76 -> 75
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000007}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x0000002E, 0x0000008C, 0x00000000, 0x00000022}, // L[1]
{0x00000030, 0x00000019, 0x00000000, 0x00000000, 0x00000029}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000085, 0x00000080}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000001}, // L[0]
{0x00000022, 0x0000002E, 0x0000008C, 0x00000011, 0x00000087}, // L[1]
{0x00000029, 0x00000019, 0x00000000, 0x00000094, 0x00000094}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000082, 0x00000086}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000020, 0x00000001, 0x00000002, 0x00000020}, // L[0]
{0x00000022, 0x00000035, 0x00000087, 0x00000011, 0x00000011}, // L[1]
{0x00000029, 0x000000B1, 0x00000094, 0x00000094, 0x00000088}, // L[2]
{0x00000080, 0x000000D0, 0x00000086, 0x00000082, 0x00000050}, // L[3]
}, // T.state[ 6].w =  13
{ // R[ 7] abcde
{0x00000020, 0x00000020, 0x00000001, 0x00000011, 0x00000030}, // L[0]
{0x00000011, 0x00000035, 0x00000087, 0x00000000, 0x00000085}, // L[1]
{0x00000088, 0x000000B1, 0x00000094, 0x000000E0, 0x00000014}, // L[2]
{0x00000050, 0x000000D0, 0x00000086, 0x00000096, 0x00000010}, // L[3]
}, // T.state[ 7].w =  14
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000020, 0x00000061, 0x00000014, 0x00000096, 0x00000001}, // L[0]
{0x00000011, 0x0000004B, 0x00000010, 0x00000011, 0x0000004C}, // L[1]
{0x00000088, 0x00000081, 0x00000030, 0x00000000, 0x00000009}, // L[2]
{0x00000050, 0x00000020, 0x00000085, 0x000000E0, 0x00000010}, // L[3]
}, // T.state[ 8].w =  15
{ // R[ 9] abcde
{0x00000001, 0x00000061, 0x00000014, 0x000000CB, 0x00000000}, // L[0]
{0x0000004C, 0x0000004B, 0x00000010, 0x000000AE, 0x00000000}, // L[1]
{0x00000009, 0x00000081, 0x00000030, 0x00000084, 0x00000000}, // L[2]
{0x00000010, 0x00000020, 0x00000085, 0x00000078, 0x00000000}, // L[3]
}, // T.state[ 9].w =  14
// T.w =  75
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 75 -> 74
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000007}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x0000002E, 0x0000008C, 0x00000000, 0x00000022}, // L[1]
{0x00000030, 0x00000019, 0x00000000, 0x00000000, 0x00000029}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000085, 0x00000080}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000007, 0x00000002, 0x00000001}, // L[0]
{0x00000022, 0x0000002E, 0x0000008C, 0x00000011, 0x00000087}, // L[1]
{0x00000029, 0x00000019, 0x00000000, 0x00000094, 0x00000094}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000082, 0x00000086}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000020, 0x00000001, 0x00000002, 0x00000020}, // L[0]
{0x00000022, 0x00000035, 0x00000087, 0x00000011, 0x00000011}, // L[1]
{0x00000029, 0x000000B1, 0x00000094, 0x00000094, 0x00000088}, // L[2]
{0x00000080, 0x000000D0, 0x00000086, 0x00000082, 0x00000050}, // L[3]
}, // T.state[ 6].w =  13
{ // R[ 7] abcde
{0x00000020, 0x00000020, 0x00000001, 0x00000011, 0x00000010}, // L[0]
{0x00000011, 0x00000035, 0x00000087, 0x00000000, 0x00000085}, // L[1]
{0x00000088, 0x000000B1, 0x00000094, 0x000000E0, 0x0000003C}, // L[2]
{0x00000050, 0x000000D0, 0x00000086, 0x00000096, 0x00000014}, // L[3]
}, // T.state[ 7].w =  14
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000020, 0x00000061, 0x0000003C, 0x00000096, 0x00000001}, // L[0]
{0x00000011, 0x0000001B, 0x00000014, 0x00000011, 0x00000008}, // L[1]
{0x00000088, 0x00000089, 0x00000010, 0x00000000, 0x00000001}, // L[2]
{0x00000050, 0x00000060, 0x00000085, 0x000000E0, 0x00000010}, // L[3]
}, // T.state[ 8].w =  15
{ // R[ 9] abcde
{0x00000001, 0x00000061, 0x0000003C, 0x000000CB, 0x00000000}, // L[0]
{0x00000008, 0x0000001B, 0x00000014, 0x0000008C, 0x00000000}, // L[1]
{0x00000001, 0x00000089, 0x00000010, 0x00000080, 0x00000000}, // L[2]
{0x00000010, 0x00000060, 0x00000085, 0x00000078, 0x00000000}, // L[3]
}, // T.state[ 9].w =  13
// T.w =  74
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 74 -> 73
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000000, 0x00000040, 0x00000040}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x00000040, 0x00000040, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000088, 0x00000018, 0x00000040, 0x00000046, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000024, 0x00000000, 0x00000000, 0x0000006C}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000000, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000046, 0x00000000}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x0000000A}, // L[0]
{0x0000006C, 0x00000024, 0x00000000, 0x00000036, 0x00000012}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000044, 0x000000CC}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000023, 0x00000021}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000041, 0x0000000A, 0x00000000, 0x00000041}, // L[0]
{0x0000006C, 0x000000C6, 0x00000012, 0x00000036, 0x00000022}, // L[1]
{0x00000088, 0x00000099, 0x000000CC, 0x00000044, 0x00000011}, // L[2]
{0x00000000, 0x00000024, 0x00000021, 0x00000023, 0x00000024}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000041, 0x00000041, 0x0000000A, 0x0000000A, 0x00000000}, // L[0]
{0x00000022, 0x000000C6, 0x00000012, 0x000000A0, 0x000000D2}, // L[1]
{0x00000011, 0x00000099, 0x000000CC, 0x000000AA, 0x000000A2}, // L[2]
{0x00000024, 0x00000024, 0x00000021, 0x00000038, 0x00000059}, // L[3]
}, // T.state[ 7].w =  14
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000041, 0x00000028, 0x000000A2, 0x00000038, 0x00000029}, // L[0]
{0x00000022, 0x00000076, 0x00000059, 0x0000000A, 0x00000010}, // L[1]
{0x00000011, 0x000000FA, 0x00000000, 0x000000A0, 0x0000000D}, // L[2]
{0x00000024, 0x00000082, 0x000000D2, 0x000000AA, 0x000000A6}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000029, 0x00000028, 0x000000A2, 0x00000088, 0x00000000}, // L[0]
{0x00000010, 0x00000076, 0x00000059, 0x0000000D, 0x00000000}, // L[1]
{0x0000000D, 0x000000FA, 0x00000000, 0x000000D6, 0x00000000}, // L[2]
{0x000000A6, 0x00000082, 0x000000D2, 0x00000006, 0x00000000}, // L[3]
}, // T.state[ 9].w =  20
// T.w =  73
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 73 -> 72
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000000, 0x00000040, 0x00000040}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x00000040, 0x00000040, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000088, 0x00000018, 0x00000040, 0x00000046, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000024, 0x00000000, 0x00000000, 0x0000006C}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000000, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000046, 0x00000000}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x0000000A}, // L[0]
{0x0000006C, 0x00000024, 0x00000000, 0x00000036, 0x00000012}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000044, 0x000000CC}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000023, 0x00000021}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000041, 0x0000000A, 0x00000000, 0x00000041}, // L[0]
{0x0000006C, 0x000000C6, 0x00000012, 0x00000036, 0x00000022}, // L[1]
{0x00000088, 0x00000099, 0x000000CC, 0x00000044, 0x00000011}, // L[2]
{0x00000000, 0x00000024, 0x00000021, 0x00000023, 0x00000024}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000041, 0x00000041, 0x0000000A, 0x0000000A, 0x00000000}, // L[0]
{0x00000022, 0x000000C6, 0x00000012, 0x000000A0, 0x000000B2}, // L[1]
{0x00000011, 0x00000099, 0x000000CC, 0x000000AA, 0x000000A2}, // L[2]
{0x00000024, 0x00000024, 0x00000021, 0x00000038, 0x00000039}, // L[3]
}, // T.state[ 7].w =  14
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000041, 0x000000E8, 0x000000A2, 0x00000038, 0x00000039}, // L[0]
{0x00000022, 0x00000076, 0x00000039, 0x0000000A, 0x00000010}, // L[1]
{0x00000011, 0x0000003A, 0x00000000, 0x000000A0, 0x0000000F}, // L[2]
{0x00000024, 0x00000082, 0x000000B2, 0x000000AA, 0x000000A6}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000039, 0x000000E8, 0x000000A2, 0x00000080, 0x00000000}, // L[0]
{0x00000010, 0x00000076, 0x00000039, 0x0000000D, 0x00000000}, // L[1]
{0x0000000F, 0x0000003A, 0x00000000, 0x000000D7, 0x00000000}, // L[2]
{0x000000A6, 0x00000082, 0x000000B2, 0x00000006, 0x00000000}, // L[3]
}, // T.state[ 9].w =  19
// T.w =  72
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 72 -> 71
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000000, 0x00000040, 0x00000040}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x00000040, 0x00000040, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000088, 0x00000018, 0x00000040, 0x00000046, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000024, 0x00000000, 0x00000000, 0x0000006C}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000000, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000046, 0x00000000}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x0000000A}, // L[0]
{0x0000006C, 0x00000024, 0x00000000, 0x00000036, 0x00000012}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000044, 0x000000CC}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000023, 0x00000021}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000041, 0x0000000A, 0x00000000, 0x00000041}, // L[0]
{0x0000006C, 0x000000C6, 0x00000012, 0x00000036, 0x00000022}, // L[1]
{0x00000088, 0x00000099, 0x000000CC, 0x00000044, 0x00000011}, // L[2]
{0x00000000, 0x00000024, 0x00000021, 0x00000023, 0x00000024}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000041, 0x00000041, 0x0000000A, 0x0000000A, 0x00000000}, // L[0]
{0x00000022, 0x000000C6, 0x00000012, 0x000000A0, 0x000000D2}, // L[1]
{0x00000011, 0x00000099, 0x000000CC, 0x000000AA, 0x00000032}, // L[2]
{0x00000024, 0x00000024, 0x00000021, 0x00000038, 0x00000029}, // L[3]
}, // T.state[ 7].w =  14
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000041, 0x00000028, 0x00000032, 0x00000038, 0x00000029}, // L[0]
{0x00000022, 0x00000057, 0x00000029, 0x0000000A, 0x00000019}, // L[1]
{0x00000011, 0x0000001A, 0x00000000, 0x000000A0, 0x00000009}, // L[2]
{0x00000024, 0x00000082, 0x000000D2, 0x000000AA, 0x000000A2}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000029, 0x00000028, 0x00000032, 0x00000088, 0x00000000}, // L[0]
{0x00000019, 0x00000057, 0x00000029, 0x00000089, 0x00000000}, // L[1]
{0x00000009, 0x0000001A, 0x00000000, 0x000000D4, 0x00000000}, // L[2]
{0x000000A2, 0x00000082, 0x000000D2, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 9].w =  18
// T.w =  71
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 71 -> 70
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000000, 0x00000040, 0x00000040}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x00000040, 0x00000040, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000088, 0x00000018, 0x00000040, 0x00000046, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000024, 0x00000000, 0x00000000, 0x0000006C}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000000, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000046, 0x00000000}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x0000000A}, // L[0]
{0x0000006C, 0x00000024, 0x00000000, 0x00000036, 0x00000012}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000044, 0x000000CC}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000023, 0x00000021}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000041, 0x0000000A, 0x00000000, 0x00000041}, // L[0]
{0x0000006C, 0x000000C6, 0x00000012, 0x00000036, 0x00000022}, // L[1]
{0x00000088, 0x00000099, 0x000000CC, 0x00000044, 0x00000011}, // L[2]
{0x00000000, 0x00000024, 0x00000021, 0x00000023, 0x00000024}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000041, 0x00000041, 0x0000000A, 0x0000000A, 0x00000010}, // L[0]
{0x00000022, 0x000000C6, 0x00000012, 0x000000A0, 0x000000D2}, // L[1]
{0x00000011, 0x00000099, 0x000000CC, 0x000000AA, 0x000000B2}, // L[2]
{0x00000024, 0x00000024, 0x00000021, 0x00000038, 0x00000029}, // L[3]
}, // T.state[ 7].w =  14
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000041, 0x00000028, 0x000000B2, 0x00000038, 0x00000079}, // L[0]
{0x00000022, 0x00000056, 0x00000029, 0x0000000A, 0x00000010}, // L[1]
{0x00000011, 0x0000001A, 0x00000010, 0x000000A0, 0x00000019}, // L[2]
{0x00000024, 0x000000A2, 0x000000D2, 0x000000AA, 0x000000CA}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000079, 0x00000028, 0x000000B2, 0x000000A0, 0x00000000}, // L[0]
{0x00000010, 0x00000056, 0x00000029, 0x0000000D, 0x00000000}, // L[1]
{0x00000019, 0x0000001A, 0x00000010, 0x000000DC, 0x00000000}, // L[2]
{0x000000CA, 0x000000A2, 0x000000D2, 0x00000030, 0x00000000}, // L[3]
}, // T.state[ 9].w =  17
// T.w =  70
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 70 -> 69
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000000, 0x00000040, 0x00000040}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x00000040, 0x00000040, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000088, 0x00000018, 0x00000040, 0x00000046, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000024, 0x00000000, 0x00000000, 0x0000006C}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000000, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000046, 0x00000000}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x0000000A}, // L[0]
{0x0000006C, 0x00000024, 0x00000000, 0x00000036, 0x00000012}, // L[1]
{0x00000088, 0x00000000, 0x00000000, 0x00000044, 0x000000CC}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000023, 0x00000021}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000041, 0x0000000A, 0x00000000, 0x00000041}, // L[0]
{0x0000006C, 0x000000C6, 0x00000012, 0x00000036, 0x00000022}, // L[1]
{0x00000088, 0x00000099, 0x000000CC, 0x00000044, 0x00000011}, // L[2]
{0x00000000, 0x00000024, 0x00000021, 0x00000023, 0x00000024}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000041, 0x00000041, 0x0000000A, 0x0000000A, 0x00000000}, // L[0]
{0x00000022, 0x000000C6, 0x00000012, 0x000000A0, 0x000000F2}, // L[1]
{0x00000011, 0x00000099, 0x000000CC, 0x000000AA, 0x000000BA}, // L[2]
{0x00000024, 0x00000024, 0x00000021, 0x00000038, 0x00000029}, // L[3]
}, // T.state[ 7].w =  14
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000041, 0x00000068, 0x000000BA, 0x00000038, 0x00000029}, // L[0]
{0x00000022, 0x00000046, 0x00000029, 0x0000000A, 0x00000020}, // L[1]
{0x00000011, 0x0000001A, 0x00000000, 0x000000A0, 0x00000019}, // L[2]
{0x00000024, 0x00000082, 0x000000F2, 0x000000AA, 0x000000A6}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000029, 0x00000068, 0x000000BA, 0x00000088, 0x00000000}, // L[0]
{0x00000020, 0x00000046, 0x00000029, 0x00000015, 0x00000000}, // L[1]
{0x00000019, 0x0000001A, 0x00000000, 0x000000DC, 0x00000000}, // L[2]
{0x000000A6, 0x00000082, 0x000000F2, 0x00000006, 0x00000000}, // L[3]
}, // T.state[ 9].w =  16
// T.w =  69
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 69 -> 46
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000063}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000063, 0x00000021, 0x00000000, 0x000000B1, 0x000000D1}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000063, 0x0000001E, 0x000000D1, 0x000000B1, 0x00000001}, // L[2]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000011}, // L[3]
}, // T.state[ 6].w =   7
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000001, 0x0000001E, 0x000000D1, 0x00000085, 0x0000005C}, // L[2]
{0x00000011, 0x00000001, 0x00000008, 0x000000C8, 0x000000C0}, // L[3]
}, // T.state[ 7].w =  11
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000020, 0x0000005C, 0x000000C8, 0x00000068}, // L[0]
{0x00000010, 0x00000084, 0x000000C0, 0x00000042, 0x00000094}, // L[1]
{0x00000001, 0x00000083, 0x00000002, 0x00000080, 0x00000080}, // L[2]
{0x00000011, 0x00000014, 0x00000000, 0x00000085, 0x00000005}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x00000068, 0x00000020, 0x0000005C, 0x00000050, 0x00000000}, // L[0]
{0x00000094, 0x00000084, 0x000000C0, 0x0000006B, 0x00000000}, // L[1]
{0x00000080, 0x00000083, 0x00000002, 0x00000000, 0x00000000}, // L[2]
{0x00000005, 0x00000014, 0x00000000, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 9].w =  11
// T.w =  46
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 46 -> 45
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000019, 0x00000000, 0x00000000, 0x00000019}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000084}, // L[1]
{0x00000019, 0x00000019, 0x00000000, 0x0000008C, 0x0000009C}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000090, 0x00000084, 0x00000000, 0x00000090}, // L[1]
{0x00000019, 0x000000B0, 0x0000009C, 0x0000008C, 0x000000C9}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   7
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000090, 0x00000090, 0x00000084, 0x00000084, 0x00000000}, // L[1]
{0x000000C9, 0x000000B0, 0x0000009C, 0x0000002A, 0x000000BE}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000021, 0x000000BE, 0x00000000, 0x00000021}, // L[0]
{0x00000090, 0x0000001C, 0x00000000, 0x00000012, 0x00000094}, // L[1]
{0x000000C9, 0x00000000, 0x00000010, 0x00000084, 0x00000049}, // L[2]
{0x00000000, 0x000000A0, 0x00000000, 0x0000002A, 0x000000A0}, // L[3]
}, // T.state[ 8].w =  10
{ // R[ 9] abcde
{0x00000021, 0x00000021, 0x000000BE, 0x00000090, 0x00000000}, // L[0]
{0x00000094, 0x0000001C, 0x00000000, 0x00000043, 0x00000000}, // L[1]
{0x00000049, 0x00000000, 0x00000010, 0x000000E6, 0x00000000}, // L[2]
{0x000000A0, 0x000000A0, 0x00000000, 0x00000045, 0x00000000}, // L[3]
}, // T.state[ 9].w =  13
// T.w =  45
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 45 -> 44
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x0000000C, 0x00000000, 0x00000001}, // L[2]
{0x00000000, 0x00000018, 0x00000000, 0x00000000, 0x00000018}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000002, 0x00000082}, // L[1]
{0x00000001, 0x00000001, 0x0000000C, 0x00000080, 0x00000084}, // L[2]
{0x00000018, 0x00000018, 0x00000000, 0x0000000C, 0x0000001C}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000050, 0x00000082, 0x00000002, 0x00000050}, // L[1]
{0x00000001, 0x000000B0, 0x00000084, 0x00000080, 0x00000091}, // L[2]
{0x00000018, 0x00000080, 0x0000001C, 0x0000000C, 0x00000088}, // L[3]
}, // T.state[ 6].w =   7
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000050, 0x00000050, 0x00000082, 0x00000092, 0x00000010}, // L[1]
{0x00000091, 0x000000B0, 0x00000084, 0x00000088, 0x0000001C}, // L[2]
{0x00000088, 0x00000080, 0x0000001C, 0x00000024, 0x00000048}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000080, 0x0000001C, 0x00000024, 0x00000080}, // L[0]
{0x00000050, 0x00000059, 0x00000048, 0x00000000, 0x00000009}, // L[1]
{0x00000091, 0x00000091, 0x00000000, 0x00000092, 0x00000000}, // L[2]
{0x00000088, 0x00000000, 0x00000010, 0x00000088, 0x00000088}, // L[3]
}, // T.state[ 8].w =  10
{ // R[ 9] abcde
{0x00000080, 0x00000080, 0x0000001C, 0x00000052, 0x00000000}, // L[0]
{0x00000009, 0x00000059, 0x00000048, 0x00000084, 0x00000000}, // L[1]
{0x00000000, 0x00000091, 0x00000000, 0x00000049, 0x00000000}, // L[2]
{0x00000088, 0x00000000, 0x00000010, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 9].w =  10
// T.w =  44
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 44 -> 43
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x00000040}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000018, 0x00000040, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000008, 0x00000018, 0x00000040, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000030, 0x00000000, 0x00000000, 0x00000010}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000030, 0x00000000, 0x00000008, 0x00000018}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x0000000C}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000005, 0x00000018, 0x00000008, 0x00000015}, // L[2]
{0x00000008, 0x00000081, 0x0000000C, 0x00000004, 0x0000008B}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x0000000C, 0x00000004, 0x00000020, 0x00000061, 0x00000001}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000015, 0x00000005, 0x00000018, 0x000000E8, 0x00000000}, // L[2]
{0x0000008B, 0x00000081, 0x0000000C, 0x0000007C, 0x00000000}, // L[3]
}, // T.state[ 7].w =   7
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x0000000C, 0x00000000, 0x00000000, 0x0000007C, 0x00000004}, // L[0]
{0x00000000, 0x0000000A, 0x00000000, 0x00000061, 0x0000000A}, // L[1]
{0x00000015, 0x00000003, 0x00000001, 0x00000000, 0x00000010}, // L[2]
{0x0000008B, 0x0000000A, 0x00000000, 0x000000E8, 0x00000081}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000004, 0x00000000, 0x00000000, 0x0000003C, 0x00000000}, // L[0]
{0x0000000A, 0x0000000A, 0x00000000, 0x000000B5, 0x00000000}, // L[1]
{0x00000010, 0x00000003, 0x00000001, 0x00000008, 0x00000000}, // L[2]
{0x00000081, 0x0000000A, 0x00000000, 0x000000B4, 0x00000000}, // L[3]
}, // T.state[ 9].w =  12
// T.w =  43
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 43 -> 41
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x00000040}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000018, 0x00000040, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000008, 0x00000018, 0x00000040, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000030, 0x00000000, 0x00000000, 0x00000070}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000070, 0x00000030, 0x00000000, 0x00000038, 0x00000048}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x00000004}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000070, 0x0000000F, 0x00000048, 0x00000038, 0x00000001}, // L[2]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000004, 0x00000004, 0x00000020, 0x00000021, 0x00000041}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000001, 0x0000000F, 0x00000048, 0x000000C9, 0x00000001}, // L[2]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000020}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000004, 0x00000000, 0x00000001, 0x00000064, 0x00000004}, // L[0]
{0x00000000, 0x0000001C, 0x00000020, 0x00000021, 0x00000004}, // L[1]
{0x00000001, 0x00000041, 0x00000041, 0x00000000, 0x00000042}, // L[2]
{0x00000088, 0x0000008A, 0x00000000, 0x000000C9, 0x00000002}, // L[3]
}, // T.state[ 8].w =   9
{ // R[ 9] abcde
{0x00000004, 0x00000000, 0x00000001, 0x00000030, 0x00000000}, // L[0]
{0x00000004, 0x0000001C, 0x00000020, 0x00000092, 0x00000000}, // L[1]
{0x00000042, 0x00000041, 0x00000041, 0x00000021, 0x00000000}, // L[2]
{0x00000002, 0x0000008A, 0x00000000, 0x000000E5, 0x00000000}, // L[3]
}, // T.state[ 9].w =   9
// T.w =  41
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 41 -> 40
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x00000040}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000018, 0x00000040, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000008, 0x00000018, 0x00000040, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000030, 0x00000000, 0x00000000, 0x00000070}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000070, 0x00000030, 0x00000000, 0x00000038, 0x00000048}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x00000004}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000070, 0x0000000F, 0x00000048, 0x00000038, 0x00000001}, // L[2]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000004, 0x00000004, 0x00000020, 0x00000021, 0x00000001}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000001, 0x0000000F, 0x00000048, 0x000000C9, 0x00000003}, // L[2]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000020}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000004, 0x00000000, 0x00000003, 0x00000064, 0x00000004}, // L[0]
{0x00000000, 0x00000018, 0x00000020, 0x00000021, 0x00000028}, // L[1]
{0x00000001, 0x00000041, 0x00000001, 0x00000000, 0x00000042}, // L[2]
{0x00000088, 0x0000000A, 0x00000000, 0x000000C9, 0x00000082}, // L[3]
}, // T.state[ 8].w =   9
{ // R[ 9] abcde
{0x00000004, 0x00000000, 0x00000003, 0x00000030, 0x00000000}, // L[0]
{0x00000028, 0x00000018, 0x00000020, 0x00000084, 0x00000000}, // L[1]
{0x00000042, 0x00000041, 0x00000001, 0x00000021, 0x00000000}, // L[2]
{0x00000082, 0x0000000A, 0x00000000, 0x000000A5, 0x00000000}, // L[3]
}, // T.state[ 9].w =   8
// T.w =  40
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 40 -> 39
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x00000040}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000018, 0x00000040, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000008, 0x00000018, 0x00000040, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000030, 0x00000000, 0x00000000, 0x00000070}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000070, 0x00000030, 0x00000000, 0x00000038, 0x00000028}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x00000004}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000070, 0x00000003, 0x00000028, 0x00000038, 0x00000055}, // L[2]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000004, 0x00000004, 0x00000020, 0x00000021, 0x00000001}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000055, 0x00000003, 0x00000028, 0x0000006B, 0x00000003}, // L[2]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000020}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000004, 0x00000000, 0x00000003, 0x00000064, 0x00000004}, // L[0]
{0x00000000, 0x00000000, 0x00000020, 0x00000021, 0x00000000}, // L[1]
{0x00000055, 0x00000041, 0x00000001, 0x00000000, 0x00000034}, // L[2]
{0x00000088, 0x0000000A, 0x00000000, 0x0000006B, 0x00000082}, // L[3]
}, // T.state[ 8].w =  10
{ // R[ 9] abcde
{0x00000004, 0x00000000, 0x00000003, 0x00000030, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000020, 0x00000090, 0x00000000}, // L[1]
{0x00000034, 0x00000041, 0x00000001, 0x0000001A, 0x00000000}, // L[2]
{0x00000082, 0x0000000A, 0x00000000, 0x000000F4, 0x00000000}, // L[3]
}, // T.state[ 9].w =   8
// T.w =  39
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 39 -> 37
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x000000C0}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000008, 0x000000C0, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000008, 0x00000008, 0x000000C0, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000030}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000000, 0x00000018, 0x00000008}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x00000004}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000003, 0x00000008, 0x00000018, 0x00000031}, // L[2]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000004, 0x00000004, 0x00000020, 0x00000021, 0x00000001}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000031, 0x00000003, 0x00000008, 0x00000049, 0x000000C1}, // L[2]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000020}, // L[3]
}, // T.state[ 7].w =   7
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000004, 0x00000000, 0x000000C1, 0x00000064, 0x00000004}, // L[0]
{0x00000000, 0x00000085, 0x00000020, 0x00000021, 0x00000085}, // L[1]
{0x00000031, 0x00000041, 0x00000001, 0x00000000, 0x00000010}, // L[2]
{0x00000088, 0x0000000A, 0x00000000, 0x00000049, 0x00000082}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000004, 0x00000000, 0x000000C1, 0x00000030, 0x00000000}, // L[0]
{0x00000085, 0x00000085, 0x00000020, 0x00000052, 0x00000000}, // L[1]
{0x00000010, 0x00000041, 0x00000001, 0x00000008, 0x00000000}, // L[2]
{0x00000082, 0x0000000A, 0x00000000, 0x000000E5, 0x00000000}, // L[3]
}, // T.state[ 9].w =  11
// T.w =  37
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 37 -> 36
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000040, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000040, 0x00000000, 0x000000C0}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000008, 0x000000C0, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000008, 0x00000008, 0x000000C0, 0x00000040, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000030}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000020, 0x00000020}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000010, 0x00000000, 0x00000018, 0x00000008}, // L[2]
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000004, 0x00000020, 0x00000020, 0x00000004}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000030, 0x00000003, 0x00000008, 0x00000018, 0x00000031}, // L[2]
{0x00000008, 0x00000080, 0x00000004, 0x00000004, 0x00000088}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000004, 0x00000004, 0x00000020, 0x00000021, 0x00000041}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000031, 0x00000003, 0x00000008, 0x00000049, 0x00000041}, // L[2]
{0x00000088, 0x00000080, 0x00000004, 0x00000064, 0x00000020}, // L[3]
}, // T.state[ 7].w =   7
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000004, 0x00000000, 0x00000041, 0x00000064, 0x00000004}, // L[0]
{0x00000000, 0x00000084, 0x00000020, 0x00000021, 0x0000008C}, // L[1]
{0x00000031, 0x00000041, 0x00000041, 0x00000000, 0x00000010}, // L[2]
{0x00000088, 0x0000008A, 0x00000000, 0x00000049, 0x00000002}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000004, 0x00000000, 0x00000041, 0x00000030, 0x00000000}, // L[0]
{0x0000008C, 0x00000084, 0x00000020, 0x000000D6, 0x00000000}, // L[1]
{0x00000010, 0x00000041, 0x00000041, 0x00000008, 0x00000000}, // L[2]
{0x00000002, 0x0000008A, 0x00000000, 0x000000A5, 0x00000000}, // L[3]
}, // T.state[ 9].w =  10
// T.w =  36
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 36 -> 15
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000082}, // L[3]
}, // T.state[ 7].w =   2
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000046, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000082, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000015, 0x00000000, 0x00000000, 0x00000015}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000000, 0x00000088}, // L[3]
}, // T.state[ 8].w =   3
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000023, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000082, 0x00000000, 0x00000000}, // L[1]
{0x00000015, 0x00000015, 0x00000000, 0x0000008A, 0x00000000}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000044, 0x00000000}, // L[3]
}, // T.state[ 9].w =   5
// T.w =  15
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 15 -> 13
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000000A}, // L[3]
}, // T.state[ 7].w =   2
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000046, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000004, 0x00000000, 0x00000000, 0x00000004}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000000, 0x00000088}, // L[3]
}, // T.state[ 8].w =   3
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000023, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000000A, 0x00000000, 0x00000000}, // L[1]
{0x00000004, 0x00000004, 0x00000000, 0x00000002, 0x00000000}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000044, 0x00000000}, // L[3]
}, // T.state[ 9].w =   3
// T.w =  13
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 13 -> 12
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000070, 0x00000020, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000070, 0x00000010, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000010, 0x00000000}, // L[3]
}, // T.state[ 2].w =   3
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   1
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000042, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000014, 0x00000000, 0x00000000, 0x00000014}, // L[3]
}, // T.state[ 8].w =   2
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000021, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[2]
{0x00000014, 0x00000014, 0x00000000, 0x0000000A, 0x00000000}, // L[3]
}, // T.state[ 9].w =   3
// T.w =  12
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 12 -> 11
#if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 10
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000090, 0x00000020, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000090, 0x00000010, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000010, 0x00000000}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   1
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000042, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000014, 0x00000000, 0x00000000, 0x0000001C}, // L[3]
}, // T.state[ 8].w =   2
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000021, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[2]
{0x0000001C, 0x00000014, 0x00000000, 0x0000000E, 0x00000000}, // L[3]
}, // T.state[ 9].w =   3
// T.w =  11
};
#endif // #if 1 // WORD_SIZE 8 nrounds 9 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 9 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3
B[ 4]  4
B[ 5]  5
B[ 6]  6
B[ 7]  8
B[ 8] 11

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 111 -> 87
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000020, 0x00000000, 0x00000000, 0x000000E0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x000000C4}, // L[0]
{0x00000008, 0x00000018, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x000000E0, 0x00000020, 0x00000000, 0x00000070, 0x00000010}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000E}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000098, 0x000000C4, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000083, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x000000E0, 0x00000006, 0x00000010, 0x00000070, 0x00000062}, // L[2]
{0x00000030, 0x000000C1, 0x0000000E, 0x0000001A, 0x00000013}, // L[3]
}, // T.state[ 6].w =  10
{ // R[ 7] abcde
{0x00000088, 0x00000098, 0x000000C4, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000083, 0x00000004, 0x0000006C, 0x00000020}, // L[1]
{0x00000062, 0x00000006, 0x00000010, 0x00000090, 0x00000080}, // L[2]
{0x00000013, 0x000000C1, 0x0000000E, 0x00000048, 0x00000042}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000047, 0x00000080, 0x00000048, 0x00000041}, // L[0]
{0x00000089, 0x0000000D, 0x00000042, 0x00000046, 0x00000094}, // L[1]
{0x00000062, 0x00000007, 0x00000002, 0x0000006C, 0x00000021}, // L[2]
{0x00000013, 0x00000035, 0x00000020, 0x00000090, 0x00000002}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000041, 0x00000047, 0x00000080, 0x00000084, 0x00000004}, // L[0]
{0x00000094, 0x0000000D, 0x00000042, 0x00000069, 0x0000007D}, // L[1]
{0x00000021, 0x00000007, 0x00000002, 0x000000A6, 0x000000A0}, // L[2]
{0x00000002, 0x00000035, 0x00000020, 0x00000049, 0x00000029}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x00000041, 0x00000068, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000094, 0x0000000E, 0x0000007D, 0x00000069, 0x00000000}, // L[1]
{0x00000021, 0x000000F4, 0x000000A0, 0x000000A6, 0x00000000}, // L[2]
{0x00000002, 0x00000083, 0x00000029, 0x00000049, 0x00000000}, // L[3]
}, // T.state[10].w =  15
// T.w =  87
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 87 -> 85
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000020, 0x00000000, 0x00000000, 0x000000E0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x000000C4}, // L[0]
{0x00000008, 0x00000018, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x000000E0, 0x00000020, 0x00000000, 0x00000070, 0x00000010}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000E}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000098, 0x000000C4, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000083, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x000000E0, 0x00000006, 0x00000010, 0x00000070, 0x00000062}, // L[2]
{0x00000030, 0x000000C1, 0x0000000E, 0x0000001A, 0x00000013}, // L[3]
}, // T.state[ 6].w =  10
{ // R[ 7] abcde
{0x00000088, 0x00000098, 0x000000C4, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000083, 0x00000004, 0x0000006C, 0x00000020}, // L[1]
{0x00000062, 0x00000006, 0x00000010, 0x00000090, 0x00000080}, // L[2]
{0x00000013, 0x000000C1, 0x0000000E, 0x00000048, 0x00000042}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000047, 0x00000080, 0x00000048, 0x00000041}, // L[0]
{0x00000089, 0x0000000D, 0x00000042, 0x00000046, 0x00000094}, // L[1]
{0x00000062, 0x00000007, 0x00000002, 0x0000006C, 0x00000069}, // L[2]
{0x00000013, 0x00000035, 0x00000020, 0x00000090, 0x00000002}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000041, 0x00000047, 0x00000080, 0x00000084, 0x00000004}, // L[0]
{0x00000094, 0x0000000D, 0x00000042, 0x00000069, 0x0000006F}, // L[1]
{0x00000069, 0x00000007, 0x00000002, 0x00000082, 0x00000080}, // L[2]
{0x00000002, 0x00000035, 0x00000020, 0x00000049, 0x00000029}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x00000041, 0x00000068, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000094, 0x0000004C, 0x0000006F, 0x00000069, 0x00000000}, // L[1]
{0x00000069, 0x000000F0, 0x00000080, 0x00000082, 0x00000000}, // L[2]
{0x00000002, 0x00000083, 0x00000029, 0x00000049, 0x00000000}, // L[3]
}, // T.state[10].w =  13
// T.w =  85
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 85 -> 84
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000020, 0x00000000, 0x00000000, 0x000000E0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x000000C4}, // L[0]
{0x00000008, 0x00000018, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x000000E0, 0x00000020, 0x00000000, 0x00000070, 0x00000010}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000E}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000098, 0x000000C4, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000083, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x000000E0, 0x00000006, 0x00000010, 0x00000070, 0x00000062}, // L[2]
{0x00000030, 0x000000C1, 0x0000000E, 0x0000001A, 0x00000013}, // L[3]
}, // T.state[ 6].w =  10
{ // R[ 7] abcde
{0x00000088, 0x00000098, 0x000000C4, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000083, 0x00000004, 0x0000006C, 0x00000020}, // L[1]
{0x00000062, 0x00000006, 0x00000010, 0x00000090, 0x00000080}, // L[2]
{0x00000013, 0x000000C1, 0x0000000E, 0x00000048, 0x00000042}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000047, 0x00000080, 0x00000048, 0x00000049}, // L[0]
{0x00000089, 0x0000000D, 0x00000042, 0x00000046, 0x00000084}, // L[1]
{0x00000062, 0x00000007, 0x00000002, 0x0000006C, 0x00000061}, // L[2]
{0x00000013, 0x00000035, 0x00000020, 0x00000090, 0x00000042}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000049, 0x00000047, 0x00000080, 0x00000080, 0x00000000}, // L[0]
{0x00000084, 0x0000000D, 0x00000042, 0x00000061, 0x00000021}, // L[1]
{0x00000061, 0x00000007, 0x00000002, 0x00000086, 0x00000084}, // L[2]
{0x00000042, 0x00000035, 0x00000020, 0x00000069, 0x00000009}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x00000049, 0x000000E8, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000084, 0x00000085, 0x00000021, 0x00000061, 0x00000000}, // L[1]
{0x00000061, 0x00000070, 0x00000084, 0x00000086, 0x00000000}, // L[2]
{0x00000042, 0x00000087, 0x00000009, 0x00000069, 0x00000000}, // L[3]
}, // T.state[10].w =  12
// T.w =  84
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 84 -> 83
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000020, 0x00000000, 0x00000000, 0x000000E0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x000000C4}, // L[0]
{0x00000008, 0x00000018, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x000000E0, 0x00000020, 0x00000000, 0x00000070, 0x00000010}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000E}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000098, 0x000000C4, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000083, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x000000E0, 0x00000006, 0x00000010, 0x00000070, 0x00000062}, // L[2]
{0x00000030, 0x000000C1, 0x0000000E, 0x0000001A, 0x00000013}, // L[3]
}, // T.state[ 6].w =  10
{ // R[ 7] abcde
{0x00000088, 0x00000098, 0x000000C4, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000083, 0x00000004, 0x0000006C, 0x00000020}, // L[1]
{0x00000062, 0x00000006, 0x00000010, 0x00000090, 0x00000080}, // L[2]
{0x00000013, 0x000000C1, 0x0000000E, 0x00000048, 0x00000042}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000047, 0x00000080, 0x00000048, 0x00000041}, // L[0]
{0x00000089, 0x0000000D, 0x00000042, 0x00000046, 0x00000086}, // L[1]
{0x00000062, 0x00000007, 0x00000002, 0x0000006C, 0x00000061}, // L[2]
{0x00000013, 0x00000035, 0x00000020, 0x00000090, 0x00000000}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000041, 0x00000047, 0x00000080, 0x00000084, 0x00000004}, // L[0]
{0x00000086, 0x0000000D, 0x00000042, 0x00000060, 0x00000022}, // L[1]
{0x00000061, 0x00000007, 0x00000002, 0x00000086, 0x00000080}, // L[2]
{0x00000000, 0x00000035, 0x00000020, 0x00000048, 0x00000028}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x00000041, 0x00000068, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000086, 0x000000E5, 0x00000022, 0x00000060, 0x00000000}, // L[1]
{0x00000061, 0x000000F0, 0x00000080, 0x00000086, 0x00000000}, // L[2]
{0x00000000, 0x000000A3, 0x00000028, 0x00000048, 0x00000000}, // L[3]
}, // T.state[10].w =  11
// T.w =  83
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 83 -> 82
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000020, 0x00000000, 0x00000000, 0x000000E0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x000000C4}, // L[0]
{0x00000008, 0x00000018, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x000000E0, 0x00000020, 0x00000000, 0x00000070, 0x00000010}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000E}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000098, 0x000000C4, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000083, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x000000E0, 0x00000006, 0x00000010, 0x00000070, 0x00000062}, // L[2]
{0x00000030, 0x000000C1, 0x0000000E, 0x0000001A, 0x00000013}, // L[3]
}, // T.state[ 6].w =  10
{ // R[ 7] abcde
{0x00000088, 0x00000098, 0x000000C4, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000083, 0x00000004, 0x0000006C, 0x00000020}, // L[1]
{0x00000062, 0x00000006, 0x00000010, 0x00000090, 0x00000080}, // L[2]
{0x00000013, 0x000000C1, 0x0000000E, 0x00000048, 0x00000042}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000047, 0x00000080, 0x00000048, 0x00000049}, // L[0]
{0x00000089, 0x0000000D, 0x00000042, 0x00000046, 0x00000086}, // L[1]
{0x00000062, 0x00000007, 0x00000002, 0x0000006C, 0x00000061}, // L[2]
{0x00000013, 0x00000035, 0x00000020, 0x00000090, 0x00000000}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000049, 0x00000047, 0x00000080, 0x00000080, 0x00000000}, // L[0]
{0x00000086, 0x0000000D, 0x00000042, 0x00000060, 0x00000022}, // L[1]
{0x00000061, 0x00000007, 0x00000002, 0x00000086, 0x00000088}, // L[2]
{0x00000000, 0x00000035, 0x00000020, 0x00000048, 0x00000078}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x00000049, 0x000000E8, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000086, 0x000000E5, 0x00000022, 0x00000060, 0x00000000}, // L[1]
{0x00000061, 0x000000F1, 0x00000088, 0x00000086, 0x00000000}, // L[2]
{0x00000000, 0x000000A9, 0x00000078, 0x00000048, 0x00000000}, // L[3]
}, // T.state[10].w =  10
// T.w =  82
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 82 -> 81
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000020, 0x00000000, 0x00000000, 0x000000E0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x000000C4}, // L[0]
{0x00000008, 0x00000018, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x000000E0, 0x00000020, 0x00000000, 0x00000070, 0x00000010}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000E}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000098, 0x000000C4, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000083, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x000000E0, 0x00000006, 0x00000010, 0x00000070, 0x00000062}, // L[2]
{0x00000030, 0x000000C1, 0x0000000E, 0x0000001A, 0x00000013}, // L[3]
}, // T.state[ 6].w =  10
{ // R[ 7] abcde
{0x00000088, 0x00000098, 0x000000C4, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000083, 0x00000004, 0x0000006C, 0x00000020}, // L[1]
{0x00000062, 0x00000006, 0x00000010, 0x00000090, 0x00000080}, // L[2]
{0x00000013, 0x000000C1, 0x0000000E, 0x00000048, 0x00000042}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000047, 0x00000080, 0x00000048, 0x00000049}, // L[0]
{0x00000089, 0x0000000D, 0x00000042, 0x00000046, 0x00000086}, // L[1]
{0x00000062, 0x00000007, 0x00000002, 0x0000006C, 0x00000069}, // L[2]
{0x00000013, 0x00000035, 0x00000020, 0x00000090, 0x00000000}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000049, 0x00000047, 0x00000080, 0x00000080, 0x00000000}, // L[0]
{0x00000086, 0x0000000D, 0x00000042, 0x00000060, 0x00000022}, // L[1]
{0x00000069, 0x00000007, 0x00000002, 0x00000082, 0x00000084}, // L[2]
{0x00000000, 0x00000035, 0x00000020, 0x00000048, 0x00000038}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x00000049, 0x000000E8, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000086, 0x000000E5, 0x00000022, 0x00000060, 0x00000000}, // L[1]
{0x00000069, 0x00000070, 0x00000084, 0x00000082, 0x00000000}, // L[2]
{0x00000000, 0x000000A1, 0x00000038, 0x00000048, 0x00000000}, // L[3]
}, // T.state[10].w =   9
// T.w =  81
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 81 -> 80
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000020, 0x00000000, 0x00000000, 0x000000E0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x000000C4}, // L[0]
{0x00000008, 0x00000018, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x000000E0, 0x00000020, 0x00000000, 0x00000070, 0x00000010}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000E}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000098, 0x000000C4, 0x00000040, 0x00000098}, // L[0]
{0x00000008, 0x00000083, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x000000E0, 0x00000006, 0x00000010, 0x00000070, 0x00000062}, // L[2]
{0x00000030, 0x000000C1, 0x0000000E, 0x0000001A, 0x00000053}, // L[3]
}, // T.state[ 6].w =  10
{ // R[ 7] abcde
{0x00000098, 0x00000098, 0x000000C4, 0x000000C6, 0x00000002}, // L[0]
{0x00000089, 0x00000083, 0x00000004, 0x0000006C, 0x000000A0}, // L[1]
{0x00000062, 0x00000006, 0x00000010, 0x00000090, 0x00000080}, // L[2]
{0x00000053, 0x000000C1, 0x0000000E, 0x0000004A, 0x000000C0}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000098, 0x00000046, 0x00000080, 0x0000004A, 0x0000004A}, // L[0]
{0x00000089, 0x0000000D, 0x000000C0, 0x000000C6, 0x00000086}, // L[1]
{0x00000062, 0x00000002, 0x00000002, 0x0000006C, 0x00000064}, // L[2]
{0x00000053, 0x00000035, 0x000000A0, 0x00000090, 0x00000080}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x0000004A, 0x00000046, 0x00000080, 0x00000000, 0x00000080}, // L[0]
{0x00000086, 0x0000000D, 0x000000C0, 0x00000020, 0x00000020}, // L[1]
{0x00000064, 0x00000002, 0x00000002, 0x00000004, 0x00000006}, // L[2]
{0x00000080, 0x00000035, 0x000000A0, 0x00000008, 0x000000E8}, // L[3]
}, // T.state[ 9].w =  19
{ // R[10] abcde
{0x0000004A, 0x000000D8, 0x00000080, 0x00000000, 0x00000000}, // L[0]
{0x00000086, 0x000000A5, 0x00000020, 0x00000020, 0x00000000}, // L[1]
{0x00000064, 0x00000080, 0x00000006, 0x00000004, 0x00000000}, // L[2]
{0x00000080, 0x000000BB, 0x000000E8, 0x00000008, 0x00000000}, // L[3]
}, // T.state[10].w =   9
// T.w =  80
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 80 -> 65
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000021}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000021, 0x00000021, 0x00000000, 0x00000090, 0x000000B0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000021, 0x00000032, 0x000000B0, 0x00000090, 0x00000011}, // L[2]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000033}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000011, 0x00000032, 0x000000B0, 0x0000000C, 0x00000084}, // L[2]
{0x00000033, 0x00000001, 0x00000008, 0x000000D9, 0x00000041}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000020, 0x00000084, 0x000000D9, 0x00000028}, // L[0]
{0x00000010, 0x0000006D, 0x00000041, 0x00000042, 0x00000005}, // L[1]
{0x00000011, 0x00000080, 0x00000002, 0x00000080, 0x00000091}, // L[2]
{0x00000033, 0x00000014, 0x00000000, 0x0000000C, 0x00000001}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000028, 0x00000020, 0x00000084, 0x000000F8, 0x00000004}, // L[0]
{0x00000005, 0x0000006D, 0x00000041, 0x000000A3, 0x00000020}, // L[1]
{0x00000091, 0x00000080, 0x00000002, 0x00000088, 0x0000008A}, // L[2]
{0x00000001, 0x00000014, 0x00000000, 0x00000086, 0x00000082}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000028, 0x00000084, 0x00000004, 0x000000F8, 0x00000000}, // L[0]
{0x00000005, 0x000000A9, 0x00000020, 0x000000A3, 0x00000000}, // L[1]
{0x00000091, 0x00000041, 0x0000008A, 0x00000088, 0x00000000}, // L[2]
{0x00000001, 0x000000D2, 0x00000082, 0x00000086, 0x00000000}, // L[3]
}, // T.state[10].w =  17
// T.w =  65
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 65 -> 64
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000021}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000021, 0x00000021, 0x00000000, 0x00000090, 0x000000B0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000021, 0x00000032, 0x000000B0, 0x00000090, 0x00000011}, // L[2]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000033}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000011, 0x00000032, 0x000000B0, 0x0000000C, 0x00000084}, // L[2]
{0x00000033, 0x00000001, 0x00000008, 0x000000D9, 0x00000041}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000020, 0x00000084, 0x000000D9, 0x00000068}, // L[0]
{0x00000010, 0x0000006D, 0x00000041, 0x00000042, 0x000000C5}, // L[1]
{0x00000011, 0x00000080, 0x00000002, 0x00000080, 0x00000091}, // L[2]
{0x00000033, 0x00000014, 0x00000000, 0x0000000C, 0x00000021}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000068, 0x00000020, 0x00000084, 0x000000D8, 0x00000044}, // L[0]
{0x000000C5, 0x0000006D, 0x00000041, 0x000000C3, 0x00000000}, // L[1]
{0x00000091, 0x00000080, 0x00000002, 0x00000088, 0x0000008A}, // L[2]
{0x00000021, 0x00000014, 0x00000000, 0x00000096, 0x00000092}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000068, 0x0000008C, 0x00000044, 0x000000D8, 0x00000000}, // L[0]
{0x000000C5, 0x000000AD, 0x00000000, 0x000000C3, 0x00000000}, // L[1]
{0x00000091, 0x00000041, 0x0000008A, 0x00000088, 0x00000000}, // L[2]
{0x00000021, 0x000000D0, 0x00000092, 0x00000096, 0x00000000}, // L[3]
}, // T.state[10].w =  16
// T.w =  64
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 64 -> 62
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000021}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000021, 0x00000021, 0x00000000, 0x00000090, 0x000000B0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000021, 0x00000032, 0x000000B0, 0x00000090, 0x00000011}, // L[2]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000033}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000011, 0x00000032, 0x000000B0, 0x0000000C, 0x00000084}, // L[2]
{0x00000033, 0x00000001, 0x00000008, 0x000000D9, 0x00000041}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000020, 0x00000084, 0x000000D9, 0x00000078}, // L[0]
{0x00000010, 0x0000006D, 0x00000041, 0x00000042, 0x000000C5}, // L[1]
{0x00000011, 0x00000080, 0x00000002, 0x00000080, 0x00000091}, // L[2]
{0x00000033, 0x00000014, 0x00000000, 0x0000000C, 0x00000001}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000078, 0x00000020, 0x00000084, 0x000000D0, 0x00000054}, // L[0]
{0x000000C5, 0x0000006D, 0x00000041, 0x000000C3, 0x00000000}, // L[1]
{0x00000091, 0x00000080, 0x00000002, 0x00000088, 0x0000008A}, // L[2]
{0x00000001, 0x00000014, 0x00000000, 0x00000086, 0x00000082}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000078, 0x0000008E, 0x00000054, 0x000000D0, 0x00000000}, // L[0]
{0x000000C5, 0x000000AD, 0x00000000, 0x000000C3, 0x00000000}, // L[1]
{0x00000091, 0x00000041, 0x0000008A, 0x00000088, 0x00000000}, // L[2]
{0x00000001, 0x000000D2, 0x00000082, 0x00000086, 0x00000000}, // L[3]
}, // T.state[10].w =  14
// T.w =  62
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 62 -> 61
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000021}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000021, 0x00000021, 0x00000000, 0x00000090, 0x000000B0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000021, 0x00000032, 0x000000B0, 0x00000090, 0x00000011}, // L[2]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000033}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000011, 0x00000032, 0x000000B0, 0x0000000C, 0x00000084}, // L[2]
{0x00000033, 0x00000001, 0x00000008, 0x000000D9, 0x000000C1}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000020, 0x00000084, 0x000000D9, 0x00000068}, // L[0]
{0x00000010, 0x0000006D, 0x000000C1, 0x00000042, 0x00000045}, // L[1]
{0x00000011, 0x00000081, 0x00000002, 0x00000080, 0x00000090}, // L[2]
{0x00000033, 0x00000014, 0x00000000, 0x0000000C, 0x00000009}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000068, 0x00000020, 0x00000084, 0x000000D8, 0x0000006C}, // L[0]
{0x00000045, 0x0000006D, 0x000000C1, 0x00000083, 0x00000040}, // L[1]
{0x00000090, 0x00000081, 0x00000002, 0x00000008, 0x0000000A}, // L[2]
{0x00000009, 0x00000014, 0x00000000, 0x00000082, 0x00000086}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000068, 0x00000089, 0x0000006C, 0x000000D8, 0x00000000}, // L[0]
{0x00000045, 0x000000A5, 0x00000040, 0x00000083, 0x00000000}, // L[1]
{0x00000090, 0x00000071, 0x0000000A, 0x00000008, 0x00000000}, // L[2]
{0x00000009, 0x00000052, 0x00000086, 0x00000082, 0x00000000}, // L[3]
}, // T.state[10].w =  13
// T.w =  61
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 61 -> 60
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000021}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000021, 0x00000021, 0x00000000, 0x00000090, 0x000000B0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000021, 0x00000032, 0x000000B0, 0x00000090, 0x00000011}, // L[2]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000033}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000082}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000011, 0x00000032, 0x000000B0, 0x0000000C, 0x00000084}, // L[2]
{0x00000033, 0x00000001, 0x00000008, 0x000000D9, 0x000000C1}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000020, 0x00000084, 0x000000D9, 0x00000068}, // L[0]
{0x00000010, 0x0000006D, 0x000000C1, 0x00000042, 0x00000047}, // L[1]
{0x00000011, 0x00000081, 0x00000082, 0x00000080, 0x00000090}, // L[2]
{0x00000033, 0x00000015, 0x00000000, 0x0000000C, 0x0000000C}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000068, 0x00000020, 0x00000084, 0x000000D8, 0x00000064}, // L[0]
{0x00000047, 0x0000006D, 0x000000C1, 0x00000082, 0x00000047}, // L[1]
{0x00000090, 0x00000081, 0x00000082, 0x00000008, 0x0000009E}, // L[2]
{0x0000000C, 0x00000015, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000068, 0x00000088, 0x00000064, 0x000000D8, 0x00000000}, // L[0]
{0x00000047, 0x00000045, 0x00000047, 0x00000082, 0x00000000}, // L[1]
{0x00000090, 0x000000E3, 0x0000009E, 0x00000008, 0x00000000}, // L[2]
{0x0000000C, 0x000000A2, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[10].w =  12
// T.w =  60
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 60 -> 59
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000021}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000021, 0x00000021, 0x00000000, 0x00000090, 0x000000B0}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000008, 0x00000008}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000021, 0x00000032, 0x000000B0, 0x00000090, 0x00000031}, // L[2]
{0x00000010, 0x00000001, 0x00000008, 0x00000008, 0x00000013}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000031, 0x00000032, 0x000000B0, 0x0000000D, 0x000000B5}, // L[2]
{0x00000013, 0x00000001, 0x00000008, 0x000000D8, 0x000000C0}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000020, 0x000000B5, 0x000000D8, 0x00000038}, // L[0]
{0x00000010, 0x0000000F, 0x000000C0, 0x00000042, 0x00000003}, // L[1]
{0x00000031, 0x00000083, 0x00000002, 0x00000080, 0x00000090}, // L[2]
{0x00000013, 0x00000014, 0x00000000, 0x0000000D, 0x0000000D}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000038, 0x00000020, 0x000000B5, 0x00000070, 0x0000000D}, // L[0]
{0x00000003, 0x0000000F, 0x000000C0, 0x000000A0, 0x00000020}, // L[1]
{0x00000090, 0x00000083, 0x00000002, 0x00000008, 0x0000000E}, // L[2]
{0x0000000D, 0x00000014, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000038, 0x000000A5, 0x0000000D, 0x00000070, 0x00000000}, // L[0]
{0x00000003, 0x000000E5, 0x00000020, 0x000000A0, 0x00000000}, // L[1]
{0x00000090, 0x000000B1, 0x0000000E, 0x00000008, 0x00000000}, // L[2]
{0x0000000D, 0x00000082, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[10].w =  11
// T.w =  59
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 59 -> 38
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000071}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000071, 0x00000050, 0x00000083, 0x00000087, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x0000000A, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000B4, 0x00000000, 0x00000000, 0x00000084}, // L[1]
{0x00000071, 0x00000000, 0x00000000, 0x00000080, 0x00000011}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000087, 0x00000000}, // L[3]
}, // T.state[ 8].w =   5
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x0000000A, 0x00000010, 0x0000000A}, // L[0]
{0x00000084, 0x000000B4, 0x00000000, 0x00000042, 0x00000042}, // L[1]
{0x00000011, 0x00000000, 0x00000000, 0x000000C8, 0x00000048}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C3, 0x00000041}, // L[3]
}, // T.state[ 9].w =   9
{ // R[10] abcde
{0x00000020, 0x00000045, 0x0000000A, 0x00000010, 0x00000000}, // L[0]
{0x00000084, 0x000000DE, 0x00000042, 0x00000042, 0x00000000}, // L[1]
{0x00000011, 0x00000009, 0x00000048, 0x000000C8, 0x00000000}, // L[2]
{0x00000000, 0x00000028, 0x00000041, 0x000000C3, 0x00000000}, // L[3]
}, // T.state[10].w =  12
// T.w =  38
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 38 -> 37
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000071}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000071, 0x00000050, 0x00000083, 0x00000087, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x0000000A, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000B4, 0x00000000, 0x00000000, 0x00000084}, // L[1]
{0x00000071, 0x00000000, 0x00000000, 0x00000080, 0x00000091}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000087, 0x00000000}, // L[3]
}, // T.state[ 8].w =   5
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x0000000A, 0x00000010, 0x0000002A}, // L[0]
{0x00000084, 0x000000B4, 0x00000000, 0x00000042, 0x00000042}, // L[1]
{0x00000091, 0x00000000, 0x00000000, 0x00000088, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C3, 0x00000041}, // L[3]
}, // T.state[ 9].w =   9
{ // R[10] abcde
{0x00000020, 0x00000041, 0x0000002A, 0x00000010, 0x00000000}, // L[0]
{0x00000084, 0x000000DE, 0x00000042, 0x00000042, 0x00000000}, // L[1]
{0x00000091, 0x00000011, 0x00000088, 0x00000088, 0x00000000}, // L[2]
{0x00000000, 0x00000028, 0x00000041, 0x000000C3, 0x00000000}, // L[3]
}, // T.state[10].w =  11
// T.w =  37
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 37 -> 26
#if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 11
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000098}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000098, 0x00000008, 0x00000040, 0x000000C6, 0x00000002}, // L[3]
}, // T.state[ 7].w =   2
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000014, 0x00000000, 0x00000000, 0x00000034}, // L[2]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x00000088}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x00000021}, // L[0]
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[1]
{0x00000034, 0x00000014, 0x00000000, 0x0000001A, 0x0000000A}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000044, 0x00000044}, // L[3]
}, // T.state[ 9].w =   5
{ // R[10] abcde
{0x00000000, 0x00000024, 0x00000021, 0x00000063, 0x00000000}, // L[0]
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000000}, // L[1]
{0x00000034, 0x000000C3, 0x0000000A, 0x0000001A, 0x00000000}, // L[2]
{0x00000088, 0x00000088, 0x00000044, 0x00000044, 0x00000000}, // L[3]
}, // T.state[10].w =  10
// T.w =  26
};
#endif // #if 1 // WORD_SIZE 8 nrounds 10 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 10 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3
B[ 4]  4
B[ 5]  5
B[ 6]  6
B[ 7]  8
B[ 8] 11
B[ 9] 26

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 126 -> 98
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000012}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000002}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000040, 0x00000044}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000018, 0x00000043, 0x00000012, 0x0000000E, 0x000000EB}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000041, 0x00000002, 0x0000000E, 0x00000049}, // L[2]
{0x00000080, 0x00000088, 0x00000044, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   9
{ // R[ 7] abcde
{0x000000EB, 0x00000043, 0x00000012, 0x0000002F, 0x00000001}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000049, 0x00000041, 0x00000002, 0x0000003A, 0x00000008}, // L[2]
{0x00000008, 0x00000088, 0x00000044, 0x00000042, 0x00000082}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x000000EB, 0x00000001, 0x00000008, 0x00000042, 0x00000068}, // L[0]
{0x00000080, 0x00000092, 0x00000082, 0x0000002F, 0x00000012}, // L[1]
{0x00000049, 0x00000014, 0x00000001, 0x00000004, 0x00000047}, // L[2]
{0x00000008, 0x00000084, 0x00000000, 0x0000003A, 0x00000094}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000068, 0x00000001, 0x00000008, 0x00000015, 0x00000005}, // L[0]
{0x00000012, 0x00000092, 0x00000082, 0x0000009E, 0x00000020}, // L[1]
{0x00000047, 0x00000014, 0x00000001, 0x000000A1, 0x000000A0}, // L[2]
{0x00000094, 0x00000084, 0x00000000, 0x00000057, 0x00000053}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000068, 0x00000080, 0x00000005, 0x00000015, 0x00000028}, // L[0]
{0x00000012, 0x00000056, 0x00000020, 0x0000009E, 0x00000060}, // L[1]
{0x00000047, 0x00000096, 0x000000A0, 0x000000A1, 0x00000051}, // L[2]
{0x00000094, 0x000000FA, 0x00000053, 0x00000057, 0x00000006}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x00000028, 0x00000080, 0x00000005, 0x000000E9, 0x00000000}, // L[0]
{0x00000060, 0x00000056, 0x00000020, 0x000000F7, 0x00000000}, // L[1]
{0x00000051, 0x00000096, 0x000000A0, 0x00000087, 0x00000000}, // L[2]
{0x00000006, 0x000000FA, 0x00000053, 0x0000008A, 0x00000000}, // L[3]
}, // T.state[11].w =  21
// T.w =  98
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 98 -> 97
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000012}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000002}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000040, 0x00000044}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000018, 0x00000043, 0x00000012, 0x0000000E, 0x000000EB}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000041, 0x00000002, 0x0000000E, 0x00000049}, // L[2]
{0x00000080, 0x00000088, 0x00000044, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   9
{ // R[ 7] abcde
{0x000000EB, 0x00000043, 0x00000012, 0x0000002F, 0x00000001}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000049, 0x00000041, 0x00000002, 0x0000003A, 0x00000008}, // L[2]
{0x00000008, 0x00000088, 0x00000044, 0x00000042, 0x00000082}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x000000EB, 0x00000001, 0x00000008, 0x00000042, 0x00000068}, // L[0]
{0x00000080, 0x00000092, 0x00000082, 0x0000002F, 0x00000012}, // L[1]
{0x00000049, 0x00000014, 0x00000001, 0x00000004, 0x00000047}, // L[2]
{0x00000008, 0x00000084, 0x00000000, 0x0000003A, 0x00000094}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000068, 0x00000001, 0x00000008, 0x00000015, 0x00000005}, // L[0]
{0x00000012, 0x00000092, 0x00000082, 0x0000009E, 0x00000018}, // L[1]
{0x00000047, 0x00000014, 0x00000001, 0x000000A1, 0x000000A2}, // L[2]
{0x00000094, 0x00000084, 0x00000000, 0x00000057, 0x00000071}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000068, 0x00000080, 0x00000005, 0x00000015, 0x00000028}, // L[0]
{0x00000012, 0x00000051, 0x00000018, 0x0000009E, 0x00000041}, // L[1]
{0x00000047, 0x000000D6, 0x000000A2, 0x000000A1, 0x00000031}, // L[2]
{0x00000094, 0x000000BE, 0x00000071, 0x00000057, 0x00000002}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x00000028, 0x00000080, 0x00000005, 0x000000E9, 0x00000000}, // L[0]
{0x00000041, 0x00000051, 0x00000018, 0x000000FE, 0x00000000}, // L[1]
{0x00000031, 0x000000D6, 0x000000A2, 0x00000084, 0x00000000}, // L[2]
{0x00000002, 0x000000BE, 0x00000071, 0x000000AA, 0x00000000}, // L[3]
}, // T.state[11].w =  20
// T.w =  97
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 97 -> 96
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000012}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000002}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000040, 0x00000044}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000018, 0x00000043, 0x00000012, 0x0000000E, 0x000000EB}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000041, 0x00000002, 0x0000000E, 0x00000049}, // L[2]
{0x00000080, 0x00000088, 0x00000044, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   9
{ // R[ 7] abcde
{0x000000EB, 0x00000043, 0x00000012, 0x0000002F, 0x00000001}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000049, 0x00000041, 0x00000002, 0x0000003A, 0x00000008}, // L[2]
{0x00000008, 0x00000088, 0x00000044, 0x00000042, 0x00000082}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x000000EB, 0x00000001, 0x00000008, 0x00000042, 0x00000068}, // L[0]
{0x00000080, 0x00000092, 0x00000082, 0x0000002F, 0x00000012}, // L[1]
{0x00000049, 0x00000014, 0x00000001, 0x00000004, 0x00000047}, // L[2]
{0x00000008, 0x00000084, 0x00000000, 0x0000003A, 0x00000094}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000068, 0x00000001, 0x00000008, 0x00000015, 0x00000007}, // L[0]
{0x00000012, 0x00000092, 0x00000082, 0x0000009E, 0x00000000}, // L[1]
{0x00000047, 0x00000014, 0x00000001, 0x000000A1, 0x000000A0}, // L[2]
{0x00000094, 0x00000084, 0x00000000, 0x00000057, 0x00000071}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000068, 0x000000C0, 0x00000007, 0x00000015, 0x00000028}, // L[0]
{0x00000012, 0x00000052, 0x00000000, 0x0000009E, 0x00000040}, // L[1]
{0x00000047, 0x00000096, 0x000000A0, 0x000000A1, 0x00000051}, // L[2]
{0x00000094, 0x000000BE, 0x00000071, 0x00000057, 0x00000006}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x00000028, 0x000000C0, 0x00000007, 0x000000E9, 0x00000000}, // L[0]
{0x00000040, 0x00000052, 0x00000000, 0x000000F6, 0x00000000}, // L[1]
{0x00000051, 0x00000096, 0x000000A0, 0x00000087, 0x00000000}, // L[2]
{0x00000006, 0x000000BE, 0x00000071, 0x0000008A, 0x00000000}, // L[3]
}, // T.state[11].w =  19
// T.w =  96
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 96 -> 95
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000012}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000002}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000040, 0x00000044}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000018, 0x00000043, 0x00000012, 0x0000000E, 0x000000EB}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000041, 0x00000002, 0x0000000E, 0x00000049}, // L[2]
{0x00000080, 0x00000088, 0x00000044, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   9
{ // R[ 7] abcde
{0x000000EB, 0x00000043, 0x00000012, 0x0000002F, 0x00000001}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000049, 0x00000041, 0x00000002, 0x0000003A, 0x00000008}, // L[2]
{0x00000008, 0x00000088, 0x00000044, 0x00000042, 0x00000082}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x000000EB, 0x00000001, 0x00000008, 0x00000042, 0x00000028}, // L[0]
{0x00000080, 0x00000092, 0x00000082, 0x0000002F, 0x00000012}, // L[1]
{0x00000049, 0x00000014, 0x00000001, 0x00000004, 0x000000C7}, // L[2]
{0x00000008, 0x00000084, 0x00000000, 0x0000003A, 0x0000008C}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000028, 0x00000001, 0x00000008, 0x00000035, 0x00000005}, // L[0]
{0x00000012, 0x00000092, 0x00000082, 0x0000009E, 0x00000000}, // L[1]
{0x000000C7, 0x00000014, 0x00000001, 0x000000E1, 0x00000020}, // L[2]
{0x0000008C, 0x00000084, 0x00000000, 0x0000005B, 0x000000C9}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000028, 0x00000080, 0x00000005, 0x00000035, 0x000000A8}, // L[0]
{0x00000012, 0x00000052, 0x00000000, 0x0000009E, 0x00000040}, // L[1]
{0x000000C7, 0x00000086, 0x00000020, 0x000000E1, 0x00000041}, // L[2]
{0x0000008C, 0x000000A9, 0x000000C9, 0x0000005B, 0x00000035}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x000000A8, 0x00000080, 0x00000005, 0x000000EC, 0x00000000}, // L[0]
{0x00000040, 0x00000052, 0x00000000, 0x000000F6, 0x00000000}, // L[1]
{0x00000041, 0x00000086, 0x00000020, 0x00000005, 0x00000000}, // L[2]
{0x00000035, 0x000000A9, 0x000000C9, 0x00000073, 0x00000000}, // L[3]
}, // T.state[11].w =  16
// T.w =  95
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 95 -> 92
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000012}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000002}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000040, 0x00000044}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000018, 0x00000043, 0x00000012, 0x0000000E, 0x000000EB}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000041, 0x00000002, 0x0000000E, 0x00000049}, // L[2]
{0x00000080, 0x00000088, 0x00000044, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   9
{ // R[ 7] abcde
{0x000000EB, 0x00000043, 0x00000012, 0x0000002F, 0x00000041}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000049, 0x00000041, 0x00000002, 0x0000003A, 0x00000008}, // L[2]
{0x00000008, 0x00000088, 0x00000044, 0x00000042, 0x00000002}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x000000EB, 0x00000001, 0x00000008, 0x00000042, 0x00000028}, // L[0]
{0x00000080, 0x00000092, 0x00000002, 0x0000002F, 0x00000012}, // L[1]
{0x00000049, 0x00000015, 0x00000041, 0x00000004, 0x000000CC}, // L[2]
{0x00000008, 0x00000004, 0x00000000, 0x0000003A, 0x00000004}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000028, 0x00000001, 0x00000008, 0x00000035, 0x00000005}, // L[0]
{0x00000012, 0x00000092, 0x00000002, 0x0000009E, 0x00000080}, // L[1]
{0x000000CC, 0x00000015, 0x00000041, 0x00000064, 0x00000025}, // L[2]
{0x00000004, 0x00000004, 0x00000000, 0x0000001F, 0x00000001}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000028, 0x00000080, 0x00000005, 0x00000035, 0x000000E8}, // L[0]
{0x00000012, 0x00000042, 0x00000080, 0x0000009E, 0x00000070}, // L[1]
{0x000000CC, 0x00000006, 0x00000025, 0x00000064, 0x00000042}, // L[2]
{0x00000004, 0x000000A0, 0x00000001, 0x0000001F, 0x000000A4}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x000000E8, 0x00000080, 0x00000005, 0x000000EE, 0x00000000}, // L[0]
{0x00000070, 0x00000042, 0x00000080, 0x00000077, 0x00000000}, // L[1]
{0x00000042, 0x00000006, 0x00000025, 0x00000031, 0x00000000}, // L[2]
{0x000000A4, 0x000000A0, 0x00000001, 0x000000DD, 0x00000000}, // L[3]
}, // T.state[11].w =  14
// T.w =  92
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 92 -> 91
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000012}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000002}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000040, 0x00000044}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000018, 0x00000043, 0x00000012, 0x0000000E, 0x000000EB}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000041, 0x00000002, 0x0000000E, 0x00000049}, // L[2]
{0x00000080, 0x00000088, 0x00000044, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   9
{ // R[ 7] abcde
{0x000000EB, 0x00000043, 0x00000012, 0x0000002F, 0x00000041}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000049, 0x00000041, 0x00000002, 0x0000003A, 0x00000008}, // L[2]
{0x00000008, 0x00000088, 0x00000044, 0x00000042, 0x00000002}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x000000EB, 0x00000001, 0x00000008, 0x00000042, 0x000000A8}, // L[0]
{0x00000080, 0x00000092, 0x00000002, 0x0000002F, 0x00000012}, // L[1]
{0x00000049, 0x00000015, 0x00000041, 0x00000004, 0x0000004C}, // L[2]
{0x00000008, 0x00000004, 0x00000000, 0x0000003A, 0x00000004}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x000000A8, 0x00000001, 0x00000008, 0x00000075, 0x00000005}, // L[0]
{0x00000012, 0x00000092, 0x00000002, 0x0000009E, 0x00000080}, // L[1]
{0x0000004C, 0x00000015, 0x00000041, 0x00000024, 0x00000067}, // L[2]
{0x00000004, 0x00000004, 0x00000000, 0x0000001F, 0x00000025}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x000000A8, 0x00000080, 0x00000005, 0x00000075, 0x00000028}, // L[0]
{0x00000012, 0x00000042, 0x00000080, 0x0000009E, 0x00000050}, // L[1]
{0x0000004C, 0x0000004E, 0x00000067, 0x00000024, 0x0000000A}, // L[2]
{0x00000004, 0x00000024, 0x00000025, 0x0000001F, 0x00000028}, // L[3]
}, // T.state[10].w =  20
{ // R[11] abcde
{0x00000028, 0x00000080, 0x00000005, 0x000000EA, 0x00000000}, // L[0]
{0x00000050, 0x00000042, 0x00000080, 0x00000076, 0x00000000}, // L[1]
{0x0000000A, 0x0000004E, 0x00000067, 0x00000071, 0x00000000}, // L[2]
{0x00000028, 0x00000024, 0x00000025, 0x000000B9, 0x00000000}, // L[3]
}, // T.state[11].w =  12
// T.w =  91
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 91 -> 90
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000012}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000002}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000040, 0x00000044}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000018, 0x00000043, 0x00000012, 0x0000000E, 0x000000EB}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000041, 0x00000002, 0x0000000E, 0x00000049}, // L[2]
{0x00000080, 0x00000088, 0x00000044, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   9
{ // R[ 7] abcde
{0x000000EB, 0x00000043, 0x00000012, 0x0000002F, 0x00000041}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000049, 0x00000041, 0x00000002, 0x0000003A, 0x00000008}, // L[2]
{0x00000008, 0x00000088, 0x00000044, 0x00000042, 0x00000002}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x000000EB, 0x00000001, 0x00000008, 0x00000042, 0x0000002A}, // L[0]
{0x00000080, 0x00000092, 0x00000002, 0x0000002F, 0x00000016}, // L[1]
{0x00000049, 0x00000015, 0x00000041, 0x00000004, 0x00000044}, // L[2]
{0x00000008, 0x00000004, 0x00000000, 0x0000003A, 0x00000004}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x0000002A, 0x00000001, 0x00000008, 0x00000034, 0x00000004}, // L[0]
{0x00000016, 0x00000092, 0x00000002, 0x0000009C, 0x00000082}, // L[1]
{0x00000044, 0x00000015, 0x00000041, 0x00000020, 0x00000023}, // L[2]
{0x00000004, 0x00000004, 0x00000000, 0x0000001F, 0x00000007}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x0000002A, 0x000000A0, 0x00000004, 0x00000034, 0x0000008A}, // L[0]
{0x00000016, 0x00000002, 0x00000082, 0x0000009C, 0x00000010}, // L[1]
{0x00000044, 0x000000C6, 0x00000023, 0x00000020, 0x0000000A}, // L[2]
{0x00000004, 0x00000060, 0x00000007, 0x0000001F, 0x00000024}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x0000008A, 0x000000A0, 0x00000004, 0x000000F5, 0x00000000}, // L[0]
{0x00000010, 0x00000002, 0x00000082, 0x00000064, 0x00000000}, // L[1]
{0x0000000A, 0x000000C6, 0x00000023, 0x00000051, 0x00000000}, // L[2]
{0x00000024, 0x00000060, 0x00000007, 0x000000D9, 0x00000000}, // L[3]
}, // T.state[11].w =  14
// T.w =  90
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 90 -> 89
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000004, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000012}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000E, 0x00000002}, // L[2]
{0x00000080, 0x00000000, 0x00000004, 0x00000040, 0x00000044}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000018, 0x00000043, 0x00000012, 0x0000000E, 0x000000EB}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000041, 0x00000002, 0x0000000E, 0x00000049}, // L[2]
{0x00000080, 0x00000088, 0x00000044, 0x00000040, 0x00000008}, // L[3]
}, // T.state[ 6].w =   9
{ // R[ 7] abcde
{0x000000EB, 0x00000043, 0x00000012, 0x0000002F, 0x00000041}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000049, 0x00000041, 0x00000002, 0x0000003A, 0x00000008}, // L[2]
{0x00000008, 0x00000088, 0x00000044, 0x00000042, 0x00000002}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x000000EB, 0x00000001, 0x00000008, 0x00000042, 0x0000002A}, // L[0]
{0x00000080, 0x00000092, 0x00000002, 0x0000002F, 0x00000036}, // L[1]
{0x00000049, 0x00000015, 0x00000041, 0x00000004, 0x00000044}, // L[2]
{0x00000008, 0x00000004, 0x00000000, 0x0000003A, 0x00000004}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x0000002A, 0x00000001, 0x00000008, 0x00000034, 0x00000004}, // L[0]
{0x00000036, 0x00000092, 0x00000002, 0x0000008C, 0x00000082}, // L[1]
{0x00000044, 0x00000015, 0x00000041, 0x00000020, 0x00000021}, // L[2]
{0x00000004, 0x00000004, 0x00000000, 0x0000001F, 0x00000005}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x0000002A, 0x000000A0, 0x00000004, 0x00000034, 0x0000008A}, // L[0]
{0x00000036, 0x00000002, 0x00000082, 0x0000008C, 0x00000010}, // L[1]
{0x00000044, 0x00000086, 0x00000021, 0x00000020, 0x00000042}, // L[2]
{0x00000004, 0x00000020, 0x00000005, 0x0000001F, 0x0000002C}, // L[3]
}, // T.state[10].w =  16
{ // R[11] abcde
{0x0000008A, 0x000000A0, 0x00000004, 0x000000F5, 0x00000000}, // L[0]
{0x00000010, 0x00000002, 0x00000082, 0x000000E4, 0x00000000}, // L[1]
{0x00000042, 0x00000086, 0x00000021, 0x00000013, 0x00000000}, // L[2]
{0x0000002C, 0x00000020, 0x00000005, 0x00000099, 0x00000000}, // L[3]
}, // T.state[11].w =  14
// T.w =  89
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 89 -> 84
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000081}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x00000042}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000081, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x0000000F, 0x00000042, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x00000092, 0x00000082, 0x00000000, 0x00000014}, // L[2]
{0x00000089, 0x00000035, 0x00000000, 0x000000B3, 0x00000084}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000081, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x0000000F, 0x00000042, 0x000000A1, 0x00000021}, // L[1]
{0x00000014, 0x00000092, 0x00000082, 0x0000000A, 0x00000088}, // L[2]
{0x00000084, 0x00000035, 0x00000000, 0x0000009B, 0x0000008D}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000028}, // L[0]
{0x00000001, 0x000000C5, 0x00000021, 0x000000A1, 0x00000044}, // L[1]
{0x00000014, 0x00000043, 0x00000088, 0x0000000A, 0x00000071}, // L[2]
{0x00000084, 0x00000017, 0x0000008D, 0x0000009B, 0x0000009D}, // L[3]
}, // T.state[10].w =  16
{ // R[11] abcde
{0x00000028, 0x00000010, 0x00000080, 0x00000059, 0x00000000}, // L[0]
{0x00000044, 0x000000C5, 0x00000021, 0x0000002F, 0x00000000}, // L[1]
{0x00000071, 0x00000043, 0x00000088, 0x000000DB, 0x00000000}, // L[2]
{0x0000009D, 0x00000017, 0x0000008D, 0x00000030, 0x00000000}, // L[3]
}, // T.state[11].w =  16
// T.w =  84
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 84 -> 83
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000081}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x00000042}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000081, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x0000000F, 0x00000042, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x00000092, 0x00000082, 0x00000000, 0x00000014}, // L[2]
{0x00000089, 0x00000035, 0x00000000, 0x000000B3, 0x00000084}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000081, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x0000000F, 0x00000042, 0x000000A1, 0x00000021}, // L[1]
{0x00000014, 0x00000092, 0x00000082, 0x0000000A, 0x0000009C}, // L[2]
{0x00000084, 0x00000035, 0x00000000, 0x0000009B, 0x00000099}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000008}, // L[0]
{0x00000001, 0x000000C5, 0x00000021, 0x000000A1, 0x00000044}, // L[1]
{0x00000014, 0x000000C1, 0x0000009C, 0x0000000A, 0x00000055}, // L[2]
{0x00000084, 0x00000095, 0x00000099, 0x0000009B, 0x00000011}, // L[3]
}, // T.state[10].w =  16
{ // R[11] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000044, 0x000000C5, 0x00000021, 0x0000002F, 0x00000000}, // L[1]
{0x00000055, 0x000000C1, 0x0000009C, 0x000000FA, 0x00000000}, // L[2]
{0x00000011, 0x00000095, 0x00000099, 0x00000054, 0x00000000}, // L[3]
}, // T.state[11].w =  15
// T.w =  83
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 83 -> 82
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000081}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x00000042}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000081, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x0000000F, 0x00000042, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x00000092, 0x00000082, 0x00000000, 0x00000014}, // L[2]
{0x00000089, 0x00000035, 0x00000000, 0x000000B3, 0x00000084}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000081, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x0000000F, 0x00000042, 0x000000A1, 0x00000023}, // L[1]
{0x00000014, 0x00000092, 0x00000082, 0x0000000A, 0x00000098}, // L[2]
{0x00000084, 0x00000035, 0x00000000, 0x0000009B, 0x000000A9}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000008}, // L[0]
{0x00000001, 0x00000085, 0x00000023, 0x000000A1, 0x00000084}, // L[1]
{0x00000014, 0x00000041, 0x00000098, 0x0000000A, 0x00000055}, // L[2]
{0x00000084, 0x00000093, 0x000000A9, 0x0000009B, 0x00000013}, // L[3]
}, // T.state[10].w =  16
{ // R[11] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000084, 0x00000085, 0x00000023, 0x00000029, 0x00000000}, // L[1]
{0x00000055, 0x00000041, 0x00000098, 0x000000FA, 0x00000000}, // L[2]
{0x00000013, 0x00000093, 0x000000A9, 0x00000044, 0x00000000}, // L[3]
}, // T.state[11].w =  14
// T.w =  82
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 82 -> 81
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000081}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x00000042}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000081, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x0000000F, 0x00000042, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x00000092, 0x00000082, 0x00000000, 0x00000014}, // L[2]
{0x00000089, 0x00000035, 0x00000000, 0x000000B3, 0x00000094}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000081, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x0000000F, 0x00000042, 0x000000A1, 0x00000025}, // L[1]
{0x00000014, 0x00000092, 0x00000082, 0x0000000A, 0x00000088}, // L[2]
{0x00000094, 0x00000035, 0x00000000, 0x00000093, 0x00000093}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000008}, // L[0]
{0x00000001, 0x00000045, 0x00000025, 0x000000A1, 0x00000044}, // L[1]
{0x00000014, 0x00000043, 0x00000088, 0x0000000A, 0x00000071}, // L[2]
{0x00000094, 0x000000D4, 0x00000093, 0x00000093, 0x00000040}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000044, 0x00000045, 0x00000025, 0x0000002F, 0x00000000}, // L[1]
{0x00000071, 0x00000043, 0x00000088, 0x000000DB, 0x00000000}, // L[2]
{0x00000040, 0x000000D4, 0x00000093, 0x0000009E, 0x00000000}, // L[3]
}, // T.state[11].w =  14
// T.w =  81
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 81 -> 80
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000081}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x00000042}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000081, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x0000000F, 0x00000042, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x00000092, 0x00000082, 0x00000000, 0x00000014}, // L[2]
{0x00000089, 0x00000035, 0x00000000, 0x000000B3, 0x00000094}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000081, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x0000000F, 0x00000042, 0x000000A1, 0x00000023}, // L[1]
{0x00000014, 0x00000092, 0x00000082, 0x0000000A, 0x0000009C}, // L[2]
{0x00000094, 0x00000035, 0x00000000, 0x00000093, 0x00000091}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000008}, // L[0]
{0x00000001, 0x00000085, 0x00000023, 0x000000A1, 0x00000086}, // L[1]
{0x00000014, 0x000000C1, 0x0000009C, 0x0000000A, 0x00000057}, // L[2]
{0x00000094, 0x00000094, 0x00000091, 0x00000093, 0x00000000}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000086, 0x00000085, 0x00000023, 0x00000039, 0x00000000}, // L[1]
{0x00000057, 0x000000C1, 0x0000009C, 0x000000EA, 0x00000000}, // L[2]
{0x00000000, 0x00000094, 0x00000091, 0x0000009C, 0x00000000}, // L[3]
}, // T.state[11].w =  13
// T.w =  80
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 80 -> 79
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000081}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x00000042}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000081, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x0000000F, 0x00000042, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x00000092, 0x00000082, 0x00000000, 0x00000014}, // L[2]
{0x00000089, 0x00000035, 0x00000000, 0x000000B3, 0x00000094}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000081, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x0000000F, 0x00000042, 0x000000A1, 0x00000027}, // L[1]
{0x00000014, 0x00000092, 0x00000082, 0x0000000A, 0x0000009C}, // L[2]
{0x00000094, 0x00000035, 0x00000000, 0x00000093, 0x000000B1}, // L[3]
}, // T.state[ 9].w =  17
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000008}, // L[0]
{0x00000001, 0x00000005, 0x00000027, 0x000000A1, 0x00000004}, // L[1]
{0x00000014, 0x000000C1, 0x0000009C, 0x0000000A, 0x00000057}, // L[2]
{0x00000094, 0x00000090, 0x000000B1, 0x00000093, 0x00000004}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000004, 0x00000005, 0x00000027, 0x0000002D, 0x00000000}, // L[1]
{0x00000057, 0x000000C1, 0x0000009C, 0x000000EA, 0x00000000}, // L[2]
{0x00000004, 0x00000090, 0x000000B1, 0x000000BC, 0x00000000}, // L[3]
}, // T.state[11].w =  12
// T.w =  79
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 79 -> 78
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000081}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x0000006A}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000081, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x0000000F, 0x0000006A, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x000000C2, 0x00000002, 0x00000000, 0x00000020}, // L[2]
{0x00000089, 0x00000034, 0x00000000, 0x000000B3, 0x00000085}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000081, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x0000000F, 0x0000006A, 0x000000A1, 0x0000000D}, // L[1]
{0x00000020, 0x000000C2, 0x00000002, 0x00000010, 0x00000012}, // L[2]
{0x00000085, 0x00000034, 0x00000000, 0x0000001B, 0x00000039}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000018}, // L[0]
{0x00000001, 0x00000040, 0x0000000D, 0x000000A1, 0x00000043}, // L[1]
{0x00000020, 0x0000001A, 0x00000012, 0x00000010, 0x0000000A}, // L[2]
{0x00000085, 0x000000A1, 0x00000039, 0x0000001B, 0x00000024}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000018, 0x00000010, 0x00000080, 0x000000D8, 0x00000000}, // L[0]
{0x00000043, 0x00000040, 0x0000000D, 0x00000017, 0x00000000}, // L[1]
{0x0000000A, 0x0000001A, 0x00000012, 0x000000D0, 0x00000000}, // L[2]
{0x00000024, 0x000000A1, 0x00000039, 0x000000F9, 0x00000000}, // L[3]
}, // T.state[11].w =  12
// T.w =  78
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 78 -> 77
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000081}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x0000006A}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000081, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x0000000F, 0x0000006A, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x000000C2, 0x00000002, 0x00000000, 0x00000020}, // L[2]
{0x00000089, 0x00000034, 0x00000000, 0x000000B3, 0x00000085}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000081, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x0000000F, 0x0000006A, 0x000000A1, 0x0000000F}, // L[1]
{0x00000020, 0x000000C2, 0x00000002, 0x00000010, 0x00000012}, // L[2]
{0x00000085, 0x00000034, 0x00000000, 0x0000001B, 0x00000019}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000008}, // L[0]
{0x00000001, 0x00000000, 0x0000000F, 0x000000A1, 0x00000001}, // L[1]
{0x00000020, 0x0000001A, 0x00000012, 0x00000010, 0x0000000A}, // L[2]
{0x00000085, 0x000000A5, 0x00000019, 0x0000001B, 0x00000022}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000001, 0x00000000, 0x0000000F, 0x00000005, 0x00000000}, // L[1]
{0x0000000A, 0x0000001A, 0x00000012, 0x000000D0, 0x00000000}, // L[2]
{0x00000022, 0x000000A5, 0x00000019, 0x000000C9, 0x00000000}, // L[3]
}, // T.state[11].w =  11
// T.w =  77
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 77 -> 76
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000058}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000018, 0x000000C0, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000066}, // L[2]
{0x00000090, 0x0000000B, 0x00000058, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000008, 0x00000018, 0x000000C0, 0x00000042, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000066, 0x00000006, 0x00000010, 0x000000B3, 0x00000085}, // L[2]
{0x00000089, 0x0000000B, 0x00000058, 0x0000000E, 0x00000062}, // L[3]
}, // T.state[ 7].w =  10
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000085, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x00000007, 0x00000062, 0x00000042, 0x00000001}, // L[1]
{0x00000066, 0x000000D2, 0x00000002, 0x00000000, 0x00000010}, // L[2]
{0x00000089, 0x00000034, 0x00000000, 0x000000B3, 0x000000B7}, // L[3]
}, // T.state[ 8].w =  13
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000085, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x00000007, 0x00000062, 0x000000A1, 0x00000007}, // L[1]
{0x00000010, 0x000000D2, 0x00000002, 0x00000008, 0x0000001A}, // L[2]
{0x000000B7, 0x00000034, 0x00000000, 0x00000002, 0x00000002}, // L[3]
}, // T.state[ 9].w =  16
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000008}, // L[0]
{0x00000001, 0x00000000, 0x00000007, 0x000000A1, 0x00000001}, // L[1]
{0x00000010, 0x00000019, 0x0000001A, 0x00000008, 0x00000009}, // L[2]
{0x000000B7, 0x000000C6, 0x00000002, 0x00000002, 0x00000019}, // L[3]
}, // T.state[10].w =  12
{ // R[11] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000001, 0x00000000, 0x00000007, 0x00000005, 0x00000000}, // L[1]
{0x00000009, 0x00000019, 0x0000001A, 0x00000008, 0x00000000}, // L[2]
{0x00000019, 0x000000C6, 0x00000002, 0x000000D8, 0x00000000}, // L[3]
}, // T.state[11].w =  13
// T.w =  76
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 76 -> 75
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x0000000C, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000018, 0x00000000, 0x00000000, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000002, 0x00000002}, // L[0]
{0x00000008, 0x00000008, 0x0000000C, 0x00000004, 0x00000000}, // L[1]
{0x00000088, 0x00000018, 0x00000000, 0x00000044, 0x0000004C}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000008, 0x00000001, 0x00000000, 0x00000004, 0x00000019}, // L[1]
{0x00000088, 0x0000008A, 0x0000004C, 0x00000044, 0x00000002}, // L[2]
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[3]
}, // T.state[ 6].w =   7
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000019, 0x00000001, 0x00000000, 0x000000E8, 0x00000028}, // L[1]
{0x00000002, 0x0000008A, 0x0000004C, 0x00000032, 0x00000006}, // L[2]
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[3]
}, // T.state[ 7].w =   7
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000052, 0x00000006, 0x00000012, 0x00000092}, // L[0]
{0x00000019, 0x00000019, 0x00000010, 0x00000012, 0x00000000}, // L[1]
{0x00000002, 0x000000A0, 0x00000010, 0x000000E8, 0x000000A2}, // L[2]
{0x00000040, 0x000000A0, 0x00000028, 0x00000032, 0x00000020}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000092, 0x00000052, 0x00000006, 0x00000040, 0x0000004A}, // L[0]
{0x00000000, 0x00000019, 0x00000010, 0x00000009, 0x00000009}, // L[1]
{0x000000A2, 0x000000A0, 0x00000010, 0x00000025, 0x00000015}, // L[2]
{0x00000020, 0x000000A0, 0x00000028, 0x00000009, 0x00000061}, // L[3]
}, // T.state[ 9].w =  12
{ // R[10] abcde
{0x00000092, 0x00000003, 0x0000004A, 0x00000040, 0x00000091}, // L[0]
{0x00000000, 0x00000002, 0x00000009, 0x00000009, 0x00000002}, // L[1]
{0x000000A2, 0x000000B6, 0x00000015, 0x00000025, 0x00000014}, // L[2]
{0x00000020, 0x00000038, 0x00000061, 0x00000009, 0x00000048}, // L[3]
}, // T.state[10].w =  13
{ // R[11] abcde
{0x00000091, 0x00000003, 0x0000004A, 0x0000008E, 0x00000000}, // L[0]
{0x00000002, 0x00000002, 0x00000009, 0x00000058, 0x00000000}, // L[1]
{0x00000014, 0x000000B6, 0x00000015, 0x00000089, 0x00000000}, // L[2]
{0x00000048, 0x00000038, 0x00000061, 0x0000000A, 0x00000000}, // L[3]
}, // T.state[11].w =  13
// T.w =  75
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 75 -> 73
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x0000000C, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000018, 0x00000000, 0x00000000, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000002, 0x00000002}, // L[0]
{0x00000008, 0x00000008, 0x0000000C, 0x00000004, 0x00000000}, // L[1]
{0x00000088, 0x00000018, 0x00000000, 0x00000044, 0x0000004C}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000008, 0x00000001, 0x00000000, 0x00000004, 0x00000019}, // L[1]
{0x00000088, 0x0000008A, 0x0000004C, 0x00000044, 0x00000002}, // L[2]
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[3]
}, // T.state[ 6].w =   7
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000019, 0x00000001, 0x00000000, 0x000000E8, 0x00000028}, // L[1]
{0x00000002, 0x0000008A, 0x0000004C, 0x00000032, 0x00000006}, // L[2]
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[3]
}, // T.state[ 7].w =   7
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000052, 0x00000006, 0x00000012, 0x00000092}, // L[0]
{0x00000019, 0x00000019, 0x00000010, 0x00000012, 0x00000000}, // L[1]
{0x00000002, 0x000000A0, 0x00000010, 0x000000E8, 0x000000A2}, // L[2]
{0x00000040, 0x000000A0, 0x00000028, 0x00000032, 0x00000020}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000092, 0x00000052, 0x00000006, 0x00000040, 0x0000004A}, // L[0]
{0x00000000, 0x00000019, 0x00000010, 0x00000009, 0x00000009}, // L[1]
{0x000000A2, 0x000000A0, 0x00000010, 0x00000025, 0x00000035}, // L[2]
{0x00000020, 0x000000A0, 0x00000028, 0x00000009, 0x00000021}, // L[3]
}, // T.state[ 9].w =  12
{ // R[10] abcde
{0x00000092, 0x00000003, 0x0000004A, 0x00000040, 0x00000091}, // L[0]
{0x00000000, 0x00000002, 0x00000009, 0x00000009, 0x00000002}, // L[1]
{0x000000A2, 0x000000B2, 0x00000035, 0x00000025, 0x00000010}, // L[2]
{0x00000020, 0x00000030, 0x00000021, 0x00000009, 0x00000010}, // L[3]
}, // T.state[10].w =  13
{ // R[11] abcde
{0x00000091, 0x00000003, 0x0000004A, 0x0000008E, 0x00000000}, // L[0]
{0x00000002, 0x00000002, 0x00000009, 0x00000058, 0x00000000}, // L[1]
{0x00000010, 0x000000B2, 0x00000035, 0x000000A9, 0x00000000}, // L[2]
{0x00000010, 0x00000030, 0x00000021, 0x000000C8, 0x00000000}, // L[3]
}, // T.state[11].w =  11
// T.w =  73
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 73 -> 72
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x0000000C, 0x00000000, 0x00000008}, // L[1]
{0x00000080, 0x00000018, 0x00000000, 0x00000000, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000002, 0x00000002}, // L[0]
{0x00000008, 0x00000008, 0x0000000C, 0x00000004, 0x00000000}, // L[1]
{0x00000088, 0x00000018, 0x00000000, 0x00000044, 0x0000004C}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[3]
}, // T.state[ 5].w =   4
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000008, 0x00000001, 0x00000000, 0x00000004, 0x00000019}, // L[1]
{0x00000088, 0x0000008A, 0x0000004C, 0x00000044, 0x00000002}, // L[2]
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[3]
}, // T.state[ 6].w =   7
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000019, 0x00000001, 0x00000000, 0x000000E8, 0x00000028}, // L[1]
{0x00000002, 0x0000008A, 0x0000004C, 0x00000032, 0x00000006}, // L[2]
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[3]
}, // T.state[ 7].w =   7
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000052, 0x00000006, 0x00000012, 0x00000092}, // L[0]
{0x00000019, 0x00000019, 0x00000010, 0x00000012, 0x00000000}, // L[1]
{0x00000002, 0x000000A0, 0x00000010, 0x000000E8, 0x000000A2}, // L[2]
{0x00000040, 0x000000A0, 0x00000028, 0x00000032, 0x00000020}, // L[3]
}, // T.state[ 8].w =  14
{ // R[ 9] abcde
{0x00000092, 0x00000052, 0x00000006, 0x00000040, 0x000000CA}, // L[0]
{0x00000000, 0x00000019, 0x00000010, 0x00000009, 0x00000019}, // L[1]
{0x000000A2, 0x000000A0, 0x00000010, 0x00000025, 0x00000035}, // L[2]
{0x00000020, 0x000000A0, 0x00000028, 0x00000009, 0x00000021}, // L[3]
}, // T.state[ 9].w =  12
{ // R[10] abcde
{0x00000092, 0x00000013, 0x000000CA, 0x00000040, 0x00000081}, // L[0]
{0x00000000, 0x00000000, 0x00000019, 0x00000009, 0x00000000}, // L[1]
{0x000000A2, 0x000000B2, 0x00000035, 0x00000025, 0x00000014}, // L[2]
{0x00000020, 0x00000030, 0x00000021, 0x00000009, 0x00000010}, // L[3]
}, // T.state[10].w =  13
{ // R[11] abcde
{0x00000081, 0x00000013, 0x000000CA, 0x0000000E, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000019, 0x00000048, 0x00000000}, // L[1]
{0x00000014, 0x000000B2, 0x00000035, 0x00000089, 0x00000000}, // L[2]
{0x00000010, 0x00000030, 0x00000021, 0x000000C8, 0x00000000}, // L[3]
}, // T.state[11].w =  10
// T.w =  72
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 72 -> 71
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000008}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000008, 0x00000008, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000008, 0x00000081, 0x00000004, 0x00000004, 0x00000099}, // L[2]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000EC, 0x000000A0}, // L[2]
{0x00000088, 0x00000018, 0x000000C0, 0x00000046, 0x0000000A}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x00000041}, // L[0]
{0x00000080, 0x00000042, 0x0000000A, 0x00000012, 0x00000046}, // L[1]
{0x00000099, 0x00000024, 0x00000010, 0x00000004, 0x000000C5}, // L[2]
{0x00000088, 0x000000A0, 0x00000000, 0x000000EC, 0x00000028}, // L[3]
}, // T.state[ 8].w =  12
{ // R[ 9] abcde
{0x00000041, 0x00000001, 0x000000A0, 0x00000083, 0x00000023}, // L[0]
{0x00000046, 0x00000042, 0x0000000A, 0x0000002A, 0x00000070}, // L[1]
{0x000000C5, 0x00000024, 0x00000010, 0x000000E0, 0x00000030}, // L[2]
{0x00000028, 0x000000A0, 0x00000000, 0x00000062, 0x000000E2}, // L[3]
}, // T.state[ 9].w =  14
{ // R[10] abcde
{0x00000041, 0x00000044, 0x00000023, 0x00000083, 0x00000005}, // L[0]
{0x00000046, 0x00000046, 0x00000070, 0x0000002A, 0x00000000}, // L[1]
{0x000000C5, 0x00000082, 0x00000030, 0x000000E0, 0x00000041}, // L[2]
{0x00000028, 0x00000048, 0x000000E2, 0x00000062, 0x00000030}, // L[3]
}, // T.state[10].w =  14
{ // R[11] abcde
{0x00000005, 0x00000044, 0x00000023, 0x00000034, 0x00000000}, // L[0]
{0x00000000, 0x00000046, 0x00000070, 0x00000051, 0x00000000}, // L[1]
{0x00000041, 0x00000082, 0x00000030, 0x0000000D, 0x00000000}, // L[2]
{0x00000030, 0x00000048, 0x000000E2, 0x00000092, 0x00000000}, // L[3]
}, // T.state[11].w =  14
// T.w =  71
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 71 -> 70
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000020}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000020, 0x00000000, 0x00000010, 0x00000010}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000048, 0x00000048}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000018}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000020, 0x00000006, 0x00000010, 0x00000010, 0x00000062}, // L[2]
{0x00000090, 0x00000009, 0x00000048, 0x00000048, 0x00000089}, // L[3]
}, // T.state[ 6].w =   4
{ // R[ 7] abcde
{0x00000018, 0x00000008, 0x00000040, 0x000000C2, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000062, 0x00000006, 0x00000010, 0x00000093, 0x00000085}, // L[2]
{0x00000089, 0x00000009, 0x00000048, 0x0000000E, 0x0000005A}, // L[3]
}, // T.state[ 7].w =   8
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000018, 0x00000000, 0x00000085, 0x0000000E, 0x00000008}, // L[0]
{0x00000000, 0x00000007, 0x0000005A, 0x000000C2, 0x00000001}, // L[1]
{0x00000062, 0x000000A6, 0x00000082, 0x00000000, 0x00000000}, // L[2]
{0x00000089, 0x00000015, 0x00000000, 0x00000093, 0x00000094}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000085, 0x00000003, 0x00000080}, // L[0]
{0x00000001, 0x00000007, 0x0000005A, 0x000000E1, 0x0000000F}, // L[1]
{0x00000000, 0x000000A6, 0x00000082, 0x00000000, 0x00000086}, // L[2]
{0x00000094, 0x00000015, 0x00000000, 0x00000083, 0x00000085}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000003, 0x00000008}, // L[0]
{0x00000001, 0x00000001, 0x0000000F, 0x000000E1, 0x00000000}, // L[1]
{0x00000000, 0x00000004, 0x00000086, 0x00000000, 0x00000004}, // L[2]
{0x00000094, 0x00000012, 0x00000085, 0x00000083, 0x00000082}, // L[3]
}, // T.state[10].w =  16
{ // R[11] abcde
{0x00000008, 0x00000010, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000000, 0x00000001, 0x0000000F, 0x0000000F, 0x00000000}, // L[1]
{0x00000004, 0x00000004, 0x00000086, 0x00000020, 0x00000000}, // L[2]
{0x00000082, 0x00000012, 0x00000085, 0x00000008, 0x00000000}, // L[3]
}, // T.state[11].w =   8
// T.w =  70
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 70 -> 53
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000013}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   2
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000013, 0x00000030, 0x00000080, 0x0000009C, 0x00000014}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   4
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000014, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x00000048, 0x00000000, 0x00000000, 0x000000F8}, // L[1]
{0x00000013, 0x00000000, 0x00000000, 0x00000080, 0x00000015}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x0000009C, 0x00000000}, // L[3]
}, // T.state[ 8].w =   5
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000014, 0x00000010, 0x00000004}, // L[0]
{0x000000F8, 0x00000048, 0x00000000, 0x0000007C, 0x00000004}, // L[1]
{0x00000015, 0x00000000, 0x00000000, 0x000000CA, 0x0000004A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x0000004E, 0x00000042}, // L[3]
}, // T.state[ 9].w =   7
{ // R[10] abcde
{0x00000020, 0x00000084, 0x00000004, 0x00000010, 0x000000A4}, // L[0]
{0x000000F8, 0x00000089, 0x00000004, 0x0000007C, 0x00000001}, // L[1]
{0x00000015, 0x00000049, 0x0000004A, 0x000000CA, 0x00000044}, // L[2]
{0x00000000, 0x00000048, 0x00000042, 0x0000004E, 0x00000048}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x000000A4, 0x00000084, 0x00000004, 0x000000A5, 0x00000000}, // L[0]
{0x00000001, 0x00000089, 0x00000004, 0x000000EB, 0x00000000}, // L[1]
{0x00000044, 0x00000049, 0x0000004A, 0x00000074, 0x00000000}, // L[2]
{0x00000048, 0x00000048, 0x00000042, 0x00000030, 0x00000000}, // L[3]
}, // T.state[11].w =  16
// T.w =  53
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 53 -> 52
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000013}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   2
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000013, 0x00000030, 0x00000080, 0x0000009C, 0x00000014}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   4
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000014, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x00000048, 0x00000000, 0x00000000, 0x000000F8}, // L[1]
{0x00000013, 0x00000000, 0x00000000, 0x00000080, 0x00000015}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x0000009C, 0x00000000}, // L[3]
}, // T.state[ 8].w =   5
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000014, 0x00000010, 0x00000024}, // L[0]
{0x000000F8, 0x00000048, 0x00000000, 0x0000007C, 0x00000004}, // L[1]
{0x00000015, 0x00000000, 0x00000000, 0x000000CA, 0x0000004A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x0000004E, 0x00000042}, // L[3]
}, // T.state[ 9].w =   7
{ // R[10] abcde
{0x00000020, 0x00000080, 0x00000024, 0x00000010, 0x000000A0}, // L[0]
{0x000000F8, 0x00000089, 0x00000004, 0x0000007C, 0x00000003}, // L[1]
{0x00000015, 0x00000049, 0x0000004A, 0x000000CA, 0x00000046}, // L[2]
{0x00000000, 0x00000048, 0x00000042, 0x0000004E, 0x00000048}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x000000A0, 0x00000080, 0x00000024, 0x00000085, 0x00000000}, // L[0]
{0x00000003, 0x00000089, 0x00000004, 0x000000FB, 0x00000000}, // L[1]
{0x00000046, 0x00000049, 0x0000004A, 0x00000064, 0x00000000}, // L[2]
{0x00000048, 0x00000048, 0x00000042, 0x00000030, 0x00000000}, // L[3]
}, // T.state[11].w =  15
// T.w =  52
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 52 -> 44
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000013}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   2
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000013, 0x00000030, 0x00000080, 0x0000009C, 0x00000034}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   4
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000034, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x00000008, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000013, 0x00000000, 0x00000000, 0x00000080, 0x00000011}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x0000009C, 0x00000000}, // L[3]
}, // T.state[ 8].w =   5
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000034, 0x00000010, 0x00000004}, // L[0]
{0x00000008, 0x00000008, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000011, 0x00000000, 0x00000000, 0x000000C8, 0x00000048}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x0000004E, 0x000000C2}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000020, 0x00000084, 0x00000004, 0x00000010, 0x000000A4}, // L[0]
{0x00000008, 0x00000081, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x00000011, 0x00000009, 0x00000048, 0x000000C8, 0x00000018}, // L[2]
{0x00000000, 0x00000058, 0x000000C2, 0x0000004E, 0x00000068}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x000000A4, 0x00000084, 0x00000004, 0x000000A5, 0x00000000}, // L[0]
{0x00000089, 0x00000081, 0x00000004, 0x0000006C, 0x00000000}, // L[1]
{0x00000018, 0x00000009, 0x00000048, 0x00000086, 0x00000000}, // L[2]
{0x00000068, 0x00000058, 0x000000C2, 0x00000031, 0x00000000}, // L[3]
}, // T.state[11].w =  12
// T.w =  44
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 44 -> 42
#if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 12
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000013}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   2
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000013, 0x00000030, 0x00000080, 0x0000009C, 0x00000034}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   4
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000034, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x00000008, 0x00000000, 0x00000000, 0x00000008}, // L[1]
{0x00000013, 0x00000000, 0x00000000, 0x00000080, 0x00000011}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x0000009C, 0x00000000}, // L[3]
}, // T.state[ 8].w =   5
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000034, 0x00000010, 0x00000024}, // L[0]
{0x00000008, 0x00000008, 0x00000000, 0x00000004, 0x00000004}, // L[1]
{0x00000011, 0x00000000, 0x00000000, 0x000000C8, 0x00000048}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x0000004E, 0x00000042}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000020, 0x00000080, 0x00000024, 0x00000010, 0x000000E0}, // L[0]
{0x00000008, 0x00000081, 0x00000004, 0x00000004, 0x00000089}, // L[1]
{0x00000011, 0x00000009, 0x00000048, 0x000000C8, 0x0000000A}, // L[2]
{0x00000000, 0x00000048, 0x00000042, 0x0000004E, 0x00000058}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x000000E0, 0x00000080, 0x00000024, 0x00000087, 0x00000000}, // L[0]
{0x00000089, 0x00000081, 0x00000004, 0x0000006C, 0x00000000}, // L[1]
{0x0000000A, 0x00000009, 0x00000048, 0x00000016, 0x00000000}, // L[2]
{0x00000058, 0x00000048, 0x00000042, 0x000000B0, 0x00000000}, // L[3]
}, // T.state[11].w =  10
// T.w =  42
};
#endif // #if 1 // WORD_SIZE 8 nrounds 11 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 11 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3
B[ 4]  4
B[ 5]  5
B[ 6]  6
B[ 7]  8
B[ 8] 11
B[ 9] 26
B[10] 42

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 142 -> 141
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x00000092}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000018}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000042}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000081}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x0000005C, 0x00000092, 0x00000002, 0x00000044}, // L[0]
{0x000000C1, 0x0000005A, 0x00000018, 0x000000CA, 0x000000A9}, // L[1]
{0x00000011, 0x000000AA, 0x00000042, 0x000000D4, 0x000000C9}, // L[2]
{0x00000005, 0x000000A6, 0x00000081, 0x00000088, 0x000000A3}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000044, 0x0000005C, 0x00000092, 0x00000032, 0x00000080}, // L[0]
{0x000000A9, 0x0000005A, 0x00000018, 0x0000001B, 0x00000001}, // L[1]
{0x000000C9, 0x000000AA, 0x00000042, 0x000000E8, 0x0000006A}, // L[2]
{0x000000A3, 0x000000A6, 0x00000081, 0x00000059, 0x00000048}, // L[3]
}, // T.state[11].w =  21
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000044, 0x000000B6, 0x0000006A, 0x00000059, 0x00000000}, // L[0]
{0x000000A9, 0x00000081, 0x00000048, 0x00000032, 0x00000000}, // L[1]
{0x000000C9, 0x000000DD, 0x00000080, 0x0000001B, 0x00000000}, // L[2]
{0x000000A3, 0x000000B9, 0x00000001, 0x000000E8, 0x00000000}, // L[3]
}, // T.state[12].w =  18
// T.w = 141
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 141 -> 140
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x00000092}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000018}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000042}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000081}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x0000005C, 0x00000092, 0x00000002, 0x00000044}, // L[0]
{0x000000C1, 0x0000005A, 0x00000018, 0x000000CA, 0x00000029}, // L[1]
{0x00000011, 0x000000AA, 0x00000042, 0x000000D4, 0x000000D9}, // L[2]
{0x00000005, 0x000000A6, 0x00000081, 0x00000088, 0x000000AB}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000044, 0x0000005C, 0x00000092, 0x00000032, 0x00000080}, // L[0]
{0x00000029, 0x0000005A, 0x00000018, 0x0000001F, 0x00000001}, // L[1]
{0x000000D9, 0x000000AA, 0x00000042, 0x00000068, 0x0000006A}, // L[2]
{0x000000AB, 0x000000A6, 0x00000081, 0x00000019, 0x00000088}, // L[3]
}, // T.state[11].w =  21
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000044, 0x000000B6, 0x0000006A, 0x00000019, 0x00000000}, // L[0]
{0x00000029, 0x00000081, 0x00000088, 0x00000032, 0x00000000}, // L[1]
{0x000000D9, 0x0000005C, 0x00000080, 0x0000001F, 0x00000000}, // L[2]
{0x000000AB, 0x000000B9, 0x00000001, 0x00000068, 0x00000000}, // L[3]
}, // T.state[12].w =  17
// T.w = 140
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 140 -> 139
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x00000092}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000018}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000042}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000081}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x0000005C, 0x00000092, 0x00000002, 0x0000004C}, // L[0]
{0x000000C1, 0x0000005A, 0x00000018, 0x000000CA, 0x00000089}, // L[1]
{0x00000011, 0x000000AA, 0x00000042, 0x000000D4, 0x00000099}, // L[2]
{0x00000005, 0x000000A6, 0x00000081, 0x00000088, 0x000000AB}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x0000004C, 0x0000005C, 0x00000092, 0x00000072, 0x00000000}, // L[0]
{0x00000089, 0x0000005A, 0x00000018, 0x0000001A, 0x00000002}, // L[1]
{0x00000099, 0x000000AA, 0x00000042, 0x0000006A, 0x00000028}, // L[2]
{0x000000AB, 0x000000A6, 0x00000081, 0x00000019, 0x00000088}, // L[3]
}, // T.state[11].w =  21
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x0000004C, 0x000000B0, 0x00000028, 0x00000019, 0x00000000}, // L[0]
{0x00000089, 0x00000005, 0x00000088, 0x00000072, 0x00000000}, // L[1]
{0x00000099, 0x0000005C, 0x00000000, 0x0000001A, 0x00000000}, // L[2]
{0x000000AB, 0x000000B8, 0x00000002, 0x0000006A, 0x00000000}, // L[3]
}, // T.state[12].w =  16
// T.w = 139
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 139 -> 138
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x00000092}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000098}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000042}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000081}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x0000005C, 0x00000092, 0x00000002, 0x0000004C}, // L[0]
{0x000000C1, 0x0000004A, 0x00000098, 0x000000CA, 0x00000089}, // L[1]
{0x00000011, 0x000000AA, 0x00000042, 0x000000D4, 0x0000009F}, // L[2]
{0x00000005, 0x000000A6, 0x00000081, 0x00000088, 0x000000A9}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x0000004C, 0x0000005C, 0x00000092, 0x00000072, 0x00000040}, // L[0]
{0x00000089, 0x0000004A, 0x00000098, 0x0000001A, 0x00000082}, // L[1]
{0x0000009F, 0x000000AA, 0x00000042, 0x0000005A, 0x0000000C}, // L[2]
{0x000000A9, 0x000000A6, 0x00000081, 0x00000009, 0x00000088}, // L[3]
}, // T.state[11].w =  20
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x0000004C, 0x00000091, 0x0000000C, 0x00000009, 0x00000000}, // L[0]
{0x00000089, 0x0000004D, 0x00000088, 0x00000072, 0x00000000}, // L[1]
{0x0000009F, 0x0000005C, 0x00000040, 0x0000001A, 0x00000000}, // L[2]
{0x000000A9, 0x00000038, 0x00000082, 0x0000005A, 0x00000000}, // L[3]
}, // T.state[12].w =  16
// T.w = 138
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 138 -> 137
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x00000092}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000008}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000066}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000081}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x0000005C, 0x00000092, 0x00000002, 0x00000054}, // L[0]
{0x000000C1, 0x00000058, 0x00000008, 0x000000CA, 0x0000008B}, // L[1]
{0x00000011, 0x0000002E, 0x00000066, 0x000000D4, 0x00000015}, // L[2]
{0x00000005, 0x000000A6, 0x00000081, 0x00000088, 0x000000A1}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000054, 0x0000005C, 0x00000092, 0x000000B2, 0x00000000}, // L[0]
{0x0000008B, 0x00000058, 0x00000008, 0x0000000A, 0x00000002}, // L[1]
{0x00000015, 0x0000002E, 0x00000066, 0x0000000E, 0x00000020}, // L[2]
{0x000000A1, 0x000000A6, 0x00000081, 0x00000049, 0x00000048}, // L[3]
}, // T.state[11].w =  20
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000054, 0x000000B4, 0x00000020, 0x00000049, 0x00000000}, // L[0]
{0x0000008B, 0x0000001C, 0x00000048, 0x000000B2, 0x00000000}, // L[1]
{0x00000015, 0x000000DD, 0x00000000, 0x0000000A, 0x00000000}, // L[2]
{0x000000A1, 0x000000B8, 0x00000002, 0x0000000E, 0x00000000}, // L[3]
}, // T.state[12].w =  15
// T.w = 137
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 137 -> 136
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x000000B2}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000008}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000046}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000091}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x00000058, 0x000000B2, 0x00000002, 0x00000040}, // L[0]
{0x000000C1, 0x00000058, 0x00000008, 0x000000CA, 0x00000009}, // L[1]
{0x00000011, 0x0000002A, 0x00000046, 0x000000D4, 0x0000000D}, // L[2]
{0x00000005, 0x000000A4, 0x00000091, 0x00000088, 0x000000A9}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000040, 0x00000058, 0x000000B2, 0x00000012, 0x00000080}, // L[0]
{0x00000009, 0x00000058, 0x00000008, 0x0000001E, 0x00000002}, // L[1]
{0x0000000D, 0x0000002A, 0x00000046, 0x000000CE, 0x00000000}, // L[2]
{0x000000A9, 0x000000A4, 0x00000091, 0x00000009, 0x00000088}, // L[3]
}, // T.state[11].w =  17
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x000000B4, 0x00000000, 0x00000009, 0x00000000}, // L[0]
{0x00000009, 0x00000054, 0x00000088, 0x00000012, 0x00000000}, // L[1]
{0x0000000D, 0x00000058, 0x00000080, 0x0000001E, 0x00000000}, // L[2]
{0x000000A9, 0x000000B1, 0x00000002, 0x000000CE, 0x00000000}, // L[3]
}, // T.state[12].w =  17
// T.w = 136
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 136 -> 135
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x000000B2}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000008}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000046}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000091}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x00000058, 0x000000B2, 0x00000002, 0x00000040}, // L[0]
{0x000000C1, 0x00000058, 0x00000008, 0x000000CA, 0x00000089}, // L[1]
{0x00000011, 0x0000002A, 0x00000046, 0x000000D4, 0x0000004D}, // L[2]
{0x00000005, 0x000000A4, 0x00000091, 0x00000088, 0x000000A9}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000040, 0x00000058, 0x000000B2, 0x00000012, 0x000000C0}, // L[0]
{0x00000089, 0x00000058, 0x00000008, 0x0000001A, 0x00000002}, // L[1]
{0x0000004D, 0x0000002A, 0x00000046, 0x000000CC, 0x00000002}, // L[2]
{0x000000A9, 0x000000A4, 0x00000091, 0x00000009, 0x00000088}, // L[3]
}, // T.state[11].w =  17
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x000000B4, 0x00000002, 0x00000009, 0x00000000}, // L[0]
{0x00000089, 0x00000050, 0x00000088, 0x00000012, 0x00000000}, // L[1]
{0x0000004D, 0x00000058, 0x000000C0, 0x0000001A, 0x00000000}, // L[2]
{0x000000A9, 0x00000031, 0x00000002, 0x000000CC, 0x00000000}, // L[3]
}, // T.state[12].w =  16
// T.w = 135
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 135 -> 134
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x000000B2}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000008}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000046}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000091}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x00000058, 0x000000B2, 0x00000002, 0x00000040}, // L[0]
{0x000000C1, 0x00000058, 0x00000008, 0x000000CA, 0x00000089}, // L[1]
{0x00000011, 0x0000002A, 0x00000046, 0x000000D4, 0x0000001D}, // L[2]
{0x00000005, 0x000000A4, 0x00000091, 0x00000088, 0x000000A9}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000040, 0x00000058, 0x000000B2, 0x00000012, 0x00000080}, // L[0]
{0x00000089, 0x00000058, 0x00000008, 0x0000001A, 0x00000002}, // L[1]
{0x0000001D, 0x0000002A, 0x00000046, 0x0000004E, 0x00000000}, // L[2]
{0x000000A9, 0x000000A4, 0x00000091, 0x00000009, 0x00000088}, // L[3]
}, // T.state[11].w =  17
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x000000B4, 0x00000000, 0x00000009, 0x00000000}, // L[0]
{0x00000089, 0x00000054, 0x00000088, 0x00000012, 0x00000000}, // L[1]
{0x0000001D, 0x00000058, 0x00000080, 0x0000001A, 0x00000000}, // L[2]
{0x000000A9, 0x000000B1, 0x00000002, 0x0000004E, 0x00000000}, // L[3]
}, // T.state[12].w =  15
// T.w = 134
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 134 -> 133
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x000000C1}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x000000B2}, // L[0]
{0x000000C1, 0x000000CA, 0x000000C2, 0x000000CA, 0x00000008}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000046}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000091}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x00000058, 0x000000B2, 0x00000002, 0x00000050}, // L[0]
{0x000000C1, 0x00000058, 0x00000008, 0x000000CA, 0x0000008B}, // L[1]
{0x00000011, 0x0000002A, 0x00000046, 0x000000D4, 0x0000001D}, // L[2]
{0x00000005, 0x000000A4, 0x00000091, 0x00000088, 0x000000A9}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000050, 0x00000058, 0x000000B2, 0x00000092, 0x00000000}, // L[0]
{0x0000008B, 0x00000058, 0x00000008, 0x0000000A, 0x00000002}, // L[1]
{0x0000001D, 0x0000002A, 0x00000046, 0x0000004E, 0x00000000}, // L[2]
{0x000000A9, 0x000000A4, 0x00000091, 0x00000009, 0x00000088}, // L[3]
}, // T.state[11].w =  17
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000050, 0x000000B4, 0x00000000, 0x00000009, 0x00000000}, // L[0]
{0x0000008B, 0x00000054, 0x00000088, 0x00000092, 0x00000000}, // L[1]
{0x0000001D, 0x00000058, 0x00000000, 0x0000000A, 0x00000000}, // L[2]
{0x000000A9, 0x000000B0, 0x00000002, 0x0000004E, 0x00000000}, // L[3]
}, // T.state[12].w =  14
// T.w = 133
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 133 -> 132
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000008}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x00000051}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000025}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000008, 0x00000070, 0x00000090, 0x00000002, 0x00000092}, // L[0]
{0x00000051, 0x000000CA, 0x000000C2, 0x00000082, 0x00000040}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x00000042}, // L[2]
{0x00000025, 0x000000B4, 0x00000009, 0x00000098, 0x00000081}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000008, 0x0000005C, 0x00000092, 0x00000002, 0x00000044}, // L[0]
{0x00000051, 0x00000051, 0x00000040, 0x00000082, 0x00000082}, // L[1]
{0x00000011, 0x000000AA, 0x00000042, 0x000000D4, 0x0000008D}, // L[2]
{0x00000025, 0x000000A6, 0x00000081, 0x00000098, 0x00000089}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000044, 0x0000005C, 0x00000092, 0x00000032, 0x000000C0}, // L[0]
{0x00000082, 0x00000051, 0x00000040, 0x00000000, 0x00000040}, // L[1]
{0x0000008D, 0x000000AA, 0x00000042, 0x000000CA, 0x0000001C}, // L[2]
{0x00000089, 0x000000A6, 0x00000081, 0x00000088, 0x00000019}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000044, 0x00000022, 0x0000001C, 0x00000088, 0x00000000}, // L[0]
{0x00000082, 0x0000006D, 0x00000019, 0x00000032, 0x00000000}, // L[1]
{0x0000008D, 0x0000007F, 0x000000C0, 0x00000000, 0x00000000}, // L[2]
{0x00000089, 0x00000039, 0x00000040, 0x000000CA, 0x00000000}, // L[3]
}, // T.state[12].w =  12
// T.w = 132
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 132 -> 131
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000028}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x00000051}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000028, 0x00000070, 0x00000090, 0x00000012, 0x00000082}, // L[0]
{0x00000051, 0x000000CA, 0x000000C2, 0x00000082, 0x00000040}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x0000004A}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000091}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000028, 0x0000005E, 0x00000082, 0x00000012, 0x00000042}, // L[0]
{0x00000051, 0x00000051, 0x00000040, 0x00000082, 0x00000082}, // L[1]
{0x00000011, 0x000000AB, 0x0000004A, 0x000000D4, 0x00000098}, // L[2]
{0x00000005, 0x000000A4, 0x00000091, 0x00000088, 0x000000A9}, // L[3]
}, // T.state[10].w =  14
{ // R[11] abcde
{0x00000042, 0x0000005E, 0x00000082, 0x00000082, 0x00000000}, // L[0]
{0x00000082, 0x00000051, 0x00000040, 0x00000000, 0x00000040}, // L[1]
{0x00000098, 0x000000AB, 0x0000004A, 0x00000062, 0x00000028}, // L[2]
{0x000000A9, 0x000000A4, 0x00000091, 0x00000009, 0x000000A8}, // L[3]
}, // T.state[11].w =  19
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000042, 0x00000022, 0x00000028, 0x00000009, 0x00000000}, // L[0]
{0x00000082, 0x00000007, 0x000000A8, 0x00000082, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x000000A9, 0x000000BC, 0x00000040, 0x00000062, 0x00000000}, // L[3]
}, // T.state[12].w =  11
// T.w = 131
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 131 -> 130
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000010}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000080, 0x00000030}, // L[3]
}, // T.state[ 2].w =   2
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000010, 0x00000010, 0x00000080, 0x00000084, 0x00000004}, // L[2]
{0x00000030, 0x00000010, 0x00000000, 0x00000085, 0x00000085}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000085, 0x00000000}, // L[0]
{0x00000000, 0x00000028, 0x00000085, 0x00000000, 0x00000038}, // L[1]
{0x00000010, 0x0000002B, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x00000084, 0x00000010}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x000000C2, 0x00000042}, // L[0]
{0x00000038, 0x00000028, 0x00000085, 0x0000001C, 0x000000A1}, // L[1]
{0x00000009, 0x0000002B, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x0000004A, 0x0000004A}, // L[3]
}, // T.state[ 5].w =   9
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x000000C2, 0x00000048}, // L[0]
{0x00000038, 0x00000031, 0x000000A1, 0x0000001C, 0x0000000B}, // L[1]
{0x00000009, 0x000000F5, 0x00000084, 0x00000084, 0x00000006}, // L[2]
{0x00000010, 0x00000049, 0x0000004A, 0x0000004A, 0x000000CB}, // L[3]
}, // T.state[ 6].w =  14
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000054, 0x00000012}, // L[0]
{0x0000000B, 0x00000031, 0x000000A1, 0x000000B8, 0x00000009}, // L[1]
{0x00000006, 0x000000F5, 0x00000084, 0x00000014, 0x00000090}, // L[2]
{0x000000CB, 0x00000049, 0x0000004A, 0x0000000C, 0x000000C2}, // L[3]
}, // T.state[ 7].w =  17
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000070, 0x00000090, 0x0000000C, 0x00000028}, // L[0]
{0x0000000B, 0x000000CA, 0x000000C2, 0x00000054, 0x00000051}, // L[1]
{0x00000006, 0x00000017, 0x00000012, 0x000000B8, 0x00000011}, // L[2]
{0x000000CB, 0x000000B4, 0x00000009, 0x00000014, 0x00000005}, // L[3]
}, // T.state[ 8].w =  16
{ // R[ 9] abcde
{0x00000028, 0x00000070, 0x00000090, 0x00000012, 0x00000082}, // L[0]
{0x00000051, 0x000000CA, 0x000000C2, 0x00000082, 0x00000040}, // L[1]
{0x00000011, 0x00000017, 0x00000012, 0x000000D4, 0x0000004A}, // L[2]
{0x00000005, 0x000000B4, 0x00000009, 0x00000088, 0x00000091}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000028, 0x0000005E, 0x00000082, 0x00000012, 0x00000042}, // L[0]
{0x00000051, 0x00000051, 0x00000040, 0x00000082, 0x00000082}, // L[1]
{0x00000011, 0x000000AB, 0x0000004A, 0x000000D4, 0x000000DC}, // L[2]
{0x00000005, 0x000000A4, 0x00000091, 0x00000088, 0x000000A9}, // L[3]
}, // T.state[10].w =  14
{ // R[11] abcde
{0x00000042, 0x0000005E, 0x00000082, 0x00000082, 0x00000000}, // L[0]
{0x00000082, 0x00000051, 0x00000040, 0x00000000, 0x00000040}, // L[1]
{0x000000DC, 0x000000AB, 0x0000004A, 0x00000040, 0x0000000E}, // L[2]
{0x000000A9, 0x000000A4, 0x00000091, 0x00000009, 0x00000088}, // L[3]
}, // T.state[11].w =  19
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000042, 0x00000022, 0x0000000E, 0x00000009, 0x00000000}, // L[0]
{0x00000082, 0x0000004B, 0x00000088, 0x00000082, 0x00000000}, // L[1]
{0x000000DC, 0x00000058, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x000000A9, 0x000000BC, 0x00000040, 0x00000040, 0x00000000}, // L[3]
}, // T.state[12].w =  10
// T.w = 130
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 130 -> 108
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000025}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000A6}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000018}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x00000049}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000084, 0x00000025, 0x000000C3, 0x00000045}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000028, 0x00000042}, // L[1]
{0x00000094, 0x000000A3, 0x00000018, 0x00000048, 0x00000031}, // L[2]
{0x0000003B, 0x00000015, 0x00000049, 0x000000CB, 0x00000000}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x00000045, 0x00000084, 0x00000025, 0x00000034, 0x00000011}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000053, 0x00000011}, // L[1]
{0x00000031, 0x000000A3, 0x00000018, 0x000000CB, 0x00000041}, // L[2]
{0x00000000, 0x00000015, 0x00000049, 0x0000005E, 0x00000009}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000045, 0x0000001B, 0x00000041, 0x0000005E, 0x00000000}, // L[0]
{0x00000042, 0x000000C5, 0x00000009, 0x00000034, 0x00000000}, // L[1]
{0x00000031, 0x00000038, 0x00000011, 0x00000053, 0x00000000}, // L[2]
{0x00000000, 0x0000002B, 0x00000011, 0x000000CB, 0x00000000}, // L[3]
}, // T.state[12].w =  23
// T.w = 108
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 108 -> 107
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000025}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000A6}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000018}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x00000049}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000084, 0x00000025, 0x000000C3, 0x00000045}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000028, 0x00000042}, // L[1]
{0x00000094, 0x000000A3, 0x00000018, 0x00000048, 0x00000031}, // L[2]
{0x0000003B, 0x00000015, 0x00000049, 0x000000CB, 0x00000060}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x00000045, 0x00000084, 0x00000025, 0x00000034, 0x00000011}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000053, 0x00000011}, // L[1]
{0x00000031, 0x000000A3, 0x00000018, 0x000000CB, 0x00000045}, // L[2]
{0x00000060, 0x00000015, 0x00000049, 0x0000005D, 0x0000000C}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000045, 0x0000001B, 0x00000045, 0x0000005D, 0x00000000}, // L[0]
{0x00000042, 0x000000CD, 0x0000000C, 0x00000034, 0x00000000}, // L[1]
{0x00000031, 0x00000032, 0x00000011, 0x00000053, 0x00000000}, // L[2]
{0x00000060, 0x0000002B, 0x00000011, 0x000000CB, 0x00000000}, // L[3]
}, // T.state[12].w =  22
// T.w = 107
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 107 -> 106
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000025}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000A6}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000018}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x00000049}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000084, 0x00000025, 0x000000C3, 0x00000045}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000028, 0x000000E2}, // L[1]
{0x00000094, 0x000000A3, 0x00000018, 0x00000048, 0x00000011}, // L[2]
{0x0000003B, 0x00000015, 0x00000049, 0x000000CB, 0x00000000}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x00000045, 0x00000084, 0x00000025, 0x00000034, 0x00000011}, // L[0]
{0x000000E2, 0x0000009C, 0x000000A6, 0x00000056, 0x00000010}, // L[1]
{0x00000011, 0x000000A3, 0x00000018, 0x000000CA, 0x00000042}, // L[2]
{0x00000000, 0x00000015, 0x00000049, 0x0000005E, 0x00000001}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000045, 0x00000019, 0x00000042, 0x0000005E, 0x00000000}, // L[0]
{0x000000E2, 0x000000C3, 0x00000001, 0x00000034, 0x00000000}, // L[1]
{0x00000011, 0x00000028, 0x00000011, 0x00000056, 0x00000000}, // L[2]
{0x00000000, 0x0000002B, 0x00000010, 0x000000CA, 0x00000000}, // L[3]
}, // T.state[12].w =  21
// T.w = 106
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 106 -> 105
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000025}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000A6}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000018}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x00000049}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000084, 0x00000025, 0x000000C3, 0x00000045}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000028, 0x00000062}, // L[1]
{0x00000094, 0x000000A3, 0x00000018, 0x00000048, 0x00000011}, // L[2]
{0x0000003B, 0x00000015, 0x00000049, 0x000000CB, 0x00000060}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x00000045, 0x00000084, 0x00000025, 0x00000034, 0x00000031}, // L[0]
{0x00000062, 0x0000009C, 0x000000A6, 0x00000052, 0x00000010}, // L[1]
{0x00000011, 0x000000A3, 0x00000018, 0x000000CA, 0x00000042}, // L[2]
{0x00000060, 0x00000015, 0x00000049, 0x0000005D, 0x0000000C}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000045, 0x00000019, 0x00000042, 0x0000005D, 0x00000000}, // L[0]
{0x00000062, 0x000000C3, 0x0000000C, 0x00000034, 0x00000000}, // L[1]
{0x00000011, 0x00000032, 0x00000031, 0x00000052, 0x00000000}, // L[2]
{0x00000060, 0x0000006B, 0x00000010, 0x000000CA, 0x00000000}, // L[3]
}, // T.state[12].w =  20
// T.w = 105
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 105 -> 104
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000025}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000A6}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000018}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x00000049}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000084, 0x00000025, 0x000000C3, 0x000000C5}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000028, 0x00000062}, // L[1]
{0x00000094, 0x000000A3, 0x00000018, 0x00000048, 0x00000051}, // L[2]
{0x0000003B, 0x00000015, 0x00000049, 0x000000CB, 0x00000060}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x000000C5, 0x00000084, 0x00000025, 0x00000030, 0x00000015}, // L[0]
{0x00000062, 0x0000009C, 0x000000A6, 0x00000052, 0x00000010}, // L[1]
{0x00000051, 0x000000A3, 0x00000018, 0x000000C8, 0x00000040}, // L[2]
{0x00000060, 0x00000015, 0x00000049, 0x0000005D, 0x00000004}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x000000C5, 0x00000019, 0x00000040, 0x0000005D, 0x00000000}, // L[0]
{0x00000062, 0x000000C7, 0x00000004, 0x00000030, 0x00000000}, // L[1]
{0x00000051, 0x00000022, 0x00000015, 0x00000052, 0x00000000}, // L[2]
{0x00000060, 0x00000023, 0x00000010, 0x000000C8, 0x00000000}, // L[3]
}, // T.state[12].w =  19
// T.w = 104
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 104 -> 103
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000025}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000A6}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000018}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x00000049}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000084, 0x00000025, 0x000000C3, 0x00000045}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000028, 0x000000EA}, // L[1]
{0x00000094, 0x000000A3, 0x00000018, 0x00000048, 0x00000059}, // L[2]
{0x0000003B, 0x00000015, 0x00000049, 0x000000CB, 0x0000006C}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x00000045, 0x00000084, 0x00000025, 0x00000034, 0x00000011}, // L[0]
{0x000000EA, 0x0000009C, 0x000000A6, 0x00000016, 0x00000090}, // L[1]
{0x00000059, 0x000000A3, 0x00000018, 0x00000088, 0x00000080}, // L[2]
{0x0000006C, 0x00000015, 0x00000049, 0x0000003D, 0x00000044}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000045, 0x00000018, 0x00000080, 0x0000003D, 0x00000000}, // L[0]
{0x000000EA, 0x00000046, 0x00000044, 0x00000034, 0x00000000}, // L[1]
{0x00000059, 0x000000A2, 0x00000011, 0x00000016, 0x00000000}, // L[2]
{0x0000006C, 0x0000002B, 0x00000090, 0x00000088, 0x00000000}, // L[3]
}, // T.state[12].w =  18
// T.w = 103
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 103 -> 102
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000025}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000A6}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000018}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x00000049}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000084, 0x00000025, 0x000000C3, 0x00000047}, // L[0]
{0x00000042, 0x0000009C, 0x000000A6, 0x00000028, 0x0000007A}, // L[1]
{0x00000094, 0x000000A3, 0x00000018, 0x00000048, 0x00000051}, // L[2]
{0x0000003B, 0x00000015, 0x00000049, 0x000000CB, 0x00000040}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x00000047, 0x00000084, 0x00000025, 0x00000024, 0x00000001}, // L[0]
{0x0000007A, 0x0000009C, 0x000000A6, 0x00000092, 0x00000010}, // L[1]
{0x00000051, 0x000000A3, 0x00000018, 0x000000C8, 0x00000040}, // L[2]
{0x00000040, 0x00000015, 0x00000049, 0x0000005C, 0x00000005}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000047, 0x00000019, 0x00000040, 0x0000005C, 0x00000000}, // L[0]
{0x0000007A, 0x000000C7, 0x00000005, 0x00000024, 0x00000000}, // L[1]
{0x00000051, 0x00000020, 0x00000001, 0x00000092, 0x00000000}, // L[2]
{0x00000040, 0x0000000B, 0x00000010, 0x000000C8, 0x00000000}, // L[3]
}, // T.state[12].w =  17
// T.w = 102
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 102 -> 101
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000021}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000E2}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000028}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x000000CB}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000004, 0x00000021, 0x000000C3, 0x000000C5}, // L[0]
{0x00000042, 0x00000014, 0x000000E2, 0x00000028, 0x00000072}, // L[1]
{0x00000094, 0x000000A5, 0x00000028, 0x00000048, 0x00000019}, // L[2]
{0x0000003B, 0x00000045, 0x000000CB, 0x000000CB, 0x00000080}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x000000C5, 0x00000004, 0x00000021, 0x00000030, 0x00000011}, // L[0]
{0x00000072, 0x00000014, 0x000000E2, 0x000000D2, 0x00000010}, // L[1]
{0x00000019, 0x000000A5, 0x00000028, 0x0000008A, 0x000000A2}, // L[2]
{0x00000080, 0x00000045, 0x000000CB, 0x0000005A, 0x00000011}, // L[3]
}, // T.state[11].w =  20
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x000000C5, 0x00000008, 0x000000A2, 0x0000005A, 0x00000000}, // L[0]
{0x00000072, 0x0000000E, 0x00000011, 0x00000030, 0x00000000}, // L[1]
{0x00000019, 0x000000A8, 0x00000011, 0x000000D2, 0x00000000}, // L[2]
{0x00000080, 0x0000002A, 0x00000010, 0x0000008A, 0x00000000}, // L[3]
}, // T.state[12].w =  18
// T.w = 101
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 101 -> 100
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000021}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000E2}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x00000028}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x000000CB}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000004, 0x00000021, 0x000000C3, 0x000000C5}, // L[0]
{0x00000042, 0x00000014, 0x000000E2, 0x00000028, 0x00000072}, // L[1]
{0x00000094, 0x000000A5, 0x00000028, 0x00000048, 0x00000059}, // L[2]
{0x0000003B, 0x00000045, 0x000000CB, 0x000000CB, 0x00000080}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x000000C5, 0x00000004, 0x00000021, 0x00000030, 0x00000011}, // L[0]
{0x00000072, 0x00000014, 0x000000E2, 0x000000D2, 0x00000014}, // L[1]
{0x00000059, 0x000000A5, 0x00000028, 0x00000088, 0x000000A0}, // L[2]
{0x00000080, 0x00000045, 0x000000CB, 0x0000005A, 0x00000001}, // L[3]
}, // T.state[11].w =  20
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x000000C5, 0x00000000, 0x000000A0, 0x0000005A, 0x00000000}, // L[0]
{0x00000072, 0x0000000A, 0x00000001, 0x00000030, 0x00000000}, // L[1]
{0x00000059, 0x00000088, 0x00000011, 0x000000D2, 0x00000000}, // L[2]
{0x00000080, 0x0000002A, 0x00000014, 0x00000088, 0x00000000}, // L[3]
}, // T.state[12].w =  17
// T.w = 100
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 100 -> 99
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000021}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000E2}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x000000A8}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x0000004B}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000004, 0x00000021, 0x000000C3, 0x000000C7}, // L[0]
{0x00000042, 0x00000014, 0x000000E2, 0x00000028, 0x00000072}, // L[1]
{0x00000094, 0x000000B5, 0x000000A8, 0x00000048, 0x0000004B}, // L[2]
{0x0000003B, 0x00000055, 0x0000004B, 0x000000CB, 0x000000C0}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x000000C7, 0x00000004, 0x00000021, 0x00000020, 0x00000001}, // L[0]
{0x00000072, 0x00000014, 0x000000E2, 0x000000D2, 0x00000010}, // L[1]
{0x0000004B, 0x000000B5, 0x000000A8, 0x00000018, 0x00000080}, // L[2]
{0x000000C0, 0x00000055, 0x0000004B, 0x00000058, 0x00000001}, // L[3]
}, // T.state[11].w =  20
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x000000C7, 0x00000008, 0x00000080, 0x00000058, 0x00000000}, // L[0]
{0x00000072, 0x0000006A, 0x00000001, 0x00000020, 0x00000000}, // L[1]
{0x0000004B, 0x000000A8, 0x00000001, 0x000000D2, 0x00000000}, // L[2]
{0x000000C0, 0x0000000A, 0x00000010, 0x00000018, 0x00000000}, // L[3]
}, // T.state[12].w =  16
// T.w =  99
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 99 -> 98
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 2].w =   0
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000000, 0x00000018}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000006}, // L[0]
{0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000004}, // L[1]
{0x00000018, 0x00000008, 0x00000000, 0x0000000C, 0x00000004}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x000000C0, 0x00000006, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000080, 0x00000004, 0x00000000, 0x00000080}, // L[1]
{0x00000018, 0x00000081, 0x00000004, 0x0000000C, 0x00000099}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   5
{ // R[ 7] abcde
{0x00000040, 0x000000C0, 0x00000006, 0x00000012, 0x00000030}, // L[0]
{0x00000080, 0x00000080, 0x00000004, 0x00000004, 0x00000000}, // L[1]
{0x00000099, 0x00000081, 0x00000004, 0x000000AC, 0x000000A0}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x0000008A}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000001, 0x000000A0, 0x00000046, 0x000000C1}, // L[0]
{0x00000080, 0x00000042, 0x0000008A, 0x00000012, 0x00000042}, // L[1]
{0x00000099, 0x00000005, 0x00000030, 0x00000004, 0x00000094}, // L[2]
{0x00000088, 0x000000E1, 0x00000000, 0x000000AC, 0x0000003B}, // L[3]
}, // T.state[ 8].w =  11
{ // R[ 9] abcde
{0x000000C1, 0x00000001, 0x000000A0, 0x000000C3, 0x00000021}, // L[0]
{0x00000042, 0x00000042, 0x0000008A, 0x00000028, 0x000000E2}, // L[1]
{0x00000094, 0x00000005, 0x00000030, 0x00000048, 0x000000A8}, // L[2]
{0x0000003B, 0x000000E1, 0x00000000, 0x000000CB, 0x000000CB}, // L[3]
}, // T.state[ 9].w =  15
{ // R[10] abcde
{0x000000C1, 0x00000004, 0x00000021, 0x000000C3, 0x000000C7}, // L[0]
{0x00000042, 0x00000014, 0x000000E2, 0x00000028, 0x00000076}, // L[1]
{0x00000094, 0x000000B5, 0x000000A8, 0x00000048, 0x00000049}, // L[2]
{0x0000003B, 0x00000045, 0x000000CB, 0x000000CB, 0x00000082}, // L[3]
}, // T.state[10].w =  18
{ // R[11] abcde
{0x000000C7, 0x00000004, 0x00000021, 0x00000020, 0x00000001}, // L[0]
{0x00000076, 0x00000014, 0x000000E2, 0x000000F2, 0x00000010}, // L[1]
{0x00000049, 0x000000B5, 0x000000A8, 0x00000008, 0x000000A0}, // L[2]
{0x00000082, 0x00000045, 0x000000CB, 0x0000004A, 0x00000001}, // L[3]
}, // T.state[11].w =  20
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x000000C7, 0x00000008, 0x000000A0, 0x0000004A, 0x00000000}, // L[0]
{0x00000076, 0x0000002A, 0x00000001, 0x00000020, 0x00000000}, // L[1]
{0x00000049, 0x00000088, 0x00000001, 0x000000F2, 0x00000000}, // L[2]
{0x00000082, 0x0000000A, 0x00000010, 0x00000008, 0x00000000}, // L[3]
}, // T.state[12].w =  15
// T.w =  98
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 98 -> 96
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000060}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x000000B0}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000020, 0x00000000, 0x00000030, 0x00000010}, // L[2]
{0x000000B0, 0x00000000, 0x00000000, 0x00000058, 0x00000078}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000006, 0x00000010, 0x00000030, 0x000000A2}, // L[2]
{0x000000B0, 0x0000000F, 0x00000078, 0x00000058, 0x00000081}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x000000A2, 0x00000006, 0x00000010, 0x00000094, 0x00000084}, // L[2]
{0x00000081, 0x0000000F, 0x00000078, 0x000000CE, 0x00000042}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000084, 0x000000CE, 0x00000008}, // L[0]
{0x00000000, 0x00000005, 0x00000042, 0x00000042, 0x00000005}, // L[1]
{0x000000A2, 0x0000009A, 0x00000002, 0x00000000, 0x0000002C}, // L[2]
{0x00000081, 0x00000014, 0x00000000, 0x00000094, 0x00000097}, // L[3]
}, // T.state[ 8].w =  12
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000084, 0x00000063, 0x00000021}, // L[0]
{0x00000005, 0x00000005, 0x00000042, 0x000000A3, 0x00000021}, // L[1]
{0x0000002C, 0x0000009A, 0x00000002, 0x00000016, 0x00000010}, // L[2]
{0x00000097, 0x00000014, 0x00000000, 0x00000081, 0x00000081}, // L[3]
}, // T.state[ 9].w =  12
{ // R[10] abcde
{0x00000008, 0x00000024, 0x00000021, 0x00000063, 0x00000064}, // L[0]
{0x00000005, 0x00000084, 0x00000021, 0x000000A3, 0x00000081}, // L[1]
{0x0000002C, 0x00000051, 0x00000010, 0x00000016, 0x00000015}, // L[2]
{0x00000097, 0x000000B2, 0x00000081, 0x00000081, 0x00000001}, // L[3]
}, // T.state[10].w =  16
{ // R[11] abcde
{0x00000064, 0x00000024, 0x00000021, 0x00000038, 0x00000009}, // L[0]
{0x00000081, 0x00000084, 0x00000021, 0x00000011, 0x00000010}, // L[1]
{0x00000015, 0x00000051, 0x00000010, 0x00000018, 0x00000008}, // L[2]
{0x00000001, 0x000000B2, 0x00000081, 0x00000004, 0x0000008F}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000064, 0x00000029, 0x00000008, 0x00000004, 0x00000000}, // L[0]
{0x00000081, 0x000000B2, 0x0000008F, 0x00000038, 0x00000000}, // L[1]
{0x00000015, 0x0000007A, 0x00000009, 0x00000011, 0x00000000}, // L[2]
{0x00000001, 0x0000005A, 0x00000010, 0x00000018, 0x00000000}, // L[3]
}, // T.state[12].w =  12
// T.w =  96
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 96 -> 95
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000060}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x000000B0}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000020, 0x00000000, 0x00000030, 0x00000010}, // L[2]
{0x000000B0, 0x00000000, 0x00000000, 0x00000058, 0x00000078}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000006, 0x00000010, 0x00000030, 0x000000A2}, // L[2]
{0x000000B0, 0x0000000F, 0x00000078, 0x00000058, 0x00000081}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x000000A2, 0x00000006, 0x00000010, 0x00000094, 0x00000084}, // L[2]
{0x00000081, 0x0000000F, 0x00000078, 0x000000CE, 0x00000042}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000084, 0x000000CE, 0x00000008}, // L[0]
{0x00000000, 0x00000005, 0x00000042, 0x00000042, 0x00000005}, // L[1]
{0x000000A2, 0x0000009A, 0x00000002, 0x00000000, 0x0000002C}, // L[2]
{0x00000081, 0x00000014, 0x00000000, 0x00000094, 0x00000097}, // L[3]
}, // T.state[ 8].w =  12
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000084, 0x00000063, 0x00000061}, // L[0]
{0x00000005, 0x00000005, 0x00000042, 0x000000A3, 0x00000021}, // L[1]
{0x0000002C, 0x0000009A, 0x00000002, 0x00000016, 0x00000018}, // L[2]
{0x00000097, 0x00000014, 0x00000000, 0x00000081, 0x00000081}, // L[3]
}, // T.state[ 9].w =  12
{ // R[10] abcde
{0x00000008, 0x0000002C, 0x00000061, 0x00000063, 0x0000006C}, // L[0]
{0x00000005, 0x00000084, 0x00000021, 0x000000A3, 0x00000083}, // L[1]
{0x0000002C, 0x00000050, 0x00000018, 0x00000016, 0x00000014}, // L[2]
{0x00000097, 0x000000B2, 0x00000081, 0x00000081, 0x00000001}, // L[3]
}, // T.state[10].w =  16
{ // R[11] abcde
{0x0000006C, 0x0000002C, 0x00000061, 0x00000078, 0x00000009}, // L[0]
{0x00000083, 0x00000084, 0x00000021, 0x00000001, 0x00000062}, // L[1]
{0x00000014, 0x00000050, 0x00000018, 0x00000010, 0x00000018}, // L[2]
{0x00000001, 0x000000B2, 0x00000081, 0x00000004, 0x00000085}, // L[3]
}, // T.state[11].w =  17
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x0000006C, 0x000000CD, 0x00000018, 0x00000004, 0x00000000}, // L[0]
{0x00000083, 0x00000090, 0x00000085, 0x00000078, 0x00000000}, // L[1]
{0x00000014, 0x0000006E, 0x00000009, 0x00000001, 0x00000000}, // L[2]
{0x00000001, 0x0000004A, 0x00000062, 0x00000010, 0x00000000}, // L[3]
}, // T.state[12].w =  12
// T.w =  95
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 95 -> 94
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000060}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x000000B0}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000020, 0x00000000, 0x00000030, 0x00000010}, // L[2]
{0x000000B0, 0x00000000, 0x00000000, 0x00000058, 0x00000078}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000006, 0x00000010, 0x00000030, 0x000000A2}, // L[2]
{0x000000B0, 0x0000000F, 0x00000078, 0x00000058, 0x00000081}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x000000A2, 0x00000006, 0x00000010, 0x00000094, 0x00000084}, // L[2]
{0x00000081, 0x0000000F, 0x00000078, 0x000000CE, 0x00000042}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000084, 0x000000CE, 0x00000008}, // L[0]
{0x00000000, 0x00000005, 0x00000042, 0x00000042, 0x00000005}, // L[1]
{0x000000A2, 0x0000009A, 0x00000082, 0x00000000, 0x00000008}, // L[2]
{0x00000081, 0x00000015, 0x00000000, 0x00000094, 0x00000094}, // L[3]
}, // T.state[ 8].w =  12
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000084, 0x00000063, 0x00000021}, // L[0]
{0x00000005, 0x00000005, 0x00000042, 0x000000A3, 0x000000A1}, // L[1]
{0x00000008, 0x0000009A, 0x00000082, 0x00000004, 0x0000008A}, // L[2]
{0x00000094, 0x00000015, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 9].w =  12
{ // R[10] abcde
{0x00000008, 0x00000024, 0x00000021, 0x00000063, 0x00000024}, // L[0]
{0x00000005, 0x00000094, 0x000000A1, 0x000000A3, 0x00000091}, // L[1]
{0x00000008, 0x00000002, 0x0000008A, 0x00000004, 0x0000000E}, // L[2]
{0x00000094, 0x000000A2, 0x00000000, 0x00000000, 0x00000012}, // L[3]
}, // T.state[10].w =  14
{ // R[11] abcde
{0x00000024, 0x00000024, 0x00000021, 0x0000003A, 0x00000009}, // L[0]
{0x00000091, 0x00000094, 0x000000A1, 0x00000091, 0x00000010}, // L[1]
{0x0000000E, 0x00000002, 0x0000008A, 0x00000050, 0x0000004A}, // L[2]
{0x00000012, 0x000000A2, 0x00000000, 0x00000090, 0x00000090}, // L[3]
}, // T.state[11].w =  14
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000024, 0x00000009, 0x0000004A, 0x00000090, 0x00000000}, // L[0]
{0x00000091, 0x00000090, 0x00000090, 0x0000003A, 0x00000000}, // L[1]
{0x0000000E, 0x00000064, 0x00000009, 0x00000091, 0x00000000}, // L[2]
{0x00000012, 0x0000005A, 0x00000010, 0x00000050, 0x00000000}, // L[3]
}, // T.state[12].w =  16
// T.w =  94
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 94 -> 93
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000060}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x000000B0}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000020, 0x00000000, 0x00000030, 0x00000010}, // L[2]
{0x000000B0, 0x00000000, 0x00000000, 0x00000058, 0x00000078}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000006, 0x00000010, 0x00000030, 0x000000A2}, // L[2]
{0x000000B0, 0x0000000F, 0x00000078, 0x00000058, 0x00000081}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x000000A2, 0x00000006, 0x00000010, 0x00000094, 0x00000084}, // L[2]
{0x00000081, 0x0000000F, 0x00000078, 0x000000CE, 0x00000042}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000084, 0x000000CE, 0x00000008}, // L[0]
{0x00000000, 0x00000005, 0x00000042, 0x00000042, 0x00000005}, // L[1]
{0x000000A2, 0x0000009A, 0x00000082, 0x00000000, 0x00000008}, // L[2]
{0x00000081, 0x00000015, 0x00000000, 0x00000094, 0x00000094}, // L[3]
}, // T.state[ 8].w =  12
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000084, 0x00000063, 0x00000061}, // L[0]
{0x00000005, 0x00000005, 0x00000042, 0x000000A3, 0x00000021}, // L[1]
{0x00000008, 0x0000009A, 0x00000082, 0x00000004, 0x0000008A}, // L[2]
{0x00000094, 0x00000015, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 9].w =  12
{ // R[10] abcde
{0x00000008, 0x0000002C, 0x00000061, 0x00000063, 0x00000064}, // L[0]
{0x00000005, 0x00000084, 0x00000021, 0x000000A3, 0x00000083}, // L[1]
{0x00000008, 0x00000002, 0x0000008A, 0x00000004, 0x0000000A}, // L[2]
{0x00000094, 0x000000A2, 0x00000000, 0x00000000, 0x00000032}, // L[3]
}, // T.state[10].w =  14
{ // R[11] abcde
{0x00000064, 0x0000002C, 0x00000061, 0x00000038, 0x00000009}, // L[0]
{0x00000083, 0x00000084, 0x00000021, 0x00000001, 0x00000020}, // L[1]
{0x0000000A, 0x00000002, 0x0000008A, 0x00000070, 0x0000001E}, // L[2]
{0x00000032, 0x000000A2, 0x00000000, 0x00000091, 0x00000093}, // L[3]
}, // T.state[11].w =  13
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000064, 0x00000049, 0x0000001E, 0x00000091, 0x00000000}, // L[0]
{0x00000083, 0x00000038, 0x00000093, 0x00000038, 0x00000000}, // L[1]
{0x0000000A, 0x00000062, 0x00000009, 0x00000001, 0x00000000}, // L[2]
{0x00000032, 0x0000004A, 0x00000020, 0x00000070, 0x00000000}, // L[3]
}, // T.state[12].w =  16
// T.w =  93
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 93 -> 92
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000080, 0x00000080, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000020, 0x00000000, 0x00000000, 0x00000060}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000000, 0x000000B0}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000020, 0x00000000, 0x00000030, 0x00000010}, // L[2]
{0x000000B0, 0x00000000, 0x00000000, 0x00000058, 0x00000078}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000008, 0x00000040, 0x00000040, 0x00000008}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000060, 0x00000006, 0x00000010, 0x00000030, 0x000000A2}, // L[2]
{0x000000B0, 0x0000000F, 0x00000078, 0x00000058, 0x00000081}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000008, 0x00000008, 0x00000040, 0x00000042, 0x00000082}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x000000A2, 0x00000006, 0x00000010, 0x00000094, 0x00000084}, // L[2]
{0x00000081, 0x0000000F, 0x00000078, 0x000000CE, 0x00000042}, // L[3]
}, // T.state[ 7].w =  12
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000008, 0x00000000, 0x00000084, 0x000000CE, 0x00000008}, // L[0]
{0x00000000, 0x00000005, 0x00000042, 0x00000042, 0x00000005}, // L[1]
{0x000000A2, 0x0000009A, 0x00000082, 0x00000000, 0x00000008}, // L[2]
{0x00000081, 0x00000015, 0x00000000, 0x00000094, 0x00000094}, // L[3]
}, // T.state[ 8].w =  12
{ // R[ 9] abcde
{0x00000008, 0x00000000, 0x00000084, 0x00000063, 0x00000061}, // L[0]
{0x00000005, 0x00000005, 0x00000042, 0x000000A3, 0x00000021}, // L[1]
{0x00000008, 0x0000009A, 0x00000082, 0x00000004, 0x0000008A}, // L[2]
{0x00000094, 0x00000015, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 9].w =  12
{ // R[10] abcde
{0x00000008, 0x0000002C, 0x00000061, 0x00000063, 0x00000064}, // L[0]
{0x00000005, 0x00000084, 0x00000021, 0x000000A3, 0x0000008B}, // L[1]
{0x00000008, 0x00000002, 0x0000008A, 0x00000004, 0x0000000E}, // L[2]
{0x00000094, 0x000000A2, 0x00000000, 0x00000000, 0x00000012}, // L[3]
}, // T.state[10].w =  14
{ // R[11] abcde
{0x00000064, 0x0000002C, 0x00000061, 0x00000038, 0x00000009}, // L[0]
{0x0000008B, 0x00000084, 0x00000021, 0x00000041, 0x00000022}, // L[1]
{0x0000000E, 0x00000002, 0x0000008A, 0x00000050, 0x0000004A}, // L[2]
{0x00000012, 0x000000A2, 0x00000000, 0x00000090, 0x00000090}, // L[3]
}, // T.state[11].w =  13
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000064, 0x0000004D, 0x0000004A, 0x00000090, 0x00000000}, // L[0]
{0x0000008B, 0x00000090, 0x00000090, 0x00000038, 0x00000000}, // L[1]
{0x0000000E, 0x00000064, 0x00000009, 0x00000041, 0x00000000}, // L[2]
{0x00000012, 0x0000004A, 0x00000022, 0x00000050, 0x00000000}, // L[3]
}, // T.state[12].w =  15
// T.w =  92
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 92 -> 91
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000019, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000084}, // L[1]
{0x00000009, 0x00000019, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000090, 0x00000084, 0x00000000, 0x00000090}, // L[1]
{0x00000009, 0x000000B3, 0x00000084, 0x00000084, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000090, 0x00000090, 0x00000084, 0x00000084, 0x00000000}, // L[1]
{0x00000088, 0x000000B3, 0x00000084, 0x00000060, 0x00000024}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000021, 0x00000024, 0x00000000, 0x000000A1}, // L[0]
{0x00000090, 0x0000002F, 0x00000000, 0x00000012, 0x00000089}, // L[1]
{0x00000088, 0x00000000, 0x00000010, 0x00000084, 0x00000088}, // L[2]
{0x00000000, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x000000A1, 0x00000021, 0x00000024, 0x000000D0, 0x00000034}, // L[0]
{0x00000089, 0x0000002F, 0x00000000, 0x000000CD, 0x000000CF}, // L[1]
{0x00000088, 0x00000000, 0x00000010, 0x00000006, 0x00000012}, // L[2]
{0x000000A0, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 9].w =  14
{ // R[10] abcde
{0x000000A1, 0x000000A2, 0x00000034, 0x000000D0, 0x00000043}, // L[0]
{0x00000089, 0x0000001C, 0x000000CF, 0x000000CD, 0x000000AD}, // L[1]
{0x00000088, 0x00000042, 0x00000012, 0x00000006, 0x0000004E}, // L[2]
{0x000000A0, 0x00000000, 0x000000A0, 0x00000060, 0x000000E0}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000043, 0x000000A2, 0x00000034, 0x0000009C, 0x00000090}, // L[0]
{0x000000AD, 0x0000001C, 0x000000CF, 0x00000003, 0x00000040}, // L[1]
{0x0000004E, 0x00000042, 0x00000012, 0x00000042, 0x00000050}, // L[2]
{0x000000E0, 0x00000000, 0x000000A0, 0x00000004, 0x000000EC}, // L[3]
}, // T.state[11].w =  15
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000043, 0x000000B8, 0x00000050, 0x00000004, 0x00000000}, // L[0]
{0x000000AD, 0x00000024, 0x000000EC, 0x0000009C, 0x00000000}, // L[1]
{0x0000004E, 0x000000D9, 0x00000090, 0x00000003, 0x00000000}, // L[2]
{0x000000E0, 0x00000064, 0x00000040, 0x00000042, 0x00000000}, // L[3]
}, // T.state[12].w =  17
// T.w =  91
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 91 -> 90
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000019, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000084}, // L[1]
{0x00000009, 0x00000019, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000090, 0x00000084, 0x00000000, 0x00000090}, // L[1]
{0x00000009, 0x000000B3, 0x00000084, 0x00000084, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000090, 0x00000090, 0x00000084, 0x00000084, 0x00000000}, // L[1]
{0x00000088, 0x000000B3, 0x00000084, 0x00000060, 0x000000E4}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000021, 0x000000E4, 0x00000000, 0x00000061}, // L[0]
{0x00000090, 0x000000AE, 0x00000000, 0x00000012, 0x00000036}, // L[1]
{0x00000088, 0x00000000, 0x00000010, 0x00000084, 0x00000088}, // L[2]
{0x00000000, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000061, 0x00000021, 0x000000E4, 0x000000B0, 0x00000014}, // L[0]
{0x00000036, 0x000000AE, 0x00000000, 0x00000012, 0x00000012}, // L[1]
{0x00000088, 0x00000000, 0x00000010, 0x00000006, 0x00000032}, // L[2]
{0x000000A0, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 9].w =  13
{ // R[10] abcde
{0x00000061, 0x000000A6, 0x00000014, 0x000000B0, 0x00000001}, // L[0]
{0x00000036, 0x00000097, 0x00000012, 0x00000012, 0x000000C1}, // L[1]
{0x00000088, 0x00000046, 0x00000032, 0x00000006, 0x00000042}, // L[2]
{0x000000A0, 0x00000000, 0x000000A0, 0x00000060, 0x000000E0}, // L[3]
}, // T.state[10].w =  12
{ // R[11] abcde
{0x00000001, 0x000000A6, 0x00000014, 0x0000008D, 0x00000081}, // L[0]
{0x000000C1, 0x00000097, 0x00000012, 0x0000009E, 0x00000080}, // L[1]
{0x00000042, 0x00000046, 0x00000032, 0x00000022, 0x00000010}, // L[2]
{0x000000E0, 0x00000000, 0x000000A0, 0x00000004, 0x000000A4}, // L[3]
}, // T.state[11].w =  19
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000001, 0x0000002E, 0x00000010, 0x00000004, 0x00000000}, // L[0]
{0x000000C1, 0x000000AC, 0x000000A4, 0x0000008D, 0x00000000}, // L[1]
{0x00000042, 0x00000049, 0x00000081, 0x0000009E, 0x00000000}, // L[2]
{0x000000E0, 0x0000004E, 0x00000080, 0x00000022, 0x00000000}, // L[3]
}, // T.state[12].w =  16
// T.w =  90
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 90 -> 88
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000019, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000084}, // L[1]
{0x00000009, 0x00000019, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000090, 0x00000084, 0x00000000, 0x00000090}, // L[1]
{0x00000009, 0x000000B3, 0x00000084, 0x00000084, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000090, 0x00000090, 0x00000084, 0x00000084, 0x00000000}, // L[1]
{0x00000088, 0x000000B3, 0x00000084, 0x00000060, 0x000000E4}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000021, 0x000000E4, 0x00000000, 0x000000E1}, // L[0]
{0x00000090, 0x000000AE, 0x00000000, 0x00000012, 0x00000016}, // L[1]
{0x00000088, 0x00000000, 0x00000010, 0x00000084, 0x00000098}, // L[2]
{0x00000000, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x000000E1, 0x00000021, 0x000000E4, 0x000000F0, 0x00000014}, // L[0]
{0x00000016, 0x000000AE, 0x00000000, 0x00000002, 0x00000002}, // L[1]
{0x00000098, 0x00000000, 0x00000010, 0x0000000E, 0x00000002}, // L[2]
{0x000000A0, 0x000000A0, 0x00000000, 0x00000060, 0x00000020}, // L[3]
}, // T.state[ 9].w =  13
{ // R[10] abcde
{0x000000E1, 0x000000A6, 0x00000014, 0x000000F0, 0x000000C5}, // L[0]
{0x00000016, 0x00000095, 0x00000002, 0x00000002, 0x00000083}, // L[1]
{0x00000098, 0x00000040, 0x00000002, 0x0000000E, 0x00000048}, // L[2]
{0x000000A0, 0x00000010, 0x00000020, 0x00000060, 0x000000F0}, // L[3]
}, // T.state[10].w =  12
{ // R[11] abcde
{0x000000C5, 0x000000A6, 0x00000014, 0x000000A9, 0x00000085}, // L[0]
{0x00000083, 0x00000095, 0x00000002, 0x0000000C, 0x00000002}, // L[1]
{0x00000048, 0x00000040, 0x00000002, 0x00000032, 0x00000010}, // L[2]
{0x000000F0, 0x00000010, 0x00000020, 0x00000084, 0x000000A4}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x000000C5, 0x0000002F, 0x00000010, 0x00000084, 0x00000000}, // L[0]
{0x00000083, 0x000000A0, 0x000000A4, 0x000000A9, 0x00000000}, // L[1]
{0x00000048, 0x00000069, 0x00000085, 0x0000000C, 0x00000000}, // L[2]
{0x000000F0, 0x00000046, 0x00000002, 0x00000032, 0x00000000}, // L[3]
}, // T.state[12].w =  15
// T.w =  88
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 88 -> 87
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000019, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000084}, // L[1]
{0x00000009, 0x00000019, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000090, 0x00000084, 0x00000000, 0x00000090}, // L[1]
{0x00000009, 0x000000B3, 0x00000084, 0x00000084, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000090, 0x00000090, 0x00000084, 0x00000084, 0x00000000}, // L[1]
{0x00000088, 0x000000B3, 0x00000084, 0x00000060, 0x000000E4}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000021, 0x000000E4, 0x00000000, 0x000000E1}, // L[0]
{0x00000090, 0x000000AE, 0x00000000, 0x00000012, 0x00000016}, // L[1]
{0x00000088, 0x00000000, 0x00000010, 0x00000084, 0x00000098}, // L[2]
{0x00000000, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x000000E1, 0x00000021, 0x000000E4, 0x000000F0, 0x0000001C}, // L[0]
{0x00000016, 0x000000AE, 0x00000000, 0x00000002, 0x00000002}, // L[1]
{0x00000098, 0x00000000, 0x00000010, 0x0000000E, 0x00000022}, // L[2]
{0x000000A0, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 9].w =  13
{ // R[10] abcde
{0x000000E1, 0x000000A7, 0x0000001C, 0x000000F0, 0x00000042}, // L[0]
{0x00000016, 0x00000095, 0x00000002, 0x00000002, 0x00000083}, // L[1]
{0x00000098, 0x00000044, 0x00000022, 0x0000000E, 0x0000004C}, // L[2]
{0x000000A0, 0x00000000, 0x000000A0, 0x00000060, 0x000000E0}, // L[3]
}, // T.state[10].w =  12
{ // R[11] abcde
{0x00000042, 0x000000A7, 0x0000001C, 0x00000095, 0x00000089}, // L[0]
{0x00000083, 0x00000095, 0x00000002, 0x0000000C, 0x00000002}, // L[1]
{0x0000004C, 0x00000044, 0x00000022, 0x00000012, 0x00000010}, // L[2]
{0x000000E0, 0x00000000, 0x000000A0, 0x00000004, 0x000000A4}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000042, 0x0000002F, 0x00000010, 0x00000004, 0x00000000}, // L[0]
{0x00000083, 0x000000A8, 0x000000A4, 0x00000095, 0x00000000}, // L[1]
{0x0000004C, 0x00000049, 0x00000089, 0x0000000C, 0x00000000}, // L[2]
{0x000000E0, 0x0000005C, 0x00000002, 0x00000012, 0x00000000}, // L[3]
}, // T.state[12].w =  14
// T.w =  87
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 87 -> 86
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000004, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000019, 0x00000000, 0x00000000, 0x00000009}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000002}, // L[0]
{0x00000000, 0x00000000, 0x0000008C, 0x00000000, 0x00000084}, // L[1]
{0x00000009, 0x00000019, 0x00000000, 0x00000084, 0x00000084}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   3
{ // R[ 6] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000002, 0x00000040}, // L[0]
{0x00000000, 0x00000090, 0x00000084, 0x00000000, 0x00000090}, // L[1]
{0x00000009, 0x000000B3, 0x00000084, 0x00000084, 0x00000088}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   6
{ // R[ 7] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000012, 0x00000010}, // L[0]
{0x00000090, 0x00000090, 0x00000084, 0x00000084, 0x00000000}, // L[1]
{0x00000088, 0x000000B3, 0x00000084, 0x00000060, 0x000000E4}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   9
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000040, 0x00000021, 0x000000E4, 0x00000000, 0x000000E1}, // L[0]
{0x00000090, 0x000000AE, 0x00000000, 0x00000012, 0x00000016}, // L[1]
{0x00000088, 0x00000000, 0x00000010, 0x00000084, 0x00000098}, // L[2]
{0x00000000, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x000000E1, 0x00000021, 0x000000E4, 0x000000F0, 0x0000001C}, // L[0]
{0x00000016, 0x000000AE, 0x00000000, 0x00000002, 0x00000006}, // L[1]
{0x00000098, 0x00000000, 0x00000010, 0x0000000E, 0x00000002}, // L[2]
{0x000000A0, 0x000000A0, 0x00000000, 0x00000060, 0x000000A0}, // L[3]
}, // T.state[ 9].w =  13
{ // R[10] abcde
{0x000000E1, 0x000000A7, 0x0000001C, 0x000000F0, 0x00000080}, // L[0]
{0x00000016, 0x00000015, 0x00000006, 0x00000002, 0x00000003}, // L[1]
{0x00000098, 0x00000040, 0x00000002, 0x0000000E, 0x00000048}, // L[2]
{0x000000A0, 0x00000000, 0x000000A0, 0x00000060, 0x000000E0}, // L[3]
}, // T.state[10].w =  12
{ // R[11] abcde
{0x00000080, 0x000000A7, 0x0000001C, 0x00000083, 0x00000081}, // L[0]
{0x00000003, 0x00000015, 0x00000006, 0x00000008, 0x0000000A}, // L[1]
{0x00000048, 0x00000040, 0x00000002, 0x00000032, 0x00000010}, // L[2]
{0x000000E0, 0x00000000, 0x000000A0, 0x00000004, 0x000000A4}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000080, 0x0000003E, 0x00000010, 0x00000004, 0x00000000}, // L[0]
{0x00000003, 0x000000A0, 0x000000A4, 0x00000083, 0x00000000}, // L[1]
{0x00000048, 0x00000049, 0x00000081, 0x00000008, 0x00000000}, // L[2]
{0x000000E0, 0x0000004C, 0x0000000A, 0x00000032, 0x00000000}, // L[3]
}, // T.state[12].w =  15
// T.w =  86
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 86 -> 85
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000055}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000055, 0x00000050, 0x00000083, 0x000000A6, 0x00000029}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000029, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000F2, 0x00000000, 0x00000000, 0x00000086}, // L[1]
{0x00000055, 0x00000000, 0x00000000, 0x00000080, 0x00000075}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000A6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000029, 0x00000010, 0x00000009}, // L[0]
{0x00000086, 0x000000F2, 0x00000000, 0x00000043, 0x00000041}, // L[1]
{0x00000075, 0x00000000, 0x00000000, 0x000000FA, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000053, 0x00000055}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000025, 0x00000009, 0x00000010, 0x0000000D}, // L[0]
{0x00000086, 0x00000076, 0x00000041, 0x00000043, 0x00000050}, // L[1]
{0x00000075, 0x00000041, 0x0000000A, 0x000000FA, 0x00000016}, // L[2]
{0x00000000, 0x000000AA, 0x00000055, 0x00000053, 0x000000EA}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x0000000D, 0x00000025, 0x00000009, 0x000000E8, 0x00000021}, // L[0]
{0x00000050, 0x00000076, 0x00000041, 0x00000098, 0x00000049}, // L[1]
{0x00000016, 0x00000041, 0x0000000A, 0x00000067, 0x00000025}, // L[2]
{0x000000EA, 0x000000AA, 0x00000055, 0x000000CD, 0x00000020}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x0000000D, 0x0000007E, 0x00000025, 0x000000CD, 0x00000000}, // L[0]
{0x00000050, 0x000000C8, 0x00000020, 0x000000E8, 0x00000000}, // L[1]
{0x00000016, 0x00000015, 0x00000021, 0x00000098, 0x00000000}, // L[2]
{0x000000EA, 0x00000008, 0x00000049, 0x00000067, 0x00000000}, // L[3]
}, // T.state[12].w =  22
// T.w =  85
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 85 -> 84
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000055}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000055, 0x00000050, 0x00000083, 0x000000A6, 0x00000029}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000029, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000F2, 0x00000000, 0x00000000, 0x00000086}, // L[1]
{0x00000055, 0x00000000, 0x00000000, 0x00000080, 0x00000075}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000A6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000029, 0x00000010, 0x00000009}, // L[0]
{0x00000086, 0x000000F2, 0x00000000, 0x00000043, 0x00000041}, // L[1]
{0x00000075, 0x00000000, 0x00000000, 0x000000FA, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000053, 0x00000055}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000025, 0x00000009, 0x00000010, 0x0000000D}, // L[0]
{0x00000086, 0x00000076, 0x00000041, 0x00000043, 0x00000050}, // L[1]
{0x00000075, 0x00000041, 0x0000000A, 0x000000FA, 0x000000B6}, // L[2]
{0x00000000, 0x000000AA, 0x00000055, 0x00000053, 0x000000AA}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x0000000D, 0x00000025, 0x00000009, 0x000000E8, 0x00000021}, // L[0]
{0x00000050, 0x00000076, 0x00000041, 0x00000098, 0x00000049}, // L[1]
{0x000000B6, 0x00000041, 0x0000000A, 0x00000062, 0x0000002C}, // L[2]
{0x000000AA, 0x000000AA, 0x00000055, 0x000000CF, 0x00000000}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x0000000D, 0x0000007E, 0x0000002C, 0x000000CF, 0x00000000}, // L[0]
{0x00000050, 0x000000DA, 0x00000000, 0x000000E8, 0x00000000}, // L[1]
{0x000000B6, 0x00000055, 0x00000021, 0x00000098, 0x00000000}, // L[2]
{0x000000AA, 0x00000008, 0x00000049, 0x00000062, 0x00000000}, // L[3]
}, // T.state[12].w =  21
// T.w =  84
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 84 -> 83
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000055}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000055, 0x00000050, 0x00000083, 0x000000A6, 0x00000029}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000029, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000F2, 0x00000000, 0x00000000, 0x00000086}, // L[1]
{0x00000055, 0x00000000, 0x00000000, 0x00000080, 0x00000075}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000A6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000029, 0x00000010, 0x00000009}, // L[0]
{0x00000086, 0x000000F2, 0x00000000, 0x00000043, 0x00000041}, // L[1]
{0x00000075, 0x00000000, 0x00000000, 0x000000FA, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000053, 0x00000055}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000025, 0x00000009, 0x00000010, 0x0000000D}, // L[0]
{0x00000086, 0x00000076, 0x00000041, 0x00000043, 0x00000010}, // L[1]
{0x00000075, 0x00000041, 0x0000000A, 0x000000FA, 0x000000B6}, // L[2]
{0x00000000, 0x000000AA, 0x00000055, 0x00000053, 0x000000FA}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x0000000D, 0x00000025, 0x00000009, 0x000000E8, 0x00000021}, // L[0]
{0x00000010, 0x00000076, 0x00000041, 0x0000009A, 0x00000049}, // L[1]
{0x000000B6, 0x00000041, 0x0000000A, 0x00000062, 0x00000028}, // L[2]
{0x000000FA, 0x000000AA, 0x00000055, 0x0000004D, 0x00000000}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x0000000D, 0x0000007E, 0x00000028, 0x0000004D, 0x00000000}, // L[0]
{0x00000010, 0x000000D2, 0x00000000, 0x000000E8, 0x00000000}, // L[1]
{0x000000B6, 0x00000055, 0x00000021, 0x0000009A, 0x00000000}, // L[2]
{0x000000FA, 0x00000008, 0x00000049, 0x00000062, 0x00000000}, // L[3]
}, // T.state[12].w =  20
// T.w =  83
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 83 -> 82
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000055}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000055, 0x00000050, 0x00000083, 0x000000A6, 0x00000029}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000029, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000F2, 0x00000000, 0x00000000, 0x00000086}, // L[1]
{0x00000055, 0x00000000, 0x00000000, 0x00000080, 0x00000075}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000A6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000029, 0x00000010, 0x00000049}, // L[0]
{0x00000086, 0x000000F2, 0x00000000, 0x00000043, 0x000000C1}, // L[1]
{0x00000075, 0x00000000, 0x00000000, 0x000000FA, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000053, 0x00000055}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x0000002D, 0x00000049, 0x00000010, 0x00000015}, // L[0]
{0x00000086, 0x00000066, 0x000000C1, 0x00000043, 0x00000020}, // L[1]
{0x00000075, 0x00000041, 0x0000000A, 0x000000FA, 0x000000B6}, // L[2]
{0x00000000, 0x000000AA, 0x00000055, 0x00000053, 0x000000FA}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x00000015, 0x0000002D, 0x00000049, 0x00000028, 0x00000021}, // L[0]
{0x00000020, 0x00000066, 0x000000C1, 0x0000001B, 0x00000048}, // L[1]
{0x000000B6, 0x00000041, 0x0000000A, 0x00000062, 0x00000028}, // L[2]
{0x000000FA, 0x000000AA, 0x00000055, 0x0000004D, 0x00000020}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000015, 0x0000005C, 0x00000028, 0x0000004D, 0x00000000}, // L[0]
{0x00000020, 0x000000D2, 0x00000020, 0x00000028, 0x00000000}, // L[1]
{0x000000B6, 0x00000015, 0x00000021, 0x0000001B, 0x00000000}, // L[2]
{0x000000FA, 0x00000018, 0x00000048, 0x00000062, 0x00000000}, // L[3]
}, // T.state[12].w =  19
// T.w =  82
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 82 -> 81
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000055}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000055, 0x00000050, 0x00000083, 0x000000A6, 0x00000029}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000029, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000F2, 0x00000000, 0x00000000, 0x00000086}, // L[1]
{0x00000055, 0x00000000, 0x00000000, 0x00000080, 0x00000075}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000A6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000029, 0x00000010, 0x00000049}, // L[0]
{0x00000086, 0x000000F2, 0x00000000, 0x00000043, 0x000000C1}, // L[1]
{0x00000075, 0x00000000, 0x00000000, 0x000000FA, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000053, 0x00000055}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x0000002D, 0x00000049, 0x00000010, 0x00000005}, // L[0]
{0x00000086, 0x00000066, 0x000000C1, 0x00000043, 0x00000060}, // L[1]
{0x00000075, 0x00000041, 0x0000000A, 0x000000FA, 0x000000BE}, // L[2]
{0x00000000, 0x000000AA, 0x00000055, 0x00000053, 0x000000FA}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x00000005, 0x0000002D, 0x00000049, 0x000000A8, 0x00000021}, // L[0]
{0x00000060, 0x00000066, 0x000000C1, 0x00000019, 0x0000004A}, // L[1]
{0x000000BE, 0x00000041, 0x0000000A, 0x00000022, 0x00000028}, // L[2]
{0x000000FA, 0x000000AA, 0x00000055, 0x0000004D, 0x00000020}, // L[3]
}, // T.state[11].w =  18
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000005, 0x00000058, 0x00000028, 0x0000004D, 0x00000000}, // L[0]
{0x00000060, 0x000000D2, 0x00000020, 0x000000A8, 0x00000000}, // L[1]
{0x000000BE, 0x00000015, 0x00000021, 0x00000019, 0x00000000}, // L[2]
{0x000000FA, 0x00000018, 0x0000004A, 0x00000022, 0x00000000}, // L[3]
}, // T.state[12].w =  18
// T.w =  81
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 81 -> 80
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000055}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000055, 0x00000050, 0x00000083, 0x000000A6, 0x00000029}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000029, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000F2, 0x00000000, 0x00000000, 0x00000086}, // L[1]
{0x00000055, 0x00000000, 0x00000000, 0x00000080, 0x00000075}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000A6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000029, 0x00000010, 0x00000029}, // L[0]
{0x00000086, 0x000000F2, 0x00000000, 0x00000043, 0x00000041}, // L[1]
{0x00000075, 0x00000000, 0x00000000, 0x000000FA, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000053, 0x00000055}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000021, 0x00000029, 0x00000010, 0x00000001}, // L[0]
{0x00000086, 0x00000076, 0x00000041, 0x00000043, 0x00000070}, // L[1]
{0x00000075, 0x00000041, 0x0000000A, 0x000000FA, 0x000000FE}, // L[2]
{0x00000000, 0x000000AA, 0x00000055, 0x00000053, 0x000000EA}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x00000001, 0x00000021, 0x00000029, 0x00000088, 0x000000A1}, // L[0]
{0x00000070, 0x00000076, 0x00000041, 0x00000099, 0x00000048}, // L[1]
{0x000000FE, 0x00000041, 0x0000000A, 0x00000020, 0x0000002A}, // L[2]
{0x000000EA, 0x000000AA, 0x00000055, 0x000000CD, 0x00000000}, // L[3]
}, // T.state[11].w =  17
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000001, 0x0000007C, 0x0000002A, 0x000000CD, 0x00000000}, // L[0]
{0x00000070, 0x000000D6, 0x00000000, 0x00000088, 0x00000000}, // L[1]
{0x000000FE, 0x00000055, 0x000000A1, 0x00000099, 0x00000000}, // L[2]
{0x000000EA, 0x00000001, 0x00000048, 0x00000020, 0x00000000}, // L[3]
}, // T.state[12].w =  18
// T.w =  80
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 80 -> 79
#if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 13
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000003}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000003, 0x00000001, 0x00000000, 0x00000081, 0x00000083}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000003, 0x00000050, 0x00000083, 0x00000081, 0x00000055}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000055, 0x00000050, 0x00000083, 0x000000A6, 0x00000029}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   5
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000029, 0x00000000, 0x00000020}, // L[0]
{0x00000010, 0x000000F2, 0x00000000, 0x00000000, 0x00000086}, // L[1]
{0x00000055, 0x00000000, 0x00000000, 0x00000080, 0x00000075}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000A6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   6
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000029, 0x00000010, 0x00000029}, // L[0]
{0x00000086, 0x000000F2, 0x00000000, 0x00000043, 0x00000041}, // L[1]
{0x00000075, 0x00000000, 0x00000000, 0x000000FA, 0x0000000A}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000053, 0x00000055}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000021, 0x00000029, 0x00000010, 0x00000001}, // L[0]
{0x00000086, 0x00000076, 0x00000041, 0x00000043, 0x00000070}, // L[1]
{0x00000075, 0x00000041, 0x0000000A, 0x000000FA, 0x000000FE}, // L[2]
{0x00000000, 0x000000AA, 0x00000055, 0x00000053, 0x000000FA}, // L[3]
}, // T.state[10].w =  17
{ // R[11] abcde
{0x00000001, 0x00000021, 0x00000029, 0x00000088, 0x000000A1}, // L[0]
{0x00000070, 0x00000076, 0x00000041, 0x00000099, 0x00000048}, // L[1]
{0x000000FE, 0x00000041, 0x0000000A, 0x00000020, 0x0000003A}, // L[2]
{0x000000FA, 0x000000AA, 0x00000055, 0x0000004D, 0x00000000}, // L[3]
}, // T.state[11].w =  17
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000001, 0x0000007C, 0x0000003A, 0x0000004D, 0x00000000}, // L[0]
{0x00000070, 0x000000F6, 0x00000000, 0x00000088, 0x00000000}, // L[1]
{0x000000FE, 0x00000055, 0x000000A1, 0x00000099, 0x00000000}, // L[2]
{0x000000FA, 0x00000001, 0x00000048, 0x00000020, 0x00000000}, // L[3]
}, // T.state[12].w =  17
// T.w =  79
};
#endif // #if 1 // WORD_SIZE 8 nrounds 12 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

[./tests/norx-best-diff-search-tests.cc:3372] norx_print_bounds_file(): Print bounds for first 12 rounds:
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  3
B[ 4]  4
B[ 5]  5
B[ 6]  6
B[ 7]  8
B[ 8] 11
B[ 9] 26
B[10] 42
B[11] 79

[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 179 -> 178
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000005}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x0000002A, 0x00000000, 0x00000084, 0x0000002E}, // L[0]
{0x00000030, 0x00000000, 0x0000008C, 0x00000000, 0x00000050}, // L[1]
{0x00000000, 0x00000039, 0x00000000, 0x00000085, 0x00000049}, // L[2]
{0x00000090, 0x00000000, 0x00000005, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x0000002E, 0x0000002A, 0x00000000, 0x00000055, 0x00000055}, // L[0]
{0x00000050, 0x00000000, 0x0000008C, 0x00000028, 0x000000A4}, // L[1]
{0x00000049, 0x00000039, 0x00000000, 0x00000066, 0x0000002A}, // L[2]
{0x00000090, 0x00000000, 0x00000005, 0x00000048, 0x00000055}, // L[3]
}, // T.state[ 5].w =  11
{ // R[ 6] abcde
{0x0000002E, 0x000000EF, 0x00000055, 0x00000055, 0x00000001}, // L[0]
{0x00000050, 0x00000094, 0x000000A4, 0x00000028, 0x000000C4}, // L[1]
{0x00000049, 0x00000062, 0x0000002A, 0x00000066, 0x0000002B}, // L[2]
{0x00000090, 0x000000AA, 0x00000055, 0x00000048, 0x0000000A}, // L[3]
}, // T.state[ 6].w =  16
{ // R[ 7] abcde
{0x00000001, 0x000000EF, 0x00000055, 0x000000A2, 0x00000031}, // L[0]
{0x000000C4, 0x00000094, 0x000000A4, 0x00000067, 0x00000001}, // L[1]
{0x0000002B, 0x00000062, 0x0000002A, 0x0000006A, 0x00000000}, // L[2]
{0x0000000A, 0x000000AA, 0x00000055, 0x00000012, 0x000000C9}, // L[3]
}, // T.state[ 7].w =  21
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000001, 0x0000002B, 0x00000000, 0x00000012, 0x00000028}, // L[0]
{0x000000C4, 0x000000C4, 0x000000C9, 0x000000A2, 0x00000000}, // L[1]
{0x0000002B, 0x000000C6, 0x00000031, 0x00000067, 0x00000021}, // L[2]
{0x0000000A, 0x000000BD, 0x00000001, 0x0000006A, 0x00000081}, // L[3]
}, // T.state[ 8].w =  22
{ // R[ 9] abcde
{0x00000028, 0x0000002B, 0x00000000, 0x0000001D, 0x00000005}, // L[0]
{0x00000000, 0x000000C4, 0x000000C9, 0x00000051, 0x0000008A}, // L[1]
{0x00000021, 0x000000C6, 0x00000031, 0x00000023, 0x00000010}, // L[2]
{0x00000081, 0x000000BD, 0x00000001, 0x000000F5, 0x0000001C}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000028, 0x000000C5, 0x00000005, 0x0000001D, 0x00000025}, // L[0]
{0x00000000, 0x000000C9, 0x0000008A, 0x00000051, 0x00000049}, // L[1]
{0x00000021, 0x000000DA, 0x00000010, 0x00000023, 0x00000019}, // L[2]
{0x00000081, 0x00000034, 0x0000001C, 0x000000F5, 0x000000F5}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000025, 0x000000C5, 0x00000005, 0x000000C1, 0x00000044}, // L[0]
{0x00000049, 0x000000C9, 0x0000008A, 0x000000C0, 0x0000004A}, // L[1]
{0x00000019, 0x000000DA, 0x00000010, 0x000000D1, 0x00000041}, // L[2]
{0x000000F5, 0x00000034, 0x0000001C, 0x00000000, 0x0000003C}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000025, 0x00000007, 0x00000041, 0x00000000, 0x00000020}, // L[0]
{0x00000049, 0x00000037, 0x0000003C, 0x000000C1, 0x00000000}, // L[1]
{0x00000019, 0x00000010, 0x00000044, 0x000000C0, 0x00000009}, // L[2]
{0x000000F5, 0x00000003, 0x0000004A, 0x000000D1, 0x00000010}, // L[3]
}, // T.state[12].w =  15
{ // R[13] abcde
{0x00000020, 0x00000007, 0x00000041, 0x00000010, 0x00000000}, // L[0]
{0x00000000, 0x00000037, 0x0000003C, 0x000000E0, 0x00000000}, // L[1]
{0x00000009, 0x00000010, 0x00000044, 0x000000E4, 0x00000000}, // L[2]
{0x00000010, 0x00000003, 0x0000004A, 0x000000E0, 0x00000000}, // L[3]
}, // T.state[13].w =  21
// T.w = 178
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 178 -> 177
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000005}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x0000002A, 0x00000000, 0x00000084, 0x0000002E}, // L[0]
{0x00000030, 0x00000000, 0x0000008C, 0x00000000, 0x00000050}, // L[1]
{0x00000000, 0x00000039, 0x00000000, 0x00000085, 0x00000049}, // L[2]
{0x00000090, 0x00000000, 0x00000005, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x0000002E, 0x0000002A, 0x00000000, 0x00000055, 0x00000055}, // L[0]
{0x00000050, 0x00000000, 0x0000008C, 0x00000028, 0x000000A4}, // L[1]
{0x00000049, 0x00000039, 0x00000000, 0x00000066, 0x0000002A}, // L[2]
{0x00000090, 0x00000000, 0x00000005, 0x00000048, 0x00000055}, // L[3]
}, // T.state[ 5].w =  11
{ // R[ 6] abcde
{0x0000002E, 0x000000EF, 0x00000055, 0x00000055, 0x00000001}, // L[0]
{0x00000050, 0x00000094, 0x000000A4, 0x00000028, 0x000000C4}, // L[1]
{0x00000049, 0x00000062, 0x0000002A, 0x00000066, 0x0000002B}, // L[2]
{0x00000090, 0x000000AA, 0x00000055, 0x00000048, 0x0000000A}, // L[3]
}, // T.state[ 6].w =  16
{ // R[ 7] abcde
{0x00000001, 0x000000EF, 0x00000055, 0x000000A2, 0x00000031}, // L[0]
{0x000000C4, 0x00000094, 0x000000A4, 0x00000067, 0x00000001}, // L[1]
{0x0000002B, 0x00000062, 0x0000002A, 0x0000006A, 0x00000000}, // L[2]
{0x0000000A, 0x000000AA, 0x00000055, 0x00000012, 0x000000C9}, // L[3]
}, // T.state[ 7].w =  21
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000001, 0x0000002B, 0x00000000, 0x00000012, 0x00000028}, // L[0]
{0x000000C4, 0x000000C4, 0x000000C9, 0x000000A2, 0x00000000}, // L[1]
{0x0000002B, 0x000000C6, 0x00000031, 0x00000067, 0x00000021}, // L[2]
{0x0000000A, 0x000000BD, 0x00000001, 0x0000006A, 0x00000081}, // L[3]
}, // T.state[ 8].w =  22
{ // R[ 9] abcde
{0x00000028, 0x0000002B, 0x00000000, 0x0000001D, 0x00000005}, // L[0]
{0x00000000, 0x000000C4, 0x000000C9, 0x00000051, 0x0000008A}, // L[1]
{0x00000021, 0x000000C6, 0x00000031, 0x00000023, 0x00000010}, // L[2]
{0x00000081, 0x000000BD, 0x00000001, 0x000000F5, 0x0000001C}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000028, 0x000000C5, 0x00000005, 0x0000001D, 0x00000025}, // L[0]
{0x00000000, 0x000000C9, 0x0000008A, 0x00000051, 0x00000049}, // L[1]
{0x00000021, 0x000000DA, 0x00000010, 0x00000023, 0x00000019}, // L[2]
{0x00000081, 0x00000034, 0x0000001C, 0x000000F5, 0x000000F5}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000025, 0x000000C5, 0x00000005, 0x000000C1, 0x0000004C}, // L[0]
{0x00000049, 0x000000C9, 0x0000008A, 0x000000C0, 0x0000004A}, // L[1]
{0x00000019, 0x000000DA, 0x00000010, 0x000000D1, 0x00000063}, // L[2]
{0x000000F5, 0x00000034, 0x0000001C, 0x00000000, 0x0000003C}, // L[3]
}, // T.state[11].w =  22
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000025, 0x00000007, 0x00000063, 0x00000000, 0x00000020}, // L[0]
{0x00000049, 0x00000073, 0x0000003C, 0x000000C1, 0x00000008}, // L[1]
{0x00000019, 0x00000010, 0x0000004C, 0x000000C0, 0x0000000B}, // L[2]
{0x000000F5, 0x00000013, 0x0000004A, 0x000000D1, 0x00000000}, // L[3]
}, // T.state[12].w =  15
{ // R[13] abcde
{0x00000020, 0x00000007, 0x00000063, 0x00000010, 0x00000000}, // L[0]
{0x00000008, 0x00000073, 0x0000003C, 0x000000E4, 0x00000000}, // L[1]
{0x0000000B, 0x00000010, 0x0000004C, 0x000000E5, 0x00000000}, // L[2]
{0x00000000, 0x00000013, 0x0000004A, 0x000000E8, 0x00000000}, // L[3]
}, // T.state[13].w =  20
// T.w = 177
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 177 -> 176
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000005}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x0000002A, 0x00000000, 0x00000084, 0x0000002E}, // L[0]
{0x00000030, 0x00000000, 0x0000008C, 0x00000000, 0x00000050}, // L[1]
{0x00000000, 0x00000039, 0x00000000, 0x00000085, 0x00000049}, // L[2]
{0x00000090, 0x00000000, 0x00000005, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x0000002E, 0x0000002A, 0x00000000, 0x00000055, 0x00000055}, // L[0]
{0x00000050, 0x00000000, 0x0000008C, 0x00000028, 0x000000A4}, // L[1]
{0x00000049, 0x00000039, 0x00000000, 0x00000066, 0x0000002A}, // L[2]
{0x00000090, 0x00000000, 0x00000005, 0x00000048, 0x00000055}, // L[3]
}, // T.state[ 5].w =  11
{ // R[ 6] abcde
{0x0000002E, 0x000000EF, 0x00000055, 0x00000055, 0x00000001}, // L[0]
{0x00000050, 0x00000094, 0x000000A4, 0x00000028, 0x000000C4}, // L[1]
{0x00000049, 0x00000062, 0x0000002A, 0x00000066, 0x0000002B}, // L[2]
{0x00000090, 0x000000AA, 0x00000055, 0x00000048, 0x0000000A}, // L[3]
}, // T.state[ 6].w =  16
{ // R[ 7] abcde
{0x00000001, 0x000000EF, 0x00000055, 0x000000A2, 0x00000031}, // L[0]
{0x000000C4, 0x00000094, 0x000000A4, 0x00000067, 0x00000001}, // L[1]
{0x0000002B, 0x00000062, 0x0000002A, 0x0000006A, 0x00000000}, // L[2]
{0x0000000A, 0x000000AA, 0x00000055, 0x00000012, 0x000000C9}, // L[3]
}, // T.state[ 7].w =  21
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000001, 0x0000002B, 0x00000000, 0x00000012, 0x00000028}, // L[0]
{0x000000C4, 0x000000C4, 0x000000C9, 0x000000A2, 0x00000000}, // L[1]
{0x0000002B, 0x000000C6, 0x00000031, 0x00000067, 0x00000021}, // L[2]
{0x0000000A, 0x000000BD, 0x00000001, 0x0000006A, 0x00000081}, // L[3]
}, // T.state[ 8].w =  22
{ // R[ 9] abcde
{0x00000028, 0x0000002B, 0x00000000, 0x0000001D, 0x00000005}, // L[0]
{0x00000000, 0x000000C4, 0x000000C9, 0x00000051, 0x0000008A}, // L[1]
{0x00000021, 0x000000C6, 0x00000031, 0x00000023, 0x00000030}, // L[2]
{0x00000081, 0x000000BD, 0x00000001, 0x000000F5, 0x0000001C}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000028, 0x000000C5, 0x00000005, 0x0000001D, 0x00000025}, // L[0]
{0x00000000, 0x000000C9, 0x0000008A, 0x00000051, 0x00000059}, // L[1]
{0x00000021, 0x000000DE, 0x00000030, 0x00000023, 0x00000001}, // L[2]
{0x00000081, 0x00000034, 0x0000001C, 0x000000F5, 0x000000F5}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000025, 0x000000C5, 0x00000005, 0x000000C1, 0x0000004C}, // L[0]
{0x00000059, 0x000000C9, 0x0000008A, 0x00000040, 0x0000004A}, // L[1]
{0x00000001, 0x000000DE, 0x00000030, 0x00000011, 0x00000041}, // L[2]
{0x000000F5, 0x00000034, 0x0000001C, 0x00000000, 0x00000034}, // L[3]
}, // T.state[11].w =  23
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000025, 0x00000007, 0x00000041, 0x00000000, 0x00000020}, // L[0]
{0x00000059, 0x0000003F, 0x00000034, 0x000000C1, 0x00000000}, // L[1]
{0x00000001, 0x00000000, 0x0000004C, 0x00000040, 0x00000001}, // L[2]
{0x000000F5, 0x00000013, 0x0000004A, 0x00000011, 0x00000000}, // L[3]
}, // T.state[12].w =  14
{ // R[13] abcde
{0x00000020, 0x00000007, 0x00000041, 0x00000010, 0x00000000}, // L[0]
{0x00000000, 0x0000003F, 0x00000034, 0x000000E0, 0x00000000}, // L[1]
{0x00000001, 0x00000000, 0x0000004C, 0x000000A0, 0x00000000}, // L[2]
{0x00000000, 0x00000013, 0x0000004A, 0x00000088, 0x00000000}, // L[3]
}, // T.state[13].w =  19
// T.w = 176
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 176 -> 175
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000080, 0x00000030}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000030, 0x00000010, 0x00000080, 0x00000085, 0x00000005}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x0000008C}, // L[3]
}, // T.state[ 3].w =   3
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x0000002A, 0x00000000, 0x00000084, 0x0000002E}, // L[0]
{0x00000030, 0x00000000, 0x0000008C, 0x00000000, 0x00000050}, // L[1]
{0x00000000, 0x00000039, 0x00000000, 0x00000085, 0x00000049}, // L[2]
{0x00000090, 0x00000000, 0x00000005, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 4].w =   5
{ // R[ 5] abcde
{0x0000002E, 0x0000002A, 0x00000000, 0x00000055, 0x00000055}, // L[0]
{0x00000050, 0x00000000, 0x0000008C, 0x00000028, 0x000000A4}, // L[1]
{0x00000049, 0x00000039, 0x00000000, 0x00000066, 0x0000002A}, // L[2]
{0x00000090, 0x00000000, 0x00000005, 0x00000048, 0x00000055}, // L[3]
}, // T.state[ 5].w =  11
{ // R[ 6] abcde
{0x0000002E, 0x000000EF, 0x00000055, 0x00000055, 0x00000001}, // L[0]
{0x00000050, 0x00000094, 0x000000A4, 0x00000028, 0x000000C4}, // L[1]
{0x00000049, 0x00000062, 0x0000002A, 0x00000066, 0x0000002B}, // L[2]
{0x00000090, 0x000000AA, 0x00000055, 0x00000048, 0x0000000A}, // L[3]
}, // T.state[ 6].w =  16
{ // R[ 7] abcde
{0x00000001, 0x000000EF, 0x00000055, 0x000000A2, 0x00000031}, // L[0]
{0x000000C4, 0x00000094, 0x000000A4, 0x00000067, 0x00000001}, // L[1]
{0x0000002B, 0x00000062, 0x0000002A, 0x0000006A, 0x00000000}, // L[2]
{0x0000000A, 0x000000AA, 0x00000055, 0x00000012, 0x000000C9}, // L[3]
}, // T.state[ 7].w =  21
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000001, 0x0000002B, 0x00000000, 0x00000012, 0x00000028}, // L[0]
{0x000000C4, 0x000000C4, 0x000000C9, 0x000000A2, 0x00000000}, // L[1]
{0x0000002B, 0x000000C6, 0x00000031, 0x00000067, 0x00000021}, // L[2]
{0x0000000A, 0x000000BD, 0x00000001, 0x0000006A, 0x00000081}, // L[3]
}, // T.state[ 8].w =  22
{ // R[ 9] abcde
{0x00000028, 0x0000002B, 0x00000000, 0x0000001D, 0x00000005}, // L[0]
{0x00000000, 0x000000C4, 0x000000C9, 0x00000051, 0x0000008A}, // L[1]
{0x00000021, 0x000000C6, 0x00000031, 0x00000023, 0x00000030}, // L[2]
{0x00000081, 0x000000BD, 0x00000001, 0x000000F5, 0x0000001C}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000028, 0x000000C5, 0x00000005, 0x0000001D, 0x00000025}, // L[0]
{0x00000000, 0x000000C9, 0x0000008A, 0x00000051, 0x00000059}, // L[1]
{0x00000021, 0x000000DE, 0x00000030, 0x00000023, 0x00000001}, // L[2]
{0x00000081, 0x00000034, 0x0000001C, 0x000000F5, 0x000000F5}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000025, 0x000000C5, 0x00000005, 0x000000C1, 0x00000046}, // L[0]
{0x00000059, 0x000000C9, 0x0000008A, 0x00000040, 0x000000CA}, // L[1]
{0x00000001, 0x000000DE, 0x00000030, 0x00000011, 0x00000063}, // L[2]
{0x000000F5, 0x00000034, 0x0000001C, 0x00000000, 0x00000034}, // L[3]
}, // T.state[11].w =  23
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000025, 0x00000006, 0x00000063, 0x00000000, 0x00000021}, // L[0]
{0x00000059, 0x0000007B, 0x00000034, 0x000000C1, 0x00000000}, // L[1]
{0x00000001, 0x00000000, 0x00000046, 0x00000040, 0x00000001}, // L[2]
{0x000000F5, 0x00000007, 0x000000CA, 0x00000011, 0x00000010}, // L[3]
}, // T.state[12].w =  14
{ // R[13] abcde
{0x00000021, 0x00000006, 0x00000063, 0x00000090, 0x00000000}, // L[0]
{0x00000000, 0x0000007B, 0x00000034, 0x000000E0, 0x00000000}, // L[1]
{0x00000001, 0x00000000, 0x00000046, 0x000000A0, 0x00000000}, // L[2]
{0x00000010, 0x00000007, 0x000000CA, 0x00000080, 0x00000000}, // L[3]
}, // T.state[13].w =  18
// T.w = 175
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 175 -> 158
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000084, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x00000069}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000004, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000042, 0x00000042}, // L[0]
{0x00000008, 0x00000008, 0x00000084, 0x00000004, 0x00000080}, // L[1]
{0x00000069, 0x00000029, 0x00000000, 0x000000B4, 0x000000D4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x0000004A, 0x0000005A}, // L[3]
}, // T.state[ 5].w =   6
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x00000042, 0x00000048}, // L[0]
{0x00000008, 0x00000011, 0x00000080, 0x00000004, 0x00000009}, // L[1]
{0x00000069, 0x000000BF, 0x000000D4, 0x000000B4, 0x00000080}, // L[2]
{0x00000090, 0x0000004B, 0x0000005A, 0x0000004A, 0x000000C9}, // L[3]
}, // T.state[ 6].w =  12
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000050, 0x00000012}, // L[0]
{0x00000009, 0x00000011, 0x00000080, 0x00000068, 0x00000028}, // L[1]
{0x00000080, 0x000000BF, 0x000000D4, 0x000000A1, 0x00000015}, // L[2]
{0x000000C9, 0x0000004B, 0x0000005A, 0x0000001C, 0x00000042}, // L[3]
}, // T.state[ 7].w =  19
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000072, 0x00000015, 0x0000001C, 0x0000000E}, // L[0]
{0x00000009, 0x00000055, 0x00000042, 0x00000050, 0x00000044}, // L[1]
{0x00000080, 0x00000012, 0x00000012, 0x00000068, 0x00000092}, // L[2]
{0x000000C9, 0x000000B4, 0x00000028, 0x000000A1, 0x000000AF}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x0000000E, 0x00000072, 0x00000015, 0x00000009, 0x00000004}, // L[0]
{0x00000044, 0x00000055, 0x00000042, 0x0000000A, 0x00000048}, // L[1]
{0x00000092, 0x00000012, 0x00000012, 0x0000007D, 0x000000A1}, // L[2]
{0x000000AF, 0x000000B4, 0x00000028, 0x00000007, 0x00000021}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x0000000E, 0x000000CE, 0x00000004, 0x00000009, 0x00000040}, // L[0]
{0x00000044, 0x000000A3, 0x00000048, 0x0000000A, 0x00000021}, // L[1]
{0x00000092, 0x00000076, 0x000000A1, 0x0000007D, 0x00000080}, // L[2]
{0x000000AF, 0x000000B2, 0x00000021, 0x00000007, 0x00000001}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000040, 0x000000CE, 0x00000004, 0x0000004A, 0x00000042}, // L[0]
{0x00000021, 0x000000A3, 0x00000048, 0x00000059, 0x00000081}, // L[1]
{0x00000080, 0x00000076, 0x000000A1, 0x000000EF, 0x00000080}, // L[2]
{0x00000001, 0x000000B2, 0x00000021, 0x00000030, 0x00000031}, // L[3]
}, // T.state[11].w =  24
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000044, 0x00000080, 0x00000030, 0x00000004}, // L[0]
{0x00000021, 0x000000ED, 0x00000031, 0x0000004A, 0x00000044}, // L[1]
{0x00000080, 0x00000007, 0x00000042, 0x00000059, 0x00000081}, // L[2]
{0x00000001, 0x00000019, 0x00000081, 0x000000EF, 0x0000000A}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x00000004, 0x00000044, 0x00000080, 0x0000001A, 0x00000000}, // L[0]
{0x00000044, 0x000000ED, 0x00000031, 0x00000007, 0x00000000}, // L[1]
{0x00000081, 0x00000007, 0x00000042, 0x0000006C, 0x00000000}, // L[2]
{0x0000000A, 0x00000019, 0x00000081, 0x000000F2, 0x00000000}, // L[3]
}, // T.state[13].w =  15
// T.w = 158
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 158 -> 157
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000084, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x00000069}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000004, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000042, 0x00000042}, // L[0]
{0x00000008, 0x00000008, 0x00000084, 0x00000004, 0x00000080}, // L[1]
{0x00000069, 0x00000029, 0x00000000, 0x000000B4, 0x000000D4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x0000004A, 0x0000005A}, // L[3]
}, // T.state[ 5].w =   6
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x00000042, 0x00000048}, // L[0]
{0x00000008, 0x00000011, 0x00000080, 0x00000004, 0x00000009}, // L[1]
{0x00000069, 0x000000BF, 0x000000D4, 0x000000B4, 0x00000080}, // L[2]
{0x00000090, 0x0000004B, 0x0000005A, 0x0000004A, 0x000000C9}, // L[3]
}, // T.state[ 6].w =  12
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000050, 0x00000012}, // L[0]
{0x00000009, 0x00000011, 0x00000080, 0x00000068, 0x00000028}, // L[1]
{0x00000080, 0x000000BF, 0x000000D4, 0x000000A1, 0x00000015}, // L[2]
{0x000000C9, 0x0000004B, 0x0000005A, 0x0000001C, 0x00000042}, // L[3]
}, // T.state[ 7].w =  19
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000072, 0x00000015, 0x0000001C, 0x0000000E}, // L[0]
{0x00000009, 0x00000055, 0x00000042, 0x00000050, 0x00000044}, // L[1]
{0x00000080, 0x00000012, 0x00000012, 0x00000068, 0x00000092}, // L[2]
{0x000000C9, 0x000000B4, 0x00000028, 0x000000A1, 0x000000AF}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x0000000E, 0x00000072, 0x00000015, 0x00000009, 0x00000004}, // L[0]
{0x00000044, 0x00000055, 0x00000042, 0x0000000A, 0x00000048}, // L[1]
{0x00000092, 0x00000012, 0x00000012, 0x0000007D, 0x000000A1}, // L[2]
{0x000000AF, 0x000000B4, 0x00000028, 0x00000007, 0x00000021}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x0000000E, 0x000000CE, 0x00000004, 0x00000009, 0x00000040}, // L[0]
{0x00000044, 0x000000A3, 0x00000048, 0x0000000A, 0x00000021}, // L[1]
{0x00000092, 0x00000076, 0x000000A1, 0x0000007D, 0x00000080}, // L[2]
{0x000000AF, 0x000000B2, 0x00000021, 0x00000007, 0x00000001}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000040, 0x000000CE, 0x00000004, 0x0000004A, 0x00000042}, // L[0]
{0x00000021, 0x000000A3, 0x00000048, 0x00000059, 0x00000081}, // L[1]
{0x00000080, 0x00000076, 0x000000A1, 0x000000EF, 0x00000040}, // L[2]
{0x00000001, 0x000000B2, 0x00000021, 0x00000030, 0x00000031}, // L[3]
}, // T.state[11].w =  24
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000044, 0x00000040, 0x00000030, 0x00000004}, // L[0]
{0x00000021, 0x0000006C, 0x00000031, 0x0000004A, 0x00000005}, // L[1]
{0x00000080, 0x00000007, 0x00000042, 0x00000059, 0x00000085}, // L[2]
{0x00000001, 0x00000019, 0x00000081, 0x000000EF, 0x0000000A}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x00000004, 0x00000044, 0x00000040, 0x0000001A, 0x00000000}, // L[0]
{0x00000005, 0x0000006C, 0x00000031, 0x000000A7, 0x00000000}, // L[1]
{0x00000085, 0x00000007, 0x00000042, 0x0000006E, 0x00000000}, // L[2]
{0x0000000A, 0x00000019, 0x00000081, 0x000000F2, 0x00000000}, // L[3]
}, // T.state[13].w =  14
// T.w = 157
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 157 -> 156
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000084, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x00000069}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000004, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000042, 0x00000042}, // L[0]
{0x00000008, 0x00000008, 0x00000084, 0x00000004, 0x00000080}, // L[1]
{0x00000069, 0x00000029, 0x00000000, 0x000000B4, 0x000000D4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x0000004A, 0x0000005A}, // L[3]
}, // T.state[ 5].w =   6
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x00000042, 0x00000048}, // L[0]
{0x00000008, 0x00000011, 0x00000080, 0x00000004, 0x00000009}, // L[1]
{0x00000069, 0x000000BF, 0x000000D4, 0x000000B4, 0x00000080}, // L[2]
{0x00000090, 0x0000004B, 0x0000005A, 0x0000004A, 0x000000C9}, // L[3]
}, // T.state[ 6].w =  12
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000050, 0x00000012}, // L[0]
{0x00000009, 0x00000011, 0x00000080, 0x00000068, 0x00000028}, // L[1]
{0x00000080, 0x000000BF, 0x000000D4, 0x000000A1, 0x00000015}, // L[2]
{0x000000C9, 0x0000004B, 0x0000005A, 0x0000001C, 0x00000042}, // L[3]
}, // T.state[ 7].w =  19
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000072, 0x00000015, 0x0000001C, 0x0000000E}, // L[0]
{0x00000009, 0x00000055, 0x00000042, 0x00000050, 0x00000044}, // L[1]
{0x00000080, 0x00000012, 0x00000012, 0x00000068, 0x00000092}, // L[2]
{0x000000C9, 0x000000B4, 0x00000028, 0x000000A1, 0x000000AF}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x0000000E, 0x00000072, 0x00000015, 0x00000009, 0x00000004}, // L[0]
{0x00000044, 0x00000055, 0x00000042, 0x0000000A, 0x00000048}, // L[1]
{0x00000092, 0x00000012, 0x00000012, 0x0000007D, 0x000000A1}, // L[2]
{0x000000AF, 0x000000B4, 0x00000028, 0x00000007, 0x00000021}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x0000000E, 0x000000CE, 0x00000004, 0x00000009, 0x00000040}, // L[0]
{0x00000044, 0x000000A3, 0x00000048, 0x0000000A, 0x00000021}, // L[1]
{0x00000092, 0x00000076, 0x000000A1, 0x0000007D, 0x00000080}, // L[2]
{0x000000AF, 0x000000B2, 0x00000021, 0x00000007, 0x00000001}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000040, 0x000000CE, 0x00000004, 0x0000004A, 0x0000004A}, // L[0]
{0x00000021, 0x000000A3, 0x00000048, 0x00000059, 0x00000081}, // L[1]
{0x00000080, 0x00000076, 0x000000A1, 0x000000EF, 0x00000040}, // L[2]
{0x00000001, 0x000000B2, 0x00000021, 0x00000030, 0x00000071}, // L[3]
}, // T.state[11].w =  24
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000044, 0x00000040, 0x00000030, 0x00000004}, // L[0]
{0x00000021, 0x0000006C, 0x00000071, 0x0000004A, 0x00000005}, // L[1]
{0x00000080, 0x00000087, 0x0000004A, 0x00000059, 0x00000007}, // L[2]
{0x00000001, 0x00000009, 0x00000081, 0x000000EF, 0x00000008}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x00000004, 0x00000044, 0x00000040, 0x0000001A, 0x00000000}, // L[0]
{0x00000005, 0x0000006C, 0x00000071, 0x000000A7, 0x00000000}, // L[1]
{0x00000007, 0x00000087, 0x0000004A, 0x0000002F, 0x00000000}, // L[2]
{0x00000008, 0x00000009, 0x00000081, 0x000000F3, 0x00000000}, // L[3]
}, // T.state[13].w =  13
// T.w = 156
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 156 -> 155
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000084, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x00000069}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000004, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000042, 0x00000042}, // L[0]
{0x00000008, 0x00000008, 0x00000084, 0x00000004, 0x00000080}, // L[1]
{0x00000069, 0x00000029, 0x00000000, 0x000000B4, 0x000000D4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x0000004A, 0x0000005A}, // L[3]
}, // T.state[ 5].w =   6
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x00000042, 0x00000048}, // L[0]
{0x00000008, 0x00000011, 0x00000080, 0x00000004, 0x00000009}, // L[1]
{0x00000069, 0x000000BF, 0x000000D4, 0x000000B4, 0x00000080}, // L[2]
{0x00000090, 0x0000004B, 0x0000005A, 0x0000004A, 0x000000C9}, // L[3]
}, // T.state[ 6].w =  12
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000050, 0x00000012}, // L[0]
{0x00000009, 0x00000011, 0x00000080, 0x00000068, 0x00000028}, // L[1]
{0x00000080, 0x000000BF, 0x000000D4, 0x000000A1, 0x00000015}, // L[2]
{0x000000C9, 0x0000004B, 0x0000005A, 0x0000001C, 0x00000042}, // L[3]
}, // T.state[ 7].w =  19
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000072, 0x00000015, 0x0000001C, 0x0000000E}, // L[0]
{0x00000009, 0x00000055, 0x00000042, 0x00000050, 0x00000044}, // L[1]
{0x00000080, 0x00000012, 0x00000012, 0x00000068, 0x00000092}, // L[2]
{0x000000C9, 0x000000B4, 0x00000028, 0x000000A1, 0x000000AF}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x0000000E, 0x00000072, 0x00000015, 0x00000009, 0x00000004}, // L[0]
{0x00000044, 0x00000055, 0x00000042, 0x0000000A, 0x00000048}, // L[1]
{0x00000092, 0x00000012, 0x00000012, 0x0000007D, 0x000000A1}, // L[2]
{0x000000AF, 0x000000B4, 0x00000028, 0x00000007, 0x00000021}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x0000000E, 0x000000CE, 0x00000004, 0x00000009, 0x00000040}, // L[0]
{0x00000044, 0x000000A3, 0x00000048, 0x0000000A, 0x00000021}, // L[1]
{0x00000092, 0x00000076, 0x000000A1, 0x0000007D, 0x00000080}, // L[2]
{0x000000AF, 0x000000B2, 0x00000021, 0x00000007, 0x00000001}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000040, 0x000000CE, 0x00000004, 0x0000004A, 0x0000004A}, // L[0]
{0x00000021, 0x000000A3, 0x00000048, 0x00000059, 0x000000A1}, // L[1]
{0x00000080, 0x00000076, 0x000000A1, 0x000000EF, 0x000000D4}, // L[2]
{0x00000001, 0x000000B2, 0x00000021, 0x00000030, 0x00000031}, // L[3]
}, // T.state[11].w =  24
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000004, 0x000000D4, 0x00000030, 0x00000044}, // L[0]
{0x00000021, 0x00000045, 0x00000031, 0x0000004A, 0x00000024}, // L[1]
{0x00000080, 0x00000007, 0x0000004A, 0x00000059, 0x00000081}, // L[2]
{0x00000001, 0x00000009, 0x000000A1, 0x000000EF, 0x0000000A}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x00000044, 0x00000004, 0x000000D4, 0x0000003A, 0x00000000}, // L[0]
{0x00000024, 0x00000045, 0x00000031, 0x00000037, 0x00000000}, // L[1]
{0x00000081, 0x00000007, 0x0000004A, 0x0000006C, 0x00000000}, // L[2]
{0x0000000A, 0x00000009, 0x000000A1, 0x000000F2, 0x00000000}, // L[3]
}, // T.state[13].w =  12
// T.w = 155
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 155 -> 154
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000084, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x00000069}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000004, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000042, 0x00000042}, // L[0]
{0x00000008, 0x00000008, 0x00000084, 0x00000004, 0x00000080}, // L[1]
{0x00000069, 0x00000029, 0x00000000, 0x000000B4, 0x000000D4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x0000004A, 0x0000005A}, // L[3]
}, // T.state[ 5].w =   6
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x00000042, 0x00000048}, // L[0]
{0x00000008, 0x00000011, 0x00000080, 0x00000004, 0x00000009}, // L[1]
{0x00000069, 0x000000BF, 0x000000D4, 0x000000B4, 0x00000080}, // L[2]
{0x00000090, 0x0000004B, 0x0000005A, 0x0000004A, 0x000000C9}, // L[3]
}, // T.state[ 6].w =  12
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000050, 0x00000012}, // L[0]
{0x00000009, 0x00000011, 0x00000080, 0x00000068, 0x00000028}, // L[1]
{0x00000080, 0x000000BF, 0x000000D4, 0x000000A1, 0x00000015}, // L[2]
{0x000000C9, 0x0000004B, 0x0000005A, 0x0000001C, 0x00000042}, // L[3]
}, // T.state[ 7].w =  19
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000072, 0x00000015, 0x0000001C, 0x0000000E}, // L[0]
{0x00000009, 0x00000055, 0x00000042, 0x00000050, 0x00000044}, // L[1]
{0x00000080, 0x00000012, 0x00000012, 0x00000068, 0x00000092}, // L[2]
{0x000000C9, 0x000000B4, 0x00000028, 0x000000A1, 0x000000AF}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x0000000E, 0x00000072, 0x00000015, 0x00000009, 0x00000004}, // L[0]
{0x00000044, 0x00000055, 0x00000042, 0x0000000A, 0x00000048}, // L[1]
{0x00000092, 0x00000012, 0x00000012, 0x0000007D, 0x000000A1}, // L[2]
{0x000000AF, 0x000000B4, 0x00000028, 0x00000007, 0x00000021}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x0000000E, 0x000000CE, 0x00000004, 0x00000009, 0x00000040}, // L[0]
{0x00000044, 0x000000A3, 0x00000048, 0x0000000A, 0x00000021}, // L[1]
{0x00000092, 0x00000076, 0x000000A1, 0x0000007D, 0x00000080}, // L[2]
{0x000000AF, 0x000000B2, 0x00000021, 0x00000007, 0x00000001}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000040, 0x000000CE, 0x00000004, 0x0000004A, 0x0000004E}, // L[0]
{0x00000021, 0x000000A3, 0x00000048, 0x00000059, 0x000000A1}, // L[1]
{0x00000080, 0x00000076, 0x000000A1, 0x000000EF, 0x00000046}, // L[2]
{0x00000001, 0x000000B2, 0x00000021, 0x00000030, 0x00000011}, // L[3]
}, // T.state[11].w =  24
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000004, 0x00000046, 0x00000030, 0x0000004C}, // L[0]
{0x00000021, 0x00000060, 0x00000011, 0x0000004A, 0x00000001}, // L[1]
{0x00000080, 0x00000047, 0x0000004E, 0x00000059, 0x00000041}, // L[2]
{0x00000001, 0x00000001, 0x000000A1, 0x000000EF, 0x00000000}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x0000004C, 0x00000004, 0x00000046, 0x0000003E, 0x00000000}, // L[0]
{0x00000001, 0x00000060, 0x00000011, 0x000000A5, 0x00000000}, // L[1]
{0x00000041, 0x00000047, 0x0000004E, 0x0000000C, 0x00000000}, // L[2]
{0x00000000, 0x00000001, 0x000000A1, 0x000000F7, 0x00000000}, // L[3]
}, // T.state[13].w =  11
// T.w = 154
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 154 -> 153
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000084, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x00000069}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000004, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000042, 0x00000042}, // L[0]
{0x00000008, 0x00000008, 0x00000084, 0x00000004, 0x00000080}, // L[1]
{0x00000069, 0x00000029, 0x00000000, 0x000000B4, 0x000000D4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x0000004A, 0x0000005A}, // L[3]
}, // T.state[ 5].w =   6
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x00000042, 0x00000048}, // L[0]
{0x00000008, 0x00000011, 0x00000080, 0x00000004, 0x00000009}, // L[1]
{0x00000069, 0x000000BF, 0x000000D4, 0x000000B4, 0x00000080}, // L[2]
{0x00000090, 0x0000004B, 0x0000005A, 0x0000004A, 0x000000C9}, // L[3]
}, // T.state[ 6].w =  12
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000050, 0x00000012}, // L[0]
{0x00000009, 0x00000011, 0x00000080, 0x00000068, 0x00000028}, // L[1]
{0x00000080, 0x000000BF, 0x000000D4, 0x000000A1, 0x00000015}, // L[2]
{0x000000C9, 0x0000004B, 0x0000005A, 0x0000001C, 0x00000042}, // L[3]
}, // T.state[ 7].w =  19
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000072, 0x00000015, 0x0000001C, 0x0000000E}, // L[0]
{0x00000009, 0x00000055, 0x00000042, 0x00000050, 0x00000044}, // L[1]
{0x00000080, 0x00000012, 0x00000012, 0x00000068, 0x00000092}, // L[2]
{0x000000C9, 0x000000B4, 0x00000028, 0x000000A1, 0x000000AF}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x0000000E, 0x00000072, 0x00000015, 0x00000009, 0x00000004}, // L[0]
{0x00000044, 0x00000055, 0x00000042, 0x0000000A, 0x00000048}, // L[1]
{0x00000092, 0x00000012, 0x00000012, 0x0000007D, 0x000000A1}, // L[2]
{0x000000AF, 0x000000B4, 0x00000028, 0x00000007, 0x00000021}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x0000000E, 0x000000CE, 0x00000004, 0x00000009, 0x00000040}, // L[0]
{0x00000044, 0x000000A3, 0x00000048, 0x0000000A, 0x00000021}, // L[1]
{0x00000092, 0x00000076, 0x000000A1, 0x0000007D, 0x00000080}, // L[2]
{0x000000AF, 0x000000B2, 0x00000021, 0x00000007, 0x00000001}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000040, 0x000000CE, 0x00000004, 0x0000004A, 0x0000004E}, // L[0]
{0x00000021, 0x000000A3, 0x00000048, 0x00000059, 0x000000A1}, // L[1]
{0x00000080, 0x00000076, 0x000000A1, 0x000000EF, 0x00000056}, // L[2]
{0x00000001, 0x000000B2, 0x00000021, 0x00000030, 0x00000031}, // L[3]
}, // T.state[11].w =  24
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000004, 0x00000056, 0x00000030, 0x00000044}, // L[0]
{0x00000021, 0x00000040, 0x00000031, 0x0000004A, 0x00000021}, // L[1]
{0x00000080, 0x00000007, 0x0000004E, 0x00000059, 0x00000085}, // L[2]
{0x00000001, 0x00000001, 0x000000A1, 0x000000EF, 0x00000000}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x00000044, 0x00000004, 0x00000056, 0x0000003A, 0x00000000}, // L[0]
{0x00000021, 0x00000040, 0x00000031, 0x000000B5, 0x00000000}, // L[1]
{0x00000085, 0x00000007, 0x0000004E, 0x0000006E, 0x00000000}, // L[2]
{0x00000000, 0x00000001, 0x000000A1, 0x000000F7, 0x00000000}, // L[3]
}, // T.state[13].w =  10
// T.w = 153
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 153 -> 152
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000084, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x00000069}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000004, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000042, 0x00000042}, // L[0]
{0x00000008, 0x00000008, 0x00000084, 0x00000004, 0x00000080}, // L[1]
{0x00000069, 0x00000029, 0x00000000, 0x000000B4, 0x000000D4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x0000004A, 0x0000005A}, // L[3]
}, // T.state[ 5].w =   6
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x00000042, 0x00000048}, // L[0]
{0x00000008, 0x00000011, 0x00000080, 0x00000004, 0x00000009}, // L[1]
{0x00000069, 0x000000BF, 0x000000D4, 0x000000B4, 0x00000080}, // L[2]
{0x00000090, 0x0000004B, 0x0000005A, 0x0000004A, 0x000000C9}, // L[3]
}, // T.state[ 6].w =  12
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000050, 0x00000012}, // L[0]
{0x00000009, 0x00000011, 0x00000080, 0x00000068, 0x00000028}, // L[1]
{0x00000080, 0x000000BF, 0x000000D4, 0x000000A1, 0x00000015}, // L[2]
{0x000000C9, 0x0000004B, 0x0000005A, 0x0000001C, 0x00000042}, // L[3]
}, // T.state[ 7].w =  19
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000072, 0x00000015, 0x0000001C, 0x0000000E}, // L[0]
{0x00000009, 0x00000055, 0x00000042, 0x00000050, 0x00000044}, // L[1]
{0x00000080, 0x00000012, 0x00000012, 0x00000068, 0x00000092}, // L[2]
{0x000000C9, 0x000000B4, 0x00000028, 0x000000A1, 0x000000AF}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x0000000E, 0x00000072, 0x00000015, 0x00000009, 0x00000004}, // L[0]
{0x00000044, 0x00000055, 0x00000042, 0x0000000A, 0x00000048}, // L[1]
{0x00000092, 0x00000012, 0x00000012, 0x0000007D, 0x000000A1}, // L[2]
{0x000000AF, 0x000000B4, 0x00000028, 0x00000007, 0x00000021}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x0000000E, 0x000000CE, 0x00000004, 0x00000009, 0x00000040}, // L[0]
{0x00000044, 0x000000A3, 0x00000048, 0x0000000A, 0x00000021}, // L[1]
{0x00000092, 0x00000076, 0x000000A1, 0x0000007D, 0x00000080}, // L[2]
{0x000000AF, 0x000000B2, 0x00000021, 0x00000007, 0x00000001}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000040, 0x000000CE, 0x00000004, 0x0000004A, 0x0000004E}, // L[0]
{0x00000021, 0x000000A3, 0x00000048, 0x00000059, 0x00000083}, // L[1]
{0x00000080, 0x00000076, 0x000000A1, 0x000000EF, 0x00000046}, // L[2]
{0x00000001, 0x000000B2, 0x00000021, 0x00000030, 0x00000031}, // L[3]
}, // T.state[11].w =  24
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000040, 0x00000046, 0x00000030, 0x00000000}, // L[0]
{0x00000021, 0x00000060, 0x00000031, 0x0000004A, 0x00000001}, // L[1]
{0x00000080, 0x00000007, 0x0000004E, 0x00000059, 0x00000081}, // L[2]
{0x00000001, 0x00000001, 0x00000083, 0x000000EF, 0x00000000}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x00000000, 0x00000040, 0x00000046, 0x00000018, 0x00000000}, // L[0]
{0x00000001, 0x00000060, 0x00000031, 0x000000A5, 0x00000000}, // L[1]
{0x00000081, 0x00000007, 0x0000004E, 0x0000006C, 0x00000000}, // L[2]
{0x00000000, 0x00000001, 0x00000083, 0x000000F7, 0x00000000}, // L[3]
}, // T.state[13].w =   9
// T.w = 152
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 152 -> 151
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000010, 0x00000000, 0x00000000, 0x00000090}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[2]
{0x00000090, 0x00000010, 0x00000000, 0x00000084, 0x00000084}, // L[3]
}, // T.state[ 3].w =   2
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000084, 0x00000000}, // L[0]
{0x00000000, 0x00000008, 0x00000084, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000029, 0x00000000, 0x00000000, 0x00000069}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x00000004, 0x00000090}, // L[3]
}, // T.state[ 4].w =   3
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000004, 0x00000042, 0x00000042}, // L[0]
{0x00000008, 0x00000008, 0x00000084, 0x00000004, 0x00000080}, // L[1]
{0x00000069, 0x00000029, 0x00000000, 0x000000B4, 0x000000D4}, // L[2]
{0x00000090, 0x00000000, 0x00000000, 0x0000004A, 0x0000005A}, // L[3]
}, // T.state[ 5].w =   6
{ // R[ 6] abcde
{0x00000000, 0x00000048, 0x00000042, 0x00000042, 0x00000048}, // L[0]
{0x00000008, 0x00000011, 0x00000080, 0x00000004, 0x00000009}, // L[1]
{0x00000069, 0x000000BF, 0x000000D4, 0x000000B4, 0x00000080}, // L[2]
{0x00000090, 0x0000004B, 0x0000005A, 0x0000004A, 0x000000C9}, // L[3]
}, // T.state[ 6].w =  12
{ // R[ 7] abcde
{0x00000048, 0x00000048, 0x00000042, 0x00000050, 0x00000012}, // L[0]
{0x00000009, 0x00000011, 0x00000080, 0x00000068, 0x00000028}, // L[1]
{0x00000080, 0x000000BF, 0x000000D4, 0x000000A1, 0x00000015}, // L[2]
{0x000000C9, 0x0000004B, 0x0000005A, 0x0000001C, 0x00000042}, // L[3]
}, // T.state[ 7].w =  19
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000048, 0x00000072, 0x00000015, 0x0000001C, 0x0000000E}, // L[0]
{0x00000009, 0x00000055, 0x00000042, 0x00000050, 0x00000044}, // L[1]
{0x00000080, 0x00000012, 0x00000012, 0x00000068, 0x00000092}, // L[2]
{0x000000C9, 0x000000B4, 0x00000028, 0x000000A1, 0x000000AF}, // L[3]
}, // T.state[ 8].w =  18
{ // R[ 9] abcde
{0x0000000E, 0x00000072, 0x00000015, 0x00000009, 0x00000004}, // L[0]
{0x00000044, 0x00000055, 0x00000042, 0x0000000A, 0x00000048}, // L[1]
{0x00000092, 0x00000012, 0x00000012, 0x0000007D, 0x000000A1}, // L[2]
{0x000000AF, 0x000000B4, 0x00000028, 0x00000007, 0x00000021}, // L[3]
}, // T.state[ 9].w =  20
{ // R[10] abcde
{0x0000000E, 0x000000CE, 0x00000004, 0x00000009, 0x00000040}, // L[0]
{0x00000044, 0x000000A3, 0x00000048, 0x0000000A, 0x00000021}, // L[1]
{0x00000092, 0x00000076, 0x000000A1, 0x0000007D, 0x00000004}, // L[2]
{0x000000AF, 0x000000B2, 0x00000021, 0x00000007, 0x00000001}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000040, 0x000000CE, 0x00000004, 0x0000004A, 0x0000004E}, // L[0]
{0x00000021, 0x000000A3, 0x00000048, 0x00000059, 0x00000081}, // L[1]
{0x00000004, 0x00000076, 0x000000A1, 0x000000CB, 0x000000FE}, // L[2]
{0x00000001, 0x000000B2, 0x00000021, 0x00000030, 0x00000031}, // L[3]
}, // T.state[11].w =  24
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000044, 0x000000FE, 0x00000030, 0x00000004}, // L[0]
{0x00000021, 0x00000011, 0x00000031, 0x0000004A, 0x00000010}, // L[1]
{0x00000004, 0x00000007, 0x0000004E, 0x00000059, 0x00000005}, // L[2]
{0x00000001, 0x00000001, 0x00000081, 0x000000CB, 0x00000000}, // L[3]
}, // T.state[12].w =  17
{ // R[13] abcde
{0x00000004, 0x00000044, 0x000000FE, 0x0000001A, 0x00000000}, // L[0]
{0x00000010, 0x00000011, 0x00000031, 0x0000002D, 0x00000000}, // L[1]
{0x00000005, 0x00000007, 0x0000004E, 0x0000002E, 0x00000000}, // L[2]
{0x00000000, 0x00000001, 0x00000081, 0x000000E5, 0x00000000}, // L[3]
}, // T.state[13].w =   9
// T.w = 151
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 151 -> 150
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000080, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000023}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x00000044}, // L[0]
{0x00000008, 0x00000018, 0x00000080, 0x00000004, 0x00000084}, // L[1]
{0x00000023, 0x00000021, 0x00000000, 0x00000091, 0x000000B1}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000A}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000088, 0x00000044, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000093, 0x00000084, 0x00000004, 0x00000089}, // L[1]
{0x00000023, 0x00000012, 0x000000B1, 0x00000091, 0x00000031}, // L[2]
{0x00000030, 0x00000041, 0x0000000A, 0x0000001A, 0x00000093}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000088, 0x00000088, 0x00000044, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000093, 0x00000084, 0x0000006C, 0x00000020}, // L[1]
{0x00000031, 0x00000012, 0x000000B1, 0x00000005, 0x000000D4}, // L[2]
{0x00000093, 0x00000041, 0x0000000A, 0x0000004C, 0x00000052}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000067, 0x000000D4, 0x0000004C, 0x00000021}, // L[0]
{0x00000089, 0x0000008D, 0x00000052, 0x00000046, 0x00000004}, // L[1]
{0x00000031, 0x00000026, 0x00000002, 0x0000006C, 0x0000007B}, // L[2]
{0x00000093, 0x00000015, 0x00000020, 0x00000005, 0x00000080}, // L[3]
}, // T.state[ 8].w =  17
{ // R[ 9] abcde
{0x00000021, 0x00000067, 0x000000D4, 0x000000B6, 0x0000000A}, // L[0]
{0x00000004, 0x0000008D, 0x00000052, 0x00000021, 0x00000011}, // L[1]
{0x0000007B, 0x00000026, 0x00000002, 0x0000008B, 0x00000089}, // L[2]
{0x00000080, 0x00000015, 0x00000020, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000021, 0x000000AD, 0x0000000A, 0x000000B6, 0x000000C4}, // L[0]
{0x00000004, 0x00000093, 0x00000011, 0x00000021, 0x000000B1}, // L[1]
{0x0000007B, 0x000000F5, 0x00000089, 0x0000008B, 0x00000080}, // L[2]
{0x00000080, 0x000000E6, 0x00000022, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x000000C4, 0x000000AD, 0x0000000A, 0x00000093, 0x000000AD}, // L[0]
{0x000000B1, 0x00000093, 0x00000011, 0x00000084, 0x00000095}, // L[1]
{0x00000080, 0x000000F5, 0x00000089, 0x00000058, 0x00000041}, // L[2]
{0x00000022, 0x000000E6, 0x00000022, 0x00000007, 0x00000025}, // L[3]
}, // T.state[11].w =  23
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x000000C4, 0x0000000C, 0x00000041, 0x00000007, 0x00000040}, // L[0]
{0x000000B1, 0x00000069, 0x00000025, 0x00000093, 0x00000038}, // L[1]
{0x00000080, 0x00000087, 0x000000AD, 0x00000084, 0x00000005}, // L[2]
{0x00000022, 0x00000000, 0x00000095, 0x00000058, 0x00000022}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x00000040, 0x0000000C, 0x00000041, 0x000000A3, 0x00000000}, // L[0]
{0x00000038, 0x00000069, 0x00000025, 0x000000D5, 0x00000000}, // L[1]
{0x00000005, 0x00000087, 0x000000AD, 0x000000C0, 0x00000000}, // L[2]
{0x00000022, 0x00000000, 0x00000095, 0x0000003D, 0x00000000}, // L[3]
}, // T.state[13].w =  16
// T.w = 150
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 150 -> 149
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000080, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000023}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x00000044}, // L[0]
{0x00000008, 0x00000018, 0x00000080, 0x00000004, 0x00000084}, // L[1]
{0x00000023, 0x00000021, 0x00000000, 0x00000091, 0x000000B1}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000A}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000088, 0x00000044, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000093, 0x00000084, 0x00000004, 0x00000089}, // L[1]
{0x00000023, 0x00000012, 0x000000B1, 0x00000091, 0x00000031}, // L[2]
{0x00000030, 0x00000041, 0x0000000A, 0x0000001A, 0x00000093}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000088, 0x00000088, 0x00000044, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000093, 0x00000084, 0x0000006C, 0x00000020}, // L[1]
{0x00000031, 0x00000012, 0x000000B1, 0x00000005, 0x000000D4}, // L[2]
{0x00000093, 0x00000041, 0x0000000A, 0x0000004C, 0x00000052}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000067, 0x000000D4, 0x0000004C, 0x00000021}, // L[0]
{0x00000089, 0x0000008D, 0x00000052, 0x00000046, 0x00000004}, // L[1]
{0x00000031, 0x00000026, 0x00000002, 0x0000006C, 0x0000007B}, // L[2]
{0x00000093, 0x00000015, 0x00000020, 0x00000005, 0x00000080}, // L[3]
}, // T.state[ 8].w =  17
{ // R[ 9] abcde
{0x00000021, 0x00000067, 0x000000D4, 0x000000B6, 0x0000000A}, // L[0]
{0x00000004, 0x0000008D, 0x00000052, 0x00000021, 0x00000011}, // L[1]
{0x0000007B, 0x00000026, 0x00000002, 0x0000008B, 0x00000089}, // L[2]
{0x00000080, 0x00000015, 0x00000020, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000021, 0x000000AD, 0x0000000A, 0x000000B6, 0x000000C4}, // L[0]
{0x00000004, 0x00000093, 0x00000011, 0x00000021, 0x000000B1}, // L[1]
{0x0000007B, 0x000000F5, 0x00000089, 0x0000008B, 0x00000080}, // L[2]
{0x00000080, 0x000000E6, 0x00000022, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x000000C4, 0x000000AD, 0x0000000A, 0x00000093, 0x000000AD}, // L[0]
{0x000000B1, 0x00000093, 0x00000011, 0x00000084, 0x00000095}, // L[1]
{0x00000080, 0x000000F5, 0x00000089, 0x00000058, 0x00000061}, // L[2]
{0x00000022, 0x000000E6, 0x00000022, 0x00000007, 0x00000025}, // L[3]
}, // T.state[11].w =  23
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x000000C4, 0x0000000C, 0x00000061, 0x00000007, 0x00000040}, // L[0]
{0x000000B1, 0x00000029, 0x00000025, 0x00000093, 0x00000088}, // L[1]
{0x00000080, 0x00000087, 0x000000AD, 0x00000084, 0x00000001}, // L[2]
{0x00000022, 0x00000000, 0x00000095, 0x00000058, 0x00000062}, // L[3]
}, // T.state[12].w =  18
{ // R[13] abcde
{0x00000040, 0x0000000C, 0x00000061, 0x000000A3, 0x00000000}, // L[0]
{0x00000088, 0x00000029, 0x00000025, 0x0000008D, 0x00000000}, // L[1]
{0x00000001, 0x00000087, 0x000000AD, 0x000000C2, 0x00000000}, // L[2]
{0x00000062, 0x00000000, 0x00000095, 0x0000001D, 0x00000000}, // L[3]
}, // T.state[13].w =  15
// T.w = 149
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 149 -> 148
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000080, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000023}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x00000044}, // L[0]
{0x00000008, 0x00000018, 0x00000080, 0x00000004, 0x00000084}, // L[1]
{0x00000023, 0x00000021, 0x00000000, 0x00000091, 0x000000B1}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000A}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000088, 0x00000044, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000093, 0x00000084, 0x00000004, 0x00000089}, // L[1]
{0x00000023, 0x00000012, 0x000000B1, 0x00000091, 0x00000031}, // L[2]
{0x00000030, 0x00000041, 0x0000000A, 0x0000001A, 0x00000093}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000088, 0x00000088, 0x00000044, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000093, 0x00000084, 0x0000006C, 0x00000020}, // L[1]
{0x00000031, 0x00000012, 0x000000B1, 0x00000005, 0x000000D4}, // L[2]
{0x00000093, 0x00000041, 0x0000000A, 0x0000004C, 0x00000052}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000067, 0x000000D4, 0x0000004C, 0x00000021}, // L[0]
{0x00000089, 0x0000008D, 0x00000052, 0x00000046, 0x00000004}, // L[1]
{0x00000031, 0x00000026, 0x00000002, 0x0000006C, 0x0000007B}, // L[2]
{0x00000093, 0x00000015, 0x00000020, 0x00000005, 0x00000080}, // L[3]
}, // T.state[ 8].w =  17
{ // R[ 9] abcde
{0x00000021, 0x00000067, 0x000000D4, 0x000000B6, 0x0000000A}, // L[0]
{0x00000004, 0x0000008D, 0x00000052, 0x00000021, 0x00000011}, // L[1]
{0x0000007B, 0x00000026, 0x00000002, 0x0000008B, 0x00000089}, // L[2]
{0x00000080, 0x00000015, 0x00000020, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000021, 0x000000AD, 0x0000000A, 0x000000B6, 0x00000084}, // L[0]
{0x00000004, 0x00000093, 0x00000011, 0x00000021, 0x000000B1}, // L[1]
{0x0000007B, 0x000000F5, 0x00000089, 0x0000008B, 0x00000010}, // L[2]
{0x00000080, 0x000000E6, 0x00000022, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000084, 0x000000AD, 0x0000000A, 0x00000091, 0x000000A9}, // L[0]
{0x000000B1, 0x00000093, 0x00000011, 0x00000084, 0x00000095}, // L[1]
{0x00000010, 0x000000F5, 0x00000089, 0x000000DC, 0x000000F5}, // L[2]
{0x00000022, 0x000000E6, 0x00000022, 0x00000007, 0x00000067}, // L[3]
}, // T.state[11].w =  23
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000084, 0x0000000C, 0x000000F5, 0x00000007, 0x00000090}, // L[0]
{0x000000B1, 0x00000000, 0x00000067, 0x00000091, 0x00000091}, // L[1]
{0x00000010, 0x00000003, 0x000000A9, 0x00000084, 0x00000011}, // L[2]
{0x00000022, 0x00000008, 0x00000095, 0x000000DC, 0x0000002E}, // L[3]
}, // T.state[12].w =  19
{ // R[13] abcde
{0x00000090, 0x0000000C, 0x000000F5, 0x000000CB, 0x00000000}, // L[0]
{0x00000091, 0x00000000, 0x00000067, 0x00000000, 0x00000000}, // L[1]
{0x00000011, 0x00000003, 0x000000A9, 0x000000CA, 0x00000000}, // L[2]
{0x0000002E, 0x00000008, 0x00000095, 0x00000079, 0x00000000}, // L[3]
}, // T.state[13].w =  13
// T.w = 148
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 148 -> 147
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000080, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000023}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x00000044}, // L[0]
{0x00000008, 0x00000018, 0x00000080, 0x00000004, 0x00000084}, // L[1]
{0x00000023, 0x00000021, 0x00000000, 0x00000091, 0x000000B1}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000A}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000088, 0x00000044, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000093, 0x00000084, 0x00000004, 0x00000089}, // L[1]
{0x00000023, 0x00000012, 0x000000B1, 0x00000091, 0x00000031}, // L[2]
{0x00000030, 0x00000041, 0x0000000A, 0x0000001A, 0x00000093}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000088, 0x00000088, 0x00000044, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000093, 0x00000084, 0x0000006C, 0x00000020}, // L[1]
{0x00000031, 0x00000012, 0x000000B1, 0x00000005, 0x000000D4}, // L[2]
{0x00000093, 0x00000041, 0x0000000A, 0x0000004C, 0x00000052}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000067, 0x000000D4, 0x0000004C, 0x00000021}, // L[0]
{0x00000089, 0x0000008D, 0x00000052, 0x00000046, 0x00000004}, // L[1]
{0x00000031, 0x00000026, 0x00000002, 0x0000006C, 0x0000007B}, // L[2]
{0x00000093, 0x00000015, 0x00000020, 0x00000005, 0x00000080}, // L[3]
}, // T.state[ 8].w =  17
{ // R[ 9] abcde
{0x00000021, 0x00000067, 0x000000D4, 0x000000B6, 0x0000000A}, // L[0]
{0x00000004, 0x0000008D, 0x00000052, 0x00000021, 0x00000011}, // L[1]
{0x0000007B, 0x00000026, 0x00000002, 0x0000008B, 0x00000089}, // L[2]
{0x00000080, 0x00000015, 0x00000020, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000021, 0x000000AD, 0x0000000A, 0x000000B6, 0x00000084}, // L[0]
{0x00000004, 0x00000093, 0x00000011, 0x00000021, 0x000000B1}, // L[1]
{0x0000007B, 0x000000F5, 0x00000089, 0x0000008B, 0x00000010}, // L[2]
{0x00000080, 0x000000E6, 0x00000022, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000084, 0x000000AD, 0x0000000A, 0x00000091, 0x000000AD}, // L[0]
{0x000000B1, 0x00000093, 0x00000011, 0x00000084, 0x00000095}, // L[1]
{0x00000010, 0x000000F5, 0x00000089, 0x000000DC, 0x00000075}, // L[2]
{0x00000022, 0x000000E6, 0x00000022, 0x00000007, 0x0000006F}, // L[3]
}, // T.state[11].w =  23
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000084, 0x0000000C, 0x00000075, 0x00000007, 0x00000080}, // L[0]
{0x000000B1, 0x00000001, 0x0000006F, 0x00000091, 0x00000090}, // L[1]
{0x00000010, 0x00000013, 0x000000AD, 0x00000084, 0x00000001}, // L[2]
{0x00000022, 0x00000000, 0x00000095, 0x000000DC, 0x00000022}, // L[3]
}, // T.state[12].w =  19
{ // R[13] abcde
{0x00000080, 0x0000000C, 0x00000075, 0x000000C3, 0x00000000}, // L[0]
{0x00000090, 0x00000001, 0x0000006F, 0x00000080, 0x00000000}, // L[1]
{0x00000001, 0x00000013, 0x000000AD, 0x000000C2, 0x00000000}, // L[2]
{0x00000022, 0x00000000, 0x00000095, 0x0000007F, 0x00000000}, // L[3]
}, // T.state[13].w =  12
// T.w = 147
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 147 -> 146
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000001, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000010, 0x00000000, 0x00000000, 0x00000010}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x0000000C}, // L[2]
{0x00000010, 0x00000010, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000080, 0x00000000}, // L[0]
{0x00000000, 0x00000018, 0x00000080, 0x00000000, 0x00000008}, // L[1]
{0x00000000, 0x00000021, 0x00000000, 0x00000000, 0x00000023}, // L[2]
{0x00000010, 0x00000000, 0x00000000, 0x00000004, 0x00000030}, // L[3]
}, // T.state[ 4].w =   2
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x0000000C, 0x00000040, 0x00000044}, // L[0]
{0x00000008, 0x00000018, 0x00000080, 0x00000004, 0x00000084}, // L[1]
{0x00000023, 0x00000021, 0x00000000, 0x00000091, 0x000000B1}, // L[2]
{0x00000030, 0x00000000, 0x00000000, 0x0000001A, 0x0000000A}, // L[3]
}, // T.state[ 5].w =   5
{ // R[ 6] abcde
{0x00000000, 0x00000088, 0x00000044, 0x00000040, 0x00000088}, // L[0]
{0x00000008, 0x00000093, 0x00000084, 0x00000004, 0x00000089}, // L[1]
{0x00000023, 0x00000012, 0x000000B1, 0x00000091, 0x00000031}, // L[2]
{0x00000030, 0x00000041, 0x0000000A, 0x0000001A, 0x00000093}, // L[3]
}, // T.state[ 6].w =  11
{ // R[ 7] abcde
{0x00000088, 0x00000088, 0x00000044, 0x00000046, 0x00000002}, // L[0]
{0x00000089, 0x00000093, 0x00000084, 0x0000006C, 0x00000020}, // L[1]
{0x00000031, 0x00000012, 0x000000B1, 0x00000005, 0x000000D4}, // L[2]
{0x00000093, 0x00000041, 0x0000000A, 0x0000004C, 0x00000052}, // L[3]
}, // T.state[ 7].w =  15
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000088, 0x00000067, 0x000000D4, 0x0000004C, 0x00000021}, // L[0]
{0x00000089, 0x0000008D, 0x00000052, 0x00000046, 0x00000004}, // L[1]
{0x00000031, 0x00000026, 0x00000002, 0x0000006C, 0x0000007B}, // L[2]
{0x00000093, 0x00000015, 0x00000020, 0x00000005, 0x00000080}, // L[3]
}, // T.state[ 8].w =  17
{ // R[ 9] abcde
{0x00000021, 0x00000067, 0x000000D4, 0x000000B6, 0x0000000A}, // L[0]
{0x00000004, 0x0000008D, 0x00000052, 0x00000021, 0x00000011}, // L[1]
{0x0000007B, 0x00000026, 0x00000002, 0x0000008B, 0x00000089}, // L[2]
{0x00000080, 0x00000015, 0x00000020, 0x000000C2, 0x00000022}, // L[3]
}, // T.state[ 9].w =  21
{ // R[10] abcde
{0x00000021, 0x000000AD, 0x0000000A, 0x000000B6, 0x00000086}, // L[0]
{0x00000004, 0x00000093, 0x00000011, 0x00000021, 0x00000091}, // L[1]
{0x0000007B, 0x000000F5, 0x00000089, 0x0000008B, 0x00000002}, // L[2]
{0x00000080, 0x000000E6, 0x00000022, 0x000000C2, 0x000000E2}, // L[3]
}, // T.state[10].w =  19
{ // R[11] abcde
{0x00000086, 0x000000AD, 0x0000000A, 0x00000081, 0x0000008D}, // L[0]
{0x00000091, 0x00000093, 0x00000011, 0x00000085, 0x00000096}, // L[1]
{0x00000002, 0x000000F5, 0x00000089, 0x0000004C, 0x00000055}, // L[2]
{0x000000E2, 0x000000E6, 0x00000022, 0x00000001, 0x00000067}, // L[3]
}, // T.state[11].w =  23
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000086, 0x0000000A, 0x00000055, 0x00000001, 0x00000088}, // L[0]
{0x00000091, 0x00000041, 0x00000067, 0x00000081, 0x00000050}, // L[1]
{0x00000002, 0x00000003, 0x0000008D, 0x00000085, 0x00000001}, // L[2]
{0x000000E2, 0x00000040, 0x00000096, 0x0000004C, 0x00000022}, // L[3]
}, // T.state[12].w =  16
{ // R[13] abcde
{0x00000088, 0x0000000A, 0x00000055, 0x000000C4, 0x00000000}, // L[0]
{0x00000050, 0x00000041, 0x00000067, 0x000000E8, 0x00000000}, // L[1]
{0x00000001, 0x00000003, 0x0000008D, 0x00000042, 0x00000000}, // L[2]
{0x00000022, 0x00000040, 0x00000096, 0x00000037, 0x00000000}, // L[3]
}, // T.state[13].w =  14
// T.w = 146
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 146 -> 107
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000051}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000051, 0x00000030, 0x00000080, 0x0000008E, 0x00000012}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000012, 0x00000046, 0x00000020}, // L[0]
{0x00000010, 0x00000044, 0x00000002, 0x00000000, 0x000000DC}, // L[1]
{0x00000051, 0x00000014, 0x00000000, 0x00000080, 0x00000045}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x0000008E, 0x00000088}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000012, 0x00000033, 0x00000001}, // L[0]
{0x000000DC, 0x00000044, 0x00000002, 0x0000006E, 0x00000028}, // L[1]
{0x00000045, 0x00000014, 0x00000000, 0x000000E2, 0x00000022}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000003, 0x00000001}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000024, 0x00000001, 0x00000033, 0x00000044}, // L[0]
{0x000000DC, 0x0000008D, 0x00000028, 0x0000006E, 0x00000041}, // L[1]
{0x00000045, 0x000000C6, 0x00000022, 0x000000E2, 0x00000081}, // L[2]
{0x00000088, 0x00000020, 0x00000001, 0x00000003, 0x000000B8}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000044, 0x00000024, 0x00000001, 0x000000BB, 0x00000088}, // L[0]
{0x00000041, 0x0000008D, 0x00000028, 0x00000079, 0x00000001}, // L[1]
{0x00000081, 0x000000C6, 0x00000022, 0x0000001B, 0x0000000B}, // L[2]
{0x000000B8, 0x00000020, 0x00000001, 0x000000DD, 0x00000044}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000044, 0x00000019, 0x0000000B, 0x000000DD, 0x00000045}, // L[0]
{0x00000041, 0x0000009B, 0x00000044, 0x000000BB, 0x00000048}, // L[1]
{0x00000081, 0x000000C8, 0x00000088, 0x00000079, 0x00000049}, // L[2]
{0x000000B8, 0x00000059, 0x00000001, 0x0000001B, 0x00000001}, // L[3]
}, // T.state[12].w =  22
{ // R[13] abcde
{0x00000045, 0x00000019, 0x0000000B, 0x0000004C, 0x00000000}, // L[0]
{0x00000048, 0x0000009B, 0x00000044, 0x000000F9, 0x00000000}, // L[1]
{0x00000049, 0x000000C8, 0x00000088, 0x00000018, 0x00000000}, // L[2]
{0x00000001, 0x00000059, 0x00000001, 0x0000000D, 0x00000000}, // L[3]
}, // T.state[13].w =  21
// T.w = 107
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 107 -> 106
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000051}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000051, 0x00000030, 0x00000080, 0x0000008E, 0x00000012}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000012, 0x00000046, 0x00000020}, // L[0]
{0x00000010, 0x00000044, 0x00000002, 0x00000000, 0x000000DC}, // L[1]
{0x00000051, 0x00000014, 0x00000000, 0x00000080, 0x00000045}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x0000008E, 0x00000088}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000012, 0x00000033, 0x00000001}, // L[0]
{0x000000DC, 0x00000044, 0x00000002, 0x0000006E, 0x00000028}, // L[1]
{0x00000045, 0x00000014, 0x00000000, 0x000000E2, 0x00000022}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000003, 0x00000001}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000024, 0x00000001, 0x00000033, 0x00000044}, // L[0]
{0x000000DC, 0x0000008D, 0x00000028, 0x0000006E, 0x00000041}, // L[1]
{0x00000045, 0x000000C6, 0x00000022, 0x000000E2, 0x00000081}, // L[2]
{0x00000088, 0x00000020, 0x00000001, 0x00000003, 0x000000B8}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000044, 0x00000024, 0x00000001, 0x000000BB, 0x00000088}, // L[0]
{0x00000041, 0x0000008D, 0x00000028, 0x00000079, 0x00000081}, // L[1]
{0x00000081, 0x000000C6, 0x00000022, 0x0000001B, 0x0000000B}, // L[2]
{0x000000B8, 0x00000020, 0x00000001, 0x000000DD, 0x00000044}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000044, 0x00000018, 0x0000000B, 0x000000DD, 0x00000044}, // L[0]
{0x00000041, 0x0000009B, 0x00000044, 0x000000BB, 0x0000006C}, // L[1]
{0x00000081, 0x000000C8, 0x00000088, 0x00000079, 0x00000049}, // L[2]
{0x000000B8, 0x00000059, 0x00000081, 0x0000001B, 0x00000001}, // L[3]
}, // T.state[12].w =  22
{ // R[13] abcde
{0x00000044, 0x00000018, 0x0000000B, 0x000000CC, 0x00000000}, // L[0]
{0x0000006C, 0x0000009B, 0x00000044, 0x000000EB, 0x00000000}, // L[1]
{0x00000049, 0x000000C8, 0x00000088, 0x00000018, 0x00000000}, // L[2]
{0x00000001, 0x00000059, 0x00000081, 0x0000000D, 0x00000000}, // L[3]
}, // T.state[13].w =  20
// T.w = 106
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 106 -> 105
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000051}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000051, 0x00000030, 0x00000080, 0x0000008E, 0x00000012}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000012, 0x00000046, 0x00000020}, // L[0]
{0x00000010, 0x00000044, 0x00000002, 0x00000000, 0x000000DC}, // L[1]
{0x00000051, 0x00000014, 0x00000000, 0x00000080, 0x00000045}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x0000008E, 0x00000088}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000012, 0x00000033, 0x00000001}, // L[0]
{0x000000DC, 0x00000044, 0x00000002, 0x0000006E, 0x00000028}, // L[1]
{0x00000045, 0x00000014, 0x00000000, 0x000000E2, 0x00000022}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000003, 0x00000001}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000024, 0x00000001, 0x00000033, 0x00000044}, // L[0]
{0x000000DC, 0x0000008D, 0x00000028, 0x0000006E, 0x00000041}, // L[1]
{0x00000045, 0x000000C6, 0x00000022, 0x000000E2, 0x00000081}, // L[2]
{0x00000088, 0x00000020, 0x00000001, 0x00000003, 0x000000B8}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000044, 0x00000024, 0x00000001, 0x000000BB, 0x000000E8}, // L[0]
{0x00000041, 0x0000008D, 0x00000028, 0x00000079, 0x00000091}, // L[1]
{0x00000081, 0x000000C6, 0x00000022, 0x0000001B, 0x0000004B}, // L[2]
{0x000000B8, 0x00000020, 0x00000001, 0x000000DD, 0x00000044}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000044, 0x00000038, 0x0000004B, 0x000000DD, 0x00000004}, // L[0]
{0x00000041, 0x0000001B, 0x00000044, 0x000000BB, 0x00000048}, // L[1]
{0x00000081, 0x000000C8, 0x000000E8, 0x00000079, 0x00000049}, // L[2]
{0x000000B8, 0x00000099, 0x00000091, 0x0000001B, 0x00000021}, // L[3]
}, // T.state[12].w =  22
{ // R[13] abcde
{0x00000004, 0x00000038, 0x0000004B, 0x000000EC, 0x00000000}, // L[0]
{0x00000048, 0x0000001B, 0x00000044, 0x000000F9, 0x00000000}, // L[1]
{0x00000049, 0x000000C8, 0x000000E8, 0x00000018, 0x00000000}, // L[2]
{0x00000021, 0x00000099, 0x00000091, 0x0000001D, 0x00000000}, // L[3]
}, // T.state[13].w =  19
// T.w = 105
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 105 -> 104
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000051}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000051, 0x00000030, 0x00000080, 0x0000008E, 0x00000012}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000012, 0x00000046, 0x00000020}, // L[0]
{0x00000010, 0x00000044, 0x00000002, 0x00000000, 0x000000DC}, // L[1]
{0x00000051, 0x00000014, 0x00000000, 0x00000080, 0x00000045}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x0000008E, 0x00000088}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000012, 0x00000033, 0x00000001}, // L[0]
{0x000000DC, 0x00000044, 0x00000002, 0x0000006E, 0x00000028}, // L[1]
{0x00000045, 0x00000014, 0x00000000, 0x000000E2, 0x00000022}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000003, 0x00000001}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000024, 0x00000001, 0x00000033, 0x00000044}, // L[0]
{0x000000DC, 0x0000008D, 0x00000028, 0x0000006E, 0x00000041}, // L[1]
{0x00000045, 0x000000C6, 0x00000022, 0x000000E2, 0x00000081}, // L[2]
{0x00000088, 0x00000020, 0x00000001, 0x00000003, 0x000000B8}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000044, 0x00000024, 0x00000001, 0x000000BB, 0x000000A8}, // L[0]
{0x00000041, 0x0000008D, 0x00000028, 0x00000079, 0x000000B1}, // L[1]
{0x00000081, 0x000000C6, 0x00000022, 0x0000001B, 0x0000006B}, // L[2]
{0x000000B8, 0x00000020, 0x00000001, 0x000000DD, 0x00000064}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000044, 0x00000078, 0x0000006B, 0x000000DD, 0x00000004}, // L[0]
{0x00000041, 0x0000005B, 0x00000064, 0x000000BB, 0x0000000A}, // L[1]
{0x00000081, 0x00000088, 0x000000A8, 0x00000079, 0x0000000B}, // L[2]
{0x000000B8, 0x00000019, 0x000000B1, 0x0000001B, 0x00000081}, // L[3]
}, // T.state[12].w =  22
{ // R[13] abcde
{0x00000004, 0x00000078, 0x0000006B, 0x000000EC, 0x00000000}, // L[0]
{0x0000000A, 0x0000005B, 0x00000064, 0x000000D8, 0x00000000}, // L[1]
{0x0000000B, 0x00000088, 0x000000A8, 0x00000039, 0x00000000}, // L[2]
{0x00000081, 0x00000019, 0x000000B1, 0x0000004D, 0x00000000}, // L[3]
}, // T.state[13].w =  18
// T.w = 104
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 104 -> 103
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000051}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000051, 0x00000030, 0x00000080, 0x0000008E, 0x00000012}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000012, 0x00000046, 0x00000020}, // L[0]
{0x00000010, 0x00000044, 0x00000002, 0x00000000, 0x000000DC}, // L[1]
{0x00000051, 0x00000014, 0x00000000, 0x00000080, 0x00000045}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x0000008E, 0x00000088}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000012, 0x00000033, 0x00000001}, // L[0]
{0x000000DC, 0x00000044, 0x00000002, 0x0000006E, 0x00000028}, // L[1]
{0x00000045, 0x00000014, 0x00000000, 0x000000E2, 0x00000022}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000003, 0x00000001}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000024, 0x00000001, 0x00000033, 0x00000044}, // L[0]
{0x000000DC, 0x0000008D, 0x00000028, 0x0000006E, 0x00000041}, // L[1]
{0x00000045, 0x000000C6, 0x00000022, 0x000000E2, 0x00000081}, // L[2]
{0x00000088, 0x00000020, 0x00000001, 0x00000003, 0x000000B8}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000044, 0x00000024, 0x00000001, 0x000000BB, 0x000000AA}, // L[0]
{0x00000041, 0x0000008D, 0x00000028, 0x00000079, 0x00000081}, // L[1]
{0x00000081, 0x000000C6, 0x00000022, 0x0000001B, 0x0000004F}, // L[2]
{0x000000B8, 0x00000020, 0x00000001, 0x000000DD, 0x00000064}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000044, 0x00000018, 0x0000004F, 0x000000DD, 0x00000044}, // L[0]
{0x00000041, 0x00000013, 0x00000064, 0x000000BB, 0x00000050}, // L[1]
{0x00000081, 0x00000088, 0x000000AA, 0x00000079, 0x00000009}, // L[2]
{0x000000B8, 0x0000001D, 0x00000081, 0x0000001B, 0x000000C5}, // L[3]
}, // T.state[12].w =  22
{ // R[13] abcde
{0x00000044, 0x00000018, 0x0000004F, 0x000000CC, 0x00000000}, // L[0]
{0x00000050, 0x00000013, 0x00000064, 0x000000F5, 0x00000000}, // L[1]
{0x00000009, 0x00000088, 0x000000AA, 0x00000038, 0x00000000}, // L[2]
{0x000000C5, 0x0000001D, 0x00000081, 0x0000006F, 0x00000000}, // L[3]
}, // T.state[13].w =  17
// T.w = 103
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 103 -> 102
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000051}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000051, 0x00000030, 0x00000080, 0x0000008E, 0x00000012}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000012, 0x00000046, 0x00000020}, // L[0]
{0x00000010, 0x00000044, 0x00000002, 0x00000000, 0x000000DC}, // L[1]
{0x00000051, 0x00000014, 0x00000000, 0x00000080, 0x00000045}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x0000008E, 0x00000088}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000012, 0x00000033, 0x00000001}, // L[0]
{0x000000DC, 0x00000044, 0x00000002, 0x0000006E, 0x00000028}, // L[1]
{0x00000045, 0x00000014, 0x00000000, 0x000000E2, 0x00000022}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000003, 0x00000001}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000024, 0x00000001, 0x00000033, 0x00000004}, // L[0]
{0x000000DC, 0x0000008D, 0x00000028, 0x0000006E, 0x00000041}, // L[1]
{0x00000045, 0x000000C6, 0x00000022, 0x000000E2, 0x00000009}, // L[2]
{0x00000088, 0x00000020, 0x00000001, 0x00000003, 0x000000A8}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000004, 0x00000024, 0x00000001, 0x000000B9, 0x00000088}, // L[0]
{0x00000041, 0x0000008D, 0x00000028, 0x00000079, 0x00000083}, // L[1]
{0x00000009, 0x000000C6, 0x00000022, 0x0000005F, 0x00000065}, // L[2]
{0x000000A8, 0x00000020, 0x00000001, 0x0000005D, 0x00000064}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000004, 0x0000001C, 0x00000065, 0x0000005D, 0x00000000}, // L[0]
{0x00000041, 0x00000047, 0x00000064, 0x000000B9, 0x00000000}, // L[1]
{0x00000009, 0x00000088, 0x00000088, 0x00000079, 0x00000081}, // L[2]
{0x000000A8, 0x00000059, 0x00000083, 0x0000005F, 0x00000001}, // L[3]
}, // T.state[12].w =  22
{ // R[13] abcde
{0x00000000, 0x0000001C, 0x00000065, 0x000000AE, 0x00000000}, // L[0]
{0x00000000, 0x00000047, 0x00000064, 0x000000DC, 0x00000000}, // L[1]
{0x00000081, 0x00000088, 0x00000088, 0x0000007C, 0x00000000}, // L[2]
{0x00000001, 0x00000059, 0x00000083, 0x0000002F, 0x00000000}, // L[3]
}, // T.state[13].w =  16
// T.w = 102
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 102 -> 101
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000051}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000051, 0x00000030, 0x00000080, 0x0000008E, 0x00000012}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000012, 0x00000046, 0x00000020}, // L[0]
{0x00000010, 0x00000044, 0x00000002, 0x00000000, 0x000000DC}, // L[1]
{0x00000051, 0x00000014, 0x00000000, 0x00000080, 0x00000045}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x0000008E, 0x00000088}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000012, 0x00000033, 0x00000001}, // L[0]
{0x000000DC, 0x00000044, 0x00000002, 0x0000006E, 0x00000028}, // L[1]
{0x00000045, 0x00000014, 0x00000000, 0x000000E2, 0x00000022}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000003, 0x00000001}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000024, 0x00000001, 0x00000033, 0x00000004}, // L[0]
{0x000000DC, 0x0000008D, 0x00000028, 0x0000006E, 0x00000049}, // L[1]
{0x00000045, 0x000000C6, 0x00000022, 0x000000E2, 0x00000001}, // L[2]
{0x00000088, 0x00000020, 0x00000001, 0x00000003, 0x000000A8}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000004, 0x00000024, 0x00000001, 0x000000B9, 0x000000A8}, // L[0]
{0x00000049, 0x0000008D, 0x00000028, 0x00000039, 0x00000001}, // L[1]
{0x00000001, 0x000000C6, 0x00000022, 0x0000001F, 0x00000041}, // L[2]
{0x000000A8, 0x00000020, 0x00000001, 0x0000005D, 0x00000064}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000004, 0x00000019, 0x00000041, 0x0000005D, 0x00000025}, // L[0]
{0x00000049, 0x0000000F, 0x00000064, 0x000000B9, 0x00000040}, // L[1]
{0x00000001, 0x00000088, 0x000000A8, 0x00000039, 0x0000008B}, // L[2]
{0x000000A8, 0x00000019, 0x00000001, 0x0000001F, 0x000000C1}, // L[3]
}, // T.state[12].w =  20
{ // R[13] abcde
{0x00000025, 0x00000019, 0x00000041, 0x0000003C, 0x00000000}, // L[0]
{0x00000040, 0x0000000F, 0x00000064, 0x000000FC, 0x00000000}, // L[1]
{0x0000008B, 0x00000088, 0x000000A8, 0x00000059, 0x00000000}, // L[2]
{0x000000C1, 0x00000019, 0x00000001, 0x0000006F, 0x00000000}, // L[3]
}, // T.state[13].w =  17
// T.w = 101
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 101 -> 100
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000000, 0x00000081, 0x00000080}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 4].w =   1
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000001, 0x00000001, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x00000040}, // L[3]
}, // T.state[ 5].w =   2
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000001, 0x00000030, 0x00000080, 0x00000080, 0x00000051}, // L[2]
{0x00000080, 0x00000008, 0x00000040, 0x00000040, 0x00000088}, // L[3]
}, // T.state[ 6].w =   3
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000051, 0x00000030, 0x00000080, 0x0000008E, 0x00000012}, // L[2]
{0x00000088, 0x00000008, 0x00000040, 0x00000046, 0x00000002}, // L[3]
}, // T.state[ 7].w =   6
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000012, 0x00000046, 0x00000020}, // L[0]
{0x00000010, 0x00000044, 0x00000002, 0x00000000, 0x000000DC}, // L[1]
{0x00000051, 0x00000014, 0x00000000, 0x00000080, 0x00000045}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x0000008E, 0x00000088}, // L[3]
}, // T.state[ 8].w =   8
{ // R[ 9] abcde
{0x00000020, 0x00000020, 0x00000012, 0x00000033, 0x00000001}, // L[0]
{0x000000DC, 0x00000044, 0x00000002, 0x0000006E, 0x00000028}, // L[1]
{0x00000045, 0x00000014, 0x00000000, 0x000000E2, 0x00000022}, // L[2]
{0x00000088, 0x00000000, 0x00000000, 0x00000003, 0x00000001}, // L[3]
}, // T.state[ 9].w =  10
{ // R[10] abcde
{0x00000020, 0x00000024, 0x00000001, 0x00000033, 0x00000004}, // L[0]
{0x000000DC, 0x0000008D, 0x00000028, 0x0000006E, 0x00000049}, // L[1]
{0x00000045, 0x000000C6, 0x00000022, 0x000000E2, 0x00000001}, // L[2]
{0x00000088, 0x00000020, 0x00000001, 0x00000003, 0x000000A8}, // L[3]
}, // T.state[10].w =  15
{ // R[11] abcde
{0x00000004, 0x00000024, 0x00000001, 0x000000B9, 0x000000E8}, // L[0]
{0x00000049, 0x0000008D, 0x00000028, 0x00000039, 0x00000001}, // L[1]
{0x00000001, 0x000000C6, 0x00000022, 0x0000001F, 0x00000043}, // L[2]
{0x000000A8, 0x00000020, 0x00000001, 0x0000005D, 0x00000064}, // L[3]
}, // T.state[11].w =  16
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000004, 0x00000019, 0x00000043, 0x0000005D, 0x00000005}, // L[0]
{0x00000049, 0x0000000B, 0x00000064, 0x000000B9, 0x00000040}, // L[1]
{0x00000001, 0x00000088, 0x000000E8, 0x00000039, 0x00000089}, // L[2]
{0x000000A8, 0x00000099, 0x00000001, 0x0000001F, 0x00000021}, // L[3]
}, // T.state[12].w =  20
{ // R[13] abcde
{0x00000005, 0x00000019, 0x00000043, 0x0000002C, 0x00000000}, // L[0]
{0x00000040, 0x0000000B, 0x00000064, 0x000000FC, 0x00000000}, // L[1]
{0x00000089, 0x00000088, 0x000000E8, 0x00000058, 0x00000000}, // L[2]
{0x00000021, 0x00000099, 0x00000001, 0x0000001F, 0x00000000}, // L[3]
}, // T.state[13].w =  16
// T.w = 100
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 100 -> 82
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000098}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x000000C0, 0x000000C6, 0x00000002}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000034, 0x00000000, 0x00000000, 0x00000054}, // L[1]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x000000A8}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[0]
{0x00000054, 0x00000034, 0x00000000, 0x0000002A, 0x0000003A}, // L[1]
{0x000000A8, 0x00000000, 0x00000000, 0x00000054, 0x00000054}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x000000A5}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000040}, // L[0]
{0x00000054, 0x000000C1, 0x0000003A, 0x0000002A, 0x00000015}, // L[1]
{0x000000A8, 0x0000008A, 0x00000054, 0x00000054, 0x00000022}, // L[2]
{0x00000000, 0x000000B4, 0x000000A5, 0x00000063, 0x0000009C}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000015, 0x000000C1, 0x0000003A, 0x000000F9, 0x00000001}, // L[1]
{0x00000022, 0x0000008A, 0x00000054, 0x000000B3, 0x00000001}, // L[2]
{0x0000009C, 0x000000B4, 0x000000A5, 0x000000FF, 0x00000000}, // L[3]
}, // T.state[11].w =  14
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000081, 0x00000001, 0x000000FF, 0x00000041}, // L[0]
{0x00000015, 0x00000017, 0x00000000, 0x00000002, 0x00000020}, // L[1]
{0x00000022, 0x00000069, 0x00000000, 0x000000F9, 0x00000009}, // L[2]
{0x0000009C, 0x00000080, 0x00000001, 0x000000B3, 0x00000004}, // L[3]
}, // T.state[12].w =  23
{ // R[13] abcde
{0x00000041, 0x00000081, 0x00000001, 0x0000005F, 0x00000000}, // L[0]
{0x00000020, 0x00000017, 0x00000000, 0x00000011, 0x00000000}, // L[1]
{0x00000009, 0x00000069, 0x00000000, 0x00000078, 0x00000000}, // L[2]
{0x00000004, 0x00000080, 0x00000001, 0x000000DB, 0x00000000}, // L[3]
}, // T.state[13].w =  16
// T.w =  82
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 82 -> 81
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000098}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x000000C0, 0x000000C6, 0x00000002}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000034, 0x00000000, 0x00000000, 0x00000054}, // L[1]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x000000A8}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[0]
{0x00000054, 0x00000034, 0x00000000, 0x0000002A, 0x0000003A}, // L[1]
{0x000000A8, 0x00000000, 0x00000000, 0x00000054, 0x00000054}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x000000A5}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000040}, // L[0]
{0x00000054, 0x000000C1, 0x0000003A, 0x0000002A, 0x00000015}, // L[1]
{0x000000A8, 0x0000008A, 0x00000054, 0x00000054, 0x00000022}, // L[2]
{0x00000000, 0x000000B4, 0x000000A5, 0x00000063, 0x0000009C}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000015, 0x000000C1, 0x0000003A, 0x000000F9, 0x00000001}, // L[1]
{0x00000022, 0x0000008A, 0x00000054, 0x000000B3, 0x00000001}, // L[2]
{0x0000009C, 0x000000B4, 0x000000A5, 0x000000FF, 0x00000080}, // L[3]
}, // T.state[11].w =  14
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000081, 0x00000001, 0x000000FF, 0x00000041}, // L[0]
{0x00000015, 0x00000017, 0x00000080, 0x00000002, 0x00000000}, // L[1]
{0x00000022, 0x00000068, 0x00000000, 0x000000F9, 0x0000000A}, // L[2]
{0x0000009C, 0x00000080, 0x00000001, 0x000000B3, 0x00000004}, // L[3]
}, // T.state[12].w =  23
{ // R[13] abcde
{0x00000041, 0x00000081, 0x00000001, 0x0000005F, 0x00000000}, // L[0]
{0x00000000, 0x00000017, 0x00000080, 0x00000001, 0x00000000}, // L[1]
{0x0000000A, 0x00000068, 0x00000000, 0x000000F9, 0x00000000}, // L[2]
{0x00000004, 0x00000080, 0x00000001, 0x000000DB, 0x00000000}, // L[3]
}, // T.state[13].w =  15
// T.w =  81
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 81 -> 80
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000098}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x000000C0, 0x000000C6, 0x00000002}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000034, 0x00000000, 0x00000000, 0x00000054}, // L[1]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x000000A8}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[0]
{0x00000054, 0x00000034, 0x00000000, 0x0000002A, 0x0000003A}, // L[1]
{0x000000A8, 0x00000000, 0x00000000, 0x00000054, 0x00000054}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x000000A5}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000040}, // L[0]
{0x00000054, 0x000000C1, 0x0000003A, 0x0000002A, 0x00000015}, // L[1]
{0x000000A8, 0x0000008A, 0x00000054, 0x00000054, 0x00000022}, // L[2]
{0x00000000, 0x000000B4, 0x000000A5, 0x00000063, 0x0000009C}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000015, 0x000000C1, 0x0000003A, 0x000000F9, 0x00000081}, // L[1]
{0x00000022, 0x0000008A, 0x00000054, 0x000000B3, 0x00000081}, // L[2]
{0x0000009C, 0x000000B4, 0x000000A5, 0x000000FF, 0x00000080}, // L[3]
}, // T.state[11].w =  14
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000080, 0x00000081, 0x000000FF, 0x00000040}, // L[0]
{0x00000015, 0x00000016, 0x00000080, 0x00000002, 0x00000001}, // L[1]
{0x00000022, 0x00000068, 0x00000000, 0x000000F9, 0x0000000E}, // L[2]
{0x0000009C, 0x00000080, 0x00000081, 0x000000B3, 0x00000014}, // L[3]
}, // T.state[12].w =  23
{ // R[13] abcde
{0x00000040, 0x00000080, 0x00000081, 0x000000DF, 0x00000000}, // L[0]
{0x00000001, 0x00000016, 0x00000080, 0x00000081, 0x00000000}, // L[1]
{0x0000000E, 0x00000068, 0x00000000, 0x000000FB, 0x00000000}, // L[2]
{0x00000014, 0x00000080, 0x00000081, 0x000000D3, 0x00000000}, // L[3]
}, // T.state[13].w =  14
// T.w =  80
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 80 -> 79
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000098}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x000000C0, 0x000000C6, 0x00000002}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000034, 0x00000000, 0x00000000, 0x00000054}, // L[1]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x000000A8}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[0]
{0x00000054, 0x00000034, 0x00000000, 0x0000002A, 0x0000003A}, // L[1]
{0x000000A8, 0x00000000, 0x00000000, 0x00000054, 0x00000054}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x000000A5}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000040}, // L[0]
{0x00000054, 0x000000C1, 0x0000003A, 0x0000002A, 0x00000015}, // L[1]
{0x000000A8, 0x0000008A, 0x00000054, 0x00000054, 0x00000022}, // L[2]
{0x00000000, 0x000000B4, 0x000000A5, 0x00000063, 0x0000009C}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000015, 0x000000C1, 0x0000003A, 0x000000F9, 0x00000081}, // L[1]
{0x00000022, 0x0000008A, 0x00000054, 0x000000B3, 0x00000081}, // L[2]
{0x0000009C, 0x000000B4, 0x000000A5, 0x000000FF, 0x000000A0}, // L[3]
}, // T.state[11].w =  14
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000080, 0x00000081, 0x000000FF, 0x00000040}, // L[0]
{0x00000015, 0x00000016, 0x000000A0, 0x00000002, 0x00000001}, // L[1]
{0x00000022, 0x00000028, 0x00000000, 0x000000F9, 0x0000000A}, // L[2]
{0x0000009C, 0x00000080, 0x00000081, 0x000000B3, 0x00000024}, // L[3]
}, // T.state[12].w =  23
{ // R[13] abcde
{0x00000040, 0x00000080, 0x00000081, 0x000000DF, 0x00000000}, // L[0]
{0x00000001, 0x00000016, 0x000000A0, 0x00000081, 0x00000000}, // L[1]
{0x0000000A, 0x00000028, 0x00000000, 0x000000F9, 0x00000000}, // L[2]
{0x00000024, 0x00000080, 0x00000081, 0x000000CB, 0x00000000}, // L[3]
}, // T.state[13].w =  13
// T.w =  79
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 79 -> 78
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000098}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x000000C0, 0x000000C6, 0x00000002}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000034, 0x00000000, 0x00000000, 0x00000054}, // L[1]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x000000A8}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[0]
{0x00000054, 0x00000034, 0x00000000, 0x0000002A, 0x0000003A}, // L[1]
{0x000000A8, 0x00000000, 0x00000000, 0x00000054, 0x00000054}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x000000A5}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000040}, // L[0]
{0x00000054, 0x000000C1, 0x0000003A, 0x0000002A, 0x00000015}, // L[1]
{0x000000A8, 0x0000008A, 0x00000054, 0x00000054, 0x00000022}, // L[2]
{0x00000000, 0x000000B4, 0x000000A5, 0x00000063, 0x0000009C}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000015, 0x000000C1, 0x0000003A, 0x000000F9, 0x000000E1}, // L[1]
{0x00000022, 0x0000008A, 0x00000054, 0x000000B3, 0x00000001}, // L[2]
{0x0000009C, 0x000000B4, 0x000000A5, 0x000000FF, 0x000000A0}, // L[3]
}, // T.state[11].w =  14
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000040, 0x00000001, 0x000000FF, 0x00000000}, // L[0]
{0x00000015, 0x00000017, 0x000000A0, 0x00000002, 0x00000000}, // L[1]
{0x00000022, 0x00000028, 0x00000000, 0x000000F9, 0x0000000A}, // L[2]
{0x0000009C, 0x00000080, 0x000000E1, 0x000000B3, 0x00000004}, // L[3]
}, // T.state[12].w =  23
{ // R[13] abcde
{0x00000000, 0x00000040, 0x00000001, 0x000000FF, 0x00000000}, // L[0]
{0x00000000, 0x00000017, 0x000000A0, 0x00000001, 0x00000000}, // L[1]
{0x0000000A, 0x00000028, 0x00000000, 0x000000F9, 0x00000000}, // L[2]
{0x00000004, 0x00000080, 0x000000E1, 0x000000DB, 0x00000000}, // L[3]
}, // T.state[13].w =  12
// T.w =  78
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 78 -> 77
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000098}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x000000C0, 0x000000C6, 0x00000002}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000034, 0x00000000, 0x00000000, 0x00000054}, // L[1]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x000000A8}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[0]
{0x00000054, 0x00000034, 0x00000000, 0x0000002A, 0x0000003A}, // L[1]
{0x000000A8, 0x00000000, 0x00000000, 0x00000054, 0x000000D4}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x000000A5}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000040}, // L[0]
{0x00000054, 0x000000C1, 0x0000003A, 0x0000002A, 0x00000015}, // L[1]
{0x000000A8, 0x0000009A, 0x000000D4, 0x00000054, 0x00000002}, // L[2]
{0x00000000, 0x000000B4, 0x000000A5, 0x00000063, 0x000000D4}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000015, 0x000000C1, 0x0000003A, 0x000000F9, 0x000000C1}, // L[1]
{0x00000002, 0x0000009A, 0x000000D4, 0x000000B2, 0x00000002}, // L[2]
{0x000000D4, 0x000000B4, 0x000000A5, 0x000000BD, 0x00000020}, // L[3]
}, // T.state[11].w =  15
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000000, 0x00000002, 0x000000BD, 0x00000040}, // L[0]
{0x00000015, 0x00000031, 0x00000020, 0x00000002, 0x0000000C}, // L[1]
{0x00000002, 0x00000029, 0x00000000, 0x000000F9, 0x00000029}, // L[2]
{0x000000D4, 0x00000080, 0x000000C1, 0x000000B2, 0x00000054}, // L[3]
}, // T.state[12].w =  20
{ // R[13] abcde
{0x00000040, 0x00000000, 0x00000002, 0x000000FE, 0x00000000}, // L[0]
{0x0000000C, 0x00000031, 0x00000020, 0x00000007, 0x00000000}, // L[1]
{0x00000029, 0x00000029, 0x00000000, 0x00000068, 0x00000000}, // L[2]
{0x00000054, 0x00000080, 0x000000C1, 0x00000073, 0x00000000}, // L[3]
}, // T.state[13].w =  13
// T.w =  77
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 77 -> 76
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000098}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x000000C0, 0x000000C6, 0x00000002}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000034, 0x00000000, 0x00000000, 0x00000054}, // L[1]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x000000A8}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[0]
{0x00000054, 0x00000034, 0x00000000, 0x0000002A, 0x0000003A}, // L[1]
{0x000000A8, 0x00000000, 0x00000000, 0x00000054, 0x000000D4}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x000000A5}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000040}, // L[0]
{0x00000054, 0x000000C1, 0x0000003A, 0x0000002A, 0x00000015}, // L[1]
{0x000000A8, 0x0000009A, 0x000000D4, 0x00000054, 0x00000002}, // L[2]
{0x00000000, 0x000000B4, 0x000000A5, 0x00000063, 0x000000D4}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000015, 0x000000C1, 0x0000003A, 0x000000F9, 0x000000E1}, // L[1]
{0x00000002, 0x0000009A, 0x000000D4, 0x000000B2, 0x00000002}, // L[2]
{0x000000D4, 0x000000B4, 0x000000A5, 0x000000BD, 0x00000030}, // L[3]
}, // T.state[11].w =  15
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000040, 0x00000002, 0x000000BD, 0x00000000}, // L[0]
{0x00000015, 0x00000031, 0x00000030, 0x00000002, 0x00000004}, // L[1]
{0x00000002, 0x00000009, 0x00000000, 0x000000F9, 0x0000000B}, // L[2]
{0x000000D4, 0x00000080, 0x000000E1, 0x000000B2, 0x00000074}, // L[3]
}, // T.state[12].w =  20
{ // R[13] abcde
{0x00000000, 0x00000040, 0x00000002, 0x000000DE, 0x00000000}, // L[0]
{0x00000004, 0x00000031, 0x00000030, 0x00000003, 0x00000000}, // L[1]
{0x0000000B, 0x00000009, 0x00000000, 0x00000079, 0x00000000}, // L[2]
{0x00000074, 0x00000080, 0x000000E1, 0x00000063, 0x00000000}, // L[3]
}, // T.state[13].w =  12
// T.w =  76
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
[./tests/norx-best-diff-search-tests.cc:3125] Update bound: 76 -> 75
#if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 14
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000081, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   1
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000000, 0x00000000, 0x00000040, 0x000000C0}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   1
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000018, 0x000000C0, 0x00000040, 0x00000098}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000098, 0x00000018, 0x000000C0, 0x000000C6, 0x00000002}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   3
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000034, 0x00000000, 0x00000000, 0x00000054}, // L[1]
{0x00000098, 0x00000000, 0x00000000, 0x00000000, 0x000000A8}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x000000C6, 0x00000000}, // L[3]
}, // T.state[ 8].w =   4
{ // R[ 9] abcde
{0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000002}, // L[0]
{0x00000054, 0x00000034, 0x00000000, 0x0000002A, 0x0000003A}, // L[1]
{0x000000A8, 0x00000000, 0x00000000, 0x00000054, 0x000000D4}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000063, 0x00000065}, // L[3]
}, // T.state[ 9].w =   6
{ // R[10] abcde
{0x00000000, 0x00000040, 0x00000002, 0x00000000, 0x00000040}, // L[0]
{0x00000054, 0x000000C1, 0x0000003A, 0x0000002A, 0x00000015}, // L[1]
{0x000000A8, 0x0000009A, 0x000000D4, 0x00000054, 0x00000042}, // L[2]
{0x00000000, 0x000000AC, 0x00000065, 0x00000063, 0x000000A4}, // L[3]
}, // T.state[10].w =  11
{ // R[11] abcde
{0x00000040, 0x00000040, 0x00000002, 0x00000002, 0x00000000}, // L[0]
{0x00000015, 0x000000C1, 0x0000003A, 0x000000F9, 0x000000E1}, // L[1]
{0x00000042, 0x0000009A, 0x000000D4, 0x000000B0, 0x00000004}, // L[2]
{0x000000A4, 0x000000AC, 0x00000065, 0x0000003E, 0x0000008D}, // L[3]
}, // T.state[11].w =  15
//         --- Permuted state after round 12: diagonals to columns ---
{ // R[12] abcde
{0x00000040, 0x00000040, 0x00000004, 0x0000003E, 0x00000000}, // L[0]
{0x00000015, 0x0000003D, 0x0000008D, 0x00000002, 0x00000010}, // L[1]
{0x00000042, 0x00000042, 0x00000000, 0x000000F9, 0x00000000}, // L[2]
{0x000000A4, 0x00000080, 0x000000E1, 0x000000B0, 0x00000024}, // L[3]
}, // T.state[12].w =  20
{ // R[13] abcde
{0x00000000, 0x00000040, 0x00000004, 0x0000001F, 0x00000000}, // L[0]
{0x00000010, 0x0000003D, 0x0000008D, 0x00000009, 0x00000000}, // L[1]
{0x00000000, 0x00000042, 0x00000000, 0x000000FC, 0x00000000}, // L[2]
{0x00000024, 0x00000080, 0x000000E1, 0x0000004A, 0x00000000}, // L[3]
}, // T.state[13].w =  11
// T.w =  75
};
#endif // #if 1 // WORD_SIZE 8 nrounds 13 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

 */

/* --- */

/*
 * g_NBITS[lane][word]: per-word constant table, selected at compile time by
 * the attack-scenario macros. The scenarios are tested in the order INIT_N,
 * INIT_NK, RATE, FULL, NONE; the first one that is non-zero selects the
 * table, and the build is aborted with #error if none of them is set.
 *
 * Row i is lane i; its four columns are the lane's words a_i, b_i, c_i, d_i,
 * i.e. the state words s_i, s_(4+i), s_(8+i), s_(12+i). The trailing
 * u / n / k labels on each row are the names given to those state words.
 * Every entry is either 1 or 2.
 *
 * NOTE(review): presumably 2 marks a word the scenario leaves free and 1 a
 * word that is held fixed -- confirm against the search code that reads
 * g_NBITS.
 */
#if INIT_N
// INIT_N: only a0 and a1 (s0, s1) are 2, everything else is 1.
// NOTE(review): the row labels place n0/n1 at s1/s2, while the 2-entries are
// at s0/s1 -- check whether the labels or the values are stale.
const WORD_T g_NBITS[NLANES][4] = {
  {2, 1, 1, 1}, // lane 0: a0, b0, c0, d0 == s0 s4  s8 s12 ==  u0  k0  u2  u6
  {2, 1, 1, 1}, // lane 1: a1, b1, c1, d1 == s1 s5  s9 s13 ==  n0  k1  u3  u7
  {1, 1, 1, 1}, // lane 2: a2, b2, c2, d2 == s2 s6 s10 s14 ==  n1  k2  u4  u8
  {1, 1, 1, 1}, // lane 3: a3, b3, c3, d3 == s3 s7 s11 s15 ==  u1  k3  u5  u9
};
#elif INIT_NK
// INIT_NK: as INIT_N, plus the whole b column (s4..s7, labelled k0..k3) is 2.
const WORD_T g_NBITS[NLANES][4] = {
  {2, 2, 1, 1}, // lane 0: a0, b0, c0, d0 == s0 s4  s8 s12 ==  u0  k0  u2  u6
  {2, 2, 1, 1}, // lane 1: a1, b1, c1, d1 == s1 s5  s9 s13 ==  n0  k1  u3  u7
  {1, 2, 1, 1}, // lane 2: a2, b2, c2, d2 == s2 s6 s10 s14 ==  n1  k2  u4  u8
  {1, 2, 1, 1}, // lane 3: a3, b3, c3, d3 == s3 s7 s11 s15 ==  u1  k3  u5  u9
};
#elif RATE
// Earlier variant of the RATE table (only the a and b columns set to 2),
// kept commented out for reference:
//const WORD_T g_NBITS[NLANES][4] = {
//  {2, 2, 1, 1}, // lane 0: a0, b0, c0, d0 == s0 s4  s8 s12 ==
//  {2, 2, 1, 1}, // lane 1: a1, b1, c1, d1 == s1 s5  s9 s13 ==
//  {2, 2, 1, 1}, // lane 2: a2, b2, c2, d2 == s2 s6 s10 s14 ==
//  {2, 2, 1, 1}, // lane 3: a3, b3, c3, d3 == s3 s7 s11 s15 ==
//};
// Active RATE table: columns a and b (s0..s7) plus c0, c1 (s8, s9) are 2.
const WORD_T g_NBITS[NLANES][4] = { // according to the Latincrypt paper: "Analysis of NORX"
  {2, 2, 2, 1}, // lane 0: a0, b0, c0, d0 == s0 s4  s8 s12 ==  u0  k0  u2  u6
  {2, 2, 2, 1}, // lane 1: a1, b1, c1, d1 == s1 s5  s9 s13 ==  n0  k1  u3  u7
  {2, 2, 1, 1}, // lane 2: a2, b2, c2, d2 == s2 s6 s10 s14 ==  n1  k2  u4  u8
  {2, 2, 1, 1}, // lane 3: a3, b3, c3, d3 == s3 s7 s11 s15 ==  u1  k3  u5  u9
};
#elif FULL
// FULL: every word of every lane is 2.
const WORD_T g_NBITS[NLANES][4] = {
  {2, 2, 2, 2}, // lane 0: a0, b0, c0, d0 == s0 s4  s8 s12 ==  u0  k0  u2  u6
  {2, 2, 2, 2}, // lane 1: a1, b1, c1, d1 == s1 s5  s9 s13 ==  n0  k1  u3  u7
  {2, 2, 2, 2}, // lane 2: a2, b2, c2, d2 == s2 s6 s10 s14 ==  n1  k2  u4  u8
  {2, 2, 2, 2}, // lane 3: a3, b3, c3, d3 == s3 s7 s11 s15 ==  u1  k3  u5  u9
};
#elif NONE
// NONE: lanes 0..2 are all 1; only lane 3 (a3, b3, c3, d3) is 2.
const WORD_T g_NBITS[NLANES][4] = {
  {1, 1, 1, 1},
  {1, 1, 1, 1},
  {1, 1, 1, 1},
  {2, 2, 2, 2},
};
#else
// No scenario macro is non-zero: refuse to build rather than leave g_NBITS undefined.
#error("Invalid attack scenario!")
#endif

/* --- */

//const WORD_T g_NBITS[NLANES][4] = {
//  {2, 1, 1, 1}, // lane 0: a0, b0, c0, d0 == s0 s4  s8 s12 == n0 k0  u8 u12 
//  {2, 1, 1, 1}, // lane 1: a1, b1, c1, d1 == s1 s5  s9 s13 == n1 k1  u9 u13
//  {1, 1, 1, 1}, // lane 2: a2, b2, c2, d2 == s2 s6 s10 s14 == u2 k2 u10 u14
//  {1, 1, 1, 1}, // lane 3: a3, b3, c3, d3 == s3 s7 s11 s15 == u3 k3 u11 u15
//};

/* --- */
/* 

NORX32-RATE: problem case, 20160810

vesselin@LACS-BIGMAN:~/exper-logs$ time ./norx-best-diff-search-tests.0010
[./tests/norx-best-diff-search-tests.cc:2152] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./tests/norx-best-diff-search-tests.cc:2153] Attack scenario: INIT_N 0 INIT_NK 0 RATE 1 FULL 0
-- g_nrounds =  1
-- g_Bn =   0
-- g_nrounds =  2
-- g_Bn =   0 ... [./tests/norx-best-diff-search-tests.cc:1503] Update bound: 0 -> 0
Trail found!
-- g_nrounds =  3
-- g_Bn =   0 ... No trail found.
-- g_Bn =   1 ... [./tests/norx-best-diff-search-tests.cc:1794] Update bound: 1 -> 1
Trail found!
-- g_nrounds =  4
-- g_Bn =   1 ... No trail found.
-- g_Bn =   2 ... [./tests/norx-best-diff-search-tests.cc:1794] Update bound: 2 -> 2
Trail found!
-- g_nrounds =  5
-- g_Bn =   2 ... No trail found.
-- g_Bn =   3 ... No trail found.
-- g_Bn =   4 ... [./tests/norx-best-diff-search-tests.cc:1794] Update bound: 4 -> 4
Trail found!
-- g_nrounds =  6
-- g_Bn =   4 ... No trail found.
-- g_Bn =   5 ... No trail found.
-- g_Bn =   6 ... No trail found.
-- g_Bn =   7 ... [./tests/norx-best-diff-search-tests.cc:1794] Update bound: 7 -> 7
Trail found!
-- g_nrounds =  7
-- g_Bn =   7 ... No trail found.
-- g_Bn =   8 ... No trail found.
-- g_Bn =   9 ... No trail found.
xxx
-- g_Bn =  10 ... No trail found.
xxx

Time:

real    57122m20.213s = 952 hrs = 40 days
user    57204m21.056s
sys     0m5.104s



 */

/* --- */

/*
 * Hard-coded NORX differential trail (guard comment: WORD_SIZE 32, nrounds 7,
 * scenario FULL).
 *
 * Layout: g_norx_trail[state][lane][word], NORX_TRAIL_LEN states of NLANES
 * lanes, five words per lane in the order a, b, c, d, e ("abcde" in the row
 * comments). The trailing "T.state[i].w" comments record the weight noted for
 * each state and "T.w" their total (0+0+1+0+0+0+1+1 = 3).
 *
 * NOTE(review): this dump file contains several definitions of
 * g_norx_trail / NORX_TRAIL_LEN guarded by "#if 1"; at most one of them can
 * be enabled in a single translation unit.
 */
#if 1 // WORD_SIZE 32 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000400, 0x80000400, 0x80000000, 0x80000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
// Alternative (disabled) input difference for lane 3:
//{0x80000000, 0x80000000, 0x80008000, 0x00800000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x80000000, 0x80000400, 0x80000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 1].w =   0
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x80000000, 0x80000000, 0x80000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x80000000, 0x80000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x80000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x80000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00100000, 0x80000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00100000, 0x00100000, 0x80000000, 0x00000010, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   1
// T.w =   3
};
#endif // #if 1 // WORD_SIZE 32 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

/* --- */

/*
 * Hard-coded NORX differential trail (guard comment: WORD_SIZE 32, nrounds 7,
 * scenario FULL); here the non-zero input difference is in lane 3.
 *
 * Layout: g_norx_trail[state][lane][word], NORX_TRAIL_LEN states of NLANES
 * lanes, five words per lane in the order a, b, c, d, e. The trailing
 * "T.state[i].w" comments record the weight noted for each state and "T.w"
 * their total (0+0+1+0+0+0+1+1 = 3).
 *
 * NOTE(review): this dump file contains several definitions of
 * g_norx_trail / NORX_TRAIL_LEN guarded by "#if 1"; at most one of them can
 * be enabled in a single translation unit.
 */
#if 1 // WORD_SIZE 32 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 8
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x80000000, 0x80000000, 0x80008000, 0x00800000, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x80000000, 0x80008000, 0x00008000, 0x80000000}, // L[3]
}, // T.state[ 1].w =   0
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x80000000, 0x00008000, 0x00000000}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x80000000, 0x80000000, 0x00000000}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x80000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00800000, 0x00800000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00001000, 0x00800000, 0x00800000, 0x00001000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 6].w =   1
{ // R[ 7] abcde
{0x00001000, 0x00001000, 0x00800000, 0x10000080, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 7].w =   1
// T.w =   3
};
#endif // #if 1 // WORD_SIZE 32 nrounds 7 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

/* --- */

/*
 * Disabled (#if 0) hard-coded NORX differential trail (guard comment:
 * WORD_SIZE 64, nrounds 4, scenario FULL).
 *
 * Layout: g_norx_trail[state][lane][word], NORX_TRAIL_LEN states of NLANES
 * lanes, five words per lane in the order a, b, c, d, e. The trailing
 * "T.state[i].w" comments record the weight noted for each state.
 */
#if 0 // WORD_SIZE 64 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
  { // R[ 0] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, 0x0000000000000000},
  }, // T.state[ 0].w =   0
  { // R[ 1] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, 0x0000000000000000},
  }, // T.state[ 1].w =   0
  { // R[ 2] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x0000000000000000},
  }, // T.state[ 2].w =   1
  { // R[ 3] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x8000000000000000},
  }, // T.state[ 3].w =   0
  //         --- Permuted state after round 4: diagonals to columns ---
  { // R[ 4] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
  }, // T.state[ 4].w =   0
};
#endif // #if 0 // WORD_SIZE 64 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

/*
 * Hard-coded NORX differential trail (guard comment: WORD_SIZE 8, nrounds 8,
 * scenario FULL).
 *
 * Layout: g_norx_trail[state][lane][word], NORX_TRAIL_LEN states of NLANES
 * lanes, five words per lane in the order a, b, c, d, e. The trailing
 * "T.state[i].w" comments record the weight noted for each state and "T.w"
 * their total (0+1+1+0+0+0+0+1+1 = 4).
 *
 * NOTE(review): this dump file contains several definitions of
 * g_norx_trail / NORX_TRAIL_LEN guarded by "#if 1"; at most one of them can
 * be enabled in a single translation unit.
 */
#if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 9
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
{ // R[ 0] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000080, 0x00000080, 0x00000090, 0x00000020, 0x00000000}, // L[2]
{0x00000084, 0x00000084, 0x00000000, 0x00000001, 0x00000000}, // L[3]
}, // T.state[ 0].w =   0
{ // R[ 1] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000080, 0x00000090, 0x00000010, 0x00000080}, // L[2]
{0x00000000, 0x00000084, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 1].w =   1
{ // R[ 2] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000010, 0x00000000}, // L[2]
{0x00000000, 0x00000080, 0x00000080, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 2].w =   1
{ // R[ 3] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000080, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000080, 0x00000080, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 3].w =   0
//         --- Permuted state after round 4: diagonals to columns ---
{ // R[ 4] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[3]
}, // T.state[ 4].w =   0
{ // R[ 5] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000000, 0x00000080, 0x00000000, 0x00000080}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 5].w =   0
{ // R[ 6] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000000, 0x00000010, 0x00000080, 0x00000000, 0x00000010}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000080}, // L[3]
}, // T.state[ 6].w =   0
{ // R[ 7] abcde
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
{0x00000010, 0x00000010, 0x00000080, 0x00000080, 0x00000000}, // L[1]
{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000004, 0x00000004}, // L[3]
}, // T.state[ 7].w =   1
//         --- Permuted state after round 8: diagonals to columns ---
{ // R[ 8] abcde
{0x00000000, 0x00000020, 0x00000000, 0x00000004, 0x00000000}, // L[0]
{0x00000010, 0x00000000, 0x00000004, 0x00000000, 0x00000000}, // L[1]
{0x00000000, 0x00000008, 0x00000000, 0x00000080, 0x00000000}, // L[2]
{0x00000080, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
}, // T.state[ 8].w =   1
// T.w =   4
};
#endif // #if 1 // WORD_SIZE 8 nrounds 8 INIT_N 0 INIT_NK 0 RATE 0 FULL 1

/* --- */

// Disabled debug fragment (not a complete definition): it was cut out of the
// body of a search routine and relies on names from that enclosing scope
// (ibit, iround, je, g_T, L_zero .. L_three).
// When the current bit is 31, je[THREE] is 0 and lane THREE of the first
// three trail states matches the hard-coded pattern below, it prints the
// partial trail and the four lane weights and then aborts through a
// deliberately failing assert.
#if 0 // DEBUG
						  //						  if((ibit == 63) && (je[THREE] == 1)) 
						  //						  if(ibit == 31) 
						  if((ibit == 31) && (je[THREE] == 0))
						  {
							 if(
								 // state 0, lane 3: a, b, c carry only the top bit of a 64-bit word, e is zero
								 (g_T.state[0].lane[THREE].a == 0x8000000000000000) && 
								 (g_T.state[0].lane[THREE].b == 0x8000000000000000) && 
								 (g_T.state[0].lane[THREE].c == 0x8000000000000000) &&
								 (g_T.state[0].lane[THREE].e == 0) && 
								 // state 1, lane 3
								 (g_T.state[1].lane[THREE].b == 0x8000000000000000) && 
								 (g_T.state[1].lane[THREE].c == 0x8000000000000000) &&
								 // state 2, lane 3
								 (g_T.state[2].lane[THREE].a == 0) &&
								 (g_T.state[2].lane[THREE].b == 0) &&
								 (g_T.state[2].lane[THREE].c == 0x8000000000000000) &&
								 (g_T.state[2].lane[THREE].d == 0) &&
								 //								 (g_T.state[2].lane[THREE].e == 0)
								 (g_T.state[2].lane[THREE].e == 0x8000000000000000)
								 ) { 
								printf("[%s:%d] ibit %d iround %d\n", __FILE__, __LINE__, ibit, iround);
								//		  norx_state_print(g_T.state[0]);
								norx_trail_print(g_T, iround);
								printf("[%s:%d] L_zero.w %d\n", __FILE__, __LINE__, L_zero.w);
								printf("[%s:%d] L_one.w %d\n", __FILE__, __LINE__, L_one.w);
								printf("[%s:%d] L_two.w %d\n", __FILE__, __LINE__, L_two.w);
								printf("[%s:%d] L_three.w %d\n", __FILE__, __LINE__, L_three.w);
								// stop at the first match
								assert(1 == 0);
							 }
						  }
#endif // #if 0 // DEBUG


/* --- */

// Debug fragment (not a complete definition): it relies on g_T and iround
// from an enclosing scope that is not part of this dump.
// If lane THREE of trail state 0 has a == b == c == 0x8000000000000000 and
// e == 0, it prints the trail up to iround and aborts through a deliberately
// failing assert.
#if 1 // DEBUG
		if((g_T.state[0].lane[THREE].a == 0x8000000000000000) && 
			(g_T.state[0].lane[THREE].b == 0x8000000000000000) && 
			(g_T.state[0].lane[THREE].c == 0x8000000000000000) &&
			(g_T.state[0].lane[THREE].e == 0)) { 
		  printf("[%s:%d]\n", __FILE__, __LINE__);
		  //		  norx_state_print(g_T.state[0]);
		  norx_trail_print(g_T, iround);
		  // stop at the first match
		  assert(1 == 0);
		}
#endif



/* --- */

// Debug fragments (not complete definitions): both rely on ibit, a_i, b_i,
// je and S from an enclosing scope that is not part of this dump. The two
// statements test the same condition and differ only in the BEFORE / AFTER
// tag of the message; presumably they were placed on either side of the
// statement under investigation.
// NOTE(review): the format uses %ld for a_i, b_i and je[THREE]; confirm that
// this matches the width of their type in the configuration being debugged.

// Print the state before the step, for bit 63 with a_i = b_i = 1 and je[THREE] = 0.
if((ibit == 63) && (a_i == 1) && (b_i == 1) && (je[THREE] == 0))  {
  printf("BEFORE [%s:%d] ja jb je %ld %ld %ld\n", __FILE__, __LINE__, a_i, b_i, je[THREE]);
  norx_state_print(S);
 }

// Print the state after the step, under the same condition.
if((ibit == 63) && (a_i == 1) && (b_i == 1) && (je[THREE] == 0))  {
  printf("AFTER [%s:%d] ja jb je %ld %ld %ld\n", __FILE__, __LINE__, a_i, b_i, je[THREE]);
  norx_state_print(S);
 }

/* --- */

/* Prob. 1 differential for NORX64! */

/*
 * Hard-coded NORX differential trail (guard comment: WORD_SIZE 64, nrounds 4,
 * scenario FULL).
 *
 * Layout: g_norx_trail[state][lane][word], NORX_TRAIL_LEN states of NLANES
 * lanes, five words per lane in the order a, b, c, d, e. The trailing
 * "T.state[i].w" comments record the weight noted for each state and "T.w"
 * their total (0+0+1+0+0 = 1). Commented-out rows are alternative values
 * kept for reference.
 *
 * NOTE(review): this dump file contains several definitions of
 * g_norx_trail / NORX_TRAIL_LEN guarded by "#if 1"; at most one of them can
 * be enabled in a single translation unit.
 */
#if 1 // WORD_SIZE 64 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1
#define NORX_TRAIL_LEN 5
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
  { // R[ 0] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x8000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000, 0x0000000000000000},
	 //	 {0x8000000000000000, 0x8000000000000000, 0x8000008000000000, 0x0000800000000000, 0x0000000000000000},
  }, // T.state[ 0].w =   0
  { // R[ 1] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x8000000000000000, 0x8000008000000000, 0x0000008000000000, 0x8000000000000000},
  }, // T.state[ 1].w =   0
  { // R[ 2] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0000008000000000, 0x0000000000000000},
  }, // T.state[ 2].w =   1
  { // R[ 3] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x8000000000000000, 0x0000000000000000},
  }, // T.state[ 3].w =   0
  //         --- Permuted state after round 4: diagonals to columns ---
  { // R[ 4] abcde
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
  }, // T.state[ 4].w =   0
  //  { // R[ 4] abcde
  //	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, 0x0000000000000000},
  //	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
  //	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
  //	 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},
  //  }, // T.state[ 4].w =   0
  // T.w =   1
};
#endif // #if 1 // WORD_SIZE 64 nrounds 4 INIT_N 0 INIT_NK 0 RATE 0 FULL 1


/* 

INIT_N 32

[./tests/norx-best-diff-search-tests.cc:3235] Print bounds for first 16 rounds
B[ 0]  0
B[ 1]  1
B[ 2]  2
B[ 3]  6
B[ 4] 12
B[ 5] 25
B[ 6] 44
B[ 7] 73
B[ 8] 111
B[ 9] 160
B[10] 216
B[11] 265
B[12] 319
B[13] 376
B[14] 437
B[15] 496

real    5m40.049s
user    5m40.119s
sys     0m0.016s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$

 */

/* --- */

/* 
[./tests/norx-best-diff-search-tests.cc:3489] Start Time 1469702110 sec
[./tests/norx-best-diff-search-tests.cc:3491] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./tests/norx-best-diff-search-tests.cc:3492] Rotations: R0  8 R1 11 R2 16 R3 31
[./tests/norx-best-diff-search-tests.cc:3493] Attack scenario: INIT_N 1 INIT_NK 0 RATE 0 FULL 0
-- g_nrounds =  2
-- g_Bn = 100 ... [./tests/norx-best-diff-search-tests.cc:1701] Update bound: 100 -> 1
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 1
-- g_nrounds =  3
-- g_Bn = 101 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 101 -> 2
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 2
-- g_nrounds =  4
-- g_Bn = 102 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 102 -> 6
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 6
-- g_nrounds =  5
-- g_Bn = 106 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 106 -> 12
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 12
-- g_nrounds =  6
-- g_Bn = 112 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 112 -> 25
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 25
-- g_nrounds =  7
-- g_Bn = 125 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 125 -> 44
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 44
-- g_nrounds =  8
-- g_Bn = 144 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 144 -> 73
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 73
-- g_nrounds =  9
-- g_Bn = 173 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 173 -> 111
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 111
-- g_nrounds = 10
-- g_Bn = 211 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 211 -> 160
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 160
-- g_nrounds = 11
-- g_Bn = 260 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 260 -> 216
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 216
-- g_nrounds = 12
-- g_Bn = 316 ... [./tests/norx-best-diff-search-tests.cc:2025] Update bound: 316 -> 265
[./tests/norx-best-diff-search-tests.cc:3076] Restart timer (time limit 3 sec): g_Bn = 265

real    0m35.579s
user    0m35.589s
sys     0m0.000s

 */

/* --- */

/*
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ make norx-best-diff-search-tests
g++ -O3 -std=c++11 -Wall -c -I./include/ ./tests/norx-best-diff-search-tests.cc -o ./obj/norx-best-diff-search-tests.o
g++  ./obj/common.o ./obj/norx-best-diff-search-tests.o -o ./bin/norx-best-diff-search-tests -lgsl -lgslcblas -lgmpxx -lgmp
  vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/norx-best-diff-search-tests
  [./tests/norx-best-diff-search-tests.cc:3360] Start Time 1469699354 sec
  [./tests/norx-best-diff-search-tests.cc:3362] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
  [./tests/norx-best-diff-search-tests.cc:3363] Rotations: R0  8 R1 11 R2 16 R3 31
  [./tests/norx-best-diff-search-tests.cc:3364] Attack scenario: INIT_N 1 INIT_NK 0 RATE 0 FULL 0
-- g_nrounds =  2
  -- g_Bn = 100 ... [./tests/norx-best-diff-search-tests.cc:2404] Update bound: 100 -> 1
  [./tests/norx-best-diff-search-tests.cc:2979] Try  1 /  1 restart timer: g_Bn = 1
-- g_nrounds =  3
  -- g_Bn = 101 ... [./tests/norx-best-diff-search-tests.cc:2819] Update bound: 101 -> 2
  [./tests/norx-best-diff-search-tests.cc:2979] Try  1 /  1 restart timer: g_Bn = 2
-- g_nrounds =  4
  -- g_Bn = 102 ... [./tests/norx-best-diff-search-tests.cc:2819] Update bound: 102 -> 6
  [./tests/norx-best-diff-search-tests.cc:2979] Try  1 /  1 restart timer: g_Bn = 6
-- g_nrounds =  5
  -- g_Bn = 106 ... [./tests/norx-best-diff-search-tests.cc:2819] Update bound: 106 -> 12
  [./tests/norx-best-diff-search-tests.cc:2979] Try  1 /  1 restart timer: g_Bn = 12
-- g_nrounds =  6
  -- g_Bn = 112 ... [./tests/norx-best-diff-search-tests.cc:2819] Update bound: 112 -> 25
  [./tests/norx-best-diff-search-tests.cc:2979] Try  1 /  1 restart timer: g_Bn = 25
-- g_nrounds =  7
  -- g_Bn = 125 ... [./tests/norx-best-diff-search-tests.cc:2819] Update bound: 125 -> 44
  [./tests/norx-best-diff-search-tests.cc:2979] Try  1 /  1 restart timer: g_Bn = 44
-- g_nrounds =  8
  -- g_Bn = 144 ... [./tests/norx-best-diff-search-tests.cc:2819] Update bound: 144 -> 73
  [./tests/norx-best-diff-search-tests.cc:2979] Try  1 /  1 restart timer: g_Bn = 73

real    2m34.327s
user    2m34.372s
sys     0m0.000s
*/

/* --- */

/*
-- g_nrounds =  6
  -- g_Bn = 112 ... [./tests/norx-best-diff-search-tests.cc:2813] Update bound: 112 -> 26
  [./tests/norx-best-diff-search-tests.cc:2973] Try  0 /  5 restart timer: g_Bn = 26
  -- g_Bn =  26 ... [./tests/norx-best-diff-search-tests.cc:2973] Try  1 /  5 restart timer: g_Bn = 26

-- g_nrounds =  6
-- g_Bn = 112 ... [./tests/norx-best-diff-search-tests.cc:2818] Update bound: 112 -> 25
[./tests/norx-best-diff-search-tests.cc:2978] Try  0 /  5 restart timer: g_Bn = 25


*/

/* --- */

/*
 * Get a random number from 0 to (n-1) inclusive.
 *
 * Takes one draw from random() and reduces it modulo n, so the result is
 * only approximately uniform when n does not divide the number of values
 * random() can return.
 *
 * \param n number of outcomes. For n == 0 there is no valid outcome; the
 *        function returns 0 instead of evaluating "random() % 0", which
 *        would be a division by zero.
 * \return a value in [0, n-1], or 0 if n is 0.
 */
uint32_t roll_die(uint32_t n)
{
  if(n == 0) { // guard: modulo by zero is undefined behaviour
	 return 0;
  }
  uint32_t r = random() % n;
  return r;
}

/*
 * Return one random bit (0 or 1): the parity of a single random() draw.
 */
uint32_t gen_random_bit()
{
  // random() is non-negative, so masking the low bit equals reduction mod 2
  return static_cast<uint32_t>(random() & 1L);
}

/*
 * Return a biased random bit: a single random() draw is reduced modulo 4;
 * the residues 0, 1 and 2 give 0, the residue 3 gives 1.
 */
uint32_t gen_random_bit_biased()
{
  const uint32_t residue = random() % 4;
  return (residue > 2) ? 1 : 0;
}



/* --- */

/* 

[./tests/norx-best-diff-search-tests.cc:3011] Attack scenario: INIT_N 1 INIT_NK 0 RATE 0 FULL 0
-- g_nrounds =  1
-- g_Bn =   0
-- g_nrounds =  2
-- g_Bn =   0 ... No trail found.
-- g_Bn =   1 ... [./tests/norx-best-diff-search-tests.cc:2283] Update bound: 1 -> 1
Trail found!
-- g_nrounds =  3
-- g_Bn =   1 ... No trail found.
-- g_Bn =   2 ... [./tests/norx-best-diff-search-tests.cc:2574] Update bound: 2 -> 2
Trail found!
-- g_nrounds =  4
-- g_Bn =   2 ... No trail found.
-- g_Bn =   3 ... No trail found.
-- g_Bn =   4 ... No trail found.
-- g_Bn =   5 ... No trail found.
-- g_Bn =   6 ... [./tests/norx-best-diff-search-tests.cc:2574] Update bound: 6 -> 6
Trail found!
-- g_nrounds =  5
-- g_Bn =   6 ... No trail found.
-- g_Bn =   7 ... No trail found.
-- g_Bn =   8 ... No trail found.
-- g_Bn =   9 ... No trail found.
-- g_Bn =  10 ... No trail found.

^C
real    26m14.461s
user    26m13.737s
sys     0m1.032s


 */

/* --- */

/*
 * Exchange, in place, the state words S[0], S[4], S[8], S[12] with the words
 * S[2], S[6], S[10], S[14]; all other words are left untouched. Applied
 * after round 8 to work around a bug in how the state after round 8 is
 * stored.
 *
 * NOTE(review): this was originally described as a swap of "the 0-th and
 * 3-rd columns". With the 16 words read as a 4x4 array with row stride 4,
 * the positions exchanged are column indices 0 and 2 -- confirm that this is
 * the intended pair.
 *
 * WARNING! This is a hack! TODO: to be fixed properly.
 *
 * \param S the 16-word state, modified in place
 */
void norx_array_state_permute_round8(WORD_T S[16])
{
  for(uint32_t irow = 0; irow < 4; irow++) {
	 const uint32_t i_first = 4 * irow;   // 0, 4, 8, 12
	 const uint32_t i_second = i_first + 2; // 2, 6, 10, 14
	 const WORD_T saved = S[i_first];
	 S[i_first] = S[i_second];
	 S[i_second] = saved;
  }
}


/* variant with a hash map */

/*
 * Search for differential trails in NORX (variant with a hash map)
 *
 * \param iround current round
 * \param ibit current bit position
 * \param S internal state input to round \p r
 * \param e the output word from the H operation of this round
 */
bool norx_diff_trail_search(const uint32_t iround, const uint32_t ibit, const norx_diff_state_t S)
{
  //  printf("[%s:%d] Enter %s() g_Bn %d\n", __FILE__, __LINE__, __FUNCTION__, g_Bn);
  /*
	* First round
	*/
  if(iround == 0) {
	 //	 printf("[%s:%d] First iround %d ibit %d g_Bn %d g_T.w %d g_nrounds %d\n", __FILE__, __LINE__, 
	 //			  iround, ibit, g_Bn, g_T.w, g_nrounds);
#if 1 // FIRST ROUND
	 if (ibit == WORD_SIZE) {

		if(!norx_state_is_all_zero(S)) { // skip the all-zero state

		  norx_trail_add_state(&g_T, S, iround); // add state 0 (input state)
		  assert(g_T.w == 0);
		  norx_diff_state_t S_next;
		  norx_state_init(&S_next);
		  norx_compute_next_state(&S_next, S, iround);
		  norx_trail_add_state(&g_T, S_next, iround + 1); // add state 1
		  assert((g_T.w  + g_B[g_nrounds - 2]) <= g_Bn);

#if FIND_ALL_TRAILS
		  //		  norx_diff_trail_search(iround + 1, 0, S); <------------ S_next!!!
		  norx_diff_trail_search(iround + 1, 0, S_next);
#else
		  bool b_state_found = norx_hash_map_diff_state_find(S_next, &g_HMAP[iround + 1]); 
		  if(b_state_found) {
			 //			 printf("[%s:%d] State already added! g_HMAP[%d].size() 2^%4.1f\n", __FILE__, __LINE__, iround + 1, log2(g_HMAP[iround + 1].size()));
			 norx_trail_remove_state(&g_T, S_next, iround + 1); // remove state 1
			 norx_trail_remove_state(&g_T, S, iround); // remove state 0
			 assert(g_T.w == 0);
			 return false;
		  } else {
			 norx_hash_map_diff_state_add(S_next, &g_HMAP[iround + 1]);
			 bool b_found = norx_diff_trail_search(iround + 1, 0, S_next);
			 if(b_found) {
				return true;
			 }
		  }

#endif // #if !FIND_ALL_TRAILS

		  norx_trail_remove_state(&g_T, S_next, iround + 1); // remove state 1
		  norx_trail_remove_state(&g_T, S, iround); // remove state 0
		  assert(g_T.w == 0);
		}

	 } else {
		// counters
		WORD_T ja[NLANES] = {0};
		WORD_T jb[NLANES] = {0};
		WORD_T je[NLANES] = {0};
		WORD_T a_i = 0;
		WORD_T b_i = 0;

		// lane 0
		for (ja[ZERO] = 0; ja[ZERO] < g_NBITS[ZERO][A]; ja[ZERO]++) { // a
		  for (jb[ZERO] = 0; jb[ZERO] < g_NBITS[ZERO][B]; jb[ZERO]++) { // b
			 for (je[ZERO] = 0; je[ZERO] < 2; je[ZERO]++) { // e

				//				WORD_T weight_agg = g_T.w + g_B[g_nrounds - 2];
				norx_diff_lane_t L_zero;
				norx_lane_init(&L_zero);

				a_i = ja[ZERO];
				b_i = jb[ZERO];
				if(g_NBITS[ZERO][A] == 1) { // A word is fixed
				  assert(g_T.state[iround].lane[ZERO].a == 0);
				  WORD_T a = g_T.state[iround].lane[ZERO].a;
				  a_i = (a >> ibit) & 1; 
				}
				if(g_NBITS[ZERO][B] == 1) { // B word is fixed
				  assert(g_T.state[iround].lane[ZERO].b == 0);
				  WORD_T b = g_T.state[iround].lane[ZERO].b;
				  b_i = (b >> ibit) & 1; 
				}

				// norx_lane_assign_bits_xyz(&L_zero, S.lane[ZERO], ja[ZERO], jb[ZERO], je[ZERO], ibit, iround);
				norx_lane_assign_bits_xyz(&L_zero, S.lane[ZERO], a_i, b_i, je[ZERO], ibit, iround);

				// printf("[%s:%d] g_B[g_nrounds - 2] = g_B[%d] = %d\n", __FILE__, __LINE__, g_nrounds - 2, g_B[g_nrounds - 2]);
				//				printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + g_B[g_nrounds - 2], g_Bn);
				if((g_T.w + L_zero.w + g_B[g_nrounds - 2]) <= g_Bn) {

				  // lane 1
				  for (ja[ONE] = 0; ja[ONE] < g_NBITS[ONE][A]; ja[ONE]++) { // a
					 for (jb[ONE] = 0; jb[ONE] < g_NBITS[ONE][B]; jb[ONE]++) { // b
						for (je[ONE] = 0; je[ONE] < 2; je[ONE]++) { // e

						  norx_diff_lane_t L_one;
						  norx_lane_init(&L_one);

						  a_i = ja[ONE];
						  b_i = jb[ONE];
						  if(g_NBITS[ONE][A] == 1) { // A word is fixed
							 assert(g_T.state[iround].lane[ONE].a == 0);
							 WORD_T a = g_T.state[iround].lane[ONE].a;
							 a_i = (a >> ibit) & 1; 
						  }
						  if(g_NBITS[ONE][B] == 1) { // B word is fixed
							 assert(g_T.state[iround].lane[ONE].b == 0);
							 WORD_T b = g_T.state[iround].lane[ONE].b;
							 b_i = (b >> ibit) & 1; 
						  }

						  // norx_lane_assign_bits_xyz(&L_one, S.lane[ONE], ja[ONE], jb[ONE], je[ONE], ibit, iround);
						  norx_lane_assign_bits_xyz(&L_one, S.lane[ONE], a_i, b_i, je[ONE], ibit, iround);

						  //						  printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - 2], g_Bn);
						  if((g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - 2]) <= g_Bn) {

				          // lane 2
							 for (ja[TWO] = 0; ja[TWO] < g_NBITS[TWO][A]; ja[TWO]++) { // a
								for (jb[TWO] = 0; jb[TWO] < g_NBITS[TWO][B]; jb[TWO]++) { // b
								  for (je[TWO] = 0; je[TWO] < 2; je[TWO]++) { // e

									 norx_diff_lane_t L_two;
									 norx_lane_init(&L_two);

									 a_i = ja[TWO];
									 b_i = jb[TWO];
									 if(g_NBITS[TWO][A] == 1) { // A word is fixed
										assert(g_T.state[iround].lane[TWO].a == 0);
										WORD_T a = g_T.state[iround].lane[TWO].a;
										a_i = (a >> ibit) & 1; 
									 }
									 if(g_NBITS[TWO][B] == 1) { // B word is fixed
										assert(g_T.state[iround].lane[TWO].b == 0);
										WORD_T b = g_T.state[iround].lane[TWO].b;
										b_i = (b >> ibit) & 1; 
									 }

									 // norx_lane_assign_bits_xyz(&L_two, S.lane[TWO], ja[TWO], jb[TWO], je[TWO], ibit, iround);
									 norx_lane_assign_bits_xyz(&L_two, S.lane[TWO], a_i, b_i, je[TWO], ibit, iround);

									 //									 printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - 2], g_Bn);
									 if((g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - 2]) <= g_Bn) {

				                  // lane 3
										for (ja[THREE] = 0; ja[THREE] < g_NBITS[THREE][A]; ja[THREE]++) { // a
										  for (jb[THREE] = 0; jb[THREE] < g_NBITS[THREE][B]; jb[THREE]++) { // b
											 for (je[THREE] = 0; je[THREE] < 2; je[THREE]++) { // e

												norx_diff_lane_t L_three;
												norx_lane_init(&L_three);

												a_i = ja[THREE];
												b_i = jb[THREE];
												if(g_NBITS[THREE][A] == 1) { // A word is fixed
												  assert(g_T.state[iround].lane[THREE].a == 0);
												  WORD_T a = g_T.state[iround].lane[THREE].a;
												  a_i = (a >> ibit) & 1; 
												}
												if(g_NBITS[THREE][B] == 1) { // B word is fixed
												  assert(g_T.state[iround].lane[THREE].b == 0);
												  WORD_T b = g_T.state[iround].lane[THREE].b;
												  b_i = (b >> ibit) & 1; 
												}

												//norx_lane_assign_bits_xyz(&L_three, S.lane[THREE], ja[THREE], jb[THREE], je[THREE], ibit, iround);
												norx_lane_assign_bits_xyz(&L_three, S.lane[THREE], a_i, b_i, je[THREE], ibit, iround);

												//												printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + g_B[g_nrounds - 2], g_Bn);
												if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + g_B[g_nrounds - 2]) <= g_Bn) {
												  norx_diff_state_t S_part;
												  norx_state_init(&S_part);
												  S_part.lane[ZERO] = L_zero;
												  S_part.lane[ONE] = L_one;
												  S_part.lane[TWO] = L_two;
												  S_part.lane[THREE] = L_three;
												  S_part.w = L_zero.w + L_one.w + L_two.w + L_three.w;
												  assert(S_part.w < INF);
#if FIND_ALL_TRAILS
												  norx_diff_trail_search(iround, ibit + 1, S_part);
#else
												  bool b_found = norx_diff_trail_search(iround, ibit + 1, S_part);
												  if(b_found) {
													 return true;
												  }
#endif // #if !FIND_ALL_TRAILS
												}
											 }
										  }
										}
									 }
								  }
								}
							 }
						  }
						}
					 }
				  }
				}
			 }
		  }
		}
	 }
#endif // #if 1 // FIRST ROUND
  }

  /*
	* Second round
	*/
  if(iround == 1) {
	 //	 printf("[%s:%d] Second iround %d ibit %d g_Bn %d g_T.w %d g_nrounds %d\n", __FILE__, __LINE__, 
	 //			  iround, ibit, g_Bn, g_T.w, g_nrounds);
#if 1 // SECOND ROUND
	 if (ibit == WORD_SIZE) {

		norx_diff_state_t S_next;
		norx_state_init(&S_next);
		norx_compute_next_state(&S_next, S, iround);
		assert(S_next.w == S.w);

		// Add correction to the first two states
		for(uint32_t ilane = 0; ilane < NLANES; ilane++) {

		  assert(S.lane[ilane].e == S_next.lane[ilane].c);

		  g_T.state[1].lane[ilane].e = S.lane[ilane].e;

		  if(g_NBITS[ilane][C] == 2) { // C word is not fixed
			 g_T.state[0].lane[ilane].c = S.lane[ilane].c;
			 g_T.state[1].lane[ilane].c = S.lane[ilane].c;
		  }

		  if(g_NBITS[ilane][D] == 2) { // D word is not fixed
			 //			 g_T.state[0].lane[ilane].d = (RROT(S.lane[ilane].d, ROTCONST[0]) ^ g_T.state[0].lane[ilane].e) & MASK;
			 g_T.state[0].lane[ilane].d = (LROT(S.lane[ilane].d, ROTCONST[0]) ^ g_T.state[0].lane[ilane].e) & MASK; // <--- bug! RROT must be LROT as we are rotationg backwards
			 g_T.state[1].lane[ilane].d = S.lane[ilane].d;
#if 0 // DEBUG
			 if(g_T.state[0].lane[ilane].e == 0x10) {
				printf("[%s:%d] D_prev %X = (D %X <<< %d) ^ A %X\n", __FILE__, __LINE__, 
						 g_T.state[0].lane[ilane].d, S.lane[ilane].d, ROTCONST[0], g_T.state[0].lane[ilane].e);
			 }
#endif // #if 1 // DEBUG
		  }

#if 1 // DEBUG
		  assert(S_next.lane[ilane].e == 0);
		  assert(g_T.state[iround+1].lane[ilane].e == 0);
#endif // #if 1 // DEBUG
		}

		assert((iround + 1) != 4);
		norx_trail_add_state(&g_T, S_next, iround + 1);

		if(iround == (g_nrounds - 1)) { // last round (g_nrounds == 2)
		  assert(g_nrounds == 2);
		  assert(g_B[g_nrounds - 2] == 0);
		  assert(g_T.w <= g_Bn);
		  if(g_T.w <= g_Bn) {
			 printf("[%s:%d] Update bound: %lld -> %lld\n", __FILE__, __LINE__, (WORD_MAX_T)g_Bn, (WORD_MAX_T)g_T.w);
			 g_Bn = g_T.w;
#if 0 // DEBUG
			 norx_trail_print(g_T, iround + 1);
#endif // #if 1 // DEBUG
			 norx_trail_assert(g_T, iround + 1);
#if !FIND_ALL_TRAILS
			 return true; /* We have a winner! */
#endif // #if !FIND_ALL_TRAILS
		  }
		} else { // not last round
		assert((g_T.w  + g_B[g_nrounds - iround - 2]) <= g_Bn);
#if FIND_ALL_TRAILS
		  norx_diff_trail_search(iround + 1, 0, S_next); // <--- S_next !!!
#else
		  bool b_state_found = norx_hash_map_diff_state_find(S_next, &g_HMAP[iround + 1]); 
		  if(b_state_found) {
			 norx_trail_remove_state(&g_T, S_next, iround + 1); // remove state 1
			 return false;
		  } else {
			 norx_hash_map_diff_state_add(S_next, &g_HMAP[iround + 1]);
			 bool b_found = norx_diff_trail_search(iround + 1, 0, S_next); // <--- S_next !!!
			 if(b_found) {
				return true;
			 }
		  }
#endif // #if !FIND_ALL_TRAILS
		}
		norx_trail_remove_state(&g_T, S_next, iround + 1);

	 } else {
		// counters
		WORD_T jc[NLANES] = {0};
		WORD_T jd[NLANES] = {0};
		WORD_T je[NLANES] = {0};
		WORD_T c_i = 0;
		WORD_T d_i = 0;

		/*
		 * Best weight for the remaining (g_nrounds - iround - 2) rounds
		 */
		uint32_t bound_w = 0; // if last round
		if(iround != (g_nrounds - 1)) { // not last round (g_nrounds != 2)
		  bound_w = g_B[g_nrounds - iround - 2];
		}

		// lane 0
		for (jc[ZERO] = 0; jc[ZERO] < g_NBITS[ZERO][C]; jc[ZERO]++) { // c
		  for (jd[ZERO] = 0; jd[ZERO] < g_NBITS[ZERO][D]; jd[ZERO]++) { // d
			 for (je[ZERO] = 0; je[ZERO] < 2; je[ZERO]++) { // e

				norx_diff_lane_t L_zero;
				norx_lane_init(&L_zero);

				c_i = jc[ZERO];
				d_i = jd[ZERO];
				if(g_NBITS[ZERO][C] == 1) { // C word is fixed
				  WORD_T c = g_T.state[iround].lane[ZERO].c;
				  c_i = (c >> ibit) & 1; 
				}
				if(g_NBITS[ZERO][D] == 1) { // D word is fixed
				  WORD_T d = g_T.state[iround].lane[ZERO].d;
				  d_i = (d >> ibit) & 1; 
				}

				norx_lane_assign_bits_xyz(&L_zero, S.lane[ZERO], c_i, d_i, je[ZERO], ibit, iround);

				assert(L_zero.a == S.lane[ZERO].a);
				assert(L_zero.b == S.lane[ZERO].b);

				if((g_T.w + L_zero.w + bound_w) <= g_Bn) { // <--- g_B[g_nrounds - iround - 1]

		        // lane 1
				  c_i = 0; 
				  d_i = 0;
				  for (jc[ONE] = 0; jc[ONE] < g_NBITS[ONE][C]; jc[ONE]++) { // c
					 for (jd[ONE] = 0; jd[ONE] < g_NBITS[ONE][D]; jd[ONE]++) { // d
						for (je[ONE] = 0; je[ONE] < 2; je[ONE]++) { // e

						  norx_diff_lane_t L_one;
						  norx_lane_init(&L_one);

						  c_i = jc[ONE];
						  d_i = jd[ONE];
						  if(g_NBITS[ONE][C] == 1) { // C word is fixed
							 WORD_T c = g_T.state[iround].lane[ONE].c;
							 c_i = (c >> ibit) & 1; 
						  }
						  if(g_NBITS[ONE][D] == 1) { // D word is fixed
							 WORD_T d = g_T.state[iround].lane[ONE].d;
							 d_i = (d >> ibit) & 1; 
						  }

						  norx_lane_assign_bits_xyz(&L_one, S.lane[ONE], c_i, d_i, je[ONE], ibit, iround);

						  if((g_T.w + L_zero.w + L_one.w + bound_w) <= g_Bn) {

							 // lane 2
							 c_i = 0; 
							 d_i = 0;
							 for (jc[TWO] = 0; jc[TWO] < g_NBITS[TWO][C]; jc[TWO]++) { // c
								for (jd[TWO] = 0; jd[TWO] < g_NBITS[TWO][D]; jd[TWO]++) { // d
								  for (je[TWO] = 0; je[TWO] < 2; je[TWO]++) { // e

									 norx_diff_lane_t L_two;
									 norx_lane_init(&L_two);

									 c_i = jc[TWO];
									 d_i = jd[TWO];
									 if(g_NBITS[TWO][C] == 1) { // C word is fixed
										WORD_T c = g_T.state[iround].lane[TWO].c;
										c_i = (c >> ibit) & 1; 
									 }
									 if(g_NBITS[TWO][D] == 1) { // D word is fixed
										WORD_T d = g_T.state[iround].lane[TWO].d;
										d_i = (d >> ibit) & 1; 
									 }

									 norx_lane_assign_bits_xyz(&L_two, S.lane[TWO], c_i, d_i, je[TWO], ibit, iround);

									 if((g_T.w + L_zero.w + L_one.w + L_two.w + bound_w) <= g_Bn) {

										// lane 2
										c_i = 0; 
										d_i = 0;
										for (jc[THREE] = 0; jc[THREE] < g_NBITS[THREE][C]; jc[THREE]++) { // c
										  for (jd[THREE] = 0; jd[THREE] < g_NBITS[THREE][D]; jd[THREE]++) { // d
											 for (je[THREE] = 0; je[THREE] < 2; je[THREE]++) { // e

												norx_diff_lane_t L_three;
												norx_lane_init(&L_three);

												c_i = jc[THREE];
												d_i = jd[THREE];
												if(g_NBITS[THREE][C] == 1) { // C word is fixed
												  WORD_T c = g_T.state[iround].lane[THREE].c;
												  c_i = (c >> ibit) & 1; 
												}
												if(g_NBITS[THREE][D] == 1) { // D word is fixed
												  WORD_T d = g_T.state[iround].lane[THREE].d;
												  d_i = (d >> ibit) & 1; 
												}

												norx_lane_assign_bits_xyz(&L_three, S.lane[THREE], c_i, d_i, je[THREE], ibit, iround);

												if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + bound_w) <= g_Bn) {
												  norx_diff_state_t S_part;
												  norx_state_init(&S_part);
												  S_part.lane[ZERO] = L_zero;
												  S_part.lane[ONE] = L_one;
												  S_part.lane[TWO] = L_two;
												  S_part.lane[THREE] = L_three;
												  S_part.w = L_zero.w + L_one.w + L_two.w + L_three.w;
#if FIND_ALL_TRAILS
												  norx_diff_trail_search(iround, ibit + 1, S_part);
#else
												  bool b_found = norx_diff_trail_search(iround, ibit + 1, S_part);
												  if(b_found) {
													 return true;
												  }
#endif // #if !FIND_ALL_TRAILS
												}
											 }
										  }
										}
									 }
								  }
								}
							 }
						  }
						}
					 }
				  }
				}
			 }
		  }
		}

	 }
#endif // #if 1 // SECOND ROUND
  }

  /*
	* Intermediate rounds
	*/
  if((iround > 1) && (iround != (g_nrounds - 1))) {
	 //	 printf("[%s:%d] Intermediate iround %d ibit %d g_Bn %d g_T.w %d g_nrounds %d\n", __FILE__, __LINE__, 
	 //			  iround, ibit, g_Bn, g_T.w, g_nrounds);
#if 1 // INTERMEDIATE ROUND
	 if (ibit == WORD_SIZE) {
		norx_diff_state_t S_next;
		norx_state_init(&S_next);
		norx_compute_next_state(&S_next, S, iround);
		if(((iround + 1) % 4) == 0) {
		  // printf("[%s:%d] Permute state: iround %d \n", __FILE__, __LINE__, iround);
		  norx_state_permute(&S_next);
		}
		assert((g_T.w  + g_B[g_nrounds - iround - 2]) <= g_Bn);
		norx_trail_add_state(&g_T, S_next, iround + 1);
#if FIND_ALL_TRAILS
		norx_diff_trail_search(iround + 1, 0, S_next);
#else
		bool b_state_found = norx_hash_map_diff_state_find(S_next, &g_HMAP[iround + 1]); 
		if(b_state_found) {
		  norx_trail_remove_state(&g_T, S_next, iround + 1); // remove state 1
		  return false;
		} else {
		  norx_hash_map_diff_state_add(S_next, &g_HMAP[iround + 1]);
		  bool b_found = norx_diff_trail_search(iround + 1, 0, S_next);
		  if(b_found) {
			 return true;
		  }
		}
#endif // #if !FIND_ALL_TRAILS
		norx_trail_remove_state(&g_T, S_next, iround + 1);
	 } else {
		WORD_T je[NLANES] = {0}; // counter

		for (je[ZERO] = 0; je[ZERO] < 2; je[ZERO]++) { // e
		  norx_diff_lane_t L_zero;
		  norx_lane_init(&L_zero);
		  norx_lane_assign_bit_e(&L_zero, S.lane[ZERO], je[ZERO], ibit, iround);

		  if((g_T.w + L_zero.w + g_B[g_nrounds - iround - 2]) <= g_Bn) { // <-- bug! g_B[g_nrounds - iround - 1] :rounds are counted from 0

			 for (je[ONE] = 0; je[ONE] < 2; je[ONE]++) { // e
				norx_diff_lane_t L_one;
				norx_lane_init(&L_one);
				norx_lane_assign_bit_e(&L_one, S.lane[ONE], je[ONE], ibit, iround);

				if((g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - iround - 2]) <= g_Bn) {

				  for (je[TWO] = 0; je[TWO] < 2; je[TWO]++) { // e
					 norx_diff_lane_t L_two;
					 norx_lane_init(&L_two);
					 norx_lane_assign_bit_e(&L_two, S.lane[TWO], je[TWO], ibit, iround);

					 if((g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - iround - 2]) <= g_Bn) {

						for (je[THREE] = 0; je[THREE] < 2; je[THREE]++) { // e
						  norx_diff_lane_t L_three;
						  norx_lane_init(&L_three);
						  norx_lane_assign_bit_e(&L_three, S.lane[THREE], je[THREE], ibit, iround);

						  if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + g_B[g_nrounds - iround - 2]) <= g_Bn) {
							 norx_diff_state_t S_part;
							 norx_state_init(&S_part);
							 S_part.lane[ZERO] = L_zero;
							 S_part.lane[ONE] = L_one;
							 S_part.lane[TWO] = L_two;
							 S_part.lane[THREE] = L_three;
							 S_part.w = L_zero.w + L_one.w + L_two.w + L_three.w;
#if FIND_ALL_TRAILS
							 norx_diff_trail_search(iround, ibit + 1, S_part);
#else
							 bool b_found = norx_diff_trail_search(iround, ibit + 1, S_part);
							 if(b_found) {
								return true;
							 }
#endif // #if !FIND_ALL_TRAILS
						  }
						}
					 }
				  }
				}
			 }
		  }
		}

	 }
#endif // #if 1 // INTERMEDIATE ROUND
  }

  /*
	* Last round
	*/
  if((iround > 1) && (iround == (g_nrounds - 1))) {
	 //	 printf("[%s:%d] Last iround %d\n", __FILE__, __LINE__, iround);
	 //	 printf("[%s:%d] Last iround %d ibit %d g_Bn %d g_T.w %d g_nrounds %d\n", __FILE__, __LINE__, 
	 //			  iround, ibit, g_Bn, g_T.w, g_nrounds);
#if 1 // LAST ROUND
#if 1 // DEBUG
		for(uint32_t i = 0; i < NLANES; i++) {
		  assert(g_T.state[iround].lane[i].a == S.lane[i].a);
		  assert(g_T.state[iround].lane[i].b == S.lane[i].b);
		  assert(g_T.state[iround].lane[i].c == S.lane[i].c);
		  assert(g_T.state[iround].lane[i].d == S.lane[i].d);
		  assert(g_T.state[iround].lane[i].e == 0);
		}
#endif // #if 1 // DEBUG

	 if (ibit == WORD_SIZE) {

		norx_diff_state_t S_next;
		norx_state_init(&S_next);
		norx_compute_next_state(&S_next, S, iround);
#if 1
		if(((iround + 1) % 4) == 0) {
		  // printf("[%s:%d] Permute state: iround %d \n", __FILE__, __LINE__, iround);
		  norx_state_permute(&S_next);
		}
#endif
		norx_trail_add_state(&g_T, S_next, iround + 1);
		assert(g_T.w <= g_Bn);
		if(g_T.w <= g_Bn) {
		  printf("[%s:%d] Update bound: %lld -> %lld\n", __FILE__, __LINE__, (WORD_MAX_T)g_Bn, (WORD_MAX_T)g_T.w);
		  g_Bn = g_T.w;
#if 0 // DEBUG
		  norx_trail_print(g_T, iround + 1);
#endif // #if 1 // DEBUG
		  norx_trail_assert(g_T, iround + 1);
#if !FIND_ALL_TRAILS
		  return true; /* We have a winner! */
#endif // #if !FIND_ALL_TRAILS
		}
		norx_trail_remove_state(&g_T, S_next, iround + 1);
	 } else {
		WORD_T je[NLANES] = {0}; // counter

		for (je[ZERO] = 0; je[ZERO] < 2; je[ZERO]++) { // e
		  norx_diff_lane_t L_zero;
		  norx_lane_init(&L_zero);
		  norx_lane_assign_bit_e(&L_zero, S.lane[ZERO], je[ZERO], ibit, iround);

		  assert((g_nrounds - iround - 1) == 0);
		  if((g_T.w + L_zero.w) <= g_Bn) {

			 for (je[ONE] = 0; je[ONE] < 2; je[ONE]++) { // e
				norx_diff_lane_t L_one;
				norx_lane_init(&L_one);
				norx_lane_assign_bit_e(&L_one, S.lane[ONE], je[ONE], ibit, iround);

				if((g_T.w + L_zero.w + L_one.w) <= g_Bn) {

				  for (je[TWO] = 0; je[TWO] < 2; je[TWO]++) { // e
					 norx_diff_lane_t L_two;
					 norx_lane_init(&L_two);
					 norx_lane_assign_bit_e(&L_two, S.lane[TWO], je[TWO], ibit, iround);

					 if((g_T.w + L_zero.w + L_one.w + L_two.w) <= g_Bn) {

						for (je[THREE] = 0; je[THREE] < 2; je[THREE]++) { // e
						  norx_diff_lane_t L_three;
						  norx_lane_init(&L_three);
						  norx_lane_assign_bit_e(&L_three, S.lane[THREE], je[THREE], ibit, iround);

						  if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w) <= g_Bn) {
							 norx_diff_state_t S_part;
							 norx_state_init(&S_part);
							 S_part.lane[ZERO] = L_zero;
							 S_part.lane[ONE] = L_one;
							 S_part.lane[TWO] = L_two;
							 S_part.lane[THREE] = L_three;
							 S_part.w = L_zero.w + L_one.w + L_two.w + L_three.w;
#if FIND_ALL_TRAILS
							 norx_diff_trail_search(iround, ibit + 1, S_part);
#else
							 bool b_found = norx_diff_trail_search(iround, ibit + 1, S_part);
							 if(b_found) {
								return true;
							 }
#endif // #if !FIND_ALL_TRAILS
						  }
						}
					 }
				  }
				}
			 }
		  }
		}

	 }
#endif // #if 1 // LAST ROUND
  }
  //  printf("[%s:%d] Exit %s() g_Bn %d\n", __FILE__, __LINE__, __FUNCTION__, g_Bn);
  return false;
}


/* --- */
/**
 * Add a differential state to a hash map of already visited states.
 *
 * If an entry that compares equal to \p diff_state is already stored, it is
 * removed and re-inserted with its weight set to the maximum of the stored
 * weight and the weight of \p diff_state. The mapped value is the hash of
 * the inserted state as computed by \ref norx_diff_state_hash.
 *
 * \param diff_state state to be added (taken by value).
 * \param diff_state_hash_map hash map to be updated.
 * \return \p true if the state was not present before the call;
 *         \p false if an existing entry was replaced.
 */
bool norx_hash_map_diff_state_add(const norx_diff_state_t diff_state,
											 boost::unordered_map<norx_diff_state_t, uint32_t, norx_diff_state_hash, norx_diff_state_equal_to>* diff_state_hash_map)
{
  norx_diff_state_t new_diff_state = diff_state;
  bool b_is_new = true;

  boost::unordered_map<norx_diff_state_t, uint32_t, norx_diff_state_hash, norx_diff_state_equal_to>::iterator hash_map_iter = 
	 diff_state_hash_map->find(new_diff_state);

  if(hash_map_iter != diff_state_hash_map->end()) { // already added
	 b_is_new = false;
	 printf("[%s:%d] Hash %X exists: ", __FILE__, __LINE__, hash_map_iter->second);
	 // Read the stored weight BEFORE erasing: erase() invalidates the
	 // iterator, so dereferencing it afterwards is undefined behaviour.
	 new_diff_state.w = std::max(diff_state.w, hash_map_iter->first.w);
	 diff_state_hash_map->erase(hash_map_iter);
	 printf("[%s:%d] Update weight %d -> %d\n", __FILE__, __LINE__, diff_state.w, new_diff_state.w);
  }

  // Add new hash value
  norx_diff_state_hash diff_state_hash;  // diff state hash function
  uint32_t new_diff_state_hash_val = diff_state_hash(new_diff_state);
  std::pair<norx_diff_state_t, uint32_t> new_pair (new_diff_state, new_diff_state_hash_val);
  diff_state_hash_map->insert(new_pair);

  // The function is declared bool: always return a value (flowing off the
  // end of a non-void function is undefined behaviour).
  return b_is_new;
}

/* --- */

#if 1 // WORD_SIZE 16 nrounds 5 INIT_N 1 INIT_NK 0 RATE 0 FULL 0
#define NORX_TRAIL_LEN 6
/*
 * Hard-coded differential trail, indexed as [state][lane][word].
 *
 * There are NORX_TRAIL_LEN states (R[0]..R[5]); each state has one row per
 * lane (L[0]..L[3]) and each row holds five words in the order a, b, c, d, e
 * (see the "abcde" tag on every state). The trailing comments record the
 * weight of each state and the total trail weight; the per-state weights
 * listed (0, 0, 1, 1, 2, 2) add up to the stated T.w = 6.
 *
 * The parameters under which the trail was produced are those listed on the
 * enclosing #if line.
 */
WORD_T g_norx_trail[NORX_TRAIL_LEN][NLANES][5] = {
  { // R[ 0] abcde
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
	 {0x00008000, 0x00000000, 0x00000000, 0x00000000, 0x00008000}, // L[1]
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
  }, // T.state[ 0].w =   0
  { // R[ 1] abcde
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
	 {0x00008000, 0x00000000, 0x00000000, 0x00000080, 0x00000080}, // L[1]
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
  }, // T.state[ 1].w =   0
  { // R[ 2] abcde
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
	 {0x00008000, 0x00001000, 0x00000080, 0x00000080, 0x00009000}, // L[1]
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
  }, // T.state[ 2].w =   1
  { // R[ 3] abcde
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[0]
	 {0x00009000, 0x00001000, 0x00000080, 0x00009080, 0x00009000}, // L[1]
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[2]
	 {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, // L[3]
  }, // T.state[ 3].w =   1
  //         --- Permuted state after round 4: diagonals to columns ---
  { // R[ 4] abcde
	 {0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000001}, // L[0]
	 {0x00009000, 0x00000000, 0x00000000, 0x00000000, 0x00009000}, // L[1]
	 {0x00000000, 0x00000000, 0x00000000, 0x00009080, 0x00000000}, // L[2]
	 {0x00000000, 0x00000000, 0x00009000, 0x00000000, 0x00000000}, // L[3]
  }, // T.state[ 4].w =   2
  { // R[ 5] abcde
	 {0x00000001, 0x00000001, 0x00000000, 0x00000100, 0x00000000}, // L[0]
	 {0x00009000, 0x00000000, 0x00000000, 0x00000090, 0x00000000}, // L[1]
	 {0x00000000, 0x00000000, 0x00000000, 0x00008090, 0x00000000}, // L[2]
	 {0x00000000, 0x00000000, 0x00009000, 0x00000000, 0x00000000}, // L[3]
  }, // T.state[ 5].w =   2
  // T.w =   6
};
#endif // #if 1 // WORD_SIZE 16 nrounds 5 INIT_N 1 INIT_NK 0 RATE 0 FULL 0


/* --- */
	 // Stray statement fragment (not enclosed in any function here): a
	 // debug-instrumented update of the words A and D. It prints D, B and A,
	 // replaces A by H(A, B), prints the new A, XORs the new A into D and
	 // finally replaces D by RROT(D, R0).
	 // NOTE(review): presumably one step of the NORX G function with RROT
	 // being a right rotation by the constant R0 -- confirm against the
	 // definitions of H, RROT and R0.
	 printf("[%s:%d] BEFORE: D %X B %X A %X\n", __FILE__, __LINE__, D, B, A);
    (A) = H(A, B); 
	 printf("[%s:%d] AFTER: A %X\n", __FILE__, __LINE__, A);
	 (D) ^= (A); 
	 D = RROT(D, R0);

/* --- */

void norx_trail_print(const norx_diff_trail_t T, uint32_t nrounds)
{
  //  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);
  uint32_t wtrail = 0;
  for(uint32_t iround = 0; iround < (nrounds + 1); iround++) {
	 uint32_t wstate = 0;
	 if((iround > 0) && ((iround % 4) == 0)) {
		printf("           --- Permuted state after round %d: diagonals to columns ---\n", iround);
	 }
#if 0 // dcbae
	 printf("R[%2d] dcba|e \n", iround);
#else // abcde
	 printf("R[%2d] abcd|e \n", iround);
#endif // #if 0 // dcbae
	 //	 for(int ilane = (NLANES-1); ilane >= 0; ilane--) {
	 for(uint32_t ilane = 0; ilane < NLANES; ilane++) {
#if 0 // dcbae
		printf("L[%d] %-16llX %-16llX %-16llX %-16llX | %-16llX\n", ilane, 
				 (WORD_MAX_T)T.state[iround].lane[ilane].d, (WORD_MAX_T)T.state[iround].lane[ilane].c, 
				 (WORD_MAX_T)T.state[iround].lane[ilane].b, (WORD_MAX_T)T.state[iround].lane[ilane].a,
				 (WORD_MAX_T)T.state[iround].lane[ilane].e);
#else // abcde
		printf("L[%d] %-16llX %-16llX %-16llX %-16llX | %-16llX\n", ilane, 
				 (WORD_MAX_T)T.state[iround].lane[ilane].a, (WORD_MAX_T)T.state[iround].lane[ilane].b, 
				 (WORD_MAX_T)T.state[iround].lane[ilane].c, (WORD_MAX_T)T.state[iround].lane[ilane].d,
				 (WORD_MAX_T)T.state[iround].lane[ilane].e);
#endif // #if 0 // dcbae
		wstate += T.state[iround].lane[ilane].w;
	 }
	 wtrail += wstate;
	 printf(" | %d\n", T.state[iround].w);
	 if(!(wstate == T.state[iround].w)) {
		printf("[%s:%d] iround %d wstate %d T.state[iround].w %d\n", __FILE__, __LINE__, iround, wstate, T.state[iround].w);
	 }
	 assert(wstate == T.state[iround].w);
  }
  printf("T.w = %3d\n", T.w);
  if(!(wtrail == T.w)) {
	 printf("[%s:%d] wtrail %d T.w %d\n", __FILE__, __LINE__, wtrail, T.w);
  }
  assert(wtrail == T.w);
  //  printf("[%s:%d]  Exit %s()\n", __FILE__, __LINE__, __FUNCTION__);
}

/* --- */

/**
 * Apply F to the state once for every pair of single rounds.
 *
 * F(S) is applied (nsingle_rounds / 2) times. When \p nsingle_rounds is odd,
 * the remaining single round is NOT applied: the branch that detects this
 * case is empty.
 *
 * \param nsingle_rounds number of single rounds.
 * \param S state of 16 words, passed to F.
 */
void norx_encrypt(const uint32_t nsingle_rounds, WORD_T S[16])
{
  uint32_t nremaining = (nsingle_rounds / 2); // double rounds still to apply
  while(nremaining != 0) {
	 F(S);
	 --nremaining;
  }

  const bool b_odd = ((nsingle_rounds % 2) == 1);
  if(b_odd) {
	 // Placeholder: nothing is done for the trailing single round.
  }
}


/* --- */

/**
 * XOR a differential state into a state given as an array of 16 words.
 *
 * Lane ZERO, ONE, TWO, THREE of \p DX is applied to column 0, 1, 2, 3 of the
 * array, respectively. Within column j the words a, b, c, d of the lane are
 * XOR-ed into the elements at index j, j + 4, j + 8 and j + 12.
 *
 * \param XX output array: XX[i] = X[i] ^ (corresponding difference word).
 * \param X input array.
 * \param DX differences to apply (only the a, b, c, d words are used).
 */
void norx_array_state_apply_diff(WORD_T XX[16], const WORD_T X[16], const norx_diff_state_t DX)
{
  // Lanes listed in column order: column j takes its differences from lanes[j]
  const norx_diff_lane_t* const lanes[4] = {
	 &DX.lane[ZERO], &DX.lane[ONE], &DX.lane[TWO], &DX.lane[THREE]
  };

  for(uint32_t icol = 0; icol < 4; icol++) {
	 const norx_diff_lane_t& diff = *lanes[icol];
	 XX[icol]      = X[icol]      ^ diff.a;
	 XX[icol + 4]  = X[icol + 4]  ^ diff.b;
	 XX[icol + 8]  = X[icol + 8]  ^ diff.c;
	 XX[icol + 12] = X[icol + 12] ^ diff.d;
  }
}


/* --- */

// init_N32
/*
 * Hard-coded differential trail, indexed as [state][lane][word]:
 * 6 states, 4 lanes per state, 5 words per lane.
 * NOTE(review): the word order within a row is presumably a, b, c, d, e as
 * printed by norx_trail_print() -- confirm.
 * The values use up to 32 bits, so WORD_T is assumed to be at least 32 bits
 * wide here.
 */
WORD_T g_norx_trail[6][4][5] = {
  {
	 {0,                0,                0,                0,                0},
	 {0,                0,                0,                0,                0},
	 {0,                0,                0,                0x80000000,       0x80000000},
	 {0,                0,                0,                0,                0},
  },
  {
	 {0,                0,                0,                0,                0},
	 {0,                0,                0,                0,                0},
	 {0x80,             0,                0,                0x80000000,       0x80},
	 {0,                0,                0,                0,                0},
  },
  {
	 {0,                0,                0,                0,                0},
	 {0,                0,                0,                0,                0},
	 {0x80,             0x80,             0x40000,          0x80000000,       0x80040000},
	 {0,                0,                0,                0,                0},
  },
  {
	 {0,                0,                0,                0,                0},
	 {0,                0,                0,                0,                0},
	 {0x808004,         0x80,             0x40000,          0x80040000,       0x808084},
	 {0,                0,                0,                0,                0},
  },
  {
	 {0,                0x808084,         0,                0,                0},
	 {0x808004,         0,                0,                0,                0}, // a comma was missing after 0x808004
	 {0,                0,                0,                0x80040000,       0x80040000},
	 {0,                0,                0x424042,         0,                0x424042},
  },
  {
	 {0,                0x808084,         0,                0,                0},
	 {0x80800400,       0,                0,                0,                0},
	 {0x4000080,        0,                0,                0x80040000,       0},
	 {0x42404200,       0,                0x424042,         0x424042,         0},
  }
};


/* --- Stripped down version working --- */

bool norx_diff_trail_search(const uint32_t iround, const uint32_t ibit, const norx_diff_state_t S)
{
  //  printf("[%s:%d] Enter %s() g_Bn %d\n", __FILE__, __LINE__, __FUNCTION__, g_Bn);
  //  printf("[%s:%d] S.w = %d\n", __FILE__, __LINE__, S.w);
  /*
	* First round
	*/
  if(iround == 0) {
	 //	 printf("[%s:%d] First iround %d ibit %d g_Bn %d g_T.w %d g_nrounds %d\n", __FILE__, __LINE__, 
	 //			  iround, ibit, g_Bn, g_T.w, g_nrounds);
#if 1 // FIRST ROUND
	 if (ibit == WORD_SIZE) {

		if(!norx_state_is_all_zero(S)) { // skip the all-zero state

		  norx_trail_add_state(&g_T, S, iround); // add state 0 (input state)
		  assert(g_T.w == 0);
		  norx_diff_state_t S_next;
		  norx_state_init(&S_next);
		  norx_compute_next_state(&S_next, S, iround);
		  norx_trail_add_state(&g_T, S_next, iround + 1); // add state 1
		  assert((g_T.w  + g_B[g_nrounds - 2]) <= g_Bn);

		  //		  norx_diff_trail_search(iround + 1, 0, S); <------------ S_next!!!
#if FIND_ALL_TRAILS
		  norx_diff_trail_search(iround + 1, 0, S_next);
#else
		  bool b_found = norx_diff_trail_search(iround + 1, 0, S_next);
		  if(b_found) {
			 return true;
		  }
#endif // #if !FIND_ALL_TRAILS

		  norx_trail_remove_state(&g_T, S_next, iround + 1); // remove state 1
		  norx_trail_remove_state(&g_T, S, iround); // remove state 0
		  assert(g_T.w == 0);
		}

	 } else {
		// lane 0
		for(uint32_t jb_zero = 0; jb_zero < 2; jb_zero++) { // b
		  for(uint32_t je_zero = 0; je_zero < 2; je_zero++) { // e

			 //				WORD_T weight_agg = g_T.w + g_B[g_nrounds - 2];
			 norx_diff_lane_t L_zero;
			 norx_lane_init(&L_zero);
			 norx_lane_assign_bits_xyz(&L_zero, S.lane[ZERO], 0, jb_zero, je_zero, ibit, iround);
#if 0 // DEBUG
			 printf("\n[%s:%d] ibit %d jb_zero %d ie_zero %d\n", __FILE__, __LINE__, ibit, jb_zero, je_zero);
			 norx_lane_print(S.lane[ZERO]);
			 norx_lane_print(L_zero);
#endif // #if 0 // DEBUG
			 // printf("[%s:%d] g_B[g_nrounds - 2] = g_B[%d] = %d\n", __FILE__, __LINE__, g_nrounds - 2, g_B[g_nrounds - 2]);
			 //				printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + g_B[g_nrounds - 2], g_Bn);
			 if((g_T.w + L_zero.w + g_B[g_nrounds - 2]) <= g_Bn) {

				// lane 1
				for(uint32_t jb_one = 0; jb_one < 2; jb_one++) { // b
				  for(uint32_t je_one = 0; je_one < 2; je_one++) { // e

					 norx_diff_lane_t L_one;
					 norx_lane_init(&L_one);
					 norx_lane_assign_bits_xyz(&L_one, S.lane[ONE], 0, jb_one, je_one, ibit, iround);

					 //						  printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - 2], g_Bn);
					 if((g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - 2]) <= g_Bn) {

						// lane 2
						for(uint32_t jb_two = 0; jb_two < 2; jb_two++) { // b
						  for(uint32_t je_two = 0; je_two < 2; je_two++) { // e

							 norx_diff_lane_t L_two;
							 norx_lane_init(&L_two);
							 norx_lane_assign_bits_xyz(&L_two, S.lane[TWO], 0, jb_two, je_two, ibit, iround);

							 //									 printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - 2], g_Bn);
							 if((g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - 2]) <= g_Bn) {

								// lane 3
								for(uint32_t jb_three = 0; jb_three < 2; jb_three++) { // b
								  for(uint32_t je_three = 0; je_three < 2; je_three++) { // e

									 norx_diff_lane_t L_three;
									 norx_lane_init(&L_three);
									 norx_lane_assign_bits_xyz(&L_three, S.lane[THREE], 0, jb_three, je_three, ibit, iround);

									 //												printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + g_B[g_nrounds - 2], g_Bn);
									 if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + g_B[g_nrounds - 2]) <= g_Bn) {
										norx_diff_state_t S_part;
										norx_state_init(&S_part);
										S_part.lane[ZERO] = L_zero;
										S_part.lane[ONE] = L_one;
										S_part.lane[TWO] = L_two;
										S_part.lane[THREE] = L_three;
										S_part.w = L_zero.w + L_one.w + L_two.w + L_three.w;
										assert(S_part.w < INF);
#if 0 // DEBUG
										printf("[%s:%d] ibit %d calling ibit + 1 %d\n", __FILE__, __LINE__, ibit, ibit + 1);
										printf("[%s:%d] S_part\n", __FILE__, __LINE__);
										norx_state_print(S_part);
										printf("[%s:%d] L_parts\n", __FILE__, __LINE__);
										norx_lane_print(L_zero);
										norx_lane_print(L_one);
										norx_lane_print(L_two);
										norx_lane_print(L_three);
#endif // #if 0 // DEBUG
#if FIND_ALL_TRAILS
										norx_diff_trail_search(iround, ibit + 1, S_part);
#else
										bool b_found = norx_diff_trail_search(iround, ibit + 1, S_part);
										if(b_found) {
										  return true;
										}
#endif // #if !FIND_ALL_TRAILS
									 }
								  }
								}
							 }
						  }
						}
					 }
				  }
				}
			 }
		  }
		}
	 }
#endif // #if 1 // FIRST ROUND
  }

  /*
	* Second round
	*/
  if(iround == 1) {
	 //	 printf("[%s:%d] Second iround %d ibit %d g_Bn %d g_T.w %d g_nrounds %d\n", __FILE__, __LINE__, 
	 //			  iround, ibit, g_Bn, g_T.w, g_nrounds);
#if 1 // SECOND ROUND
	 if (ibit == WORD_SIZE) {

		norx_diff_state_t S_next;
		norx_state_init(&S_next);
		norx_compute_next_state(&S_next, S, iround);
		assert(S_next.w == S.w);

		// Add correction to the first two states
		for(uint32_t ilane = 0; ilane < NLANES; ilane++) {

		  assert(S.lane[ilane].e == S_next.lane[ilane].c);

		  g_T.state[1].lane[ilane].e = S.lane[ilane].e;

		  if(g_NBITS[ilane][C] == 2) { // C word is not fixed
			 g_T.state[0].lane[ilane].c = S.lane[ilane].c;
			 g_T.state[1].lane[ilane].c = S.lane[ilane].c;
		  }

		  if(g_NBITS[ilane][D] == 2) { // D word is not fixed
			 g_T.state[0].lane[ilane].d = (RROT(S.lane[ilane].d, ROTCONST[0]) ^ g_T.state[0].lane[ilane].e) & MASK;
			 g_T.state[1].lane[ilane].d = S.lane[ilane].d;
		  }

#if 1 // DEBUG
		  assert(S_next.lane[ilane].e == 0);
		  assert(g_T.state[iround+1].lane[ilane].e == 0);
#endif // #if 1 // DEBUG
		}

		norx_trail_add_state(&g_T, S_next, iround + 1);

		if(iround == (g_nrounds - 1)) { // last round (g_nrounds == 2)
		  assert(g_nrounds == 2);
		  assert(g_B[g_nrounds - 2] == 0);
		  assert(g_T.w <= g_Bn);
		  if(g_T.w <= g_Bn) {
			 printf("[%s:%d] Update bound: %lld -> %lld\n", __FILE__, __LINE__, (WORD_MAX_T)g_Bn, (WORD_MAX_T)g_T.w);
			 g_Bn = g_T.w;
#if 0 // DEBUG
			 norx_trail_print(g_T, iround + 1);
#endif // #if 1 // DEBUG
			 norx_trail_assert(g_T, iround + 1);
#if !FIND_ALL_TRAILS
			 return true; /* We have a winner! */
#endif // #if !FIND_ALL_TRAILS
		  }
		} else { // not last round
		  //		  printf("[%s:%d] iround %d g_T.w = %d\n", __FILE__, __LINE__, iround, g_T.w);
		  assert((g_T.w  + g_B[g_nrounds - iround - 2]) <= g_Bn);
		  //		  assert(0 == 1);
#if FIND_ALL_TRAILS
		  norx_diff_trail_search(iround + 1, 0, S_next); // <--- S_next !!!
#else
		  bool b_found = norx_diff_trail_search(iround + 1, 0, S_next); // <--- S_next !!!
		  if(b_found) {
			 return true;
		  }
#endif // #if !FIND_ALL_TRAILS
		}
		norx_trail_remove_state(&g_T, S_next, iround + 1);

	 } else {

		uint32_t w_bound = 0; // if last round
		if(iround != (g_nrounds - 1)) { // not last round (g_nrounds != 2)
		  w_bound = g_B[g_nrounds - iround - 2];
		  //		  printf("[%s:%d] iround %d w_bound %d = g_B[%d] %d\n", __FILE__, __LINE__, iround, w_bound, g_nrounds - iround - 2, g_B[g_nrounds - iround - 2]);
		}


		// lane 0
		for(uint32_t jc_zero = 0; jc_zero < 2; jc_zero++) { // c
		  for(uint32_t jd_zero = 0; jd_zero < 2; jd_zero++) { // d
			 for(uint32_t je_zero = 0; je_zero < 2; je_zero++) { // e

				norx_diff_lane_t L_zero;
				norx_lane_init(&L_zero);
				norx_lane_assign_bits_xyz(&L_zero, S.lane[ZERO], jc_zero, jd_zero, je_zero, ibit, iround);

#if 0 // DEBUG
				printf("\n[%s:%d] ibit %d jc_zero %d id_zero %d\n", __FILE__, __LINE__, ibit, jc_zero, jd_zero);
				norx_lane_print(S.lane[ZERO]);
				norx_lane_print(L_zero);
#endif // #if 0 // DEBUG

				assert(L_zero.a == S.lane[ZERO].a);
				assert(L_zero.b == S.lane[ZERO].b);

				// if((g_T.w + L_zero.w + g_B[g_nrounds - 2]) <= g_Bn) {
				//				printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + g_B[g_nrounds - iround - 1], g_Bn);
				if((g_T.w + L_zero.w + w_bound) <= g_Bn) { // <--- g_B[g_nrounds - iround - 1]

		        // lane 1
				  for(uint32_t jc_one = 0; jc_one < 2; jc_one++) { // c
					 for(uint32_t jd_one = 0; jd_one < 2; jd_one++) { // d
						for(uint32_t je_one = 0; je_one < 2; je_one++) { // e

						  norx_diff_lane_t L_one;
						  norx_lane_init(&L_one);
						  norx_lane_assign_bits_xyz(&L_one, S.lane[ONE], jc_one, jd_one, je_one, ibit, iround);

#if 0 // DEBUG
						  printf("\n[%s:%d] ibit %d jc_one %d id_one %d\n", __FILE__, __LINE__, ibit, jc_one, jd_one);
						  norx_lane_print(S.lane[ONE]);
						  norx_lane_print(L_one);
#endif // #if 0 // DEBUG
						  // if((g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - 2]) <= g_Bn) {
						  //						  printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - iround - 1], g_Bn);
						  if((g_T.w + L_zero.w + L_one.w + w_bound) <= g_Bn) {

							 // lane 2
							 for(uint32_t jc_two = 0; jc_two < 2; jc_two++) { // c
								for(uint32_t jd_two = 0; jd_two < 2; jd_two++) { // d
								  for(uint32_t je_two = 0; je_two < 2; je_two++) { // e

									 norx_diff_lane_t L_two;
									 norx_lane_init(&L_two);
									 norx_lane_assign_bits_xyz(&L_two, S.lane[TWO], jc_two, jd_two, je_two, ibit, iround);

#if 0 // DEBUG
									 printf("\n[%s:%d] ibit %d jc_two %d id_two %d\n", __FILE__, __LINE__, ibit, jc_two, jd_two);
									 norx_lane_print(S.lane[TWO]);
									 norx_lane_print(L_two);
#endif // #if 0 // DEBUG
									 // if((g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - 2]) <= g_Bn) {
									 //									 printf("[%s:%d] %d <= %d\n", __FILE__, __LINE__, g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - iround - 1], g_Bn);
									 if((g_T.w + L_zero.w + L_one.w + L_two.w + w_bound) <= g_Bn) {

										// lane 2
										for(uint32_t jc_three = 0; jc_three < 2; jc_three++) { // c
										  for(uint32_t jd_three = 0; jd_three < 2; jd_three++) { // d
											 for(uint32_t je_three = 0; je_three < 2; je_three++) { // e

												norx_diff_lane_t L_three;
												norx_lane_init(&L_three);
												norx_lane_assign_bits_xyz(&L_three, S.lane[THREE], jc_three, jd_three, je_three, ibit, iround);
#if 0 // DEBUG
												printf("\n[%s:%d] ibit %d jc_three %d id_three %d\n", __FILE__, __LINE__, ibit, jc_three, jd_three);
												norx_lane_print(S.lane[THREE]);
												norx_lane_print(L_three);
#endif // #if 0 // DEBUG
												if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + w_bound) <= g_Bn) {
												  norx_diff_state_t S_part;
												  norx_state_init(&S_part);
												  S_part.lane[ZERO] = L_zero;
												  S_part.lane[ONE] = L_one;
												  S_part.lane[TWO] = L_two;
												  S_part.lane[THREE] = L_three;
												  S_part.w = L_zero.w + L_one.w + L_two.w + L_three.w;
												  //												  assert(ibit < 3);
#if 0 // DEBUG
												  if(ibit == 3) {
													 printf("[%s:%d] ibit %d calling ibit + 1 %d\n", __FILE__, __LINE__, ibit, ibit + 1);
													 printf("[%s:%d] S_part\n", __FILE__, __LINE__);
													 norx_state_print(S_part);
													 printf("[%s:%d] L_parts\n", __FILE__, __LINE__);
													 norx_lane_print(L_zero);
													 norx_lane_print(L_one);
													 norx_lane_print(L_two);
													 norx_lane_print(L_three);
													 printf("[%s:%d] iround %d g_T.w + S_part.w = %d + %d = %d\n", __FILE__, __LINE__,
															  iround, g_T.w, S_part.w, g_T.w + S_part.w);
													 //													 assert((g_T.w + S_part.w) >= 3);
												  }
#endif // #if 0 // DEBUG
												  //												  assert(0 == 1);
#if FIND_ALL_TRAILS
												  norx_diff_trail_search(iround, ibit + 1, S_part);
#else
												  bool b_found = norx_diff_trail_search(iround, ibit + 1, S_part);
												  if(b_found) {
													 return true;
												  }
#endif // #if !FIND_ALL_TRAILS
												}
											 }
										  }
										}
									 }
								  }
								}
							 }
						  }
						}
					 }
				  }
				}
			 }
		  }
		}

	 }
#endif // #if 1 // SECOND ROUND
  }

  /*
	* Intermediate rounds
	*/
  if((iround > 1) && (iround != (g_nrounds - 1))) {
	 //	 printf("[%s:%d] Intermediate iround %d ibit %d g_Bn %d g_T.w %d g_nrounds %d\n", __FILE__, __LINE__, 
	 //			  iround, ibit, g_Bn, g_T.w, g_nrounds);
#if 1 // INTERMEDIATE ROUND
	 if (ibit == WORD_SIZE) {
		//		printf("[%s:%d] Intermediate iround %d ibit %d g_Bn %d g_T.w %d S.w %d g_nrounds %d\n", __FILE__, __LINE__, 
		//				 iround, ibit, g_Bn, g_T.w, S.w, g_nrounds);
		norx_diff_state_t S_next;
		norx_state_init(&S_next);
		norx_compute_next_state(&S_next, S, iround);
		if(((iround + 1) % 4) == 0) {
		  // printf("[%s:%d] Permute state: iround %d \n", __FILE__, __LINE__, iround);
		  norx_state_permute(&S_next);
		}
		//		printf("[%s:%d] S\n", __FILE__, __LINE__);
		//		norx_state_print(S);
		//		printf("[%s:%d] S_next\n", __FILE__, __LINE__);
		//		norx_state_print(S_next);
		//		assert((g_T.w  + g_B[g_nrounds - 2]) <= g_Bn);
		assert((g_T.w  + g_B[g_nrounds - iround - 2]) <= g_Bn);
		//		printf("[%s:%d] Trail BEFORE: \n", __FILE__, __LINE__);
		//		norx_trail_print(g_T, iround);
		norx_trail_add_state(&g_T, S_next, iround + 1);
		//		printf("[%s:%d] Trail AFTER: \n", __FILE__, __LINE__);
		//		norx_trail_print(g_T, iround + 1);
#if FIND_ALL_TRAILS
		norx_diff_trail_search(iround + 1, 0, S_next);
#else
		bool b_found = norx_diff_trail_search(iround + 1, 0, S_next);
		if(b_found) {
		  return true;
		}
#endif // #if !FIND_ALL_TRAILS
		norx_trail_remove_state(&g_T, S_next, iround + 1);
	 } else {
		WORD_T je[NLANES] = {0}; // counter

		for (je[ZERO] = 0; je[ZERO] < 2; je[ZERO]++) { // e
		  norx_diff_lane_t L_zero;
		  norx_lane_init(&L_zero);
		  norx_lane_assign_bit_e(&L_zero, S.lane[ZERO], je[ZERO], ibit, iround);

		  // if((g_T.w + L_zero.w + g_B[g_nrounds - 2]) <= g_Bn) {
		  if((g_T.w + L_zero.w + g_B[g_nrounds - iround - 2]) <= g_Bn) {

			 for (je[ONE] = 0; je[ONE] < 2; je[ONE]++) { // e
				norx_diff_lane_t L_one;
				norx_lane_init(&L_one);
				norx_lane_assign_bit_e(&L_one, S.lane[ONE], je[ONE], ibit, iround);

				// if((g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - 2]) <= g_Bn) {
				if((g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - iround - 2]) <= g_Bn) {

				  for (je[TWO] = 0; je[TWO] < 2; je[TWO]++) { // e
					 norx_diff_lane_t L_two;
					 norx_lane_init(&L_two);
					 norx_lane_assign_bit_e(&L_two, S.lane[TWO], je[TWO], ibit, iround);

					 // if((g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - 2]) <= g_Bn) {
					 if((g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - iround - 2]) <= g_Bn) {

						for (je[THREE] = 0; je[THREE] < 2; je[THREE]++) { // e
						  norx_diff_lane_t L_three;
						  norx_lane_init(&L_three);
						  norx_lane_assign_bit_e(&L_three, S.lane[THREE], je[THREE], ibit, iround);

						  // if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + g_B[g_nrounds - 2]) <= g_Bn) {
						  if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + g_B[g_nrounds - iround - 2]) <= g_Bn) {
							 norx_diff_state_t S_part;
							 norx_state_init(&S_part);
							 S_part.lane[ZERO] = L_zero;
							 S_part.lane[ONE] = L_one;
							 S_part.lane[TWO] = L_two;
							 S_part.lane[THREE] = L_three;
							 S_part.w = L_zero.w + L_one.w + L_two.w + L_three.w;
#if 0 // DEBUG
							 if(ibit == 3) {
								printf("[%s:%d] ibit %d calling ibit + 1 %d\n", __FILE__, __LINE__, ibit, ibit + 1);
								printf("[%s:%d] S_part\n", __FILE__, __LINE__);
								norx_state_print(S_part);
								printf("[%s:%d] L_parts\n", __FILE__, __LINE__);
								norx_lane_print(L_zero);
								norx_lane_print(L_one);
								norx_lane_print(L_two);
								norx_lane_print(L_three);
							 }
#endif // #if 0 // DEBUG
#if FIND_ALL_TRAILS
							 norx_diff_trail_search(iround, ibit + 1, S_part);
#else
							 bool b_found = norx_diff_trail_search(iround, ibit + 1, S_part);
							 if(b_found) {
								return true;
							 }
#endif // #if !FIND_ALL_TRAILS
						  }
						}
					 }
				  }
				}
			 }
		  }
		}

	 }
#endif // #if 1 // INTERMEDIATE ROUND
  }

  /*
	* Last round
	*/
  if((iround > 1) && (iround == (g_nrounds - 1))) {
	 //	 printf("[%s:%d] Last iround %d\n", __FILE__, __LINE__, iround);
	 //	 printf("[%s:%d] Last iround %d ibit %d g_Bn %d g_T.w %d g_nrounds %d\n", __FILE__, __LINE__, 
	 //			  iround, ibit, g_Bn, g_T.w, g_nrounds);
	 //	 assert(g_T.w >= 2);
#if 1 // LAST ROUND
#if 1 // DEBUG
		for(uint32_t i = 0; i < NLANES; i++) {
		  assert(g_T.state[iround].lane[i].a == S.lane[i].a);
		  assert(g_T.state[iround].lane[i].b == S.lane[i].b);
		  assert(g_T.state[iround].lane[i].c == S.lane[i].c);
		  assert(g_T.state[iround].lane[i].d == S.lane[i].d);
		  assert(g_T.state[iround].lane[i].e == 0);
		}
#endif // #if 1 // DEBUG

		//		printf("[%s:%d] S.w = %d ibit = %d\n", __FILE__, __LINE__, S.w, ibit);

	 if (ibit == WORD_SIZE) {

		//		printf("[%s:%d] S.w = %d\n", __FILE__, __LINE__, S.w);

		//		printf("[%s:%d] Last iround %d ibit %d g_Bn %d g_T.w %d S.w %d g_nrounds %d\n", __FILE__, __LINE__, 
		//				 iround, ibit, g_Bn, g_T.w, S.w, g_nrounds);

		//		norx_state_print(S);

		//		printf("[%s:%d] iround %d g_T.w = %d S.w = %d\n", __FILE__, __LINE__, iround, g_T.w, S.w);

		norx_diff_state_t S_next;
		norx_state_init(&S_next);
		norx_compute_next_state(&S_next, S, iround);
		assert(S_next.w == S.w);

#if 1
		if(((iround + 1) % 4) == 0) {
		  //		  printf("[%s:%d] Permute state: iround %d \n", __FILE__, __LINE__, iround);
		  norx_state_permute(&S_next);
		}
#endif
		norx_trail_add_state(&g_T, S_next, iround + 1);
		assert(g_T.w <= g_Bn);
		if(g_T.w <= g_Bn) {
		  printf("[%s:%d] Update bound: %lld -> %lld\n", __FILE__, __LINE__, (WORD_MAX_T)g_Bn, (WORD_MAX_T)g_T.w);
		  g_Bn = g_T.w;
#if 0 // DEBUG
		  norx_trail_print(g_T, iround + 1);
#endif // #if 1 // DEBUG
		  norx_trail_assert(g_T, iround + 1);
#if !FIND_ALL_TRAILS
		  return true; /* We have a winner! */
#endif // #if !FIND_ALL_TRAILS
		}
		norx_trail_remove_state(&g_T, S_next, iround + 1);
	 } else {
		WORD_T je[NLANES] = {0}; // counter

		for (je[ZERO] = 0; je[ZERO] < 2; je[ZERO]++) { // e
		  norx_diff_lane_t L_zero;
		  norx_lane_init(&L_zero);
		  norx_lane_assign_bit_e(&L_zero, S.lane[ZERO], je[ZERO], ibit, iround);

		  // if((g_T.w + L_zero.w + g_B[g_nrounds - 2]) <= g_Bn) {
		  //		  if((g_T.w + L_zero.w + g_B[g_nrounds - iround - 1]) <= g_Bn) {
		  assert((g_nrounds - iround - 1) == 0);
		  if((g_T.w + L_zero.w) <= g_Bn) {

			 for (je[ONE] = 0; je[ONE] < 2; je[ONE]++) { // e
				norx_diff_lane_t L_one;
				norx_lane_init(&L_one);
				norx_lane_assign_bit_e(&L_one, S.lane[ONE], je[ONE], ibit, iround);

				// if((g_T.w + L_zero.w + L_one.w + g_B[g_nrounds - 2]) <= g_Bn) {
				if((g_T.w + L_zero.w + L_one.w) <= g_Bn) {

				  for (je[TWO] = 0; je[TWO] < 2; je[TWO]++) { // e
					 norx_diff_lane_t L_two;
					 norx_lane_init(&L_two);
					 norx_lane_assign_bit_e(&L_two, S.lane[TWO], je[TWO], ibit, iround);

					 // if((g_T.w + L_zero.w + L_one.w + L_two.w + g_B[g_nrounds - 2]) <= g_Bn) {
					 if((g_T.w + L_zero.w + L_one.w + L_two.w) <= g_Bn) {

						for (je[THREE] = 0; je[THREE] < 2; je[THREE]++) { // e
						  norx_diff_lane_t L_three;
						  norx_lane_init(&L_three);
						  norx_lane_assign_bit_e(&L_three, S.lane[THREE], je[THREE], ibit, iround);

						  // if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w + g_B[g_nrounds - 2]) <= g_Bn) {
						  if((g_T.w + L_zero.w + L_one.w + L_two.w + L_three.w) <= g_Bn) {
							 norx_diff_state_t S_part;
							 norx_state_init(&S_part);
							 S_part.lane[ZERO] = L_zero;
							 S_part.lane[ONE] = L_one;
							 S_part.lane[TWO] = L_two;
							 S_part.lane[THREE] = L_three;
							 S_part.w = L_zero.w + L_one.w + L_two.w + L_three.w;
#if 0 // DEBUG
							 if(ibit == 4) {
								printf("[%s:%d] ibit %d calling ibit + 1 %d\n", __FILE__, __LINE__, ibit, ibit + 1);
								printf("[%s:%d] S_part\n", __FILE__, __LINE__);
								norx_state_print(S_part);
								printf("[%s:%d] L_parts\n", __FILE__, __LINE__);
								norx_lane_print(L_zero);
								norx_lane_print(L_one);
								norx_lane_print(L_two);
								norx_lane_print(L_three);
								printf("[%s:%d] iround %d g_T.w + S_part.w = %d + %d = %d\n", __FILE__, __LINE__,
										 iround, g_T.w, S_part.w, g_T.w + S_part.w);
								//													 assert((g_T.w + S_part.w) >= 3);
							 }
#endif // #if 0 // DEBUG
#if FIND_ALL_TRAILS
							 norx_diff_trail_search(iround, ibit + 1, S_part);
#else
							 //							 if(ibit == 4) {
							 //								printf("[%s:%d] S_part.w = %d ibit = %d\n", __FILE__, __LINE__, S_part.w, ibit);
							 //							 }
							 bool b_found = norx_diff_trail_search(iround, ibit + 1, S_part);
							 if(b_found) {
								return true;
							 }
#endif // #if !FIND_ALL_TRAILS
						  }
						}
					 }
				  }
				}
			 }
		  }
		}

	 }
#endif // #if 1 // LAST ROUND
  }
  //  printf("[%s:%d] S.w = %d\n", __FILE__, __LINE__, S.w);
  //  printf("[%s:%d] Exit %s() g_Bn %d\n", __FILE__, __LINE__, __FUNCTION__, g_Bn);
  return false;
}

/* --- */

/*
 * Detached, preprocessor-disabled snippets (both guarded by #if 0).
 *
 * They check one lane transition between T.state[iround_prev] and
 * T.state[iround]: b against XORROT(prev b, prev e, ROTCONST[...]),
 * c against prev e, d against prev d, and the lane weight against xdp_h().
 * The second snippet is the same check with lane indices shifted by B, C, D.
 *
 * The identifiers T, iround, iround_prev, ilane and bb are not declared
 * here; the snippets only make sense inside the function they were cut from.
 */
#if 0
		  WORD_T bb = XORROT(T.state[iround_prev].lane[ilane].b, T.state[iround_prev].lane[ilane].e, ROTCONST[iround_prev % 4]); // bb = (b ^ c) <<< r1,r3
		  assert(T.state[iround].lane[ilane].b == bb);
		  assert(T.state[iround].lane[ilane].c == T.state[iround_prev].lane[ilane].e);
		  assert(T.state[iround].lane[ilane].d == T.state[iround_prev].lane[ilane].d);
		  uint32_t weight = 
			 xdp_h(T.state[iround_prev].lane[ilane].c, T.state[iround_prev].lane[ilane].d, T.state[iround_prev].lane[ilane].e, WORD_SIZE); // c,d->e
		  assert(weight == T.state[iround].lane[ilane].w);
#endif
#if 0 // DEBUG
		  printf("[%s:%d] ilane = %d\n", __FILE__, __LINE__, ilane);
		  printf("[%s:%d] assert T.state[%d].lane[%d].b %d == bb %d\n", __FILE__, __LINE__, 
					iround, (ilane - B) % 4, T.state[iround].lane[(ilane - B) % 4].b, bb);
		  printf("[%s:%d] assert T.state[%d].lane[%d].c %d == T.state[%d].lane[%d].e %d\n", __FILE__, __LINE__, 
					iround, (ilane - C) % 4, T.state[iround].lane[(ilane - C) % 4].c, 
					iround_prev, ilane, T.state[iround_prev].lane[ilane].e);
		  printf("[%s:%d] assert T.state[%d].lane[%d].d %d == T.state[%d].lane[%d].d %d\n", __FILE__, __LINE__, 
					iround, ((ilane - D) % 4), T.state[iround].lane[((ilane - D) % 4)].d, 
					iround_prev, ilane, T.state[iround_prev].lane[ilane].d);
		  assert(T.state[iround].lane[(ilane - B) % 4].b == bb);
		  assert(T.state[iround].lane[(ilane - C) % 4].c == T.state[iround_prev].lane[ilane].e);
		  assert(T.state[iround].lane[(ilane - D) % 4].d == T.state[iround_prev].lane[ilane].d);
#endif // #if 0 // DEBUG

/* --- */

/*
 * Detached debug-print snippet (enabled with #if 1).
 *
 * It prints the previous trail state, the rotation constants R0..R3 and the
 * operands of three lane checks (b, c, d). It references i, bb, S, T, iround
 * and iround_prev, none of which are declared at this position, so as placed
 * here (outside any function body) it cannot compile on its own.
 *
 * NOTE(review): in the last two printf calls the printed lane label is
 * (i + C) % 4 resp. (i + D) % 4 while the printed value is taken from
 * lane[i] -- confirm which one was intended.
 */
#if 1
		  printf("[%s:%d] Previous state T[%d].state:\n", __FILE__, __LINE__, iround_prev);
		  norx_state_print(T->state[iround_prev]);
		  printf("[%s:%d] R0123 %d %d %d %d\n", __FILE__, __LINE__, R0, R1, R2, R3);
		  printf("[%s:%d] lane i = %d (i + B - C) mod 4 = %d + %d - %dmod 4 = %d\n", __FILE__, __LINE__, i, i, B, C, (i + B - C) % 4);
		  printf("[%s:%d] assert S.lane[%d].b %X == bb %X == T->state[%d].lane[%d].b %X ^ S.lane[%d].c %X\n", __FILE__, __LINE__, 
					i, S.lane[i].b, bb,
					iround_prev, (i + B) % 4, T->state[iround_prev].lane[(i + B) % 4].b, 
					(i + B - C) % 4, S.lane[(i + B - C) % 4].c);
		  printf("[%s:%d] assert T->state[%d].lane[%d].c %d == T->state[%d].lane[%d].e %d\n", __FILE__, __LINE__, 
					iround, i, S.lane[i].c, 
					iround_prev, (i + C) % 4, T->state[iround_prev].lane[i].e);
		  printf("[%s:%d] assert T->state[%d].lane[%d].d %d == T->state[%d].lane[%d].d %d\n", __FILE__, __LINE__, 
					iround, i, S.lane[i].d, 
					iround_prev, (i + D) % 4, T->state[iround_prev].lane[i].d);
#endif // #if 1 // DEBUG


/* --- */

/*
 * Add a new state to the trail.
 *
 * If (iround % 4) == 0, then the state \p S has been permuted
 * according to the NORX permutation (\see norx_state_permute). This
 * is taken into account when adding the new state. 
 *
 * If \p i is the index of word X \in \{A, B, C, D\} from the state
 * *after* the permutation, then the corresponding index of the word
 * *before* the permutation is \p (i + X). 
 *
 * Similarly if \p i is the index *before* the permutation then \p (i
 * - X) is the corresponding index *after* the permutation.
 *
 * What the function writes:
 *  - T->state[iround].lane[*].{a,b,c,d,e,w} are filled from \p S
 *    (weights are forced to 0 for iround == 0);
 *  - for iround >= 3, T->state[iround - 1].lane[*].e is back-filled from
 *    the a word (previous round even) or the c word (previous round odd)
 *    of the new state;
 *  - T->state[iround].w is set to the sum of the lane weights and the same
 *    sum is added to T->w.
 *
 * Everything else is assert-based consistency checking between the new
 * state and T->state[iround - 1], plus debug printing in the permuted case.
 *
 * \param T trail to extend; slot \p iround must still be all-zero
 *        (asserted per lane).
 * \param S state to store (passed by value).
 * \param iround index of the slot to fill; must be < NROUNDS_MAX.
 */
void norx_trail_add_state(norx_diff_trail_t* T, const norx_diff_state_t S, const uint32_t iround)
{
  assert(iround < NROUNDS_MAX);
  //  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

  // Running sum of the lane weights stored for this round.
  WORD_T wstate = 0; // for DEBUG
  for(uint32_t i = 0; i < NLANES; i++) {

#if 1 // DEBUG
	 // The destination slot must not have been written before.
	 assert(T->state[iround].lane[i].a == 0);
	 assert(T->state[iround].lane[i].b == 0);
	 assert(T->state[iround].lane[i].c == 0);
	 assert(T->state[iround].lane[i].d == 0);
	 assert(T->state[iround].lane[i].e == 0);
	 assert(S.lane[i].w < INF);
	 // NOTE(review): trivially true if the weight field is unsigned -- confirm its type.
	 assert(S.lane[i].w >= 0);
#endif // #if 1 // DEBUG

	 if(iround == 0) {
		assert(T->state[iround].lane[i].w == 0);
		// the initial state has probability 1 == weight 0
		T->state[iround].lane[i].w = 0;
		wstate += T->state[iround].lane[i].w;
	 } else {
		if((iround % 4) != 0) { // no permutation
		  assert(T->state[iround].lane[i].w == 0);
		  T->state[iround].lane[i].w = S.lane[i].w;
		  wstate += T->state[iround].lane[i].w;
		} else {
		  // Permuted round: iteration i handles the weight of lane (i + B) % 4
		  // instead of lane i. Presumably (i + B) % 4 visits every lane exactly
		  // once over the loop (true when NLANES == 4) -- TODO confirm.
		  assert(T->state[iround].lane[(i + B) % 4].w == 0);
		  T->state[iround].lane[(i + B) % 4].w = S.lane[(i + B) % 4].w;
		  wstate += T->state[iround].lane[(i + B) % 4].w;
		}
	 }

	 // Copy the five words of lane i verbatim.
	 T->state[iround].lane[i].a = S.lane[i].a;
	 T->state[iround].lane[i].b = S.lane[i].b;
	 T->state[iround].lane[i].c = S.lane[i].c;
	 T->state[iround].lane[i].d = S.lane[i].d;
	 T->state[iround].lane[i].e = S.lane[i].e;

	 // The first state has no predecessor to link with or to check against.
	 if(iround == 0) {
		continue;
	 }

	 uint32_t iround_prev = (iround - 1);
	 if((iround_prev & 1) == 0) { // prev round is even
		// iround is odd here, so it can never be a permuted round.
		assert((iround % 4) != 0);
		if(iround >= 3) {
		  // Back-fill the output word e of the previous state from the new a.
		  assert(T->state[iround_prev].lane[i].e == 0); // must be unassigned
		  T->state[iround_prev].lane[i].e = T->state[iround].lane[i].a; // e = aa
		}
#if 1 // DEBUG
		// Check the transition: a == prev e, b and c unchanged,
		// d == XORROT(prev d, prev e, rotation constant).
		assert(T->state[iround].lane[i].a == T->state[iround_prev].lane[i].e);
		assert(T->state[iround].lane[i].b == T->state[iround_prev].lane[i].b);
		assert(T->state[iround].lane[i].c == T->state[iround_prev].lane[i].c);
		WORD_T dd = XORROT(T->state[iround_prev].lane[i].d, T->state[iround_prev].lane[i].e, ROTCONST[(iround_prev % 4)]); // dd = (d ^ e) <<< r0,r2
		assert(T->state[iround].lane[i].d == dd);
		// The stored lane weight must equal xdp_h of the previous (a, b -> e).
		uint32_t weight = 
		  xdp_h(T->state[iround_prev].lane[i].a, T->state[iround_prev].lane[i].b, T->state[iround_prev].lane[i].e, WORD_SIZE); // a,b->e
		assert(weight == T->state[iround].lane[i].w);
#endif // #if 1 // DEBUG
	 }
	 if((iround_prev & 1) == 1) { // prev round is odd
		assert(T->state[iround].lane[i].a == T->state[iround_prev].lane[i].a);
		if(iround >= 3) {
		  if((iround % 4) != 0) { // no permutation
			 assert(T->state[iround_prev].lane[i].e == 0); // must be unassigned
			 T->state[iround_prev].lane[i].e = T->state[iround].lane[i].c; // e = cc
		  } else { // permutation
			 // The value is read from S (fully populated) rather than from
			 // T->state[iround], of which only lanes 0..i have been copied so far.
			 assert(T->state[iround_prev].lane[(i + B) % 4].e == 0); // must be unassigned
			 T->state[iround_prev].lane[(i + B) % 4].e = S.lane[(i + B - C) % 4].c;
		  }
		}
#if 1 // DEBUG
		if((iround % 4) != 0) { // no permutation
		  // Check the transition: b == XORROT(prev b, prev e, rotation constant),
		  // c == prev e, d unchanged, weight == xdp_h(prev c, prev d -> prev e).
		  WORD_T bb = XORROT(T->state[iround_prev].lane[i].b, T->state[iround_prev].lane[i].e, ROTCONST[iround_prev % 4]); // bb = (b ^ c) <<< r1,r3
		  assert(T->state[iround].lane[i].b == bb);
		  assert(T->state[iround].lane[i].c == T->state[iround_prev].lane[i].e);
		  assert(T->state[iround].lane[i].d == T->state[iround_prev].lane[i].d);
		  uint32_t weight = 
			 xdp_h(T->state[iround_prev].lane[i].c, T->state[iround_prev].lane[i].d, T->state[iround_prev].lane[i].e, WORD_SIZE); // c,d->e
		  assert(weight == T->state[iround].lane[i].w);
		} else { // permutation
		  // Same checks as above, with lane indices shifted by B, C, D and the
		  // new words taken from S.
		  WORD_T bb = XORROT(T->state[iround_prev].lane[(i + B) % 4].b, S.lane[(i + B - C) % 4].c, ROTCONST[iround_prev % 4]); // bb = (b ^ c) <<< r1,r3
		  //		  WORD_T bb = XORROT(T->state[iround_prev].lane[(i + B) % 4].b, T->state[iround_prev].lane[(i + C) % 4].e, ROTCONST[iround_prev % 4]); // bb = (b ^ c) <<< r1,r3
#if 1
		  // Unconditional debug output, emitted once per lane on every permuted round.
		  printf("[%s:%d] Previous state T[%d].state:\n", __FILE__, __LINE__, iround_prev);
		  norx_state_print(T->state[iround_prev]);
		  //		  printf("[%s:%d] Next state T[%d].state:\n", __FILE__, __LINE__, iround);
		  //		  norx_state_print(T->state[iround]);
		  //		  printf("[%s:%d] Next state iround = %d:\n", __FILE__, __LINE__, iround);
		  //		  norx_state_print(S);
		  printf("[%s:%d] R0123 %d %d %d %d\n", __FILE__, __LINE__, R0, R1, R2, R3);
		  //		  printf("[%s:%d] i = %d T->state[%d].lane[%d].b %d != bb %d\n", __FILE__, __LINE__, i, iround, (i + B) % 4, S.lane[(i + B) % 4].b, bb);
		  //		  printf("[%s:%d] i %d (i + D) %d (i + B) %d (i + C) %d\n", __FILE__, __LINE__, i, (i+D) % 4, (i+B) % 4, (i+C) % 4);
#endif 

#if 1 // DEBUG
		  printf("[%s:%d] lane i = %d (i + B - C) mod 4 = %d + %d - %dmod 4 = %d\n", __FILE__, __LINE__, i, i, B, C, (i + B - C) % 4);
		  //		  printf("[%s:%d] lane i = %d\n", __FILE__, __LINE__, i);
		  printf("[%s:%d] assert S.lane[%d].b %X == bb %X == T->state[%d].lane[%d].b %X ^ S.lane[%d].c %X\n", __FILE__, __LINE__, 
					i, S.lane[i].b, bb,
					iround_prev, (i + B) % 4, T->state[iround_prev].lane[(i + B) % 4].b, 
					(i + B - C) % 4, S.lane[(i + B - C) % 4].c);
		  //					iround_prev, (i + C) % 4, T->state[iround_prev].lane[(i + C) % 4].e);
		  // NOTE(review): the next two messages label the previous-state lane as
		  // (i + C) % 4 resp. (i + D) % 4 but print the value of lane[i]; the
		  // asserts below use different operands -- confirm what should be shown.
		  printf("[%s:%d] assert T->state[%d].lane[%d].c %d == T->state[%d].lane[%d].e %d\n", __FILE__, __LINE__, 
					iround, i, S.lane[i].c, 
					iround_prev, (i + C) % 4, T->state[iround_prev].lane[i].e);
		  printf("[%s:%d] assert T->state[%d].lane[%d].d %d == T->state[%d].lane[%d].d %d\n", __FILE__, __LINE__, 
					iround, i, S.lane[i].d, 
					iround_prev, (i + D) % 4, T->state[iround_prev].lane[i].d);
#endif // #if 1 // DEBUG
		  assert(S.lane[i].b == bb);
		  assert(S.lane[(i + B - C) % 4].c == T->state[iround_prev].lane[(i + B) % 4].e);
		  assert(S.lane[i].d == T->state[iround_prev].lane[(i + D) % 4].d);
		  //		  assert(S.lane[(i - B) % 4].b == bb);
		  //		  assert(S.lane[(i - C) % 4].c == T->state[iround_prev].lane[i].e);
		  //		  assert(S.lane[(i - D) % 4].d == T->state[iround_prev].lane[i].d);
		  // Weight check for lane (i + B) % 4, the lane whose weight this
		  // iteration stored above.
		  uint32_t weight = 
			 xdp_h(T->state[iround_prev].lane[(i + B) % 4].c, T->state[iround_prev].lane[(i + B) % 4].d, T->state[iround_prev].lane[(i + B) % 4].e, WORD_SIZE); // c,d->e
		  if(!(weight == T->state[iround].lane[(i + B) % 4].w)) {
			 // Print the mismatch before the assert below aborts.
			 printf("[%s:%d] weight %d T->[%d][%d].w %d\n", __FILE__, __LINE__, weight, iround, (i + B) % 4, T->state[iround].lane[(i + B) % 4].w);
		  }
		  assert(weight == T->state[iround].lane[(i + B) % 4].w);
		} 
#endif // #if 1 // DEBUG
	 }
  }

  T->state[iround].w = wstate; // state weight
  T->w += wstate; // update trail weight

  if(iround == 0) {
	 assert(T->w == 0);
  } else {
	 assert(wstate == S.w); // DEBUG
  }
  // NOTE(review): T->state[iround].w was assigned from wstate just above, so
  // this can only differ if the field is narrower than WORD_T; also the
  // message prints S.w while the condition compares T->state[iround].w.
  if(!(wstate == T->state[iround].w)) {
	 printf("[%s:%d] wstate %lld S.w %lld\n", __FILE__, __LINE__, (WORD_MAX_T)wstate, (WORD_MAX_T)S.w);
  }
  assert(wstate == T->state[iround].w);
#if 1 // DEBUG
  //  printf("[%s:%d] BEFORE iround %d\n", __FILE__, __LINE__, iround);
  //  norx_trail_print(*T, iround);
  norx_trail_assert(*T, iround);
  //  printf("[%s:%d]  AFTER iround %d\n", __FILE__, __LINE__, iround);
#endif // #if 1 // DEBUG
}

/* --- */

			 //			 T->state[iround_prev].lane[i].e = T->state[iround].lane[i].c; // e = cc
			 //			 T->state[iround_prev].lane[i].e = T->state[iround].lane[(i - C) % 4].c; // e = cc
			 //			 printf("[%s:%d] Set T->state[%d].lane[%d].e %d to T->state[%d].lane[%d].c %d\n", __FILE__, __LINE__,
			 //					  iround_prev, i, T->state[iround_prev].lane[i].e, iround, i, T->state[iround].lane[i].c);
			 //			 T->state[iround_prev].lane[i].e = T->state[iround].lane[i].c; // e = cc
			 //			 printf("[%s:%d] Set T->state[%d].lane[%d].e %d to T->state[%d].lane[%d].c %d\n", __FILE__, __LINE__,
			 //					  iround_prev, i, T->state[iround_prev].lane[i].e, iround, (i - C) % 4, T->state[iround].lane[(i - C) % 4].c);
			 //			 T->state[iround_prev].lane[i].e = T->state[iround].lane[(i - C) % 4].c; // e = cc
			 //			 printf("[%s:%d] Set T->state[%d].lane[%d].e %d to T->state[%d].lane[%d].c %d\n", __FILE__, __LINE__,
			 //					  iround_prev, i, T->state[iround_prev].lane[(i + C) % 4].e, iround, i, T->state[iround].lane[(i - C) % 4].c);
			 //			 T->state[iround_prev].lane[(i + B) % 4].e = T->state[iround].lane[i].c; // e = cc

/* --- */

/*
 * Store state \p S in slot \p iround of trail \p T and update the weights.
 *
 * Per lane: the slot must still be all-zero (asserted), then a, b, c, d, e
 * and the lane weight are copied from \p S (weight forced to 0 for
 * iround == 0). For iround >= 3 the word e of the previous state is
 * back-filled: from the new a word when the previous round is even, from a
 * new c word when it is odd. When (iround % 4) == 0 the c word is taken
 * from lane (i - C) % 4 instead of lane i.
 *
 * After the loop T->state[iround].w is set to the sum of the lane weights,
 * the same sum is added to T->w, and norx_trail_assert() is run.
 *
 * \param T trail to extend.
 * \param S state to store (passed by value).
 * \param iround index of the slot to fill; must be < NROUNDS_MAX.
 */
void norx_trail_add_state(norx_diff_trail_t* T, const norx_diff_state_t S, const uint32_t iround)
{
  assert(iround < NROUNDS_MAX);
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

  // Running sum of the lane weights stored for this round.
  WORD_T wstate = 0; // for DEBUG
  for(uint32_t i = 0; i < NLANES; i++) {
#if 1 // DEBUG
	 // The destination slot must not have been written before.
	 assert(T->state[iround].lane[i].w == 0);
	 assert(T->state[iround].lane[i].a == 0);
	 assert(T->state[iround].lane[i].b == 0);
	 assert(T->state[iround].lane[i].c == 0);
	 assert(T->state[iround].lane[i].d == 0);
	 assert(T->state[iround].lane[i].e == 0);
	 assert(S.lane[i].w < INF);
	 assert(S.lane[i].w >= 0);
#endif // #if 1 // DEBUG

	 if(iround == 0) {
		// the initial state has probability 1 == weight 0
		T->state[iround].lane[i].w = 0;
	 } else {
		T->state[iround].lane[i].w = S.lane[i].w;
	 }
	 wstate += T->state[iround].lane[i].w;

	 // Copy the five words of lane i verbatim.
	 T->state[iround].lane[i].a = S.lane[i].a;
	 T->state[iround].lane[i].b = S.lane[i].b;
	 T->state[iround].lane[i].c = S.lane[i].c;
	 T->state[iround].lane[i].d = S.lane[i].d;
	 T->state[iround].lane[i].e = S.lane[i].e;

	 // The first state has no predecessor to link with.
	 if(iround == 0) {
		continue;
	 }

	 uint32_t iround_prev = (iround - 1);
	 if((iround_prev & 1) == 0) { // prev round is even
		// iround is odd here, so it can never be a permuted round.
		assert((iround % 4) != 0);
		if(iround >= 3) {
		  assert(T->state[iround_prev].lane[i].e == 0); // must be unassigned
		  T->state[iround_prev].lane[i].e = T->state[iround].lane[i].a; // e = aa
		}
	 }
	 if((iround_prev & 1) == 1) { // prev round is odd
		assert(T->state[iround].lane[i].a == T->state[iround_prev].lane[i].a);
		if(iround >= 3) {
		  assert(T->state[iround_prev].lane[i].e == 0); // must be unassigned
		  if((iround % 4) != 0) { // no permutation
			 T->state[iround_prev].lane[i].e = T->state[iround].lane[i].c; // e = cc
		  } else { // permutation
			 // Read the word from S, not from T->state[iround]: only lanes 0..i
			 // of the trail slot have been copied at this point, so a lane index
			 // (i - C) % 4 greater than i would still hold its initial 0 there.
			 // For lanes already copied both sources hold the same value.
			 T->state[iround_prev].lane[i].e = S.lane[(i - C) % 4].c; // e = cc
		  }
		}
	 }
  }

  T->state[iround].w = wstate; // state weight
  T->w += wstate; // update trail weight

  if(iround == 0) {
	 assert(T->w == 0);
  } else {
	 assert(wstate == S.w); // DEBUG
  }
  // Report before aborting if the stored state weight differs from the sum.
  if(!(wstate == T->state[iround].w)) {
	 printf("[%s:%d] wstate %lld S.w %lld\n", __FILE__, __LINE__, (WORD_MAX_T)wstate, (WORD_MAX_T)S.w);
  }
  assert(wstate == T->state[iround].w);
#if 1 // DEBUG
  printf("[%s:%d] BEFORE iround %d\n", __FILE__, __LINE__, iround);
  norx_trail_print(*T, iround);
  norx_trail_assert(*T, iround);
  printf("[%s:%d]  AFTER iround %d\n", __FILE__, __LINE__, iround);
#endif // #if 1 // DEBUG
}

/* --- */

/*
 * Store state \p S in slot \p iround of trail \p T, update the weights and
 * assert the transition from T->state[iround - 1] lane by lane.
 *
 * Per lane: the slot must still be all-zero (asserted), then a, b, c, d, e
 * and the lane weight are copied from \p S (weight forced to 0 for
 * iround == 0). For iround >= 3 the word e of the previous state is
 * back-filled from the new a word (previous round even) or from a new c
 * word (previous round odd).
 *
 * NOTE(review): in the (iround % 4) == 0 branch several reads go to
 * T->state[iround].lane[(i - X) % 4]. At iteration i only lanes 0..i of
 * that slot have been copied from S (the top-of-loop asserts require the
 * remaining lanes to be zero), so any index greater than i reads 0 --
 * confirm whether S.lane[...] was intended.
 *
 * \param T trail to extend.
 * \param S state to store (passed by value).
 * \param iround index of the slot to fill; must be < NROUNDS_MAX.
 */
void norx_trail_add_state(norx_diff_trail_t* T, const norx_diff_state_t S, const uint32_t iround)
{
  assert(iround < NROUNDS_MAX);
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

  // Running sum of the lane weights stored for this round.
  WORD_T wstate = 0; // for DEBUG
  for(uint32_t i = 0; i < NLANES; i++) {
#if 1 // DEBUG
	 // The destination slot must not have been written before.
	 assert(T->state[iround].lane[i].w == 0);
	 assert(T->state[iround].lane[i].a == 0);
	 assert(T->state[iround].lane[i].b == 0);
	 assert(T->state[iround].lane[i].c == 0);
	 assert(T->state[iround].lane[i].d == 0);
	 assert(T->state[iround].lane[i].e == 0);
	 assert(S.lane[i].w < INF);
	 assert(S.lane[i].w >= 0);
#endif // #if 1 // DEBUG

	 if(iround == 0) {
		// the initial state has probability 1 == weight 0
		T->state[iround].lane[i].w = 0;
	 } else {
		T->state[iround].lane[i].w = S.lane[i].w;
	 }
	 wstate += T->state[iround].lane[i].w;

	 // Copy the five words of lane i verbatim.
	 T->state[iround].lane[i].a = S.lane[i].a;
	 T->state[iround].lane[i].b = S.lane[i].b;
	 T->state[iround].lane[i].c = S.lane[i].c;
	 T->state[iround].lane[i].d = S.lane[i].d;
	 T->state[iround].lane[i].e = S.lane[i].e;

	 // The first state has no predecessor to link with or to check against.
	 if(iround == 0) {
		continue;
	 }

	 uint32_t iround_prev = (iround - 1);
	 if((iround_prev & 1) == 0) { // prev round is even
		// iround is odd here, so it can never be a permuted round.
		assert((iround % 4) != 0);
		if(iround >= 3) {
		  assert(T->state[iround_prev].lane[i].e == 0); // must be unassigned
		  T->state[iround_prev].lane[i].e = T->state[iround].lane[i].a; // e = aa
		}
#if 1 // DEBUG
		// Check the transition: a == prev e, b and c unchanged,
		// d == XORROT(prev d, prev e, rotation constant),
		// weight == xdp_h(prev a, prev b -> prev e).
		assert(T->state[iround].lane[i].a == T->state[iround_prev].lane[i].e);
		assert(T->state[iround].lane[i].b == T->state[iround_prev].lane[i].b);
		assert(T->state[iround].lane[i].c == T->state[iround_prev].lane[i].c);
		WORD_T dd = XORROT(T->state[iround_prev].lane[i].d, T->state[iround_prev].lane[i].e, ROTCONST[(iround_prev % 4)]); // dd = (d ^ e) <<< r0,r2
		assert(T->state[iround].lane[i].d == dd);
		uint32_t weight = 
		  xdp_h(T->state[iround_prev].lane[i].a, T->state[iround_prev].lane[i].b, T->state[iround_prev].lane[i].e, WORD_SIZE); // a,b->e
		assert(weight == T->state[iround].lane[i].w);
#endif // #if 1 // DEBUG
	 }
	 if((iround_prev & 1) == 1) { // prev round is odd
		assert(T->state[iround].lane[i].a == T->state[iround_prev].lane[i].a);
		if(iround >= 3) {
		  assert(T->state[iround_prev].lane[i].e == 0); // must be unassigned
		  if((iround % 4) != 0) { // no permutation
			 T->state[iround_prev].lane[i].e = T->state[iround].lane[i].c; // e = cc
		  } else { // permutation
			 // NOTE(review): lane (i - C) % 4 of T->state[iround] may not have
			 // been copied yet at this iteration -- see the header note.
			 T->state[iround_prev].lane[i].e = T->state[iround].lane[(i - C) % 4].c; // e = cc
		  }
		}
#if 1 // DEBUG
		WORD_T bb = XORROT(T->state[iround_prev].lane[i].b, T->state[iround_prev].lane[i].e, ROTCONST[iround_prev % 4]); // bb = (b ^ c) <<< r1,r3
		if((iround % 4) != 0) { // no permutation
		  // Check the transition: b == bb, c == prev e, d unchanged.
		  assert(T->state[iround].lane[i].b == bb);
		  assert(T->state[iround].lane[i].c == T->state[iround_prev].lane[i].e);
		  assert(T->state[iround].lane[i].d == T->state[iround_prev].lane[i].d);
		} else { // permutation

#if 1
		  // Unconditional debug output, emitted once per lane on every permuted round.
		  printf("[%s:%d] Previous state T[%d].state:\n", __FILE__, __LINE__, iround_prev);
		  norx_state_print(T->state[iround_prev]);
		  printf("[%s:%d] Next state T[%d].state:\n", __FILE__, __LINE__, iround);
		  norx_state_print(T->state[iround]);
		  printf("[%s:%d] Next state iround = %d:\n", __FILE__, __LINE__, iround);
		  norx_state_print(S);

		  printf("[%s:%d] R0123 %d %d %d %d\n", __FILE__, __LINE__, R0, R1, R2, R3);
		  printf("[%s:%d] i = %d T->state[%d].lane[%d].b %d != bb %d\n", __FILE__, __LINE__, i, iround, (i + B) % 4, T->state[iround].lane[(i + B) % 4].b, bb);
		  printf("[%s:%d] i %d (i + D) %d (i + B) %d (i + C) %d\n", __FILE__, __LINE__, i, (i+D) % 4, (i+B) % 4, (i+C) % 4);
#endif 

#if 1 // DEBUG
		  // Print the operands of the three asserts that follow.
		  printf("[%s:%d] i = %d\n", __FILE__, __LINE__, i);
		  printf("[%s:%d] assert T->state[%d].lane[%d].b %d == bb %d\n", __FILE__, __LINE__, 
					iround, (i - B) % 4, T->state[iround].lane[(i - B) % 4].b, bb);
		  printf("[%s:%d] assert T->state[%d].lane[%d].c %d == T->state[%d].lane[%d].e %d\n", __FILE__, __LINE__, 
					iround, (i - C) % 4, T->state[iround].lane[(i - C) % 4].c, 
					iround_prev, i, T->state[iround_prev].lane[i].e);
		  printf("[%s:%d] assert T->state[%d].lane[%d].d %d == T->state[%d].lane[%d].d %d\n", __FILE__, __LINE__, 
					iround, (i - D) % 4, T->state[iround].lane[(i - D) % 4].d, 
					iround_prev, i, T->state[iround_prev].lane[i].d);
#endif // #if 1 // DEBUG
		  // NOTE(review): these compare against lanes of T->state[iround] that
		  // may still be zero at this iteration -- see the header note.
		  assert(T->state[iround].lane[(i - B) % 4].b == bb);
		  assert(T->state[iround].lane[(i - C) % 4].c == T->state[iround_prev].lane[i].e);
		  assert(T->state[iround].lane[(i - D) % 4].d == T->state[iround_prev].lane[i].d);
		}		  
		// Weight check uses lane i on both sides, in the permuted case as well.
		uint32_t weight = 
		  xdp_h(T->state[iround_prev].lane[i].c, T->state[iround_prev].lane[i].d, T->state[iround_prev].lane[i].e, WORD_SIZE); // c,d->e
		assert(weight == T->state[iround ].lane[i].w);
#endif // #if 1 // DEBUG
	 }
  }

  T->state[iround].w = wstate; // state weight
  T->w += wstate; // update trail weight

  if(iround == 0) {
	 assert(T->w == 0);
  } else {
	 assert(wstate == S.w); // DEBUG
  }
  // Report before aborting if the stored state weight differs from the sum.
  if(!(wstate == T->state[iround].w)) {
	 printf("[%s:%d] wstate %lld S.w %lld\n", __FILE__, __LINE__, (WORD_MAX_T)wstate, (WORD_MAX_T)S.w);
  }
  assert(wstate == T->state[iround].w);
#if 1 // DEBUG
  printf("[%s:%d] BEFORE iround %d\n", __FILE__, __LINE__, iround);
  norx_trail_print(*T, iround);
  norx_trail_assert(*T, iround);
  printf("[%s:%d]  AFTER iround %d\n", __FILE__, __LINE__, iround);
#endif // #if 1 // DEBUG
}

/* --- */

/**
 * Shift the b, c and d words across the four lanes so that every lane
 * ends up holding one diagonal of the 4x4 word matrix (row = word
 * a/b/c/d, column = lane in the order ZERO, ONE, TWO, THREE).
 *
 * Each lane keeps its a word, takes b from the next lane, c from the
 * lane two positions ahead and d from the lane three positions ahead
 * (positions wrap around). Only the a, b, c, d words are involved; no
 * other lane field is read or written.
 */
void norx_state_permute(norx_diff_state_t* S)
{
  // Row a stays where it is.

  // Row b: rotate left by one lane.
  const WORD_T b_wrap = S->lane[ZERO].b;
  S->lane[ZERO].b = S->lane[ONE].b;
  S->lane[ONE].b = S->lane[TWO].b;
  S->lane[TWO].b = S->lane[THREE].b;
  S->lane[THREE].b = b_wrap;

  // Row c: rotate by two lanes, i.e. exchange opposite lanes.
  const WORD_T c_zero = S->lane[ZERO].c;
  const WORD_T c_one = S->lane[ONE].c;
  S->lane[ZERO].c = S->lane[TWO].c;
  S->lane[ONE].c = S->lane[THREE].c;
  S->lane[TWO].c = c_zero;
  S->lane[THREE].c = c_one;

  // Row d: rotate left by three lanes, i.e. right by one.
  const WORD_T d_wrap = S->lane[THREE].d;
  S->lane[THREE].d = S->lane[TWO].d;
  S->lane[TWO].d = S->lane[ONE].d;
  S->lane[ONE].d = S->lane[ZERO].d;
  S->lane[ZERO].d = d_wrap;
}

/**
 * Undo norx_state_permute(): move the b, c and d words back across the
 * four lanes (lane order ZERO, ONE, TWO, THREE).
 *
 * Each lane keeps its a word, takes b from the previous lane, c from
 * the lane two positions back and d from the lane three positions back
 * (positions wrap around). Only the a, b, c, d words are involved; no
 * other lane field is read or written.
 */
void norx_state_permute_invert(norx_diff_state_t* S)
{
  // Row a stays where it is.

  // Row b: rotate right by one lane.
  const WORD_T b_wrap = S->lane[THREE].b;
  S->lane[THREE].b = S->lane[TWO].b;
  S->lane[TWO].b = S->lane[ONE].b;
  S->lane[ONE].b = S->lane[ZERO].b;
  S->lane[ZERO].b = b_wrap;

  // Row c: rotate by two lanes, i.e. exchange opposite lanes.
  const WORD_T c_zero = S->lane[ZERO].c;
  const WORD_T c_one = S->lane[ONE].c;
  S->lane[ZERO].c = S->lane[TWO].c;
  S->lane[ONE].c = S->lane[THREE].c;
  S->lane[TWO].c = c_zero;
  S->lane[THREE].c = c_one;

  // Row d: rotate right by three lanes, i.e. left by one.
  const WORD_T d_wrap = S->lane[ZERO].d;
  S->lane[ZERO].d = S->lane[ONE].d;
  S->lane[ONE].d = S->lane[TWO].d;
  S->lane[TWO].d = S->lane[THREE].d;
  S->lane[THREE].d = d_wrap;
}

/* --- */
/**
 * Store the state \p S as element \p iround of the trail \p T and add the
 * weight of the state to the total trail weight T->w.
 *
 * The slot T->state[iround] must be empty (all lane words and lane weights
 * equal to zero). For iround == 0 the lane weights are stored as 0,
 * otherwise they are copied from \p S. For iround >= 3 the word e of the
 * previous trail state is filled in from a (possibly transformed) copy of
 * \p S. The `#if 1 // DEBUG` sections additionally assert that the new
 * state is consistent with the previous state of the trail.
 *
 * \param T trail that is extended in place.
 * \param S state to be added; for iround > 0 its weight S.w must be equal
 *          to the sum of its lane weights.
 * \param iround index of the new state in the trail; must be < NROUNDS_MAX.
 */
void norx_trail_add_state(norx_diff_trail_t* T, const norx_diff_state_t S, const uint32_t iround)
{
  assert(iround < NROUNDS_MAX);
  // working copy of S: it is the copy that is compared against the previous
  // trail state below; for the rounds selected by the next condition it is
  // first passed through norx_state_col_to_dia()
  norx_diff_state_t S_trans = S;
  if((iround > 1) && (iround != (g_nrounds - 1)) && ((iround % 4) == 0)) {
	 norx_state_col_to_dia(&S_trans); // invert the permutation for debugging
  }

  WORD_T wstate = 0; // for DEBUG
  for(uint32_t i = 0; i < NLANES; i++) {
#if 1 // DEBUG
	 // the destination slot must still be unassigned and the lane weight
	 // of the new state must be a valid (finite) weight
	 assert(T->state[iround].lane[i].w == 0);
	 assert(T->state[iround].lane[i].a == 0);
	 assert(T->state[iround].lane[i].b == 0);
	 assert(T->state[iround].lane[i].c == 0);
	 assert(T->state[iround].lane[i].d == 0);
	 assert(T->state[iround].lane[i].e == 0);
	 assert(S.lane[i].w < INF);
	 assert(S.lane[i].w >= 0);
#endif // #if 1 // DEBUG

	 if(iround == 0) {
		T->state[iround].lane[i].w = 0; // the initial state has
												  // probability 1 == weight 0
	 } else {
		T->state[iround].lane[i].w = S.lane[i].w;
	 }
	 wstate += T->state[iround].lane[i].w; // accumulate the state weight

	 // copy the words of the lane (taken from S, not from S_trans)
	 T->state[iround].lane[i].a = S.lane[i].a;
	 T->state[iround].lane[i].b = S.lane[i].b;
	 T->state[iround].lane[i].c = S.lane[i].c;
	 T->state[iround].lane[i].d = S.lane[i].d;
	 T->state[iround].lane[i].e = S.lane[i].e;

	 // the first state has no predecessor to be linked to / checked against
	 if(iround == 0) {
		continue;
	 }

	 uint32_t iround_prev = (iround - 1);
	 if((iround_prev & 1) == 0) { // prev round is even
		if(iround >= 3) {
		  assert(T->state[iround_prev].lane[i].e == 0); // must be unassigned
		  //		  T->state[iround_prev].lane[i].e = S.lane[i].a; // e = aa
		  T->state[iround_prev].lane[i].e = S_trans.lane[i].a; // e = aa
		}
#if 1 // DEBUG
		// the new a is the output e of the previous state, b and c are
		// unchanged, d is recomputed from the previous d and e
		assert(S_trans.lane[i].a == T->state[iround_prev].lane[i].e);
		assert(S_trans.lane[i].b == T->state[iround_prev].lane[i].b);
		assert(S_trans.lane[i].c == T->state[iround_prev].lane[i].c);
		WORD_T dd = XORROT(T->state[iround_prev].lane[i].d, T->state[iround_prev].lane[i].e, ROTCONST[(iround_prev % 4)]); // dd = (d ^ e) <<< r0,r2
		assert(S_trans.lane[i].d == dd);
		// the lane weight must be the weight of the transition a,b -> e
		uint32_t weight = 
		  xdp_h(T->state[iround_prev].lane[i].a, T->state[iround_prev].lane[i].b, T->state[iround_prev].lane[i].e, WORD_SIZE); // a,b->e
		assert(weight == S_trans.lane[i].w);
#endif // #if 1 // DEBUG
	 }
	 if((iround_prev & 1) == 1) { // prev round is odd
		assert(S.lane[i].a == T->state[iround_prev].lane[i].a);
		if(iround >= 3) {
		  assert(T->state[iround_prev].lane[i].e == 0); // must be unassigned
		  //		  T->state[iround_prev].lane[i].e = S.lane[i].c; // e = cc
		  T->state[iround_prev].lane[i].e = S_trans.lane[i].c; // e = cc
		}
#if 1 // DEBUG
		// the new c is the output e of the previous state, d is unchanged,
		// b is recomputed from the previous b and e
		WORD_T bb = XORROT(T->state[iround_prev].lane[i].b, T->state[iround_prev].lane[i].e, ROTCONST[iround_prev % 4]); // bb = (b ^ c) <<< r1,r3
		assert(S_trans.lane[i].b == bb);
		assert(S_trans.lane[i].c == T->state[iround_prev].lane[i].e);
		assert(S_trans.lane[i].d == T->state[iround_prev].lane[i].d);
		// the lane weight must be the weight of the transition c,d -> e
		uint32_t weight = 
		  xdp_h(T->state[iround_prev].lane[i].c, T->state[iround_prev].lane[i].d, T->state[iround_prev].lane[i].e, WORD_SIZE); // c,d->e
		assert(weight == S_trans.lane[i].w);
#endif // #if 1 // DEBUG
	 }
  }

  T->state[iround].w = wstate; // state weight
  T->w += wstate; // update trail weight

  if(iround == 0) {
	 assert(T->w == 0);
  } else {
	 assert(wstate == S.w); // DEBUG
  }
  // sanity check of the value stored just above; prints both weights
  // before the assert below fires
  if(!(wstate == T->state[iround].w)) {
	 printf("[%s:%d] wstate %lld S.w %lld\n", __FILE__, __LINE__, (WORD_MAX_T)wstate, (WORD_MAX_T)S.w);
  }
  assert(wstate == T->state[iround].w);
}

/* --- */
		// DEBUG: report when the trail weight g_T.w plus g_B[g_nrounds - 2]
		// exceeds g_Bn
		if(!((g_T.w  + g_B[g_nrounds - 2]) <= g_Bn)) {
		  // the first value printed is the sum named by the label
		  printf("[%s:%d] (g_T.w  + g_B[g_nrounds - 2]) %d g_Bn %d\n", __FILE__, __LINE__, (g_T.w  + g_B[g_nrounds - 2]), g_Bn);
		}

/* --- */

		  // DEBUG: print the weight of S_next and the trail weight g_T.w at
		  // the checkpoint labelled "AFTER 2"
		  printf("[%s:%d] AFTER 2: S_next.w %d g_T.w %d\n", __FILE__, __LINE__, S_next.w, g_T.w);

		  // DEBUG: when (g_T.w + g_B[g_nrounds - 2]) <= g_Bn is violated,
		  // print both terms, their sum and g_Bn
		  if(!((g_T.w  + g_B[g_nrounds - 2]) <= g_Bn)) {
			 printf("[%s:%d] (g_T.w  + g_B[%d]) = %d + %d = %d != g_Bn %d\n", __FILE__, __LINE__, (g_nrounds - 2), g_T.w, g_B[g_nrounds - 2], (g_T.w  + g_B[g_nrounds - 2]), g_Bn);
		  }

/* --- */

// DEBUG: print the trail g_T (norx_trail_print is called with iround) and
// the state S
#if 1 // DEBUG
		norx_trail_print(g_T, iround);
		norx_state_print(S);
#endif // #if 1 // DEBUG

// DEBUG: for every lane print the lane of S followed by the lane of
// g_T.state[iround] and assert that their words a are equal
#if 1 // DEBUG
		for(uint32_t i = 0; i < NLANES; i++) {
		  // announce a mismatch before the assert below aborts
		  if(!(g_T.state[iround].lane[i].a == S.lane[i].a)) {
			 printf("[%s:%d] lane %d T.a =  %X S.a = %X\n", __FILE__, __LINE__, i, g_T.state[iround].lane[i].a, S.lane[i].a);
		  }
		  // lane of the state S, printed in the order d c b a | e w
		  norx_diff_lane_t L = S.lane[i];
		  printf("%llX %llX %llX %llX | %llX %lld (dcba|ew)\n", 
					(WORD_MAX_T)L.d, (WORD_MAX_T)L.c, (WORD_MAX_T)L.b, (WORD_MAX_T)L.a, (WORD_MAX_T)L.e, (WORD_MAX_T)L.w);

		  // same lane of the trail state, printed in the same order
		  norx_diff_lane_t LL = g_T.state[iround].lane[i];
		  printf("%llX %llX %llX %llX | %llX %lld (dcba|ew)\n", 
					(WORD_MAX_T)LL.d, (WORD_MAX_T)LL.c, (WORD_MAX_T)LL.b, (WORD_MAX_T)LL.a, (WORD_MAX_T)LL.e, (WORD_MAX_T)LL.w);

		  // only the word a is checked; the checks of b, c, d are disabled
		  assert(g_T.state[iround].lane[i].a == S.lane[i].a);
		  //		  assert(g_T.state[iround].lane[i].b == S.lane[i].b);
		  //		  assert(g_T.state[iround].lane[i].c == S.lane[i].c);
		  //		  assert(g_T.state[iround].lane[i].d == S.lane[i].d);
		}
#endif // #if 1 // DEBUG

/* --- */

// Disabled code: mask with the (word_size - 1) least-significant bits set,
// built from a 32-bit or a 64-bit all-one constant depending on WORD_SIZE.
#if 0
#if(WORD_SIZE <= 32)
	const WORD_T mask = (0xffffffffUL >> (32 - (word_size - 1)));
#else // #if(WORD_SIZE <= 32)
	const WORD_T mask = (0xffffffffffffffffULL >> (64 - (word_size - 1)));
#endif // #if(WORD_SIZE <= 32)
#endif


/* --- */

  // DEBUG: print zero_cond (in hex and in decimal) followed by the
  // differential da, db -> dc; the code using b_prob_iszero is disabled
  //  bool b_prob_iszero = (zero_cond != 0);
  printf("[%s:%d] zero_cond %X %d: %X %X -> %X\n", __FILE__, __LINE__, zero_cond, zero_cond, da, db, dc);
  //  printf("[%s:%d] b_prob_iszero %d\n", __FILE__, __LINE__, b_prob_iszero);
  //  if(b_prob_iszero) {

/* --- */


	 // DEBUG: the words a and b of every lane of S must be equal to those
	 // stored in the trail state with index 1
	 for(uint32_t ilane = 0; ilane < NLANES; ilane++) {
		assert(S.lane[ilane].a == g_T.state[1].lane[ilane].a);
		assert(S.lane[ilane].b == g_T.state[1].lane[ilane].b);
	 }


/* --- */

		// DEBUG: print the state S and the trail states at iround - 1 and
		// iround; the labels "T[0]" and "T[1]" are fixed strings and do
		// not depend on iround
		printf("[%s:%d] S\n", __FILE__, __LINE__);
		norx_state_print(S);
		printf("[%s:%d] T[0]\n", __FILE__, __LINE__);
		norx_state_print(g_T.state[iround-1]);
		printf("[%s:%d] T[1]\n", __FILE__, __LINE__);
		norx_state_print(g_T.state[iround]);
		//		norx_state_print(S_next);

/* --- */

		  // DEBUG: consistency checks for one lane (ilane) between the trail
		  // states 0 and 1 and between state 1 (== S) and the next state S_next
		  // --- check 0 vs 1
		  // a of state 1 is the output e of state 0; b and c are unchanged
		  assert(g_T.state[1].lane[ilane].a == g_T.state[0].lane[ilane].e);
		  assert(g_T.state[1].lane[ilane].b == g_T.state[0].lane[ilane].b);
		  assert(g_T.state[1].lane[ilane].c == g_T.state[0].lane[ilane].c);
		  WORD_T dd = XORROT(g_T.state[0].lane[ilane].d, g_T.state[0].lane[ilane].e, ROTCONST[(0 % 4)]); // dd = (d ^ e) <<< r0,r2
		  assert(g_T.state[1].lane[ilane].d == dd);
		  // the lane weight of state 1 is the weight of a,b -> e in state 0
		  uint32_t weight = 
			 xdp_h(g_T.state[0].lane[ilane].a, g_T.state[0].lane[ilane].b, g_T.state[0].lane[ilane].e, WORD_SIZE); // a,b->e
		  assert(weight == g_T.state[1].lane[ilane].w);

		  // --- check 1 vs 2
		  assert(g_T.state[0].lane[ilane].e == g_T.state[1].lane[ilane].a);

		  assert(S_next.lane[ilane].a == S.lane[ilane].a);

		  // S must be identical to the trail state 1 in a, b, d, e
		  assert(S.lane[ilane].a == g_T.state[1].lane[ilane].a);
		  assert(S.lane[ilane].b == g_T.state[1].lane[ilane].b);
		  assert(S.lane[ilane].e == g_T.state[1].lane[ilane].e);
		  assert(S_next.lane[ilane].d == S.lane[ilane].d);
		  assert(S.lane[ilane].d == g_T.state[1].lane[ilane].d);

		  // a and d of S_next are unchanged, c is the output e of state 1 and
		  // b is recomputed from b and e of state 1
		  assert(S_next.lane[ilane].a == g_T.state[1].lane[ilane].a);
		  WORD_T bb = XORROT(g_T.state[1].lane[ilane].b, g_T.state[1].lane[ilane].e, ROTCONST[1 % 4]); // bb = (b ^ c) <<< r1,r3
		  assert(S_next.lane[ilane].b == bb);
		  assert(S_next.lane[ilane].c == g_T.state[1].lane[ilane].e);
		  assert(S_next.lane[ilane].d == g_T.state[1].lane[ilane].d);
		  // the weight of c,d -> e must be the same whether it is computed
		  // from the trail state 1 or from S, and must equal the lane weight
		  // of S_next
		  weight = 
			 xdp_h(g_T.state[1].lane[ilane].c, g_T.state[1].lane[ilane].d, g_T.state[1].lane[ilane].e, WORD_SIZE); // c,d->e
		  uint32_t weight_tmp = 
			 xdp_h(S.lane[ilane].c, S.lane[ilane].d, S.lane[ilane].e, WORD_SIZE); // c,d->e
		  assert(weight == weight_tmp);
		  assert(S.lane[ilane].c == g_T.state[1].lane[ilane].c);
		  assert(S.lane[ilane].d == g_T.state[1].lane[ilane].d);
		  assert(S.lane[ilane].e == g_T.state[1].lane[ilane].e);
		  assert(weight == S_next.lane[ilane].w);

/* --- */

  // First round of the recursive trail search (fragment of
  // norx_diff_trail_search). While ibit < WORD_SIZE the bit position ibit
  // of the words a, b, e is assigned in all four lanes, one lane per group
  // of nested loops; every assignment whose partial weight still satisfies
  // (S_part.w + g_B[g_nrounds - 2]) <= g_Bn is extended by recursing with
  // ibit + 1. Once ibit == WORD_SIZE the state is added to the trail and
  // the search proceeds to the next round.
  if(iround == 0) {
#if 1 // FIRST ROUND
	 printf("[%s:%d] First iround %d\n", __FILE__, __LINE__, iround);

	 if (ibit == WORD_SIZE) {

		// all bits assigned: record S as state 0 and the state computed
		// from it as state 1, recurse into the next round, then take
		// state 1 off the trail again
		// NOTE(review): the recursion is called with S, not with S_next,
		// and the state added at index 0 is not removed here -- confirm
		// that both are intended
		norx_trail_add_state(&g_T, S, 0); // add input state
		norx_diff_state_t S_next;
		norx_state_init(&S_next);
		norx_compute_next_state(&S_next, S, iround);
		norx_trail_add_state(&g_T, S_next, iround + 1);
		assert(g_T.w <= g_Bn);
		norx_diff_trail_search(iround + 1, 0, S);
		norx_trail_remove_state(&g_T, S_next, iround + 1);

	 } else {
		// counters
		WORD_T ja[NLANES] = {0};
		WORD_T jb[NLANES] = {0};
		WORD_T je[NLANES] = {0};

		// lane 0
		for (ja[ZERO] = 0; ja[ZERO] < g_NBITS[ZERO][A]; ja[ZERO]++) { // a
		  for (jb[ZERO] = 0; jb[ZERO] < g_NBITS[ZERO][B]; jb[ZERO]++) { // b
			 for (je[ZERO] = 0; je[ZERO] < 2; je[ZERO]++) { // e

				// a fresh partial state for every assignment of lane 0
				norx_diff_state_t S_part; // partial state
				norx_state_init(&S_part);

				printf("[%s:%d] S_part.w %d\n", __FILE__, __LINE__, S_part.w);

				norx_lane_assign_bit(&S_part.lane[ZERO], S.lane[ZERO], ja[ZERO], jb[ZERO], je[ZERO], ibit, iround);
				S_part.w += S_part.lane[ZERO].w; // aggregate prob of state

				printf("[%s:%d] %d %d %d %d %d\n", __FILE__, __LINE__, ja[ZERO], jb[ZERO], je[ZERO], S_part.lane[ZERO].w, S_part.w);

				// continue with lane 1 only if the bound can still be met
				if((S_part.w + g_B[g_nrounds - 2]) <= g_Bn) {

				  printf("[%s:%d] CHECKPOINT\n", __FILE__, __LINE__);

				  // lane 1
				  for (ja[ONE] = 0; ja[ONE] < g_NBITS[ONE][A]; ja[ONE]++) { // a
					 for (jb[ONE] = 0; jb[ONE] < g_NBITS[ONE][B]; jb[ONE]++) { // b
						for (je[ONE] = 0; je[ONE] < 2; je[ONE]++) { // e

						  // NOTE(review): S_part.w is incremented on every
						  // iteration of the lane 1, 2 and 3 loops and is never
						  // reset, so it also contains the lane weights added by
						  // earlier iterations -- confirm that this is intended
						  norx_lane_assign_bit(&S_part.lane[ONE], S.lane[ONE], ja[ONE], jb[ONE], je[ONE], ibit, iround);
						  S_part.w += S_part.lane[ONE].w; // aggregate prob of state
						  assert(S_part.lane[ONE].w < INF);

						  if((S_part.w + g_B[g_nrounds - 2]) <= g_Bn) {

				          // lane 2
							 for (ja[TWO] = 0; ja[TWO] < g_NBITS[TWO][A]; ja[TWO]++) { // a
								for (jb[TWO] = 0; jb[TWO] < g_NBITS[TWO][B]; jb[TWO]++) { // b
								  for (je[TWO] = 0; je[TWO] < 2; je[TWO]++) { // e

									 norx_lane_assign_bit(&S_part.lane[TWO], S.lane[TWO], ja[TWO], jb[TWO], je[TWO], ibit, iround);
									 S_part.w += S_part.lane[TWO].w; // aggregate prob of state
									 assert(S_part.lane[TWO].w < INF);

									 if((S_part.w + g_B[g_nrounds - 2]) <= g_Bn) {

				                  // lane 3
										for (ja[THREE] = 0; ja[THREE] < g_NBITS[THREE][A]; ja[THREE]++) { // a
										  for (jb[THREE] = 0; jb[THREE] < g_NBITS[THREE][B]; jb[THREE]++) { // b
											 for (je[THREE] = 0; je[THREE] < 2; je[THREE]++) { // e

												norx_lane_assign_bit(&S_part.lane[THREE], S.lane[THREE], ja[THREE], jb[THREE], je[THREE], ibit, iround);
												S_part.w += S_part.lane[THREE].w; // aggregate prob of state
												assert(S_part.lane[THREE].w < INF);

												// all four lanes assigned at this bit
												// position: proceed to the next bit
												if((S_part.w + g_B[g_nrounds - 2]) <= g_Bn) {
												  norx_diff_trail_search(iround, ibit + 1, S_part);
												}
											 }
										  }
										}
									 }
								  }
								}
							 }
						  }
						}
					 }
				  }
				}
			 }
		  }
		}
	 }
#endif // #if 1 // FIRST ROUND
  }

/* --- */

// DEBUG: print the words a, b, e of the lane L_ext, the bit index and the
// lane weight whenever the weight is not >= 0.
// NOTE(review): if the member w has an unsigned type the condition can
// never be true -- confirm the type of w.
#if 1 // DEBUG
	 if(!(L_ext->w >= 0)) {
		printf("[%s:%d] abe %X %X %X ibit %d w %d\n", __FILE__, __LINE__, L_ext->a, L_ext->b, L_ext->e, ibit, L_ext->w);
	 }
#endif // #if 1 // DEBUG


/* --- */
/**
 * Weight of the XOR differential (da, db -> dc) of the function H.
 *
 * \param da first input difference.
 * \param db second input difference.
 * \param dc output difference.
 * \return INF if the differential is impossible; otherwise the Hamming
 *         weight of ((da | db) << 1) restricted to MASK.
 */
int xdp_h(WORD_T da, WORD_T db, WORD_T dc)
{
  // positions at which dc is allowed to deviate from da ^ db
  const WORD_T shifted_or = ((da | db) << 1);
  // any deviation outside of those positions makes the differential impossible
  const WORD_T violation = ((da ^ db ^ dc) & (~shifted_or)) & MASK;
  if(violation == 0) {
    return hamming_weight(shifted_or & MASK);
  }
  return INF;
}

/*
 * The XOR DP of H, computed experimentally: all pairs of inputs (x, y)
 * are enumerated and the fraction of pairs for which the input
 * differences (da, db) lead to the output difference dc is returned.
 * Only available for WORD_SIZE <= 10; otherwise the result is 0.0
 * (after the assert).
 */
double xdp_h_exper(WORD_T da, WORD_T db, WORD_T dc)
{
  assert(WORD_SIZE <= 10);
  double prob = 0.0;
#if(WORD_SIZE <= 10)
  WORD_T nmatch = 0; // number of input pairs that follow the differential
  for(WORD_T in_x = 0; in_x < ALL_WORDS; in_x++) {
    // second element of the pair for the first input
    const WORD_T in_x2 = (in_x ^ da) & MASK;
    for(WORD_T in_y = 0; in_y < ALL_WORDS; in_y++) {
      const WORD_T in_y2 = (in_y ^ db) & MASK;
      const WORD_T out_1 = H(in_x, in_y) & MASK;
      const WORD_T out_2 = H(in_x2, in_y2) & MASK;
      const WORD_T out_diff = (out_1 ^ out_2) & MASK;
      nmatch += (out_diff == dc) ? 1 : 0;
    }
  }
  prob = (double)nmatch / (double)(ALL_WORDS * ALL_WORDS);
#endif // #if(WORD_SIZE <= 10)
  return prob;
}


/* --- */

/**
 * Check if the XOR differential (da, db -> dc) of H is impossible when
 * only the word_size least-significant bits are taken into account.
 *
 * \param da first input difference.
 * \param db second input difference.
 * \param dc output difference.
 * \param word_size number of least-significant bits that are checked.
 * \return true if the differential has probability zero, false otherwise.
 */
bool xdp_h_iszero(WORD_T da, WORD_T db, WORD_T dc, uint32_t word_size)
{
  // Build the mask without ever shifting by the full width of the type:
  // a shift count >= the width of the operand is undefined behaviour, so
  // the former ~(all_ones << word_size) was not reliable for a word_size
  // equal to the width of the constant.
  const uint32_t nbits = (uint32_t)(8 * sizeof(WORD_T)); // assumes 8-bit bytes
  WORD_T mask = (WORD_T)(~((WORD_T)0)); // all bits of WORD_T
  if(word_size < nbits) {
    mask = (WORD_T)((((WORD_T)1) << word_size) - 1);
  }
  // dc may differ from (da ^ db) only at the positions set in ((da | db) << 1)
  WORD_T c = ((da ^ db ^ dc) & (~((da | db) << 1))) & mask;
  return (c != 0);
}

/* --- */

/**
 * Check if the XOR differential (da, db -> dc) of H is impossible.
 *
 * \return true if dc differs from (da ^ db) at a position (within MASK)
 *         that is not set in ((da | db) << 1), false otherwise.
 */
bool xdp_h_iszero(WORD_T da, WORD_T db, WORD_T dc)
{
  // positions at which the output difference is allowed to deviate
  const WORD_T allowed = ((da | db) << 1);
  // deviations of dc from da ^ db outside of the allowed positions
  const WORD_T mismatch = (da ^ db ^ dc) & (~allowed);
  return ((mismatch & MASK) != 0);
}


/* --- */

/**
 * Check if the XOR differential (da, db -> dc) of H is impossible.
 *
 * \return true if the differential has probability zero, false otherwise.
 */
bool xdp_h_iszero(WORD_T da, WORD_T db, WORD_T dc)
{
  const WORD_T dxor = (da ^ db ^ dc);
  // positions at which dc has to be equal to da ^ db
  const WORD_T fixed_pos = ~((da | db) << 1);
  if(((dxor & fixed_pos) & MASK) == 0) {
    return false;
  }
  return true;
}

/**
 * Weight of the XOR differential (da, db -> dc) of the function H.
 *
 * \return INF if xdp_h_iszero() reports the differential as impossible;
 *         otherwise the Hamming weight of ((da | db) << 1) within MASK.
 */
int xdp_h(WORD_T da, WORD_T db, WORD_T dc)
{
  const bool b_impossible = xdp_h_iszero(da, db, dc);
  if(!b_impossible) {
    return hamming_weight(((da | db) << 1) & MASK);
  }
  return INF;
}


/* --- */

/**
 * Check if the XOR differential (da, db -> dc) of H is impossible
 * (debug variant: prints the mask of the fixed positions and its two
 * least-significant bits on every call).
 *
 * The result is computed from (da ^ db ^ dc), i.e. with the same
 * condition as the other definitions of xdp_h_iszero() in this file; an
 * experimental expression that used (da >> 1) and (db >> 1) instead and
 * the locals that were never read have been removed.
 *
 * \param da first input difference.
 * \param db second input difference.
 * \param dc output difference.
 * \return true if the differential has probability zero, false otherwise.
 */
bool xdp_h_iszero(uint32_t da, uint32_t db, uint32_t dc)
{
  // positions (within MASK) at which dc has to be equal to da ^ db
  const uint32_t t = (~((da | db) << 1))& MASK;
  const uint32_t x = 1 & t;
  const uint32_t y = 2 & t;
  printf("[%s:%d] t %X x %X y %X\n", __FILE__, __LINE__, t, x, y);
  const uint32_t c = (da ^ db ^ dc) & t;
  return (c != 0);
}


/* --- */

/* 

LAX16: linear

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ ./bin/lax-best-linear-search-tests
#--- [./tests/lax-best-linear-search-tests.cc:620] Tests, WORD_SIZE  8 g_bn 5
-- g_nRounds = 2
-- g_Bn =  +0 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: 0 -> 0
trail found! [0 s] {648 nodes -> nan nodes/s}
 0: M_LR        0       D7   +0
 1: M_LR        1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR       79       79   +0  | a b c        1        1        1 c 2^  +0 1.00
corr_trail 2^+0
-- g_nRounds = 3
-- g_Bn =  +0 ... no trail found! [0 s] {1554 nodes -> nan nodes/s}
-- g_Bn =  -1 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -1 -> -1
trail found! [0 s] {2927 nodes -> nan nodes/s}
 0: M_LR        1        1   +0
 1: M_LR        0       79   +0  | a b c        1        1        1 c 2^  +0 1.00
 2: M_LR        2        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 3: M_LR       F2       F2   -1  | a b c        2        2        2 c 2^  -1 0.50
corr_trail 2^-1
-- g_nRounds = 4
-- g_Bn =  -1 ... no trail found! [0 s] {5826 nodes -> nan nodes/s}
-- g_Bn =  -2 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -2 -> -2
trail found! [0 s] {343 nodes -> nan nodes/s}
 0: M_LR        0        6   +0
 1: M_LR       C1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR        C        C   -1  | a b c       C1       C1       C1 c 2^  -1 0.50
 3: M_LR        0       55   -1  | a b c        C        C        C c 2^  -1 0.50
 4: M_LR       18        0   +0  | a b c        0        0        0 c 2^  +0 1.00
corr_trail 2^-2
-- g_nRounds = 5
-- g_Bn =  -2 ... no trail found! [0 s] {6894 nodes -> nan nodes/s}
-- g_Bn =  -3 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -3 -> -3
trail found! [0 s] {624 nodes -> nan nodes/s}
 0: M_LR        0        6   +0
 1: M_LR       C1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR        C        C   -1  | a b c       C1       C1       C1 c 2^  -1 0.50
 3: M_LR        0       55   -1  | a b c        C        C        C c 2^  -1 0.50
 4: M_LR       18        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 5: M_LR       CC       CC   -1  | a b c       18       10       10 c 2^  -1 0.50
corr_trail 2^-3
-- g_nRounds = 6
-- g_Bn =  -3 ... no trail found! [0 s] {7182 nodes -> nan nodes/s}
-- g_Bn =  -4 ... no trail found! [0 s] {254722 nodes -> nan nodes/s}
-- g_Bn =  -5 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -5 -> -5
trail found! [0 s] {1067 nodes -> nan nodes/s}
 0: M_LR        0        1   +0
 1: M_LR       79        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR        2        2   -2  | a b c       79       79       79 c 2^  -2 0.25
 3: M_LR        0       F2   -1  | a b c        2        2        2 c 2^  -1 0.50
 4: M_LR        4        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 5: M_LR       C1       33   -1  | a b c        4        6        4 c 2^  -1 0.50
 6: M_LR       9A       92   -1  | a b c       C1       81       81 c 2^  -1 0.50
corr_trail 2^-5


 */


/* ------------- */
/* 

LAX32: diff: (bound on 2R = -6)

 1:  +0
 2:  -2
 3:  -6    -6
 4:  -9
 5: -11
 6: -16   -12
 7:
 8:
 9:

 */

/* 

LAX32: differential

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ make lax-best-diff-search-tests
g++ -O3 -std=c++11 -Wall -c -I./include/ ./src/xdp-add.cc -o ./obj/xdp-add.o
g++ -O3 -std=c++11 -Wall -c -I./include/ ./tests/lax-best-diff-search-tests.cc -o ./obj/lax-best-diff-search-tests.o
g++  ./obj/common.o ./obj/xdp-add.o ./obj/lax-cipher.o ./obj/lax-best-diff-search-tests.o -o ./bin/lax-best-diff-search-tests -lgsl -lgslcblas -lgmpxx -lgmp
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ ./bin/lax-best-diff-search-tests
#--- [./tests/lax-best-diff-search-tests.cc:654] Tests, WORD_SIZE  16 g_bn 8
-- g_nRounds = 2
-- g_Bn =  +0 ... no trail found! [0 s]
-- g_Bn =  -1 ... no trail found! [0 s]
-- g_Bn =  -2 ... # Update bound: -2 -> -2
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     E000     F000 ->     F000 0.00 -1
 1:     F000     F000 ->     E000 0.00 -1
log2p_trail -2
trail found! [0 s]
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     E000     F000 ->     F000 0.00 -1
 1:     F000     F000 ->     E000 0.00 -1
log2p_trail -2
-- g_nRounds = 3
-- g_Bn =  -2 ... no trail found! [0 s]
-- g_Bn =  -3 ... no trail found! [0 s]
-- g_Bn =  -4 ... no trail found! [0 s]
-- g_Bn =  -5 ... no trail found! [0 s]
-- g_Bn =  -6 ... # Update bound: -6 -> -6
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     8000     8000 ->        0 0.00 0
 1:     8536        0 ->     8536 0.00 -6
 2:        0     8000 ->     8000 0.00 0
log2p_trail -6
trail found! [0 s]
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     8000     8000 ->        0 0.00 0
 1:     8536        0 ->     8536 0.00 -6
 2:        0     8000 ->     8000 0.00 0
log2p_trail -6
-- g_nRounds = 4
-- g_Bn =  -6 ... no trail found! [0 s]
-- g_Bn =  -7 ... no trail found! [0 s]
-- g_Bn =  -8 ... no trail found! [0 s]
-- g_Bn =  -9 ... # Update bound: -9 -> -9
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     FC00     FE00 ->     FE00 0.00 -1
 1:     5B93     5B93 ->     FF00 0.00 -6
 2:     FE00     FF00 ->     FF00 0.00 -1
 3:     FF00     FF00 ->     FE00 0.00 -1
log2p_trail -9
trail found! [0 s]
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     FC00     FE00 ->     FE00 0.00 -1
 1:     5B93     5B93 ->     FF00 0.00 -6
 2:     FE00     FF00 ->     FF00 0.00 -1
 3:     FF00     FF00 ->     FE00 0.00 -1
log2p_trail -9
-- g_nRounds = 5
-- g_Bn =  -9 ... no trail found! [0 s]
-- g_Bn = -10 ... no trail found! [0 s]
-- g_Bn = -11 ... # Update bound: -11 -> -11
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     E000     F000 ->     F000 0.00 -1
 1:     F000     F000 ->        0 0.00 -3
 2:     F000        0 ->     F000 0.00 -3
 3:        0     F000 ->     F000 0.00 -3
 4:     F000     F000 ->     E000 0.00 -1
log2p_trail -11
trail found! [0 s]
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     E000     F000 ->     F000 0.00 -1
 1:     F000     F000 ->        0 0.00 -3
 2:     F000        0 ->     F000 0.00 -3
 3:        0     F000 ->     F000 0.00 -3
 4:     F000     F000 ->     E000 0.00 -1
log2p_trail -11
-- g_nRounds = 6
-- g_Bn = -11 ... no trail found! [0 s]
-- g_Bn = -12 ... no trail found! [0 s]
-- g_Bn = -13 ... no trail found! [0 s]
-- g_Bn = -14 ... no trail found! [0 s]
-- g_Bn = -15 ... no trail found! [3 s]
-- g_Bn = -16 ... # Update bound: -16 -> -16
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     1000        0 ->     F000 0.00 -3
 1:        0     F000 ->     F000 0.00 -3
 2:     F000     F000 ->        0 0.00 -3
 3:     F000        0 ->     F000 0.00 -3
 4:        0     F000 ->     F000 0.00 -3
 5:     F000     F000 ->     E000 0.00 -1
log2p_trail -16
trail found! [0 s]
[./tests/lax-best-diff-search-tests.cc:153] lax_print_diff_trail_log2()
 0:     1000        0 ->     F000 0.00 -3
 1:        0     F000 ->     F000 0.00 -3
 2:     F000     F000 ->        0 0.00 -3
 3:     F000        0 ->     F000 0.00 -3
 4:        0     F000 ->     F000 0.00 -3
 5:     F000     F000 ->     E000 0.00 -1
log2p_trail -16


 */

/* 

LAX32: linear

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ make lax-best-linear-search-tests
g++ -O3 -std=c++11 -Wall -c -I./include/ ./src/common.cc -o ./obj/common.o
g++ -O3 -std=c++11 -Wall -c -I./include/ ./src/xlp-add.cc -o ./obj/xlp-add.o
g++ -O3 -std=c++11 -Wall -c -I./include/ ./src/lax-cipher.cc -o ./obj/lax-cipher.o
g++ -O3 -std=c++11 -Wall -c -I./include/ ./tests/lax-best-linear-search-tests.cc -o ./obj/lax-best-linear-search-tests.o
g++  ./obj/common.o ./obj/xlp-add.o ./obj/lax-cipher.o ./obj/lax-best-linear-search-tests.o -o ./bin/lax-best-linear-search-tests -lgsl -lgslcblas -lgmpxx -lgmp
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ ./bin/lax-best-linear-search-tests
#--- [./tests/lax-best-linear-search-tests.cc:620] Tests, WORD_SIZE  16 g_bn 8
-- g_nRounds = 2
-- g_Bn =  +0 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: 0 -> 0
trail found! [0 s] {76445 nodes -> nan nodes/s}
 0: M_LR        0     6358   +0
 1: M_LR        1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR     6358     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
corr_trail 2^+0
-- g_nRounds = 3
-- g_Bn =  +0 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: 0 -> 0
trail found! [0 s] {196647 nodes -> nan nodes/s}
 0: M_LR        1        1   +0
 1: M_LR        0     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
 2: M_LR        1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 3: M_LR     6358     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
corr_trail 2^+0
-- g_nRounds = 4
-- g_Bn =  +0 ... no trail found! [0 s] {393254 nodes -> nan nodes/s}
-- g_Bn =  -1 ... no trail found! [2 s] {25037422 nodes -> 1.25187e+07 nodes/s}
-- g_Bn =  -2 ... no trail found! [54 s] {752835798 nodes -> 1.39414e+07 nodes/s}
-- g_Bn =  -3 ... no trail found! [1106 s] {14235781126 nodes -> 1.28714e+07 nodes/s}
-- g_Bn =  -4 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -4 -> -4
trail found! [0 s] {9746 nodes -> nan nodes/s}
 0: M_LR        0        F   +0
 1: M_LR        F        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR        F     6357   -2  | a b c        F        F        E c 2^  -2 0.25
 3: M_LR        1     9C5D   -2  | a b c        F        F        B c 2^  -2 0.25
 4: M_LR     6353     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
corr_trail 2^-4
-- g_nRounds = 5
-- g_Bn =  -4 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -4 -> -4
trail found! [9 s] {35863933 nodes -> 3.98488e+06 nodes/s}
 0: M_LR        0     C61B   +0
 1: M_LR     6378        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR        1        1   -4  | a b c     6378     6358     6358 c 2^  -4 0.06
 3: M_LR        0     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
 4: M_LR        1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 5: M_LR     6358     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
corr_trail 2^-4
-- g_nRounds = 6
-- g_Bn =  -4 ... no trail found! [0 s] {395253 nodes -> nan nodes/s}
-- g_Bn =  -5 ... no trail found! [1 s] {25398070 nodes -> 2.53981e+07 nodes/s}
-- g_Bn =  -6 ... no trail found! [60 s] {774754140 nodes -> 1.29126e+07 nodes/s}
-- g_Bn =  -7 ... no trail found! [1317 s] {15071748850 nodes -> 1.1444e+07 nodes/s}
-- g_Bn =  -8 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -8 -> -8
trail found! [0 s] {230110 nodes -> nan nodes/s}
 0: M_LR        0        F   +0
 1: M_LR        F        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR        F        F   -2  | a b c        F        F        F c 2^  -2 0.25
 3: M_LR        0     9C5D   -2  | a b c        F        F        B c 2^  -2 0.25
 4: M_LR        B        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 5: M_LR        F     FF05   -2  | a b c        B        F        A c 2^  -2 0.25
 6: M_LR     FF0F     FF05   -2  | a b c        F        A        A c 2^  -2 0.25
corr_trail 2^-8
-- g_nRounds = 7
-- g_Bn =  -8 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -8 -> -8
trail found! [0 s] {29772 nodes -> nan nodes/s}
 0: M_LR        0        F   +0
 1: M_LR        F        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR        F        F   -2  | a b c        F        F        F c 2^  -2 0.25
 3: M_LR        0     9C5D   -2  | a b c        F        F        B c 2^  -2 0.25
 4: M_LR        B        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 5: M_LR        F        F   -2  | a b c        B        F        F c 2^  -2 0.25
 6: M_LR        0     9C5D   -2  | a b c        F        F        B c 2^  -2 0.25
 7: M_LR        B        0   +0  | a b c        0        0        0 c 2^  +0 1.00
corr_trail 2^-8
-- g_nRounds = 8
-- g_Bn =  -8 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -8 -> -8
trail found! [0 s] {77766 nodes -> nan nodes/s}
 0: M_LR        0     6358   +0
 1: M_LR        1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR     6358     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
 3: M_LR        0     C61B   -4  | a b c     6358     6358     6378 c 2^  -4 0.06
 4: M_LR     6378        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 5: M_LR        1        1   -4  | a b c     6378     6358     6358 c 2^  -4 0.06
 6: M_LR        0     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
 7: M_LR        1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 8: M_LR     6358     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
corr_trail 2^-8
-- g_nRounds = 9
-- g_Bn =  -8 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -8 -> -8
trail found! [0 s] {197968 nodes -> nan nodes/s}
 0: M_LR        1        1   +0
 1: M_LR        0     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
 2: M_LR        1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 3: M_LR     6358     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
 4: M_LR        0     C61B   -4  | a b c     6358     6358     6378 c 2^  -4 0.06
 5: M_LR     6378        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 6: M_LR        1        1   -4  | a b c     6378     6358     6358 c 2^  -4 0.06
 7: M_LR        0     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
 8: M_LR        1        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 9: M_LR     6358     6358   +0  | a b c        1        1        1 c 2^  +0 1.00
corr_trail 2^-8
-- g_nRounds = 10
-- g_Bn =  -8 ... no trail found! [1 s] {393254 nodes -> 393254 nodes/s}
-- g_Bn =  -9 ... no trail found! [1 s] {25037422 nodes -> 2.50374e+07 nodes/s}
-- g_Bn = -10 ... no trail found! [57 s] {752835798 nodes -> 1.32076e+07 nodes/s}
-- g_Bn = -11 ... no trail found! [1121 s] {14235781126 nodes -> 1.26992e+07 nodes/s}
-- g_Bn = -12 ... [./tests/lax-best-linear-search-tests.cc:450] Update bound: -12 -> -12
trail found! [0 s] {31422 nodes -> nan nodes/s}
 0: M_LR        0        F   +0
 1: M_LR        F        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 2: M_LR        F        F   -2  | a b c        F        F        F c 2^  -2 0.25
 3: M_LR        0     9C5D   -2  | a b c        F        F        B c 2^  -2 0.25
 4: M_LR        B        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 5: M_LR        F        F   -2  | a b c        B        F        F c 2^  -2 0.25
 6: M_LR        0     9C5D   -2  | a b c        F        F        B c 2^  -2 0.25
 7: M_LR        B        0   +0  | a b c        0        0        0 c 2^  +0 1.00
 8: M_LR        F        F   -2  | a b c        B        F        F c 2^  -2 0.25
 9: M_LR        0     9C5D   -2  | a b c        F        F        B c 2^  -2 0.25
10: M_LR        B        0   +0  | a b c        0        0        0 c 2^  +0 1.00
corr_trail 2^-12


 */

/* --- */
void test_lax_matrix_vector_multiply_one_table()
{
  const std::array<std::array<bool, WORD_SIZE>, WORD_SIZE> L = g_L;
  std::array<std::array<uint32_t, (ALL_WORDS / NTABLES)>, NTABLES> M;
  lax_build_mvtable_one(L, &M);
  for(WORD_T x = 0; x < ALL_WORDS; x++) {
	 WORD_T y = 0;
	 matrix_vector_multiply(&y, L, x);
	 WORD_T yy = lax_matrix_vector_multiply_one_table(L, M, x);
	 printf("%8X %8X\n", y, yy);
	 assert(y == yy);
  }
}

void test_lax_matrix_vector_multiply_two_table()
{
  const std::array<std::array<bool, WORD_SIZE>, WORD_SIZE> L = g_L;
  std::array<std::array<uint32_t, (ALL_WORDS / NTABLES)>, NTABLES> M;
  lax_build_mvtable_two(L, &M);
  for(WORD_T x = 0; x < ALL_WORDS; x++) {
	 WORD_T y = 0;
	 matrix_vector_multiply(&y, L, x);
	 WORD_T yy = lax_matrix_vector_multiply_two_table(L, M, x);
	 printf("%8X %8X\n", y, yy);
	 assert(y == yy);
  }
}



/* --- */

/**
 * Exhaustively enumerate two chained transitions
 *   (a, b -> c) and (L a, L c -> f),
 * where L is the matrix g_L and the probabilities are given by
 * xdp_add_lm(). Every pair of transitions with non-zero probability for
 * which (uint32_t)|log2(p1 * p2)| < (g_bn - 2) is printed together with
 * the log2 of the probabilities. The all-zero input (a, b) is skipped.
 */
void test_lax_two_rounds()
{
  for(WORD_T a = 0; a < ALL_WORDS; a++) {
    for(WORD_T b = 0; b < ALL_WORDS; b++) {
      const bool b_nonzero_input = ((a != 0) || (b != 0));
      if(!b_nonzero_input) {
        continue;
      }
      for(WORD_T c = 0; c < ALL_WORDS; c++) {
        const double p1 = xdp_add_lm(a, b, c);
        if(p1 == 0.0) {
          continue; // impossible first transition
        }
        // inputs of the second transition
        WORD_T d = 0;
        matrix_vector_multiply(&d, g_L, a);
        WORD_T e = 0;
        matrix_vector_multiply(&e, g_L, c);
        for(WORD_T f = 0; f < ALL_WORDS; f++) {
          const double p2 = xdp_add_lm(d, e, f);
          if(p2 == 0.0) {
            continue; // impossible second transition
          }
          const double p_tot = p1 * p2;
          if(!((uint32_t)std::abs(log2(p_tot)) < (g_bn - 2))) {
            continue;
          }
          printf("(%X %X -> %X) * (%X %X -> %X) = ", a, b, c, d, e, f);
          printf("%4.2f + %4.2f = %4.2f\n", log2(p1), log2(p2), log2(p_tot));
        }
      }
    }
  }
}

/**
 * Exhaustively enumerate three chained transitions
 *   (a, b -> c),  (L*a, L*c -> d),  (L*L*a, L*d -> e)
 * where L is g_L applied through matrix_vector_multiply() and each
 * transition is scored with xdp_add_lm().
 *
 * The all-zero input (a = b = 0) is skipped. Every combination in which all
 * three scores are non-zero and |log2(p1 * p2 * p3)|, truncated to uint32_t,
 * is below (g_bn - 1) is printed.
 *
 * Changes relative to the previous version:
 *  - the summary line now prints log2(p3) as its third term (it used to
 *    print log2(p2) twice);
 *  - L*L*a and L*d are computed once, outside the innermost loop, because
 *    they do not depend on e. This assumes matrix_vector_multiply() has no
 *    side effects other than writing its output argument.
 */
void test_lax_three_rounds()
{
  for(WORD_T a = 0; a < ALL_WORDS; a++) {
    for(WORD_T b = 0; b < ALL_WORDS; b++) {
      if((a == 0) && (b == 0))
        continue; // skip the all-zero input
      for(WORD_T c = 0; c < ALL_WORDS; c++) {
        double p1 = xdp_add_lm(a, b, c);
        if(p1 == 0.0)
          continue; // impossible first transition

        // Round-2 inputs.
        WORD_T la = 0;
        matrix_vector_multiply(&la, g_L, a);
        WORD_T lc = 0;
        matrix_vector_multiply(&lc, g_L, c);

        // Left round-3 input; depends on a only.
        WORD_T lla = 0;
        matrix_vector_multiply(&lla, g_L, la);

        for(WORD_T d = 0; d < ALL_WORDS; d++) {
          double p2 = xdp_add_lm(la, lc, d);
          if(p2 == 0.0)
            continue; // impossible second transition

          // Right round-3 input; depends on d only.
          WORD_T ld = 0;
          matrix_vector_multiply(&ld, g_L, d);

          for(WORD_T e = 0; e < ALL_WORDS; e++) {
            double p3 = xdp_add_lm(lla, ld, e);
            if(p3 == 0.0)
              continue; // impossible third transition

            double p_tot = p1 * p2 * p3;
            if((uint32_t)std::abs(log2(p_tot)) < (g_bn - 1)) {
              printf("(%X %X -> %X) * (%X %X -> %X) * (%X %X -> %X) = ", a, b, c, la, lc, d, lla, ld, e);
              printf("%4.2f + %4.2f + %4.2f = %4.2f\n", log2(p1), log2(p2), log2(p3), log2(p_tot));
            }
          }
        }
      }
    }
  }
}

/* --- */

void test_speck_convert_diff_trail_to_differentials()
{
  differential_t diff_arr[SPECK_TRAIL_LEN] = {{0, 0, 0, 0.0}};
  WORD_T delta_L = 0;
  WORD_T delta_R = 0;
  speck_convert_diff_trail_to_differentials(NROUNDS, g_speck32_best_trail, diff_arr, &delta_L, &delta_R);

  printf("\n[%s:%d] Differential:\n", __FILE__, __LINE__);
#if (WORD_SIZE <= 32)
  printf("IN: %8X %8X\n", delta_L, delta_R); // input diff
#else
  printf("IN: %16llX %16llX\n", (WORD_MAX_T)delta_L, (WORD_MAX_T)delta_R); // input diff
#endif // #if (WORD_SIZE <= 32)
  for(uint32_t i = 0; i < (NROUNDS-1); i++) {
#if (WORD_SIZE <= 32)
	 printf("%8X %8X %4.2f\n", diff_arr[i].dx, diff_arr[i].dy, log2(diff_arr[i].p));
#else
	 printf("%2d: %16llX %16llX %4.2f\n", i+1, (WORD_MAX_T)diff_arr[i].dx, (WORD_MAX_T)diff_arr[i].dy, log2(diff_arr[i].p));
#endif // #if (WORD_SIZE <= 32)
  }
}

/* --- */

/*
 0:     C014     4205 ->      211 0.03 -5.00
 1:     2204      A04 ->     2800 0.06 -4.00
 2:       50       10 ->       40 0.25 -2.00
 3:     8000        0 ->     8000 1.00 0.00
 4:      100     8000 ->     8100 0.50 -1.00
 5:      102     8102 ->     8000 0.25 -2.00
 6:      100     840A ->     850A 0.06 -4.00
*/
/*
 * Hard-coded 7-transition trail used by the tests when WORD_SIZE == 16.
 * Each row is {dx, dy, dz, p}; the rows match the listing in the comment
 * directly above (dx dy -> dz) and p is written as 1 / 2^w. The remaining
 * rows are zero-filled placeholders.
 */
differential_3d_t g_speck32_best_trail[SPECK_TRAIL_LEN] = {
  {0xC014, 0x4205,  0x211,  (1.0 / (double)(1ULL <<  5))},
  {0x2204,  0xA04, 0x2800,  (1.0 / (double)(1ULL <<  4))},
  {  0x50,   0x10,   0x40,  (1.0 / (double)(1ULL <<  2))},
  {0x8000,    0x0, 0x8000,  (1.0 / (double)(1ULL <<  0))},
  { 0x100, 0x8000, 0x8100,  (1.0 / (double)(1ULL <<  1))},
  { 0x102, 0x8102, 0x8000,  (1.0 / (double)(1ULL <<  2))},
  { 0x100, 0x840A, 0x850A,  (1.0 / (double)(1ULL <<  4))},
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0}						 // dummy
};


/*
 0: 82020000 12020000 -> 90000000 0.12 -3.00
 1:   900000   100000 ->   800000 0.25 -2.00
 2:     8000        0 ->     8000 0.50 -1.00
 3:       80     8000 ->     8080 0.25 -2.00
 4: 80000080    48080 -> 80048000 0.12 -3.00
 5:   800480 80208400 -> 80A08080 0.03 -5.00
 */
/*
 * Hard-coded 6-transition trail used by the tests when WORD_SIZE == 32.
 * Each row is {dx, dy, dz, p}; the rows match the listing in the comment
 * directly above (dx dy -> dz) and p is written as 1 / 2^w. The remaining
 * rows are zero-filled placeholders.
 */
differential_3d_t g_speck64_best_trail[SPECK_TRAIL_LEN] = {
  {0x82020000, 0x12020000, 0x90000000, (1.0 / (double)(1ULL <<  3))},
  {  0x900000,   0x100000,   0x800000, (1.0 / (double)(1ULL <<  2))},
  {    0x8000,        0x0,     0x8000, (1.0 / (double)(1ULL <<  1))},
  {      0x80,     0x8000,     0x8080, (1.0 / (double)(1ULL <<  2))},
  {0x80000080,    0x48080, 0x80048000, (1.0 / (double)(1ULL <<  3))},
  {  0x800480, 0x80208400, 0x80A08080, (1.0 / (double)(1ULL <<  5))},
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0}						 // dummy
};

/*
0:               90               10 ->               80 0.25 -2.00
1:     800000000000                0 ->     800000000000 1.00 0.00
2:       8000000000     800000000000 ->     808000000000 0.50 -1.00
*/
// Only compiled for WORD_SIZE > 32: the constants below need more than 32 bits.
#if (WORD_SIZE > 32)
/*
 * Hard-coded 3-transition trail used by the tests when WORD_SIZE == 48.
 * Each row is {dx, dy, dz, p}; the rows match the listing in the comment
 * directly above (dx dy -> dz) and p is written as 1 / 2^w. The remaining
 * rows are zero-filled placeholders.
 */
differential_3d_t g_speck96_best_trail[SPECK_TRAIL_LEN] = {
  {0x90, 0x10, 0x80, (1.0 / (double)(1ULL <<  2))},
  {0x800000000000, 0, 0x800000000000, (1.0 / (double)(1ULL <<  0))},
  {0x8000000000, 0x800000000000, 0x808000000000, (1.0 / (double)(1ULL <<  1))},
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0},						 // dummy
  {0, 0, 0, 0.0}						 // dummy
};
#endif // #if (WORD_SIZE > 32)


/* --- */

void test_speck_verify_xor_differential()
{

  uint32_t npairs = (1ULL << 22);
  WORD_T key[SPECK_MAX_NROUNDS] = {0};
  differential_t diff_arr[SPECK_TRAIL_LEN] = {{0, 0, 0, 0.0}};
  WORD_T delta_L = 0;
  WORD_T delta_R = 0;

#if (WORD_SIZE == 16)
  assert(NROUNDS == 7);
  speck_convert_diff_trail_to_differentials(NROUNDS, g_speck32_best_trail, diff_arr, &delta_L, &delta_R);
#endif // #if (WORD_SIZE == 16)

#if (WORD_SIZE == 32)
  assert(NROUNDS == 6);
  speck_convert_diff_trail_to_differentials(NROUNDS, g_speck64_best_trail, diff_arr, &delta_L, &delta_R);
#endif // #if (WORD_SIZE == 16)

#if (WORD_SIZE == 48)
  assert(NROUNDS == 3);
  speck_convert_diff_trail_to_differentials(NROUNDS, g_speck96_best_trail, diff_arr, &delta_L, &delta_R);
#endif // #if (WORD_SIZE == 16)

  printf("\n[%s:%d] Differential:\n", __FILE__, __LINE__);
#if (WORD_SIZE <= 32)
  printf("IN: %8X %8X\n", delta_L, delta_R); // input diff
#else
  printf("IN: %16llX %16llX\n", (WORD_MAX_T)delta_L, (WORD_MAX_T)delta_R); // input diff
#endif // #if (WORD_SIZE <= 32)
  for(uint32_t i = 0; i < NROUNDS; i++) {
#if (WORD_SIZE <= 32)
	 printf("%2d: %8X %8X %4.2f\n", i+1, diff_arr[i].dx, diff_arr[i].dy, log2(diff_arr[i].p));
#else
	 printf("%2d: %16llX %16llX %4.2f\n", i+1, (WORD_MAX_T)diff_arr[i].dx, (WORD_MAX_T)diff_arr[i].dy, log2(diff_arr[i].p));
#endif // #if (WORD_SIZE <= 32)
  }

  key[0] = xrandom() & MASK;
  key[1] = xrandom() & MASK;
  key[2] = xrandom() & MASK;
  key[3] = xrandom() & MASK;

  printf("[%s:%d] key %16llX %16llX %16llX %16llX\n", __FILE__, __LINE__, (WORD_MAX_T)key[0], (WORD_MAX_T)key[1], (WORD_MAX_T)key[2], (WORD_MAX_T)key[3]);

  speck_verify_xor_differential(NROUNDS, npairs, key, diff_arr, delta_L, delta_R, g_r1, g_r2);

}


/* --- */

void test_speck64_r7_greedy_forward()
{
#if (WORD_SIZE <= 32)
  // Table 73 and 74
#if 0
  const WORD_T beta_in  = 0x84008020;
  const WORD_T gamma_in = 0x808080A0;
#endif
  // Table 75
#if 1
  const WORD_T beta_in  = 0x802084;
  const WORD_T gamma_in = 0x8080A080;
#endif

  WORD_T alpha_next = 0; 
  WORD_T beta_next = 0;
  WORD_T gamma_next = 0;

  speck_compute_next_alpha_beta(beta_in, gamma_in, &alpha_next, &beta_next);
  double p_max = max_xdp_add_lm(alpha_next, beta_next, &gamma_next);

  printf("[%s:%d] (%X %X) -> (%X %X -> %X) 2^%4.2f\n", __FILE__, __LINE__,
			beta_in, gamma_in, alpha_next, beta_next, gamma_next, log2(p_max));
#endif // #if (WORD_SIZE <= 32)
}

void test_speck64_r7_greedy_backward()
{
#if (WORD_SIZE <= 32)
  // Table 73
#if 0
  const WORD_T alpha_in  = 0x40924000;
  const WORD_T beta_in = 0x40104200;
#endif
  // Table 74
#if 0
  const WORD_T alpha_in  = 0xC0924000;
  const WORD_T beta_in = 0x40104200;
#endif
  // Table 75
#if 1
  const WORD_T alpha_in  = 0x92400040;
  const WORD_T beta_in = 0x10420040;
#endif

  WORD_T alpha_prev = 0; 
  WORD_T beta_prev = 0;
  WORD_T gamma_prev = 0;

  speck_compute_prev_gamma_beta(alpha_in, beta_in, &gamma_prev, &beta_prev);
  double p_max = max_xdp_add_lm(gamma_prev, beta_prev, &alpha_prev);
  double p_tmp = xdp_add_lm(alpha_prev, beta_prev, gamma_prev);
  assert(p_max == p_tmp);

  printf("[%s:%d] (%X %X) -> (%X %X -> %X) 2^%4.2f\n", __FILE__, __LINE__,
			alpha_in, beta_in, alpha_prev, beta_prev, gamma_prev, log2(p_max));
#endif // #if (WORD_SIZE <= 32)
}

#if 1 // verify trails from submission 163 (MILP), FSE2016


/**
 * Experimentally check hard-coded differential trails (one per supported
 * WORD_SIZE: 16, 24, 32, 48, 64) in sliding windows.
 *
 * diff_arr[0] holds the input difference (dx, dy) of the trail; entry i
 * holds the difference after round i and the probability of that round
 * written as 1 / 2^w.
 *
 * The function prints the trail, then slides a window of `step` entries
 * over it (start index i_round = 0, 1, ...). For each window a fresh random
 * key is drawn, the window's first entry becomes the input difference, the
 * following (step - 1) entries are copied to sub_arr, and
 * speck_verify_xor_differential() is called; its return values are summed
 * in warn_cnt, which is printed at the end.
 *
 * NOTE(review): there is no diff_arr definition for any other WORD_SIZE,
 * so the function does not compile in that case.
 */
void test_speck_verify_trails_milp_sub163_fse2016()
{
  WORD_T key[SPECK_MAX_NROUNDS] = {0};

#if (WORD_SIZE == 16)
  assert(NROUNDS == 9);
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 {0x1488, 0x1008, 0, (1.0 / (double)(1ULL <<  0))}, // 00 (input difference)
	 {0x0021, 0x4001, 0, (1.0 / (double)(1ULL <<  4))}, // 01
	 {0x0601, 0x0604, 0, (1.0 / (double)(1ULL <<  4))}, // 02
	 {0x1800, 0x0010, 0, (1.0 / (double)(1ULL <<  6))}, // 03
	 {0x0040, 0x0000, 0, (1.0 / (double)(1ULL <<  3))}, // 04
	 {0x8000, 0x8000, 0, (1.0 / (double)(1ULL <<  0))}, // 05
	 {0x8100, 0x8102, 0, (1.0 / (double)(1ULL <<  1))}, // 06
	 {0x8000, 0x840A, 0, (1.0 / (double)(1ULL <<  2))}, // 07
	 {0x850A, 0x9520, 0, (1.0 / (double)(1ULL <<  4))}, // 08
	 {0x802A, 0xD4A8, 0, (1.0 / (double)(1ULL <<  6))}, // 09
  };
#endif // #if (WORD_SIZE == 16)

#if (WORD_SIZE == 24)
  assert(NROUNDS == 11);
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 {0x001202, 0x020002, 0, (1.0 / (double)(1ULL <<  0))}, // 00 (input difference)
	 {0x000010, 0x100000, 0, (1.0 / (double)(1ULL <<  3))}, // 01
	 {0x000000, 0x800000, 0, (1.0 / (double)(1ULL <<  1))}, // 02
	 {0x800000, 0x800004, 0, (1.0 / (double)(1ULL <<  0))}, // 03
	 {0x808004, 0x808020, 0, (1.0 / (double)(1ULL <<  2))}, // 04
	 {0x8400A0, 0x8001A4, 0, (1.0 / (double)(1ULL <<  4))}, // 05
	 {0x608DA4, 0x608080, 0, (1.0 / (double)(1ULL <<  9))}, // 06
	 {0x042003, 0x002400, 0, (1.0 / (double)(1ULL << 11))}, // 07
	 {0x012020, 0x000020, 0, (1.0 / (double)(1ULL <<  5))}, // 08
	 {0x200100, 0x200000, 0, (1.0 / (double)(1ULL <<  3))}, // 09
	 {0x202001, 0x202000, 0, (1.0 / (double)(1ULL <<  3))}, // 10
	 {0x210020, 0x200021, 0, (1.0 / (double)(1ULL <<  4))}, // 11
  };
#endif // #if (WORD_SIZE == 24)

#if (WORD_SIZE == 32)
  assert(NROUNDS == 15);
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 {0x04092400, 0x20040104, 0, (1.0 / (double)(1ULL <<  0))}, // 00 (input difference)
	 {0x20000820, 0x20200001, 0, (1.0 / (double)(1ULL <<  6))}, // 01
	 {0x00000009, 0x01000000, 0, (1.0 / (double)(1ULL <<  4))}, // 02
	 {0x08000000, 0x00000000, 0, (1.0 / (double)(1ULL <<  2))}, // 03
	 {0x00080000, 0x00080000, 0, (1.0 / (double)(1ULL <<  1))}, // 04
	 {0x00080800, 0x00480800, 0, (1.0 / (double)(1ULL <<  2))}, // 05
	 {0x00480008, 0x02084008, 0, (1.0 / (double)(1ULL <<  4))}, // 06
	 {0x06080808, 0x164A0848, 0, (1.0 / (double)(1ULL <<  7))}, // 07
	 {0xF2400040, 0x40104200, 0, (1.0 / (double)(1ULL << 13))}, // 08
	 {0x00820200, 0x00001202, 0, (1.0 / (double)(1ULL <<  8))}, // 09
	 {0x00009000, 0x00000010, 0, (1.0 / (double)(1ULL <<  4))}, // 10
	 {0x00000080, 0x00000000, 0, (1.0 / (double)(1ULL <<  2))}, // 11
	 {0x80000000, 0x80000000, 0, (1.0 / (double)(1ULL <<  0))}, // 12
	 {0x80800000, 0x80800004, 0, (1.0 / (double)(1ULL <<  1))}, // 13
	 {0x80008004, 0x84008020, 0, (1.0 / (double)(1ULL <<  3))}, // 14
	 {0x808080A0, 0xA08481A4, 0, (1.0 / (double)(1ULL <<  5))}, // 15
};
#endif // #if (WORD_SIZE == 32)

#if (WORD_SIZE == 48)
  assert(NROUNDS == 16);
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 {0x240004000009, 0x010420040000, 0, (1.0 / (double)(1ULL <<  0))}, // 00 (input difference)
	 {0x082020000000, 0x000120200000, 0, (1.0 / (double)(1ULL <<  6))}, // 01
	 {0x000900000000, 0x000001000000, 0, (1.0 / (double)(1ULL <<  4))}, // 02
	 {0x000008000000, 0x000000000000, 0, (1.0 / (double)(1ULL <<  2))}, // 03
	 {0x000000080000, 0x000000080000, 0, (1.0 / (double)(1ULL <<  1))}, // 04
	 {0x000000080800, 0x000000480800, 0, (1.0 / (double)(1ULL <<  2))}, // 05
	 {0x000000480008, 0x000002084008, 0, (1.0 / (double)(1ULL <<  4))}, // 06
	 {0x0800FE080808, 0x0800EE4A0848, 0, (1.0 / (double)(1ULL << 12))}, // 07
	 {0x000772400040, 0x400000104200, 0, (1.0 / (double)(1ULL << 21))}, // 08
	 {0x000000820200, 0x000000001202, 0, (1.0 / (double)(1ULL << 11))}, // 09
	 {0x000000009000, 0x000000000010, 0, (1.0 / (double)(1ULL <<  4))}, // 10
	 {0x000000000080, 0x000000000000, 0, (1.0 / (double)(1ULL <<  2))}, // 11
	 {0x800000000000, 0x800000000000, 0, (1.0 / (double)(1ULL <<  0))}, // 12
	 {0x808000000000, 0x808000000004, 0, (1.0 / (double)(1ULL <<  1))}, // 13
	 {0x800080000004, 0x840080000020, 0, (1.0 / (double)(1ULL <<  3))}, // 14
	 {0x808080800020, 0xA08480800124, 0, (1.0 / (double)(1ULL <<  5))}, // 15
	 {0x800400008124, 0x842004008801, 0, (1.0 / (double)(1ULL <<  9))}, // 16
};
#endif // #if (WORD_SIZE == 48)

#if (WORD_SIZE == 64)
  assert(NROUNDS == 19);
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 {0x0124000400000000, 0x0801042004000000, 0, (1.0 / (double)(1ULL <<  0))}, // 00 (input difference)
	 {0x0800202000000000, 0x4808012020000000, 0, (1.0 / (double)(1ULL <<  6))}, // 01
	 {0x4800010000000000, 0x0840080100000002, 0, (1.0 / (double)(1ULL <<  6))}, // 02
	 {0x0808080000000006, 0x4A08480800000016, 0, (1.0 / (double)(1ULL <<  7))}, // 03
	 {0x4000400000000032, 0x1042004000000080, 0, (1.0 / (double)(1ULL << 12))}, // 04
	 {0x0202000000000080, 0x8012020000000480, 0, (1.0 / (double)(1ULL <<  7))}, // 05
	 {0x0010000000000480, 0x0080100000002084, 0, (1.0 / (double)(1ULL <<  5))}, // 06
	 {0x8080000000002080, 0x84808000000124A0, 0, (1.0 / (double)(1ULL <<  5))}, // 07
	 {0x0400000000012440, 0x2004000000080144, 0, (1.0 / (double)(1ULL <<  9))}, // 08
	 {0x2000000000080220, 0x2020000000480801, 0, (1.0 / (double)(1ULL <<  9))}, // 09
	 {0x0000000000480001, 0x0100000002084008, 0, (1.0 / (double)(1ULL <<  7))}, // 10
	 {0x000000000E080808, 0x080000001E4A0848, 0, (1.0 / (double)(1ULL <<  8))}, // 11
	 {0x00000000F2400040, 0x4000000000104200, 0, (1.0 / (double)(1ULL << 15))}, // 12
	 {0x0000000000820200, 0x0000000000001202, 0, (1.0 / (double)(1ULL <<  8))}, // 13
	 {0x0000000000009000, 0x0000000000000010, 0, (1.0 / (double)(1ULL <<  4))}, // 14
	 {0x0000000000000080, 0x0000000000000000, 0, (1.0 / (double)(1ULL <<  2))}, // 15
	 {0x8000000000000000, 0x8000000000000000, 0, (1.0 / (double)(1ULL <<  0))}, // 16
	 {0x8080000000000000, 0x8080000000000004, 0, (1.0 / (double)(1ULL <<  1))}, // 17
	 {0x8000800000000004, 0x8400800000000020, 0, (1.0 / (double)(1ULL <<  3))}, // 18
	 {0x8080808000000020, 0xA084808000000124, 0, (1.0 / (double)(1ULL <<  5))}, // 19
  };
#endif // #if (WORD_SIZE == 64)

  // Print the whole trail: entry 0 as the input difference, then entries
  // 1 .. NROUNDS-1 (labelled i+1).
  WORD_T delta_L = diff_arr[0].dx;
  WORD_T delta_R = diff_arr[0].dy;
  printf("\n[%s:%d] Differential:\n", __FILE__, __LINE__);
#if (WORD_SIZE <= 32)
  printf("IN: %8X %8X\n", delta_L, delta_R); // input diff
#else
  printf("IN: %16llX %16llX\n", (WORD_MAX_T)delta_L, (WORD_MAX_T)delta_R); // input diff
#endif // #if (WORD_SIZE <= 32)
  for(uint32_t i = 1; i < NROUNDS; i++) {
#if (WORD_SIZE <= 32)
	 printf("%2d: %8X %8X %4.2f\n", i+1, diff_arr[i].dx, diff_arr[i].dy, log2(diff_arr[i].p));
#else
	 printf("%2d: %16llX %16llX %4.2f\n", i+1, (WORD_MAX_T)diff_arr[i].dx, (WORD_MAX_T)diff_arr[i].dy, log2(diff_arr[i].p));
#endif // #if (WORD_SIZE <= 32)
  }

  // Initial random key; it is replaced at the top of every window below.
  key[0] = xrandom() & MASK;
  key[1] = xrandom() & MASK;
  key[2] = xrandom() & MASK;
  key[3] = xrandom() & MASK;

  printf("[%s:%d] key %16llX %16llX %16llX %16llX\n", __FILE__, __LINE__, (WORD_MAX_T)key[0], (WORD_MAX_T)key[1], (WORD_MAX_T)key[2], (WORD_MAX_T)key[3]);


  uint32_t npairs = (1ULL << 25);
  //  uint32_t i_round = 5; // WORD_SIZE == 24
  uint32_t i_round = 0; // index of the first trail entry of the current window
  uint32_t step = 5; // window length in trail entries (step - 1 rounds are verified)
  assert(step >= 2);
  uint32_t warn_cnt = 0; // sum of the values returned by speck_verify_xor_differential()

  // NOTE(review): the condition keeps (i_round + step) <= NROUNDS, so the
  // MIN() below never clamps and diff_arr[NROUNDS] is never part of a
  // window. Confirm whether the last entry is meant to be skipped.
  while ((i_round + step) <= NROUNDS) {

	 // WARNING!! Random key for every differential
	 key[0] = xrandom() & MASK;
	 key[1] = xrandom() & MASK;
	 key[2] = xrandom() & MASK;
	 key[3] = xrandom() & MASK;
	 printf("[%s:%d] key %16llX %16llX %16llX %16llX\n", __FILE__, __LINE__, (WORD_MAX_T)key[0], (WORD_MAX_T)key[1], (WORD_MAX_T)key[2], (WORD_MAX_T)key[3]);

	 differential_t sub_arr[SPECK_TRAIL_LEN] = {0, 0, 0, 0.0};

	 uint32_t t_start = i_round;
	 uint32_t t_end = MIN((i_round + step), NROUNDS);

	 //	 printf("[%s:%d] Verify T[%2d:%2d]\n", __FILE__, __LINE__, t_start, t_end-1);
	 // The window's first entry is the input difference; the entries after
	 // it (t_start+1 .. t_end-1) are the rounds to verify.
	 delta_L = diff_arr[t_start].dx;
	 delta_R = diff_arr[t_start].dy;
	 uint32_t sub_rounds = 0;
	 for(uint32_t i = (t_start + 1); i < t_end; i++, sub_rounds++) {
		sub_arr[sub_rounds] = diff_arr[i];
	 }

	 printf("\n[%s:%d] Sub differential [%2d:%2d]\n", __FILE__, __LINE__, t_start, t_end);
#if (WORD_SIZE <= 32)
	 printf("IN: %8X %8X\n", delta_L, delta_R); // input diff
#else
	 printf("IN: %16llX %16llX\n", (WORD_MAX_T)delta_L, (WORD_MAX_T)delta_R); // input diff
#endif // #if (WORD_SIZE <= 32)
	 for(uint32_t i = 0; i < sub_rounds; i++) {
#if (WORD_SIZE <= 32)
		printf("%2d: %8X %8X %4.2f\n", i+1, sub_arr[i].dx, sub_arr[i].dy, log2(sub_arr[i].p));
#else
		printf("%2d: %16llX %16llX %4.2f\n", i+1, (WORD_MAX_T)sub_arr[i].dx, (WORD_MAX_T)sub_arr[i].dy, log2(sub_arr[i].p));
#endif // #if (WORD_SIZE <= 32)
	 }

	 warn_cnt += speck_verify_xor_differential(sub_rounds, npairs, key, sub_arr, delta_L, delta_R, g_r1, g_r2);

	 // Only reached when step is edited to 2 (it is 5 above): compare the
	 // stored probability of the single transition t_start -> t_start+1
	 // with the value returned by xdp_add_lm().
	 if(step == 2) { // verify single ADD transitions

		WORD_T i_L = diff_arr[t_start].dx;
		WORD_T i_R = diff_arr[t_start].dy;;
		WORD_T o_L = diff_arr[t_start + 1].dx;
		WORD_T o_R = diff_arr[t_start + 1].dy;
		double p_round = diff_arr[t_start + 1].p;

		// Inputs of the addition: left word rotated right by g_r1, right
		// word unchanged; its output is the next left word.
		WORD_T alpha = RROT(i_L, g_r1);
		WORD_T beta = i_R;
		WORD_T gamma = o_L;
		double p_add = xdp_add_lm(alpha, beta, gamma);

#if (WORD_SIZE <= 32)
		printf("[%s:%d] step = %d: R[%2d] (%8X %8X) -> R[%2d] (%8X %8X)\n", __FILE__, __LINE__, step, t_start, i_L, i_R, t_start + 1, o_L, o_R);
		printf("(%8X %8X) -> %8X %2.0f %2.0f\n", alpha, beta, gamma, log2(p_add), log2(p_round));
#else
		printf("[%s:%d] step = %d: R[%2d] (%16llX %16llX) -> R[%2d] (%16llX %16llX)\n", __FILE__, __LINE__, step, t_start, (WORD_MAX_T)i_L, (WORD_MAX_T)i_R, t_start + 1, (WORD_MAX_T)o_L, (WORD_MAX_T)o_R);
		printf("(%16llX %16llX) -> %16llX %2.0f %2.0f\n", (WORD_MAX_T)alpha, (WORD_MAX_T)beta, (WORD_MAX_T)gamma, log2(p_add), log2(p_round));
#endif // #if (WORD_SIZE <= 32)

		assert(p_add == p_round);

	 }

	 // Slide the window by one entry, so consecutive windows overlap.
	 i_round++;
  }
  printf("[%s:%d] Total warn_cnt: %2d\n", __FILE__, __LINE__, warn_cnt);
}
#endif // #if 1 // verify trails from submission 163, FSE2016


/* --- */

/*
 * The probabilities of the best differential trails for up to 15
 * rounds of MARX.
 *
 * \see g_best_B in arx-best-diff-search-tests.cc
 */
// Indexed by the number of rounds (see the per-row comments); each value is
// the log2 of a probability. Explicit values are given for indices 0..15.
const int g_marx_best_prob_log2[NROUNDS_MAX + 1] = {
    -0,  // 0
	 -0,  // 1
	 -0,  // 2
	 -1,  // 3
	 -2,  // 4
	 -5,  // 5
	 -9,  // 6
	 -14,  // 7
	 -19,  // 8
	 -22,  // 9
	 -27,  // 10
	 -32,  // 11
	 -37,  // 12
	 -39,  // 13
	 -42,  // 14
	 -46,  // 15
};

/* --- */
{
	nnodes++;
	if (iround == 1)
	{
		if (ibit == -1)
		{
			const int c = xlc_add(alpha_in, beta_in, gamma_in, WORD_SIZE);

			for (uint32_t iGamma_R = 0; iGamma_R < ALL_WORDS; iGamma_R++)
			{
				{
					uint32_t iGamma_L = alpha_in;
					const uint32_t oGamma_R = LROT((beta_in ^ iGamma_R), g_r2) & MASK;
					uint32_t oGamma_L = (gamma_in ^ oGamma_R) & MASK;

					if ((iGamma_L == 0) && (iGamma_R == 0))
					{
						/* skip the zero input masks */
						continue;
					}

					//					iGamma_L = RROT(alpha_in, g_r1) & MASK; // additional rot in the left input => rot right in order to store the mask to the addition (and not the mask to the round!)
					iGamma_L = LROT(alpha_in, g_r1) & MASK; // additional rot in the right input => rot left in order to store the mask to the addition (and not the mask to the round!)

					const Correlation T_zero[2] = {{iGamma_L, iGamma_R, 0}, {0, 0, 0}};
					marx_add_mask_to_trail(g_T, iround - 1, T_zero);

					const Correlation T_one[2] = {{oGamma_L, oGamma_R, c}, {0, 0, 0}};
					marx_add_mask_to_trail(g_T, iround, T_one);

					oGamma_L = RROT(oGamma_L, g_r1) & MASK; // right rot

					bool ret = speckey_best_linear_search_i(
						2,
						WORD_SIZE - 1, 
					   oGamma_L,
						0,
						0, 
						oGamma_R
					);
					if (ret == true)
					{
						return true;
					}

					marx_remove_mask_from_trail(g_T, iround - 1);
					marx_remove_mask_from_trail(g_T, iround);
				}
			}
		} else {
			const uint32_t word_size = (WORD_SIZE - ibit); /* word size of the partial masks: ibit = (WORD_SIZE - 1) down to 0 */
			const uint32_t mask_part = (~((uint32_t)0) >> (32 - word_size)); /* partial mask (word_size bits) */

			for (uint32_t w = 0; w < 8; w++)
			{
				const uint32_t alpha_i = (w >> 0) & 1;
				const uint32_t beta_i = (w >> 1) & 1;
				const uint32_t gamma_i = (w >> 2) & 1;

				const uint32_t alpha_part = alpha_in | (alpha_i << ibit);
				const uint32_t beta_part = beta_in | (beta_i << ibit);
				const uint32_t gamma_part = gamma_in | (gamma_i << ibit);

				/* Extract the word_size MS bits of alpha_part, beta_part,
				   gamma_part:(MSB alpha_in | ibit | 000000..000 LSB)
				*/
				const uint32_t alpha_part_msb = (alpha_part >> ibit) & mask_part;
				const uint32_t beta_part_msb = (beta_part >> ibit) & mask_part;
				const uint32_t gamma_part_msb = (gamma_part >> ibit) & mask_part;

				const int c_part = xlc_add(alpha_part_msb, beta_part_msb, gamma_part_msb, word_size); /* partial prob. */
				const int c_est = c_part + g_best_B[g_nRounds - 2];

				if (c_est >= g_Bn)
				{
				  //					for (uint32_t v = 0; v < 8; v++)
					{
						const int cd_est = c_est;

						if (cd_est >= g_Bn)
						{
							bool ret = speckey_best_linear_search_i(
								iround,
								ibit - 1,
								alpha_part,
								beta_part,
								gamma_part,
								iGamma_R_in
							);
							if (ret == true)
							{
								return true;
							}
						}
					}
				}
			}
		}
	}

	if ((iround > 1) && (iround != g_nRounds))
	{
		if (ibit == -1)
		{
			const int c = xlc_add(alpha_in, beta_in, gamma_in, WORD_SIZE);

			const uint32_t oGamma_R = LROT((beta_in ^ iGamma_R_in), g_r2) & MASK;
		   uint32_t oGamma_L = (gamma_in ^ oGamma_R) & MASK;

			const Correlation T_iround[2] = {{oGamma_L, oGamma_R, c}, {0, 0, 0}};
			marx_add_mask_to_trail(g_T, iround, T_iround);

			oGamma_L = RROT(oGamma_L, g_r1) & MASK;

			bool ret = speckey_best_linear_search_i(
				iround + 1,
				WORD_SIZE - 1, 
				oGamma_L,
				0,
				0, 
				oGamma_R
			);
			if (ret == true)
			{
				return true;
			}
			marx_remove_mask_from_trail(g_T, iround);
		} else {
			const uint32_t word_size = (WORD_SIZE - ibit); /* word size of the partial masks */
			const uint32_t mask_part = (~((uint32_t)0) >> (32 - word_size)); /* partial mask of word_size MS bits = 0000000FFF */
			const uint32_t mask_msb = (~((uint32_t)0) << ibit) & MASK; /* masks word_size MS bits = FFF000000 */

			int corr_trail = 0;
			/* c[1]d[1] c[2]d[2] ... c[iround - 1]d[iround - 1] => first
			   (iround - 1) rounds: c[0] = d[0] = 0 is the input mask corr.
			*/
			for (uint32_t i = 1; i < iround; i++)
			{
				int c_i = g_T[LEFT][i].c;
				int d_i = g_T[RIGHT][i].c;
				assert(d_i == 0);
				corr_trail += (c_i + d_i);
			}

			for (uint32_t w = 0; w < 4; w++)
			{
				const uint32_t beta_i = (w >> 0) & 1;
				const uint32_t gamma_i = (w >> 1) & 1;

				const uint32_t alpha_part = (alpha_in & mask_msb);
				const uint32_t beta_part = beta_in | (beta_i << ibit);
				const uint32_t gamma_part = gamma_in | (gamma_i << ibit);

				/* Extract the word_size MS bits of alpha_part, beta_part,
				   gamma_part:(MSB alpha_in | ibit | 000000..000 LSB)
				*/
				const uint32_t alpha_part_msb = (alpha_part >> ibit) & mask_part;
				const uint32_t beta_part_msb = (beta_part >> ibit) & mask_part;
				const uint32_t gamma_part_msb = (gamma_part >> ibit) & mask_part;

				const int c_part = xlc_add(alpha_part_msb, beta_part_msb, gamma_part_msb, word_size); /* partial prob. */

				/* c[1]d[1] c[2]d[2] ... c[iround - 1]d[iround - 1] (c_part = c[iround]) => first iround rounds */
				const int c_est = corr_trail + c_part + g_best_B[g_nRounds - iround - 1];

				if (c_est >= g_Bn)
				{
				  //					for (uint32_t v = 0; v < 4; v++)
					{
						const int cd_est = c_est;

						if (cd_est >= g_Bn)
						{
							bool ret = speckey_best_linear_search_i(
								iround,
								ibit - 1, 
								alpha_in,
								beta_part,
								gamma_part,
								iGamma_R_in
							);
							if (ret == true)
							{
								return true;
							}
						}
					}
				}
			}
		}
	}

	if (iround == g_nRounds)
	{
		if (ibit == -1)
		{
			const int c = xlc_add(alpha_in, beta_in, gamma_in, WORD_SIZE);

			const uint32_t oGamma_R = LROT((beta_in ^ iGamma_R_in), g_r2) & MASK;
			const uint32_t oGamma_L = (gamma_in ^ oGamma_R) & MASK;

			const Correlation T_iround[2] = {{oGamma_L, oGamma_R, c}, {0, 0, 0}};

			marx_add_mask_to_trail(g_T, iround, T_iround);

			/* c[1]d[1] c[2]d[2] ... c[iround]d[iround] => first iround rounds */
			int corr_trail = 0;
			for (uint32_t i = 1; i <= iround; i++)
			{
				int c_i = g_T[LEFT][i].c;
				int d_i = g_T[RIGHT][i].c;
				assert(d_i == 0);
				corr_trail += (c_i + d_i);
			}

			if (corr_trail >= g_Bn)
			{
				/* We have a winner ! */
				return true;
			}

			marx_remove_mask_from_trail(g_T, iround);
		} else {
			const uint32_t word_size = (WORD_SIZE - ibit); /* word size of the partial masks */
			const uint32_t mask_part = (~((uint32_t)0) >> (32 - word_size)); /* partial mask of word_size MS bits = 0000000FFF */
			const uint32_t mask_msb = (~((uint32_t)0) << ibit) & MASK; /* masks word_size MS bits = FFF000000 */

			int corr_trail = 0;
			/* c[1]d[1] c[2]d[2] ... c[iround - 1]d[iround - 1] => first
			   (iround - 1) rounds: c[0] = d[0] = 1.0 is the input mask corr
			*/
			for (uint32_t i = 1; i < iround; i++)
			{
				int c_i = g_T[LEFT][i].c;
				int d_i = g_T[RIGHT][i].c;
				assert(d_i == 0);
				corr_trail += (c_i + d_i);
			}

			for (uint32_t w = 0; w < 4; w++)
			{
				const uint32_t beta_i = (w >> 0) & 1;
				const uint32_t gamma_i = (w >> 1) & 1;

				const uint32_t alpha_part = (alpha_in & mask_msb);
				const uint32_t beta_part = beta_in | (beta_i << ibit);
				const uint32_t gamma_part = gamma_in | (gamma_i << ibit);

				/* Extract the word_size MS bits of alpha_part, beta_part,
				   gamma_part:(MSB alpha_in | ibit | 000000..000 LSB)
				*/
				const uint32_t alpha_part_msb = (alpha_part >> ibit) & mask_part;
				const uint32_t beta_part_msb = (beta_part >> ibit) & mask_part;
				const uint32_t gamma_part_msb = (gamma_part >> ibit) & mask_part;

				const int c_part = xlc_add(alpha_part_msb, beta_part_msb, gamma_part_msb, word_size); /* partial prob. */

				/* c[1]d[1] c[2]d[2] ... c[iround - 1]d[iround - 1] (c_part = c[iround]) => first iround rounds */
				const int c_est = corr_trail + c_part;

				if (c_est >= g_Bn)
				{
				  //					for (uint32_t v = 0; v < 4; v++)
					{
						const int cd_est = c_est;

						if (cd_est >= g_Bn)
						{
							bool ret = speckey_best_linear_search_i(
								iround,
								ibit - 1, 
								alpha_in,
								beta_part,
								gamma_part,
								iGamma_R_in
							);
							if (ret == true)
							{
								return true;
							}
						}
					}
				}
			}
		}
	}
	return false;
}

/* --- */

void test_xlc_add_monotonous_decrease()
{
  WORD_T word_size = WORD_SIZE;

  // fse16 slides example
#if 1
  // 7F E7 2A
  WORD_T i = 0x7F;
  WORD_T j = 0xE7;
  WORD_T k = 0x2A;
#endif
#if 0
  WORD_T i = xrandom() & MASK;
  WORD_T j = xrandom() & MASK;
  WORD_T k = xrandom() & MASK;
#endif

  printf("[%s:%d] --- %X %X %X ---\n", __FILE__, __LINE__, i, j, k);

  double corr_prev = 0.0;//xlc_add(i & 1, j & 1, k & 1, 1);

  for(WORD_T w = 1; w <= word_size; w++) {

	 WORD_MAX_T mask = (~0ULL >> (64 - w)); // full mask (word_size bits)

	 WORD_T ma = i & mask;
	 WORD_T mb = j & mask;
	 WORD_T mc = k & mask;

	 double corr = xlc_add(ma, mb, mc, w);// * xlc_add_sign(ma, mb, mc, w);
	 printf("[%s:%d] xlc(%2d: %X %X -> %X) = corr %f 2^%4.2f corr_prev %f 2^%4.2f\n", __FILE__, __LINE__, 
			  w, ma, mb, mc, corr, log2(corr), corr_prev, log2(corr_prev));

	 if(w > 1) {
		assert(corr <= corr_prev);
	 }

	 corr_prev = corr;
  }
  printf("[%s:%d] Test OK!\n", __FILE__, __LINE__);
}

/* --- */

// Disabled (never compiled) draft types, apparently meant as key/value
// types for the hash map sketched in the trailing comment.
#if 0
// One state: MBOXES words plus an associated probability.
typedef struct {
  WORD_T state[MBOXES];
  double prob;
} MarxWtrailState_t;

// A link between two states (source and destination).
typedef struct {
  WORD_T state_from[MBOXES];
  WORD_T state_to[MBOXES];
} MarxWtrailStateLink_t;

  //  boost::unordered_map<MarxWtrailState_t, uint32_t, marx_wtrail_state_hash, marx_wtrail_state_equal_to> linearhull_hash_map;
#endif


/* --- */
/* 
#if 1 // maximum
  bool L[MBOXES][MBOXES] = {
	 {0, 0, 1, 0},
	 {0, 0, 0, 1},
	 {1, 1, 0, 0},
	 {0, 1, 1, 0}
  };
#endif

[./tests/arx-widetrail-search-tests.cc:972] Maximum for this linear layer is: 2^-137
( 1  7  9 14 19 21 26 ) (15 17 23 25 ) (16 ) (27 ) ( 0 )
len[0] 28 = (1 15) + 13 = (1 -46.00) + -39.00 = -85
len[1] 16 = (1 15) + 1 = (1 -46.00) + 0.00 = -46
len[2] 4 = (0 15) + 4 = (0 -46.00) + -2.00 = -2
len[3] 4 = (0 15) + 4 = (0 -46.00) + -2.00 = -2
len[4] 4 = (0 15) + 4 = (0 -46.00) + -2.00 = -2

 */

/* 

#if 0 // original
  bool L[MBOXES][MBOXES] = {
	 {0, 1, 0, 0},
	 {0, 0, 1, 1},
	 {1, 1, 0, 0},
	 {0, 0, 1, 0}
  };
#endif

[./tests/arx-widetrail-search-tests.cc:972] Maximum for this linear layer is: 2^-123
( 0  6 11 13 16 ) ( 9 14 19 21 24 ) (12 ) (17 20 )
len[0] 20 = (1 15) + 5 = (1 -46.00) + -5.00 = -51
len[1] 20 = (1 15) + 5 = (1 -46.00) + -5.00 = -51
len[2] 4 = (0 15) + 4 = (0 -46.00) + -2.00 = -2
len[3] 8 = (0 15) + 8 = (0 -46.00) + -19.00 = -19

 */

/* --- */
void test_marx_wtrail_all_trunc_trails_all_partitions()
{

  int min_prob_layer_log2 = 0.0;
  bool min_L[MBOXES][MBOXES] = {
	 {0, 0, 0, 0},
	 {0, 0, 0, 0},
	 {0, 0, 0, 0},
	 {0, 0, 0, 0}
  };

  // Generate all linear layers
  uint32_t nlin_layers = (1U << (MBOXES * MBOXES));
  uint32_t nlin_layers_from = (nlin_layers / 2);
  uint32_t nlin_layers_to = nlin_layers_from + 1000;
  //  for(uint32_t l = 1; l < nlin_layers; l++) {
  for(uint32_t l = nlin_layers_from; l < nlin_layers_to; l++) {

	 bool L[MBOXES][MBOXES] = {
		{0, 0, 0, 0},
		{0, 0, 0, 0},
		{0, 0, 0, 0},
		{0, 0, 0, 0}
	 };

#if 0 // DEBUG
	 print_binary(l, 16);
	 printf(" %X", l);
	 printf("\n");
#endif // #if 0 // DEBUG
	 for(uint32_t row = 0; row < MBOXES; row++) {
		for(uint32_t col = 0; col < MBOXES; col++) {

		  uint32_t ipos = (row * MBOXES) + col;
		  assert(ipos < (MBOXES * MBOXES));
		  bool b_active = (l >> ipos) & 1;
		  L[row][col] = b_active;
		}
	 }
	 //	 if((l % 10000) == 0) {
	 if((((l - nlin_layers_from)) % 100) == 0) {
		//		printf("[%s:%d] Linear layer #%3d / %7d:\n", __FILE__, __LINE__, l, nlin_layers);
		uint32_t all = nlin_layers_to - nlin_layers_from;
		printf("[%s:%d] Linear layer #%3d / %7d:\n", __FILE__, __LINE__, (l - nlin_layers_from), all);
	 }
#if 0 // DEBUG
	 marx_wtrail_print_linear_layer(L);
#endif // #if 0 // DEBUG

	 uint32_t iround = 0;
	 std::array<std::array<bool, MBOXES>, NROUNDS+1> trunc_trail;
	 std::vector<std::array<std::array<bool, MBOXES>, NROUNDS+1>> all_trunc_trails;
	 marx_wtrail_init_trail(&trunc_trail);
	 for(uint32_t iword = 0; iword < ALL_STATES; iword++) {
		std::array<bool, MBOXES> istate {{0, 0, 0, 0}};
		marx_wtrail_word_to_state(&istate, iword);
		marx_wtrail_add_to_trail(istate, &trunc_trail, iround);
		marx_wtrail_all_trunc_trails(iround + 1, istate, L, trunc_trail, &all_trunc_trails);
	 }
#if 0 // DEBUG
	 marx_wtrail_print_all_trails(all_trunc_trails);
	 printf("[%s:%d] Found [%5d] truncated trails.\n", __FILE__, __LINE__, (uint32_t)all_trunc_trails.size());
	 printf("[%s:%d] Start partitioning...\n", __FILE__, __LINE__);
#endif // #if 0 // DEBUG
	 uint32_t i = 0;
	 int max_prob_trunc_log2 = LOG0; // the maximum among all truncated trails
	 std::vector<std::array<std::array<bool, MBOXES>, NROUNDS+1>>::const_iterator vec_iter;
	 for(vec_iter = all_trunc_trails.begin(); vec_iter != all_trunc_trails.end(); vec_iter++,i++) {
		std::array<std::array<bool, MBOXES>, NROUNDS+1> trunc_trail = *vec_iter;
		if(i == 0)
		  continue; // skip the all-zero trail
#if 0 // DEBUG
		uint32_t N = (uint32_t)all_trunc_trails.size();
		printf("Partition trail #%2d / %5d:\n", i, N);
		marx_wtrail_print_trail(trunc_trail);
#endif // #if 0 // DEBUG

		std::array<std::vector<int>, NNODES> tree;
		std::vector<std::vector<int>> path_partition;
		std::vector<std::vector<std::vector<int>>> all_partitions;
		std::vector<int> childless_nodes;
		marx_wtrail_trail_to_tree(trunc_trail, L, &tree);
#if 0 // DEBUG
		marx_wtrail_print_tree(tree);
#endif // #if 0 // DEBUG

#if 0 // DEBUG
		printf("[%s:%d] Sweep childless:\n", __FILE__, __LINE__);
#endif // #if 0 // DEBUG
		marx_wtrail_tree_sweep_childless_nodes(&tree, &childless_nodes);
#if 0 // DEBUG
		marx_wtrail_print_tree(tree);
#endif // #if 0 // DEBUG

		marx_wtrail_tree_path_partitions(tree, path_partition, &all_partitions);
#if 0 // DEBUG
		printf("[%s:%d] Adding [%2d] childless nodes: ", __FILE__, __LINE__, (uint32_t)childless_nodes.size());
		marx_wtrail_print_path(childless_nodes);
		printf("\n");
#endif // #if 0 // DEBUG
		marx_wtrail_all_partitions_add_childless(&all_partitions, childless_nodes);

#if 0 // DEBUG
		printf("[%s:%d] There are [%3d] path partitions:\n", __FILE__, __LINE__, (uint32_t)all_partitions.size());
		marx_wtrail_print_all_partitions(all_partitions);
		printf("[%s:%d] Probabilities of partitions:\n", __FILE__, __LINE__);
#endif // #if 0 // DEBUG
		int max_prob_path_log2 = marx_wtrail_max_partition_prob_log2(all_partitions);
#if 0 // DEBUG
		printf("[%s:%d] Max prob: 2^%d\n", __FILE__, __LINE__, max_prob_path_log2);
#endif // #if 0 // DEBUG
		if(max_prob_path_log2 > max_prob_trunc_log2) {
#if 0 // DEBUG
		  printf("Update max: 2^%d -> 2^%d\n", max_prob_trunc_log2, max_prob_path_log2);
#endif // #if 0 // DEBUG
		  max_prob_trunc_log2 = max_prob_path_log2;
		}
	 }
#if 0 // DEBUG
	 printf("[%s:%d] Maximum for this linear layer is: 2^%d\n", __FILE__, __LINE__, max_prob_trunc_log2);
#endif // #if 0 // DEBUG
	 if(max_prob_trunc_log2 <= min_prob_layer_log2) {
		printf("Update min: 2^%d -> 2^%d\n", min_prob_layer_log2, max_prob_trunc_log2);
		min_prob_layer_log2 = max_prob_trunc_log2;
		for(uint32_t row = 0; row < MBOXES; row++) { // copy min layer
		  for(uint32_t col = 0; col < MBOXES; col++) {
			 min_L[row][col] = L[row][col];
		  }
		}
		printf("[%s:%d] New min_L with prob 2^%d:\n", __FILE__, __LINE__, min_prob_layer_log2);
		marx_wtrail_print_linear_layer(min_L);
	 }
  }
  printf("[%s:%d] Minimum probability is 2^%d\n", __FILE__, __LINE__, min_prob_layer_log2);
  printf("[%s:%d] The minimum linear layer is:\n", __FILE__, __LINE__);
  marx_wtrail_print_linear_layer(min_L);
}

/* --- */

/*

http://www.macapp.net/pmwiki/pmwiki.php?n=Main.InvertMatrix

#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_cblas.h>

int main (void)
{
// Define the dimension n of the matrix
// and the signum s (for LU decomposition)
int n = 2;
int s;

// Define all the used matrices
gsl_matrix * m = gsl_matrix_alloc (n, n);
gsl_matrix * inverse = gsl_matrix_alloc (n, n);
gsl_permutation * perm = gsl_permutation_alloc (n);

// Fill the matrix m
//
//
//
//

// Make LU decomposition of matrix m
gsl_linalg_LU_decomp (m, perm, &s);

// Invert the matrix m
gsl_linalg_LU_invert (m, perm, inverse);
}
*/


/* --- */

// https://www.gnu.org/software/gsl/manual/html_node/Permutation-Examples.html
void test_permutation_matrices()
{
  std::vector<gsl_matrix> P_vec;
  marx_wtrail_build_permutation_matrices(MBOXES, &P_vec);
  std::vector<gsl_matrix>::iterator vec_iter;
  int i = 0;
  for(vec_iter = P_vec.begin(); vec_iter != P_vec.end(); vec_iter++, i++) {
	 gsl_matrix P = *vec_iter;
	 printf("P#%2d\n", i);
	 marx_wtrail_print_gsl_matrix_int(P, MBOXES, MBOXES);
  }
  for(vec_iter = P_vec.begin(); vec_iter != P_vec.end(); vec_iter++) {
	 gsl_matrix P = *vec_iter;
	 gsl_matrix_free(&P);
  }
}


/* --- */
															std::vector<std::vector<std::vector<int>>>* max_partitions)

/* --- */
	 //	 double prob_path = std::pow(g_marx_best_prob[MARXBOX_LEN - 1], quo) * g_marx_best_prob[rem - 1]; // p1^q + p2

/* --- */

/*
 * Convert a truncated trail to a tree.
 *
 * Every box (round r, position i) is addressed by the node index returned
 * by marx_wtrail_get_tree_node_index(r, i); (*tree)[n] is the list of
 * children of node n. Links are appended to *tree, which is not cleared
 * here.
 *
 * Rules applied to every active box of rounds 1..NROUNDS:
 *  - exactly one active term in (row i of L) x (state of round r-1):
 *    add the link parent -> child;
 *  - more than one active term AND the box ends up without children:
 *    add a self-link child -> child.
 * Active boxes of round 0 without children also get a self-link.
 *
 * \param trunc_trail truncated trail: NROUNDS+1 states of MBOXES flags.
 * \param L linear layer as an MBOXES x MBOXES boolean matrix.
 * \param tree output adjacency lists, indexed by node.
 */
void marx_wtrail_trail_to_tree(const std::array<std::array<bool, MBOXES>, NROUNDS+1> trunc_trail, 
										 const bool L[MBOXES][MBOXES],
										 std::array<std::vector<int>, NNODES>* tree)
{
  // Nodes that depend on more than one parent. Whether they are
  // childless is only known once the links of the next round have been
  // added, so the self-link decision is deferred until after the loop.
  std::vector<int> multi_parent_nodes;

  for(uint32_t r = 1; r < (NROUNDS + 1); r++) {
	 for(uint32_t i = 0; i < MBOXES; i++) {
		if(trunc_trail[r][i] == 1) {
#if 0 // DEBUG
		  marx_wtrail_print_state(trunc_trail[r-1]);
		  printf("\n");
#endif // #if 0 // DEBUG
		  uint32_t nactive_terms = marx_wtrail_row_by_state_active_terms(i, L, trunc_trail[r-1]);
		  /*
			* If a child depends on exactly one parent => it is part of a
			* LongMarxBox, so add a link parent->child
			*/
		  if(nactive_terms == 1) { // find the active term (the parent)
			 uint32_t j = 0;
			 while((L[i][j] & trunc_trail[r-1][j]) == 0) {
				j++;
				assert(j < MBOXES); // an active term must exist since nactive_terms == 1
			 }
			 int parent = marx_wtrail_get_tree_node_index(r-1, j);
			 int child = marx_wtrail_get_tree_node_index(r, i);
			 (*tree)[parent].push_back(child); // add link (parent -> child)
		  }
		  /*
			* If a child depends on more than one parent it can not be
			* part of a LongMarxBox; remember it and decide about the
			* self-link once its own children are known.
			*/
		  if(nactive_terms > 1) {
			 multi_parent_nodes.push_back(marx_wtrail_get_tree_node_index(r, i));
		  }
		}
	 }
  }
  /*
	* A node with more than one parent that has no children (i.e. is a
	* childless parent) gets a self-link child->child. Self-links only
	* touch the node's own list, so the order of this pass is irrelevant.
	*/
  for(std::vector<int>::const_iterator node_iter = multi_parent_nodes.begin();
		node_iter != multi_parent_nodes.end(); node_iter++) {
	 int child = *node_iter;
	 if((*tree)[child].empty()) {
		(*tree)[child].push_back(child); // add link (child -> child)
		printf("[%s:%d] Add self-link %2d -> %2d\n", __FILE__, __LINE__, child, child);
	 }
  }
  // parse the input state for childless input nodes and make them
  // their own child
  for(uint32_t i = 0; i < MBOXES; i++) {
	 int parent = marx_wtrail_get_tree_node_index(0, i);
	 if((trunc_trail[0][i] == 1) && ((*tree)[parent].empty())) {
		(*tree)[parent].push_back(parent); // add self-link (parent -> parent)
	 }
  }
}

/* --- */
#if 0
  for(std::vector<std::vector<std::vector<int>>>::const_iterator vec_iter = all_partitions->begin(); 
		vec_iter != all_partitions->end(); vec_iter++,i++) {
	 std::vector<std::vector<int>> path_partition = *vec_iter;
	 path_partition.push_back(childless_nodes);
  }
#endif

/* --- */

/*
 * Copy a tree: the adjacency lists of all NNODES nodes of from_tree
 * replace the previous contents of *to_tree.
 */
void marx_wtrail_tree_copy(const std::array<std::vector<int>, NNODES> from_tree,
									std::array<std::vector<int>, NNODES>* to_tree)
{
  std::array<std::vector<int>, NNODES>& destination = *to_tree;
  destination = from_tree;
}

/* --- */

/*
 * Add a given path to a tree.
 *
 * The path is a sequence of node indices n0, n1, ..., nk. For every
 * consecutive pair the link (n_{t-1} -> n_t) is appended to the child
 * list of n_{t-1}. The path must not be empty and none of its links may
 * already be present in the tree (both conditions are asserted).
 */
void marx_wtrail_tree_add_path(const std::vector<int> path,
										 std::array<std::vector<int>, NNODES>* tree)
{
  assert(!path.empty());
  int from_node = path.front();
  for(size_t pos = 1; pos < path.size(); pos++) {
	 assert(from_node < NNODES);
	 const int to_node = path[pos];
	 std::vector<int>& links = (*tree)[from_node];
	 // the link must be new
	 const bool b_already_linked = (std::find(links.begin(), links.end(), to_node) != links.end());
	 assert(!b_already_linked);
	 links.push_back(to_node);
	 from_node = to_node; // the child is the parent of the next link
  }
}
  printf("[%s:%d] Add path: \n", __FILE__, __LINE__);
  marx_wtrail_print_path(all_paths[0]);
  marx_wtrail_tree_add_path(all_paths[0], &tree);
  printf("[%s:%d] New tree:\n", __FILE__, __LINE__);
  marx_wtrail_print_tree(tree);


/* --- */
#if 0
	 if(vec_iter == path.begin()) {
		parent = *vec_iter;
		continue;
	 }
	 assert(parent < NNODES);
	 int child = *vec_iter;
	 std::vector<int>::iterator child_index = std::find((*tree)[parent].begin(), (*tree)[parent].end(), child);
	 assert((child_index != (*tree)[parent].end()));
	 if(child_index != (*tree)[parent].end()) { // delete child
		(*tree)[parent].erase(child_index);
	 }
	 parent = child;
#endif

/* --- */
  int parent = 0; 
  for(vec_iter = path.begin(); vec_iter != path.end(); vec_iter++) {
	 if(vec_iter == path.begin()) {
	 }
	 assert(parent < NNODES);
	 int child = *vec_iter;
	 //	 auto i = std::find(tree[parent].begin(), tree[parent].end(), child);
	 //	 if(i != v.end()) { // delete child
	 //		tree[parent].erase(i);
	 //	 }
	 parent = child;
  }

/* --- */

  std::vector<int>::const_iterator vec_iter = path.begin();
  int parent = *vec_iter; 
  for(vec_iter = std::advance(path.begin(), 1); vec_iter != path.end(); vec_iter++) {
	 assert(parent < NNODES);
	 int child = *vec_iter;
	 //	 auto i = std::find(tree[parent].begin(), tree[parent].end(), child);
	 //	 if(i != v.end()) { // delete child
	 //		tree[parent].erase(i);
	 //	 }
	 parent = child;
  }


/* --- */

/*
 * Global counter of the number of (long) MARX-boxes
 */
//uint32_t g_nlbox[NROUNDS+1] = {0};

/*
 * Print the Lbox counters: one line for every length (index into nlbox,
 * 0 .. NROUNDS+1) whose counter is non-zero.
 */
void marx_wtrail_lbox_count_print(uint32_t nlbox[NROUNDS+2])
{
  uint32_t length = 0;
  while(length < (NROUNDS + 2)) {
	 if(nlbox[length] != 0) {
		printf("#Lboxes of length %2d is: %2d\n", length, nlbox[length]);
	 }
	 length++;
  }
}


/* --- */

/*
 * Counts the number of (long) MARX-boxes in a given trail
 *
 * Heavily instrumented variant: every counter update is traced with
 * printf.
 *
 * \param nlbox counters indexed by length (NROUNDS+2 entries); updated in
 *        place, not reset here.
 * \param trunc_trail truncated trail: NROUNDS+1 states of MBOXES flags.
 * \param L linear layer as an MBOXES x MBOXES boolean matrix.
 *
 * cnt[i] holds the current length associated with box position i. It is
 * seeded from the round-0 state and, for every round in which row i of
 * L has exactly one active term j, becomes cnt[j] + 1 (or 1 if
 * nlbox[cnt[j]] is already zero).
 *
 * NOTE(review): cnt[] is updated in place inside a round, so cnt[j] may
 * already carry the value written earlier in the same round when j < i
 * -- confirm this is intended.
 */
void marx_wtrail_lbox_count(uint32_t nlbox[NROUNDS+2], const bool trunc_trail[NROUNDS+1][MBOXES], const bool L[MBOXES][MBOXES])
{
  printf("[%s:%d] Before\n", __FILE__, __LINE__);
  marx_wtrail_lbox_count_print(nlbox);
  uint32_t cnt[MBOXES] = {0};
  // Seed the lengths from the input state: an active box starts a box of length 1.
  for(uint32_t i = 0; i < MBOXES; i++) {
	 cnt[i] = trunc_trail[0][i];
	 if(cnt[i]) {
		nlbox[cnt[i]]++;
	 }
	 // printed for inactive boxes too (then it reports nlbox[0])
	 printf("Increase nlbox[%d] to %d\n", cnt[i], nlbox[cnt[i]]);
  }
  printf("[%s:%d]  After\n", __FILE__, __LINE__);
  marx_wtrail_lbox_count_print(nlbox);

  for(uint32_t r = 0; r < NROUNDS; r++) {
	 printf("\n[%s:%d] Round r =  %2d\n", __FILE__, __LINE__, r);
	 marx_wtrail_lbox_count_print(nlbox);
	 for(uint32_t i = 0; i < MBOXES; i++) {
		uint32_t nactive_terms = marx_wtrail_row_by_state_active_terms(i, L, trunc_trail[r]);
		// trace: row i of L, the state of round r and their product
		printf("i %d: ", i);
		marx_wtrail_print_state(L[i]);
		printf(" x ");
		marx_wtrail_print_state(trunc_trail[r]);
		printf(" = ");
		bool res = marx_wtrail_row_by_state_mult(i, L, trunc_trail[r]);
		printf("%d \n", res);
		if(nactive_terms == 1) {
		  // find the single active term j; the search is unbounded and
		  // relies on nactive_terms == 1 guaranteeing that one exists
		  uint32_t j = 0;
		  while((L[i][j] & trunc_trail[r][j]) == 0) {
			 printf("L[%d][%d] & TT[%d][%d] = %d & %d | j = %d\n", i, j, r, j, L[i][j], trunc_trail[r][j], j);
			 j++;
		  }
		  printf("[%s:%d] R %d i %d nonzero j = %d\n", __FILE__, __LINE__, r, i, j);
		  //		  assert(nlbox[cnt[j]] > 0);
		  printf("Decrease nlbox[%d] %d to %d\n", cnt[j], nlbox[cnt[j]], nlbox[cnt[j]] - 1);
		  if(nlbox[cnt[j]]) {
			 // extend the box of j by one: move one count from length
			 // cnt[j] to length cnt[j] + 1
			 nlbox[cnt[j]]--;
			 cnt[i] = cnt[j] + 1;
			 nlbox[cnt[i]]++;
		  } else {
			 // nothing left to extend at that length: start a new box of length 1
			 cnt[i] = 1;
			 nlbox[cnt[i]]++;
		  }
		  printf("Increase nlbox[%d] to %d\n", cnt[i], nlbox[cnt[i]]);
		}
	 }
  }
}


/* --- */

/*
 * Run marx_wtrail_lbox_count() on a fixed truncated trail and a fixed
 * linear layer, starting from all-zero counters, and print the trail
 * and the resulting counters.
 */
void test_marx_wtrail_lbox_count()
{
  assert(NROUNDS <= 5);

  // linear layer
  bool L[MBOXES][MBOXES] = {
	 {false,  true, false, false},
	 {false, false,  true,  true},
	 { true,  true, false, false},
	 {false, false,  true, false}
  };
  // truncated trail; one row per round, columns are s[0] s[1] s[2] s[3]
  const bool trunc_trail[NROUNDS+1][MBOXES] = {
	 {false, false,  true,  true},
	 {false, false, false,  true},
	 {false,  true, false, false},
	 { true, false,  true, false},
	 {false,  true,  true,  true},
  };
  // all counters start at zero
  uint32_t nlbox[NROUNDS+2] = {0};

  marx_wtrail_print_trail(trunc_trail);
  marx_wtrail_lbox_count(nlbox, trunc_trail, L);
  marx_wtrail_lbox_count_print(nlbox);
}


/* --- */
		  // Pack row i of L into a bit-mask (bit k is set iff L[i][k] is
		  // set) and skip rows that have more than one active term.
		  uint32_t word = 0;
		  for(uint32_t k = 0; k < MBOXES; k++) {
			 bool iterm = L[i][k];
			 // shift by the column index k: shifting by the row index i
			 // would put every term on the same bit, so the weight could
			 // never exceed 1
			 word |= ((uint32_t)iterm << k);
		  }
		  uint32_t hw = hamming_weight(word);
		  if(hw > 1)
			 continue;


/* --- */

/*
 * Counts the number of (long) MARX-boxes in a given trail
 *
 * nlbox[] (indexed by length) is updated in place. A per-position length
 * is seeded from the round-0 state and then, for every round r and
 * position i, updated from the number of active terms of
 * (row i of L) x (state of round r):
 *   0 terms  -> the length is reset to 0;
 *   1 term j -> one count moves from the length of j to that length + 1;
 *   >1 terms -> the length is taken from the state of round r+1.
 */
void marx_wtrail_lbox_count(uint32_t nlbox[NROUNDS+1], const bool trunc_trail[NROUNDS+1][MBOXES], const bool L[MBOXES][MBOXES])
{
  uint32_t len[MBOXES] = {0};
  for(uint32_t ibox = 0; ibox < MBOXES; ibox++) {
	 const uint32_t first_len = trunc_trail[0][ibox];
	 len[ibox] = first_len;
	 nlbox[first_len]++;
  }
  for(uint32_t r = 0; r < NROUNDS; r++) {
	 for(uint32_t ibox = 0; ibox < MBOXES; ibox++) {
		const uint32_t nterms = marx_wtrail_row_by_state_active_terms(ibox, L, trunc_trail[r]);
		if(nterms == 0) {
		  len[ibox] = 0;
		} else if(nterms > 1) {
		  len[ibox] = trunc_trail[r+1][ibox];
		} else {
		  // exactly one active term: locate it
		  uint32_t src = 0;
		  for(; (L[ibox][src] & trunc_trail[r][src]) == 0; src++) {
		  }
		  marx_wtrail_print_state(trunc_trail[r]);
		  printf("\n");
		  printf("[%s:%d] R %d i %d nonzero j = %d\n", __FILE__, __LINE__, r, ibox, src);
		  // move one count from the length of src to that length + 1
		  const uint32_t old_len = len[src];
		  assert(nlbox[old_len] > 0);
		  nlbox[old_len]--;
		  len[ibox] = old_len + 1;
		  nlbox[len[ibox]]++;
		}
	 }
  }
}


/* --- */

/*
 * Counts the number of (long) MARX-boxes in a given trail
 *
 * A per-position length counter is seeded from the round-0 state. For
 * rounds 1..NROUNDS and every position i the number of active terms of
 * (row i of L) x (state of round r) decides the update:
 *   >1 terms -> nlbox[counter] is incremented and the counter is reset;
 *   1 term   -> the counter is incremented;
 *   0 terms  -> nothing changes.
 * All intermediate values are printed.
 */
void marx_wtrail_lbox_count(uint32_t nlbox[NROUNDS+1], const bool trunc_trail[NROUNDS+1][MBOXES], const bool L[MBOXES][MBOXES])
{
  uint32_t len[MBOXES] = {0};
  for(uint32_t ibox = 0; ibox < MBOXES; ibox++) {
	 len[ibox] = trunc_trail[0][ibox];
	 printf("%d ", len[ibox]);
  }
  printf("\n");
  marx_wtrail_print_state(trunc_trail[0]);
  printf("\n");
  for(uint32_t r = 1; r <= NROUNDS; r++) {
	 marx_wtrail_print_state(trunc_trail[r]);
	 printf("\n");
	 for(uint32_t ibox = 0; ibox < MBOXES; ibox++) {
		printf("R %2d i %2d\n", r, ibox);
		const uint32_t nterms = marx_wtrail_row_by_state_active_terms(ibox, L, trunc_trail[r]);
		if(nterms == 1) { // the i-th Mbox is 1 => increase the length of the Lbox
		  len[ibox]++;
		  printf("[%s:%d] (nactive_terms == 1) cnt[%2d] %2d\n", __FILE__, __LINE__, len[ibox], len[ibox]);
		} else if(nterms > 1) { // the i-th Mbox can be 0/1 => end of an Lbox
		  const uint32_t done_len = len[ibox];
		  nlbox[done_len]++; // count an Lbox of size done_len
		  printf("[%s:%d] nlbox[%2d] %2d\n", __FILE__, __LINE__, done_len, nlbox[done_len]);
		  len[ibox] = 0; // reset counter
		}
		printf("cnt[%2d] %2d\n", ibox, len[ibox]);
	 }
  }
}

/* --- */

/*
 * Experimentally check, round by round, the probabilities of an XOR
 * differential trail using an already expanded key.
 *
 * For every i < nrounds, npairs pairs with input difference
 * (dx_init, dy_init) are generated with xrandom(), both members are
 * encrypted over i+1 rounds with speck_encrypt() and the pairs whose
 * output difference equals (trail[i].dx, trail[i].dy) are counted. The
 * measured fraction is printed next to the product of the probabilities
 * trail[0].p ... trail[i].p.
 *
 * \param nrounds number of trail entries to check.
 * \param npairs number of pairs per round.
 * \param key expanded key passed to speck_encrypt().
 * \param trail output differences and probabilities per round.
 * \param dx_init, dy_init input difference.
 * \param right_rot_const, left_rot_const rotation constants; asserted to
 *        match the constants selected by WORD_SIZE.
 * \return number of rounds for which no pair followed the differential
 *         although trail[i].p is non-zero.
 *
 * \see speck_verify_xor_differential
 */
uint32_t speck_verify_xor_differential_expanded_key(uint32_t nrounds, uint32_t npairs, 
																	 WORD_T key[SPECK_MAX_NROUNDS], differential_t trail[NROUNDS],
																	 const WORD_T dx_init, const WORD_T dy_init,
																	 uint32_t right_rot_const, uint32_t left_rot_const)
{
  if(WORD_SIZE != 16) {
	 assert(right_rot_const == SPECK_RIGHT_ROT_CONST); 
	 assert(left_rot_const == SPECK_LEFT_ROT_CONST);
  } else {
	 assert(right_rot_const == SPECK_RIGHT_ROT_CONST_16BITS); 
	 assert(left_rot_const == SPECK_LEFT_ROT_CONST_16BITS);
  }

  printf("[%s:%d] Verify P of differentials (2^%f CPs)...\n", __FILE__, __LINE__, log2(npairs));
  printf("Input differences: %16llX %16llX\n\n", (WORD_MAX_T)dx_init, (WORD_MAX_T)dy_init);

  uint32_t nwarnings = 0;
  double prob_theory = 1.0; // running product of the trail probabilities
  uint32_t iround = 0;
  while(iround < nrounds) {
	 const uint32_t enc_nrounds = iround + 1;
	 const WORD_T dx_target = trail[iround].dx;
	 const WORD_T dy_target = trail[iround].dy;
	 prob_theory *= trail[iround].p;

	 // count the pairs that end in the target difference
	 uint32_t nhits = 0;
	 for(uint64_t ipair = 0; ipair < npairs; ipair++) {
		// draw order matters: first the x word, then the y word
		WORD_T x1 = xrandom() & MASK;
		WORD_T y1 = xrandom() & MASK;
		WORD_T x2 = XOR(x1, dx_init);
		WORD_T y2 = XOR(y1, dy_init);

		speck_encrypt(key, enc_nrounds, right_rot_const, left_rot_const, &x1, &y1);
		speck_encrypt(key, enc_nrounds, right_rot_const, left_rot_const, &x2, &y2);

		const WORD_T dx_ctext = XOR(x1, x2);
		const WORD_T dy_ctext = XOR(y1, y2);
		const bool b_follows = (dx_ctext == dx_target) && (dy_ctext == dy_target);
		if(b_follows) {
		  nhits++;
		}
	 }
	 const double prob_exper = (double)nhits / (double)npairs;

#if (WORD_SIZE <= 32)									  // DEBUG
	 printf("R#%2d Output differences: %8X %8X\n", iround, dx_target, dy_target);
	 printf("THE %2d: %f (2^%f) %8X -> %8X\n", enc_nrounds, prob_theory, log2(prob_theory), trail[iround].dx, trail[iround].dy);
	 printf("EXP %2d: %f (2^%f) %8X -> %8X\n\n", enc_nrounds, prob_exper, log2(prob_exper), trail[iround].dx, trail[iround].dy);
#else // (WORD_SIZE > 32)								  // DEBUG
	 printf("R#%2d Output differences: %16llX %16llX\n", iround, (WORD_MAX_T)dx_target, (WORD_MAX_T)dy_target);
	 printf("THE %2d: %f (2^%f) %16llX -> %16llX\n", enc_nrounds, prob_theory, log2(prob_theory), (WORD_MAX_T)trail[iround].dx, (WORD_MAX_T)trail[iround].dy);
	 printf("EXP %2d: %f (2^%f) %16llX -> %16llX\n\n", enc_nrounds, prob_exper, log2(prob_exper), (WORD_MAX_T)trail[iround].dx, (WORD_MAX_T)trail[iround].dy);
#endif

	 // a non-zero theoretical probability that was never observed
	 if((trail[iround].p != 0.0) && (prob_exper == 0.0)) {
		nwarnings++;
	 }
	 iround++;
  }
  printf("OK\n");
  return nwarnings;
}

/* --- */

/*
 * Hard-coded differential trail for WORD_SIZE == 24, split into
 * alternative segments; exactly one segment is enabled by its #if.
 *
 * Each row is {dx, dy, <third field, always 0>, p} with p = 2^-w and
 * the trailing comment is the index of the row in the full trail. In
 * every segment (delta_L, delta_R) equals the (dx, dy) pair of the row
 * that precedes its first active row, i.e. presumably the input
 * difference of the segment, and NROUNDS is asserted to match the
 * expected length of the segment.
 */
#if (WORD_SIZE == 24)
#if 0 // top: full trail, rows 01..11
  assert(NROUNDS == 11);
  WORD_T delta_L = 0x001202;
  WORD_T delta_R = 0x020002;
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 //	 {0x001202, 0x020002, 0, (1.0 / (double)(1ULL <<  0))}, // 00
	 {0x000010, 0x100000, 0, (1.0 / (double)(1ULL <<  3))}, // 01
	 {0x000000, 0x800000, 0, (1.0 / (double)(1ULL <<  1))}, // 02
	 {0x800000, 0x800004, 0, (1.0 / (double)(1ULL <<  0))}, // 03
	 {0x808004, 0x808020, 0, (1.0 / (double)(1ULL <<  2))}, // 04
	 {0x8400A0, 0x8001A4, 0, (1.0 / (double)(1ULL <<  4))}, // 05
	 {0x608DA4, 0x608080, 0, (1.0 / (double)(1ULL <<  9))}, // 06
	 {0x042003, 0x002400, 0, (1.0 / (double)(1ULL << 11))}, // 07
	 {0x012020, 0x000020, 0, (1.0 / (double)(1ULL <<  5))}, // 08
	 {0x200100, 0x200000, 0, (1.0 / (double)(1ULL <<  3))}, // 09
	 {0x202001, 0x202000, 0, (1.0 / (double)(1ULL <<  3))}, // 10
	 {0x210020, 0x200021, 0, (1.0 / (double)(1ULL <<  4))}, // 11
  };
#endif
#if 0 // middle: rows 06..11, starting from the difference of row 05
  assert(NROUNDS == 6);
  WORD_T delta_L = 0x8400A0;
  WORD_T delta_R = 0x8001A4;
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 //	 {0x001202, 0x020002, 0, (1.0 / (double)(1ULL <<  0))}, // 00
	 //	 {0x000010, 0x100000, 0, (1.0 / (double)(1ULL <<  3))}, // 01
	 //	 {0x000000, 0x800000, 0, (1.0 / (double)(1ULL <<  1))}, // 02
	 //	 {0x800000, 0x800004, 0, (1.0 / (double)(1ULL <<  0))}, // 03
	 //	 {0x808004, 0x808020, 0, (1.0 / (double)(1ULL <<  2))}, // 04
	 //	 {0x8400A0, 0x8001A4, 0, (1.0 / (double)(1ULL <<  4))}, // 05
	 {0x608DA4, 0x608080, 0, (1.0 / (double)(1ULL <<  9))}, // 06
	 {0x042003, 0x002400, 0, (1.0 / (double)(1ULL << 11))}, // 07
	 {0x012020, 0x000020, 0, (1.0 / (double)(1ULL <<  5))}, // 08
	 {0x200100, 0x200000, 0, (1.0 / (double)(1ULL <<  3))}, // 09
	 {0x202001, 0x202000, 0, (1.0 / (double)(1ULL <<  3))}, // 10
	 {0x210020, 0x200021, 0, (1.0 / (double)(1ULL <<  4))}, // 11
  };
#endif
#if 1 // middle: rows 07..11, starting from the difference of row 06 (the enabled segment)
  assert(NROUNDS == 5);
  WORD_T delta_L = 0x608DA4;
  WORD_T delta_R = 0x608080;
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 //	 {0x001202, 0x020002, 0, (1.0 / (double)(1ULL <<  0))}, // 00
	 //	 {0x000010, 0x100000, 0, (1.0 / (double)(1ULL <<  3))}, // 01
	 //	 {0x000000, 0x800000, 0, (1.0 / (double)(1ULL <<  1))}, // 02
	 //	 {0x800000, 0x800004, 0, (1.0 / (double)(1ULL <<  0))}, // 03
	 //	 {0x808004, 0x808020, 0, (1.0 / (double)(1ULL <<  2))}, // 04
	 //	 {0x8400A0, 0x8001A4, 0, (1.0 / (double)(1ULL <<  4))}, // 05
	 //	 {0x608DA4, 0x608080, 0, (1.0 / (double)(1ULL <<  9))}, // 06
	 {0x042003, 0x002400, 0, (1.0 / (double)(1ULL << 11))}, // 07
	 {0x012020, 0x000020, 0, (1.0 / (double)(1ULL <<  5))}, // 08
	 {0x200100, 0x200000, 0, (1.0 / (double)(1ULL <<  3))}, // 09
	 {0x202001, 0x202000, 0, (1.0 / (double)(1ULL <<  3))}, // 10
	 {0x210020, 0x200021, 0, (1.0 / (double)(1ULL <<  4))}, // 11
  };
#endif
#if 0 // bottom: rows 08..11, starting from the difference of row 07
  assert(NROUNDS == 4);
  WORD_T delta_L = 0x042003;
  WORD_T delta_R = 0x002400;
  differential_t diff_arr[SPECK_TRAIL_LEN] = {
	 //	 {0x001202, 0x020002, 0, (1.0 / (double)(1ULL <<  0))}, // 00
	 //	 {0x000010, 0x100000, 0, (1.0 / (double)(1ULL <<  3))}, // 01
	 //	 {0x000000, 0x800000, 0, (1.0 / (double)(1ULL <<  1))}, // 02
	 //	 {0x800000, 0x800004, 0, (1.0 / (double)(1ULL <<  0))}, // 03
	 //	 {0x808004, 0x808020, 0, (1.0 / (double)(1ULL <<  2))}, // 04
	 //	 {0x8400A0, 0x8001A4, 0, (1.0 / (double)(1ULL <<  4))}, // 05
	 //	 {0x608DA4, 0x608080, 0, (1.0 / (double)(1ULL <<  9))}, // 06
	 //	 {0x042003, 0x002400, 0, (1.0 / (double)(1ULL << 11))}, // 07
	 {0x012020, 0x000020, 0, (1.0 / (double)(1ULL <<  5))}, // 08
	 {0x200100, 0x200000, 0, (1.0 / (double)(1ULL <<  3))}, // 09
	 {0x202001, 0x202000, 0, (1.0 / (double)(1ULL <<  3))}, // 10
	 {0x210020, 0x200021, 0, (1.0 / (double)(1ULL <<  4))}, // 11
  };
#endif
#endif // #if (WORD_SIZE == 24)

/* --- */
	 fprintf(fp, "$%2d$ & \\texttt{%2X}\\texttt{%2X}\\texttt{%2X}\\texttt{%2X} & \\texttt{%2X}\\texttt{%2X}\\texttt{%2X}\\texttt{%2X} & $2^{%4.2f}$ & $2^{%2.0f}$ \\\\\n", i_hull,
				H.i_mask[0], H.i_mask[1], H.i_mask[2], H.i_mask[3],
				H.o_mask[0], H.o_mask[1], H.o_mask[2], H.o_mask[3], 
				log2(std::abs(H.corr)), log2(std::abs(corr_exper)));


/* --- */
	 printf("BEFORE: %2d: M_LR %8X %8X %4.2f %8X %8X %4.2f\n", i, 
			  (*T)[LEFT][i].dx, (*T)[LEFT][i].dy, (*T)[LEFT][i].p,
			  (*T)[RIGHT][i].dx, (*T)[RIGHT][i].dy, (*T)[RIGHT][i].p);

	 printf(" AFTER: %2d: M_LR %8X %8X %4.2f %8X %8X %4.2f\n", i, 
			  (*T)[LEFT][i].dx, (*T)[LEFT][i].dy, (*T)[LEFT][i].p,
			  (*T)[RIGHT][i].dx, (*T)[RIGHT][i].dy, (*T)[RIGHT][i].p);



/* --- */

#if 0
	 sscanf(line, "%X %X %X %X %X %X %d %d\n", 
			  &T[LEFT][i].dx, &T[LEFT][i].dy, &T[LEFT][i].dz, 
			  &T[RIGHT][i].dx, &T[RIGHT][i].dy, &T[RIGHT][i].dz, 
			  &T[LEFT][i].log2p, &T[RIGHT][i].log2p);
	 T[LEFT][i].p = std::pow(2, T[LEFT][i].log2p);
	 T[RIGHT][i].p = std::pow(2, T[RIGHT][i].log2p);

  uint32_t k[16] = {0};

  // extract the key
  read = getline(&line, &len, fp);
  sscanf(line, "%X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X\n", 
			&k[0], &k[1], &k[2], &k[3], &k[4], &k[5], &k[6], &k[7],  
			&k[8], &k[9], &k[10], &k[11], &k[12], &k[13], &k[14], &k[15]);

  printf("[%s:%d] key = \n", __FILE__, __LINE__);
  for(uint32_t i = 0; i < 16; i++) {
	 printf("%X ", k[i]);
  }
  printf("\n");

  while((read = getline(&line, &len, fp)) != -1) {
	 nline++;
	 uint32_t is_good = 0;
	 WORD_T x1_L, x1_R, x2_L, x2_R, y1_L, y1_R, y2_L, y2_R;
	 sscanf(line, "%X %X %X %X %X %X %X %X %d\n", 
			  &x1_L, &x1_R, &x2_L, &x2_R, &y1_L, &y1_R, &y2_L, &y2_R, &is_good);
	 printf("\nline #%5d: length %zu \n%s", nline, read, line);
	 printf("%X %X %X %X %X %X %X %X %d\n", 
			  x1_L, x1_R, x2_L, x2_R, y1_L, y1_R, y2_L, y2_R, is_good);
  }
#endif

/* --- */
  std::vector<differential_3d_t (*)[NROUNDS]> T_vec;

/**
 * Init the trail T: in both halves of the trail every one of the
 * NROUNDS elements gets zero differences, probability 0.0 and
 * log2 probability LOG0.
 */
void marx_init_diff_trail(differential_3d_t T[2][NROUNDS])
{
  for(uint32_t ihalf = 0; ihalf < 2; ihalf++) {
	 differential_3d_t* half = T[ihalf];
	 for(uint32_t iround = 0; iround < NROUNDS; iround++) {
		differential_3d_t& elem = half[iround];
		elem.dx = 0;
		elem.dy = 0;
		elem.dz = 0;
		elem.p = 0.0;
		elem.log2p = LOG0;
	 }
  }
}


/**
 * Print the trail T: one line per round with the LEFT and the RIGHT
 * differential (dx dy -> dz and log2 of its probability), followed by
 * log2 of the product of all probabilities of the trail.
 */
void marx_print_diff_trail(differential_3d_t T[2][NROUNDS])
{
  printf("[%s:%d] %s() Print trail log2:\n", __FILE__, __LINE__, __FUNCTION__);
  double p = 1.0;
  uint32_t i = 0;
  while(i < NROUNDS) {
	 const differential_3d_t& left = T[LEFT][i];
	 const differential_3d_t& right = T[RIGHT][i];
	 printf("%2d: %8X %8X -> %8X %4.2f | ", i, left.dx, left.dy, left.dz, log2(left.p));
	 printf("%8X %8X -> %8X %4.2f\n", right.dx, right.dy, right.dz, log2(right.p));
	 p *= (left.p * right.p);
#if 0 // DEBUG
	 double p_tmp = xdp_add_lm(T[LEFT][i].dx, T[LEFT][i].dy, T[LEFT][i].dz);
	 assert(p_tmp == T[LEFT][i].p);
	 p_tmp = xdp_add_lm(T[RIGHT][i].dx, T[RIGHT][i].dy, T[RIGHT][i].dz);
	 assert(p_tmp == T[RIGHT][i].p);
	 if(i > 0) {
		WORD_T dy_left = (LROT(T[RIGHT][i-1].dy, g_r2) ^ T[RIGHT][i].dx);
		WORD_T dy_right = (LROT(T[LEFT][i-1].dy, g_r1) ^ T[LEFT][i].dx);
		assert(dy_left == T[LEFT][i].dy);
		assert(dy_right == T[RIGHT][i].dy);
	 }
#endif // #if 0 // DEBUG
	 i++;
  }
  printf("p_trail %4.2f\n", log2(p));
}

/**
 * Print element i of the trail T on one line: the three differences of
 * the LEFT and of the RIGHT differential followed by their two
 * probabilities. The index must be below NROUNDS (asserted).
 */
void marx_print_diff_trail_element(const differential_3d_t T[2][NROUNDS], uint32_t i)
{
  assert(i < NROUNDS);
  const differential_3d_t& left = T[LEFT][i];
  const differential_3d_t& right = T[RIGHT][i];
  printf("T[%2d]: %X %X %X %X %X %X %4.2f %4.2f\n", i,
			left.dx, left.dy, left.dz,
			right.dx, right.dy, right.dz,
			left.p, right.p);
}

/**
 * Print a vector of trails: after a header line every stored trail is
 * printed with marx_print_diff_trail(), in the order of the vector.
 */
void marx_print_diff_trail_vector(std::vector<differential_3d_t (*)[NROUNDS]> T_vec)
{
  printf("[%s:%d] %s()\n", __FILE__, __LINE__, __FUNCTION__);
  for(size_t itrail = 0; itrail < T_vec.size(); itrail++) {
	 differential_3d_t (*trail)[NROUNDS] = T_vec[itrail];
	 marx_print_diff_trail(trail);
  }
}


/* --- */

		if(nline > 1) {
		  marx_print_diff_trail(T);
		}

/* --- */

/*
 * Table g_best_B for WORD_SIZE == 8, "linear" variant, r1, r2 = 2, 5.
 * Entry k-1 is 2^-w for the k given in the trailing comment (1..15);
 * presumably the best value found for k rounds -- confirm against the
 * search that produced it. Any other WORD_SIZE is a compile-time error.
 */
#if (WORD_SIZE == 8) // linear r1, r2 = 2, 5
const double g_best_B[NROUNDS_MAX] = {
  (1.0 / (double)(1ULL <<  0)), // 1
  (1.0 / (double)(1ULL <<  0)), // 2
  (1.0 / (double)(1ULL <<  0)), // 3
  (1.0 / (double)(1ULL <<  1)), // 4
  (1.0 / (double)(1ULL <<  2)), // 5
  (1.0 / (double)(1ULL <<  3)), // 6
  (1.0 / (double)(1ULL <<  5)), // 7
  (1.0 / (double)(1ULL <<  7)), // 8
  (1.0 / (double)(1ULL <<  9)), // 9
  (1.0 / (double)(1ULL <<  11)), // 10
  (1.0 / (double)(1ULL <<  13)), // 11
  (1.0 / (double)(1ULL <<  15)), // 12
  (1.0 / (double)(1ULL <<  17)), // 13 
  (1.0 / (double)(1ULL <<  18)), // 14
  (1.0 / (double)(1ULL <<  19))  // 15
};
#else
#error("WORD_SIZE must be 8")
#endif


/* --- */

/*
 * Table g_best_B for WORD_SIZE == 8, "differential" variant,
 * r1, r2 = 2, 5. Entry k-1 is 2^-w for the k given in the trailing
 * comment (1..15); presumably the best value found for k rounds --
 * confirm against the search that produced it. Any other WORD_SIZE is a
 * compile-time error.
 */
#if (WORD_SIZE == 8) // differential r1, r2 = 2, 5
const double g_best_B[NROUNDS_MAX] = {
  (1.0 / (double)(1ULL <<  0)),  // 1
  (1.0 / (double)(1ULL <<  0)),  // 2
  (1.0 / (double)(1ULL <<  1)),  // 3
  (1.0 / (double)(1ULL <<  2)),  // 4
  (1.0 / (double)(1ULL <<  4)),  // 5
  (1.0 / (double)(1ULL <<  8)), // 6
  (1.0 / (double)(1ULL <<  12)), // 7
  (1.0 / (double)(1ULL <<  14)), // 8
  (1.0 / (double)(1ULL <<  17)), // 9
  (1.0 / (double)(1ULL <<  21)),  // 10
  (1.0 / (double)(1ULL <<  23)),  // 11
  (1.0 / (double)(1ULL <<  26)),  // 12
  (1.0 / (double)(1ULL <<  29)),  // 13
  (1.0 / (double)(1ULL <<  31)),  // 14
  (1.0 / (double)(1ULL <<  34)),  // 15
};
#else
#error("WORD_SIZE must be 8")
#endif

/* --- */

/*
 * Table g_best_B selected by WORD_SIZE: 15 entries for 8-bit words and
 * 10 entries for 16-bit words. Entry k-1 is 2^-w for the k given in the
 * trailing comment; presumably the best value found for k rounds --
 * confirm against the search that produced it. Entries 1..9 are the same
 * in both tables. Any other WORD_SIZE is a compile-time error.
 */
#if (WORD_SIZE == 8)
const double g_best_B[NROUNDS_MAX] = {
  (1.0 / (double)(1ULL <<  0)),  // 1
  (1.0 / (double)(1ULL <<  0)),  // 2
  (1.0 / (double)(1ULL <<  1)),  // 3
  (1.0 / (double)(1ULL <<  2)),  // 4
  (1.0 / (double)(1ULL <<  4)),  // 5
  (1.0 / (double)(1ULL <<  8)), // 6
  (1.0 / (double)(1ULL <<  12)), // 7
  (1.0 / (double)(1ULL <<  14)), // 8
  (1.0 / (double)(1ULL <<  17)), // 9
  (1.0 / (double)(1ULL <<  21)),  // 10
  (1.0 / (double)(1ULL <<  23)),  // 11
  (1.0 / (double)(1ULL <<  26)),  // 12
  (1.0 / (double)(1ULL <<  29)),  // 13
  (1.0 / (double)(1ULL <<  31)),  // 14
  (1.0 / (double)(1ULL <<  34)),  // 15
};
#elif (WORD_SIZE == 16)
const double g_best_B[NROUNDS_MAX] = {
  (1.0 / (double)(1ULL <<  0)),  // 1
  (1.0 / (double)(1ULL <<  0)),  // 2
  (1.0 / (double)(1ULL <<  1)),  // 3
  (1.0 / (double)(1ULL <<  2)),  // 4
  (1.0 / (double)(1ULL <<  4)),  // 5
  (1.0 / (double)(1ULL <<  8)), // 6
  (1.0 / (double)(1ULL <<  12)), // 7
  (1.0 / (double)(1ULL <<  14)), // 8
  (1.0 / (double)(1ULL <<  17)), // 9
  (1.0 / (double)(1ULL <<  22))  // 10
};
#else
#error("WORD_SIZE must be either 8 or 16")
#endif

/* --- */

/*
 * Rotation constants for the 64-bit version of Threefish (the original version).
 *
 * Both tables below are compiled out (#if 0). Each has 8 rows of 2 rotation
 * amounts.
 */
#if 0
// Rotation amounts in the range 5..58, i.e. meaningful for 64-bit words.
uint32_t g_threefish64_rot_const_orig[8][2] = {
  {14, 16},
  {52, 57},
  {23, 40},
  { 5, 37},
  {25, 33},
  {46, 12},
  {58, 22},
  {32, 32},
};

// Same layout with rotation amounts in the range 1..8.
// NOTE(review): presumably a scaled-down variant for a reduced word size -- confirm.
uint32_t g_threefish64_rot_const[8][2] = {
  {3, 8},
  {6, 7},
  {7, 3},
  {1, 6},
  {2, 5},
  {5, 1},
  {8, 2},
  {4, 4},
};
#endif


/* ---- */

/* 
20151117

SPECK paper for FSE

20151115

latex share

cat main_serial_16_9_*|grep "RROT \| p_trail" > speck32-rconst.txt

-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 0):
# p_trail -21
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 1):
# p_trail -25
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 2):
# p_trail -24
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 3):
# p_trail -30
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 4):
# p_trail -27
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 5):
# p_trail -30
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 6):
# p_trail -25
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 7):
# p_trail -30
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 8):
# p_trail -24
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 9):
# p_trail -31
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 10):
# p_trail -26
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 11):
# p_trail -29
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 12):
# p_trail -27
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 13):
# p_trail -27
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 14):
# p_trail -22
-- Searching for 9 rounds (WORD_SIZE 16 bits, RROT 15):
# p_trail -24


20151011

[./tests/arx-best-linear-search-tests.cc:653] Best linear trail on 10 rounds (WORD_SIZE 8 bits):
[./tests/arx-best-linear-search-tests.cc:105] marx_print_linear_trail() nrounds 10 (log2 scale)
 0: M_LR       C0       E0 0.00        0        0 0.00
 1: M_LR       80        0 -1.00        0       40 0.00
 2: M_LR       C1        1 -1.00        1        1 0.00
 3: M_LR       80        0 -1.00        1        1 0.00
 4: M_LR        1        0 -1.00        1       81 0.00
 5: M_LR        3        2 0.00        3        2 0.00
 6: M_LR        1        0 -1.00        3        2 -1.00
 7: M_LR        3        0 0.00        3        2 -1.00
 8: M_LR        7        4 -1.00        7        4 -1.00
 9: M_LR        1        4 -1.00        1        6 -1.00
10: M_LR        B       1C 0.00       1D        A 0.00
corr_trail 0.000488 -11.00

real    1093m54.841s
user    1091m49.350s
sys     0m0.644s


[./tests/arx-best-linear-search-tests.cc:653] Best linear trail on 11 rounds (WORD_SIZE 8 bits):
[./tests/arx-best-linear-search-tests.cc:105] marx_print_linear_trail() nrounds 11 (log2 scale)
 0: M_LR       C0       E0 0.00        0        0 0.00
 1: M_LR       80        0 -1.00        0       40 0.00
 2: M_LR       C1        1 -1.00        1        1 0.00
 3: M_LR       80        0 -1.00        1        1 0.00
 4: M_LR        1        0 -1.00        1       81 0.00
 5: M_LR        3        2 0.00        3        2 0.00
 6: M_LR        1        0 -1.00        3        2 -1.00
 7: M_LR        3        0 0.00        3        2 -1.00
 8: M_LR        7        4 -1.00        7        4 -1.00
 9: M_LR        7        4 -1.00        1        2 -1.00
10: M_LR        1        C -1.00        D        6 0.00
11: M_LR       1B       2C 0.00       21       1A -1.00
corr_trail 0.000122 -13.00

real    775m41.699s
user    775m56.255s
sys     0m0.347s

[./tests/arx-best-linear-search-tests.cc:653] Best linear trail on 12 rounds (WORD_SIZE 8 bits):
[./tests/arx-best-linear-search-tests.cc:105] marx_print_linear_trail() nrounds 12 (log2 scale)
 0: M_LR       C0       E0 0.00        0        0 0.00
 1: M_LR       80        0 -1.00        0       40 0.00
 2: M_LR       C1        1 -1.00        1        1 0.00
 3: M_LR       80        0 -1.00        1        1 0.00
 4: M_LR        1        0 -1.00        1       81 0.00
 5: M_LR        3        2 0.00        3        2 0.00
 6: M_LR        1        0 -1.00        3        2 -1.00
 7: M_LR        3        0 0.00        3        2 -1.00
 8: M_LR        5        4 -1.00        7        6 -1.00
 9: M_LR        4        4 -2.00        0        0 -2.00
10: M_LR        0        0 -1.00        0        4 0.00
11: M_LR        0       10 0.00       10        0 0.00
12: M_LR       20       40 0.00       58       20 -1.00
corr_trail 0.000031 -15.00

real    103m51.090s
user    103m52.360s
sys     0m0.196s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$

[./tests/arx-best-linear-search-tests.cc:653] Best linear trail on 13 rounds (WORD_SIZE 8 bits):
[./tests/arx-best-linear-search-tests.cc:105] marx_print_linear_trail() nrounds 13 (log2 scale)
 0: M_LR       C1       E1 0.00        1       41 0.00
 1: M_LR       81        1 -1.00        0       40 0.00
 2: M_LR        0        1 -1.00        1       81 0.00
 3: M_LR        2        2 0.00        3        2 0.00
 4: M_LR        1        0 -1.00        3        2 -1.00
 5: M_LR        3        0 0.00        3        2 -1.00
 6: M_LR        5        4 -1.00        6        6 -1.00
 7: M_LR        1        0 -2.00        6        6 -1.00
 8: M_LR        3        4 0.00        0        2 -2.00
 9: M_LR        C        8 -1.00        8        E 0.00
10: M_LR        8        8 -1.00        0        0 -1.00
11: M_LR        0        0 -1.00        0        8 0.00
12: M_LR        0       20 0.00       20        0 0.00
13: M_LR       40       80 0.00       B0       40 -1.00
corr_trail 0.000008 -17.00

real    431m35.623s
user    431m36.657s
sys     0m4.649s

[./tests/arx-best-linear-search-tests.cc:653] Best linear trail on 14 rounds (WORD_SIZE 8 bits):
[./tests/arx-best-linear-search-tests.cc:105] marx_print_linear_trail() nrounds 14 (log2 scale)
 0: M_LR       C0       E0 0.00        0        0 0.00
 1: M_LR       80        0 -1.00        0       40 0.00
 2: M_LR       C1        1 -1.00        1        1 0.00
 3: M_LR       80        0 -1.00        1        1 0.00
 4: M_LR        1        0 -1.00        1       81 0.00
 5: M_LR        3        2 0.00        3        2 0.00
 6: M_LR        1        0 -1.00        3        2 -1.00
 7: M_LR        3        0 0.00        3        2 -1.00
 8: M_LR        5        4 -1.00        6        6 -1.00
 9: M_LR        1        0 -2.00        6        6 -1.00
10: M_LR        3        4 0.00        0        2 -2.00
11: M_LR        C        8 -1.00        8        E 0.00
12: M_LR        8        8 -1.00        0        0 -1.00
13: M_LR        0        0 -1.00        0        8 0.00
14: M_LR        0       20 0.00       20        0 0.00
corr_trail 0.000004 -18.00

real    1221m40.152s
user    1221m57.241s
sys     0m4.265s

[./tests/arx-best-linear-search-tests.cc:653] Best linear trail on 15 rounds (WORD_SIZE 8 bits):
[./tests/arx-best-linear-search-tests.cc:105] marx_print_linear_trail() nrounds 15 (log2 scale)
 0: M_LR       C0       E0 0.00        0        0 0.00
 1: M_LR       80        0 -1.00        0       40 0.00
 2: M_LR       C1        1 -1.00        1        1 0.00
 3: M_LR       80        0 -1.00        1        1 0.00
 4: M_LR        1        0 -1.00        1       81 0.00
 5: M_LR        3        2 0.00        3        2 0.00
 6: M_LR        1        0 -1.00        3        2 -1.00
 7: M_LR        3        0 0.00        3        2 -1.00
 8: M_LR        5        4 -1.00        6        6 -1.00
 9: M_LR        1        0 -2.00        6        6 -1.00
10: M_LR        3        4 0.00        0        2 -2.00
11: M_LR        C        8 -1.00        8        E 0.00
12: M_LR        8        8 -1.00        0        0 -1.00
13: M_LR        0        0 -1.00        0        8 0.00
14: M_LR        0       20 0.00       20        0 0.00
15: M_LR       40       80 0.00       B0       40 -1.00
corr_trail 0.000002 -19.00

real    498m46.736s
user    498m56.748s
sys     0m0.048s


20151010


SPECK64 threshold search

vesselin@LACS-BIGMAN:~/speck-bins/w32$ time ./speck-ts-w32-r14-pth-5-wth-9-pddt-22.bin

[./tests/speck-xor-threshold-search-tests.cc:579] End search
[./tests/speck-xor-threshold-search-tests.cc:581] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-13.000000
B[ 5] = 2^-21.000000
B[ 6] = 2^-27.000000
B[ 7] = 2^-32.000000
B[ 8] = 2^-36.000000
B[ 9] = 2^-40.000000
B[10] = 2^-44.000000
B[11] = 2^-47.000000
B[12] = 2^-52.000000
B[13] = 2^-60.000000
[./tests/speck-xor-threshold-search-tests.cc:588] Final trail:
 0:        9 ->  1000000 1.000000
 1:  8000000 ->        0 0.250000 (2^-2.000000)
 2:    80000 ->    80000 0.500000 (2^-1.000000)
 3:    80800 ->   480800 0.250000 (2^-2.000000)
 4:   480008 ->  2084008 0.062500 (2^-4.000000)
 5:  6080808 -> 164A0848 0.007812 (2^-7.000000)
 6: F2400040 -> 40104200 0.000122 (2^-13.000000)
 7:   820200 ->     1202 0.003906 (2^-8.000000)
 8:     9000 ->       10 0.062500 (2^-4.000000)
 9:       80 ->        0 0.250000 (2^-2.000000)
10: 80000000 -> 80000000 1.000000 (2^0.000000)
11: 80800000 -> 80800004 0.500000 (2^-1.000000)
12: 80008004 -> 84008020 0.125000 (2^-3.000000)
13: 808080A0 -> A08481A4 0.031250 (2^-5.000000)
14:    40024 ->  4200D01 0.003906 (2^-8.000000)
p_tot = 0.000000000000000 = 2^-60.000000

[./tests/speck-xor-threshold-search-tests.cc:1343] WORD_SIZE 32 NROUNDS 14 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

real    70m18.453s
user    70m10.143s
sys     0m0.268s
vesselin@LACS-BIGMAN:~/speck-bins/w32$ 


SPECK48 threshold search

vesselin@LACS-BIGMAN:~/speck-bins/w24$ time ./speck-ts-w24-r11-pth-7-wth-9-pddt-22.bin
#--- [./tests/speck-xor-threshold-search-tests.cc:1320] Tests, WORD_SIZE  = 24, MASK =   FFFFFF
[./tests/speck-xor-threshold-search-tests.cc:1330] WORD_SIZE 24 NROUNDS 11 SPECK_P_THRES 0.007812 2^-7.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00
[./src/speck-xor-threshold-search.cc:695] HWay table: p_thres = 0.007812 (2^-7.000000), hw_thres = 9, n = 24, #diffs = 4194304 2^22.00
[./src/speck-xor-threshold-search.cc:544] Update length 4294967296 -> 894142677
[./src/speck-xor-threshold-search.cc:549] INIT table: p_thres = 0.007812 (2^-7.000000), hw_thres = 5, n = 24, #diffs = 894142677 2^29.74

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-10.000000
B[ 5] = 2^-14.000000
B[ 6] = 2^-20.000000
B[ 7] = 2^-27.000000
 0: 820200 -> 1202 0.015625 (2^-6.000000)
 1: 9000 -> 10 0.062500 (2^-4.000000)
 2: 80 -> 0 0.250000 (2^-2.000000)
 3: 800000 -> 800000 1.000000 (2^0.000000)
 4: 808000 -> 808004 0.500000 (2^-1.000000)
 5: 800084 -> 8400A0 0.125000 (2^-3.000000)
 6: 80A0 -> 2085A4 0.062500 (2^-4.000000)
 7: 808424 -> 84A905 0.007812 (2^-7.000000)
p_tot = 0.000000007450581 = 2^-27.000000, Bn = 0.000000 = 2^-27.000000
[./src/speck-xor-threshold-search.cc:3106] nrounds = 9, Bn_init = 2^-36.000000 : key   B8554E   8D102B   991A38   6EC1F3
[./src/speck-xor-threshold-search.cc:2176] 0: Init p_min 2^-9.000000

SPECK96 threshold search


B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-9.000000
B[ 4] = 2^-18.000000
B[ 5] = 2^-29.000000
B[ 6] = 2^-42.000000
B[ 7] = 2^-56.000000
 0: 0 -> 4 1.000000 (2^0.000000)
 1: 4 -> 24 0.500000 (2^-1.000000)
 2: 40000000024 -> 40000000104 0.125000 (2^-3.000000)
 3: 200400000104 -> 400000924 0.031250 (2^-5.000000)
 4: 42404000925 -> 40404004005 0.001953 (2^-9.000000)
 5: 210020044004 -> 1200006402C 0.000488 (2^-11.000000)
 6: 50100224424 -> C0100104544 0.000122 (2^-13.000000)
 7: 200401102100 -> 400C01920B20 0.000061 (2^-14.000000)
p_tot = 0.000000000000000 = 2^-56.000000, Bn = 0.000000 = 2^-56.000000

[./tests/speck-xor-threshold-search-tests.cc:585] End search
[./tests/speck-xor-threshold-search-tests.cc:587] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-9.000000
B[ 4] = 2^-18.000000
B[ 5] = 2^-29.000000
B[ 6] = 2^-42.000000
B[ 7] = 2^-56.000000
[./tests/speck-xor-threshold-search-tests.cc:594] Final trail:
 0:               80 ->     800000000000 1.000000
 1:                0 ->                4 1.000000 (2^0.000000)
 2:                4 ->               24 0.500000 (2^-1.000000)
 3:      40000000024 ->      40000000104 0.125000 (2^-3.000000)
 4:     200400000104 ->        400000924 0.031250 (2^-5.000000)
 5:      42404000925 ->      40404004005 0.001953 (2^-9.000000)
 6:     210020044004 ->      1200006402C 0.000488 (2^-11.000000)
 7:      50100224424 ->      C0100104544 0.000122 (2^-13.000000)
 8:     200401102100 ->     400C01920B20 0.000061 (2^-14.000000)
p_tot = 0.000000000000000 = 2^-56.000000

[./tests/speck-xor-threshold-search-tests.cc:1391] WORD_SIZE 48 NROUNDS 8 SPECK_P_THRES 0.000015 2^-16.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 16  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

real    1m7.312s
user    1m7.049s
sys     0m0.228s

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-9.000000
B[ 4] = 2^-18.000000
B[ 5] = 2^-29.000000
B[ 6] = 2^-42.000000
B[ 7] = 2^-58.000000
 0: 0 -> 4 1.000000 (2^0.000000)
 1: 4 -> 24 0.500000 (2^-1.000000)
 2: 400000000000024 -> 400000000000104 0.125000 (2^-3.000000)
 3: 2004000000000104 -> 4000000000924 0.031250 (2^-5.000000)
 4: 424040000000925 -> 404040000004005 0.001953 (2^-9.000000)
 5: 2100200400004004 -> 12000040002402C 0.000488 (2^-11.000000)
 6: 501002404024024 -> C01000404104144 0.000122 (2^-13.000000)
 7: 2004010420144104 -> 400C012400964B24 0.000015 (2^-16.000000)
p_tot = 0.000000000000000 = 2^-58.000000, Bn = 0.000000 = 2^-58.000000

[./tests/speck-xor-threshold-search-tests.cc:585] End search
[./tests/speck-xor-threshold-search-tests.cc:587] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-9.000000
B[ 4] = 2^-18.000000
B[ 5] = 2^-29.000000
B[ 6] = 2^-42.000000
B[ 7] = 2^-58.000000
[./tests/speck-xor-threshold-search-tests.cc:594] Final trail:
 0:               80 -> 8000000000000000 1.000000
 1:                0 ->                4 1.000000 (2^0.000000)
 2:                4 ->               24 0.500000 (2^-1.000000)
 3:  400000000000024 ->  400000000000104 0.125000 (2^-3.000000)
 4: 2004000000000104 ->    4000000000924 0.031250 (2^-5.000000)
 5:  424040000000925 ->  404040000004005 0.001953 (2^-9.000000)
 6: 2100200400004004 ->  12000040002402C 0.000488 (2^-11.000000)
 7:  501002404024024 ->  C01000404104144 0.000122 (2^-13.000000)
 8: 2004010420144104 -> 400C012400964B24 0.000015 (2^-16.000000)
p_tot = 0.000000000000000 = 2^-58.000000

[./tests/speck-xor-threshold-search-tests.cc:1391] WORD_SIZE 64 NROUNDS 8 SPECK_P_THRES 0.000015 2^-16.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 16  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

real    1m12.946s
user    1m12.776s
sys     0m0.168s




 */

/* --- */

  // Orphaned excerpt: this statement sequence is not enclosed in any function here.
  // It reads names declared outside the excerpt (iround, g_T, g_r1, g_r2,
  // oGamma_L, oGamma_R, oLambda_L, oLambda_R, c, d, alpha_in, beta_in, gamma_in,
  // delta_in, lambda_in, eta_in) and therefore only makes sense inside the
  // search routine it was cut from.
  //
  // For round iround > 0 it re-derives the ADD masks of both branches from the
  // round input/output masks, recomputes the two correlations with xlc_add()
  // and asserts that they match the masks and correlations (c, d) in scope.
  // check for consistency
#if 1 // DEBUG
		if(iround > 0) {
		  // input masks to round iround
		  const WORD_T iGamma_L = g_T[LEFT][iround-1].dx;
		  const WORD_T iGamma_R = g_T[LEFT][iround-1].dy;
		  const WORD_T iLambda_L = g_T[RIGHT][iround-1].dx;
		  const WORD_T iLambda_R = g_T[RIGHT][iround-1].dy;

		  // extract the masks to ADD from the IO masks to the round
		  // (left branch: note that the second output mask is taken from the right branch)
		  WORD_T alpha = 0;
		  WORD_T beta = 0;
		  WORD_T gamma = 0;
		  marx_round_masks_to_add_masks(iGamma_L, iGamma_R, oGamma_L, oLambda_R, g_r1, &alpha, &beta, &gamma);
		  const double c_tmp = xlc_add(alpha, beta, gamma, WORD_SIZE);
		  assert(alpha == alpha_in);
		  assert(beta == beta_in);
		  assert(gamma == gamma_in);

		  // same for the right branch (second output mask taken from the left branch)
		  WORD_T delta = 0;
		  WORD_T lambda = 0;
		  WORD_T eta = 0;
		  marx_round_masks_to_add_masks(iLambda_L, iLambda_R, oLambda_L, oGamma_R, g_r2, &delta, &lambda, &eta);
		  const double d_tmp = xlc_add(delta, lambda, eta, WORD_SIZE);
		  assert(delta == delta_in);
		  assert(lambda == lambda_in);
		  assert(eta == eta_in);

		  // On a mismatch of the left-branch correlation: dump the masks, re-check the
		  // mask relations and compare against an experimentally computed correlation.
		  if(!(c == c_tmp)) {
			 printf("[%s:%d]  IN %2d: L(%X %X) R(%X %X)\n", __FILE__, __LINE__, iround-1, 
					  iGamma_L, iGamma_R, iLambda_L, iLambda_R);
			 printf("[%s:%d] c %4.2f != c_tmp %4.2f %X %X %X\n", __FILE__, __LINE__, log2(c), log2(c_tmp), alpha, beta, gamma);
			 // NOTE(review): the OUT line also prints iround-1 as its index although it
			 // shows the output masks; possibly iround was intended -- confirm.
			 printf("[%s:%d] OUT %2d: L(%X %X) R(%X %X)\n", __FILE__, __LINE__, iround-1,
					  oGamma_L, oGamma_R, oLambda_L, oLambda_R);

			 assert(alpha == iGamma_L);
			 assert(beta == ((RROT(oLambda_R, g_r1) ^ iGamma_R) & MASK));
			 assert(gamma == (oGamma_L ^ oLambda_R));

			 // absolute correlation from the experimental probability: |2p - 1|
			 double prob_exper = xlp_add_exper(alpha, beta, gamma, WORD_SIZE);
			 double corr_exper = std::abs( ((2.0 * prob_exper) - 1.0));
			 assert(corr_exper == c); // exact floating-point comparison

		  }
		  assert(c == c_tmp);
		  // Same diagnostics for the right-branch correlation.
		  if(!(d == d_tmp)) {
			 printf("[%s:%d]  IN %2d: L(%X %X) R(%X %X)\n", __FILE__, __LINE__, iround-1, 
					  iGamma_L, iGamma_R, iLambda_L, iLambda_R);
			 printf("[%s:%d] d %4.2f != d_tmp %4.2f %X %X %X\n", __FILE__, __LINE__, log2(d), log2(d_tmp), delta, lambda, eta);
			 printf("[%s:%d] OUT %2d: L(%X %X) R(%X %X)\n", __FILE__, __LINE__, iround-1,
					  oGamma_L, oGamma_R, oLambda_L, oLambda_R);

			 assert(delta == iLambda_L);
			 assert(lambda == ((RROT(oGamma_R, g_r2) ^ iLambda_R) & MASK));
			 assert(eta == (oLambda_L ^ oGamma_R));


			 double prob_exper = xlp_add_exper(delta, lambda, eta, WORD_SIZE);
			 double corr_exper =std::abs( ((2.0 * prob_exper) - 1.0));
			 assert(corr_exper == d); // exact floating-point comparison

		  }
		  assert(d == d_tmp);
		}
#endif // #if 1 // DEBUG



/* --- */

/*
 * Print the linear trail T, one line per index 0..NROUNDS, followed by the
 * product of all stored correlations (plain value and log2).
 *
 * T[LEFT][i] and T[RIGHT][i] hold the two mask pairs (dx, dy) at index i and
 * the correlation p attached to each branch.
 *
 * With the DEBUG section enabled, every index i > 0 is additionally checked:
 * the masks at i-1 and i are converted to the masks of the two additions
 * (marx_round_masks_to_add_masks) and the correlations recomputed by xlc_add()
 * must be exactly equal to the stored ones.
 *
 * \see speck_print_linear_trail
 */
void marx_print_linear_trail(differential_t T[2][NROUNDS + 1])
{
  printf("[%s:%d] %s()\n", __FILE__, __LINE__, __FUNCTION__);

  double corr_trail = 1.0;
  for(uint32_t iround = 0; iround <= NROUNDS; iround++) {
	 const differential_t& left = T[LEFT][iround];
	 const differential_t& right = T[RIGHT][iround];

	 printf("%2d: M_LR %8X %8X 2^%4.2f %8X %8X 2^%4.2f\n", iround,
			  left.dx, left.dy, log2(left.p),
			  right.dx, right.dy, log2(right.p));

	 corr_trail *= (left.p * right.p);

#if 1 // DEBUG
	 // consistency check; index 0 has no preceding masks to check against
	 if(iround == 0) {
		continue;
	 }
	 const differential_t& left_prev = T[LEFT][iround - 1];
	 const differential_t& right_prev = T[RIGHT][iround - 1];

	 // left addition: (left_prev.dx, left_prev.dy) -> (left.dx, right.dy);
	 // the second output mask comes from the other branch
	 const double corr_left = left.p;
	 WORD_T add_l_in0 = 0;
	 WORD_T add_l_in1 = 0;
	 WORD_T add_l_out = 0;
	 marx_round_masks_to_add_masks(left_prev.dx, left_prev.dy, left.dx, right.dy, g_r1,
											 &add_l_in0, &add_l_in1, &add_l_out);
	 const double corr_left_check = xlc_add(add_l_in0, add_l_in1, add_l_out, WORD_SIZE);

	 // right addition: (right_prev.dx, right_prev.dy) -> (right.dx, left.dy)
	 const double corr_right = right.p;
	 WORD_T add_r_in0 = 0;
	 WORD_T add_r_in1 = 0;
	 WORD_T add_r_out = 0;
	 marx_round_masks_to_add_masks(right_prev.dx, right_prev.dy, right.dx, left.dy, g_r2,
											 &add_r_in0, &add_r_in1, &add_r_out);
	 const double corr_right_check = xlc_add(add_r_in0, add_r_in1, add_r_out, WORD_SIZE);

	 assert(corr_left == corr_left_check);
	 assert(corr_right == corr_right_check);
#endif // #if 1 // DEBUG
  }

  printf("corr_trail %f %4.2f\n", corr_trail, log2(corr_trail));
}
/* --- */

/*
 * Table g_best_B without a WORD_SIZE guard. The trailing comment on each line
 * is the 1-based position in the table.
 *
 * NOTE(review): the last three entries have numerator 0.0 and therefore
 * evaluate to exactly 0.0 whatever the shift amount; presumably a deliberate
 * "bound not known yet" placeholder that keeps the intended exponent visible
 * -- confirm before relying on these values.
 */
const double g_best_B[NROUNDS_MAX] = {
  1.0 / static_cast<double>(1ULL <<  0), //  1
  1.0 / static_cast<double>(1ULL <<  0), //  2
  1.0 / static_cast<double>(1ULL <<  1), //  3
  1.0 / static_cast<double>(1ULL <<  2), //  4
  1.0 / static_cast<double>(1ULL <<  6), //  5
  1.0 / static_cast<double>(1ULL <<  6), //  6
  1.0 / static_cast<double>(1ULL << 10), //  7
  0.0 / static_cast<double>(1ULL << 24), //  8
  0.0 / static_cast<double>(1ULL << 30), //  9
  0.0 / static_cast<double>(1ULL << 32)  // 10
};

/* --- */

/*
 * Mask with the word_size least-significant bits set, built by left-shifting
 * an all-ones word and inverting it.
 *
 * The original expression shifted an `unsigned long`, so its result for
 * word_size == 32 depended on `long` being 64 bits wide (the shift is
 * undefined where `long` has 32 bits), and any word_size >= 64 was undefined
 * everywhere. The shift is now done on 32 bits for word_size < 32 only and
 * larger sizes saturate to the full 32-bit mask, which is the value the
 * 64-bit-long computation produced for 32..63.
 *
 * \param word_size number of mask bits.
 * \return 0 for word_size == 0, 2^word_size - 1 for 1..31, 0xffffffff otherwise.
 */
uint32_t m1(unsigned int word_size)
{
  const uint32_t all_ones = 0xffffffffU;
  if(word_size >= 32) {
	 return all_ones;
  }
  return (uint32_t)~(all_ones << word_size);
}

/*
 * Mask with the word_size least-significant bits set, built by right-shifting
 * an all-ones 32-bit word by (32 - word_size).
 *
 * That shift is only defined for amounts 0..31. For word_size == 0 the
 * original shifted a 32-bit value by 32 (undefined; typically yields
 * 0xffffffff instead of 0), and for word_size > 32 the unsigned subtraction
 * wrapped to a huge shift count. Both boundary cases are now returned
 * explicitly.
 *
 * \param word_size number of mask bits.
 * \return 0 for word_size == 0, 2^word_size - 1 for 1..31, 0xffffffff otherwise.
 */
uint32_t m2(unsigned int word_size)
{
  const uint32_t all_ones = ~(uint32_t)0;
  if(word_size == 0) {
	 return 0;
  }
  if(word_size >= 32) {
	 return all_ones;
  }
  return (all_ones >> (32 - word_size));
}

void test_masks_yann()
{
  for (unsigned int word_size = 0; word_size < 33; word_size++)
    {
		uint32_t v1 = m1(word_size);
		uint32_t v2 = m2(word_size);
		printf("word_size = %2u, v1 = 0x%08x, v2 = 0x%08x", word_size, v1, v2);
		if (v1 != v2)
        {
			 printf(" -> ERROR\n");
        }
		else
        {
			 printf("\n");
        }
    }
  printf("v2 = 0x%08llx\n", (WORD_MAX_T)((~0ULL) >> (64 - 0)));
  printf("v2 = 0x%08x\n", m2(0));
}


/* --- */

/* 
vpv@mazirat:~/exper/speck-bins$ time ./serial-bottom-w32-r6.bin
-- Searching for 6 rounds (WORD_SIZE 32 bits):
# Update bound: -15 -> -15
#  0: 0x00008202 0x00001202 -> 0x00009000 -4
#  1: 0x00000090 0x00000010 -> 0x00000080 -2
#  2: 0x80000000 0x00000000 -> 0x80000000 0
#  3: 0x00800000 0x80000000 -> 0x80800000 -1
#  4: 0x00808000 0x80800004 -> 0x80008004 -3
#  5: 0x04800080 0x84008020 -> 0x808080A0 -5
# p_trail -15
# Update bound: -15 -> -15
#  0: 0x00401042 0x00400240 -> 0x00001202 -5
#  1: 0x02000012 0x02000002 -> 0x00000010 -3
#  2: 0x10000000 0x10000000 -> 0x00000000 -1
#  3: 0x00000000 0x80000000 -> 0x80000000 0
#  4: 0x00800000 0x80000004 -> 0x80800004 -2
#  5: 0x04808000 0x80800020 -> 0x84008020 -4
# p_trail -15
--------------------------------------------------------------------------------
Best trail on 6 rounds (WORD_SIZE 32 bits):
#  0: 0x00401042 0x00400240 -> 0x00001202 -5
#  1: 0x02000012 0x02000002 -> 0x00000010 -3
#  2: 0x10000000 0x10000000 -> 0x00000000 -1
#  3: 0x00000000 0x80000000 -> 0x80000000 0
#  4: 0x00800000 0x80000004 -> 0x80800004 -2
#  5: 0x04808000 0x80800020 -> 0x84008020 -4
# p_trail -15
[1]+  Done                    xpdf best-trail-search.pdf  (wd: ~/skcrypto/trunk/work/txt/speck)
(wd now: ~/exper/speck-bins)

real    76m51.505s
user    76m55.360s
sys     0m0.205s


 */

/* --- */
// Compiled-out excerpt (not inside any function here). For each of the three
// masks it ORs the value X_i, shifted to position ibit, into X_in, then shifts
// the result right by ibit and keeps the bits selected by mask_part.
// alpha_in/beta_in/gamma_in, alpha_i/beta_i/gamma_i, ibit and mask_part are
// declared outside the excerpt.
#if 0
		  const WORD_T alpha_part = ((alpha_in | (alpha_i << ibit)) >> ibit) & mask_part;
		  const WORD_T beta_part = ((beta_in | (beta_i << ibit)) >> ibit) & mask_part;
		  const WORD_T gamma_part = ((gamma_in | (gamma_i << ibit)) >> ibit) & mask_part;
#endif

/* --- */

/**
  * Given a linear trail for \f$N\f$ rounds, experimentally estimate the
  * absolute correlations of the corresponding \f$N\f$ linear approximations
  * and print them next to the values predicted by the trail:
  *
  *       - Approximation for 1 round: round 0.
  *       - Approximation for 2 rounds: rounds \f$0,1\f$.
  *       - Approximation for 3 rounds: rounds \f$0,1,2\f$.
  *       - \f$\ldots\f$
  *       - Approximation for \f$N\f$ rounds: rounds \f$0,1,2,\ldots,(N-1)\f$.
  *
  * For every i = 1..nrounds, npairs random plaintexts are encrypted for i
  * rounds under the round keys expanded from a copy of master_key. The
  * approximation holds for a plaintext when the XOR of the four mask parities
  * (plaintext halves with T[0], ciphertext halves with T[i]) is zero. The
  * fraction p of such plaintexts is turned into the absolute correlation
  * |2p - 1| ("EXP"), which is the quantity comparable to the product of the
  * per-round correlations T[1].p ... T[i].p ("THE").
  *
  * \param nrounds number of rounds of the trail; must not exceed NROUNDS.
  * \param npairs number of random plaintexts sampled for each approximation.
  * \param master_key master key; SPECK_MAX_NROUNDS words are copied from it.
  * \param T the trail: T[i].dx / T[i].dy are the left / right masks after i
  *        rounds (T[0]: plaintext masks) and T[i].p the correlation of round i.
  *
  * \note The body is compiled only for WORD_SIZE >= 16; otherwise the
  *       function does nothing.
  * \see speck_verify_xor_differential
  */
void speck_verify_linear_trail(const uint32_t nrounds, const uint32_t npairs, 
										 const WORD_T master_key[SPECK_MAX_NROUNDS], 
										 const differential_t T[NROUNDS + 1])
{
#if (WORD_SIZE >= 16)
  assert(nrounds <= NROUNDS); // T has only NROUNDS + 1 entries

  uint32_t key_size = speck_get_keysize(WORD_SIZE);
  uint32_t nkey_words = speck_compute_nkeywords(WORD_SIZE, key_size);
  // expand the round keys in a private copy so that master_key stays untouched
  WORD_T key[SPECK_MAX_NROUNDS] = {0};
  for(uint32_t i = 0; i < SPECK_MAX_NROUNDS; i++) {
	 key[i] = master_key[i];
  }
  speck_key_expansion(key, nrounds, nkey_words, g_r1, g_r2);

  const WORD_T ML_in = T[0].dx;		  // left plaintext mask
  const WORD_T MR_in = T[0].dy;		  // right plaintext mask

  printf("Input masks: %16llX %16llX\n\n", (WORD_MAX_T)ML_in, (WORD_MAX_T)MR_in);

  double corr_the = 1.0; // running product of the per-round correlations
  for(uint32_t i = 1; i <= nrounds; i++) {

	 const uint32_t enc_nrounds = i;

	 const WORD_T ML_out = T[i].dx;	  // left ciphertext mask
	 const WORD_T MR_out = T[i].dy;	  // right ciphertext mask
	 corr_the *= T[i].p;

	 uint32_t cnt = 0; // number of plaintexts for which the approximation holds
	 for(uint64_t j = 0; j < npairs; j++) {
		const WORD_T x_L = xrandom() & MASK; // plaintext left
		const WORD_T x_R = xrandom() & MASK; // plaintext right
		WORD_T y_L = x_L; // ciphertext left
		WORD_T y_R = x_R; // ciphertext right

		speck_encrypt(key, enc_nrounds, g_r1, g_r2, &y_L, &y_R);

		const WORD_T parity_x_L = parity(x_L & ML_in); // dot product (x_L . ML_in)
		const WORD_T parity_x_R = parity(x_R & MR_in); // dot product (x_R . MR_in)

		const WORD_T parity_y_L = parity(y_L & ML_out); // dot product (y_L . ML_out)
		const WORD_T parity_y_R = parity(y_R & MR_out); // dot product (y_R . MR_out)

		// the approximation holds when all four parities XOR to zero
		const WORD_T leq = (parity_x_L ^ parity_x_R ^ parity_y_L ^ parity_y_R) & MASK;

		if(leq == 0) {
		  cnt++;
		}
	 }
	 // The fraction of matches is a probability p, not a correlation: convert
	 // it to |2p - 1| so that it is comparable with corr_the.
	 const double prob_exp = (double)cnt / (double)npairs;
	 const double corr_exp = fabs((2.0 * prob_exp) - 1.0);

	 // WORD_T values are widened to WORD_MAX_T and printed with %llX, like the
	 // input masks above, so the format matches for every word size.
	 printf("R#%2u Output masks: %8llX %8llX\n", i, (WORD_MAX_T)ML_out, (WORD_MAX_T)MR_out);
	 printf("THE %2u: %f (2^%f) %8llX -> %8llX\n", i+1,   corr_the, log2(corr_the),
			  (WORD_MAX_T)T[i].dx, (WORD_MAX_T)T[i].dy);
	 printf("EXP %2u: %f (2^%f) %8llX -> %8llX\n\n", i+1, corr_exp, log2(corr_exp),
			  (WORD_MAX_T)T[i].dx, (WORD_MAX_T)T[i].dy);
  }
#endif // #if (WORD_SIZE >= 16)
}

/* --- */

// Orphaned excerpt (not inside any function here): prints the trail T, one
// line per index 0..NROUNDS, and for every index i > 0 recomputes the
// correlation of the addition from the masks at i and i-1 and reports whether
// it is exactly equal to the stored T[i].p. T is declared outside the excerpt.
#if 1
  for(uint32_t i = 0; i <= NROUNDS; i++) {
	 //	 const WORD_T ml = T[i].dx;
	 //	 const WORD_T mr = T[i].dy;
	 //	 const double corr = T[i].p;

	 //	 printf("%2d: %X %X %4.2f %f\n", i, ml, mr, log2(corr), corr);
	 printf("%2d: %8X %8X %4.2f 2^%4.2f\n", i, T[i].dx, T[i].dy, T[i].p, log2(T[i].p));

	 if(i > 0) {
		const WORD_T ml_prev = T[i-1].dx;
		const WORD_T mr_prev = T[i-1].dy;
		const WORD_T ml = T[i].dx;
		const WORD_T mr = T[i].dy;
		const double corr = T[i].p;
		WORD_T alpha = 0;
		WORD_T beta = 0;
		WORD_T gamma = 0;

		// note the argument order: masks at index i first, then masks at index i-1
		speck_round_masks_to_add_masks(ml, mr, ml_prev, mr_prev, &alpha, &beta, &gamma);

		const double corr_tmp = xlc_add(alpha, beta, gamma, WORD_SIZE);

		//		printf("[%s:%d] %X %X %X %f %f\n", __FILE__, __LINE__, alpha, beta, gamma, corr, corr_tmp);
		printf("[%s:%d] alpha beta gamma %X %X -> %X %4.2f vs. %4.2f\n", __FILE__, __LINE__, alpha, beta, gamma, corr_tmp, corr);
		printf("[%s:%d] %f == %f\n", __FILE__, __LINE__, corr_tmp, corr);
		// exact floating-point comparison; the result is only reported, not enforced
		if(corr == corr_tmp) {
		  printf("[%s:%d] EQUAL!\n", __FILE__, __LINE__);
		}
		if(corr != corr_tmp) {
		  printf("[%s:%d] NOT EQUAL!\n", __FILE__, __LINE__);
		}
		//		assert(corr == corr_tmp);
	 }

	 // TODO: Add check for mask consistency
  }
#endif

/* --- */

// Compiled-out excerpt (not inside any function here): sign is -1 when the
// Hamming weight of (ma ^ mc) & (mb ^ mc) is odd and +1 otherwise.
// ma, mb and mc are declared outside the excerpt.
#if 0
  // compute the sign of the correlation
  int sign = 1;
  if(!is_even(hamming_weight((ma ^ mc) & (mb ^ mc)))) {
	 sign = -1;
  }
#endif

/* --- */

double xlc_add(const WORD_T ma, const WORD_T mb, const WORD_T mc)
{
  WORD_T w = 1; // absolute value in the exponent of the correlation

#if 0 // DEBUG
  printf("ma = ");
  print_binary(ma);
  printf("\nmb = ");
  print_binary(mb);
  printf("\nmc = ");
  print_binary(mc);
  printf("\n");
#endif // #if 1 // DEBUG

  WORD_T S[WORD_SIZE] = {0};
  for(uint32_t i = 0; i < WORD_SIZE; i++) {

	 WORD_T ma_i = (ma >> i) & 1;
	 WORD_T mb_i = (mb >> i) & 1;
	 WORD_T mc_i = (mc >> i) & 1;

	 WORD_T word = (mc_i << 2) | (mb_i << 1) | (ma_i << 0);
	 assert((word >= 0) && (word <= 7));

	 // store the LSB at index S[WORD_SIZE - 1] and the MSB at S[0]
	 S[WORD_SIZE - i - 1] = word; 
	 //	 printf("%2d S[%2d] %d\n", i, (WORD_SIZE - i - 1), S[WORD_SIZE - i - 1]);
  }

#if 0 // DEBUG
  printf(" S = ");
  for(uint32_t i = 0; i < WORD_SIZE; i++) {
	 printf("%d", S[i]);
  }
  printf("\n");
#endif // #if 1 // DEBUG

  uint32_t ibit = 0; // bit iterator
  uint32_t state = 0; // state: can be 0 or 1

  // { -----------

  while(ibit < WORD_SIZE) {

	 const WORD_T index = ibit; // index of S
	 WORD_T cnt_b7 = 0; // counting 7-states

	 if(S[index] == 7) {

		assert(ibit == index);
		while(S[ibit] == 7) {
		  cnt_b7++; // count 7-block
		  ibit++; // move to next bit
		}
		w = w + (cnt_b7 / 2); // increase exponent by the number of 7-block tuples
		if(!is_even(cnt_b7)) { // if odd number of 7-blocks - change state from 0/1 tp 1/0
		  if(state == 1) {
			 w++;
		  }
		  state = 1 - state; // switch state
		  assert((state == 0) || (state == 1));
		}

		//		printf("[%s:%d] cnt_b7 = %d (cnt_b7 / 2) = %d state %d w %d\n", __FILE__, __LINE__, cnt_b7, cnt_b7 / 2, state, w);
	 }

	 if(S[index] == 0) {
		ibit++; // move to next bit
		if(state == 1) { // if at state 1 increase exponent
		  w = w + 1; // increase exponent
		}
	 }

	 if((S[index] == 1) || (S[index] == 2) || (S[index] == 4)) {
		if(state == 0) { /// if at state 0 halt (probability = 1/2, bias = 0)
		  // correlation 0
		  return 0.0;
		}
		state = 1 - state; // switch state
		w = w + 1; // increase exponent
		ibit++; // move to next bit
	 }

	 if((S[index] == 3) || (S[index] == 5) || (S[index] == 6)) {
		if(state == 0) { /// if at state 0 halt (probability = 1/2, bias = 0)
		  // correlation 0
		  return 0.0;
		}
		w = w + 1; // increase exponent
		ibit++; // move to next bit
	 }

  } // while

  // ----------- }

  w--; // corr = 2 * bias

  double corr_abs = 0.0;
  if (w == 64) {
	 corr_abs = pow(2, -64);
  } else {
	 corr_abs = (double) 1.0 / (double)(1ULL << w); // efficient pow(2, w)
  }

  return corr_abs;
}

/* --- */

double xlc_add(const WORD_T ma, const WORD_T mb, const WORD_T mc)
{
  WORD_T w = 0; // absolute value in the exponent
  double p = 0.0; // = 2^{-w}

#if 1 // DEBUG
  printf("ma = ");
  print_binary(ma);
  printf("\nmb = ");
  print_binary(mb);
  printf("\nmc = ");
  print_binary(mc);
  printf("\n");
#endif // #if 1 // DEBUG

  WORD_T S[WORD_SIZE] = {0};
  for(uint32_t i = 0; i < WORD_SIZE; i++) {

	 WORD_T ma_i = (ma >> i) & 1;
	 WORD_T mb_i = (mb >> i) & 1;
	 WORD_T mc_i = (mc >> i) & 1;

	 WORD_T word = (mc_i << 2) | (mb_i << 1) | (ma_i << 0);
	 assert((word >= 0) && (word <= 7));

	 // store the LSB at index S[WORD_SIZE - 1] and the MSB at S[0]
	 S[WORD_SIZE - i - 1] = word; 
	 //	 printf("%2d S[%2d] %d\n", i, (WORD_SIZE - i - 1), S[WORD_SIZE - i - 1]);
  }

#if 1 // DEBUG
  printf(" S = ");
  for(uint32_t i = 0; i < WORD_SIZE; i++) {
	 printf("%d", S[i]);
  }
  printf("\n");
#endif // #if 1 // DEBUG

  uint32_t ibit = 0; // bit iterator
  uint32_t state = 0; // state: can be 0 or 1

  // { -----------

  while(ibit < WORD_SIZE) {

	 WORD_T index = ibit; // index of S
	 WORD_T cnt_b7 = 0; // counting 7-states

	 if(S[index] == 7) {

		assert(ibit == index);
		while(S[ibit] == 7) {
		  cnt_b7++; // count 7-block
		  ibit++; // move to next bit
		}
		if(!is_even(cnt_b7)) { // if odd number of 7-blocks - change state from 0/1 tp 1/0
		  state = 1 - state; // switch state
		  assert((state == 0) || (state == 1));
		}
		w = w + (cnt_b7 / 2); // increase exponent by the number of 7-block tuples

		//		printf("[%s:%d] cnt_b7 = %d (cnt_b7 / 2) = %d\n", __FILE__, __LINE__, cnt_b7, cnt_b7 / 2);
	 }

	 if(S[index] == 0) {
		ibit++; // move to next bit
		if(state == 1) { // if at state 1 increase exponent
		  w = w + 1; // increase exponent
		}
	 }

	 if((S[index] == 1) || (S[index] == 2) || (S[index] == 4)) {
		if(state == 0) { /// if at state 0 halt (probability = 1/2, bias = 0)
		  w = 1; // exponent = 1 => probability = 1/2
		  break;
		}
		state = 1 - state; // switch state
		w = w + 1; // increase exponent
		ibit++; // move to next bit
	 }

	 if((S[index] == 3) || (S[index] == 5) || (S[index] == 6)) {
		if(state == 0) { /// if at state 0 halt (probability = 1/2, bias = 0)
		  w = 1; // exponent = 1 => probability = 1/2
		  break;
		}
		w = w + 1; // increase exponent
		ibit++; // move to next bit
	 }

  } // while

  // ----------- }

  double corr_abs = 0.0;
  if (w == 64) {
	 corr_abs = pow(2, -64);
  } else {
	 corr_abs = (double) 1.0 / (double)(1ULL << w); // efficient pow(2, w)
  }

  // compute the sign of the correlation
  int sign = 1;
  if(!is_even(hamming_weight((ma ^ mc) & (mb ^ mc)))) {
	 sign = -1;
  }
  double corr = sign * corr_abs;

  p = (corr + 1.0) / 2.0;

  return p;
}
/* --- */

/*
 * Probability that the XOR differences da and db propagate to dc, evaluated on
 * the word_size least-significant bits.
 * NOTE(review): presumably the XOR differential probability of addition modulo
 * 2^word_size in the Lipmaa-Moriai form (suffix "_lm") -- confirm.
 *
 * For word_size > 1:
 *   - eq_d marks the bit positions at which da, db and dc are equal (eq());
 *   - the differential is possible iff ((eq_d << 1) | 1), restricted to
 *     word_size bits, has no bit in common with da ^ db ^ dc ^ (da << 1);
 *   - if possible, the probability is 2^-w, where w is the number of
 *     positions, excluding the most significant one, at which da, db and dc
 *     are not all equal.
 * For word_size <= 1 the probability is 1 if da ^ db == dc and 0 otherwise.
 *
 * Fixes relative to the previous revision:
 *   - eq_d and b_is_possible were declared inside the two branches of an
 *     if/else but used after it, which does not compile; both branches did the
 *     same work, so they are merged;
 *   - the full mask is derived from the parameter word_size instead of the
 *     macro WORD_SIZE, consistent with xdp_add_lm_unoptimized();
 *   - the population count uses the 64-bit builtin so that no bits are dropped
 *     for words wider than 32 bits.
 *
 * \param da first input difference.
 * \param db second input difference.
 * \param dc output difference.
 * \param word_size word size in bits (at most 64).
 * \return the probability.
 */
double xdp_add_lm(WORD_T da, WORD_T db, WORD_T dc, uint32_t word_size)
{
  double p = 0.0;
  if(word_size > 1) {
	 assert(word_size <= 64); // keeps both shift amounts below in 0..63
	 const WORD_MAX_T mask = (~0ULL >> (64 - word_size)); // full mask (word_size bits)
	 const WORD_MAX_T mask_no_msb = (~0ULL >> (64 - (word_size - 1))); // mask without the MSB (word_size - 1 bits)

	 const WORD_T eq_d = eq(da, db, dc, word_size);
	 // eq of the differences shifted left by one: bit 0 compares three zero bits, hence the | 1
	 const WORD_T eq_d_sl_1 = ((eq_d << 1) | 1) & mask;
	 const bool b_is_possible = ((eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1))) == 0);

	 if(b_is_possible) {
		const WORD_T neq = ~eq_d; // positions at which da,db and dc are not equal
#if 0 // standard HW
		const uint32_t w = hamming_weight(neq & mask_no_msb);
#else // assembly instruction for HW (-mpopcnt)
		const uint32_t w = __builtin_popcountll((unsigned long long)(neq & mask_no_msb));
#endif // #if 0 // standard HW
		p = (double)1.0 / (double)pow(2,w);
	 }
  } else {
	 if((da ^ db) == dc) {
		p = 1.0;
	 } else {
		p = 0.0;
	 }
  }
  return p;
}


/* --- */

/*
 * Straightforward (unoptimized) XOR differential probability of modular
 * addition on word_size-bit words: eq() is evaluated twice, once on the
 * differences shifted left by one (feasibility check) and once on the
 * differences themselves (weight).
 *
 * \param da first input difference
 * \param db second input difference
 * \param dc output difference
 * \param word_size word size in bits (assumed to be at most 64)
 * \return the probability of the differential (0.0 if it is impossible)
 */
double xdp_add_lm_unoptimized(WORD_T da, WORD_T db, WORD_T dc, uint32_t word_size)
{
#if 0 // DEBUG
  printf("[%s:%d] %s() %llX %llX %llX %d\n", __FILE__, __LINE__, __FUNCTION__, 
			(WORD_MAX_T)da, (WORD_MAX_T)db, (WORD_MAX_T)dc, word_size);
#endif// #if 0 // DEBUG

  if(word_size <= 1) {
    return ((da ^ db) == dc) ? 1.0 : 0.0;
  }

  // Mask of the (word_size - 1) LS bits, i.e. everything except the MSB.
  // A single ULL expression covers both the <= 32 and > 32 bit cases and
  // does not depend on the width of unsigned long.
  const WORD_T mask = (WORD_T)(~0ULL >> (64 - (word_size - 1)));

  const bool b_is_possible = ((eq((da << 1), (db << 1), (dc << 1), word_size) & (da ^ db ^ dc ^ (da << 1))) == 0);
  if(!b_is_possible) {
    return 0.0;
  }

  const WORD_T neq = ~eq(da, db, dc, word_size); // positions at which da,db and dc are not equal
  const uint32_t w = hamming_weight(neq & mask);
  return (double)1.0 / (double)pow(2,w);
}


/* --- */

/*
 * XOR differential probability of modular addition on WORD_SIZE-bit words,
 * with eq() evaluated only once.
 *
 * The differential (da, db -> dc) is feasible iff
 * ((eq << 1) | 1) & (da ^ db ^ dc ^ (da << 1)) is zero; its probability is
 * then 2^-w, where w counts the positions below the MSB at which da, db and
 * dc are not all equal.
 *
 * \param da first input difference
 * \param db second input difference
 * \param dc output difference
 * \return the probability of the differential (0.0 if it is impossible)
 */
double xdp_add_lm_yann(WORD_T da, WORD_T db, WORD_T dc)
{
#if 0 // DEBUG
  printf("[%s:%d] %s() %llX %llX %llX\n", __FILE__, __LINE__, __FUNCTION__, 
			(WORD_MAX_T)da, (WORD_MAX_T)db, (WORD_MAX_T)dc);
#endif// #if 0 // DEBUG
  double p = 0.0;
  // Mask without the MSB: the (WORD_SIZE - 1) LS bits. The parentheses
  // matter: (64 - WORD_SIZE - 1) would select WORD_SIZE + 1 bits instead.
  // A one-bit word has no bits below the MSB, hence the empty mask.
  const WORD_MAX_T mask_no_msb =
    (WORD_SIZE > 1) ? (~0ULL >> (64 - (WORD_SIZE - 1))) : 0ULL;
  const WORD_T eq_d = eq(da, db, dc);
  const WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x00000001) & MASK;
  const bool b_is_possible = ((eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1))) == 0);
  if(b_is_possible) {
    const WORD_T neq = ~eq_d; // positions at which da,db and dc are not equal
    const uint32_t w = hamming_weight(neq & mask_no_msb);
    p = (double)1.0 / (double)pow(2,w);
#if 0 // DEBUG
    printf("\nneq = ");
    print_binary(neq);
    printf("\n");
    printf("[%s:%d] w mask neq %d %llX %lld %lld\n", __FILE__, __LINE__, 
           w, (WORD_MAX_T)mask_no_msb, (WORD_MAX_T)neq, (WORD_MAX_T)(neq & mask_no_msb));
#endif // #if 0 // DEBUG
  }
  return p;
}

/* --- */



/*
 * XOR differential probability of modular addition on word_size-bit words.
 *
 * All masks and the eq() call are derived from the word_size argument, so
 * the result is consistent when word_size differs from the global WORD_SIZE.
 * eq() is evaluated once and reused for the feasibility check and for the
 * weight.
 *
 * \param da first input difference
 * \param db second input difference
 * \param dc output difference
 * \param word_size word size in bits (assumed to be at most 64)
 * \return the probability of the differential (0.0 if it is impossible)
 */
double xdp_add_lm(WORD_T da, WORD_T db, WORD_T dc, uint32_t word_size)
{
#if 0 // DEBUG
  printf("[%s:%d] %s() %llX %llX %llX %d\n", __FILE__, __LINE__, __FUNCTION__, 
			(WORD_MAX_T)da, (WORD_MAX_T)db, (WORD_MAX_T)dc, word_size);
#endif// #if 0 // DEBUG

  if(word_size <= 1) {
    return ((da ^ db) == dc) ? 1.0 : 0.0;
  }

  const WORD_T mask = (WORD_T)(~0ULL >> (64 - word_size)); // word_size LS bits
  const WORD_T mask_no_msb = (WORD_T)(~0ULL >> (64 - (word_size - 1))); // all bits except the MSB

  const WORD_T eq_d = eq(da, db, dc, word_size);
  const WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x00000001) & mask;
  const bool b_is_possible = ((eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1))) == 0);
  if(!b_is_possible) {
    return 0.0;
  }

  const WORD_T neq = ~eq_d; // positions at which da,db and dc are not equal
  const uint32_t w = hamming_weight(neq & mask_no_msb);
  return (double)1.0 / (double)pow(2,w);
}

/*
 * XOR differential probability of modular addition on word_size-bit words,
 * with eq() evaluated only once.
 *
 * The function asserts that the mask derived from word_size equals the
 * global MASK, i.e. in debug builds word_size must match WORD_SIZE whenever
 * word_size > 1.
 *
 * \param da first input difference
 * \param db second input difference
 * \param dc output difference
 * \param word_size word size in bits (assumed to be at most 64)
 * \return the probability of the differential (0.0 if it is impossible)
 */
double xdp_add_lm_yann(WORD_T da, WORD_T db, WORD_T dc, uint32_t word_size)
{
  if(word_size <= 1) {
    return ((da ^ db) == dc) ? 1.0 : 0.0;
  }

  const WORD_MAX_T mask = (~0ULL >> (64 - word_size)); // word_size LS bits
  // Mask without the MSB: the (word_size - 1) LS bits. The parentheses
  // matter: (64 - word_size - 1) would select word_size + 1 bits and would
  // shift by a negative amount for word_size == 64.
  const WORD_MAX_T mask_no_msb = (~0ULL >> (64 - (word_size - 1)));
  assert(mask == MASK);

  const WORD_T eq_d = eq(da, db, dc, word_size);
  const WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x00000001) & mask;
  const bool b_is_possible = ((eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1))) == 0);
  if(!b_is_possible) {
    return 0.0;
  }

  const WORD_T neq = ~eq_d; // positions at which da,db and dc are not equal
  const uint32_t w = hamming_weight(neq & mask_no_msb);
  return (double)1.0 / (double)pow(2,w);
}

/* --- */

// Select the C type behind WORD_T from the compile-time WORD_SIZE.
// 16-bit words are stored in a 32-bit type (uint16_t is left as a
// commented-out alternative); every word size other than 16, 32 and 64
// also maps to uint32_t.
#if (WORD_SIZE == 16)
#define WORD_T uint32_t//uint16_t
#endif // #if (WORD_SIZE == 16)
#if (WORD_SIZE == 32)
#define WORD_T uint32_t
#endif // #if (WORD_SIZE == 32)
#if (WORD_SIZE == 64)
#define WORD_T uint64_t
#endif // #if (WORD_SIZE == 64)
#if ((WORD_SIZE != 16) && (WORD_SIZE != 32) && (WORD_SIZE != 64))
#define WORD_T uint32_t//uint64_t//uint32_t
#endif  // #if ((WORD_SIZE != 16) && (WORD_SIZE != 32) && (WORD_SIZE != 64))

/* --- */

// Debug trace (orphaned fragment): prints log2 of the probabilities stored
// in g_T[0 .. iround-2], then log2 of p_est, p_part, the bound
// g_B[NROUNDS - iround - 1] and g_Bn.
#if 1 // DEBUG
		  for(uint32_t i = 0; i < (iround - 1); i++) {
			 double p_i = g_T[i].p;
			 printf("%2d: %4.2f\n", i, log2(p_i));
		  }
		  printf("[%s:%d] iround %2d ibit %2d | p_est * p_part * g_B[%2d] = %4.2f + %4.2f + %4.2f <> %4.2f\n", __FILE__, __LINE__,
					iround, ibit, (NROUNDS - iround - 1), log2(p_est), log2(p_part), log2(g_B[NROUNDS - iround - 1]), log2(g_Bn));
#endif // #if 1 // DEBUG

/* --- */

// Debug trace (orphaned fragment): prints p_est next to the bound g_Bn.
#if 1 // DEBUG
			 printf("[%s:%d] %16.15f >= %16.15f\n", __FILE__, __LINE__, p_est, g_Bn);
#endif // #if 1 // DEBUG

/* --- */

// Best probabilities for WORD_SIZE = 4 bits.
// Each entry is 2^-k written as 1 / (1 << k); the trailing comment gives the
// number of rounds the entry refers to. Eight initializers are listed.
const double g_B[NROUNDS] = {
  (1.0 / (double)(1ULL <<  0)), // 1
  (1.0 / (double)(1ULL <<  1)), // 2
  (1.0 / (double)(1ULL <<  3)), // 3
  (1.0 / (double)(1ULL <<  4)), // 4
  (1.0 / (double)(1ULL <<  6)), // 5
  (1.0 / (double)(1ULL <<  7)), // 6
  (1.0 / (double)(1ULL <<  9)), // 7
  (1.0 / (double)(1ULL << 10)), // 8
};

/* --- */

  //  uint32_t ibit = 31;
  //  const WORD_MAX_T mask_lsb = (~0ULL >> (64 - (ibit + 1))); // masks (ibit + 1) LS bits
  //  printf("[%s:%d] ibit %d %llX\n", __FILE__, __LINE__, ibit, mask_lsb);
  //  print_binary(mask_lsb, 64);


/* --- */

		// Orphaned fragment: derive the inputs of round r + 1 from the
		// differences of round r and recurse, restarting at bit position 0.
		// NOTE(review): this call passes six arguments; confirm against the
		// signature of speck_best_trail_search_i in use.
		const uint32_t r_next = r + 1;
		const uint32_t i_next = 0;
		const WORD_T alpha_next = RROT(gamma_in, g_r1);
		const WORD_T beta_next = XOR(gamma_in, LROT(beta_in, g_r2));
		const WORD_T gamma_next = 0;
		speck_best_trail_search_i(n, r_next, i_next, alpha_next, beta_next, gamma_next);

/* --- */

/**
 * Search for the best differential trail of block cipher SPECK.
 *
 * NOTE: the body is empty in this snapshot; the function is a stub.
 *
 * \param n total number of rounds
 * \param r current round: \f$ 0 \le r < NROUNDS\f$
 * \param i current bit position
 * \param B array of bounds: 1,2,...,n-1
 * \param Bn underestimated bound for round n
 * \param T trail
 * \param alpha_in input difference to the addition of round r
 * \param beta_in input difference to the addition of round r
 * \param gamma_in output difference from the addition of round r
 *
 * \see speck_xor_threshold_search_simple
 */
void speck_best_trail_search_i(const uint32_t n, // total number of rounds
										 const uint32_t r, // current round
										 const uint32_t i, // current bit position
										 const double B[NROUNDS], // array of bounds: 1,2,...,n-1
										 double* Bn, // underestimated bound for round n
										 differential_3d_t T[NROUNDS], // trail
										 const WORD_T alpha_in, // input difference to the addition of round r
										 const WORD_T beta_in, // input difference to the addition of round r
										 const WORD_T gamma_in) // output difference from the addition of round r
{



}


/* --- */

/* 
Results from full search (function speck_best_trail_search_full)

#--- [./tests/speck-xor-best-search-tests.cc:208] Tests, WORD_SIZE  = 4, MASK =        F
[./tests/speck-xor-best-search-tests.cc:64] ndiffs 2^24
[./tests/speck-xor-best-search-tests.cc:142] Best trail for 4 rounds (word size 4 bits) p 2^-4.00
8 0 -> 8 1.00
1 8 -> 9 0.50
3 B -> 6 0.25
C 8 -> C 0.50

real    0m1.896s
user    0m1.896s
sys     0m0.000s

#--- [./tests/speck-xor-best-search-tests.cc:208] Tests, WORD_SIZE  = 4, MASK =        F
[./tests/speck-xor-best-search-tests.cc:64] ndiffs 2^28
[./tests/speck-xor-best-search-tests.cc:142] Best trail for 5 rounds (word size 4 bits) p 2^-6.00
F F -> E 0.50
D 1 -> 4 0.25
8 0 -> 8 1.00
1 8 -> 9 0.50
3 B -> E 0.25

real    0m39.516s
user    0m39.525s
sys     0m0.000s


 */

/* --- */

		// Orphaned fragment: leave the enclosing loop as soon as the round
		// probability p_r is zero.
		if(p_r == 0.0) {
		  break;
		}
		// NOTE(review): identical to the check above; it can never fire here.
		if(p_r == 0.0) {
		  break;
		}

/* --- */

	 // Orphaned fragment: print diffs_i in decimal and as 64 binary digits,
	 // then the dz field of D[NROUNDS-1] down to D[0] in binary on one line.
	 printf("%10lld ", (WORD_MAX_T)diffs_i);
	 print_binary(diffs_i, 64);
	 printf("\n");
	 for(r = (NROUNDS - 1); r > 0; r--) {
		//		printf("%2.0f %2d %X %X %X %4.2f\n", log2(diffs_i), r, D[r].dx, D[r].dy, D[r].dz, log2(D[r].p));
		print_binary(D[r].dz);
		//		print_binary(D[r].dy);
		//		print_binary(D[r].dx);
	 }
	 // The loop above stops before r == 0, so D[0] is printed separately.
	 print_binary(D[0].dz);
	 //	 print_binary(D[0].dy);
	 //	 print_binary(D[0].dx);
	 printf("\n");


/* --- */

/*
 * Two-round experiment: for every non-zero differential (da, db -> dc) with
 * non-zero xdp_add_lm probability, enumerate every second-round output
 * difference dc_two and histogram the pairs
 * (Hamming weight of the first-round input XOR, Hamming weight of the
 * second-round output XOR) in a (WORD_SIZE + 1) x (WORD_SIZE + 1) matrix,
 * which is printed at the end.
 */
void test_speck_xdp_add()
{
  printf("[%s:%d] %s()\n", __FILE__, __LINE__, __FUNCTION__);
  uint32_t rconst = 2;
  uint32_t lconst = 3;
  printf("[%s:%d] r l const %d %d\n", __FILE__, __LINE__, rconst, lconst);
  // A[row][col] counts occurrences; calloc zero-initializes it.
  gsl_matrix* A = gsl_matrix_calloc(WORD_SIZE + 1, WORD_SIZE + 1);
  for(WORD_T da = 0; da < ALL_WORDS; da++) {
	 for(WORD_T db = 0; db < ALL_WORDS; db++) {
		for(WORD_T dc = 0; dc < ALL_WORDS; dc++) {
		  // skip the all-zero differential and the impossible ones
		  if((da == 0) && (db == 0) && (dc == 0))
			 continue;
		  double p = xdp_add_lm(da, db, dc);
		  if(p == 0)
			 continue;

		  // 1st round
		  WORD_T da_in = da;
		  WORD_T db_in = db;
		  WORD_T da_out = LROT(dc, rconst);
		  WORD_T db_out = dc ^ LROT(db, lconst);
		  WORD_T delta_in = (da_in ^ db_in);
		  WORD_T delta_out = (da_out ^ db_out);
		  // 2nd round
		  // NOTE(review): da_in/db_in are updated here but not read again;
		  // the loop below rotates the first-round db, not db_in, and the
		  // probability of the second addition is not checked -- confirm
		  // this is intended.
		  da_in = da_out;
		  db_in = db_out;
		  for(WORD_T dc_two = 0; dc_two < ALL_WORDS; dc_two++) {
			 da_out = LROT(dc_two, rconst);
			 db_out = dc_two ^ LROT(db, lconst);
			 //			 delta_in = (da_in ^ db_in);
			 delta_out = (da_out ^ db_out);

			 // delta_in is still the first-round input XOR at this point
			 uint32_t col = hamming_weight(delta_in);
			 uint32_t row = hamming_weight(delta_out);
			 assert(col <= WORD_SIZE);
			 assert(row <= WORD_SIZE);
			 if((col == 0) && (row == 0)) {
				printf("(%X %X) -> %X -> (%X %X) | %2.0f\n", da, db, dc_two, da_out, db_out, log2(p));
			 }
			 // increment the counter of cell (row, col)
			 double x = 1.0 + gsl_matrix_get(A, row, col);
			 gsl_matrix_set(A, row, col, x);
		  }
		}
	 }
  }
  // dump the histogram, one matrix row per output line
  for(int row = 0; row <= WORD_SIZE; row++){
	 for(int col = 0; col <= WORD_SIZE; col++){
		double e = gsl_matrix_get(A, row, col);
		printf("%2.0f, ", e);
	 }
	 printf("\n");
  }
  printf("\n");
  gsl_matrix_free(A);
}

/* --- */
		  // Orphaned fragment: report a column/row index that exceeds
		  // WORD_SIZE together with the difference it was computed from.
		  if(!(col <= WORD_SIZE)) {
			 printf("[%s:%d] delta_in %X col %d\n", __FILE__, __LINE__, delta_in, col);
		  }
		  if(!(row <= WORD_SIZE)) {
			 printf("[%s:%d] delta_out %X row %d\n", __FILE__, __LINE__, delta_out, row);
		  }

/* --- */

void test_speck_xdp_add()
{
  printf("[%s:%d] %s()\n", __FILE__, __LINE__, __FUNCTION__);
  uint32_t rconst = 2;
  uint32_t lconst = 3;
  printf("[%s:%d] r l const %d %d\n", __FILE__, __LINE__, rconst, lconst);
  gsl_matrix* A = gsl_matrix_calloc(ALL_WORDS * ALL_WORDS, ALL_WORDS * ALL_WORDS);
  for(WORD_T da = 0; da < ALL_WORDS; da++) {
	 for(WORD_T db = 0; db < ALL_WORDS; db++) {
		for(WORD_T dc = 0; dc < ALL_WORDS; dc++) {
		  if((da == 0) && (db == 0) && (dc == 0))
			 continue;
		  double p = xdp_add_lm(da, db, dc);
		  if(p == 0)
			 continue;
		  WORD_T da_in = da;
		  WORD_T db_in = db;
		  WORD_T da_out = LROT(dc, rconst);
		  WORD_T db_out = dc ^ LROT(db, lconst);
		  WORD_T delta_in = (da_in ^ db_in);
		  WORD_T delta_out = (da_out ^ db_out);
		  //		  uint32_t col = hamming_weight(delta_in);
		  //		  uint32_t row = hamming_weight(delta_out);
		  uint32_t col = (db_in << WORD_SIZE) | da_in;
		  uint32_t row = (db_out << WORD_SIZE) | da_out;
		  //		  printf("(%X %X) -> %X -> (%X %X) | %2.0f\n", da_in, db_in, dc, da_out, db_out, log2(p));
		  double x = 1.0 + gsl_matrix_get(A, row, col);
		  printf("%f ", x);
		  gsl_matrix_set(A, row, col, x);
		}
	 }
  }
  for(uint32_t row = 0; row < ALL_WORDS * ALL_WORDS; row++){
	 for(uint32_t col = 0; col < ALL_WORDS * ALL_WORDS; col++){
		double e = gsl_matrix_get(A, row, col);
		if(e)
		  printf("1");
		else
		  printf(".");
		//		printf("%2.0f, ", e);
	 }
	 printf("\n");
  }
  printf("\n");
  gsl_matrix_free(A);
}
 
/* --- */

void test_speck_xdp_add()
{
  printf("[%s:%d] %s()\n", __FILE__, __LINE__, __FUNCTION__);
  uint32_t rconst = 2;
  uint32_t lconst = 3;
  printf("[%s:%d] r l const %d %d\n", __FILE__, __LINE__, rconst, lconst);
  gsl_matrix* A = gsl_matrix_calloc(WORD_SIZE + 1, WORD_SIZE + 1);
  for(WORD_T da = 0; da < ALL_WORDS; da++) {
	 for(WORD_T db = 0; db < ALL_WORDS; db++) {
		for(WORD_T dc = 0; dc < ALL_WORDS; dc++) {
		  if((da == 0) && (db == 0) && (dc == 0))
			 continue;
		  double p = xdp_add_lm(da, db, dc);
		  if(p == 0)
			 continue;
		  WORD_T da_in = da;
		  WORD_T db_in = db;
		  WORD_T da_out = LROT(dc, rconst);
		  WORD_T db_out = dc ^ LROT(db, lconst);
		  WORD_T delta_in = (da_in ^ db_in);
		  WORD_T delta_out = (da_out ^ db_out);
		  uint32_t col = hamming_weight(delta_in);
		  uint32_t row = hamming_weight(delta_out);
		  if(!(col <= WORD_SIZE)) {
			 printf("[%s:%d] delta_in %X col %d\n", __FILE__, __LINE__, delta_in, col);
		  }
		  if(!(row <= WORD_SIZE)) {
			 printf("[%s:%d] delta_out %X row %d\n", __FILE__, __LINE__, delta_out, row);
		  }
		  assert(col <= WORD_SIZE);
		  assert(row <= WORD_SIZE);
		  printf("(%X %X) -> %X -> (%X %X) | %2.0f\n", da_in, db_in, dc, da_out, db_out, log2(p));
		  double x = 1.0 + gsl_matrix_get(A, row, col);
		  gsl_matrix_set(A, row, col, x);
		}
	 }
  }
  for(int row = 0; row <= WORD_SIZE; row++){
	 for(int col = 0; col <= WORD_SIZE; col++){
		double e = gsl_matrix_get(A, row, col);
		printf("%2.0f, ", e);
	 }
	 printf("\n");
  }
  printf("\n");
  gsl_matrix_free(A);
}

/* --- */

/*
 * Ordering predicate on differential_3d_t for use with std::sort.
 *
 * Each differential is ranked by the Hamming weight of
 * RROT(dz, 2) ^ dz ^ LROT(dy, 3 % WORD_SIZE); the differential with the
 * lower weight sorts first.
 */
bool sort_comp_diff_3d_hw_custom(differential_3d_t a, differential_3d_t b)
{
  const uint32_t rot_right = 2;//8 % WORD_SIZE;
  const uint32_t rot_left = 3 % WORD_SIZE;

  const uint32_t weight_a = hamming_weight(RROT(a.dz, rot_right) ^ a.dz ^ LROT(a.dy, rot_left));
  const uint32_t weight_b = hamming_weight(RROT(b.dz, rot_right) ^ b.dz ^ LROT(b.dy, rot_left));

  // lower Hamming weight first (an ordering by higher probability would be a.p > b.p)
  return (weight_a < weight_b);
}


/*
 * Enumerate all differentials (da, db -> dc), sort them with
 * sort_comp_diff_3d_hw_custom and print every possible one together with
 * the Hamming weights of its components and the weight used as sort key.
 * Finally print the number of impossible (zero-probability) differentials
 * and the total number of differentials.
 */
void test_xdp_add()
{
  printf("[%s:%d] %s()\n", __FILE__, __LINE__, __FUNCTION__);
  uint32_t rconst = 2;//8 % WORD_SIZE;
  uint32_t lconst = 3 % WORD_SIZE;
  printf("[%s:%d] r l const %d %d\n", __FILE__, __LINE__, rconst, lconst);

  std::vector<differential_3d_t> diff_vec;
  for(WORD_T da = 0; da < ALL_WORDS; da++) {
    for(WORD_T db = 0; db < ALL_WORDS; db++) {
      for(WORD_T dc = 0; dc < ALL_WORDS; dc++) {
        differential_3d_t diff;
        diff.dx = da;
        diff.dy = db;
        diff.dz = dc;
        diff.p = xdp_add_lm(da, db, dc);
        diff_vec.push_back(diff);
      }
    }
  }
  //  std::sort(diff_vec.begin(), diff_vec.end(), sort_comp_diff_3d_p);
  std::sort(diff_vec.begin(), diff_vec.end(), sort_comp_diff_3d_hw_custom);

  // Number of impossible differentials. It is counted here explicitly so
  // that the summary line below reports a real value.
  uint32_t cnt = 0;
  for(std::vector<differential_3d_t>::iterator vec_iter = diff_vec.begin(); vec_iter != diff_vec.end(); ++vec_iter) {
    const differential_3d_t& diff = *vec_iter;
    const WORD_T da = diff.dx;
    const WORD_T db = diff.dy;
    const WORD_T dc = diff.dz;
    const double p = diff.p;
    if(p == 0.0) {
      cnt++;
      continue;
    }
    // same quantity that sort_comp_diff_3d_hw_custom sorts by
    const uint32_t hw = hamming_weight(RROT(dc, rconst) ^ dc ^ LROT(db, lconst));
    const uint32_t hwa = hamming_weight(da);
    const uint32_t hwb = hamming_weight(db);
    const uint32_t hwc = hamming_weight(dc);
    printf("HW %2d %2d %2d ", hwa, hwb, hwc);
    print_binary(da); printf(" ");
    print_binary(db); printf(" ");
    print_binary(dc); printf(" ");
    printf(" %X %X %X %2.0f | %2d\n", da, db, dc, log2(p), hw);
  }
  printf("impossible diffs = %d 2^%4.2f\n", cnt, log2(cnt));
  printf("----------------------------------------------------------------\n");
  printf("[%s:%d] vec size: %d 2^%4.2f\n", __FILE__, __LINE__, (uint32_t)diff_vec.size(), log2(diff_vec.size()));
}

/* --- */

// Orphaned fragment: three alternative definitions of t2, selected at
// compile time. Only the first one (rotated x, x and rotated y XORed
// together) is enabled.
#if 1
		WORD_T t2 = (LROT(x, r) ^ x ^ LROT(y, s)) & MASK;
#endif
#if 0
		WORD_T t2 = (LROT(x, r) ^ LROT(y, s)) & MASK;
#endif
#if 0
		WORD_T t2 = (x ^ LROT(y, s)) & MASK;
#endif

/* --- */

// Disabled fragment: ratio of cnt to ALL_WORDS * ALL_WORDS * WORD_SIZE,
// printed together with r and s.
#if 0
		double ratio = (double)cnt / (double)(ALL_WORDS * ALL_WORDS * WORD_SIZE);
		printf("[%s:%d] %d %d cnt (%5d / %5d) = %f\n", __FILE__, __LINE__, r, s, cnt, (uint32_t)(ALL_WORDS * ALL_WORDS * WORD_SIZE), ratio);
#endif

/* --- */

			 // Orphaned fragment: maximum-probability output difference for
			 // (da, db), then the same for the pair transformed by
			 // sigma_left / sigma_right, and the product of the probabilities.
			 WORD_T da = i;
			 WORD_T db = j;
			 WORD_T dc = 0;
			 double p_zero = max_xdp_add_lm(da, db, &dc);

			 // keep the old da: sigma_right needs it after da is overwritten
			 WORD_T da_temp = da;
			 da = sigma_left(da, r);
			 db = sigma_right(da_temp, db, s);

			 WORD_T dc2 = 0;
			 double p2 = max_xdp_add_lm(da, db, &dc2);
			 // NOTE(review): p1 is not defined in this fragment, while p_zero
			 // above is never read -- presumably one of the two names is stale.
			 double p = p1 * p2;


/* --- */

// Two alternative 4 x 8 generator matrices g_G; only the first is enabled.
// The value after each #if (-2.63, -2) is the author's annotation for that
// matrix; its exact meaning is not stated here.
#if 1 // -2.63
WORD_T g_G[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {1,0,0,0,0,1,1,1},
  {0,1,0,0,1,0,1,1},
  {0,0,1,0,1,1,0,1},
  {0,0,0,1,1,1,1,0},
};
#endif // #if 1
#if 0 // -2
WORD_T g_G[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {1,0,0,0,1,0,0,0},
  {0,1,0,0,1,0,0,0},
  {0,0,1,0,1,0,0,0},
  {0,0,0,1,1,0,0,0},
};
#endif // #if 0

/* --- */

#if 1 // Primitive 3: ADD + ROT + ADD
/*
 * x = (x_L | x_R) -> y = (x_L + x_R) | ((x_L <<< rot_L) + (x_R <<< rot_R))
 *
 * The word x is split into a left and a right half of WORD_SIZE / 2 bits.
 * The left half of the result is the sum of the halves, the right half is
 * the sum of the rotated halves; both sums are reduced to WORD_SIZE / 2 bits.
 *
 * The function takes the whole word x (which is what its callers pass and
 * what the body reads) and derives x_L and x_R from it.
 *
 * \param x input word
 * \param rot_L left rotation amount applied to the left half
 * \param rot_R left rotation amount applied to the right half
 * \return the output word y, y <= MASK
 */
WORD_T rot_add_two_block(WORD_T x, WORD_T rot_L, WORD_T rot_R)
{
  assert((WORD_SIZE % 2) == 0); // word size must be even
  const WORD_T half_mask = (MASK >> (WORD_SIZE / 2));

  // split the input into halves
  const WORD_T x_L = (x >> (WORD_SIZE / 2)) & half_mask;
  const WORD_T x_R = x & half_mask;

  // left output half: plain sum of the halves
  const WORD_T y_L = (x_L + x_R) & half_mask;

  // right output half: sum of the rotated halves
  const WORD_T x_rot_L = LROT(x_L, rot_L);
  const WORD_T x_rot_R = LROT(x_R, rot_R);
  const WORD_T y_R = (x_rot_L + x_rot_R) & half_mask;

  const WORD_T y = (y_L << (WORD_SIZE / 2)) | y_R;
  if(!(y <= MASK)) {
    printf("[%s:%d] y > MASK %X > %llX\n", __FILE__, __LINE__, y, (WORD_MAX_T)MASK);
  }
  assert(y <= MASK);
  return y;
}

/*
 * Experimental XOR differential probability of rot_add_two_block: the
 * fraction of all inputs x for which the outputs of x and x ^ dx differ
 * by exactly dy.
 */
double xdp_rot_add_two_block_exper(WORD_T dx, WORD_T dy, WORD_T rot_L, WORD_T rot_R)
{
  assert((WORD_SIZE % 2) == 0); // word size must be even
  uint32_t n_hits = 0;
  for(WORD_T x = 0; x < ALL_WORDS; x++) {
    const WORD_T x_pair = XOR(x, dx);
    const WORD_T out = rot_add_two_block(x, rot_L, rot_R);
    const WORD_T out_pair = rot_add_two_block(x_pair, rot_L, rot_R);
    const WORD_T out_diff = XOR(out, out_pair);
    if(out_diff == dy) {
      n_hits++;
    }
  }
  return (double)n_hits / (double)(ALL_WORDS);
}

/*
 * Exhaustively search for the output difference with the highest
 * experimental probability for input difference dx. The best output
 * difference is stored in *dy_max each time a strictly better candidate is
 * found; the best probability is returned.
 */
double max_xdp_rot_add_two_block_exper(WORD_T dx, WORD_T* dy_max, WORD_T rot_L, WORD_T rot_R)
{
  assert((WORD_SIZE % 2) == 0); // word size must be even
  double best = 0.0;
  for(WORD_T cand = 0; cand < ALL_WORDS; cand++) {
    const double p_cand = xdp_rot_add_two_block_exper(dx, cand, rot_L, rot_R);
    if(p_cand > best) {
      *dy_max = cand;
      best = p_cand;
    }
  }
  return best;
}

void test_max_xdp_rot_add_two_block()
{
  assert((WORD_SIZE % 2) == 0); // word size must be power of two
  WORD_T half_mask = (MASK >> (WORD_SIZE / 2));
  WORD_T rot_L = 1 % WORD_SIZE;
  WORD_T rot_R = 3 % WORD_SIZE;
  for(WORD_T da = 0; da < ALL_WORDS; da++) {
	 WORD_T db = 0;
	 double p_max = max_xdp_rot_add_two_block_exper(da, &db, rot_L, rot_R);
	 printf("[%s:%d] max %X->%X = 2^%f\n", __FILE__, __LINE__, da, db, log2(p_max));
#if 0 // Estimate the probability
	 WORD_T da_L = (da >> (WORD_SIZE / 2)) & half_mask;
	 WORD_T da_R = x & half_mask;
	 double p_max_L = max_xdp_add_lm(da_L, da_R, &dc);
	 double p_max_R = max_xdp_rot_add_exper(da_L, da_R, &dc);
	 double p_max_est = p_max_L * p_max_R;
	 printf("[%s:%d] max %X->%X = 2^%f 2^%f\n", __FILE__, __LINE__, da, db, log2(p_max), log2(p_max_est));
#endif // #if 1
  }
}

#endif

/* --- */

// Disabled fragment: diff marks the bit positions at which da, db and dc
// are not all equal; ndiff_bits counts them with the MSB excluded. The line
// is printed only for p_max >= 0.5.
#if 0
		WORD_T diff = (~((da & db & dc) | (~da & ~db & ~dc))) & MASK;
		uint32_t ndiff_bits = hamming_weight(diff & ~(1UL << (WORD_SIZE - 1))); // don't count the MSB
		if(p_max >= 0.5) {
		  printf("[%s:%d] max (%X,%X)->%X = 2^%f %d\n", __FILE__, __LINE__, da, db, dc, log2(p_max), ndiff_bits);
		}
#endif

/* --- */

void test_1()
{
  WORD_T i = 0x42;
  WORD_T half_mask = (MASK >> (WORD_SIZE / 2));
  WORD_T x_L = (i >> (WORD_SIZE / 2)) & half_mask;
  WORD_T x_R = i & half_mask;

  printf("[%s:%d] i x_L x_R %X %X %X\n", __FILE__, __LINE__, i, x_L, x_R);
}


/* --- */

#if 1 // Primitive: AND + ADD

/*
 * AND followed by ADD: z = ADD(x & y, y).
 */
WORD_T and_add(WORD_T x, WORD_T y)
{
  const WORD_T conj = (x & y);
  return ADD(conj, y);
}

/*
 * Experimental XOR differential probability of and_add: the fraction of all
 * input pairs (x, y) for which the outputs of (x, y) and (x ^ dx, y ^ dy)
 * differ by exactly dz.
 */
double xdp_and_add_exper(WORD_T dx, WORD_T dy, WORD_T dz)
{
  uint32_t n_hits = 0;
  for(WORD_T x = 0; x < ALL_WORDS; x++) {
    const WORD_T x_pair = XOR(x, dx);
    for(WORD_T y = 0; y < ALL_WORDS; y++) {
      const WORD_T y_pair = XOR(y, dy);
      const WORD_T out = and_add(x, y);
      const WORD_T out_pair = and_add(x_pair, y_pair);
      const WORD_T out_diff = XOR(out, out_pair);
      if(out_diff == dz) {
        n_hits++;
      }
    }
  }
  return (double)n_hits / (double)(ALL_WORDS * ALL_WORDS);
}

/*
 * Exhaustively search for the output difference with the highest
 * experimental probability for the input differences (dx, dy). The best
 * output difference (0 if none has non-zero probability) is written to *dz;
 * the best probability is returned.
 */
double max_xdp_and_add_exper(WORD_T dx, WORD_T dy, WORD_T* dz)
{
  WORD_T best_diff = 0;
  double best_p = 0.0;
  for(WORD_T cand = 0; cand < ALL_WORDS; cand++) {
    const double p_cand = xdp_and_add_exper(dx, dy, cand);
    if(p_cand > best_p) {
      best_diff = cand;
      best_p = p_cand;
    }
  }
  *dz = best_diff;
  return best_p;
}

/*
 * Print the experimental probability of every differential
 * (da, db -> dc) of and_add with a non-zero input difference, followed by
 * the maximum probability found.
 */
void test_xdp_and_add_exper()
{
  double p_max = 0.0;
  // da ranges over all words, like db and dc (the bound is the number of
  // words, not the number of bits in a word)
  for(WORD_T da = 0; da < ALL_WORDS; da++) {
    for(WORD_T db = 0; db < ALL_WORDS; db++) {
      if((da == 0) && (db == 0)) // skip the zero input difference
        continue;
      for(WORD_T dc = 0; dc < ALL_WORDS; dc++) {
        const double p = xdp_and_add_exper(da, db, dc);
        printf("[%s:%d] p(%X,%X->%X) = 2^%f\n", __FILE__, __LINE__, da, db, dc, log2(p));
        if(p > p_max) {
          p_max = p;
        }
      }
    }
  }
  printf("[%s:%d] p_max = %f 2^%f\n", __FILE__, __LINE__, p_max, log2(p_max));
}

#endif // #if 1 // Primitive: AND + ADD

/* --- */

/*
 * Exhaustive search over all output differences dc for the one that
 * maximizes xdp_add(A, da, db, dc). Only available for WORD_SIZE <= 10
 * (asserted). Ties are resolved in favour of the largest dc, because the
 * comparison is non-strict.
 *
 * \param A transition matrices passed through to xdp_add
 * \param da first input difference
 * \param db second input difference
 * \param dc_max receives the maximizing output difference
 * \return the maximum probability
 */
double max_xdp_xor_add_exper_1111(gsl_matrix* A[2][2][2], 
									  const WORD_T da, const WORD_T db, 
									  WORD_T* dc_max)
{
  assert(WORD_SIZE <= 10);
  double best = 0.0;
#if(WORD_SIZE <= 10)
  for(uint32_t cand = 0; cand < ALL_WORDS; cand++) {
    const double p_cand = xdp_add(A, da, db, cand);
    if(p_cand >= best) {
      *dc_max = cand;
      best = p_cand;
    }
  }
#endif // #if(WORD_SIZE <= 10)
  return best;
}



/* --- */

/*
 * Add a word to its own left rotation: y = ADD(x, LROT(x, r)).
 */
WORD_T rot_add(WORD_T r, WORD_T x)
{
  const WORD_T rotated = LROT(x, r);
  return ADD(x, rotated);
}

/*
 * Experimental XOR differential probability of rot_add with rotation r: the
 * fraction of all inputs x for which the outputs of x and x ^ dx differ by
 * exactly dy.
 */
double xdp_rot_add(WORD_T r, WORD_T dx, WORD_T dy)
{
  uint32_t n_hits = 0;
  for(uint32_t x = 0; x < ALL_WORDS; x++) {
    const WORD_T x_pair = XOR(x, dx);
    const WORD_T out = rot_add(r, x);
    const WORD_T out_pair = rot_add(r, x_pair);
    const WORD_T out_diff = XOR(out, out_pair);
    if(out_diff == dy) {
      n_hits++;
    }
  }
  return (double)n_hits / (double)ALL_WORDS;
}


void test_xdp_rot_add()
{
  //  WORD_T r = 3;
  for(WORD_T r = 0; r < WORD_SIZE; r++) {
	 double p_max = 0.0;
	 WORD_T r_max = 0;
	 WORD_T da_max = 0;
	 WORD_T db_max = 0;
	 for(WORD_T i = 0; i < ALL_WORDS; i++) {
		if(i == 0) // skip zero input diff.
		  continue;
		for(WORD_T j = 0; j < ALL_WORDS; j++) {
		  WORD_T da = i;
		  WORD_T db = j;
		  double p = xdp_rot_add(r, da, db);
		  //		  printf("[%s:%d] p(%X->%X) = 2^%f\n", __FILE__, __LINE__, da, db, log2(p));
		  if(p > p_max) {
			 p_max = p;
			 r_max = r;
			 da_max = da;
			 db_max = db;
		  }
		}
	 }
	 printf("[%s:%d] r_max %2d p_max (%X -> %X) = 2^%4.2f (%f)\n", __FILE__, __LINE__, r_max, da_max, db_max, log2(p_max), p_max);
  }
}

/* --- */

/*
 * Enumerate every binary L_dim x L_dim matrix L (L_dim = 2 * WORD_SIZE) and
 * apply it, via lcode_encode, to every (2 * WORD_SIZE)-bit input
 * w = (y << WORD_SIZE) | x, printing the two halves of the result.
 *
 * The matrix is filled row by row from the bits of the counter e, so e must
 * be able to hold L_dim * L_dim bits. The function asserts that this fits
 * into a 64-bit counter (L_dim * L_dim < 64).
 */
void test_linear_transform()
{
  const uint32_t L_dim = (2 * WORD_SIZE);
  const uint32_t nbits = (L_dim * L_dim); // number of matrix entries
  // 1ULL << nbits is only defined for nbits < 64
  assert(nbits < 64);

  gsl_matrix* L = gsl_matrix_calloc(L_dim, L_dim);
  gsl_vector* V_in = gsl_vector_calloc(L_dim);
  gsl_vector* V_out = gsl_vector_calloc(L_dim);
  const uint64_t N = (1ULL << nbits);
  printf("[%s:%d] L_dim = %d N = 2^%4.2f\n", __FILE__, __LINE__, L_dim, log2(N));
  // e is 64 bits wide: a 32-bit counter could neither reach N > 2^32 nor be
  // shifted by the up to (nbits - 1) positions needed below
  for(uint64_t e = 0; e < N; e++) {
    gsl_matrix_set_zero(L);
    printf("[%s:%d] Matrix L for e %llX\n", __FILE__, __LINE__, (unsigned long long)e);
    // bit number (row * L_dim + col) of e becomes entry (row, col) of L
    uint32_t cnt = 0;
    for(uint32_t row = 0; row < L_dim; row++) {
      for(uint32_t col = 0; col < L_dim; col++) {
        const uint32_t bit = (uint32_t)((e >> cnt) & 1);
        gsl_matrix_set(L, row, col, bit);
        cnt++;
      }
    }
    for(uint32_t x = 0; x < ALL_WORDS; x++) {
      for(uint32_t y = 0; y < ALL_WORDS; y++) {
        const uint32_t w = (y << WORD_SIZE) | x;
        uint32_t ww = 0;

        gsl_vector_set_zero(V_in);
        gsl_vector_set_zero(V_out);

        lcode_hex_to_vec(w, V_in, L_dim);
        lcode_encode(V_out, V_in, L, L_dim, L_dim);
        lcode_vec_to_hex(&ww, V_out, L_dim);

        // split the transformed word back into its two halves
        const uint32_t xx = ww & MASK;
        const uint32_t yy = (ww >> WORD_SIZE) & MASK;

        printf("[%s:%d] yy xx ww %X %X %X\n", __FILE__, __LINE__, yy, xx, ww);
      }
    }
  }
  gsl_vector_free(V_out);
  gsl_vector_free(V_in);
  gsl_matrix_free(L);
}



/* --- */

/*
 * Print the generator matrix g_G and search, over all non-zero input
 * differences whose two halves differ, for the differential (da -> db) with
 * the highest lcode_add_dp_exper probability, printing each new maximum.
 *
 * The outer loop over t is a leftover from a search over many matrices: N is
 * fixed to 1 and e to 0, and the code that fills g_G from e is disabled, so
 * a single pass over the current g_G is made.
 */
void test_lcode_add_dp_all()
{
  double p_min = 1.0; // smallest per-matrix maximum seen so far
  uint32_t e_min = 0; // matrix index e at which p_min was reached

  uint32_t N = 1;//(1UL << (LCODE_GEN_MATRIX_NROWS * (LCODE_GEN_MATRIX_NCOLS / 2)));
  //  assert((LCODE_GEN_MATRIX_NROWS * LCODE_GEN_MATRIX_NCOLS) == 18);
  for(uint32_t t = 0; t < N; t++) {
	 //	 uint32_t mask = (0xffffffffUL >> (32 - (LCODE_GEN_MATRIX_NROWS * (LCODE_GEN_MATRIX_NCOLS / 2))));
	 //	 uint32_t e = xrandom() & mask;
	 //	 uint32_t e = t;
	 uint32_t e = 0;//0x3567;
#if 0 // random matrix G
	 printf("[%s:%d] Matrix G for e %X\n", __FILE__, __LINE__, e);
	 uint32_t cnt = 0;
	 for(uint32_t row = 0; row < LCODE_GEN_MATRIX_NROWS; row++) {
		//for(uint32_t col = 0; col < LCODE_GEN_MATRIX_NCOLS; col++) {
		for(uint32_t col = (LCODE_GEN_MATRIX_NCOLS / 2); col < LCODE_GEN_MATRIX_NCOLS; col++) {
		  //		  g_G[row][col] = random() % 2;
		  g_G[row][col] = (e >> cnt) & 1;
		  //		  printf("[%s:%d] %d %d cnt %d\n", __FILE__, __LINE__, row, col, cnt);
		  cnt++;
		}
	 }
	 //	 assert(cnt == 18);
#endif // #if 0 // random matrix G
	 // print the generator matrix in use
	 for(uint32_t row = 0; row < LCODE_GEN_MATRIX_NROWS; row++) {
		for(uint32_t col = 0; col < LCODE_GEN_MATRIX_NCOLS; col++) {
		  printf("%d,", g_G[row][col]);
		}
		printf("\n");
	 }

	 // maximum probability over all admissible differentials for this matrix
	 double p_max = 0.0;
	 for(WORD_T i = 0; i < ALL_WORDS; i++) {
		if(i == 0) // skip the zero difference
		  continue;
		WORD_T half_mask = (MASK >> (WORD_SIZE / 2));
		WORD_T x_L = (i >> (WORD_SIZE / 2)) & half_mask;
		WORD_T x_R = i & half_mask;
		if(x_L == x_R) { // skip inputs diffs with same halves
		  continue;
		}
		for(WORD_T j = 0; j < ALL_WORDS; j++) {
		  WORD_T da = i;
		  WORD_T db = j;
		  double p = lcode_add_dp_exper(da, db);
		  if(p > p_max) {
			 p_max = p;
			 printf("[%s:%d] %X -> %X 2^%f (%f) | Global min 2^%f for e_min %X\n", __FILE__, __LINE__, da, db, log2(p_max), p_max, log2(p_min), e_min);
		  }
		}
	 }

#if 1 // random matrix G
	 // remember the matrix with the smallest maximum probability
	 if(p_max < p_min) {
		p_min = p_max;
		e_min = e;
	 }
#endif // #if 1 // random matrix G

#if 0 // random matrix G
	 printf("[%s:%d] Random matrix G\n", __FILE__, __LINE__);
	 for(uint32_t row = 0; row < LCODE_GEN_MATRIX_NROWS; row++) {
		for(uint32_t col = 0; col < LCODE_GEN_MATRIX_NCOLS; col++) {
		  printf("%d,", g_G[row][col]);
		}
		printf("\n");
	 }
#endif
  }
}

/* --- */

// Disabled pair of 4 x 8 generator matrices; both start with a 4 x 4
// identity block in the four leftmost columns.
#if 0
WORD_T g_G[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {1,0,0,0,1,1,1,1},
  {0,1,0,0,1,1,1,0},
  {0,0,1,0,0,1,1,1},
  {0,0,0,1,0,0,1,0},
};
WORD_T g_GG[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {1,0,0,0,1,1,1,0},
  {0,1,0,0,1,1,1,0},
  {0,0,1,0,0,1,1,0},
  {0,0,0,1,0,0,1,0},
};
#endif // #if 0
//WORD_T g_GG[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = { // dual of g_G
//  {0,1,1,1,1,0,0,0},
//  {1,0,1,1,0,1,0,0},
//  {1,1,0,1,0,0,1,0},
//  {1,1,1,0,0,0,0,1},
//};

/* --- */

// Disabled debug fragments: print the right half x_R (resp. xx_R) in hex and
// as WORD_SIZE / 2 binary digits, followed by its message vector.
#if 0 // DEBUG
	 printf("[%s:%d] x_R %X\n", __FILE__, __LINE__, x_R);
	 print_binary(x_R, (WORD_SIZE / 2));
	 printf("\n");
	 lcode_vector_print(message_vec_R, LCODE_MESSAGE_LEN_K);
#endif // #if 0 // DEBUG

#if 0 // DEBUG
	 printf("[%s:%d] xx_R %X\n", __FILE__, __LINE__, xx_R);
	 print_binary(xx_R, (WORD_SIZE / 2));
	 printf("\n");
	 lcode_vector_print(message_vec_RR, LCODE_MESSAGE_LEN_K);
#endif // #if 0 // DEBUG


/* --- */
//WORD_T g_GG[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
//  {0,1,1,0,1,0,0,1},
//  {0,1,0,1,0,1,0,1},
//  {0,0,1,1,0,0,1,1},
//  {1,1,1,1,0,0,0,0},
//};
// www.win.tue.nl/~ruudp/courses/2WC11/2WC11-book.pdf
//WORD_T g_GG[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
//  {1,0,0,0,0,1,1,1},
//  {0,1,0,0,1,0,1,1},
//  {0,0,1,0,1,1,0,1},
//  {0,0,0,1,1,1,1,0},
//};
// Second generator matrix (4 x 8); the four rightmost columns form a 4 x 4
// identity block.
WORD_T g_GG[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {0,1,1,1,1,0,0,0},
  {1,0,1,1,0,1,0,0},
  {1,1,0,1,0,0,1,0},
  {1,1,1,0,0,0,0,1},
};
//////

/* --- */

// Linear-code parameters and generator matrices for WORD_SIZE == 8.
// NOTE(review): LCODE_GEN_MATRIX_NROWS is 2, but both initializers below
// list four rows -- presumably NROWS (K) was meant to be 4, or two rows
// should have been dropped; confirm before enabling this configuration.
#if (WORD_SIZE == 8) // Puncturing from an [8,4,4] code

#define LCODE_GEN_MATRIX_NROWS 2 // K
#define LCODE_GEN_MATRIX_NCOLS 4 // N
#define LCODE_MESSAGE_LEN_K LCODE_GEN_MATRIX_NROWS
#define LCODE_CODEWORD_LEN_N LCODE_GEN_MATRIX_NCOLS
#define LCODE_MIN_DIST_D 2

/*
 * Original generator matrix:
 *
 * WORD_T g_G[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
 *   {1,0,0,1,0,1,1,0},
 *   {0,1,0,1,0,1,0,1},
 *   {0,0,1,1,0,0,1,1},
 *   {0,0,0,0,1,1,1,1},
 * };
 */
// g_G holds the four leftmost columns of the original matrix above,
// g_GG its four rightmost columns.
WORD_T g_G[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {1,0,0,1},
  {0,1,0,1},
  {0,0,1,1},
  {0,0,0,0},
};
WORD_T g_GG[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {0,1,1,0},
  {0,1,0,1},
  {0,0,1,1},
  {1,1,1,1},
};
#endif // #if (WORD_SIZE == 8)


// Two 2 x 8 generator matrices. Their rows are rows 1-2 (g_G) and rows 3-4
// (g_GG) of the 4 x 8 "original generator matrix" quoted in the
// WORD_SIZE == 8 section of this file.
WORD_T g_G[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {1,0,0,1,0,1,1,0},
  {0,1,0,1,0,1,0,1},
};
WORD_T g_GG[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {0,0,1,1,0,0,1,1},
  {0,0,0,0,1,1,1,1},
};
/* --- */


/* --- */

// Orphaned fragment: before asserting that a codeword is >= 2 unless its
// message is 0, dump the offending left-half message and codeword in hex,
// binary and vector form.
#if 1
	 if(!((code_x_L >= 2) || (x_L == 0))) {
		printf("[%s:%d] Message_Hex: %X ", __FILE__, __LINE__, x_L);
		print_binary(x_L, (WORD_SIZE / 2));
		printf("\n");
		printf("[%s:%d] Message_Vec: ", __FILE__, __LINE__);
		lcode_vector_print(message_vec_L, LCODE_MESSAGE_LEN_K);

		printf("[%s:%d] Codeword_Hex: %X ", __FILE__, __LINE__, code_x_L);
		print_binary(code_x_L);
		printf("\n");
		printf("[%s:%d] Codeword_Vec: ", __FILE__, __LINE__);
		lcode_vector_print(codeword_vec_L, LCODE_CODEWORD_LEN_N);
	 }
#endif 
 	 assert((code_x_L >= 2) || (x_L == 0));
	 assert((code_x_R >= 2) || (x_R == 0));


/* --- */

// Generator matrix variant with 5 rows and 10 columns.
WORD_T g_GG[LCODE_GEN_MATRIX_NROWS][LCODE_GEN_MATRIX_NCOLS] = {
  {1,0,1,1,1,1,0,0,1,0},
  {1,0,1,0,0,0,1,0,1,0},
  {0,0,0,1,1,0,0,1,1,0},
  {1,0,0,1,1,0,0,0,0,1},
  {0,1,1,1,1,0,0,0,0,0},
};

/* --- */

	 //	 if(!x.count) continue;
	 //	 printf("[%s:%d] State %2d: %lld\n", __FILE__, __LINE__, i, (WORD_MAX_T)x.count);
	 //	 morus_state_print(x.diff_state_out);

/* --- */

		 // Orphaned fragment: skip counters that are at most 5; for the others
		 // print the indices, the counter and log2 of counter / ALL_WORDS.
		 if(x.diff_state_out[i][j][k] <= 5)
				continue;
			 double prob = (double)x.diff_state_out[i][j][k] / (double)ALL_WORDS;
			 printf("[%d][%d] %X (%lld %4.2f)", i, j, k, (WORD_MAX_T)x.diff_state_out[i][j][k], log2(prob));
	
/* --- */
		// Orphaned fragment: input difference of the 5 x 4 word state,
		// computed with SUB (second minus first); the XOR variant is kept
		// commented out.
		for(uint32_t i = 0; i < 5; i++) {
		  for(uint32_t j = 0; j < 4; j++) {
			 //dx_in[i][j] = XOR(state_first[i][j], state_second[i][j]);
			 dx_in[i][j] = SUB(state_second[i][j], state_first[i][j]);
		  }
		}


/* --- */

/*
 * Check if a state is already stored. If yes, then increase the
 * counter; if not -- store it.
 */
bool morus_diff_state_is_found(const morus_diff_state_t state,
										 std::vector<morus_diff_state_t>* diff_state_vec)
{
  std::vector<morus_diff_state_t>::iterator vec_iter = diff_state_vec->begin();
  bool b_found = false;
  while((vec_iter != diff_state_vec->end()) && (!b_found)) {
	 morus_diff_state_t x = *vec_iter;

	 bool b_input_state_equal = 
		(morus_states_are_equal(x.diff_state_in[0], state.diff_state_in[0]) &&
		 morus_states_are_equal(x.diff_state_in[1], state.diff_state_in[1]) &&
		 morus_states_are_equal(x.diff_state_in[2], state.diff_state_in[2]) &&
		 morus_states_are_equal(x.diff_state_in[3], state.diff_state_in[3]) &&
		 morus_states_are_equal(x.diff_state_in[4], state.diff_state_in[4]));

	 bool b_output_state_equal = 
		(morus_states_are_equal(x.diff_state_out[0], state.diff_state_out[0]) &&
		 morus_states_are_equal(x.diff_state_out[1], state.diff_state_out[1]) &&
		 morus_states_are_equal(x.diff_state_out[2], state.diff_state_out[2]) &&
		 morus_states_are_equal(x.diff_state_out[3], state.diff_state_out[3]) &&
		 morus_states_are_equal(x.diff_state_out[4], state.diff_state_out[4]));

	 b_found = b_input_state_equal && b_output_state_equal;

	 if(b_found) {
		vec_iter->count++;
	 }

	 vec_iter++;
  }

  if(!b_found) {
	 assert(state.count == 0);
	 diff_state_vec->push_back(state);
  }
  return b_found;
}


/* --- */
/*
 * One observed differential over a 5x4-word state: the word-wise
 * difference going in, the word-wise difference coming out, and a hit
 * counter maintained by morus_diff_state_is_found().
 */
struct morus_diff_state_t
{
  WORD_T diff_state_in[5][4];  // input difference, one word per state position
  WORD_T diff_state_out[5][4]; // output difference, same layout
  // Incremented by morus_diff_state_is_found() every time an equal state
  // is seen again; a newly stored state starts (and is stored) with 0.
  uint64_t count;
};

/*
 * Check if a state is already stored. If yes, then increase the
 * counter; if not -- store it.
 */
bool morus_diff_state_is_found(const morus_diff_state_t state,
										 std::vector<morus_diff_state_t>* diff_state_vec)
{
  std::vector<morus_diff_state_t>::iterator vec_iter = diff_state_vec->begin();
  bool b_found = false;
  while((vec_iter != diff_state_vec->end()) && (!b_found)) {
	 morus_diff_state_t x = *vec_iter;

	 bool b_input_state_equal = 
		(morus_states_are_equal(x.diff_state_in[0], state.diff_state_in[0]) &&
		 morus_states_are_equal(x.diff_state_in[1], state.diff_state_in[1]) &&
		 morus_states_are_equal(x.diff_state_in[2], state.diff_state_in[2]) &&
		 morus_states_are_equal(x.diff_state_in[3], state.diff_state_in[3]) &&
		 morus_states_are_equal(x.diff_state_in[4], state.diff_state_in[4]));

	 bool b_output_state_equal = 
		(morus_states_are_equal(x.diff_state_out[0], state.diff_state_out[0]) &&
		 morus_states_are_equal(x.diff_state_out[1], state.diff_state_out[1]) &&
		 morus_states_are_equal(x.diff_state_out[2], state.diff_state_out[2]) &&
		 morus_states_are_equal(x.diff_state_out[3], state.diff_state_out[3]) &&
		 morus_states_are_equal(x.diff_state_out[4], state.diff_state_out[4]));

	 b_found = b_input_state_equal && b_output_state_equal;

	 if(b_found) {
		vec_iter->count++;
	 }

	 vec_iter++;
  }

  if(!b_found) {
	 assert(state.count == 0);
	 diff_state_vec->push_back(state);
  }
  return b_found;
}

/*
 * Experimentally collect differentials of morus_initialization() run
 * with nrounds = 4, using differences computed with SUB().
 *
 * Two 5x4 states are filled with the same random words. Then, for every
 * difference value and every value of word [0][0], word [0][0] of the
 * two states is set to (value, value - diff), both states are passed
 * through morus_initialization(), and the pair (input difference,
 * output difference) is recorded in diff_state_vec through
 * morus_diff_state_is_found().
 *
 * NOTE(review): the input difference is taken as (second - first) while
 * the output difference is taken as (first - second) -- confirm that the
 * opposite sign convention is intended.
 * NOTE(review): the states are not re-randomized between iterations, so
 * (presumably, if morus_initialization() updates its argument in place)
 * every word except [0][0] carries over from the previous iteration.
 */
void morus_stateupdate_dp(std::vector<morus_diff_state_t>* diff_state_vec)
{
  uint32_t nrounds = 4;//NROUNDS;
  unsigned int state_first[5][4] = {{0}};
  unsigned int state_second[5][4] = {{0}};

  //  unsigned int diff = (~0U) & MASK;

  // Both states start out as identical random words
  for(uint32_t row = 0; row < 5; row++) {
	 for(uint32_t col = 0; col < 4; col++) {
		state_second[row][col] = xrandom() & MASK;
		state_first[row][col] = state_second[row][col];
	 }
  }

  // for all differences [0][0] in the IV do
  for(uint32_t diff = 0; diff < ALL_WORDS; diff++) {

	 printf("[%s:%d] diff %X\n", __FILE__, __LINE__, diff);
	 unsigned int dx_in[5][4] = {{0}};
	 unsigned int dx_out[5][4] = {{0}};

	 // for all words [0][0] in the IV do
	 for(uint32_t word = 0; word < ALL_WORDS; word++) {

		state_first[0][0] = word;
		//state_second[0][0] = XOR(state_first[0][0], diff);
		state_second[0][0] = SUB(state_first[0][0], diff);

		// Difference before the transformation
		for(uint32_t row = 0; row < 5; row++) {
		  for(uint32_t col = 0; col < 4; col++) {
			 //dx_in[row][col] = XOR(state_first[row][col], state_second[row][col]);
			 dx_in[row][col] = SUB(state_second[row][col], state_first[row][col]);
		  }
		}

		//		printf("[%s:%d] BEFORE:\n", __FILE__, __LINE__);
		//		morus_state_print(dx_in);

		morus_initialization(nrounds, state_first);
		morus_initialization(nrounds, state_second);

		//		printf("[%s:%d] AFTER:\n", __FILE__, __LINE__);
		//		morus_state_print(dx_out);
		//  morus_state_print(state_second);

		// Difference after the transformation, packed together with the
		// input difference into the record that gets stored
		morus_diff_state_t new_state;
		new_state.count = 0;
		for(uint32_t row = 0; row < 5; row++) {
		  for(uint32_t col = 0; col < 4; col++) {
			 //		dx_out[row][col] = state_first[row][col] ^ state_second[row][col];
			 dx_out[row][col] = SUB(state_first[row][col], state_second[row][col]);
			 new_state.diff_state_in[row][col] = dx_in[row][col];
			 new_state.diff_state_out[row][col] = dx_out[row][col];
		  }
		}

		morus_diff_state_is_found(new_state, diff_state_vec);
#if 0
		bool b_found = morus_diff_state_is_found(new_state, diff_state_vec);
		if(b_found) {
		  printf("[%s:%d] State found:\n", __FILE__, __LINE__);
		  morus_state_print(dx_in);
		  morus_state_print(dx_out);
		}
#endif
	 }
  }
}

/* --- */
/*
 * Single-shot experiment: print the difference between two 5x4 states
 * before and after morus_initialization() with nrounds = 4.
 *
 * The two states are identical random words except for word [0][0],
 * where the second state equals the first minus diff (via SUB), with
 * diff being the all-ones value ANDed with MASK. Differences are taken
 * as SUB(first, second) both before and after.
 */
void test_morus_stateupdate_dp()
{
  uint32_t nrounds = 4;//NROUNDS;
  unsigned int state_a[5][4] = {{0}};
  unsigned int state_b[5][4] = {{0}};

  unsigned int delta_in[5][4] = {{0}};
  unsigned int delta_out[5][4] = {{0}};

  unsigned int diff = (~0U) & MASK;

  printf("[%s:%d] diff %X\n", __FILE__, __LINE__, diff);

  // Start from two identical random states
  for(uint32_t row = 0; row < 5; row++) {
	 for(uint32_t col = 0; col < 4; col++) {
		state_b[row][col] = xrandom() & MASK;
		state_a[row][col] = state_b[row][col];
	 }
  }

  // Inject the difference into word [0][0] only
  //  state_a[0][0] ^= 1;
  state_b[0][0] = SUB(state_a[0][0], diff);
  //  state_b[0][1] = SUB(state_a[0][1], diff);
  //  state_b[0][2] = SUB(state_a[0][2], diff);
  //  state_b[0][3] = SUB(state_a[0][3], diff);

  for(uint32_t row = 0; row < 5; row++) {
	 for(uint32_t col = 0; col < 4; col++) {
		//		delta_in[row][col] = state_a[row][col] ^ state_b[row][col];
		delta_in[row][col] = SUB(state_a[row][col], state_b[row][col]);
	 }
  }

  printf("[%s:%d] BEFORE:\n", __FILE__, __LINE__);
  morus_state_print(delta_in);

  morus_initialization(nrounds, state_a);
  morus_initialization(nrounds, state_b);

  for(uint32_t row = 0; row < 5; row++) {
	 for(uint32_t col = 0; col < 4; col++) {
		//		delta_out[row][col] = state_a[row][col] ^ state_b[row][col];
		delta_out[row][col] = SUB(state_a[row][col], state_b[row][col]);
	 }
  }

  printf("[%s:%d] AFTER:\n", __FILE__, __LINE__);
  morus_state_print(delta_out);
  //  morus_state_print(state_b);

}

/* --- */

/*
 * One MORUS state update, performed in place on a 5-row state of four
 * words per row.
 *
 * The update has five steps, one per row r = 0..4. In each step the four
 * words of row r are XORed with the message block (all steps except the
 * first), XORed with another row, XORed with the AND of two further
 * rows, and passed through rotl() with the step's constant n1..n5.
 * Afterwards the words of one other row are permuted: shifted by one
 * position (steps 1, 3, 5) or swapped in pairs 0<->2, 1<->3 (steps 2, 4).
 *
 * msgblk -- four message words; only read
 * state  -- the state, updated in place
 *
 * Every inner loop below corresponds to one line of four unrolled
 * statements, so the order of reads and writes is the unrolled order.
 */
inline void morus_stateupdate(unsigned int msgblk[], unsigned int state[][4])
{
  unsigned int j;
  unsigned int saved;

  // Step 1: update row 0 (no message words here), then move every word
  // of row 3 one position up, with word 3 wrapping around to position 0
  for(j = 0; j < 4; j++) state[0][j] ^= state[3][j];
  for(j = 0; j < 4; j++) state[0][j] ^= state[1][j] & state[2][j];
  for(j = 0; j < 4; j++) state[0][j] = rotl(state[0][j],n1);
  saved = state[3][3];
  for(j = 3; j > 0; j--) state[3][j] = state[3][j - 1];
  state[3][0] = saved;

  // Step 2: update row 1, then swap words 3<->1 and 2<->0 of row 4
  for(j = 0; j < 4; j++) state[1][j] ^= msgblk[j];
  for(j = 0; j < 4; j++) state[1][j] ^= state[4][j];
  for(j = 0; j < 4; j++) state[1][j] ^= (state[2][j] & state[3][j]);
  for(j = 0; j < 4; j++) state[1][j] = rotl(state[1][j],n2);
  for(j = 3; j >= 2; j--) {
	 saved = state[4][j];
	 state[4][j] = state[4][j - 2];
	 state[4][j - 2] = saved;
  }

  // Step 3: update row 2, then move every word of row 0 one position
  // down, with word 0 wrapping around to position 3
  for(j = 0; j < 4; j++) state[2][j] ^= msgblk[j];
  for(j = 0; j < 4; j++) state[2][j] ^= state[0][j];
  for(j = 0; j < 4; j++) state[2][j] ^= state[3][j] & state[4][j];
  for(j = 0; j < 4; j++) state[2][j] = rotl(state[2][j],n3);
  saved = state[0][0];
  for(j = 0; j < 3; j++) state[0][j] = state[0][j + 1];
  state[0][3] = saved;

  // Step 4: update row 3, then swap words 3<->1 and 2<->0 of row 1
  for(j = 0; j < 4; j++) state[3][j] ^= msgblk[j];
  for(j = 0; j < 4; j++) state[3][j] ^= state[1][j];
  for(j = 0; j < 4; j++) state[3][j] ^= state[4][j] & state[0][j];
  for(j = 0; j < 4; j++) state[3][j] = rotl(state[3][j],n4);
  for(j = 3; j >= 2; j--) {
	 saved = state[1][j];
	 state[1][j] = state[1][j - 2];
	 state[1][j - 2] = saved;
  }

  // Step 5: update row 4, then move every word of row 2 one position up,
  // with word 3 wrapping around to position 0
  for(j = 0; j < 4; j++) state[4][j] ^= msgblk[j];
  for(j = 0; j < 4; j++) state[4][j] ^= state[2][j];
  for(j = 0; j < 4; j++) state[4][j] ^= state[0][j] & state[1][j];
  for(j = 0; j < 4; j++) state[4][j] = rotl(state[4][j],n5);
  saved = state[2][3];
  for(j = 3; j > 0; j--) state[2][j] = state[2][j - 1];
  state[2][0] = saved;
}

/* --- */

/*
 * RC5_P_THRES_ARRAY variant for WORD_SIZE 16 or 32. Every entry has the
 * form 1 / (2^k + c). With the hard-wired "#if 1" tail the table has 26
 * entries: 14 x 1/1, 6 x 1/5, then 1/9, 1/17, 1/33, 1/65, 1/129, 1/257.
 * The two "#if 0" tails are disabled alternatives.
 * NOTE(review): presumably per-position probability thresholds, going by
 * the name -- confirm against the code that reads the array.
 */
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
#if 1//(NROUNDS < 8)
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
#endif // #if 1 (was: NROUNDS < 8)
#if 0//(NROUNDS < 8)
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
#endif // #if 0 (was: NROUNDS < 8)
#if 0//(NROUNDS >= 8)
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1))
#endif // #if 0 (was: NROUNDS >= 8)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


/* --- */

/*
 * Special case tailored to a specific pair (for DEBUG) (20140807)
 * {{0x73B11631, 0x160BBB00}, {0xF3B11631, 0x960BBB00}},
 *
 * Disabled with "#if 0". The initializer lists 18 values; if
 * RC5_FULL_FIB_LEN is larger, the remaining elements are
 * zero-initialized.
 */ 
#if 0
uint32_t FIB[RC5_FULL_FIB_LEN] = { 1, 1, 0, 1, 2, 2, 0, 1, 1, 3, 3, 4, 3,  3,  4,  8,  11, 18}; 
#endif

/* --- */
  // Detached fragment: print both rand_*_from_file values in hex, then
  // require each of them to be 0 (marked "dummy" by the author).
  printf("[%s:%d] rand_L_from_file rand_R_from_file %llX %llX\n", __FILE__, __LINE__, rand_L_from_file, rand_R_from_file);
  assert(rand_L_from_file == 0); // dummy
  assert(rand_R_from_file == 0); // dummy


/* --- */

// Switch between the parameter set "as in the paper" (1) and the
// alternative one (0). With the value 0 set here, both conditional
// sections below are compiled out.
#define RC5_32_ORIGINAL_FROM_PAPER 0 // if 1 => as in the paper (ref #20150328)

// Limits for the left and right ciphertext halves; they also serve as
// the last two FIB entries below.
#if RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)
#define RC5_CIPHERTEXT_HW_LIMIT_LEFT 12//10//8
#define RC5_CIPHERTEXT_HW_LIMIT_RIGHT 12//10//8
#endif // #if RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)

// FIB table of the paper variant: 26 explicit entries, the last two
// being the left/right limits defined above.
#if RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)
uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, /*|*/ 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  5,  5,  5,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
#endif // #if RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)

// Original -- from the paper
// RC5_P_THRES_ARRAY for WORD_SIZE 16 or 32, compiled only when
// RC5_32_ORIGINAL_FROM_PAPER is non-zero. Every entry has the form
// 1 / (2^k + c); the 26 entries are 10 x 1/1, 1/2, 13 x 1/5, 1/129, 1/257.
// NOTE(review): presumably per-position probability thresholds, going by
// the name -- confirm against the code that reads the array.
#if RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)

/* --- */

/* 
vpv@igor:~/exper$ unzip rc5-paper-exper-logs-v2.zip
Archive:  rc5-paper-exper-logs-v2.zip
   creating: rc5-paper-exper-logs-v2/
   creating: rc5-paper-exper-logs-v2/rc5-32/
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-8R-2nd-pass-k22-after-1st-pass-k24-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681-v2.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-8R-2nd-pass-k22-after-1st-pass-k24-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681-v2.dat
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-10R-2nd-pass-k30-after-3rd-pass-top-10-after-1st-pass-k34-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681.dat
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-6R-1st-pass-k13-L32-R32-pth-8-7-6-no-hw-limit-add-expand-3-rounds-seed-681.dat
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-10R-1st-pass-k34-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681.dat
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-10R-1st-pass-k34-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-8R-1st-pass-k24-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681.tgz
error:  cannot create rc5-paper-exper-logs-v2/rc5-32/rc5-32-10R-4th-pass-after-2nd-pass-k30-after-3rd-pass-top-10-after-1st-pass-k34-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681-stdout.dat
        File name too long
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-6R-1st-pass-k13-L32-R32-pth-8-7-6-no-hw-limit-add-expand-3-rounds-seed-681.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-8R-1st-pass-k24-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681.dat
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-10R-3rd-pass-after-1st-pass-k34-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681-top-10-stdout.txt
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-8R-2nd-pass-k23-after-1st-pass-k24-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681-v2.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-6R-2nd-pass-k13-after-1st-pass-k14-L32-R32-pth-8-7-6-no-hw-limit-add-expand-3-rounds-seed-681-v2.dat
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-10R-3rd-pass-after-1st-pass-k34-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681-top-10-stdout.dat
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-10R-2nd-pass-k30-after-3rd-pass-top-10-after-1st-pass-k34-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-8R-2nd-pass-k23-after-1st-pass-k24-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681-v2.dat
  inflating: rc5-paper-exper-logs-v2/rc5-32/rc5-32-6R-2nd-pass-k13-after-1st-pass-k14-L32-R32-pth-8-7-6-no-hw-limit-add-expand-3-rounds-seed-681-v2.tgz
error:  cannot create rc5-paper-exper-logs-v2/rc5-32/rc5-32-10R-4th-pass-after-2nd-pass-k30-after-3rd-pass-top-10-after-1st-pass-k34-L32-R32-pth-6-5-4-no-hw-limit-add-expand-3-rounds-seed-681-stdout.txt
        File name too long
   creating: rc5-paper-exper-logs-v2/rc5-64/
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-8R-2nd-pass-k23-after-1st-pass-k24-L64-R64-pth-26-25-no-hw-limit-no-add-expand-seed-1977-v2.dat
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-10R-3rd-pass-after-1st-pass-k34-L64-R64-pth-23-22-no-hw-limit-no-add-expand-seed-1977-stdout.txt
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-8R-2nd-pass-k23-after-1st-pass-k24-L64-R64-pth-26-25-no-hw-limit-no-add-expand-seed-1977-v2.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-10R-3rd-pass-after-1st-pass-k34-L64-R64-pth-23-22-no-hw-limit-no-add-expand-seed-1977-stdout.dat
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-6R-2nd-pass-k13-after-1st-pass-k14-L64-R64-pth-29-28-no-hw-limit-no-add-expand-seed-1977-v2.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-10R-1st-pass-k34-L64-R64-pth-23-22-no-hw-limit-no-add-expand-seed-1977.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-8R-1st-pass-k24-L64-R64-pth-26-25-no-hw-limit-no-add-expand-seed-1977.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-8R-1st-pass-k24-L64-R64-pth-26-25-no-hw-limit-no-add-expand-seed-1977.dat
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-6R-1st-pass-k14-L64-R64-pth-29-28-no-hw-limit-no-add-expand-seed-1977.dat
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-10R-1st-pass-k34-L64-R64-pth-23-22-no-hw-limit-no-add-expand-seed-1977.dat
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-10R-4th-pass-after-2nd-pass-k30-L64-R64-pth-23-22-no-hw-limit-no-add-expand-seed-1977-v2.dat
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-10R-2nd-pass-after-3rd-pass-k30-L64-R64-pth-23-22-no-hw-limit-no-add-expand-seed-1977-v2.dat
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-6R-1st-pass-k14-L64-R64-pth-29-28-no-hw-limit-no-add-expand-seed-1977.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-10R-2nd-pass-after-3rd-pass-k30-L64-R64-pth-23-22-no-hw-limit-no-add-expand-seed-1977-v2.tgz
  inflating: rc5-paper-exper-logs-v2/rc5-64/rc5-64-6R-2nd-pass-k13-after-1st-pass-k14-L64-R64-pth-29-28-no-hw-limit-no-add-expand-seed-1977-v2.dat
vpv@igor:~/exper$ cd /tmp/

 */

/* --- */

/* 
RC5-32-10R: 1st pass, 30 keys, 2^34 CP (2^7 \times 2^27 in struct.) WTH 32, 32, 32, ..., PTH -6, -5, -4, -2, -2, -2, ...

Time on cluster

real    6095m53.813s
user    45m39.303s
sys     4m38.925s

 */

/* --- */

/*
 * RC5_P_THRES_ARRAY variant for WORD_SIZE 64. Every entry has the form
 * 1 / (2^k + c); the 34 entries are 16 x 1/1, 2 x 1/4, 5 x 1/5 and then
 * 1/(2^k + 1) for k = 14, 15, 16, 17, 18, 21, 22, 23, 24, 25, 26.
 * NOTE(review): presumably per-position probability thresholds, going by
 * the name -- confirm against the code that reads the array.
 */
#if (WORD_SIZE == 64)
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  2) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1)),
  ((double)1.0 / (double)((1U <<  15) + 1)),
  ((double)1.0 / (double)((1U <<  16) + 1)),
  ((double)1.0 / (double)((1U <<  17) + 1)),
  ((double)1.0 / (double)((1U <<  18) + 1)),
  ((double)1.0 / (double)((1U <<  21) + 1)),
  ((double)1.0 / (double)((1U <<  22) + 1)),
  ((double)1.0 / (double)((1U <<  23) + 1)),
  ((double)1.0 / (double)((1U <<  24) + 1)),
  ((double)1.0 / (double)((1U <<  25) + 1)),
  ((double)1.0 / (double)((1U <<  26) + 1))
};
#endif // #if (WORD_SIZE == 64)


/* --- */
/*
 * RC5_P_THRES_ARRAY for WORD_SIZE 16 or 32, compiled only when
 * RC5_32_ORIGINAL_FROM_PAPER is zero. Every entry has the form
 * 1 / (2^k + c): 17 x 1/1 and 3 x 1/5, followed by a six-entry tail
 * selected by NROUNDS:
 *   NROUNDS <  8: 1/9, 1/17, 1/33, 1/65, 1/129, 1/257
 *   NROUNDS >= 8: 1/5, 1/5, 1/9, 1/17, 1/33, 1/65
 * NOTE(review): presumably per-position probability thresholds, going by
 * the name -- confirm against the code that reads the array.
 */
#if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
#if (NROUNDS < 8)
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
#endif // #if (NROUNDS < 8)
#if (NROUNDS >= 8)
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1))
#endif // #if (NROUNDS >= 8)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)

/* --- */

/*
 * RC5_P_THRES_ARRAY for WORD_SIZE 16 or 32, compiled only when
 * RC5_32_ORIGINAL_FROM_PAPER is zero. Every entry has the form
 * 1 / (2^k + c): 17 x 1/1 and 3 x 1/5, followed by a six-entry tail
 * selected by NROUNDS:
 *   NROUNDS <  8: 1/9, 1/17, 1/33, 1/65, 1/129, 1/257
 *   NROUNDS >= 8: 1/5, 1/5, 1/9, 1/17, 1/33, 1/65
 * NOTE(review): presumably per-position probability thresholds, going by
 * the name -- confirm against the code that reads the array.
 */
#if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
#if (NROUNDS < 8)
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
#endif // #if (NROUNDS < 8)
#if (NROUNDS >= 8)
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1))
#endif // #if (NROUNDS >= 8)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)


/* --- */

/*
 * RC5_P_THRES_ARRAY variant for WORD_SIZE 64. Every entry has the form
 * 1 / (2^k + c); the 34 entries are 16 x 1/1, 2 x 1/4, 5 x 1/5 and then
 * 1/(2^k + 1) for k = 14, 15, 16, 19, 20, 22, 23, 24, 25, 26, 27.
 * NOTE(review): presumably per-position probability thresholds, going by
 * the name -- confirm against the code that reads the array.
 */
#if (WORD_SIZE == 64)
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  2) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1)),
  ((double)1.0 / (double)((1U <<  15) + 1)),
  ((double)1.0 / (double)((1U <<  16) + 1)),
  ((double)1.0 / (double)((1U <<  19) + 1)),
  ((double)1.0 / (double)((1U <<  20) + 1)),
  ((double)1.0 / (double)((1U <<  22) + 1)),
  ((double)1.0 / (double)((1U <<  23) + 1)),
  ((double)1.0 / (double)((1U <<  24) + 1)),
  ((double)1.0 / (double)((1U <<  25) + 1)),
  ((double)1.0 / (double)((1U <<  26) + 1)),
  ((double)1.0 / (double)((1U <<  27) + 1))
};
#endif // #if (WORD_SIZE == 64)

/* --- */

/*
 * RC5_P_THRES_ARRAY variant for WORD_SIZE 64. Every entry has the form
 * 1 / (2^k + c); the 34 entries are 16 x 1/1, 2 x 1/4, 5 x 1/5 and then
 * 1/(2^k + 1) for k = 14, 15, 16, 19, 20, 22, 25, 26, 27, 28, 29.
 * NOTE(review): presumably per-position probability thresholds, going by
 * the name -- confirm against the code that reads the array.
 */
#if (WORD_SIZE == 64)
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  2) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1)),
  ((double)1.0 / (double)((1U <<  15) + 1)),
  ((double)1.0 / (double)((1U <<  16) + 1)),
  ((double)1.0 / (double)((1U <<  19) + 1)),
  ((double)1.0 / (double)((1U <<  20) + 1)),
  ((double)1.0 / (double)((1U <<  22) + 1)),
  ((double)1.0 / (double)((1U <<  25) + 1)),
  ((double)1.0 / (double)((1U <<  26) + 1)),
  ((double)1.0 / (double)((1U <<  27) + 1)),
  ((double)1.0 / (double)((1U <<  28) + 1)),
  ((double)1.0 / (double)((1U <<  29) + 1))
};
#endif // #if (WORD_SIZE == 64)


/* --- */
/*
 * RC5_P_THRES_ARRAY for WORD_SIZE 16 or 32, compiled only when
 * RC5_32_ORIGINAL_FROM_PAPER is zero. Every entry has the form
 * 1 / (2^k + c): 17 x 1/1 and 3 x 1/5, followed by a six-entry tail
 * selected by NROUNDS:
 *   NROUNDS <  8: 1/9, 1/17, 1/33, 1/65, 1/129, 1/257
 *   NROUNDS >= 8: 1/5, 1/5, 1/9, 1/17, 1/33, 1/65
 * NOTE(review): presumably per-position probability thresholds, going by
 * the name -- confirm against the code that reads the array.
 */
#if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
#if (NROUNDS < 8)
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
#endif // #if (NROUNDS < 8)
#if (NROUNDS >= 8)
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1))
#endif // #if (NROUNDS >= 8)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)

/* --- */

/* 
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 6292445 (2^22.585189)
#Filtered pairs all: 21 (2^4.392317)
#Good pairs among filtered: 15
#Good pairs among filtered f1: 48
#Good pairs total: 48
#GoUP sets of trails: 21 (2^4.392317)
[./src/rc5-dc.cc:6184] A Strange Day: Good / Filtered / Good Filtered:    48    21    15
[./src/rc5-dc.cc:6186] ndata 2^27.58
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -7.01 [11] -8.01
        FIB_ARRAY = [ 0] 32 [ 1] 32 [ 2] 32 [ 3] 32 [ 4] 32 [ 5] 32 [ 6] 32 [ 7] 32 [ 8] 32 [ 9] 32 [10] 32 [11] 32
[./tests/rc5-tests.cc:1063] #GoUP sets of trails: 21 (2^4.392317)
const uint32_t g_key[16] = {0x2A, 0x3B, 0xFF, 0xF7, 0xD5, 0xE4, 0x93, 0x45, 0x4A, 0xF9, 0x48, 0xB2, 0x31, 0x53, 0x98, 0x43};
[./tests/rc5-tests.cc:1299] key A_L A_R = 0x2A3BFFF7D5E493454AF948B231539843 0x84C2B149 0xBB61EDC4

[./tests/rc5-tests.cc:1321] RC5_LAST_ROUND_PARAMS_INCLUDE_DX 0
[./tests/rc5-tests.cc:1322] RC5_LAST_ROUND_PARAMS_NVARIANTS_CUT_THRES 5
[./tests/rc5-tests.cc:1323] Test OK!
NROUNDS 8
RC5_NTEXTS 2^23.00
RC5_ORACLE_NTEXTS 2^27.00
RC5_FILTER_SECOND_PASS 0
RC5_STRUCTURES_NBITS 24
RC5_STRUCTURES_NTEXTS 2^24.00
RC5_FILTER_ORACLE 0 RC5_ORACLE_KM 0 RC5_ORACLE_BK 0
RC5_FILTER_USE_STRUCTURES 1
RC5_LOG_TO_FILE 1
[./tests/rc5-tests.cc:4594] RC5_FILTER_CUT_HW1  1
[./tests/rc5-tests.cc:4595] RC5_FILTER_GOUP_LINEAR  0
[./tests/rc5-tests.cc:4596] RC5_FILTER_GOUP_DIFF_SET  1
RC5_FIXED_KEY 0
RC5_FILTER_CIPHERTEXT_HW_LIMIT 1
RC5_CIPHERTEXT_HW_LIMIT_LEFT 32
RC5_CIPHERTEXT_HW_LIMIT_RIGHT 32
RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX 1
RC5_GOUP_EXPAND_ADD_DEPTH 10
GoUP NL: Rounds to go upper than the bottom two = 0
[./tests/rc5-tests.cc:4606] RC5_FILTER_SECOND_PASS 0
RC5_PAIRS_SORT_BY_CIPHERTEXT_DIFF_HW 1

real    67m0.922s
user    67m2.341s
sys     0m0.024s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$

 */

/* --- */

// One of several alternative definitions of RC5_P_THRES_ARRAY kept in this
// dump for 16/32-bit words. Every entry is written as 1 / (2^k + c).
// The initializer lists 26 values: eight entries of 1.0, then (after the
// "---" marker) 1, 1, 1/2, eight entries of 1/5, and finally
// 1/9, 1/17, 1/33, 1/65, 1/129, 1/257, 1/513.
// NOTE(review): presumably RC5_FULL_FIB_LEN == 26 for these word sizes and
// the entries are per-position probability thresholds -- confirm against
// the code that reads this table.
#if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),
  ((double)1.0 / (double)((1U <<  9) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)

/* --- */


// Alternative RC5_P_THRES_ARRAY for 16/32-bit words; the comment block below
// records the pair counts observed with it. Every entry is 1 / (2^k + c).
// 26 values: eight entries of 1.0, then 1, 1, 1/2, five entries of 1/5,
// exponents k = 3..10 (with c = 1), and finally k = 14 and k = 15.
// NOTE(review): the exponent jumps from 10 straight to 14 in the last two
// entries -- confirm that this gap is intentional.
#if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)
/* 
#Filtered pairs all: 23 (2^4.523562)
#Good pairs among filtered: 1
#Good pairs among filtered f1: 30
#Good pairs total: 30
#
 */
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),
  ((double)1.0 / (double)((1U <<  9) + 1)),
  ((double)1.0 / (double)((1U <<  10) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1)),
  ((double)1.0 / (double)((1U <<  15) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)

// Alternative RC5_P_THRES_ARRAY for 16/32-bit words; the comment block below
// records the pair counts observed with it. Every entry is 1 / (2^k + c).
// 26 values: eight entries of 1.0, then 1, 1, 1/2, two entries of 1/5, and
// then one entry for each exponent k = 3..15 (with c = 1), i.e. the
// thresholds shrink by roughly a factor of two per position.
#if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)
/* 
#Filtered pairs all: 6019 (2^12.555308)
#Good pairs among filtered: 8
#Good pairs among filtered f1: 19
#Good pairs total: 19
#
 */
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),
  ((double)1.0 / (double)((1U <<  9) + 1)),
  ((double)1.0 / (double)((1U <<  10) + 1)),
  ((double)1.0 / (double)((1U <<  11) + 1)),
  ((double)1.0 / (double)((1U <<  12) + 1)),
  ((double)1.0 / (double)((1U <<  13) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1)),
  ((double)1.0 / (double)((1U <<  15) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if !RC5_32_ORIGINAL_FROM_PAPER // NOT the original from paper(ref #20150328)

// Alternative RC5_P_THRES_ARRAY for 16/32-bit words. Every entry is
// 1 / (2^k + c). 26 values: eight entries of 1.0, then 1, 1, 1/2, two
// entries of 1/5, exponents k = 3..10 and then k = 12..16 (with c = 1).
// NOTE(review): the guard below tests !RC5_32_ORIGINAL_FROM_PAPER while its
// trailing comment says "original from paper"; one of the two is wrong.
// Confirm whether this table is meant for the paper configuration (in which
// case the '!' should go) or not (in which case the comment should say NOT).
// NOTE(review): exponent 11 is skipped between k = 10 and k = 12 -- confirm.
#if !RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),
  ((double)1.0 / (double)((1U <<  9) + 1)),
  ((double)1.0 / (double)((1U <<  10) + 1)),
  ((double)1.0 / (double)((1U <<  12) + 1)),
  ((double)1.0 / (double)((1U <<  13) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1)),
  ((double)1.0 / (double)((1U <<  15) + 1)),
  ((double)1.0 / (double)((1U <<  16) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if !RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)

/* --- */
// FIB table. The initializer holds 26 values: eight zeros, then (after the
// marker) 1, 1, 0, ten entries of 2, three entries of 5, and finally the two
// ciphertext Hamming-weight limits RC5_CIPHERTEXT_HW_LIMIT_LEFT / _RIGHT.
// NOTE(review): both preprocessor branches below currently define the very
// same initializer, so the RC5_32_ORIGINAL_FROM_PAPER switch has no effect
// on FIB here.
#if RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)
uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, /*|*/ 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  5,  5,  5,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
#endif // #if RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)
// NOTE(review): the condition on the next line is negated, yet its trailing
// comment still reads "original from paper" -- presumably it should say NOT.
#if !RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)
// Disabled alternative (only 18 values listed):
//uint32_t FIB[RC5_FULL_FIB_LEN] =  { 1, 1, 0, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16}; // based on max ave over 128 keys
uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, /*|*/ 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  5,  5,  5,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
#endif // #if !RC5_32_ORIGINAL_FROM_PAPER // original from paper(ref #20150328)

/* --- */

#if(WORD_SIZE == 64)
/*
For 64-bit
[./src/rc5-dc.cc:5414]   average hw_arr = 1.00 1.00 1.03 0.00 1.60 1.37 0.87 0.73 1.82 2.52 3.69 6.27 8.29 10.35 14.68 17.42 21.03 23.90 26.27
[./src/rc5-dc.cc:5422] average prob_arr = -0.87 -0.87 -0.87 -0.05 -1.52 -1.72 -0.66 -0.41 -1.66 -1.89 -2.19 -3.61 -3.96 -5.25 -7.47 -7.86 -13.90 -15.20 -20.26
*/
/*
  average hw_arr = 1.00 1.00 1.38 0.00 1.38 1.38 0.00 2.00 2.00 0.00 3.15 3.00 5.23 8.69 11.77 10.31 15.77 17.31 18.62 19.38 23.23 25.00 29.31
average prob_arr = -1.00 -1.00 -1.00 -0.38 -1.43 -1.31 0.00 -1.75 -1.75 0.00 -2.41 -2.54 -3.66 -6.75 -11.63 -10.31 -20.44 -15.86 -21.28 -24.32 -27.61 -19.61 -25.53

[./src/rc5-dc.cc:5536]   average hw_arr = 1.00 1.00 1.47 0.00 1.56 2.49 2.97 5.42 9.03 13.77 16.29 16.19 18.42 20.22 22.69 23.66 25.03 26.04 27.47
[./src/rc5-dc.cc:5544] average prob_arr = -0.96 -0.96 -0.96 -0.74 -1.59 -2.24 -2.68 -6.11 -8.55 -14.46 -17.46 -13.98 -19.20 -20.05 -22.49 -21.05 -25.68 -20.68 -22.04

2, 2, 2, 2, 2, 3, 3, 5, 9, 13, 16, 16, 18, 20, 22, 23, 25, 26, 27

*/
//uint32_t FIB[RC5_FULL_FIB_LEN] = { 0, 0, 0, 0, 0, 0, 0, 0, /*|*/ 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  20,  20,  20,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
// [./src/rc5-dc.cc:5402]   average hw_arr = 1.00 1.00 1.27 0.00 1.12 1.21 0.40 1.65 2.44 4.25 5.98 7.31 10.33 12.10 14.27 15.46 18.77 21.81 24.52
 //uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*|*/ 2, 2, 0, 2, 3, 3, 4, 4, 4, 4, 8, 8, 8,  20,  20,  20,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
 // original
 //uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*|*/ 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  5,  5,  5,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 

// Active 64-bit FIB table: 34 values = sixteen zeros, sixteen limits after the
// marker, and the two ciphertext Hamming-weight limits. The sixteen limits
// (2, 2, 3, 3, 5, ..., 27) coincide with the last sixteen numbers of the
// rounded list given at the end of the comment block above, which appears to
// be derived from the measured "average hw_arr" values.
uint32_t FIB[RC5_FULL_FIB_LEN] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*|*/ 2, 2, 3, 3, 5, 9, 13, 16, 16, 18, 20, 22, 23, 25, 26, 27, RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 

 //uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*|*/ 2, 2, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  10,  10,  10,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT};  // 20, 20
// ---- last 20150322---
 //uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 6, 6, 6, 6, 6, 6, /*|*/ 10, 10, 10, 10,  20,  20,  20,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
#if 0
uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 6, 6, 6, 6, 6, 6, /*|*/ 6, 6, 6, 6,  15,  15,  15,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
#endif
/*
RC5-64-8R Averages over 245 good pairs

  average hw_arr = 1.00 1.00 1.26 0.00 1.84 2.13 3.35 5.47 8.85 8.26 12.52 13.22 11.60 13.66 15.43 18.77 20.63 23.84 25.86
average prob_arr = -0.78 -0.78 -0.78 -0.34 -1.60 -1.90 -3.87 -5.09 -7.76 -6.48 -11.83 -13.66 -8.41 -14.09 -15.71 -16.12 -20.05 -17.80 -18.39
FIB_ARRAY =  6  6  6  6  6  6  6  6  6 15 15 15
max hw_arr = {11} {22} {27} {27} {23} {26} {24} {31} {31} {36} {37} {39}
min prob_arr = -1.00 -1.00 -1.00 -inf -5.00 -11.00 -12.00 -15.00 -27.00 -35.00 -38.00 -33.00 -30.00 -31.00 -42.00 -39.00 -45.00 -37.00 -36.00
*/
//uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 2, 2, 4, 6, 9, 9, 13, 13, 13, 14, 16, 19, 21, RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
//uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 

#endif // #if(WORD_SIZE == 64)

/* --- */

// RC5_P_THRES_ARRAY variant for 64-bit words. Every entry is 1 / (2^k + c).
// 34 values: sixteen entries of 1.0, then (after the "---" marker) eighteen
// entries with exponents k = 2, 3, 3, 5, 9, 13, 16, 16, 16, 18, 20, 22, 23,
// 25, 26, 27, 30, 30.
// The largest shift is 30, which still fits the 32-bit unsigned literal 1U.
// The commented-out pairs at the end look like earlier candidates for the
// last two entries, annotated with the run times they produced -- TODO confirm.
#if (WORD_SIZE == 64)
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  2) + 0)),
  ((double)1.0 / (double)((1U <<  3) + 0)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  9) + 1)),
  ((double)1.0 / (double)((1U <<  13) + 1)),
  ((double)1.0 / (double)((1U <<  16) + 1)),
  ((double)1.0 / (double)((1U <<  16) + 1)),
  ((double)1.0 / (double)((1U <<  16) + 1)),
  ((double)1.0 / (double)((1U <<  18) + 1)),
  ((double)1.0 / (double)((1U <<  20) + 1)),
  ((double)1.0 / (double)((1U <<  22) + 1)),
  ((double)1.0 / (double)((1U <<  23) + 1)),
  ((double)1.0 / (double)((1U <<  25) + 1)),
  ((double)1.0 / (double)((1U <<  26) + 1)),
  ((double)1.0 / (double)((1U <<  27) + 1)),
  ((double)1.0 / (double)((1U <<  30) + 1)),
  ((double)1.0 / (double)((1U <<  30) + 1))
  //  ((double)1.0 / (double)((1U <<  9) + 1)),
  //  ((double)1.0 / (double)((1U << 10) + 1))
  //  ((double)1.0 / (double)((1U <<  11) + 1)),
  //  ((double)1.0 / (double)((1U <<  12) + 1))
  //  ((double)1.0 / (double)((1U <<  13) + 1)),
  //  ((double)1.0 / (double)((1U <<  14) + 1))
  // 800 min.
  //  ((double)1.0 / (double)((1U <<  15) + 1)),
  //  ((double)1.0 / (double)((1U <<  16) + 1))
  // > 2 hrs => job is killed
  //((double)1.0 / (double)((1U <<  24) + 1)),
  //((double)1.0 / (double)((1U <<  25) + 1))
};
#endif // #if (WORD_SIZE == 64)

/* --- */

// RC5_P_THRES_ARRAY variant for 64-bit words. Every entry is 1 / (2^k + c).
// 34 values: sixteen entries of 1.0, then (after the "---" marker) 1, 1, 1/2,
// thirteen entries of 1/5, and finally the two active entries with
// exponents 24 and 25.
// The commented-out pairs before the last two entries look like earlier
// candidates for those two positions, annotated with the run times they
// produced -- TODO confirm.
#if (WORD_SIZE == 64)
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  //  ((double)1.0 / (double)((1U <<  7) + 1)),
  //  ((double)1.0 / (double)((1U <<  8) + 1))
  //  ((double)1.0 / (double)((1U <<  9) + 1)),
  //  ((double)1.0 / (double)((1U << 10) + 1))
  //  ((double)1.0 / (double)((1U <<  11) + 1)),
  //  ((double)1.0 / (double)((1U <<  12) + 1))
  //  ((double)1.0 / (double)((1U <<  13) + 1)),
  //  ((double)1.0 / (double)((1U <<  14) + 1))
  // 800 min.
  //  ((double)1.0 / (double)((1U <<  15) + 1)),
  //  ((double)1.0 / (double)((1U <<  16) + 1))
  // > 2 hrs => job is killed
((double)1.0 / (double)((1U <<  24) + 1)),
((double)1.0 / (double)((1U <<  25) + 1))
};
#endif // #if (WORD_SIZE == 64)

/* --- */

//   average hw_arr = 1.00 1.00 1.38 0.00 1.38 1.38 0.00 2.00 2.00 0.00 3.15 3.00 5.23 8.69 11.77 10.31 15.77 17.31 18.62 19.38 23.23 25.00 29.31
//   average hw_arr = 1.00 1.00 1.38 0.00 1.38 1.38 0.00 2.00 2.00 0.00 3.15 3.00 5.23 8.69 11.77 10.31 15.77 17.31 18.62 19.38 23.23 25.00 29.31
//uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 6, 6, 6, 6, 6, 6, /*|*/ 6, 7, 9, 11,  15,  18,  20,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
//uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 6, 6, 6, 6, 6, /**/8, 10, 10, 15, 17, 18, 19, 23, RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
//uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 6, 6, 6, 6, 6, 6, /*|*/ 6, 10, 10, 10,  15,  15,  15,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
 //uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 6, 6, 6, 6, 6, 6, /*|*/ 10, 10, 10, 10,  20,  20,  20,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 
//uint32_t FIB[RC5_FULL_FIB_LEN] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 6, 6, 6, 6, 6, 6, /*|*/ 6, 6, 6, 15,  15,  20,  25,  RC5_CIPHERTEXT_HW_LIMIT_LEFT, RC5_CIPHERTEXT_HW_LIMIT_RIGHT}; 

/* --- */

/* ---x */

		bool b_pass = (((D_i >> j) & 1) == 0);
		if(j >= (rc5_structures_nbits - sub_struct_nbits))
		  b_pass = false;
		if(b_pass) {
/* --- */
/*
 * Debug helper: print the bounds of structure number struct_id.
 *
 * A structure spans 2^RC5_STRUCTURES_NBITS consecutive values, so the
 * struct_id-th one starts at struct_id * 2^RC5_STRUCTURES_NBITS and ends
 * 2^RC5_STRUCTURES_NBITS later. Both bounds are printed in hex and then
 * passed to print_binary(). Nothing is returned.
 */
void test_rc5_split_struct(WORD_T struct_id)
{
  printf("[%s:%d] Enter %s() ...\n", __FILE__, __LINE__, __FUNCTION__);
  printf("[%s:%d] Split structures:\n", __FILE__, __LINE__);

  WORD_T nbits = RC5_STRUCTURES_NBITS;//27;
  WORD_T first = ((WORD_T)1 << nbits) * struct_id; // first value of the structure
  WORD_T past_last = ((WORD_T)1 << nbits) + first; // one past its last value

  // hex view of the bounds
  printf("[%s:%d] #%2lld %8llX %8llX | ", __FILE__, __LINE__, (WORD_MAX_T)struct_id, (WORD_MAX_T)first, (WORD_MAX_T)past_last);
  printf("\n");

  // binary view of the same two bounds, separated by one blank
  print_binary(first);
  printf(" ");
  print_binary(past_last);
  printf("\n");
#if 0
  // disabled: dump every value that belongs to the structure
  for(WORD_T v = first; v < past_last; v++) {
	 printf("[%s:%d] %llX %lld\n", __FILE__, __LINE__, (WORD_MAX_T)v, (WORD_MAX_T)v);
  }
#endif
}


/* --- */
#if 1 // set structures in the middle of the word
	//  uint32_t lsb_start_idx = (WORD_SIZE - rc5_structures_nbits - 12); // so that we have the 80000000 difference
	///  uint32_t lsb_start_idx = (WORD_SIZE - rc5_structures_nbits - 8); // so that we have the 80000000 difference
	uint32_t lsb_start_idx = (WORD_SIZE - rc5_structures_nbits - num_struct); // so that we have the 80000000 difference
	//  uint32_t lsb_start_idx = (WORD_SIZE - rc5_structures_nbits - 16); // so that we have the 80000000 difference
#endif
	assert(rc5_structures_nbits <= WORD_SIZE);
	assert(lsb_start_idx < WORD_SIZE);
	assert(rc5_structures_nbits <= (WORD_SIZE - lsb_start_idx));

	const WORD_T A_left = rand_L;//xrandom() & MASK;
	const WORD_T A_right = rand_R;//xrandom() & MASK;
	WORD_T ndata = 0;

	const WORD_T s_start = (rc5_structures_ntexts * struct_id);
	//	const WORD_T s_end = s_start + rc5_structures_ntexts;

	printf("[%s:%d] Structures random A_L A_R %llX %llX | lsb_start_idx struct_id %lld: (%lld / %lld) | s_start 0x%llX\n", __FILE__, __LINE__, 
			 (WORD_MAX_T)A_left, (WORD_MAX_T)A_right, (WORD_MAX_T)lsb_start_idx, (WORD_MAX_T)struct_id, 
			 (WORD_MAX_T)num_struct, (WORD_MAX_T)s_start);

	assert(s_start <= ((WORD_T)1 << (WORD_SIZE - 1)));

	// bit positions of structures
	//	for(uint32_t struct_bit_i = 0; struct_bit_i < 1; struct_bit_i++) { // for DEBUG
	for(uint32_t struct_bit_i = 0; struct_bit_i < rc5_structures_nbits; struct_bit_i++) {

	  WORD_T e_i = ((WORD_T)1 << (struct_bit_i + lsb_start_idx)); // basis vector e_i

	  for(uint32_t j = 0; j < rc5_structures_ntexts; j++) { // delta_j : 2^k

		 //		 WORD_T D_j = j;
		 //		 if(((D_j >> struct_bit_i) & 1) != 0)  // the j-th bit of D_i is not set to 0
#if 1
		 if(((j >> struct_bit_i) & 1) != 0)  // the j-th bit of D_i is not set to 0
			continue;
#endif

		 WORD_T D_j = ((j + s_start) % rc5_structures_ntexts) << lsb_start_idx;
		 //		 WORD_T D_j = j;

		 //		 printf("[%s:%d] D_j 0x%llX %lld\n", __FILE__, __LINE__, (WORD_MAX_T)D_j, (WORD_MAX_T)D_j);
		 assert(D_j <= ((WORD_T)1 << (WORD_SIZE - 1)));



/* --- */
// Sanity checks for the 32-bit configuration: only 6 and 8 rounds are
// accepted, and the structure size must be 2^14 texts (14 bits) for 6 rounds
// or 2^24 texts (24 bits) for 8 rounds.
#if(WORD_SIZE == 32)
  assert((NROUNDS == 6) || (NROUNDS == 8));
#if(NROUNDS == 6)
  assert(RC5_STRUCTURES_NTEXTS == (1U << 14)); //6R
  assert(RC5_STRUCTURES_NBITS == 14); //6R
#endif // #if(NROUNDS == 6)
#if(NROUNDS == 8)
  assert(RC5_STRUCTURES_NTEXTS == (1U << 24)); //8R
  assert(RC5_STRUCTURES_NBITS == 24); //8R
#endif // #if(NROUNDS == 8)
#endif // #if(WORD_SIZE == 32)


/* --- */
  //  bool b_good_const = false;
  //  bool b_is_good_filters_all = false;
  //  WORD_T A_right = 0;//(A_i >> 16) & 0xffff;//xrandom() & A_mask;
  //  WORD_T A_left = 0;//A_i & 0xffff;//xrandom() & A_mask;

/* --- */

  //	 b_good_const = true;
	 // Resize the structure depending on b_is_good_filters_all:
	 //  - set:   switch to 20-bit structures (2^20 texts) placed at the top of
	 //           the word, log the change and clear the flag again;
	 //  - clear: fall back to 1-bit structures (2 texts).
	 // NOTE(review): presumably the flag means "the previous constants passed
	 // all filters" -- confirm against the code that sets it.
	 if(b_is_good_filters_all) {
		//		b_good_const = true;
		uint32_t rc5_structures_nbits_prev = rc5_structures_nbits;
		rc5_structures_nbits = 20;
		rc5_structures_ntexts = ((WORD_T)1 << rc5_structures_nbits);
		lsb_start_idx = (WORD_SIZE - rc5_structures_nbits - 0); // so that we have the 80000000 difference
		printf("[%s:%d] Upgrade struct %2lld -> %2lld\n", __FILE__, __LINE__, 
				 (WORD_MAX_T)rc5_structures_nbits_prev, (WORD_MAX_T)rc5_structures_nbits);
		printf("[%s:%d] Structures random A_L A_R %llX %llX\n", __FILE__, __LINE__, (WORD_MAX_T)A_left, (WORD_MAX_T)A_right);
		b_is_good_filters_all = false;
		//		b_good_const = true;
	 } else {
		//		uint32_t rc5_structures_nbits_prev = rc5_structures_nbits;
		rc5_structures_nbits = 1;
		rc5_structures_ntexts = ((WORD_T)1 << rc5_structures_nbits);
		//		printf("[%s:%d] Degrade struct %2lld -> %2lld\n", __FILE__, __LINE__, 
		//				 (WORD_MAX_T)rc5_structures_nbits_prev, (WORD_MAX_T)rc5_structures_nbits);
	 }



/* --- */
// Disabled per-word-size definitions of the input difference dx: in each
// variant both words have only the most significant bit set. All three
// blocks are switched off with "#if 0"; the original condition is kept in
// the trailing comment.
#if 0//(WORD_SIZE == 64)
  const WORD_T dx[2] = {0x8000000000000000, 0x8000000000000000};
#endif  // #if 0 // (WORD_SIZE == 64)
#if 0//(WORD_SIZE == 32)
  const WORD_T dx[2] = {0x80000000, 0x80000000};
#endif  // #if (WORD_SIZE == 32)
#if 0//(WORD_SIZE == 16)
  const WORD_T dx[2] = {0x8000, 0x8000};
#endif  // #if (WORD_SIZE == 16)


/* --- */

// Debug output for the two returned words a = *ret_X_L and b = *ret_X_R:
// prints a ^ b, a ^ ~b, and the pair (~a, ~b). In every case the value is
// first cast to WORD_MAX_T and only then ANDed with mask (the cast binds
// tighter than '&').
#if 1
		printf("[%s:%d] a ^ b %llX\n", __FILE__, __LINE__, (WORD_MAX_T)(*ret_X_L ^ *ret_X_R) & mask);
		printf("[%s:%d] a ^ ~b %llX\n", __FILE__, __LINE__, (WORD_MAX_T)(*ret_X_L ^ (~(*ret_X_R))) & mask);
		printf("[%s:%d] a ~b %llX %llX\n", __FILE__, __LINE__, (WORD_MAX_T)(~(*ret_X_L)) & mask, (WORD_MAX_T)(~(*ret_X_R)) & mask);
#endif

/* ---- */

/**
 * Randomly search for a plaintext whose rotation amounts all lie close to
 * the ends of the rotation range.
 *
 * Candidates (X_L, X_R) are drawn with xrandom() and masked with 0xffffff.
 * Each candidate is run through up to \p nrounds RC5 rounds using the key
 * schedule that rc5_setup() expands from \p key. Before every half-round the
 * rotation amount (the driving word ANDed with WORD_SIZE - 1) must be
 * >= lim_hi or <= lim_lo; otherwise the candidate is dropped. The first
 * candidate that passes every half-round is stored in the output parameters
 * and the function returns.
 *
 * The search has no iteration cap: it runs until a candidate is found and
 * prints a progress line every 2^27 attempts.
 *
 * NOTE(review): with lim_hi = 56 the upper band can only be hit when
 * WORD_SIZE is 64; for smaller words only the "<= lim_lo" test can succeed.
 *
 * \param nrounds  number of rounds to check; must not exceed NROUNDS
 * \param key      secret key handed to rc5_setup()
 * \param ret_X_L  [out] left word of the plaintext that was found
 * \param ret_X_R  [out] right word of the plaintext that was found
 */
void test_rc5_compute_struct_const_rand(const WORD_T nrounds, 
													 uint8_t key[RC5_KEY_NBYTES_B],
													 WORD_T* ret_X_L, WORD_T* ret_X_R)
{
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);
  assert(nrounds <= NROUNDS);
  // fail early instead of after a potentially very long search
  assert((ret_X_L != NULL) && (ret_X_R != NULL));

  WORD_T S[RC5_STAB_LEN_T] = {0};					// expanded key
  rc5_setup(key, S);

  // allowed rotation amounts: [0, lim_lo] and [lim_hi, WORD_SIZE - 1]
  const uint32_t lim_hi = 56;
  const uint32_t lim_lo = 4; // 8 was also tried
  // optional high bits of the candidate mask; alternative: (WORD_T)(~0) << 56
  const WORD_T mask_hi = 0x0000000000000000;
  const WORD_T mask = 0xffffff | mask_hi;

  printf("[%s:%d] mask %016llX\n", __FILE__, __LINE__, (WORD_MAX_T)mask);

  uint64_t cnt = 0; // number of rejected candidates so far
  const uint32_t rot_mask = (WORD_SIZE - 1);

  //  test_rc5_blind_oracle_rot5(key);

  // The loop is left only through the return below.
  for(;;) {
	 const WORD_T X_L = xrandom() & mask;
	 const WORD_T X_R = xrandom() & mask;

	 WORD_T A1 = X_L + S[0];
	 WORD_T B1 = X_R + S[1];

	 bool b_sat_limit = true;
	 for(WORD_T i = 1; i <= nrounds; i++) {

		// rotation amount of the first half-round is taken from B1
		const WORD_T rb = (B1 & rot_mask);
		if((rb < lim_hi) && (rb > lim_lo)) {
		  b_sat_limit = false;
		  break;
		}
		A1 = RC5_ROTL(A1^B1, B1) + S[2*i];

		// rotation amount of the second half-round is taken from the new A1
		const WORD_T ra = (A1 & rot_mask);
		if((ra < lim_hi) && (ra > lim_lo)) {
		  b_sat_limit = false;
		  break;
		}
		B1 = RC5_ROTL(B1^A1, A1) + S[2*i+1];
	 }

	 if(b_sat_limit) {
		*ret_X_L = X_L;
		*ret_X_R = X_R;
		// NOTE: log2(cnt) prints -inf when the very first candidate succeeds
		printf("[%s:%d] Found at cnt %lld 2^%4.2f | %016llx %016llX\n", __FILE__, __LINE__, 
				 (WORD_MAX_T)cnt, log2(cnt), (WORD_MAX_T)(*ret_X_L), (WORD_MAX_T)(*ret_X_R));
		return;
	 }

	 cnt++;
	 if((cnt % (1ULL << 27)) == 0) {
		printf("[%s:%d] cnt %lld 2^%4.2f %16llX %16llX\n", __FILE__, __LINE__, (WORD_MAX_T)cnt, log2(cnt),
				 (WORD_MAX_T)X_L, (WORD_MAX_T)X_R);
	 }
  }
}



/* ---- */

		  // Debug check: show the first plaintext's left word and mask_L, then
		  // replace mask_L by the bitwise XNOR of the two (a bit is set exactly
		  // where mask_L and the plaintext word agree) and verify that the
		  // result equals tmp_mask_L.
		  print_binary(cp_pair.plaintext_first[left]);
		  printf("\n");
		  print_binary(mask_L);
		  printf("\n");
		  mask_L = (mask_L & cp_pair.plaintext_first[left]) | (~mask_L & ~cp_pair.plaintext_first[left]);

		  print_binary(mask_L);
		  printf("\n");
		  print_binary(tmp_mask_L);
		  printf("\n");

		  assert(mask_L == tmp_mask_L);


/* --- */


/**
 * Randomly search for a plaintext whose rotation amounts all lie close to
 * the ends of the rotation range.
 *
 * Candidates (X_L, X_R) are drawn with xrandom() and masked with 0xffff.
 * Each candidate is run through up to \p nrounds RC5 rounds using the key
 * schedule that rc5_setup() expands from \p key. Before every half-round the
 * rotation amount (the driving word ANDed with WORD_SIZE - 1) must be
 * >= lim_hi or <= lim_lo; otherwise the candidate is dropped. The first
 * candidate that passes every half-round is stored in the output parameters
 * and the function returns.
 *
 * The search has no iteration cap: it runs until a candidate is found and
 * prints a progress line every 2^27 attempts. (An exhaustive double loop over
 * all masked values was tried as an alternative to random sampling.)
 *
 * NOTE(review): with lim_hi = 56 the upper band can only be hit when
 * WORD_SIZE is 64; for smaller words only the "<= lim_lo" test can succeed.
 *
 * \param nrounds  number of rounds to check; must not exceed NROUNDS
 * \param key      secret key handed to rc5_setup()
 * \param ret_X_L  [out] left word of the plaintext that was found
 * \param ret_X_R  [out] right word of the plaintext that was found
 */
void test_rc5_compute_struct_const_rand(const WORD_T nrounds, 
													 uint8_t key[RC5_KEY_NBYTES_B],
													 WORD_T* ret_X_L, WORD_T* ret_X_R)
{
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

  assert(nrounds <= NROUNDS);
  // fail early instead of after a potentially very long search
  assert((ret_X_L != NULL) && (ret_X_R != NULL));

  WORD_T S[RC5_STAB_LEN_T] = {0};					// expanded key
  rc5_setup(key, S);

  // allowed rotation amounts: [0, lim_lo] and [lim_hi, WORD_SIZE - 1]
  const uint32_t lim_hi = 56; // other values tried: 48, 52
  const uint32_t lim_lo = 8;  // other values tried: 4, 5, 12, 16
  // optional high bits of the candidate mask; alternative: (WORD_T)(~0) << 56
  const WORD_T mask_hi = 0;
  // alternative: ~(0ULL) >> (WORD_SIZE - ((WORD_SIZE - lim_hi) + lim_lo))
  const WORD_T mask = 0xffff | mask_hi;

  printf("[%s:%d] mask %016llX\n", __FILE__, __LINE__, (WORD_MAX_T)mask);

  uint64_t cnt = 0; // number of rejected candidates so far
  const uint32_t rot_mask = (WORD_SIZE - 1);

  // The loop is left only through the return below.
  for(;;) {
	 const WORD_T X_L = xrandom() & mask;
	 const WORD_T X_R = xrandom() & mask;

	 WORD_T A1 = X_L + S[0];
	 WORD_T B1 = X_R + S[1];

	 bool b_sat_limit = true;
	 for(WORD_T i = 1; i <= nrounds; i++) {

		// rotation amount of the first half-round is taken from B1
		const WORD_T rb = (B1 & rot_mask);
		if((rb < lim_hi) && (rb > lim_lo)) {
		  b_sat_limit = false;
		  break;
		}
		A1 = RC5_ROTL(A1^B1, B1) + S[2*i];

		// rotation amount of the second half-round is taken from the new A1
		const WORD_T ra = (A1 & rot_mask);
		if((ra < lim_hi) && (ra > lim_lo)) {
		  b_sat_limit = false;
		  break;
		}
		B1 = RC5_ROTL(B1^A1, A1) + S[2*i+1];
	 }

	 if(b_sat_limit) {
		*ret_X_L = X_L;
		*ret_X_R = X_R;
		// NOTE: log2(cnt) prints -inf when the very first candidate succeeds
		printf("[%s:%d] Found at cnt %lld 2^%4.2f | %016llx %016llX\n", __FILE__, __LINE__, 
				 (WORD_MAX_T)cnt, log2(cnt), (WORD_MAX_T)(*ret_X_L), (WORD_MAX_T)(*ret_X_R));
		return;
	 }

	 cnt++;
	 if((cnt % (1ULL << 27)) == 0) {
		printf("[%s:%d] cnt %lld 2^%4.2f %16llX %16llX\n", __FILE__, __LINE__, (WORD_MAX_T)cnt, log2(cnt),
				 (WORD_MAX_T)X_L, (WORD_MAX_T)X_R);
	 }
  }
}


/* --- */

// Debug output: print RC5_FIXED_KEY and all RC5_STAB_LEN_T words of the
// expanded key S as a brace-enclosed, comma-separated hex list.
#if 1 // DEBUG
  printf("[%s:%d] RC5_FIXED_KEY %d | Expanded key[%d] = {", __FILE__, __LINE__, RC5_FIXED_KEY, RC5_STAB_LEN_T);
  //  uint32_t rand = 32;
  //  rand = 0x3F;//xrandom() % WORD_SIZE; //RC5_FIXED_KEY
  //  S[0] = S[1] = S[2] = S[3]; // !!!
  //  S[0] = S[1] = S[2] = S[3] = S[4] = S[5] = 0; // !!!
  for(uint32_t j = 0; j < RC5_STAB_LEN_T; j++) {
	 // Disabled experiment: overwrite the low 6 bits of the first five key
	 // words. It relies on the variable 'rand', whose declaration above is
	 // commented out, so it would not compile if simply re-enabled.
#if 0 // CHANGE THE EXPANDED KEY!
	 if(j < 5) {
		//		while(!((rand > 60) || (rand < 4))) {
		while(!(rand > 60)) {
		  rand = xrandom() % WORD_SIZE;
		}
		rand = 0;
		S[j] &= (~0x3F);
      S[j] |= (rand) & 0x3F;
	 }
#endif // #if 0 // CHANGE THE EXPANDED KEY!
	 printf("0x%llX, ", (WORD_MAX_T)S[j]);
  }
  printf("};\n");
#endif // #if 1 // DEBUG

/* --- */

/* 

[./tests/rc5-tests.cc:3892] key form stdin: A_L A_R = 0x837ADBE22D9B95BCD684F272302E29C0 0x0 0x0
[./src/rc5-dc.cc:5831] Structures random A_L A_R AFD 241

0x837ADBE22D9B95BCD684F272302E29C0 0xAFD 0x241

[./src/rc5-dc.cc:5633] Good filtered f1 [HW L 18 R 18]: 2,047,474

[./tests/rc5-tests.cc:1017] RC5_FIXED_KEY 0 | Expanded key[26] = 
0x33C187801FD0C241, 
0xE11B00B9A09983F9, 
0x94B3C13F994966CF, 
0x20F58E1CC07A9D8E, 
0x359B1D5C9BDD7260, 
0xF91B9BA8E8864745, 
0x97AE32EB27531B77, 
0xC2F278FBA4FE8190, 
0x29B93C1812B37E74, 
0xA20BBEA8CC8ED43B, 
0x26E8CCFBC67F1C2D, 
0x9A14CBE84CCD524D, 
0xD45490F0C08E7C55, 
0x762AD53EBB16F1A2, 
0x7FA7F137C1C89253, 
0xBE62D4EB9C2410D4, 
0x1C76D4F198C71D27, 
0x612E1BE318439190, 
0x546984D7677CDCED, 
0x9CD9060CFBC62520, 
0xA9DD27AF2DB6C606, 
0x7BA219491502F85D, 
0x163BC89BB7798208, 
0x501156276E21FFD3, 
0xA634C50D7C9CE0C4, 
0x58DC35B86A0B71E1, };


[./src/rc5-dc.cc:5898] New good pair: x (F868CB0000000AFD F868CB0000000241) xx (F86CCB0000000AFD F86CCB0000000241)

[./src/rc5-dc.cc:5908] Add good pair to good_pairs_vec #2047475 : x (F868CB0000000AFD F868CB0000000241) xx (F86CCB0000000AFD F86CCB0000000241)
[./src/rc5-dc.cc:637] Good pair intermediate values:
X[ 0] F868CB0000000AFD F86CCB0000000AFD    4000000000000 s 61 HW  1
X[ 1] F868CB0000000241 F86CCB0000000241    4000000000000 s  1 HW  1
X[ 2] D983CBB9A099863A D987CBB9A099863A    4000000000000 s 58 HW  1
X[ 3] A88A67A480468BFB A88A67A480468BFB                0 s 59 HW  0
X[ 4] 2C7DDB7DA98195FC 2C7DFB7DA98195FC     200000000000 s 60 HW  1
X[ 5] ADEA992A2E79E440 ADEA972A2E79E440      E0000000000 s  0 HW  3 F[12]  3
X[ 6] 7AB2DE00707EB901 7AB30800707EB901    1D60000000000 s  1 HW  6 F[13]  3 .
X[ 7] 465EC13FE361D5FA 4661713FE361D5FA   3FB00000000000 s 58 HW  9 F[14]  4 .
X[ 8] AFE62978A34AFF43 AFE5C2E0A34AFF43    3EB9800000000 s  3 HW 11 F[15]  4 .
X[ 9] 77807E52140CD443 75DEDB12140CD443  25EA54000000000 s  3 HW 11 F[16]  4 .
X[10] 653E77FE86C02C41 73E48E3E86C02C41 16DAF9C000000000 s  1 HW 16 F[17]  4 .
X[11] 4C64E054EC190C31 335D7754EC190C31 7F39970000000000 s 49 HW 16 F[18]  8 .
X[12] DAF51E9D7C2227FF DAF54D5C3FA227FF     53C143800000 s 63 HW 11 F[19]  8 .
X[13] 1F9D905588AC123C 4928ADF52A6C123C 56B53DA0A2C00000 s 60 HW 21 F[20]  8 .
X[14] B2815E2B4A5FD4FE AF68B3494C73D4FE 1DE9ED62062C0000 s 62 HW 23 F[21] 20 .
X[15] 2AEF24D772858403 3937F8E6DB508403 13D8DC31A9D50000 s  3 HW 24 F[22] 20 .
X[16] 81D6ACCD62F698C0 715D3268553E98C0 F08B9EA537C80000 s  0 HW 25 F[23] 20 .
X[17] C7B05D0BA93A39EA 64E19F80273539EA A351C28B8E0F0000 s 42 HW 22
X[18] 93B2C4FCB40AACBF 8FB2C43A0AFB3358 1C0000C6BEF19FE7 s 63 HW 30
[./src/rc5-dc.cc:7858] Enter rc5_equal_rot_trail_dp()


[./src/rc5-dc.cc:5633] Good filtered f1 [HW L 18 R 18]: 2555788

[./src/rc5-dc.cc:5898] New good pair: x (2119588000000AFD 2119588000000241) xx (2139588000000AFD 2139588000000241)

[./src/rc5-dc.cc:5908] Add good pair to good_pairs_vec #2555789 : x (2119588000000AFD 2119588000000241) xx (2139588000000AFD 21395880000002
41)
[./src/rc5-dc.cc:637] Good pair intermediate values:
X[ 0] 2119588000000AFD 2139588000000AFD   20000000000000 s 61 HW  1
X[ 1] 2119588000000241 2139588000000241   20000000000000 s  1 HW  1
X[ 2]  2345939A099863A  2545939A099863A   60000000000000 s 58 HW  2
X[ 3] A60F7C2480468BFB A60E7C2480468BFB    1000000000000 s 59 HW  1
X[ 4] 2E176745A98195FC 2E185F45A98195FC    F380000000000 s 60 HW  7
X[ 5] AE1C9F12AE79E440 AE1C7F92AE79E440     E08000000000 s  0 HW  4 F[12]  3 .
X[ 6] 792793FFF07EB901 791FBC7FF07EB901   382F8000000000 s  1 HW  9 F[13]  3 .
X[ 7] 46244CC5E361D5FA 45B5BAC5E361D5FA  391F60000000000 s 58 HW 11 F[14]  4 .
X[ 8] AFEE88788D4AFF43 AFE521168D4AFF43    BA96E00000000 s  3 HW 12 F[15]  4 .
X[ 9] 780F6203840CD443 7C3E1AB3840CD443  43178B000000000 s  3 HW 11 F[16]  4 .
X[10] 611B128116C02C41 40E59BD116C02C41 21FE895000000000 s  1 HW 14 F[17]  4 .
X[11] 5911AE00EC190C31 A09FCFC0EC190C31 F98E61C000000000 s 49 HW 15 F[18]  8 .
X[12] DAF53BFDC5D147FF DAF68CDCF4F147FF    3B72131200000 s 63 HW 14 F[19]  8 .
X[13] 1646DBEF5572A23C 1189327ECD02A23C  7CFE99198700000 s 60 HW 23 F[20]  8 .
X[14] B2F6133FE4212FFE B2E2D128DEB62FFE   14C2173A970000 s 62 HW 18 F[21] 20
X[15] 28D4236BEE1D75C3 2882EA0D46B5B5C3   56C966A8A8C000 s  3 HW 20 F[22] 20
X[16] 8F74578BEE06E2C0 9164AE185C40E2C0 1E10F993B2460000 s  0 HW 22 F[23] 20 . 
X[17] C41749D198E2B42A D65D1906B3BC742A 124A50D72B5EC000 s 42 HW 24 
X[18] F289C510A4BCFB6B 5389C4FFFF200D4F A10001EF5B9CF624 s 43 HW 28 


 */

/* 
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
33732 0xB27E0F45C8A9F73A39FBB37BE925765E 0x63a 0x74d 0x177
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
22019 0x4766E700CCA6411B1F7C71F9DD452901 0x9a0 0x30c 0xaac
100503 0x6E5AFAE962EBCFD4CD6F8052A7E3B516 0x940 0xc0a 0x54a
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
38625 0x68DDBF9344EC2587418CF231FAEA3757 0xd7d 0x964 0x419
32572 0xA2E3622318257E496253A444409D0B71 0x428 0xdf7 0x9df
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
149132 0x79FD314ADB861C82425B60B87A64B596 0x5a1 0x9e2 0xc43
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
52625 0x6D73ABCD7C31078376D167120939D3EF 0x2ed 0x54 0x2b9
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
199389 0x476FBBFBEA86D945C4C9A2136B1190AF 0xde2 0xb7d 0x69f
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
51069 0x90A1D9A491B72B4AABEE8671DD8420DE 0x380 0x990 0xa10
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
20018 0x476FBBFBEA86D945C4C9A2136B1190AF 0xfe2 0x17d 0xe9f
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
467401 0x3103827605F2FCCA22BF65950818B72B 0x1a5 0xcb3 0xd16
vpv@mazirat:~/skcrypto/trunk/work/src/va$ python rc5-rotsig-extract.py
Rotation constant signature:
76293 0x4766E700CCA6411B1F7C71F9DD452901 0xa0d 0xb8d 0x180
64513 0x476FBBFBEA86D945C4C9A2136B1190AF 0x412 0x3fb 0x7e9
v
 */

/* ---- */

  // copy rand_L, rand_R
  //#if RC5_LOG_FILE_CONTAINS_RAND_LR
  //  rand_L = pairs_from_file_vec[0].rand[0];
  //  rand_R = pairs_from_file_vec[0].rand[1];
  //  printf("[%s:%d] copied (rand_L, rand_R) from file %s: = %8X %8X\n", __FILE__, __LINE__, g_rc5_filtered_pairs_filename, rand_L, rand_R);
  //#endif


/* --- */

/**
 * Search for random plaintexts that, together with their partner at
 * difference dx = (MSB, MSB), use identical rotation operands in every
 * half-round of NROUNDS rounds of RC5.
 *
 * The key schedule is expanded from \p key with rc5_setup(). A sampled pair
 * is rejected at the first half-round in which the two rotation operands,
 * masked with (WORD_SIZE - 1), differ. The search ends after four accepted
 * pairs.
 *
 * \param key      secret key passed to rc5_setup().
 * \param ret_X_L  out: left plaintext word of the first accepted pair,
 *                 AND-ed with the left words of the following accepted ones.
 * \param ret_X_R  out: same as \p ret_X_L for the right plaintext word.
 */
void test_rc5_compute_struct_const_rand_allrounds(uint8_t key[RC5_KEY_NBYTES_B], 
																  WORD_T* ret_X_L, WORD_T* ret_X_R)
{
  WORD_T S[RC5_STAB_LEN_T] = {0};	// expanded key
  rc5_setup(key, S);

  const WORD_T msb = (WORD_T)1 << (WORD_SIZE - 1);
  const WORD_T dx[2] = {msb, msb};
  const WORD_T nrounds = NROUNDS;
  const uint32_t rot_mask = (WORD_SIZE - 1);
  const WORD_T sample_mask = 0xffffffff;	// random plaintext words are limited to 32 bits

  uint64_t ntried = 0;
  uint32_t naccepted = 0;

  while(naccepted < 4) {

    // sample the first text; the second one is derived through dx
    WORD_T first[2] = {0, 0};
    first[0] = xrandom() & sample_mask;
    first[1] = xrandom() & sample_mask;

    WORD_T second[2] = {0, 0};
    second[0] = first[0] ^ dx[0];
    second[1] = first[1] ^ dx[1];

    // initial key whitening of both texts
    WORD_T A1 = first[0] + S[0];
    WORD_T B1 = first[1] + S[1];
    WORD_T A2 = second[0] + S[0];
    WORD_T B2 = second[1] + S[1];

    bool b_same_rotations = true;
    for(WORD_T i = 1; i <= nrounds; i++) {

      // first half-round: the rotation operand is B
      if((B1 & rot_mask) != (B2 & rot_mask)) {
        b_same_rotations = false;
        break;
      }
      A1 = RC5_ROTL(A1^B1, B1) + S[2*i];
      A2 = RC5_ROTL(A2^B2, B2) + S[2*i];

      // second half-round: the rotation operand is the updated A
      if((A1 & rot_mask) != (A2 & rot_mask)) {
        b_same_rotations = false;
        break;
      }
      B1 = RC5_ROTL(B1^A1, A1) + S[2*i+1];
      B2 = RC5_ROTL(B2^A2, A2) + S[2*i+1];
    }

    if(b_same_rotations) {
      if(naccepted == 0) {
        // first hit initializes the outputs ...
        *ret_X_L = first[0];
        *ret_X_R = first[1];
      } else {
        // ... later hits keep only the bits common to all hits so far
        *ret_X_L &= first[0];
        *ret_X_R &= first[1];
      }
      naccepted++;
      printf("[%s:%d] Found!\n", __FILE__, __LINE__);
    }

    ntried++;
    if((ntried % (1ULL << 16)) == 0) {
      printf("[%s:%d] cnt %lld\n", __FILE__, __LINE__, (WORD_MAX_T)ntried);
    }
  }
  printf("[%s:%d] cnt %lld | %016llx %016llX\n", __FILE__, __LINE__, 
         (WORD_MAX_T)ntried, (WORD_MAX_T)(*ret_X_L), (WORD_MAX_T)(*ret_X_R));
}
/* --- */

		// Orphaned fragment (no enclosing function in this dump): variant of the
		// per-half-round rotation check. Instead of requiring the masked
		// rotation operands to be equal (commented-out `diff == 0`), it requires
		// their SUB() to equal RC5_ROT_MASK, first for the B operands and then
		// for the A operands.
		WORD_T diff = SUB((B2 & rot_mask), (B1 & rot_mask));
		//		b_is_sat = (diff == 0);
		b_is_sat = (diff == RC5_ROT_MASK);

		diff = SUB((A2 & rot_mask), (A1 & rot_mask));
		//		b_is_sat = (diff == 0);
		b_is_sat = (diff == RC5_ROT_MASK);



/* --- */

//  test_rc5_dp_ddrot();
// Test the differential probability (DP) of data dependent rotations
// (DDROT)
void test_rc5_dp_ddrot()
{
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);
  assert(WORD_SIZE <= 16);

#if 0//(WORD_SIZE == 8)
  WORD_T log2w = 3;
  WORD_T DB[((WORD_T)1 << 3)] = {0};
  WORD_T N = ((WORD_T)1 << 3);
#endif
#if 0//(WORD_SIZE == 16)
  WORD_T log2w = 4;
  WORD_T DB[((WORD_T)1 << 4)] = {0};
  WORD_T N = ((WORD_T)1 << 4);
#endif

  //  WORD_T DB[((WORD_T)1 << (WORD_T)log2(WORD_SIZE))] = {0};
  //  WORD_T N = ((WORD_T)1 << (WORD_T)log2(WORD_SIZE));
  WORD_T DB[((WORD_T)1 << WORD_SIZE)] = {0};
  uint64_t N = ((WORD_T)1 << (WORD_T)WORD_SIZE);
  WORD_T log2w = (WORD_T)log2(WORD_SIZE);

  printf("[%s:%d] log2w %2lld\n", __FILE__, __LINE__, (WORD_MAX_T)log2w);

  //  WORD_T all_0x5 = 0x5555555555555555;
  //  WORD_T all_0xA = 0xAAAAAAAAAAAAAAAA;
  //  WORD_T dx = (WORD_T)(~0) & MASK;
  WORD_T dx = 0x8000;//all_0x5 & MASK;
  //  WORD_T DB[((WORD_T)1 << WORD_SIZE)] = {0};
  for(uint64_t i = 0; i < N; i++) {
	 WORD_T x = i & MASK;;
	 WORD_T r = (i % log2w);
	 //	 WORD_T xx = ADD(dx, x) & MASK;
	 WORD_T xx = XOR(dx, x) & MASK;
	 WORD_T rr = (xx % log2w);
	 //	 WORD_T dr = (r ^ rr);
#if 0
	 printf("[%s:%d] dx log2dw x xx r rr %llX %2lld %2lld %2lld %2lld %2lld\n", __FILE__, __LINE__,
			  (WORD_MAX_T)dx, (WORD_MAX_T)log2w, (WORD_MAX_T)x, (WORD_MAX_T)xx, (WORD_MAX_T)r, (WORD_MAX_T)rr);
#endif
	 WORD_T y = LROT(x, r);
	 WORD_T yy = LROT(xx, rr);
	 //	 WORD_T dy_r = SUB(yy, y) % log2w;
	 //	 WORD_T dy = SUB(yy, y) % MASK;
	 WORD_T dy = XOR(yy, y) % MASK;
	 DB[dy]++;
  }
  for(uint64_t i = 0; i < N; i++) {
	 uint32_t min = 3;
	 if(DB[i] >= min) {
		printf("[%s:%d] dx %llX dr %llX %3lld\n", __FILE__, __LINE__,
				 (WORD_MAX_T)dx, (WORD_MAX_T)i, (WORD_MAX_T)DB[i]);
	 }
  }
}

/* --- */

/*
 * Find one or several good pairs and form them try to "learn"
 * (ie. construct) more.
 */
void rc5_learning_oracle(const WORD_T S[RC5_STAB_LEN_T])
{
  //  const uint32_t nrounds = NROUNDS;
  const uint32_t left = RC5_FEISTEL_LEFT;
  const uint32_t right = RC5_FEISTEL_RIGHT;
  pair_t cp_pair = {{0,0}, {0,0}, {0,0}, {0,0}};
#if 0
  WORD_T**** M_L;
  WORD_T**** M_R;
  yaarx_alloc_matrices_4d(&M_L, WORD_SIZE);
  yaarx_alloc_matrices_4d(&M_R, WORD_SIZE);
  rc5_blind_oracle_rot5_compute_plaintext_masks(M_L, M_R);
#endif

  bool b_found = false;
  while(!b_found) {
	 WORD_T rand_L = xrandom() & MASK;
	 WORD_T rand_R = xrandom() & MASK;
	 // random index outside of the rotation window
	 WORD_T rand_i = (xrandom() % (WORD_SIZE - RC5_LOG2W)) + RC5_LOG2W;
	 assert((rand_i < WORD_SIZE) && (rand_i >= RC5_LOG2W));
	 WORD_T diff = (1ULL << rand_i);
#if 0 // DEBUG
	 printf("[%s:%d] rand_LR diff %016llX %016llX %016llX\n", __FILE__, __LINE__, 
			  (WORD_MAX_T)diff, (WORD_MAX_T)rand_L, (WORD_MAX_T)rand_R);
#endif // #if 1 // DEBUG
	 cp_pair.plaintext_first[left] = rand_L;
	 cp_pair.plaintext_first[right] = rand_R;
	 cp_pair.plaintext_second[left] = (rand_L ^ diff);
	 cp_pair.plaintext_second[right] = (rand_R ^ diff);

	 rc5_encrypt(NROUNDS, S, cp_pair.plaintext_first, cp_pair.ciphertext_first);  
	 rc5_encrypt(NROUNDS, S, cp_pair.plaintext_second, cp_pair.ciphertext_second);  
	 b_found = rc5_pair_is_good(S, NROUNDS, cp_pair);
	 cp_pair.b_good = b_found;
  }

  rc5_print_pair(cp_pair);

  std::vector<WORD_T> X_first;		  // intermediate values from encryption
  std::vector<WORD_T> X_second;
  rc5_encrypt_pair_get_intermediate_values(S, NROUNDS, cp_pair, &X_first, &X_second);
#if 1 // DEBUG
  rc5_pair_print_intermediate_values(S, NROUNDS, cp_pair, X_first, X_second);
#endif // #if 1 // DEBUG

  WORD_T rot_arr[(2*NROUNDS) + 3] = {0};
  rc5_encrypt_pair_get_rotation_constants(X_first, X_second, rot_arr);

  uint32_t nconst = (2*NROUNDS) + 3;//5;
  printf("[%s:%d] R = \n", __FILE__, __LINE__);
  for(uint32_t i = 2; i < nconst; i++) {
	 printf("%2lld ", (WORD_MAX_T)rot_arr[i]);
  }
  printf("\n");
#if 0
  WORD_T r1 = rot_arr[2];
  WORD_T r2 = rot_arr[3];
  WORD_T r3 = rot_arr[4];
  WORD_T r4 = rot_arr[5];
#endif
#if 0 // DEBUG
  // number of inactive bits (ie. not important bits) 
  WORD_T mask_L = M_L[r1][r2][r3][r4];
  WORD_T mask_R = M_R[r1][r2][r3][r4];
  WORD_T len_L = hamming_weight(~mask_L);
  WORD_T len_R = hamming_weight(~mask_R);
  printf("[%s:%d] M_LR %016llX %016llX | %016llX ~%016llX (%2lld)\n", __FILE__, __LINE__,
			(WORD_MAX_T)mask_L, (WORD_MAX_T)mask_R, (WORD_MAX_T)(mask_L & mask_R), (WORD_MAX_T)mask_common_inv, (WORD_MAX_T)len_common_inv);
  printf("[%s:%d] len_LR %2lld %2lld\n", __FILE__, __LINE__, 
			(WORD_MAX_T)len_L, (WORD_MAX_T)len_R);
#endif // #if 1 // DEBUG

  WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
  assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));

#if 0
  WORD_T ngood = 0;
  WORD_T nall = 0;
  for(int64_t i_L = WORD_SIZE-1; i_L >= 0; i_L--) {
	 if((mask_L >> (WORD_T)i_L) & 1) // is a significant bit
		continue;
	 for(int64_t i_R = WORD_SIZE-1; i_R >= 0; i_R--) {
		if((mask_R >> (WORD_T)i_R) & 1) // is a significant bit
		  continue;
		nall++;
		WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
		assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));
		WORD_T x_L = cp_pair.plaintext_first[left] ^ (1ULL << i_L);
		WORD_T x_R = cp_pair.plaintext_first[right] ^ (1ULL << i_R);
		WORD_T xx_L = cp_pair.plaintext_second[left] ^ (1ULL << i_L);
		WORD_T xx_R = cp_pair.plaintext_second[right] ^ (1ULL << i_R);
		//		WORD_T xx_L = x_L ^ diff;
		//		WORD_T xx_R = x_R ^ diff;
		pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
		new_pair.plaintext_first[left] = x_L;
		new_pair.plaintext_first[right] = x_R;
		new_pair.plaintext_second[left] = xx_L;
		new_pair.plaintext_second[right] = xx_R;
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
		bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
		WORD_T bit_diff_L = (x_L ^ xx_L); // (1ULL << i_L);
		WORD_T bit_diff_R = (x_R ^ xx_R); // (1ULL << i_R);
		assert(bit_diff_L == diff);
		assert(bit_diff_R == diff);
#if 0
		printf("[%s:%d]  #New (%lld / %lld) bit# %016llX %016llX\n", __FILE__, __LINE__,
				 (WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)bit_diff_L, (WORD_MAX_T)bit_diff_R);
#endif
		if(b_good) {
		  ngood++;
#if 1
		  printf("[%s:%d] #Good (%lld / %lld) bit# %2lld %2lld\n", __FILE__, __LINE__,
					(WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)i_L, (WORD_MAX_T)i_R);
		  rc5_print_pair(new_pair);
#endif
		}

	 }
  }
  printf("[%s:%d] MASK #Good (%lld / %lld)\n", __FILE__, __LINE__, (WORD_MAX_T)ngood, (WORD_MAX_T)nall);
  //  printf("[%s:%d] #Good (%lld / %lld) bit# %016llX %016llX | r %2lld %2lld %2lld %2lld\n", __FILE__, __LINE__,
  //			(WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)mask_L, (WORD_MAX_T)mask_R,
  //			(WORD_MAX_T)r1, (WORD_MAX_T)r2, (WORD_MAX_T)r3, (WORD_MAX_T)r4);
#endif

#if 0
  ngood = 0;
  nall = 0;

  for(int64_t i_L = WORD_SIZE-1; i_L >= 0; i_L--) {
	 for(int64_t i_R = WORD_SIZE-1; i_R >= 0; i_R--) {
		nall++;
		WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
		assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));
		WORD_T x_L = cp_pair.plaintext_first[left] ^ (1ULL << i_L);
		WORD_T x_R = cp_pair.plaintext_first[right] ^ (1ULL << i_R);
		WORD_T xx_L = cp_pair.plaintext_second[left] ^ (1ULL << i_L);
		WORD_T xx_R = cp_pair.plaintext_second[right] ^ (1ULL << i_R);
		//		WORD_T xx_L = x_L ^ diff;
		//		WORD_T xx_R = x_R ^ diff;
		pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
		new_pair.plaintext_first[left] = x_L;
		new_pair.plaintext_first[right] = x_R;
		new_pair.plaintext_second[left] = xx_L;
		new_pair.plaintext_second[right] = xx_R;
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
		bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
		WORD_T bit_diff_L = (x_L ^ xx_L); // (1ULL << i_L);
		WORD_T bit_diff_R = (x_R ^ xx_R); // (1ULL << i_R);
		assert(bit_diff_L == diff);
		assert(bit_diff_R == diff);
#if 0
		printf("[%s:%d]  #New (%lld / %lld) bit# %016llX %016llX\n", __FILE__, __LINE__,
				 (WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)bit_diff_L, (WORD_MAX_T)bit_diff_R);
#endif
		if(b_good) {
		  ngood++;
#if 1
		  printf("[%s:%d] #Good (%lld / %lld) bit# %2lld %2lld\n", __FILE__, __LINE__,
					(WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)i_L, (WORD_MAX_T)i_R);
		  rc5_print_pair(new_pair);
#endif
		}

	 }
  }
  printf("[%s:%d] CLEAN #Good (%lld / %lld)\n", __FILE__, __LINE__, (WORD_MAX_T)ngood, (WORD_MAX_T)nall);
  //  printf("[%s:%d] #Good (%lld / %lld) bit# %016llX %016llX | r %2lld %2lld %2lld %2lld\n", __FILE__, __LINE__,
  //			(WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)mask_L, (WORD_MAX_T)mask_R,
  //			(WORD_MAX_T)r1, (WORD_MAX_T)r2, (WORD_MAX_T)r3, (WORD_MAX_T)r4);
#endif

#if 0
  WORD_T mask_common_inv = ~mask_L & ~mask_R;
  WORD_T len_common_inv = hamming_weight(mask_common_inv);
  WORD_T all_common_inv = (1ULL << len_common_inv);
  ngood = 0;
  nall = 0;
  printf("[%s:%d] all_common_inv 2^%4.2f\n", __FILE__, __LINE__, log2(all_common_inv));
  for(WORD_T word = 0; word < all_common_inv; word++) {

	 //	 if((word % 1024) == 0) {
	 //		printf("[%s:%d] %2lld / %2lld\n", __FILE__, __LINE__, (WORD_MAX_T)word, (WORD_MAX_T)all_common_inv);
	 //	 }
	 nall++;

	 WORD_T x_L = cp_pair.plaintext_first[left] & mask_common_inv;
	 WORD_T x_R = cp_pair.plaintext_first[right] & mask_common_inv;

#if 0 // DEBUG
	 printf("[%s:%d] BEFORE %016llX %016llX\n", __FILE__, __LINE__, (WORD_MAX_T)x_L, (WORD_MAX_T)x_R);
#endif // #if 1 // DEBUG

	 WORD_T j = 0;
	 for(WORD_T i = 0; i < WORD_SIZE; i++) {
		if(((mask_common_inv >> i) & 1) == 1) {
		  assert(((mask_L >> i) & 1) == 0);
		  assert(((mask_R >> i) & 1) == 0);
		  WORD_T bit = (word >> j) & 1;
		  x_L |= (bit << i);
		  x_R |= (bit << i);
		  j++;
		}
	 }

#if 0 // DEBUG
	 printf("[%s:%d]  AFTER %016llX %016llX\n", __FILE__, __LINE__, (WORD_MAX_T)x_L, (WORD_MAX_T)x_R);
#endif // #if 1 // DEBUG

	 WORD_T xx_L = x_L ^ diff;
	 WORD_T xx_R = x_R ^ diff;

#if 0 // DEBUG
	 printf("[%s:%d]        %016llX %016llX\n", __FILE__, __LINE__, (WORD_MAX_T)xx_L, (WORD_MAX_T)xx_R);
#endif // #if 1 // DEBUG

	 //		assert(x_L != x_R);

	 pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
	 bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
	 if(b_good) {
		ngood++;
#if 1
		printf("[%s:%d] #Good (%lld / %lld)\n", __FILE__, __LINE__,
				 (WORD_MAX_T)ngood, (WORD_MAX_T)nall);
		//		rc5_print_pair(new_pair);
#endif
	 }
  }
  printf("[%s:%d]   ALL #Good (%lld / %lld)\n", __FILE__, __LINE__, (WORD_MAX_T)ngood, (WORD_MAX_T)nall);
#endif

#if 0
  for(int64_t i = WORD_SIZE; i >= 0; i--) {
	 nall++;
	 if(((mask_R >> (WORD_T)i) & 1) || ((mask_L >> (WORD_T)i) & 1)) // is a significant bit
		continue;
	 WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
	 assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));
	 WORD_T x_L = cp_pair.plaintext_first[left] ^ (1ULL << i);
	 WORD_T x_R = cp_pair.plaintext_first[right] ^ (1ULL << i);
	 WORD_T xx_L = x_L ^ diff;
	 WORD_T xx_R = x_R ^ diff;
	 pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
	 bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
	 if(b_good) {
		ngood++;
		WORD_T bit_diff = (1ULL << i);
		printf("[%s:%d] #Good (%lld / %lld) bit# %2lld %016llX\n", __FILE__, __LINE__,
				 (WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)i, (WORD_MAX_T)bit_diff);
		rc5_print_pair(new_pair);
	 }
  }
#endif

#if 0
  yaarx_free_matrices_4d(M_L, WORD_SIZE);
  yaarx_free_matrices_4d(M_R, WORD_SIZE);
#endif
}

/* --- */
  // Orphaned fragment (no enclosing function in this dump): skeleton of an
  // exhaustive loop over four values r1..r4, each in [0, WORD_SIZE). The
  // innermost body is empty; only the progress of r1 is printed.
  for(WORD_T r1 = 0; r1 < WORD_SIZE; r1++) {
	 printf("[%s:%d] r1 %2lld\n", __FILE__, __LINE__, (WORD_MAX_T)r1);
	 for(WORD_T r2 = 0; r2 < WORD_SIZE; r2++) {
		for(WORD_T r3 = 0; r3 < WORD_SIZE; r3++) {
		  for(WORD_T r4 = 0; r4 < WORD_SIZE; r4++) {

		  }
		}
	 }
  }

/* --- */

  // Orphaned fragment (no enclosing function in this dump): for every value
  // `word` below all_common_inv, spread the bits of `word` over the bit
  // positions that are set in mask_common_inv (same value in both plaintext
  // words), derive the second text by XOR-ing with diff, encrypt both texts
  // and count/print the pairs accepted by rc5_pair_is_good().
  // Uses nall, ngood, all_common_inv, mask_common_inv, mask_L, mask_R, diff,
  // cp_pair, S, left and right, none of which is declared in this fragment.
  for(WORD_T word = 0; word < all_common_inv; word++) {
		nall++;

		// NOTE(review): the AND keeps exactly the bits that are enumerated
		// below and clears all others -- confirm whether `& ~mask_common_inv`
		// was intended.
		WORD_T x_L = cp_pair.plaintext_first[left] & mask_common_inv;
		WORD_T x_R = cp_pair.plaintext_first[right] & mask_common_inv;

#if 0 // DEBUG
		printf("[%s:%d] BEFORE %016llX %016llX\n", __FILE__, __LINE__, (WORD_MAX_T)x_L, (WORD_MAX_T)x_R);
#endif // #if 1 // DEBUG

		// j indexes the bits of `word`, i the bit positions of the plaintext
		WORD_T j = 0;
		for(WORD_T i = 0; i < WORD_SIZE; i++) {
		  if(((mask_common_inv >> i) & 1) == 1) {
			 assert(((mask_L >> i) & 1) == 0);
			 assert(((mask_R >> i) & 1) == 0);
			 WORD_T bit = (word >> j) & 1;
			 x_L |= (bit << i);
			 x_R |= (bit << i);
			 j++;
		  }
		}

#if 0 // DEBUG
		printf("[%s:%d]  AFTER %016llX %016llX\n", __FILE__, __LINE__, (WORD_MAX_T)x_L, (WORD_MAX_T)x_R);
#endif // #if 1 // DEBUG

		WORD_T xx_L = x_L ^ diff;
		WORD_T xx_R = x_R ^ diff;

#if 0 // DEBUG
		printf("[%s:%d]        %016llX %016llX\n", __FILE__, __LINE__, (WORD_MAX_T)xx_L, (WORD_MAX_T)xx_R);
#endif // #if 1 // DEBUG

		//		assert(x_L != x_R);

		pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
		bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
		if(b_good) {
		  ngood++;
		  printf("[%s:%d] #Good (%lld / %lld)\n", __FILE__, __LINE__,
					(WORD_MAX_T)ngood, (WORD_MAX_T)nall);
		  rc5_print_pair(new_pair);
		}
  }


/* --- */
  // Orphaned fragment (no enclosing function in this dump): for every bit
  // position that is set in neither mask_L nor mask_R, flip that bit in both
  // words of the first plaintext of cp_pair, derive the second text by
  // XOR-ing with the pair's difference, encrypt both and count/print the
  // pairs accepted by rc5_pair_is_good(). nall is incremented before the
  // mask test, so it also counts the skipped positions.
  // NOTE(review): the loop starts at i = WORD_SIZE, ie. one position above
  // bit WORD_SIZE-1.
  for(int64_t i = WORD_SIZE; i >= 0; i--) {
		nall++;
		if(((mask_R >> (WORD_T)i) & 1) || ((mask_L >> (WORD_T)i) & 1)) // is a significant bit
		  continue;
		WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
		assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));
		WORD_T x_L = cp_pair.plaintext_first[left] ^ (1ULL << i);
		WORD_T x_R = cp_pair.plaintext_first[right] ^ (1ULL << i);
		WORD_T xx_L = x_L ^ diff;
		WORD_T xx_R = x_R ^ diff;
		pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
		bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
		if(b_good) {
		  ngood++;
		  WORD_T bit_diff = (1ULL << i);
		  printf("[%s:%d] #Good (%lld / %lld) bit# %2lld %016llX\n", __FILE__, __LINE__,
					(WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)i, (WORD_MAX_T)bit_diff);
		  rc5_print_pair(new_pair);
		}
  }

/* --- */
  // Orphaned fragment (no enclosing function in this dump): two single-bit
  // experiments on the good pair cp_pair. The first loop flips one bit of the
  // left plaintext word only (skipping positions set in mask_L), the second
  // loop flips one bit of the right word only (skipping positions set in
  // mask_R). In both loops the second text is the first one XOR-ed with the
  // pair's difference, and pairs accepted by rc5_pair_is_good() are counted
  // and reported.
  // NOTE(review): both loops start at i = WORD_SIZE, ie. one position above
  // bit WORD_SIZE-1.
  for(int64_t i = WORD_SIZE; i >= 0; i--) {
	 nall++;
	 if((mask_L >> (WORD_T)i) & 1) // is a significant bit
		continue;
	 WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
	 assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));
	 WORD_T x_L = cp_pair.plaintext_first[left] ^ (1ULL << i);
	 WORD_T x_R = cp_pair.plaintext_first[right];// ^ (1ULL << i);
	 WORD_T xx_L = x_L ^ diff;
	 WORD_T xx_R = x_R ^ diff;
	 pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
	 bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
	 if(b_good) {
		ngood++;
		WORD_T bit_diff = (1ULL << i);
		printf("[%s:%d] #Good (%lld / %lld) bit# %2lld %016llX\n", __FILE__, __LINE__,
				 (WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)i, (WORD_MAX_T)bit_diff);
	 }
  }
  // same experiment on the right word
  for(int64_t i = WORD_SIZE; i >= 0; i--) {
	 nall++;
	 if((mask_R >> (WORD_T)i) & 1) // is a significant bit
		continue;
	 WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
	 assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));
	 WORD_T x_L = cp_pair.plaintext_first[left];// ^ (1ULL << i);
	 WORD_T x_R = cp_pair.plaintext_first[right] ^ (1ULL << i);
	 WORD_T xx_L = x_L ^ diff;
	 WORD_T xx_R = x_R ^ diff;
	 pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
	 bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
	 if(b_good) {
		ngood++;
		WORD_T bit_diff = (1ULL << i);
		printf("[%s:%d] #Good (%lld / %lld) bit# %2lld %016llX\n", __FILE__, __LINE__,
				 (WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)i, (WORD_MAX_T)bit_diff);
	 }
  }



/* --- */

  // Orphaned fragment (no enclosing function in this dump): flips the same
  // bit in both words of the first plaintext of cp_pair for every position
  // that is set in neither mask_L nor mask_R, and counts the resulting pairs
  // accepted by rc5_pair_is_good(). Here nall is incremented after the mask
  // test, so it counts only the positions that are actually tested.
  // NOTE(review): the loop starts at i = WORD_SIZE, ie. one position above
  // bit WORD_SIZE-1.
  for(int64_t i = WORD_SIZE; i >= 0; i--) {
	 if(((mask_L >> (WORD_T)i) & 1) || ((mask_R >> (WORD_T)i) & 1)) // is a significant bit
		continue;
	 nall++;
	 WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
	 assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));
	 WORD_T x_L = cp_pair.plaintext_first[left] ^ (1ULL << i);
	 WORD_T x_R = cp_pair.plaintext_first[right] ^ (1ULL << i);
	 WORD_T xx_L = x_L ^ diff;
	 WORD_T xx_R = x_R ^ diff;
	 pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
	 rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
	 bool b_good = rc5_pair_is_good(S, NROUNDS, new_pair);
	 if(b_good) {
		ngood++;
		WORD_T bit_diff = (1ULL << i);
		printf("[%s:%d] #Good (%lld / %lld) bit# %2lld %016llX\n", __FILE__, __LINE__,
				 (WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)i, (WORD_MAX_T)bit_diff);
	 }
  }


/* --- */

  // Orphaned fragment (no enclosing function in this dump): for every
  // combination of a position i_L not set in mask_L and a position i_R not
  // set in mask_R, flip bit i_L of the left and bit i_R of the right word of
  // the first plaintext of cp_pair, derive the second text by XOR-ing with
  // the pair's difference and encrypt both texts.
  // NOTE(review): both loops start at WORD_SIZE, ie. one position above bit
  // WORD_SIZE-1.
  for(int64_t i_L = WORD_SIZE; i_L >= 0; i_L--) {
	 if((mask_L >> (WORD_T)i_L) & 1) // is a significant bit
		continue;
	 for(int64_t i_R = WORD_SIZE; i_R >= 0; i_R--) {
		if((mask_R >> (WORD_T)i_R) & 1) // is a significant bit
		  continue;
		nall++;
		WORD_T diff = (cp_pair.plaintext_first[left] ^ cp_pair.plaintext_second[left]);
		assert(diff == (cp_pair.plaintext_first[right] ^ cp_pair.plaintext_second[right]));
		WORD_T x_L = cp_pair.plaintext_first[left] ^ (1ULL << i_L);
		WORD_T x_R = cp_pair.plaintext_first[right] ^ (1ULL << i_R);
		WORD_T xx_L = x_L ^ diff;
		WORD_T xx_R = x_R ^ diff;
		pair_t new_pair = {{x_L,x_R}, {xx_L,xx_R}, {0,0}, {0,0}, 0, {0,0}};
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_first, new_pair.ciphertext_first);  
		rc5_encrypt(NROUNDS, S, new_pair.plaintext_second, new_pair.ciphertext_second);  
		// NOTE(review): the original pair cp_pair is tested here, not the
		// freshly encrypted new_pair -- confirm.
		bool b_good = rc5_pair_is_good(S, NROUNDS, cp_pair);
		if(b_good) {
		  ngood++;
		  // NOTE(review): `i` is not declared in this fragment (the loop
		  // indices are i_L and i_R).
		  WORD_T bit_diff = (1ULL << i);
		  printf("[%s:%d] #Good (%lld / %lld) bit# %2lld %016llX\n", __FILE__, __LINE__,
					(WORD_MAX_T)ngood, (WORD_MAX_T)nall, (WORD_MAX_T)i, (WORD_MAX_T)bit_diff);
		  rc5_print_pair(new_pair);
		}

	 }
  }


/* --- */

 /*
  * Find one or several good pairs and from them try to "learn"
  * (ie. construct) more.
  */
 void xxx_rc5_learning_oracle(const WORD_T S[RC5_STAB_LEN_T],
								  const uint32_t nrounds,
								  const WORD_T dx[2], // input difference
								  const gsl_matrix* AA_last[2][2][2][2], // last round
								  const gsl_matrix* A_last[2][2][2], // last round
								  const gsl_vector* L_last,
								  const gsl_vector* C_last,
								  const gsl_matrix* A_mid[2][2], // middle round
								  const gsl_vector* L_mid,
								  const gsl_vector* C_mid,
								  std::vector<std::vector<rc5_goup_diffs_t>>* goup_diff_vec_2d,
								  std::vector<pair_t>* good_pairs_vec,
								  const WORD_T mask_L, const WORD_T mask_R,
								  const WORD_T rand_L, const WORD_T rand_R, 
								  const WORD_T rand_LL, const WORD_T rand_RR)
 {
	assert(RC5_FILTER_SECOND_PASS == 0); // first pass
	assert(RC5_FILTER_BLIND_ORACLE == 1); // blind oracle
	assert(nrounds == NROUNDS);
	printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);
	assert(nrounds == NROUNDS);
	WORD_T** logp2hw_arr;
	WORD_T** logp2hw_arr_max;
	uint32_t logp2hw_arr_rows = ((2 * NROUNDS) + 3);
	uint32_t logp2hw_arr_cols = WORD_SIZE;

	yaarx_alloc_matrices_2d(&logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols);
	yaarx_alloc_matrices_2d(&logp2hw_arr_max, logp2hw_arr_rows, logp2hw_arr_cols);

	//  uint64_t ret_cnt = 0;
	uint32_t fib_array_len = RC5_FIB_LEN;//= RC5_GOUP_LEVEL + 1;
	/**
	 * Initialize the Fibonacci array.
	 */
	std::vector<uint32_t> fib_array;
	std::vector<double> p_thres_array;
	for(uint32_t i = (RC5_FULL_FIB_LEN - fib_array_len); i < RC5_FULL_FIB_LEN; i++) {
	  fib_array.push_back(FIB[i - 2]);  // <--- last two entries are the ciphertexts
	  p_thres_array.push_back(RC5_P_THRES_ARRAY[i]);
	}
	assert(p_thres_array.size() == RC5_FIB_LEN);
	assert(fib_array.size() == RC5_FIB_LEN);

	WORD_T plaintext_first[2] = {0, 0};
	WORD_T plaintext_second[2] = {0, 0};
	WORD_T ciphertext_first[2] = {0, 0};
	WORD_T ciphertext_second[2] = {0, 0};

	uint32_t cnt_cptext_pairs = 0;
	uint64_t cnt_filtered_f1 = 0;
	uint64_t cnt_filtered = 0;
	uint32_t cnt_good_all = 0;
	uint32_t cnt_good_filtered = 0;
	uint32_t cnt_good_filtered_f1 = 0;

#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
	uint32_t arr_len = (2*NROUNDS) + 3;
	double min_prob_arr[(2*NROUNDS) + 3] = {1.0};
	double sum_prob_arr[(2*NROUNDS) + 3] = {0.0};
	double sum_hw_arr[(2*NROUNDS) + 3] = {0.0};
	uint32_t max_hw_arr[(2*NROUNDS) + 3] = {0};
	// initialize arrays
	for(uint32_t i = 0; i < arr_len; i++) {
	  min_prob_arr[i] = 1.0;
	  sum_prob_arr[i] = 0.0;
	  sum_hw_arr[i] = 0.0;
	  max_hw_arr[i] = 0;
	}
#endif // #if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages

#if 1									  // print FIB
	printf("[%s:%d] RC5_FIB_LEN %d fib_array.size() = %d\n", __FILE__, __LINE__, RC5_FIB_LEN, (uint32_t)fib_array.size());
	assert(fib_array.size() == RC5_FIB_LEN);
	printf("[%s:%d] fib_array = ", __FILE__, __LINE__);
	for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
	  printf("[%2d] %2d ", i, fib_array[i]);
	}
	printf("\n");
	printf("[%s:%d] p_thres_array = ", __FILE__, __LINE__);
	for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
	  printf("[%2d] %4.2f ", i, log2(p_thres_array[i]));
	}
	printf("\n");
#endif								  // #if 1p

	/**
	 * Keeps the full trail from the goUP filter that corresponds to a
	 * filtered pair. This is one element of the \ref goup_diff_vec_2d
	 * array.
	 */
	std::vector<rc5_goup_diffs_t> goup_diff_vec;

	uint32_t left = RC5_FEISTEL_LEFT;
	uint32_t right = RC5_FEISTEL_RIGHT;

	assert(RC5_FILTER_USE_STRUCTURES == 1);
	assert(RC5_STRUCTURES_NBITS <= (WORD_SIZE - RC5_LOG2W));
	uint32_t lsb_start_idx = (WORD_SIZE - RC5_STRUCTURES_NBITS); // so that we have the 80000000 difference
	assert(RC5_STRUCTURES_NBITS <= WORD_SIZE);
	assert(lsb_start_idx < WORD_SIZE);
	assert(RC5_STRUCTURES_NBITS <= (WORD_SIZE - lsb_start_idx));

	const WORD_T A_left = rand_L;//xrandom() & MASK;
	const WORD_T A_right = rand_R;//xrandom() & MASK;
	//	 const WORD_T A_right = (rand_R & ~(0x1F));

#if 1 // DEBUG
	printf("[%s:%d] %s() Structures random A_L A_R %llX %llX\n", __FILE__, __LINE__, __FUNCTION__,
			 (WORD_MAX_T)A_left, (WORD_MAX_T)A_right);
#endif // #if 1 // DEBUG

	uint64_t ndata = 0;

	// bit positions of structures
	for(uint32_t struct_bit_i = 0; struct_bit_i < RC5_STRUCTURES_NBITS; struct_bit_i++) {

	  uint32_t e_i = (1U << struct_bit_i); // basis vector e_i

	  for(uint32_t j = 0; j < RC5_STRUCTURES_NTEXTS; j++) { // delta_j : 2^k

		 WORD_T D_j = j;
		 if(((D_j >> struct_bit_i) & 1) != 0)  // the j-th bit of D_i is not set to 0
			continue;

		 ndata++;

		 WORD_T d_j = (D_j << lsb_start_idx);
		 WORD_T dd_j = ((D_j ^ e_i) << lsb_start_idx);

		 WORD_T new_L = (A_left ^ d_j) & MASK;
		 WORD_T new_R = (A_right ^ d_j) & MASK;
		 WORD_T new_LL = (A_left ^ dd_j) & MASK;
		 WORD_T new_RR = (A_right ^ dd_j) & MASK;

		 pair_t cp_pair = {{0,0}, {0,0}, {0,0}, {0,0}};
		 cp_pair.plaintext_first[left] = new_L;
		 cp_pair.plaintext_first[right] = new_R;
		 cp_pair.plaintext_second[left] = new_LL;
		 cp_pair.plaintext_second[right] = new_RR;
		 cp_pair.rand[left] = A_left;
		 cp_pair.rand[right] = A_right;

		 /* Init the goup vector */
		 goup_diff_vec.clear();

		 cnt_cptext_pairs++;

		 for(uint32_t i = 0; i < 2; i++) { // left pt = 0, right pt = 1
			plaintext_first[i] = cp_pair.plaintext_first[i];
			plaintext_second[i] = cp_pair.plaintext_second[i];
		 }

		 // encrypt pairs of texts
		 rc5_encrypt(nrounds, S, plaintext_first, ciphertext_first);  
		 rc5_encrypt(nrounds, S, plaintext_second, ciphertext_second);  

		 // fill the ciphertexts into the pair
		 for(uint32_t i = 0; i < 2; i++) { // left ct = 0, right ct = 1
			cp_pair.ciphertext_first[i] = ciphertext_first[i];
			cp_pair.ciphertext_second[i] = ciphertext_second[i];
		 }

		 assert(cp_pair.ciphertext_first[0] == ciphertext_first[0]);
		 assert(cp_pair.ciphertext_second[0] == ciphertext_second[0]);
		 assert(cp_pair.ciphertext_first[1] == ciphertext_first[1]);
		 assert(cp_pair.ciphertext_second[1] == ciphertext_second[1]);

		 bool b_is_already_stored = false;
		 bool b_is_already_stored_swapped = false;
		 bool b_good = rc5_pair_is_good(S, nrounds, cp_pair);
		 if(b_good) {
			cnt_good_all++;
			cp_pair.b_good = true;
			WORD_T x1_L = cp_pair.plaintext_first[left];
			WORD_T x1_R = cp_pair.plaintext_first[right];
			WORD_T x2_L = cp_pair.plaintext_second[left];
			WORD_T x2_R = cp_pair.plaintext_second[right];

			printf("\n[%s:%d] New good pair: x (%llX %llX) xx (%llX %llX)\n", __FILE__, __LINE__, 
					 (WORD_MAX_T)x1_L, (WORD_MAX_T)x1_R, (WORD_MAX_T)x2_L, (WORD_MAX_T)x2_R);

#if 1 // DEBUG: check for duplicates and swaps
			b_is_already_stored = rc5_pair_is_in_good_vec(cp_pair, (*good_pairs_vec));
			b_is_already_stored_swapped = rc5_pair_is_in_good_vec_swapped(cp_pair, (*good_pairs_vec));
			assert(b_is_already_stored == false);
			assert(b_is_already_stored_swapped == false);
#endif // #if 1 // DEBUG: check for duplicates

			printf("\n[%s:%d] Add good pair to good_pairs_vec #%2d : x (%llX %llX) xx (%llX %llX)\n", __FILE__, __LINE__, cnt_good_all, 
					 (WORD_MAX_T)x1_L, (WORD_MAX_T)x1_R, (WORD_MAX_T)x2_L, (WORD_MAX_T)x2_R);
			good_pairs_vec->push_back(cp_pair);

#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
			rc5_good_pair_debug_statistics(S, nrounds, cp_pair, fib_array,
													 (const gsl_matrix*(*)[2][2][2])AA_last, (const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
													 (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid,
													 min_prob_arr, sum_prob_arr, sum_hw_arr, max_hw_arr);
#endif // #if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
		 } 

		 bool b_is_good_filters_all = false;
		 // all three filters in one
#if 1
		 b_is_good_filters_all = rc5_filters_all((const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
															  (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid, 
															  cp_pair, b_good, fib_array, p_thres_array, 
															  logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols,
															  &goup_diff_vec,
															  &cnt_filtered_f1, &cnt_good_filtered_f1);
#endif // #if 0

		 if(b_is_good_filters_all) {
			cnt_filtered++;
#if RC5_LOG_TO_FILE // store filtered pairs in file
			//			 assert((RC5_FILTER_SECOND_PASS == 0) || (RC5_FILTER_SECOND_PASS == 2)); // 1st pass or 3rd pass (post-processing of 1st pass)
			FILE* fp = fopen(g_filename, "a");
			rc5_pair_print_to_file(fp, cp_pair, b_good);
			fclose(fp);
#endif // #if 1 // store filtered pairs in file

#if 1 // DEBUG
			if(b_good) {
			  printf("\n[%s:%d] Good filtered: BOOM!\n", __FILE__, __LINE__);
			  printf("\n[%s:%d] Good filtered: BOOM!! [%10d / %10lld]\n", __FILE__, __LINE__, j, RC5_NTEXTS);
			  cnt_good_filtered++;
			} 
#endif // #if 1 // DEBUG

			/**
			 * Store the equal-rot trail corresponding to the filtered pair
			 * (i.e. to the candidate good pair)
			 */
			goup_diff_vec_2d->push_back(goup_diff_vec);
		 }
		 //#if 0 // structures
	  }
	}
	//#else

#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
	rc5_equal_rot_attack_average_stats(fib_array, cnt_good_all, min_prob_arr, sum_prob_arr, sum_hw_arr, max_hw_arr);
#endif //#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages

	// update the GLOBALS!!
#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
	for(uint32_t i = 0; i < arr_len; i++) {
	  g_max_hw_arr[i] += (double)max_hw_arr[i];
	  g_min_prob_arr[i] += (double)min_prob_arr[i];
	}
#endif // #if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages

	printf("[%s:%s():%d] Exit statistics:\n", __FILE__, __FUNCTION__, __LINE__);
	printf("NROUNDS %d\n", nrounds);
	printf("WORD_SIZE %d\n", WORD_SIZE);
	printf("RC5_FILTER_CUT_HW1 %2d\n", RC5_FILTER_CUT_HW1);
	printf("RC5_ADD_APPROX  %2d\n", RC5_ADD_APPROX);
	printf("RC5_LAST_ROUND_ADD_APPROX  %2d\n", RC5_LAST_ROUND_ADD_APPROX);
	printf("RC5_ADD_APPROX_ORDER  %2d\n", RC5_ADD_APPROX_ORDER);
	printf("RC5_NTEXTS 2^%4.2f\n", log2((double)RC5_NTEXTS));
	printf("RC5_FIXED_KEY %d\n", RC5_FIXED_KEY);
	printf("RC5_FILTER_LAST_ROUND %d\n", RC5_FILTER_LAST_ROUND);
	printf("RC5_FILTER_ONETOLAST_ROUND %d\n", RC5_FILTER_ONETOLAST_ROUND);
	printf("RC5_FILTER_GOUP %d\n", RC5_FILTER_GOUP);
	printf("RC5_FILTER_GOUP_DEBUG %d\n", RC5_FILTER_GOUP_DEBUG);
	printf("RC5_DEBUG_HAVE_MEMORY %d\n", RC5_DEBUG_HAVE_MEMORY);
	printf("RC5_FILTER_GOUP_DIFF_SET %d\n", RC5_FILTER_GOUP_DIFF_SET);
	printf("#Filtered pairs f1: %lld (2^%f)\n", (long long int)cnt_filtered_f1, log2(cnt_filtered_f1));
	printf("#Filtered pairs all: %lld (2^%f)\n", (long long int)cnt_filtered, log2(cnt_filtered));
	printf("#Good pairs among filtered: %d\n", cnt_good_filtered);
	printf("#Good pairs among filtered f1: %d\n", cnt_good_filtered_f1);
	printf("#Good pairs total: %d\n", cnt_good_all);
	printf("#GoUP sets of trails: %d (2^%f)\n", (uint32_t)goup_diff_vec_2d->size(), log2(goup_diff_vec_2d->size()));

	printf("[%s:%d] A Strange Day: Good / Filtered / Good Filtered: %5d %5lld %5d \n", 
			 __FILE__, __LINE__, cnt_good_all, (long long int)cnt_filtered, cnt_good_filtered);

	printf("[%s:%d] ndata 2^%4.2f\n", __FILE__, __LINE__, log2(ndata));

	printf("RC5_P_THRES_ARRAY = ");
	for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
	  printf("[%2d] %4.2f ", i, log2(p_thres_array[i]));
	}
	printf("\n");
	printf("        FIB_ARRAY = ");
	for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
	  printf("[%2d] %2d ", i, fib_array[i]);
	}
	printf("\n");

	yaarx_free_matrices_2d(logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols);
	yaarx_free_matrices_2d(logp2hw_arr_max, logp2hw_arr_rows, logp2hw_arr_cols);
 }


/* --- */
/*
 * Scrap: optional filter on a tuple of indices (r1, r2, r3, r4).
 * Both the mask constants and the mask test are compiled out (#if 0), so
 * in the active configuration b_allowed is always true and the `continue`
 * below is never taken.
 */
#if 0
  const WORD_T active_mask_L = 0x0000000FFFFFFFFULL;
  const WORD_T active_mask_R = 0x0000000FFFFFFFFULL;
#endif
#if 0
				// Disabled test: the tuple is allowed only when neither table entry
				// has a bit set outside the corresponding active mask.
				bool b_allowed = (((M_L[r1][r2][r3][r4] & ~(active_mask_L)) == 0) && ((M_R[r1][r2][r3][r4] & ~(active_mask_R)) == 0));
#else
				bool b_allowed = true;
#endif
				// Skip the current iteration of the enclosing loop (not visible in
				// this scrap) for tuples that are not allowed.
				if(!b_allowed) {
				  continue;
				}


/* --- */

  // Scrap: window parameters read from compile-time macros.
  const WORD_T nback = RC5_FILTER_BLIND_ORACLE_BACKWARD_WINDOW_NBITS; // number of bits to trace backwards
  const WORD_T nfront = RC5_FILTER_BLIND_ORACLE_FORWARD_WINDOW_NBITS; // number of bits to trace forwards
  const WORD_T w = WORD_SIZE;
  // Sum of the forward window, RC5_LOG2W and the backward window
  // (presumably the total length in bits of the traced sequence -- TODO confirm).
  const WORD_T L = nfront + RC5_LOG2W + nback;

/* --- */

#else // high to low
    for(int64_t i1 = nconst - 1; i1 >= 0; i1--) {
		//		printf("[%s:%d] i1 %lld\n", __FILE__, __LINE__, (WORD_MAX_T)i1);
      for(int64_t i2 = nconst - 1; i2 >= 0; i2--) {
        for(int64_t i3 = nconst - 1; i3 >= 0; i3--) {
			 //		printf("[%s:%d] i3 %lld\n", __FILE__, __LINE__, (WORD_MAX_T)i3);
          for(int64_t i4 = nconst - 1; i4 >= 0; i4--) {
#endif // #if 0 // low to high

/* --- */

/* 
[./tests/rc5-tests.cc:3272] Found solution:   193 |  1  1 59 48  0  0 | X_L X_R F948AF0B 27171580
[./tests/rc5-tests.cc:851] OUTPUT log filename (rc5-filtered-pairs.txt) from stdin: /tmp/test.txt
[./tests/rc5-tests.cc:876] RC5_FIXED_KEY 0 | Master key[16] = {0xDC, 0xB2, 0xF8, 0x5F, 0xB3, 0x70, 0x9B, 0x5A, 0xB1, 0xAF, 0xB7, 0xA7, 0x1A, 0x3A, 0x83, 0xC3, };
[./tests/rc5-tests.cc:886] RC5_FIXED_KEY 0 | Expanded key[26] = {0x96045D995E4671ED, 0xDE05A3E7F023E501, 0xDDFA4826A33F394F, 0x2B746B00532194BA, 0x129A44D48F1E6C6D, 0xF06C4AA126DD166C, 0xB916D713E2B31F10, 0x1B2DCE7A779ADE02, 0xBA09DE82DE3495F6, 0xDFA0A812836F1B9D, 0xA60A4AD4A726C6B2, 0x12C6B082040690B9, 0x6DC5E343D43D2652, 0x122CC68A0434E4EB, 0xA087B791BD8D7056, 0xFC83B8137EE78EC7, 0x162C35BDA529C6C8, 0xFB17545D40B96C7F, 0x80608858EBD29D43, 0x56C0DAE6F31290DB, 0x4AF1C981A3E5D5AD, 0x949C24A9024F544C, 0x442BD035D2B5402A, 0x6E174D69F3A5B5CE, 0x514660305B1E2C87, 0xEE7D05F55B8EC968, };
[./src/rc5-dc.cc:2973]  8R p(8000000000000000 8000000000000000 -> *) = 0.000000 2^-21.415037 | 3 2^23.000000
[./src/rc5-dc.cc:5741] Enter rc5_equal_rot_attack_first_pass_structures()
[./src/rc5-dc.cc:5779] RC5_FIB_LEN 12 fib_array.size() = 12
[./src/rc5-dc.cc:5781] fib_array = [ 0]  3 [ 1]  3 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20 
[./src/rc5-dc.cc:5786] p_thres_array = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00 
[./src/rc5-dc.cc:5831] Structures random A_L A_R F948AF0B 27171580

NROUNDS 8
WORD_SIZE 64
RC5_FILTER_CUT_HW1  1
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 6961378 (2^22.730941)
#Filtered pairs all: 25750 (2^14.652285)
#Good pairs among filtered: 25745
#Good pairs among filtered f1: 443794
#Good pairs total: 443794
#GoUP sets of trails: 25750 (2^14.652285)
[./src/rc5-dc.cc:6029] A Strange Day: Good / Filtered / Good Filtered: 443794 25750 25745 
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00 
        FIB_ARRAY = [ 0]  3 [ 1]  3 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20 
[./tests/rc5-tests.cc:940] #GoUP sets of trails: 25750 (2^14.652285)
const uint32_t g_key[16] = {0xDC, 0xB2, 0xF8, 0x5F, 0xB3, 0x70, 0x9B, 0x5A, 0xB1, 0xAF, 0xB7, 0xA7, 0x1A, 0x3A, 0x83, 0xC3};
[./tests/rc5-tests.cc:1171] key A_L A_R = 0xDCB2F85FB3709B5AB1AFB7A71A3A83C3 0xF948AF0B 0x27171580

[./tests/rc5-tests.cc:1193] RC5_LAST_ROUND_PARAMS_INCLUDE_DX 0
[./tests/rc5-tests.cc:1194] RC5_LAST_ROUND_PARAMS_NVARIANTS_CUT_THRES 5
[./tests/rc5-tests.cc:1195] Test OK!

 */

/* --- */
/* 
  WORD_T nback = RC5_FILTER_BLIND_ORACLE_BACKWARD_WINDOW_NBITS; // number of bits to trace backwards
 */

/* --- */
  // Build a key-bit mask out of two fields: the key_hi most-significant
  // bits and the key_lo least-significant bits, the latter moved left by
  // shift_lo. The trailing comments keep earlier values that were tried.
  uint32_t shift_lo = 0;//8;
  uint32_t key_lo = 8;//32;//12;//WORD_SIZE + 1;//32;//55;//55;//57;
  uint32_t key_hi = 0;//32;//12;//16;//8;//7;

  // A zero-width field must be handled explicitly: the plain expression
  // would shift ~0ULL by WORD_SIZE bits, which is undefined behaviour when
  // WORD_SIZE equals the bit width of unsigned long long (the active
  // key_hi = 0 triggers exactly that). An empty field yields an empty mask.
  // NOTE(review): the shifts operate on a 64-bit ~0ULL, so this presumably
  // targets WORD_SIZE == 64; widths larger than WORD_SIZE are still not
  // handled -- confirm the intended range of key_lo / key_hi.
  WORD_T mask_key_hi = (key_hi == 0) ? 0ULL : (~(0ULL) << (WORD_SIZE - key_hi));
  WORD_T mask_key_lo = (key_lo == 0) ? 0ULL : ((~(0ULL) >> (WORD_SIZE - key_lo)) << shift_lo);
  WORD_T mask_key = mask_key_hi | mask_key_lo;

  // Print the high part, the low part and the combined mask as 16 hex digits.
  printf("[%s:%d] mask_key hi:%016llX lo:%016llX al:%016llX\n", __FILE__, __LINE__, 
			(WORD_MAX_T)mask_key_hi, (WORD_MAX_T)mask_key_lo, (WORD_MAX_T)mask_key);

/* --- */
// Scrap: five hard-coded values r1..r5. The trailing comments show that each
// one stands in for a loop index (i1..i5).
// NOTE(review): presumably fixed rotation amounts used while debugging -- confirm.
WORD_T r1 = 59;//i1;
WORD_T r2 = 60;//i2;
WORD_T r3 = 59;//i3;
WORD_T r4 = 63;//i4;
WORD_T r5 = 7;//i5;

/* --- */
  //  WORD_T (*M_L)[WORD_SIZE][WORD_SIZE][WORD_SIZE] = 
  //	 (WORD_T (*)[WORD_SIZE][WORD_SIZE][WORD_SIZE]) A_L;
  //  WORD_T (*M_R)[WORD_SIZE][WORD_SIZE][WORD_SIZE] = 
  //	 (WORD_T (*)[WORD_SIZE][WORD_SIZE][WORD_SIZE]) A_R;

/* --- */
  for(WORD_T i1 = 0; i1 < len; i1++) {
	 for(WORD_T i2 = 0; i2 < len; i2++) {
		for(WORD_T i3 = 0; i3 < len; i3++) {
		  for(WORD_T i4 = 0; i4 < len; i4++) {
			 printf("[%s:%d] %lld\n", __FILE__, __LINE__, (WORD_MAX_T)M_R[i1][i2][i3][i4]);
		  }
		}
	 }
  }

/* --- */
			 // Scrap: with a modulus of 1 the remainder is always 0, so r1, r2 and
			 // r3 are all 0 here; xrandom() is nevertheless called three times.
			 uint32_t r1 = xrandom() % 1;
			 uint32_t r2 = xrandom() % 1;
			 uint32_t r3 = xrandom() % 1;


/* --- */

#else
		 //		 uint32_t j = ndata;

	  const uint32_t ntests = (1ULL << 10);//(1ULL << 25);
		 //	 printf("[%s:%d] CHECKPOINT %s()\n", __FILE__, __LINE__, __FUNCTION__);
		 //		 for(uint32_t rand_bit = (WORD_SIZE - 1); rand_bit > (WORD_SIZE - 1 - 32); rand_bit--) {
	  for(uint32_t j = 0; j < ntests; j++) { // delta_j : 2^k

		 //			printf("[%s:%d] CHECKPOINT %s() rand_bit %d\n", __FILE__, __LINE__, __FUNCTION__, rand_bit);

	  //	  for(uint32_t j = 0; j < ntests; j++) { // delta_j : 2^k
			uint32_t nrot = (1ULL << RC5_LOG2W);
	  for(uint32_t r1 = 0; r1 < nrot; r1++) { // delta_j : 2^k
	  for(uint32_t r2 = 0; r2 < nrot; r2++) { // delta_j : 2^k
	  for(uint32_t r3 = 0; r3 < nrot; r3++) { // delta_j : 2^k

		 ndata++;
		 //		 j = ndata;
		 //		 uint32_t r1 = xrandom() % WORD_SIZE;
		 //		 uint32_t r2 = xrandom() % WORD_SIZE;
		 //		 uint32_t r3 = xrandom() % WORD_SIZE;
		 uint32_t rand_bit = xrandom() % WORD_SIZE;
		 if(rand_bit < RC5_LOG2W) {
			rand_bit += RC5_LOG2W;
		 }
		 //		 rand_bit = WORD_SIZE - 1;
		 //		 WORD_T diff = (1ULL << rand_bit);
		 WORD_T diff = (1ULL << rand_bit);
		 WORD_T new_L = xrandom() & M_L[r1][r2][r3];
		 WORD_T new_R = xrandom() & M_R[r1][r2][r3];
		 WORD_T new_LL = new_L ^ diff;
		 WORD_T new_RR = new_R ^ diff;
#endif // #if 0 // structures


/* --- */

// {--- 20150131: Blind oracle code ---
// Forward declarations of the blind-oracle helpers. Only the signatures are
// given here; notes about behaviour are hedged unless a call site in this
// file shows it.

// Returns a WORD_T derived from `word`, `start` and `len`.
// NOTE(review): presumably the `len`-bit sequence of `word` beginning at bit
// position `start` -- confirm against the definition.
WORD_T rc5_get_bit_seq(const WORD_T word, const WORD_T start, const WORD_T len);
// Takes four values r1..r4 and five vectors I_k0..I_k4 passed by pointer.
// The call in rc5_blind_oracle_rot4_solution_search() asserts that afterwards
// they hold 4, 7, 4, 2 and 1 entries respectively, and uses the entries as
// the `start` argument of rc5_get_bit_seq() on S[0]..S[4].
void rc5_blind_oracle_rot4_roundkeys_intervals(const WORD_T r1, const WORD_T r2, 
															  const WORD_T r3, const WORD_T r4, 
															  std::vector<WORD_T>* I_k0, std::vector<WORD_T>* I_k1,
															  std::vector<WORD_T>* I_k2, std::vector<WORD_T>* I_k3, 
															  std::vector<WORD_T>* I_k4);
// Takes three values r1..r3 and two vectors I_L, I_R passed by pointer.
// The call in rc5_blind_oracle_rot4_solution_search() asserts that afterwards
// I_L holds 4 and I_R holds 7 entries.
void rc5_blind_oracle_rot4_plaintext_intervals(const WORD_T r1, const WORD_T r2, const WORD_T r3, 
															  std::vector<WORD_T>* I_L, std::vector<WORD_T>* I_R);
// Takes the two vectors by value and two non-const bool arrays of WORD_SIZE
// entries (presumably the outputs -- TODO confirm).
void rc5_blind_oracle_rot4_compute_mask_bool(const std::vector<WORD_T> I_L, const std::vector<WORD_T> I_R,
															bool M_L[WORD_SIZE], bool M_R[WORD_SIZE]);
// Takes one read-only bool array of WORD_SIZE entries.
void rc5_blind_oracle_print_mask_bool(const bool M[WORD_SIZE]);
// Takes two read-only WORD_SIZE x WORD_SIZE x WORD_SIZE tables of words.
void rc5_blind_oracle_print_all_masks_hex(const WORD_T M_L[WORD_SIZE][WORD_SIZE][WORD_SIZE],
														const WORD_T M_R[WORD_SIZE][WORD_SIZE][WORD_SIZE]);
// Takes two mask words, the two read-only tables and a non-const bool table
// r_allowed of the same dimensions (presumably the output -- TODO confirm).
void rc5_blind_oracle_rot4_const_allowed(const WORD_T active_mask_L, const WORD_T active_mask_R, 
													  const WORD_T M_L[WORD_SIZE][WORD_SIZE][WORD_SIZE],
													  const WORD_T M_R[WORD_SIZE][WORD_SIZE][WORD_SIZE],
													  bool r_allowed[WORD_SIZE][WORD_SIZE][WORD_SIZE]);
// Takes a read-only bool array of WORD_SIZE entries and a pointer to a word
// (presumably receives the packed mask -- TODO confirm bit order).
void rc5_blind_oracle_mask_bool_to_bin(const bool bool_M[WORD_SIZE], WORD_T* bin_M);
// Takes the two tables as non-const arguments (presumably fills them --
// TODO confirm).
void rc5_blind_oracle_rot4_compute_plaintext_masks(WORD_T M_L[WORD_SIZE][WORD_SIZE][WORD_SIZE],
									                        WORD_T M_R[WORD_SIZE][WORD_SIZE][WORD_SIZE]);
// Takes the array S (RC5_STAB_LEN_T words), the two tables, two plaintext
// words and four rotation constants; all arguments are read-only.
void rc5_blind_oracle_rot4_vars_print(const WORD_T S[RC5_STAB_LEN_T],
												  const WORD_T M_L[WORD_SIZE][WORD_SIZE][WORD_SIZE], 
												  const WORD_T M_R[WORD_SIZE][WORD_SIZE][WORD_SIZE],
												  const WORD_T plaintext_L, const WORD_T plaintext_R, 
												  const WORD_T rot_const[4]);
// Returns bool; S and the two tables are read-only, the ret_* arguments are
// non-const pointers / array (presumably the found solution -- TODO confirm).
bool rc5_blind_oracle_rot4_solution_search(const WORD_T S[RC5_STAB_LEN_T],
														 const WORD_T M_L[WORD_SIZE][WORD_SIZE][WORD_SIZE], 
														 const WORD_T M_R[WORD_SIZE][WORD_SIZE][WORD_SIZE],
														 WORD_T* ret_M_L, WORD_T* ret_M_R,
													    WORD_T* ret_X_L, WORD_T* ret_X_R, 
													    WORD_T* ret_XX_L, WORD_T* ret_XX_R, 
														 WORD_T ret_r[4]);
// Takes S as a non-const array (presumably modified in place -- TODO confirm)
// together with four read-only rotation constants.
void rc5_blind_oracle_rot4_modify_round_keys(WORD_T S[RC5_STAB_LEN_T], const WORD_T rot_const[4]);

// --- 20150131: Blind oracle code ---}


/* --- */

/* 
#Filtered pairs all: 638 (2^9.317413)
#Good pairs among filtered: 633
#Good pairs among filtered f1: 4927
#Good pairs total: 4927
#GoUP sets of trails: 638 (2^9.317413)
[./src/rc5-dc.cc:6400] A Strange Day: Good / Filtered / Good Filtered:  4927   638   633
[./src/rc5-dc.cc:6402] ndata 2^28.64
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00
        FIB_ARRAY = [ 0]  3 [ 1]  3 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20
[./tests/rc5-tests.cc:1980] #GoUP sets of trails: 638 (2^9.317413)
[./tests/rc5-tests.cc:1982] WARNING! Overload 3 rand L R: 3C000 2C | r1234 50 0 0 12
const uint32_t g_key[16] = {0x99, 0x5A, 0xD9, 0xFF, 0xFA, 0x6, 0x6E, 0xBA, 0x86, 0x9D, 0x4E, 0x4C, 0x2C, 0x5F, 0xF9, 0x45};
[./tests/rc5-tests.cc:2125] key A_L A_R = 0x995AD9FFFA66EBA869D4E4C2C5FF945 0x68BBDFB6 0x58469A0E

[./tests/rc5-tests.cc:2134] WARNING! Overload 3 rand L R: 3C000 2C | r1234 50 0 0 12
[./tests/rc5-tests.cc:1143] r1234 50  0  0 12
[./tests/rc5-tests.cc:1145] M_LR 00000000000FC000 00000000000FC03F P_LR 000000000003C000 000000000000002C X_LR 000000000003C000 000000000000002C
[./tests/rc5-tests.cc:1149] X_L    cdhq      F  F  F  F
[./tests/rc5-tests.cc:1151] X_R abefglm     2C  0 2C  0  0 2C  0
[./tests/rc5-tests.cc:1156] S0: k0_1234      D  D  D  D
[./tests/rc5-tests.cc:1158] S1: k1_1234567  29 29  6  6 29 29  6
[./tests/rc5-tests.cc:1161] S2: k2_1234      B  B  B  B
[./tests/rc5-tests.cc:1163] S3: k3_12        E  E
[./tests/rc5-tests.cc:1165] S4: k4_1         C
[./tests/rc5-tests.cc:1169] Guess key bits:
[./tests/rc5-tests.cc:1170] S0  [14:19]  [14:19]  [14:19]  [14:19]
[./tests/rc5-tests.cc:1175] S1  [14:19]  [14:19]  [ 0: 5]  [ 0: 5]  [14:19]  [14:19]  [ 0: 5]
[./tests/rc5-tests.cc:1180] S2  [ 0: 5]  [ 0: 5]  [ 0: 5]  [ 0: 5]
[./tests/rc5-tests.cc:1185] S3  [ 0: 5]  [ 0: 5]
[./tests/rc5-tests.cc:1190] S4  [ 0: 5]
[./tests/rc5-tests.cc:1215] EQ_1234: 32  0  0  C = 50  0  0 12
[./tests/rc5-tests.cc:2138] RC5_LAST_ROUND_PARAMS_INCLUDE_DX 0
[./tests/rc5-tests.cc:2139] RC5_LAST_ROUND_PARAMS_NVARIANTS_CUT_THRES 5
[./tests/rc5-tests.cc:2140] Test OK!
NROUNDS 8
RC5_NTEXTS 2^23.00
RC5_ORACLE_NTEXTS 2^27.00
RC5_FILTER_SECOND_PASS 0
RC5_STRUCTURES_NBITS 25
RC5_STRUCTURES_NTEXTS 2^25.00
RC5_FILTER_ORACLE 0 RC5_ORACLE_KM 0 RC5_ORACLE_BK 0
RC5_FILTER_USE_STRUCTURES 1
RC5_LOG_TO_FILE 1
RC5_FIXED_KEY 0
RC5_FILTER_CIPHERTEXT_HW_LIMIT 1
RC5_CIPHERTEXT_HW_LIMIT_LEFT 18
RC5_CIPHERTEXT_HW_LIMIT_RIGHT 18
RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX 1
RC5_GOUP_EXPAND_ADD_DEPTH 10
GoUP NL: Rounds to go upper than the bottom two = 0
[./tests/rc5-tests.cc:4712] RC5_FILTER_SECOND_PASS 0
RC5_PAIRS_SORT_BY_CIPHERTEXT_DIFF_HW 0

real    2m47.795s
user    2m22.700s
sys     0m0.671s

 */

/* --- */

  //  const WORD_T active_mask = 0x00000000ffffffffULL;
  //  const WORD_T active_mask_L = 0x000000000000ffC0ULL;
  //  const WORD_T active_mask_L = 0x00000000001FFF80ULL;//0x000000000000ff00ULL;
  //  const WORD_T active_mask_R = 0x00000000001FFFFFULL;//0x000000000000ffffULL;

/* --- */
/* 
[./tests/rc5-tests.cc:1780] Found solution: rot  57 57 63 13 MASK 00000000001FFF80 (14) 00000000001FFFBF (20) 
                                                             X_LR 00000000000F9000      00000000000F901B
[./tests/rc5-tests.cc:1145]                                  M_LR 00000000000007F0      00000000000007FF 
                                                             P_LR 0000000000000360      00000000000004A5 
                                                             X_LR 0000000000000360 00000000000004A5

[./tests/rc5-tests.cc:1465] EQ 1 and 2 and 3 and 4: OK! M_LR  57 57 63 13 00000000001FFF80 00000000001FFFBF 
 X_LR 00000000000F9000 00000000000F901B 
XX_LR 00000000000F9000 00000000000F901B

 */

/* --- */

/* 
#Filtered pairs f1: 6559529 (2^22.645161)
#Filtered pairs all: 285 (2^8.154818)
#Good pairs among filtered: 281
#Good pairs among filtered f1: 2888
#Good pairs total: 2888
#GoUP sets of trails: 285 (2^8.154818)
[./src/rc5-dc.cc:6400] A Strange Day: Good / Filtered / Good Filtered:  2888   285   281
[./src/rc5-dc.cc:6402] ndata 2^28.64
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00
        FIB_ARRAY = [ 0]  3 [ 1]  3 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20
[./tests/rc5-tests.cc:1978] #GoUP sets of trails: 285 (2^8.154818)
[./tests/rc5-tests.cc:1980] WARNING! Overload 3 rand L R: 360 4A5 | r1234 60 0 63 5
const uint32_t g_key[16] = {0xBB, 0x7D, 0x3C, 0xC, 0xAD, 0xED, 0x70, 0x25, 0x8E, 0x19, 0xE4, 0x28, 0x98, 0x57, 0x10, 0xA7};
[./tests/rc5-tests.cc:2123] key A_L A_R = 0xBB7D3CCADED70258E19E428985710A7 0x89A844DC 0x652BAFF2

[./tests/rc5-tests.cc:2132] WARNING! Overload 3 rand L R: 360 4A5 | r1234 60 0 63 5
[./tests/rc5-tests.cc:1143] r1234 60  0 63  5
[./tests/rc5-tests.cc:1145] M_LR 00000000000007F0 00000000000007FF P_LR 0000000000000360 00000000000004A5 X_LR 0000000000000360 00000000000004A5
[./tests/rc5-tests.cc:1149] X_L    cdhq     36 36 1B 1B
[./tests/rc5-tests.cc:1151] X_R abefglm     25  A 25  A 25 12 25
[./tests/rc5-tests.cc:1156] S0: k0_1234     1A 1A 35 35
[./tests/rc5-tests.cc:1158] S1: k1_1234567  18 18  B 17 31 31 17
[./tests/rc5-tests.cc:1161] S2: k2_1234     18 18 30 30
[./tests/rc5-tests.cc:1163] S3: k3_12        1  3
[./tests/rc5-tests.cc:1165] S4: k4_1        27
[./tests/rc5-tests.cc:1169] Guess key bits:
[./tests/rc5-tests.cc:1170] S0  [ 5:10]  [ 5:10]  [ 4: 9]  [ 4: 9]
[./tests/rc5-tests.cc:1175] S1  [ 5:10]  [ 5:10]  [ 1: 6]  [ 0: 5]  [ 4: 9]  [ 4: 9]  [ 0: 5]
[./tests/rc5-tests.cc:1180] S2  [ 1: 6]  [ 1: 6]  [ 0: 5]  [ 0: 5]
[./tests/rc5-tests.cc:1185] S3  [ 1: 6]  [ 0: 5]
[./tests/rc5-tests.cc:1190] S4  [ 0: 5]
[./tests/rc5-tests.cc:1215] EQ_1234: 3C  0 3F  5 = 60  0 63  5
[./tests/rc5-tests.cc:2136] RC5_LAST_ROUND_PARAMS_INCLUDE_DX 0
[./tests/rc5-tests.cc:2137] RC5_LAST_ROUND_PARAMS_NVARIANTS_CUT_THRES 5
[./tests/rc5-tests.cc:2138] Test OK!
NROUNDS 8

 */

/* --- */

/* 
[./tests/rc5-tests.cc:4361] key form stdin: A_L A_R = 0x1864B5E8A977DE4A613092EEEFC3E671 0x0 0x0

[./tests/rc5-tests.cc:1670] OUTPUT log filename (rc5-filtered-pairs.txt) from stdin: /tmp/test.txt
[./tests/rc5-tests.cc:1695] RC5_FIXED_KEY 0 | Master key[16] = {0x18, 0x64, 0xB5, 0xE8, 0xA9, 0x77, 0xDE, 0x4A, 0x61, 0x30, 0x92, 0xEE, 0xEF, 0xC3, 0xE6, 0x71, };
[./tests/rc5-tests.cc:1705] RC5_FIXED_KEY 0 | Expanded key[26] = {0xF689D7D1A63F67B5, 0xE7FDDF592336DD1E, 0x15061E04699E3584, 0xAF78AD66454A8EA8, 0x71B1884AEA88342A, 0xBA6C7C53163D826, 0x20D0C3C071462734, 0xBE14FF55CF053433, 0xE3A77225E065D3D7, 0x84C1426418D668E5, 0xBEE7782393034FC7, 0x58C30A9D8C40FBDE, 0x17741621F50EB04A, 0xB7E4E9424EFAE9C7, 0x2768E8FC21A3FA97, 0x3CE2A19E570346EF, 0x403D121CFA853BCA, 0x59A6DAF638979B5B, 0x56159FFAC562C90F, 0x3F5BCFA4A9E6D213, 0xF95E62C94914DF94, 0x8DA3D8A7E50A77BE, 0x8241989E771F722C, 0x126E04DF97F30C82, 0x77EEF09B8F7736F8, 0xA34FAEB22FA98907, };
[./src/rc5-dc.cc:2973]  8R p(8000000000000000 8000000000000000 -> *) = 0.000000 2^-21.000000 | 4 2^23.000000
[./tests/rc5-tests.cc:1465] EQ 1 and 2 and 3 and 4: OK! M_LR  57 57 63 13 00000000001FFF80 00000000001FFFBF 
 X_LR 00000000000F9000 00000000000F901B 
XX_LR 00000000000F9000 00000000000F901B
[./tests/rc5-tests.cc:1481] Guess key bits:
[./tests/rc5-tests.cc:1482] S0  [ 8:13]  [15:20]  [14:19]  [ 7:12] 
[./tests/rc5-tests.cc:1487] S1  [ 8:13]  [15:20]  [ 8:13]  [ 7:12]  [14:19]  [ 7:12]  [ 0: 5] 
[./tests/rc5-tests.cc:1492] S2  [ 1: 6]  [ 8:13]  [ 7:12]  [ 0: 5] 
[./tests/rc5-tests.cc:1497] S3  [ 1: 6]  [ 0: 5] 
[./tests/rc5-tests.cc:1502] S4  [ 0: 5] 
[./tests/rc5-tests.cc:1780] Found solution: rot  57 57 63 13 MASK 00000000001FFF80 (14) 00000000001FFFBF (20) 
                                                             X_LR 00000000000F9000      00000000000F901B
[./tests/rc5-tests.cc:1792] WARNING! Overload 3 rand L R: F9000 F901B
[./tests/rc5-tests.cc:1794] CHECKPOINT!
[./src/rc5-dc.cc:6095] Enter rc5_equal_rot_attack_first_pass_blind_oracle()
[./src/rc5-dc.cc:6133] RC5_FIB_LEN 12 fib_array.size() = 12
[./src/rc5-dc.cc:6135] fib_array = [ 0]  3 [ 1]  3 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20 
[./src/rc5-dc.cc:6140] p_thres_array = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00 
[./src/rc5-dc.cc:6185] rc5_equal_rot_attack_first_pass_blind_oracle() Structures random A_L A_R F9000 F901B

NROUNDS 8
WORD_SIZE 64
RC5_FILTER_CUT_HW1  1
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 6561114 (2^22.645509)
#Filtered pairs all: 598 (2^9.224002)
#Good pairs among filtered: 594
#Good pairs among filtered f1: 7349
#Good pairs total: 7349
#GoUP sets of trails: 598 (2^9.224002)
[./src/rc5-dc.cc:6388] A Strange Day: Good / Filtered / Good Filtered:  7349   598   594 

 */


/* 
0xbb7d3c0caded70258e19e428985710a7 0x6e45ec9c 0xe9b31364
0xBB7D3C0CADED70258E19E428985710A7 0x1B2000 0x221821

 */

/* --- */
/* 
[./tests/rc5-tests.cc:1676] RC5_FIXED_KEY 0 | Expanded key[26] = {0xA9304875E28BD8E5, 0x6719216E3395B8F3, 0x9FAA78D29184226D, 0x5568D47B25525DCB, 0x7726F4A0D47F5B58, 0xDCA0C911992D8188, 0x80B459361B9B6146, 0xCD47EE57E66CE2D1, 0xFCA00F3516BC70F8, 0x86F11F9E27D7D384, 0xE8E568DA5E6A46A5, 0xC6242EEC74867F9A, 0x4A882F1222591016, 0xD5DF7DEF08F9CA4B, 0x74B6DFCDC8A802B6, 0x597D391EA9A295FD, 0x180BC4993A74EC7C, 0xB01EEAAC61FEFC26, 0x785726B72C0D7734, 0xD77AC5B49666C84D, 0xEC2E5C6C88653FE0, 0x7803649E99A49A7E, 0x835C472DDFDCDC30, 0x656F67158054C3ED, 0xBDCDA46FE5DC8D75, 0x19D420F309C60F81, };

 */

/* 
NROUNDS 8
WORD_SIZE 64
RC5_FILTER_CUT_HW1  1
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 6555958 (2^22.644375)
#Filtered pairs all: 1105 (2^10.109831)
#Good pairs among filtered: 1100
#Good pairs among filtered f1: 4620
#Good pairs total: 4620
#GoUP sets of trails: 1105 (2^10.109831)
[./src/rc5-dc.cc:6022] A Strange Day: Good / Filtered / Good Filtered:  4620  1105  1100
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00
        FIB_ARRAY = [ 0]  3 [ 1]  3 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20
[./tests/rc5-tests.cc:1728] #GoUP sets of trails: 1105 (2^10.109831)
const uint32_t g_key[16] = {0xBB, 0x7D, 0x3C, 0xC, 0xAD, 0xED, 0x70, 0x25, 0x8E, 0x19, 0xE4, 0x28, 0x98, 0x57, 0x10, 0xA7};
[./tests/rc5-tests.cc:1920] key A_L A_R = 0xBB7D3CCADED70258E19E428985710A7 0x1B2000 0x221821
                                         0xBB7D3C0CADED70258E19E428985710A7
 */

/* ---- */
/*
NROUNDS 8
WORD_SIZE 64
RC5_FILTER_CUT_HW1  1
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 6555958 (2^22.644375)
#Filtered pairs all: 1105 (2^10.109831)
#Good pairs among filtered: 1100
#Good pairs among filtered f1: 4620
#Good pairs total: 4620
#GoUP sets of trails: 1105 (2^10.109831)
[./src/rc5-dc.cc:6388] A Strange Day: Good / Filtered / Good Filtered:  4620  1105  1100 
[./src/rc5-dc.cc:6390] ndata 2^28.64
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00 
        FIB_ARRAY = [ 0]  3 [ 1]  3 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20 
  [./tests/rc5-tests.cc:1775] #GoUP sets of trails: 1105 (2^10.109831)
  [./tests/rc5-tests.cc:1777] WARNING! Overload 3 rand L R: 1B2000 221821
  const uint32_t g_key[16] = {0xBB, 0x7D, 0x3C, 0xC, 0xAD, 0xED, 0x70, 0x25, 0x8E, 0x19, 0xE4, 0x28, 0x98, 0x57, 0x10, 0xA7};
[./tests/rc5-tests.cc:1920] key A_L A_R = 0xBB7D3CCADED70258E19E428985710A7 0x6E45EC9C 0xE9B31364

*/
/* --- */
/* 
NROUNDS 8
WORD_SIZE 64
RC5_FILTER_CUT_HW1  1
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 6551463 (2^22.643386)
#Filtered pairs all: 15 (2^3.906891)
#Good pairs among filtered: 2
#Good pairs among filtered f1: 761
#Good pairs total: 761
#GoUP sets of trails: 15 (2^3.906891)
[./src/rc5-dc.cc:6388] A Strange Day: Good / Filtered / Good Filtered:   761    15     2
[./src/rc5-dc.cc:6390] ndata 2^28.64
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00
        FIB_ARRAY = [ 0]  3 [ 1]  3 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20
[./tests/rc5-tests.cc:1775] #GoUP sets of trails: 15 (2^3.906891)
[./tests/rc5-tests.cc:1777] WARNING! Overload 3 rand L R: 60C00 5C61
const uint32_t g_key[16] = {0xBB, 0x7D, 0x3C, 0xC, 0xAD, 0xED, 0x70, 0x25, 0x8E, 0x19, 0xE4, 0x28, 0x98, 0x57, 0x10, 0xA7};

 */
/* --- */
// Scrap: compiled only when RC5_FILTER_SECOND_PASS is 0 and
// RC5_FILTER_BLIND_ORACLE is 1. Prints a warning line with the values of
// overload_rand_L and overload_rand_R in hex; the follow-up call that would
// print the blind-oracle variables is commented out.
#if (RC5_FILTER_SECOND_PASS == 0) && (RC5_FILTER_BLIND_ORACLE == 1) // First pass with blind oracle
  printf("[%s:%d] WARNING! Overload 3 rand L R: %llX %llX\n", __FILE__, __LINE__, (WORD_MAX_T)overload_rand_L, (WORD_MAX_T)overload_rand_R);
  //  test_rc5_blind_oracle_rot4_vars_print(key, overload_rand_L, overload_rand_R); // <-
#endif // #if (RC5_FILTER_SECOND_PASS == 0) && (RC5_FILTER_BLIND_ORACLE == 1) // First pass with blind oracle

/* --- */

  // Scrap: prints a "Guess key bits" header followed by one line for S0.
  // For each of the four entries of I_k0 it prints the interval
  // [I_k0[i] : I_k0[i] + RC5_LOG2W - 1] and the matching value k0_1..k0_4
  // in hex. Only the first entry's format has a trailing space after the
  // hex value.
  printf("[%s:%d] Guess key bits:\n", __FILE__, __LINE__);
  printf("[%s:%d] S0 ", __FILE__, __LINE__);
  printf(" [%2lld:%2lld] %llX ", (WORD_MAX_T)I_k0[0], (WORD_MAX_T)(I_k0[0] + RC5_LOG2W - 1), (WORD_MAX_T)k0_1);
  printf(" [%2lld:%2lld] %llX", (WORD_MAX_T)I_k0[1], (WORD_MAX_T)(I_k0[1] + RC5_LOG2W - 1), (WORD_MAX_T)k0_2);
  printf(" [%2lld:%2lld] %llX", (WORD_MAX_T)I_k0[2], (WORD_MAX_T)(I_k0[2] + RC5_LOG2W - 1), (WORD_MAX_T)k0_3);
  printf(" [%2lld:%2lld] %llX", (WORD_MAX_T)I_k0[3], (WORD_MAX_T)(I_k0[3] + RC5_LOG2W - 1), (WORD_MAX_T)k0_4);
  printf("\n");

/* --- */
bool rc5_blind_oracle_rot4_solution_search(const WORD_T S[RC5_STAB_LEN_T],
														 const WORD_T M_L[WORD_SIZE][WORD_SIZE][WORD_SIZE], 
														 const WORD_T M_R[WORD_SIZE][WORD_SIZE][WORD_SIZE],
														 WORD_T* ret_M_L, WORD_T* ret_M_R,
													    WORD_T* ret_X_L, WORD_T* ret_X_R, 
													    WORD_T* ret_XX_L, WORD_T* ret_XX_R, 
														 WORD_T ret_r[4])
{
  //  const WORD_T seq_len = RC5_LOG2W + RC5_LOG2W; // xxx vpv
  const WORD_T seq_len = RC5_LOG2W;
  const WORD_T w = WORD_SIZE;
  const WORD_T S0 = S[0];
  const WORD_T S1 = S[1];
  const WORD_T S2 = S[2];
  const WORD_T S3 = S[3];
  const WORD_T S4 = S[4];
  bool b_solution = false;
  WORD_T lim_lo = 1;//5;
  WORD_T lim_hi = 56;
  while(!b_solution) {
	 //	 WORD_T diff = 0x8000000000000000ULL;
	 //    const WORD_T init_X_L = xrandom();
	 //    const WORD_T init_X_R = xrandom();
	 //    const WORD_T init_XX_L = init_X_L;// ^ diff;
	 //    const WORD_T init_XX_R = init_X_R;// ^ diff;
#if 0 // DEBUG
	 printf("[%s:%d] Init X_LR XX_LR %llX %llX %llX %llX\n", __FILE__, __LINE__,
			  (WORD_MAX_T)init_X_L, (WORD_MAX_T)init_X_R, (WORD_MAX_T)init_XX_L, (WORD_MAX_T)init_XX_R);
#endif // #if 1 // DEBUG
#if 0 // DEBUG
	 printf("[%s:%d] Init X_LR XX_LR %llX %llX\n", __FILE__, __LINE__,
			  (WORD_MAX_T)init_X_L, (WORD_MAX_T)init_X_R);
#endif // #if 1 // DEBUG
	 //    for(WORD_T i1 = 0; i1 < WORD_SIZE; i1++) {
	 //	   for(WORD_T i2 = 0; i2 < WORD_SIZE; i2++) {
	 //		  for(WORD_T i3 = 0; i3 < WORD_SIZE; i3++) {
	 {{{
#if 0
			 WORD_T r1 = 59;//i1;
			 WORD_T r2 = 60;//i2;
			 WORD_T r3 = 59;//i3;
#else
			 //			 WORD_T r1 = 0;//((xrandom() % 1) * 60) + (xrandom() % 4);
			 //			 WORD_T r2 = 0;//((xrandom() % 1) * 60) + (xrandom() % 4);
			 //			 WORD_T r3 = 0;//((xrandom() % 1) * 60) + (xrandom() % 4);
			 WORD_T r1 = xrandom() % WORD_SIZE;
			 while(!((r1 < lim_lo) || (r1 > lim_hi))) {
				r1 = xrandom() % WORD_SIZE;
			 }
			 WORD_T r2 = xrandom() % WORD_SIZE;
			 while(!((r2 < lim_lo) || (r2 > lim_hi))) {
				r2 = xrandom() % WORD_SIZE;
			 }
			 WORD_T r3 = xrandom() % WORD_SIZE;
			 while(!((r3 < lim_lo) || (r3 > lim_hi))) {
				r3 = xrandom() % WORD_SIZE;
			 }
#endif

			 const WORD_T init_X_L = xrandom();
			 const WORD_T init_X_R = xrandom();
			 /**
			  * Generate a 1-bit difference in the active bits of both
			  * M_L and M_R
			  */
#if 0
			 WORD_T diff = (1ULL << (WORD_SIZE - 1));
			 assert(diff == 0x8000000000000000ULL);
			 WORD_T index = 0;//(WORD_SIZE - 1);
			 bool b_set = false;
			 do {
				if((((M_L[r1][r2][r3] >> (WORD_SIZE - index - 1)) & 1) == 1) && 
					(((M_R[r1][r2][r3] >> (WORD_SIZE - index - 1)) & 1) == 1)) {
				  b_set = true;
				  diff = (diff >> index);
				} else {
				  index++;
				}
			 } while ((b_set == false) && (index < (WORD_SIZE - RC5_LOG2W)));
			 //			 printf("[%s:%d] diff %llX\n", __FILE__, __LINE__, (WORD_MAX_T)diff);
#else
			 WORD_T diff = 0;
#endif // #if 0
			 const WORD_T init_XX_L = init_X_L ^ diff;
			 const WORD_T init_XX_R = init_X_R ^ diff;

			 const WORD_T X_L = init_X_L & M_L[r1][r2][r3];
			 const WORD_T X_R = init_X_R & M_R[r1][r2][r3];
			 const WORD_T XX_L = init_XX_L & M_L[r1][r2][r3];
			 const WORD_T XX_R = init_XX_R & M_R[r1][r2][r3];
#if 0 // DEBUG
			 printf("[%s:%d] X_LR XX_LR %llX %llX %llX %llX\n", __FILE__, __LINE__,
					  (WORD_MAX_T)X_L, (WORD_MAX_T)X_R, (WORD_MAX_T)XX_L, (WORD_MAX_T)XX_R);
#endif // #if 1 // DEBUG
			 std::vector<WORD_T> I_L;
			 std::vector<WORD_T> I_R;
			 rc5_blind_oracle_rot4_plaintext_intervals(r1, r2, r3, &I_L, &I_R);
			 assert(I_L.size() == 4);
			 assert(I_R.size() == 7);
			 // X_L
			 WORD_T c = rc5_get_bit_seq(X_L, I_L[0], seq_len);
			 WORD_T d = rc5_get_bit_seq(X_L, I_L[1], seq_len);
			 WORD_T h = rc5_get_bit_seq(X_L, I_L[2], seq_len);
			 WORD_T q = rc5_get_bit_seq(X_L, I_L[3], seq_len);
			 // X_R
			 WORD_T a = rc5_get_bit_seq(X_R, I_R[0], seq_len);
			 WORD_T b = rc5_get_bit_seq(X_R, I_R[1], seq_len);
			 WORD_T e = rc5_get_bit_seq(X_R, I_R[2], seq_len);
			 WORD_T f = rc5_get_bit_seq(X_R, I_R[3], seq_len);
			 WORD_T g = rc5_get_bit_seq(X_R, I_R[4], seq_len);
			 WORD_T l = rc5_get_bit_seq(X_R, I_R[5], seq_len);
			 WORD_T m = rc5_get_bit_seq(X_R, I_R[6], seq_len);
			 // XX_L
			 WORD_T cc = rc5_get_bit_seq(XX_L, I_L[0], seq_len);
			 WORD_T dd = rc5_get_bit_seq(XX_L, I_L[1], seq_len);
			 WORD_T hh = rc5_get_bit_seq(XX_L, I_L[2], seq_len);
			 WORD_T qq = rc5_get_bit_seq(XX_L, I_L[3], seq_len);
			 // XX_R
			 WORD_T aa = rc5_get_bit_seq(XX_R, I_R[0], seq_len);
			 WORD_T bb = rc5_get_bit_seq(XX_R, I_R[1], seq_len);
			 WORD_T ee = rc5_get_bit_seq(XX_R, I_R[2], seq_len);
			 WORD_T ff = rc5_get_bit_seq(XX_R, I_R[3], seq_len);
			 WORD_T gg = rc5_get_bit_seq(XX_R, I_R[4], seq_len);
			 WORD_T ll = rc5_get_bit_seq(XX_R, I_R[5], seq_len);
			 WORD_T mm = rc5_get_bit_seq(XX_R, I_R[6], seq_len);
#if 0 // DEBUG
			 printf("[%s:%d] X_L    cdhq %llX %llX %llX %llX\n", __FILE__, __LINE__,
					  (WORD_MAX_T)c, (WORD_MAX_T)d, (WORD_MAX_T)h, (WORD_MAX_T)q);
			 printf("[%s:%d] X_R abefglm %llX %llX %llX %llX %llX %llX %llX\n", __FILE__, __LINE__,
					  (WORD_MAX_T)a, (WORD_MAX_T)b, (WORD_MAX_T)e, (WORD_MAX_T)f, (WORD_MAX_T)g, (WORD_MAX_T)l, (WORD_MAX_T)m);
			 printf("[%s:%d] XX_L    cdhq %llX %llX %llX %llX\n", __FILE__, __LINE__,
					  (WORD_MAXX_T)cc, (WORD_MAXX_T)dd, (WORD_MAXX_T)hh, (WORD_MAXX_T)qq);
			 printf("[%s:%d] XX_R abefglm %llX %llX %llX %llX %llX %llX %llX\n", __FILE__, __LINE__,
					  (WORD_MAXX_T)aa, (WORD_MAXX_T)bb, (WORD_MAXX_T)ee, (WORD_MAXX_T)ff, (WORD_MAXX_T)gg, (WORD_MAXX_T)ll, (WORD_MAXX_T)mm);
#endif // #if 1 // DEBUG
			 //		    for(WORD_T i4 = 0; i4 < WORD_SIZE; i4++) {
			 {

				//				WORD_T r4 = ((xrandom() % 1) * 60) + (xrandom() % 4);
#if 0
				WORD_T r4 = 63;//i4;//
#else
				WORD_T r4 = xrandom() % WORD_SIZE;
				while(!((r4 < lim_lo) || (r4 > lim_hi))) {
				  r4 = xrandom() % WORD_SIZE;
				}
#endif

			   std::vector<WORD_T> I_k0;
            std::vector<WORD_T> I_k1;
            std::vector<WORD_T> I_k2;
            std::vector<WORD_T> I_k3;
				std::vector<WORD_T> I_k4;
            rc5_blind_oracle_rot4_roundkeys_intervals(r1, r2, r3, r4, &I_k0, &I_k1, &I_k2, &I_k3, &I_k4);
				assert(I_k0.size() == 4);
				assert(I_k1.size() == 7);
				assert(I_k2.size() == 4);
				assert(I_k3.size() == 2);
				assert(I_k4.size() == 1);

				// S0
				WORD_T k0_1 = rc5_get_bit_seq(S0, I_k0[0], seq_len);
				WORD_T k0_2 = rc5_get_bit_seq(S0, I_k0[1], seq_len);
				WORD_T k0_3 = rc5_get_bit_seq(S0, I_k0[2], seq_len);
				WORD_T k0_4 = rc5_get_bit_seq(S0, I_k0[3], seq_len);
				// S1
				WORD_T k1_1 = rc5_get_bit_seq(S1, I_k1[0], seq_len);
				WORD_T k1_2 = rc5_get_bit_seq(S1, I_k1[1], seq_len);
				WORD_T k1_3 = rc5_get_bit_seq(S1, I_k1[2], seq_len);
				WORD_T k1_4 = rc5_get_bit_seq(S1, I_k1[3], seq_len);
				WORD_T k1_5 = rc5_get_bit_seq(S1, I_k1[4], seq_len);
				WORD_T k1_6 = rc5_get_bit_seq(S1, I_k1[5], seq_len);
				WORD_T k1_7 = rc5_get_bit_seq(S1, I_k1[6], seq_len);
				// S2
				WORD_T k2_1 = rc5_get_bit_seq(S2, I_k2[0], seq_len);
				WORD_T k2_2 = rc5_get_bit_seq(S2, I_k2[1], seq_len);
				WORD_T k2_3 = rc5_get_bit_seq(S2, I_k2[2], seq_len);
				WORD_T k2_4 = rc5_get_bit_seq(S2, I_k2[3], seq_len);
				// S3
				WORD_T k3_1 = rc5_get_bit_seq(S3, I_k3[0], seq_len);
				WORD_T k3_2 = rc5_get_bit_seq(S3, I_k3[1], seq_len);
				// S4
				WORD_T k4_1 = rc5_get_bit_seq(S4, I_k4[0], seq_len);

#if 0 // DEBUG
				printf("[%s:%d] S0: %llX %llX %llX %llX\n", __FILE__, __LINE__,
						 (WORD_MAX_T)k0_1, (WORD_MAX_T)k0_2, (WORD_MAX_T)k0_3, (WORD_MAX_T)k0_4);
				printf("[%s:%d] S1: %llX %llX %llX %llX %llX %llX %llX\n", __FILE__, __LINE__,
						 (WORD_MAX_T)k1_1, (WORD_MAX_T)k1_2, (WORD_MAX_T)k1_3, (WORD_MAX_T)k1_4,
						 (WORD_MAX_T)k1_5, (WORD_MAX_T)k1_6, (WORD_MAX_T)k1_7);
				printf("[%s:%d] S2: %llX %llX %llX %llX\n", __FILE__, __LINE__,
						 (WORD_MAX_T)k2_1, (WORD_MAX_T)k2_2, (WORD_MAX_T)k2_3, (WORD_MAX_T)k2_4);
				printf("[%s:%d] S3: %llX %llX\n", __FILE__, __LINE__,
						 (WORD_MAX_T)k3_1, (WORD_MAX_T)k3_2);
				printf("[%s:%d] S4: %llX\n", __FILE__, __LINE__, (WORD_MAX_T)k4_1);
#endif // #if 1 // DEBUG
				WORD_T E_1 = (a + k1_7) % w;
				WORD_T EE_1 = (aa + k1_7) % w;
				bool b_eq_1 = (E_1 == r1) && (EE_1 == r1);
				if(!b_eq_1) {
				  continue;
				} else {
				  //				  printf("[%s:%d] EQ 1: OK!\n", __FILE__, __LINE__);
				}
				WORD_T E_2 = ((((b + k1_6) % w) ^ ((c + k0_4) % w)) + k2_4) % w;
				WORD_T EE_2 = ((((bb + k1_6) % w) ^ ((cc + k0_4) % w)) + k2_4) % w;
				bool b_eq_2 = (E_2 == r2) && (EE_2 == r2);
				if(!b_eq_2) {
				  continue;
				} else {
				  //				  printf("[%s:%d] EQ 1 and 2: OK!\n", __FILE__, __LINE__);
				}
				WORD_T E_3 = (((((((f + k1_5) % w) ^ ((d + k0_3) % w)) + k2_3) % w) ^ ((e + k1_4) % w)) + k3_2) % w;
				WORD_T EE_3 = (((((((ff + k1_5) % w) ^ ((dd + k0_3) % w)) + k2_3) % w) ^ ((ee + k1_4) % w)) + k3_2) % w;
				bool b_eq_3 = (E_3 == r3) && (EE_3 == r3);
				if(!b_eq_3) {
				  continue;
				} else {
				  //				  printf("[%s:%d] EQ 1 and 2 and 3: OK! M_LR %3d%3d%3d%3d %016llX %016llX X_LR %016llX %016llX\n", __FILE__, __LINE__,
				  //							r1, r2, r3, r4, (WORD_MAX_T)M_L[r1][r2][r3], (WORD_MAX_T)M_R[r1][r2][r3], (WORD_MAX_T)X_L, (WORD_MAX_T)X_R);
				}
				WORD_T temp_1 = ((((m + k1_2) % w) ^ ((q + k0_2) % w)) + k2_2) % w;
				temp_1 = ((temp_1 ^ ((l + k1_3) % w)) + k3_1) % w;
				WORD_T temp_2 = ((((g + k1_1) % w) ^ ((h + k0_1) % w)) + k2_1) % w;
				WORD_T E_4 = ((temp_1 ^ temp_2) + k4_1) % w;

				temp_1 = temp_2 = 0; // re-init

				temp_1 = ((((mm + k1_2) % w) ^ ((qq + k0_2) % w)) + k2_2) % w;
				temp_1 = ((temp_1 ^ ((ll + k1_3) % w)) + k3_1) % w;
				temp_2 = ((((gg + k1_1) % w) ^ ((hh + k0_1) % w)) + k2_1) % w;
				WORD_T EE_4 = ((temp_1 ^ temp_2) + k4_1) % w;

				bool b_eq_4 = (E_4 == r4) && (EE_4 == r4);
				if(!b_eq_4) {
				  continue;
				} else {
				  printf("[%s:%d] EQ 1 and 2 and 3 and 4: OK! M_LR %3lld%3lld%3lld%3lld %016llX %016llX \n X_LR %016llX %016llX \nXX_LR %016llX %016llX\n", __FILE__, __LINE__,
							(WORD_MAX_T)r1, (WORD_MAX_T)r2, (WORD_MAX_T)r3, (WORD_MAX_T)r4, 
							(WORD_MAX_T)M_L[r1][r2][r3], (WORD_MAX_T)M_R[r1][r2][r3], 
							(WORD_MAX_T)X_L, (WORD_MAX_T)X_R, (WORD_MAX_T)XX_L, (WORD_MAX_T)XX_R);
				  (*ret_M_L) = M_L[r1][r2][r3];
				  (*ret_M_R) = M_R[r1][r2][r3];
				  (*ret_X_L) = X_L;
				  (*ret_X_R) = X_R;
				  (*ret_XX_L) = XX_L;
				  (*ret_XX_R) = XX_R;
				  ret_r[0] = r1;
				  ret_r[1] = r2;
				  ret_r[2] = r3;
				  ret_r[3] = r4;
				  b_solution = true;
				  //				  return b_solution;
				}

          }
        }
      }
    }
	 //	 b_solution = true;
  }
  return b_solution;
}


/* --- */

  //  ret_M_L = 0x0FC07E1F80FC0000;
  //  ret_M_R = 0x0FC07E1F80FFE03F;
  //  ret_X_L = ret_XX_L = 0x0B00321700940000; 
  //  ret_X_R = ret_XX_R = 0x0640120780798000;
//  ret_M_L = 0xffffffff;
//  ret_M_R = 0xffffffff;
//  ret_X_L = ret_XX_L = 0x6e45ec9c;
//  ret_X_R = ret_XX_R = 0x2fd31364;

  //  WORD_T diff = 0x8000000000000000ULL;

/* ---- */

#if 1
			 // Fragment: choose a single-bit difference `diff`.
			 // Start from the most significant bit and walk towards the least
			 // significant one until a position is found at which BOTH masks
			 // M_L[r1][r2][r3] and M_R[r1][r2][r3] have the bit set to 1.
			 WORD_T diff = (1ULL << (WORD_SIZE - 1));
			 // Holds only when WORD_SIZE == 64, i.e. this fragment is written for 64-bit words.
			 assert(diff == 0x8000000000000000ULL);
			 WORD_T index = 0;//(WORD_SIZE - 1);
			 bool b_set = false;
			 do {
				// Bit (WORD_SIZE - index - 1) is the index-th bit counted from the MSB.
				if((((M_L[r1][r2][r3] >> (WORD_SIZE - index - 1)) & 1) == 1) && 
					(((M_R[r1][r2][r3] >> (WORD_SIZE - index - 1)) & 1) == 1)) {
				  b_set = true;
				  diff = (diff >> index);
				} else {
				  index++;
				}
				// The search stops after (WORD_SIZE - RC5_LOG2W) positions; if no position
				// qualified, diff keeps its initial value (only the MSB set).
			 } while ((b_set == false) && (index < (WORD_SIZE - RC5_LOG2W)));
			 //			 printf("[%s:%d] diff %llX\n", __FILE__, __LINE__, (WORD_MAX_T)diff);
#else
			 WORD_T diff = 0;
#endif // #if 1

/* --- */

	 //cp_pair.plaintext_first[left] = (new_L & (~(ret_M_L))) | (ret_X_L & ret_M_L);//ret_X_L;
	 //cp_pair.plaintext_first[right] = (new_R & (~(ret_M_R))) | (ret_X_R & ret_M_R);//ret_X_R;
	 //cp_pair.plaintext_second[left] = cp_pair.plaintext_first[left] ^ diff;//((new_L & (~(ret_M_L))) | (ret_XX_L & ret_M_L)) ^ diff;//ret_XX_L;
	 //cp_pair.plaintext_second[right] = cp_pair.plaintext_first[right] ^ diff;//((new_R & (~(ret_M_R))) | (ret_XX_R & ret_M_R)) ^ diff;//ret_XX_R;

/* --- */
#if 0
// Disabled fragment: choose a single-bit difference at a randomly drawn position.
#if 1
	 WORD_T index = (WORD_SIZE - 1);
	 WORD_T diff = (1ULL << index);
	 // Holds only when WORD_SIZE == 64.
	 assert(diff == 0x8000000000000000ULL);
	 bool b_set = false;
	 do {
		//		if((((ret_M_L >> (WORD_SIZE - index - 1)) & 1) == 0) && 
		//			(((ret_M_R >> (WORD_SIZE - index - 1)) & 1) == 0) &&
		//			(index >= RC5_LOG2W)) {
		// Accept `index` when bit `index` is 0 in both ret_M_L and ret_M_R and the
		// position is not below RC5_LOG2W.
		if((((ret_M_L >> index) & 1) == 0) && 
			(((ret_M_R >> index) & 1) == 0) &&
			(index >= RC5_LOG2W)) {
		  b_set = true;
		  diff = (1ULL << index);
		} else {
		  //		  index--;
		  // Rejected: draw a new candidate position at random.
		  index = xrandom() % WORD_SIZE;
		}
		//	 } while ((b_set == false) && (index > RC5_LOG2W));
		// NOTE(review): the loop has no iteration bound; it does not terminate if no
		// position satisfies the condition above.
	 } while (b_set == false);
	 //	 printf("[%s:%d] diff %llX\n", __FILE__, __LINE__, (WORD_MAX_T)diff);
	 assert(b_set);
#else
	 WORD_T diff = 0;
#endif // #if 1
#endif // #if 0


/* --- */

#if 1
	 // Fragment: choose a single-bit difference `diff`.
	 // Start from the most significant bit and walk towards the least significant
	 // one until a position is found at which BOTH ret_M_L and ret_M_R have the
	 // bit cleared (0).
	 WORD_T diff = (1ULL << (WORD_SIZE - 1));
	 // Holds only when WORD_SIZE == 64.
	 assert(diff == 0x8000000000000000ULL);
	 bool b_set = false;
	 WORD_T index = 0;//(WORD_SIZE - 1);
	 do {
		// Bit (WORD_SIZE - index - 1) is the index-th bit counted from the MSB.
		if((((ret_M_L >> (WORD_SIZE - index - 1)) & 1) == 0) && 
			(((ret_M_R >> (WORD_SIZE - index - 1)) & 1) == 0)) {
		  b_set = true;
		  diff = (diff >> index);
		} else {
		  index++;
		}
		// With assertions enabled, running out of candidate positions aborts here,
		// so the bound in the loop condition below can only trigger when assertions
		// are compiled out.
		assert((index < (WORD_SIZE - RC5_LOG2W)));
		//	 } while ((b_set == false) && (index < (WORD_SIZE - RC5_LOG2W)));
	 } while ((b_set == false) && (index < (WORD_SIZE - RC5_LOG2W)));
	 //			 printf("[%s:%d] diff %llX\n", __FILE__, __LINE__, (WORD_MAX_T)diff);
#else
	 WORD_T diff = 0;
#endif // #if 1

/* --- */

  /*
   * Fragment: main loop over RC5_STRUCTURES_NTEXTS plaintext pairs.
   *
   * Each iteration
   *  1. builds a pair of plaintexts: bits selected by mask_L / mask_R are
   *     copied from rand_L / rand_R (first text) and rand_LL / rand_RR
   *     (second text), all remaining bits come from xrandom();
   *  2. encrypts both texts with rc5_encrypt();
   *  3. if rc5_pair_is_good() reports the pair as good, records it in
   *     good_pairs_vec (asserting it was not stored before);
   *  4. runs rc5_filters_all(); if the pair passes, the trail collected in
   *     goup_diff_vec is appended to goup_diff_vec_2d.
   *
   * All variables other than those declared inside the loop are expected to
   * be declared by the enclosing (not shown) function.
   */
  for(uint32_t j = 0; j < RC5_STRUCTURES_NTEXTS; j++) {

	 //	 WORD_T diff_L = dx[0];
	 //	 WORD_T diff_R = dx[1];
	 // Keep the masked bits fixed, randomize the rest.
	 WORD_T new_L = (xrandom() & (~(mask_L))) | (mask_L & rand_L);
	 WORD_T new_R = (xrandom() & (~(mask_R))) | (mask_R & rand_R);
	 WORD_T new_LL = (xrandom() & (~(mask_L))) | (mask_L & rand_LL);
	 WORD_T new_RR = (xrandom() & (~(mask_R))) | (mask_R & rand_RR);
#if 0 // DEBUG
	 printf("[%s:%d] new_LR %016llX %016llX M_LR %016llX %016llX\n", __FILE__, __LINE__, 
			  (WORD_MAX_T)new_L, (WORD_MAX_T)new_R, (WORD_MAX_T)mask_L, (WORD_MAX_T)mask_R);
#endif // #if 0 // DEBUG
	 pair_t cp_pair = {{0,0}, {0,0}, {0,0}, {0,0}};
	 cp_pair.plaintext_first[left] = new_L;
	 cp_pair.plaintext_first[right] = new_R;
	 cp_pair.plaintext_second[left] = new_LL;// ^ diff;
	 cp_pair.plaintext_second[right] = new_RR;// ^ diff;
	 cp_pair.rand[left] = A_left;
	 cp_pair.rand[right] = A_right;


	 /* Init the goup vector */
	 goup_diff_vec.clear();

	 cnt_cptext_pairs++;

	 for(uint32_t i = 0; i < 2; i++) { // left pt = 0, right pt = 1
		plaintext_first[i] = cp_pair.plaintext_first[i];
		plaintext_second[i] = cp_pair.plaintext_second[i];
	 }

	 // encrypt pairs of texts
	 rc5_encrypt(nrounds, S, plaintext_first, ciphertext_first);  
	 rc5_encrypt(nrounds, S, plaintext_second, ciphertext_second);  

	 // fill the ciphertexts into the pair
	 for(uint32_t i = 0; i < 2; i++) { // left ct = 0, right ct = 1
		cp_pair.ciphertext_first[i] = ciphertext_first[i];
		cp_pair.ciphertext_second[i] = ciphertext_second[i];
	 }

	 // Sanity check of the copy above.
	 assert(cp_pair.ciphertext_first[0] == ciphertext_first[0]);
	 assert(cp_pair.ciphertext_second[0] == ciphertext_second[0]);
	 assert(cp_pair.ciphertext_first[1] == ciphertext_first[1]);
	 assert(cp_pair.ciphertext_second[1] == ciphertext_second[1]);

	 bool b_is_already_stored = false;
	 bool b_is_already_stored_swapped = false;
	 // The check receives the expanded key S, i.e. it is a debugging aid that
	 // relies on knowledge of the key.
	 bool b_good = rc5_pair_is_good(S, nrounds, cp_pair);
	 if(b_good) {
		cnt_good_all++;
		cp_pair.b_good = true;
		WORD_T x1_L = cp_pair.plaintext_first[left];
		WORD_T x1_R = cp_pair.plaintext_first[right];
		WORD_T x2_L = cp_pair.plaintext_second[left];
		WORD_T x2_R = cp_pair.plaintext_second[right];

		printf("\n[%s:%d] New good pair: x (%llX %llX) xx (%llX %llX)\n", __FILE__, __LINE__, 
				 (WORD_MAX_T)x1_L, (WORD_MAX_T)x1_R, (WORD_MAX_T)x2_L, (WORD_MAX_T)x2_R);

#if 1 // DEBUG: check for duplicates and swaps
		b_is_already_stored = rc5_pair_is_in_good_vec(cp_pair, (*good_pairs_vec));
		b_is_already_stored_swapped = rc5_pair_is_in_good_vec_swapped(cp_pair, (*good_pairs_vec));
		assert(b_is_already_stored == false);
		assert(b_is_already_stored_swapped == false);
#endif // #if 1 // DEBUG: check for duplicates

		printf("\n[%s:%d] Add good pair to good_pairs_vec #%2d : x (%llX %llX) xx (%llX %llX)\n", __FILE__, __LINE__, cnt_good_all, 
				 (WORD_MAX_T)x1_L, (WORD_MAX_T)x1_R, (WORD_MAX_T)x2_L, (WORD_MAX_T)x2_R);
		good_pairs_vec->push_back(cp_pair);

#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
		rc5_good_pair_debug_statistics(S, nrounds, cp_pair, fib_array,
												 (const gsl_matrix*(*)[2][2][2])AA_last, (const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
												 (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid,
												 min_prob_arr, sum_prob_arr, sum_hw_arr, max_hw_arr);
#endif // #if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
	 } 

	 bool b_is_good_filters_all = false;
	 // all three filters in one
#if 1
	 b_is_good_filters_all = rc5_filters_all((const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
														  (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid, 
														  cp_pair, b_good, fib_array, p_thres_array, 
														  logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols,
														  &goup_diff_vec,
														  &cnt_filtered_f1, &cnt_good_filtered_f1);
#endif // #if 1

	 if(b_is_good_filters_all) {
		cnt_filtered++;
#if 0 // DEBUG
		printf("\r[%s:%d] (%10d / %10lld) vec_2d size %10d cnt_filtered_f1 %10lld cnt_filtered %10lld", 
				 __FILE__, __LINE__, 
				 j, RC5_NTEXTS, (uint32_t)goup_diff_vec_2d->size() + 1, 
				 (long long int)cnt_filtered_f1, (long long int)cnt_filtered);
		fflush(stdout);
#endif // #if 0 // DEBUG
#if RC5_FILTER_GOUP_DEBUG
		// Cross-check the freshly computed trail against the reference entry
		// g_goup_diff_vec_2d[cnt_vec_2d].
		assert(RC5_DEBUG_HAVE_MEMORY == 1);
		assert(goup_diff_vec.size() == g_goup_diff_vec_2d[cnt_vec_2d].size());
		bool b_are_equal = rc5_goup_diff_vecs_are_equal(goup_diff_vec, g_goup_diff_vec_2d[cnt_vec_2d]);
		assert(b_are_equal);
		cnt_vec_2d++;
#endif  // #if RC5_FILTER_GOUP_DEBUG

#if RC5_LOG_TO_FILE // store filtered pairs in file
		//			 assert((RC5_FILTER_SECOND_PASS == 0) || (RC5_FILTER_SECOND_PASS == 2)); // 1st pass or 3rd pass (post-processing of 1st pass)
		// NOTE(review): the result of fopen() is not checked before use.
		FILE* fp = fopen(g_filename, "a");
		rc5_pair_print_to_file(fp, cp_pair, b_good);
		fclose(fp);
#endif // #if RC5_LOG_TO_FILE // store filtered pairs in file

#if 1 // DEBUG
		if(b_good) {
		  printf("\n[%s:%d] Good filtered: BOOM!\n", __FILE__, __LINE__);
		  printf("\n[%s:%d] Good filtered: BOOM!! [%10d / %10lld]\n", __FILE__, __LINE__, j, RC5_NTEXTS);
#if 1 // check the parameters
		  // Compare the true last-round parameters of the good pair with the
		  // trail produced by the filters.
		  eq_x_params_t good_params;
		  rc5_pair_extract_last_round_params(S, nrounds, cp_pair, &good_params);
		  bool b_good_filtered = rc5_filtered_pair_match_last_round_params(S, nrounds, good_params, goup_diff_vec);
		  if(b_good_filtered) {
			 printf("[%s:%d] Good last round parameters: BOOOOOOOOOM!!\n", __FILE__, __LINE__);
			 //			 printf("[%s:%d] Good last round parameters: BOOOOOOOOOM!! | ocounter %d\n", __FILE__, __LINE__, ocounter);
			 printf("\n");
			 printf("[%s:%d] Good #%10d (%llX, %llX, %llX, %2d, %2d, %d)\n", __FILE__, __LINE__,
					  cnt_good_filtered, (WORD_MAX_T)good_params.dx, (WORD_MAX_T)good_params.y, (WORD_MAX_T)good_params.yy, 
					  good_params.rot_const, good_params.rot_const_prev, good_params.b_aux_data);
			 //			 rc5_print_pair(goup_diff_vec.pc_pair);
			 //			 assert(0 == 1);
		  }
#endif
		  cnt_good_filtered++;
		} 
#if 0
		else {
		  //		  double ratio_percent = ((double)cnt_filtered / (double)j) * 100.00;
		  double filter_strength = ((double)j / (double)cnt_filtered);
		  printf("\n[%s:%d] False positive: #filtered [%15lld (2^%4.2f) / %15d (2^%4.2f)] pass 1 in 2^%4.2f\n", __FILE__, __LINE__, (long long int)cnt_filtered, log2(cnt_filtered), j, log2(j), log2(filter_strength));
		}
#endif // #if 0
#endif // #if 1 // DEBUG

		/**
		 * Store the equal-rot trail corresponding to the filtered pair
		 * (i.e. to the candidate good pair)
		 */
		goup_diff_vec_2d->push_back(goup_diff_vec);
	 }
  }

/* --- */

	 //	 const WORD_T init_XX_L = init_X_L;// ^ diff;
	 //    const WORD_T init_XX_R = init_X_R;// ^ diff;
	 //    const WORD_T init_XX_L = xrandom();
	 //    const WORD_T init_XX_R = (xrandom() & ~(WORD_SIZE - 1)) | (init_X_R & (WORD_SIZE - 1)); // make the 6 LSB the same

/* --- */
/**
 * First pass of the RC5 equal-rotation attack in the blind-oracle setting.
 *
 * A single plaintext pair is processed: both plaintexts are set to
 * (rand_L & MASK, rand_R & MASK). The loops over structure bit positions and
 * structure texts that used to surround this code are disabled. The pair is
 * encrypted under the expanded key \p S, classified with rc5_pair_is_good()
 * and passed through rc5_filters_all(). A good pair is appended to
 * \p good_pairs_vec; a pair that passes the filters has its goUP trail
 * appended to \p goup_diff_vec_2d. Exit statistics are printed to stdout.
 *
 * Requires the compile-time configuration RC5_FILTER_SECOND_PASS == 0,
 * RC5_FILTER_BLIND_ORACLE == 1 and RC5_FILTER_USE_STRUCTURES == 1 (asserted).
 *
 * \param S expanded key.
 * \param nrounds number of rounds; must be equal to NROUNDS.
 * \param dx input difference (not used by this version).
 * \param AA_last last-round matrices; only forwarded to the debug statistics
 *        when RC5_EQUAL_ROT_ATTACK_DEBUG is enabled.
 * \param A_last last-round matrices passed to the filters.
 * \param L_last last-round vector passed to the filters.
 * \param C_last last-round vector passed to the filters.
 * \param A_mid middle-round matrices passed to the filters.
 * \param L_mid middle-round vector passed to the filters.
 * \param C_mid middle-round vector passed to the filters.
 * \param goup_diff_vec_2d output: trails of the pairs that passed the filters.
 * \param good_pairs_vec output: pairs reported as good by rc5_pair_is_good().
 * \param mask_L left mask (not used by this version).
 * \param mask_R right mask (not used by this version).
 * \param rand_L left word of both plaintexts of the pair.
 * \param rand_R right word of both plaintexts of the pair.
 */
void rc5_equal_rot_attack_first_pass_blind_oracle(const WORD_T S[RC5_STAB_LEN_T],
																	const uint32_t nrounds,
																	const WORD_T dx[2], // input difference
																	const gsl_matrix* AA_last[2][2][2][2], // last round
																	const gsl_matrix* A_last[2][2][2], // last round
																	const gsl_vector* L_last,
																	const gsl_vector* C_last,
																	const gsl_matrix* A_mid[2][2], // middle round
																	const gsl_vector* L_mid,
																	const gsl_vector* C_mid,
																	std::vector<std::vector<rc5_goup_diffs_t>>* goup_diff_vec_2d,
																	std::vector<pair_t>* good_pairs_vec,
																	const WORD_T mask_L, const WORD_T mask_R,
																	const WORD_T rand_L, const WORD_T rand_R)
{
  assert(RC5_FILTER_SECOND_PASS == 0); // first pass
  assert(RC5_FILTER_BLIND_ORACLE == 1); // blind oracle
  assert(nrounds == NROUNDS);
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

  // Scratch table handed to rc5_filters_all(); released before returning.
  WORD_T** logp2hw_arr;
  uint32_t logp2hw_arr_rows = ((2 * NROUNDS) + 3);
  uint32_t logp2hw_arr_cols = WORD_SIZE;
  yaarx_alloc_matrices_2d(&logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols);

  /**
   * Initialize the Fibonacci array and the array of probability
   * thresholds from the last RC5_FIB_LEN entries of the full tables.
   */
  uint32_t fib_array_len = RC5_FIB_LEN;//= RC5_GOUP_LEVEL + 1;
  std::vector<uint32_t> fib_array;
  std::vector<double> p_thres_array;
  for(uint32_t i = (RC5_FULL_FIB_LEN - fib_array_len); i < RC5_FULL_FIB_LEN; i++) {
    fib_array.push_back(FIB[i - 2]);  // <--- last two entries are the ciphertexts
    p_thres_array.push_back(RC5_P_THRES_ARRAY[i]);
  }
  assert(p_thres_array.size() == RC5_FIB_LEN);
  assert(fib_array.size() == RC5_FIB_LEN);

  WORD_T plaintext_first[2] = {0, 0};
  WORD_T plaintext_second[2] = {0, 0};
  WORD_T ciphertext_first[2] = {0, 0};
  WORD_T ciphertext_second[2] = {0, 0};

  uint32_t cnt_cptext_pairs = 0;
  uint64_t cnt_filtered_f1 = 0;
  uint64_t cnt_filtered = 0;
  uint32_t cnt_good_all = 0;
  uint32_t cnt_good_filtered = 0;
  uint32_t cnt_good_filtered_f1 = 0;

  // DEBUG: dump the two arrays that parametrize the filters
  printf("[%s:%d] RC5_FIB_LEN %d fib_array.size() = %d\n", __FILE__, __LINE__, RC5_FIB_LEN, (uint32_t)fib_array.size());
  assert(fib_array.size() == RC5_FIB_LEN);
  printf("[%s:%d] fib_array = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
    printf("[%2d] %2d ", i, fib_array[i]);
  }
  printf("\n");
  printf("[%s:%d] p_thres_array = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
    printf("[%2d] %4.2f ", i, log2(p_thres_array[i]));
  }
  printf("\n");

  /**
   * Keeps the full trail from the goUP filter that corresponds to a
   * filtered pair. This is one element of the \ref goup_diff_vec_2d
   * array.
   */
  std::vector<rc5_goup_diffs_t> goup_diff_vec;

  // DEBUG: for counting the average Hamming weights and probabilities
#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
  uint32_t arr_len = (2*NROUNDS) + 3;
  double min_prob_arr[(2*NROUNDS) + 3] = {1.0};
  double sum_prob_arr[(2*NROUNDS) + 3] = {0.0};
  double sum_hw_arr[(2*NROUNDS) + 3] = {0.0};
  uint32_t max_hw_arr[(2*NROUNDS) + 3] = {0};
  // initialize arrays
  for(uint32_t i = 0; i < arr_len; i++) {
    min_prob_arr[i] = 1.0;
    sum_prob_arr[i] = 0.0;
    sum_hw_arr[i] = 0.0;
    max_hw_arr[i] = 0;
  }
#endif // #if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages

  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;

  // Consistency checks of the structure parameters.
  assert(RC5_FILTER_USE_STRUCTURES == 1);
  assert(RC5_STRUCTURES_NBITS <= (WORD_SIZE - RC5_LOG2W));
  uint32_t lsb_start_idx = (WORD_SIZE - RC5_STRUCTURES_NBITS); // so that we have the 80000000 difference
  assert(RC5_STRUCTURES_NBITS <= WORD_SIZE);
  assert(lsb_start_idx < WORD_SIZE);
  assert(RC5_STRUCTURES_NBITS <= (WORD_SIZE - lsb_start_idx));

  const WORD_T A_left = rand_L;//xrandom() & MASK;
  const WORD_T A_right = rand_R;//xrandom() & MASK;

  printf("[%s:%d] %s() Structures random A_L A_R %llX %llX\n", __FILE__, __LINE__, __FUNCTION__,
         (WORD_MAX_T)A_left, (WORD_MAX_T)A_right);

  /*
   * Build the single pair that is processed. Both plaintexts are identical:
   * the per-structure offsets that used to be XOR-ed in are disabled.
   */
  pair_t cp_pair = {{0,0}, {0,0}, {0,0}, {0,0}};
  cp_pair.plaintext_first[left] = (A_left) & MASK;
  cp_pair.plaintext_first[right] = (A_right) & MASK;
  cp_pair.plaintext_second[left] = (A_left) & MASK;
  cp_pair.plaintext_second[right] = (A_right) & MASK;
  cp_pair.rand[left] = A_left;
  cp_pair.rand[right] = A_right;

  /* Init the goup vector */
  goup_diff_vec.clear();

  cnt_cptext_pairs++;

  for(uint32_t i = 0; i < 2; i++) { // left pt = 0, right pt = 1
    plaintext_first[i] = cp_pair.plaintext_first[i];
    plaintext_second[i] = cp_pair.plaintext_second[i];
  }

  // encrypt pairs of texts
  rc5_encrypt(nrounds, S, plaintext_first, ciphertext_first);
  rc5_encrypt(nrounds, S, plaintext_second, ciphertext_second);

  // fill the ciphertexts into the pair
  for(uint32_t i = 0; i < 2; i++) { // left ct = 0, right ct = 1
    cp_pair.ciphertext_first[i] = ciphertext_first[i];
    cp_pair.ciphertext_second[i] = ciphertext_second[i];
  }

  assert(cp_pair.ciphertext_first[0] == ciphertext_first[0]);
  assert(cp_pair.ciphertext_second[0] == ciphertext_second[0]);
  assert(cp_pair.ciphertext_first[1] == ciphertext_first[1]);
  assert(cp_pair.ciphertext_second[1] == ciphertext_second[1]);

  // The check receives the expanded key, i.e. it is a debugging aid that
  // relies on knowledge of the key.
  bool b_good = rc5_pair_is_good(S, nrounds, cp_pair);
  if(b_good) {
    cnt_good_all++;
    cp_pair.b_good = true;
    WORD_T x1_L = cp_pair.plaintext_first[left];
    WORD_T x1_R = cp_pair.plaintext_first[right];
    WORD_T x2_L = cp_pair.plaintext_second[left];
    WORD_T x2_R = cp_pair.plaintext_second[right];

    printf("\n[%s:%d] New good pair: x (%llX %llX) xx (%llX %llX)\n", __FILE__, __LINE__,
           (WORD_MAX_T)x1_L, (WORD_MAX_T)x1_R, (WORD_MAX_T)x2_L, (WORD_MAX_T)x2_R);

    // DEBUG: a good pair must not have been stored before, neither as is
    // nor with its two texts swapped
    bool b_is_already_stored = rc5_pair_is_in_good_vec(cp_pair, (*good_pairs_vec));
    bool b_is_already_stored_swapped = rc5_pair_is_in_good_vec_swapped(cp_pair, (*good_pairs_vec));
    assert(b_is_already_stored == false);
    assert(b_is_already_stored_swapped == false);

    printf("\n[%s:%d] Add good pair to good_pairs_vec #%2d : x (%llX %llX) xx (%llX %llX)\n", __FILE__, __LINE__, cnt_good_all,
           (WORD_MAX_T)x1_L, (WORD_MAX_T)x1_R, (WORD_MAX_T)x2_L, (WORD_MAX_T)x2_R);
    good_pairs_vec->push_back(cp_pair);

#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
    rc5_good_pair_debug_statistics(S, nrounds, cp_pair, fib_array,
                                   (const gsl_matrix*(*)[2][2][2])AA_last, (const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
                                   (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid,
                                   min_prob_arr, sum_prob_arr, sum_hw_arr, max_hw_arr);
#endif // #if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
  }

  // all three filters in one
  bool b_is_good_filters_all = rc5_filters_all((const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
                                               (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid,
                                               cp_pair, b_good, fib_array, p_thres_array,
                                               logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols,
                                               &goup_diff_vec,
                                               &cnt_filtered_f1, &cnt_good_filtered_f1);

  if(b_is_good_filters_all) {
    cnt_filtered++;
#if RC5_FILTER_GOUP_DEBUG
    // NOTE(review): cnt_vec_2d and g_goup_diff_vec_2d are not declared in this
    // function -- presumably globals; confirm before enabling this branch.
    assert(RC5_DEBUG_HAVE_MEMORY == 1);
    assert(goup_diff_vec.size() == g_goup_diff_vec_2d[cnt_vec_2d].size());
    bool b_are_equal = rc5_goup_diff_vecs_are_equal(goup_diff_vec, g_goup_diff_vec_2d[cnt_vec_2d]);
    assert(b_are_equal);
    cnt_vec_2d++;
#endif  // #if RC5_FILTER_GOUP_DEBUG

#if RC5_LOG_TO_FILE // store filtered pairs in file
    FILE* fp = fopen(g_filename, "a");
    if(fp != NULL) {
      rc5_pair_print_to_file(fp, cp_pair, b_good);
      fclose(fp);
    } else {
      // Do not pass a null stream on: logging is skipped, the attack continues.
      printf("[%s:%d] WARNING! Cannot open log file %s\n", __FILE__, __LINE__, g_filename);
    }
#endif // #if RC5_LOG_TO_FILE // store filtered pairs in file

    // DEBUG
    if(b_good) {
      printf("\n[%s:%d] Good filtered: BOOM!\n", __FILE__, __LINE__);
      // check the parameters: compare the true last-round parameters of the
      // good pair with the trail produced by the filters
      eq_x_params_t good_params;
      rc5_pair_extract_last_round_params(S, nrounds, cp_pair, &good_params);
      bool b_good_filtered = rc5_filtered_pair_match_last_round_params(S, nrounds, good_params, goup_diff_vec);
      if(b_good_filtered) {
        printf("[%s:%d] Good last round parameters: BOOOOOOOOOM!!\n", __FILE__, __LINE__);
        printf("\n");
        printf("[%s:%d] Good #%10d (%llX, %llX, %llX, %2d, %2d, %d)\n", __FILE__, __LINE__,
               cnt_good_filtered, (WORD_MAX_T)good_params.dx, (WORD_MAX_T)good_params.y, (WORD_MAX_T)good_params.yy,
               good_params.rot_const, good_params.rot_const_prev, good_params.b_aux_data);
      }
      cnt_good_filtered++;
    }

    /**
     * Store the equal-rot trail corresponding to the filtered pair
     * (i.e. to the candidate good pair)
     */
    goup_diff_vec_2d->push_back(goup_diff_vec);
  }

#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
  rc5_equal_rot_attack_average_stats(fib_array, cnt_good_all, min_prob_arr, sum_prob_arr, sum_hw_arr, max_hw_arr);
#endif //#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages

  // update the GLOBALS!!
#if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages
  for(uint32_t i = 0; i < arr_len; i++) {
    g_max_hw_arr[i] += (double)max_hw_arr[i];
    g_min_prob_arr[i] += (double)min_prob_arr[i];
  }
#endif // #if RC5_EQUAL_ROT_ATTACK_DEBUG // DEBUG: statistics + counting averages

  printf("[%s:%s():%d] Exit statistics:\n", __FILE__, __FUNCTION__, __LINE__);
  printf("NROUNDS %d\n", nrounds);
  printf("WORD_SIZE %d\n", WORD_SIZE);
  printf("RC5_FILTER_CUT_HW1 %2d\n", RC5_FILTER_CUT_HW1);
  printf("RC5_ADD_APPROX  %2d\n", RC5_ADD_APPROX);
  printf("RC5_LAST_ROUND_ADD_APPROX  %2d\n", RC5_LAST_ROUND_ADD_APPROX);
  printf("RC5_ADD_APPROX_ORDER  %2d\n", RC5_ADD_APPROX_ORDER);
  printf("RC5_NTEXTS 2^%4.2f\n", log2((double)RC5_NTEXTS));
  printf("RC5_FIXED_KEY %d\n", RC5_FIXED_KEY);
  printf("RC5_FILTER_LAST_ROUND %d\n", RC5_FILTER_LAST_ROUND);
  printf("RC5_FILTER_ONETOLAST_ROUND %d\n", RC5_FILTER_ONETOLAST_ROUND);
  printf("RC5_FILTER_GOUP %d\n", RC5_FILTER_GOUP);
  printf("RC5_FILTER_GOUP_DEBUG %d\n", RC5_FILTER_GOUP_DEBUG);
  printf("RC5_DEBUG_HAVE_MEMORY %d\n", RC5_DEBUG_HAVE_MEMORY);
  printf("RC5_FILTER_GOUP_DIFF_SET %d\n", RC5_FILTER_GOUP_DIFF_SET);
  printf("#Filtered pairs f1: %lld (2^%f)\n", (long long int)cnt_filtered_f1, log2(cnt_filtered_f1));
  printf("#Filtered pairs all: %lld (2^%f)\n", (long long int)cnt_filtered, log2(cnt_filtered));
  printf("#Good pairs among filtered: %d\n", cnt_good_filtered);
  printf("#Good pairs among filtered f1: %d\n", cnt_good_filtered_f1);
  printf("#Good pairs total: %d\n", cnt_good_all);
  printf("#GoUP sets of trails: %d (2^%f)\n", (uint32_t)goup_diff_vec_2d->size(), log2(goup_diff_vec_2d->size()));

  printf("[%s:%d] A Strange Day: Good / Filtered / Good Filtered: %5d %5lld %5d \n",
         __FILE__, __LINE__, cnt_good_all, (long long int)cnt_filtered, cnt_good_filtered);

  printf("RC5_P_THRES_ARRAY = ");
  for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
    printf("[%2d] %4.2f ", i, log2(p_thres_array[i]));
  }
  printf("\n");
  printf("        FIB_ARRAY = ");
  for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
    printf("[%2d] %2d ", i, fib_array[i]);
  }
  printf("\n");

  yaarx_free_matrices_2d(logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols);
}

/* --- */

/*
[./tests/rc5-tests.cc:1030] Init X_LR XX_LR C41ECF20A8AD5348 EC58A3CB7C5BA6D2 C41ECF20A8AD5348 EC58A3CB7C5BA6D2
[./tests/rc5-tests.cc:1175] EQ 1 and 2 and 3 and 4: OK! M_LR  41 61 22 28 00000000FF8003FE 0007E000FF8003FF X_LR 00000000A8800348 0000A0007C0002D2
[./tests/rc5-tests.cc:1457] Found solution: rot  41 61 22 28 MASK 00000000FF8003FE 0007E000FF8003FF X_LR 00000000A8800348 0000A0007C0002D2
[./tests/rc5-tests.cc:1465] WARNING! Overload 3 rand L R: 6e45ec9c E9B31364

*/

/* --- */
  //  printf("[%s:%d] S1 %llX -S1 %llX -S1 >>> 5 %llX\n", __FILE__, __LINE__, S[1], -S[1], RC5_ROTL(-S[1], 59));
  //  assert(0 == 1);

#if 0 // CHANGE THE EXPANDED KEY! xxx
  //  S[2] = 0xF97E0FDA8FA3A280;
  //  S[0] = -S[2];//0xF97E0FDA8FA3A280;//0x58E7FE3B18227B57;//0x6E45EC9C;//(~0);
  //  S[1] = 0x6E45EC9C;
  //  S[0] = ~S[1];
  //  S[2] = -(RC5_ROTL((S[0] ^ S[1]), S[0]));
  //  S[3] = 0x2EC76971B1A66380;
  //  S[4] = -S[3];
  //  S[5] = -S[3];
  //  S[2] = 0;
  //  S[3] = ~0;
  //  S[4] = 0;
  //  S[0] = S[0] & (~0x3F);
  //  S[1] = S[1] & (~0x3F);
  //  S[1] |= 0x18;
  //  S[2] = S[2] & (~0x3F);
  //  S[2] |= 0x18;
  //  S[1] |= 0x17;
#endif // #if 1 // CHANGE THE EXPANDED KEY!



/* --- */

// Fragment: draw the two random constants of the structure and fill the key.
#if 1
  // Both constants are random words restricted to MASK.
  const WORD rand_L = random32() & MASK;
  const WORD rand_R = random32() & MASK;
#else
  //  uint32_t weight = 4;
  //  const WORD rand_L = 0;//gen_sparse(weight, WORD_SIZE);
  //  const WORD rand_R = random32() & MASK;
  //  const WORD rand_R = gen_sparse(weight, WORD_SIZE);
  // Alternative: as above, but the 5 least significant bits of rand_R are cleared.
  const WORD rand_L = random32() & MASK;
  const WORD rand_R = (random32() & MASK) & ~(0x1F);
#endif // #if 1

  // Key bytes: copied from g_key when RC5_FIXED_KEY is set, random otherwise.
  for(uint32_t j = 0; j < RC5_KEY_NBYTES_B; j++) {
#if RC5_FIXED_KEY
	 key[j] = g_key[j];
#else
	 key[j] = random32() & 0xFF;
#endif  // #if RC5_FIXED_KEY
  }

#endif // #if RC5_BIN_READ_INPUT_ARGS_FROM_STDIN // read input arguments from file



/* --- */


/*
 * ORIGINAL (11 Jan 2015): Compute "good" constants for the structures.
 *
 * The key is expanded with rc5_setup() and all pairs (x_L, x_R) of small
 * word values are scanned exhaustively. For every candidate, six consecutive
 * rotation amounts r1..r6 are derived from the expanded key words S[0..6]
 * (each is a word masked with RC5_ROT_MASK and then used as the amount of
 * the next RC5_ROTL). A candidate is kept only if every one of the six
 * amounts is either <= 4 or >= 56. Every kept pair is appended to rand_vec
 * as (x_L, x_R) and printed together with its rotation amounts.
 */
void test_rc5_compute_struct_const(uint8_t key[RC5_KEY_NBYTES_B],
											  std::vector<std::pair<WORD_T, WORD_T>>* rand_vec)
{
  WORD_T S[RC5_STAB_LEN_T] = {0};					// expanded key
  rc5_setup(key, S);

  // Rotation amounts strictly between rot_lo and rot_hi disqualify a candidate.
  const uint32_t rot_hi = 56;
  const uint32_t rot_lo = 4;
  // Exclusive upper bound of the scanned values of x_L and x_R.
  const uint32_t n_values = (1UL << (WORD_SIZE - rot_hi + 1));
  uint32_t n_found = 0;

  // True when the rotation amount falls in the rejected middle range.
  const auto rot_is_rejected = [&](const uint32_t rot) -> bool {
	 return (rot < rot_hi) && (rot > rot_lo);
  };

  for(WORD_T x_R = 0; x_R < n_values; x_R++) {
	 // r1 depends on x_R only, so it is filtered before scanning x_L
	 const uint32_t r1 = (x_R + S[1]) & RC5_ROT_MASK;
	 if(rot_is_rejected(r1)) {
		continue;
	 }
	 for(WORD_T x_L = 0; x_L < n_values; x_L++) {
		const WORD_T A = RC5_ROTL(((x_L + S[0]) ^ (x_R + S[1])), r1) + S[2];
		const uint32_t r2 = A & RC5_ROT_MASK;
		if(rot_is_rejected(r2)) {
		  continue;
		}

		const WORD_T B = RC5_ROTL(((x_R + S[1]) ^ A), r2) + S[3];
		const uint32_t r3 = B & RC5_ROT_MASK;
		if(rot_is_rejected(r3)) {
		  continue;
		}

		const WORD_T C = RC5_ROTL((A ^ B), r3) + S[4];
		const uint32_t r4 = C & RC5_ROT_MASK;
		if(rot_is_rejected(r4)) {
		  continue;
		}

		const WORD_T D = RC5_ROTL((B ^ C), r4) + S[5];
		const uint32_t r5 = D & RC5_ROT_MASK;
		if(rot_is_rejected(r5)) {
		  continue;
		}

		const WORD_T E = RC5_ROTL((C ^ D), r5) + S[6];
		const uint32_t r6 = E & RC5_ROT_MASK;
		if(rot_is_rejected(r6)) {
		  continue;
		}

		// All six rotation amounts passed: record and report the candidate
		rand_vec->push_back(std::pair<WORD_T, WORD_T>(x_L, x_R));
		n_found++;
		printf("[%s:%d] %5d | %2d %2d %2d %2d %2d %2d | x_L x_R %llX %llX\n", __FILE__, __LINE__, 
				 n_found, r1, r2, r3, r4, r5, r6, (WORD_MAX_T)x_L, (WORD_MAX_T)x_R);
	 }
  }
}

/* --- */

/*
 * NEW (19 Jan 2015): Compute "good" constants for the structures.
 */
void test_rc5_compute_struct_const(uint8_t key[RC5_KEY_NBYTES_B],
											  std::vector<std::pair<WORD_T, WORD_T>>* rand_vec)
{
  /*
   * Search small (x_L, x_R) values for which the six rotation amounts
   * r1..r6 computed below are all either >= rot_lim_hi or <= rot_lim_lo.
   * The round keys are first masked with mask_lim. Every hit is appended
   * to rand_vec and printed.
   *
   * \param key master key, expanded with rc5_setup()
   * \param rand_vec output list of accepted (x_L, x_R) pairs
   */
  WORD_T S[RC5_STAB_LEN_T] = {0};					// expanded key
  rc5_setup(key, S);  

  const uint32_t lim_hi = 1;
  const uint32_t lim_lo = 0;

  // Key mask: the top lim_lo bits OR-ed with everything below bit position
  // (64 - (lim_hi - 1)). A shift by the full operand width is undefined
  // behaviour, so the lim_lo == 0 case (no top bits) is handled explicitly
  // instead of evaluating ~0ULL << WORD_SIZE.
  const uint64_t mask_top = (lim_lo == 0) ? 0ULL : (~(0ULL) << (WORD_SIZE - lim_lo));
  const uint64_t mask_bottom = (~(0ULL) >> (lim_hi - 1));
  WORD_T mask_lim = (mask_top | mask_bottom);

  // Rotation amounts are accepted only outside the open interval
  // (rot_lim_lo, rot_lim_hi).
  const uint32_t rot_lim_hi = 56;
  const uint32_t rot_lim_lo = 4;

  // Upper bound of the search for both words: 2^(WORD_SIZE - (rot_lim_hi - 1)).
  WORD_T max = (1ULL << ((WORD_SIZE - (rot_lim_hi - 1)) + 0));
  uint32_t cnt = 0;

  printf("[%s:%d] mask_lim %llX max %lld\n", __FILE__, __LINE__, (WORD_MAX_T)mask_lim, (WORD_MAX_T)max);

  WORD_T s0 = S[0] & mask_lim;
  WORD_T s1 = S[1] & mask_lim;
  WORD_T s2 = S[2] & mask_lim;
  WORD_T s3 = S[3] & mask_lim;
  WORD_T s4 = S[4] & mask_lim;
  WORD_T s5 = S[5] & mask_lim;
  WORD_T s6 = S[6] & mask_lim;

  for(WORD_T i = 0; i < max; i++) {
	 WORD_T x_R = i;
	 uint32_t r1 = (x_R + s1) & RC5_ROT_MASK;
	 // r1 depends on x_R only: reject before entering the inner loop
	 if(!((r1 >= rot_lim_hi) || (r1 <= rot_lim_lo)))
		continue;
	 for(WORD_T j = 0; j < max; j++) {
		WORD_T x_L = j;
		WORD_T A = RC5_ROTL(((x_L + s0) ^ (x_R + s1)), r1) + s2;
		uint32_t r2 = A & RC5_ROT_MASK;
		if(!((r2 >= rot_lim_hi) || (r2 <= rot_lim_lo)))
		  continue;
		WORD_T B = RC5_ROTL(((x_R + s1) ^ A), r2) + s3;
		uint32_t r3 = B & RC5_ROT_MASK;
		if(!((r3 >= rot_lim_hi) || (r3 <= rot_lim_lo)))
		  continue;
		WORD_T C = RC5_ROTL((A ^ B), r3) + s4;
		uint32_t r4 = C & RC5_ROT_MASK;
		if(!((r4 >= rot_lim_hi) || (r4 <= rot_lim_lo)))
		  continue;
		WORD_T D = RC5_ROTL((B ^ C), r4) + s5;
		uint32_t r5 = D & RC5_ROT_MASK;
		if(!((r5 >= rot_lim_hi) || (r5 <= rot_lim_lo)))
		  continue;
		WORD_T E = RC5_ROTL((C ^ D), r5) + s6;
		uint32_t r6 = E & RC5_ROT_MASK;
		if(!((r6 >= rot_lim_hi) || (r6 <= rot_lim_lo)))
		  continue;
		// All six rotation amounts passed: record and report the pair.
		std::pair<WORD_T, WORD_T> new_pair (x_L, x_R);
		rand_vec->push_back(new_pair);
		cnt++;
		printf("[%s:%d] %5d | %2d %2d %2d %2d %2d %2d | x_L x_R %llX %llX\n", __FILE__, __LINE__, 
				 cnt, r1, r2, r3, r4, r5, r6, (WORD_MAX_T)x_L, (WORD_MAX_T)x_R);
	 }
  }
}

/* --- */
  //  uint32_t mask_lim = (~(0ULL) >> (lim_hi - 1));
  //  uint32_t mask_lim = (~(0ULL) << (WORD_SIZE - lim_lo));// | (~(0ULL) >> (lim_hi - 1));
  //  WORD_T mask_lim = (0xffffffffffffffffULL << 0x8ULL);// | (~(0ULL) >> (lim_hi - 1));
  //  uint32_t mask_lim_lo = (~(0ULL) >> (WORD_SIZE - lim_lo));


/* --- */
  // Archived fragment: for every (i, j) in [0, 0x40) x [0, 0x40) replace the
  // low 6 bits of rand_L / rand_R by i / j and run the attack once.
  for(uint32_t i = 0; i < 0x40; i++) {
	 for(uint32_t j = 0; j < 0x40; j++) {
		rand_L &= (~0x3F); // clear the low 6 bits
		rand_R &= (~0x3F);
		rand_L |= i;       // insert the enumerated low bits
		rand_R |= j;
		printf("[%s:%d] START STYXXX ---------------  RAND L R [%X %X] --------------------------------------- \n", __FILE__, __LINE__, i, j);
		printf("[%s:%d] rand L R %llX %llX\n", __FILE__, __LINE__, (WORD_MAX_T)rand_L, (WORD_MAX_T)rand_R);
		test_rc5_equal_rot_attack(key, rand_L, rand_R); // <- the actual work
		printf("[%s:%d] rand L R %llX %llX\n", __FILE__, __LINE__, (WORD_MAX_T)rand_L, (WORD_MAX_T)rand_R);
		printf("[%s:%d]   END STYXXX ---------------  RAND L R [%X %X] --------------------------------------- \n", __FILE__, __LINE__, i, j);
	 }
  }



/* --- */

// Archived debug fragment (its enclosing loop is not part of this dump):
// when the index i equals 1, report it together with B1 and B2.
#if 1 // DEBUG
		if(i == 1) {
		  printf("[%s:%d] Break bad at HR# %2d\n", __FILE__, __LINE__, (uint32_t)i);
		  printf("[%s:%d] B1 B2 %llX %llX\n", __FILE__, __LINE__, B1, B2);
		}
#endif // #if 1 // DEBUG

/* --- */
/*
 * Main function of the tests.
 *
 * Prints the compile-time configuration of the RC5 attack, checks with
 * asserts that the configuration macros are consistent with the selected
 * RC5_FILTER_SECOND_PASS mode (0, 1 or 2), obtains the key and the two
 * random words rand_L / rand_R and, for WORD_SIZE 16, 32 or 64, runs
 * test_rc5_equal_rot_attack(). The configuration is printed once more
 * before returning.
 *
 * When RC5_BIN_READ_INPUT_ARGS_FROM_STDIN is set, exactly four command
 * line arguments are required:
 *   RC5_FILTER_SECOND_PASS != 0 : <IN file> <OUT file> <rand_L> <rand_R>
 *   RC5_FILTER_SECOND_PASS == 0 : <filename> <key> <rand_L> <rand_R>
 * Otherwise rand_L / rand_R come from xrandom() and the key from g_key
 * (RC5_FIXED_KEY) or xrandom().
 *
 * \param argc number of command line arguments (including the program name)
 * \param argv command line arguments
 * \return 0 on completion; exits with -1 on a wrong argument count.
 */
int main(int argc, const char* argv[])
{
  // NOTE(review): this assert always fails (unless NDEBUG is defined), so the
  // function aborts immediately -- presumably this archived copy is disabled
  // on purpose; confirm before re-enabling.
  assert(0==1);
  printf("[%s:%d] ================ TEST ===============\n", __FILE__, __LINE__);
#if 1
  printf("[%s:%d] Tests, WORD_SIZE  = %d, MASK = %llX, RC5_XOR = %d\n", __FILE__, __LINE__, WORD_SIZE, (WORD_MAX_T)MASK, RC5_XOR);
  printf("[%s:%d] RC5_FILTER_SECOND_PASS %d \n", __FILE__, __LINE__, RC5_FILTER_SECOND_PASS);
  assert(RC5_FILTER_SECOND_PASS == 1);
  printf("[%s:%d] RC5_FLEX_FIB %2d\n", __FILE__, __LINE__, RC5_FLEX_FIB);
  printf("[%s:%d] RC5_FILTER_CUT_HW1 %2d\n", __FILE__, __LINE__, RC5_FILTER_CUT_HW1);
  printf("[%s:%d] RC5_ADD_APPROX %2d\n", __FILE__, __LINE__, RC5_ADD_APPROX);
  printf("[%s:%d] RC5_LAST_ROUND_ADD_APPROX %2d\n", __FILE__, __LINE__, RC5_LAST_ROUND_ADD_APPROX);
  printf("[%s:%d] RC5_ADD_APPROX_ORDER %2d\n", __FILE__, __LINE__, RC5_ADD_APPROX_ORDER);
  printf("[%s:%d] RC5_ADD_APPROX_P_THRES %f 2^%f\n", __FILE__, __LINE__, RC5_ADD_APPROX_P_THRES, log2(RC5_ADD_APPROX_P_THRES));
  printf("NROUNDS %d\n", NROUNDS);
  //  printf("RC5_NSTRUCTURES %d\n", RC5_NSTRUCTURES);
  printf("RC5_STRUCTURES_NBITS %d\n", RC5_STRUCTURES_NBITS);
  printf("RC5_STRUCTURES_NTEXTS 2^%4.2f\n", log2((double)RC5_STRUCTURES_NTEXTS));
  printf("RC5_FILTER_CIPHERTEXT_HW_LIMIT %d\n", RC5_FILTER_CIPHERTEXT_HW_LIMIT);
  printf("RC5_CIPHERTEXT_HW_LIMIT_LEFT %d\n", RC5_CIPHERTEXT_HW_LIMIT_LEFT);
  printf("RC5_CIPHERTEXT_HW_LIMIT_RIGHT %d\n", RC5_CIPHERTEXT_HW_LIMIT_RIGHT);
  printf("RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX %d\n", RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX);
  printf("RC5_GOUP_EXPAND_ADD_DEPTH %d\n", RC5_GOUP_EXPAND_ADD_DEPTH);
  printf("GoUP NL: Rounds to go upper than the bottom two = %d\n", ((RC5_FIB_LEN - 2) - RC5_GOUP_EXPAND_ADD_DEPTH));
  printf("RC5_PAIRS_SORT_BY_CIPHERTEXT_DIFF_HW %d\n", RC5_PAIRS_SORT_BY_CIPHERTEXT_DIFF_HW);
  //  printf("RC5_FILTER_GOUP_LIMIT_NVARIANTS %d\n", RC5_FILTER_GOUP_LIMIT_NVARIANTS);
  //  printf("RC5_FILTER_GOUP_MAX_NVARIANTS %d\n", RC5_FILTER_GOUP_MAX_NVARIANTS);

  // The last two FIB entries must agree with the ciphertext HW limits.
  assert(FIB[RC5_FULL_FIB_LEN - 2] == RC5_CIPHERTEXT_HW_LIMIT_LEFT);
  assert(FIB[RC5_FULL_FIB_LEN - 1] == RC5_CIPHERTEXT_HW_LIMIT_RIGHT);
#endif

#if (RC5_FILTER_SECOND_PASS == 0) // FIRST pass
  printf("[%s:%d] RC5_FILTER_SECOND_PASS = %d\n", __FILE__, __LINE__, RC5_FILTER_SECOND_PASS);
  assert(RC5_FILTER_SECOND_PASS == 0);
  assert(RC5_FILTER_CUT_HW1 == 1);
  assert(RC5_FLEX_FIB == 0);
  assert(RC5_FILTER_USE_STRUCTURES == 1);
  assert(RC5_ORACLE_KM == 0);
  assert(RC5_ORACLE_BK == 0);
  assert(RC5_FILTER_CIPHERTEXT_HW_LIMIT == 1);
  assert(RC5_FILTER_ORACLE == 0);
  assert(RC5_GOUP_EXPAND_ADD_DEPTH == ((RC5_FIB_LEN - 2) - 0));
  assert(RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX == 1);
  //  assert(RC5_BIN_READ_INPUT_ARGS_FROM_STDIN == 0);
  assert(RC5_FILTER_GOUP_LIMIT_NVARIANTS == 0);
  assert(RC5_LOG_TO_FILE == 1);
#if(WORD_SIZE == 32)
  assert((NROUNDS == 6) || (NROUNDS == 8));
#if(NROUNDS == 6)
  assert(RC5_STRUCTURES_NTEXTS == (1U << 14)); //6R
  assert(RC5_STRUCTURES_NBITS == 14); //6R
#endif // #if(NROUNDS == 6)
#if(NROUNDS == 8)
  assert(RC5_STRUCTURES_NTEXTS == (1U << 24)); //8R
  assert(RC5_STRUCTURES_NBITS == 24); //8R
#endif // #if(NROUNDS == 8)
#endif // #if(WORD_SIZE == 32)
#endif // #if (RC5_FILTER_SECOND_PASS == 0)


#if (RC5_FILTER_SECOND_PASS == 2) // post-filtering - after 1st and 2nd pass
  assert(0==1);
  printf("[%s:%d] RC5_FILTER_SECOND_PASS = %d\n", __FILE__, __LINE__, RC5_FILTER_SECOND_PASS);
  //assert(RC5_PAIRS_SORT_BY_CIPHERTEXT_DIFF_HW == 0);
  //  assert(RC5_PAIRS_SORT_BY_CIPHERTEXT_DIFF_HW == 1); // // sort the differences after (RC5_FILTER_SECOND_PASS == 2)
  assert(RC5_ORACLE_BK == 0);
  assert(RC5_ORACLE_KM == 0);
  assert(RC5_FILTER_USE_STRUCTURES == 0);
  //  assert(RC5_LOG_FILE_CONTAINS_RAND_LR == 1);
  //  assert(RC5_GOUP_EXPAND_ADD_DEPTH < ((RC5_FIB_LEN - 2) - 0));
  //  assert(RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX == 0);
  assert(RC5_BIN_READ_INPUT_ARGS_FROM_STDIN == 0);
  assert(RC5_FILTER_GOUP_LIMIT_NVARIANTS == 1);
  assert(RC5_FILTER_GOUP_MAX_NVARIANTS == 1);
  assert(RC5_FILTER_CUT_HW1 == 0);
  assert(RC5_LOG_TO_FILE == 1);
#endif // #if (RC5_FILTER_SECOND_PASS == 2)

#if (RC5_FILTER_SECOND_PASS == 1) // SECOND pass
  printf("[%s:%d] RC5_FILTER_SECOND_PASS = %d\n", __FILE__, __LINE__, RC5_FILTER_SECOND_PASS);
  //  assert(RC5_BIN_READ_INPUT_ARGS_FROM_STDIN == 1);
  assert(NROUNDS == 8);
  //  assert(RC5_ORACLE_BK == 1);
  //  assert(RC5_ORACLE_KM == 0);
  assert(RC5_LOG_FILE_CONTAINS_RAND_LR == 0); // if BK oracle
  //  assert(RC5_BIN_READ_INPUT_ARGS_FROM_STDIN == 1);
  //  assert(RC5_LOG_FILE_CONTAINS_RAND_LR == 0);
  assert(RC5_FILTER_USE_STRUCTURES == 1); // struct + BK oracle
  //  assert(RC5_FILTER_USE_STRUCTURES == 0); // no struct + BK oracle
  //  assert(RC5_ORACLE_NTEXTS <= (1ULL << 27));
  assert(RC5_GOUP_EXPAND_ADD_DEPTH == ((RC5_FIB_LEN - 2) - 0));
  assert(RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX == 1);
  assert(RC5_FILTER_GOUP_LIMIT_NVARIANTS == 0);
  assert(RC5_FILTER_CUT_HW1 == 1);
  assert(RC5_LOG_TO_FILE == 1);
#endif // #if (RC5_FILTER_SECOND_PASS == 1)

  srandom(time(NULL));

#if 1
  uint8_t key[RC5_KEY_NBYTES_B] = {0}; // master key

  //uint32_t struct_index

  /*
	* ./bin/rc5-tests filename 0x...16-byte-key rand_L rand_R
	*/
#if RC5_BIN_READ_INPUT_ARGS_FROM_STDIN // read input arguments from file

  /* Decode filename argument argv[1] */
#if RC5_FILTER_SECOND_PASS // READ input file to get filtered pairs (from 1st pass)

  if (argc != 5) {
	 std::cerr << " Usage: " << argv[0] << " Bad number of arguments: must be 4 (IN file, OUT file, rand, rand)." << std::endl;
	 exit(-1);
  }

  for(uint32_t i = 0; i < RC5_FILENAME_LEN; i++) { // zero filname array
	 g_rc5_filtered_pairs_filename[i] = 0; // global
  }
  // Bounded copy: argv[1] is user input and must not overflow the global buffer.
  snprintf(g_rc5_filtered_pairs_filename, RC5_FILENAME_LEN, "%s", argv[1]);
  printf("[%s:%d] INPUT filtered pairs filename from stdin: %s\n", __FILE__, __LINE__, g_rc5_filtered_pairs_filename);

  for(uint32_t i = 0; i < RC5_FILENAME_LEN; i++) { // zero filname array
	 g_filename[i] = 0; // global
  }
  snprintf(g_filename, RC5_FILENAME_LEN, "%s", argv[2]);
  printf("[%s:%d] OUTPUT filtered pairs filename from stdin: %s\n", __FILE__, __LINE__, g_filename);

  // get L random value from stdin
  WORD_T rand_L = 0;
  parse_rand_val_from_stdin(argv[3], &rand_L);

  // get R random value from stdin
  WORD_T rand_R = 0;
  parse_rand_val_from_stdin(argv[4], &rand_R);

  printf("[%s:%d] rand_L rand_R %llX %llX\n", __FILE__, __LINE__, (WORD_MAX_T)rand_L, (WORD_MAX_T)rand_R);
  // Dummy key and rand_l since the actual ones are extracted later from the file (or generated at random for more than 1 structure)
  for(uint32_t j = 0; j < RC5_KEY_NBYTES_B; j++) {
	 key[j] = 0;//xrandom() & 0xFF;
  }

#else // !RC5_FILTER_SECOND_PASS // WRITE input file to store filtered pairs
  assert(RC5_LOG_TO_FILE == 1); // store pairs
  if (argc != 5) {
	 std::cerr << " Usage: " << argv[0] << " Bad number of arguments: must be 4 (filename, key, rand, rand)." << std::endl;
	 exit(-1);
  }

  for(uint32_t i = 0; i < RC5_FILENAME_LEN; i++) { // zero filname array
	 g_filename[i] = 0; // global
  }
  // Bounded copy: argv[1] is user input and must not overflow the global buffer.
  snprintf(g_filename, RC5_FILENAME_LEN, "%s", argv[1]);
  printf("[%s:%d] Filename from stdin: %s\n", __FILE__, __LINE__, g_filename);

  /* Decode key argument */
  parse_key_from_stdin(argv[2], key);

  std::cout << "-- key is ";
  // Iterate over the declared key length rather than a hard-coded 16.
  for (unsigned int i = 0; i < RC5_KEY_NBYTES_B; ++i) {
	 //printf("%X ", key[i]);
	 std::cout << "0x" << FMT_HEX(2) << (unsigned int)key[i] << " ";
  }
  std::cout << std::endl;

  // get L random value from stdin
  WORD_T rand_L = 0;
  parse_rand_val_from_stdin(argv[3], &rand_L);

  // get R random value from stdin
  WORD_T rand_R = 0;
  parse_rand_val_from_stdin(argv[4], &rand_R);

  printf("[%s:%d] rand_L rand_R %llX %llX\n", __FILE__, __LINE__, (WORD_MAX_T)rand_L, (WORD_MAX_T)rand_R);
#if 1 // DEBUG print key as a full byte string
  printf("[%s:%d] key form stdin: A_L A_R = 0x", __FILE__, __LINE__);
  for(uint32_t j = 0; j < RC5_KEY_NBYTES_B; j++) {
	 printf("%02X", key[j]);
  }
  printf(" 0x%llX 0x%llX\n", (WORD_MAX_T)rand_L, (WORD_MAX_T)rand_R);
  printf("\n");
  //  printf("[%s:%d] A_L A_R 0x%X 0x%X\n", __FILE__, __LINE__, rand_L, rand_R);
#endif // #if 1 // DEBUG print key as a full byte string

#endif // #if RC5_FILTER_SECOND_PASS

#else // #if RC5_BIN_READ_INPUT_ARGS_FROM_STDIN 

  // No command line input: draw the random words (and, unless a fixed key
  // is configured, the key bytes) from xrandom().
  const WORD_T rand_L = xrandom() & MASK;
  const WORD_T rand_R = xrandom() & MASK;

  for(uint32_t j = 0; j < RC5_KEY_NBYTES_B; j++) {
#if RC5_FIXED_KEY
	 key[j] = g_key[j];
#else
	 key[j] = xrandom() & 0xFF;
#endif  // #if RC5_FIXED_KEY
  }

#endif // #if RC5_BIN_READ_INPUT_ARGS_FROM_STDIN // read input arguments from file

#endif // #if 1

  //  test_xdp_sub_fixed_x_approx_all();
  //  test_xdp_sub_fixed_x_approx();
  //  test_xdp_sub_approx_all();
  //  test_xdp_sub_approx();
  //  test_xdp_add_fixed_x_approx_all();
  //  test_xdp_add_fixed_x_approx();
  //  test_xdp_add_approx_all();
  //  test_xdp_add_approx();
  //  test_sub_bitwise();
  //  test_add_bitwise();
  //  test_add_sub_approx_any_order();
  //  test_add_sub_approx();
  //  test_add_block_approx();
  //  test_add_block_approx_all();

  //  test_rc5_xdp_add_mid_round_diff_set_out_all();
  //  test_rc5_xdp_add_mid_round_diff_set_out();
  //  test_rc5_xdp_add_mid_round_all();

  //  test_rc5_xdp_add_last_round_diff_set_out_all();
  //  test_rc5_xdp_add_last_round_diff_set_out();
  //  test_rc5_xdp_add_last_round();
  //  test_rc5_xdp_add_last_round_all();

  //  test_rc5_xdp_add_first_round_all();

  //  test_rc5_mid_round_matrices();
  //  test_rc5_mid_round_eq_xy_find_solutions_all();

  //  test_rc5_filtered_pairs_neutral_bits(); // <-
  //  test_rc5_filtered_pairs_slide_win_vote();
  //  test_rc5_last_round_matrices();
  //  test_rc5_last_round_eq_key();
  //  test_rc5_last_round_eq_x_find_solutions_exper()
  //  test_rc5_last_round_eq_x_find_solutions_rec_all();
  //  test_rc5_last_round_eq_x_find_solutions_rec();
  //  test_rc5_last_round_eq_x_bit_seq_match();
  //  test_rc5_last_round_eq_x_bit_seq_match_all();
  //  test_rc5_last_round_eq_x_bit_seq_match_rand();

  //  test_rc5_key_struct();

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32) || (WORD_SIZE == 64))
  //  test_rc5_lwcs(argc, argv);
  //  test_rc5();
  //  test_rc5_differential();
  //  test_rc5_good_pairs_collect(); // <-
  //  test_rc5_equal_rot_differential();
  //  test_rc5_fib_main();
  //  test_rc5_filter_go_up_debug_main();
  //  test_rc5_encrypt_get_intermediate_values();
  test_rc5_equal_rot_attack(key, rand_L, rand_R); // <- the only test currently enabled
  //  test_rc5_log_file_read();
  //  test_rc5_good_pairs_goup_filter_debug();
  //  test_rc5_equal_rot_attack_compute_average_prob_hw();
  //  test_rc5_filtered_pairs_read_from_file();
  //  test_rc5_last_round_rot_const_keyrec();
  //  test_rc5_compute_structures();
  //  test_rc5_filtered_pairs_process_from_file();
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32) || (WORD_SIZE == 64))
#if 1
  // Repeat the configuration after the run.
  printf("NROUNDS %d\n", NROUNDS);
  printf("RC5_NTEXTS 2^%4.2f\n", log2((double)RC5_NTEXTS));
  printf("RC5_ORACLE_NTEXTS 2^%4.2f\n", log2((double)RC5_ORACLE_NTEXTS));
  printf("RC5_FILTER_SECOND_PASS %d\n", RC5_FILTER_SECOND_PASS);
  //  printf("RC5_NSTRUCTURES %d\n", RC5_NSTRUCTURES);
  printf("RC5_STRUCTURES_NBITS %d\n", RC5_STRUCTURES_NBITS);
  printf("RC5_STRUCTURES_NTEXTS 2^%4.2f\n", log2((double)RC5_STRUCTURES_NTEXTS));
  printf("RC5_FILTER_ORACLE %d ", RC5_FILTER_ORACLE);
  printf("RC5_ORACLE_KM %d ", RC5_ORACLE_KM);
  printf("RC5_ORACLE_BK %d\n", RC5_ORACLE_BK);
  printf("RC5_FILTER_USE_STRUCTURES %d\n", RC5_FILTER_USE_STRUCTURES);
  printf("RC5_LOG_TO_FILE %d\n", RC5_LOG_TO_FILE);
  printf("RC5_FIXED_KEY %d\n", RC5_FIXED_KEY);
  printf("RC5_FILTER_CIPHERTEXT_HW_LIMIT %d\n", RC5_FILTER_CIPHERTEXT_HW_LIMIT);
  printf("RC5_CIPHERTEXT_HW_LIMIT_LEFT %d\n", RC5_CIPHERTEXT_HW_LIMIT_LEFT);
  printf("RC5_CIPHERTEXT_HW_LIMIT_RIGHT %d\n", RC5_CIPHERTEXT_HW_LIMIT_RIGHT);
  printf("RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX %d\n", RC5_FILTER_GOUP_ADD_EQUALS_XOR_APPROX);
  printf("RC5_GOUP_EXPAND_ADD_DEPTH %d\n", RC5_GOUP_EXPAND_ADD_DEPTH);
  printf("GoUP NL: Rounds to go upper than the bottom two = %d\n", ((RC5_FIB_LEN - 2) - RC5_GOUP_EXPAND_ADD_DEPTH));
  //  printf("RC5_FILTER_GOUP_LIMIT_NVARIANTS %d\n", RC5_FILTER_GOUP_LIMIT_NVARIANTS);
  //  printf("RC5_FILTER_GOUP_MAX_NVARIANTS %d\n", RC5_FILTER_GOUP_MAX_NVARIANTS);
  printf("[%s:%d] RC5_FILTER_SECOND_PASS %d \n", __FILE__, __LINE__, RC5_FILTER_SECOND_PASS);
  printf("RC5_PAIRS_SORT_BY_CIPHERTEXT_DIFF_HW %d\n", RC5_PAIRS_SORT_BY_CIPHERTEXT_DIFF_HW);
#endif
  return 0;
}



/* --- */

/* 
NROUNDS 8
WORD_SIZE 64
RC5_FILTER_CUT_HW1  1
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 6550657 (2^22.643208)
#Filtered pairs all: 2 (2^1.000000)
#Good pairs among filtered: 2
#Good pairs among filtered f1: 70
#Good pairs total: 124
#GoUP sets of trails: 2 (2^1.000000)
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -2.32 [ 8] -2.32 [ 9] -2.32 [10] -11.00 [11] -13.00
        FIB_ARRAY = [ 0]  4 [ 1]  4 [ 2]  4 [ 3]  4 [ 4]  4 [ 5]  4 [ 6]  8 [ 7]  8 [ 8]  8 [ 9] 20 [10] 20 [11] 20

real    179m1.366s
user    179m5.240s
sys     0m0.012s

 */

/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
/*
 * Probability thresholds, one entry per position: each entry has the
 * form 1 / (2^k + c). The last two entries depend on NROUNDS.
 *
 * NOTE(review): the separating comma before the NROUNDS-dependent tail
 * was missing, which made the initializer ill-formed for NROUNDS <= 10.
 * With the tail the list has 28 entries (26 for NROUNDS > 10) -- confirm
 * that this matches RC5_FULL_FIB_LEN.
 */
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)), // trailing comma: more entries may follow
#if(NROUNDS <= 8)
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
#endif // NROUNDS <= 8
#if(NROUNDS == 9)
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1))
#endif // NROUNDS == 9
#if(NROUNDS == 10)
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1))
#endif // #if(NROUNDS == 10)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


/* --- */

/*
 * Estimate experimentally the probability that a chosen-plaintext pair
 * with input XOR difference \p dx produces the same rotation amounts in
 * both encryptions at every step of \p nrounds rounds of RC5. These
 * are the "good pairs" as defined in [Biryukov, Kushilevitz, Improved
 * Differentials of RC5].
 *
 * RC5_NTEXTS random plaintexts are drawn with xrandom(); for each one
 * the pair (x, x ^ dx) is partially encrypted and rejected at the
 * first step where the rotation amounts differ. The fraction of
 * surviving pairs is printed; nothing is returned or stored.
 *
 * \param S the expanded key
 * \param nrounds number of rounds to encrypt
 * \param dx input XOR difference (dx[0] for the left, dx[1] for the right word)
 */
void rc5_equal_rot_differential(const WORD_T S[RC5_STAB_LEN_T],
										  const uint32_t nrounds,
										  const WORD_T dx[2])
{
  const uint32_t rot_bits = (WORD_SIZE - 1); // selects the bits compared as rotation amount
  uint64_t n_good = 0;
  uint64_t n_tried = 0;

  for(; n_tried < RC5_NTEXTS; n_tried++) {

    WORD_T pt[2] = {0, 0};
    pt[0] = xrandom() & MASK;
    pt[1] = xrandom() & MASK;

    const WORD_T pt_dx[2] = {(WORD_T)(pt[0] ^ dx[0]), (WORD_T)(pt[1] ^ dx[1])};

    // Initial key addition for both members of the pair.
    WORD_T L1 = pt[0] + S[0];
    WORD_T R1 = pt[1] + S[1];
    WORD_T L2 = pt_dx[0] + S[0];
    WORD_T R2 = pt_dx[1] + S[1];

    bool same_rot = true;

    for(WORD_T r = 1; r <= nrounds; r++) {
      // First half-round rotates by the right word.
      same_rot = ((R1 & rot_bits) == (R2 & rot_bits));
      if(same_rot) {
        L1 = RC5_ROTL(L1^R1, R1) + S[2*r];
        L2 = RC5_ROTL(L2^R2, R2) + S[2*r];
        // Second half-round rotates by the new left word.
        same_rot = ((L1 & rot_bits) == (L2 & rot_bits));
      }
      if(!same_rot) {
        break;
      }
      R1 = RC5_ROTL(R1^L1, L1) + S[2*r+1];
      R2 = RC5_ROTL(R2^L2, L2) + S[2*r+1];
    }

    if(same_rot) {
      n_good++;
    }
  }

  const uint64_t n_all = n_tried;
  const double prob = (double)n_good / (double)n_all;
  printf("[%s:%d] %2dR p(%llX %llX -> *) = %f 2^%f | %d 2^%f\n", __FILE__, __LINE__, nrounds, 
         (WORD_MAX_T)dx[0],  (WORD_MAX_T)dx[1], prob, log2(prob), (uint32_t)n_good, log2(n_all));
}


/* --- */

/* 

key[16] = {0xC3, 0xA6, 0x5B, 0xFD, 0xBA, 0x5A, 0x70, 0x8F, 0xAA, 0x9, 0x2A, 0x5E, 0x29, 0x3C, 0xA3, 0xCD};
expanded_key[26] = {0xC90E2C7CF22852B3, 0x3D61E414D5939169, 0x6177A1E3AD50616A, 0x5E24E10C0C907ACB, 0x415DD3EE9B8A170B, 0xCAB5D0ADD3EC5315, 0x27D06007DE52D388, 0x761DBB359B21D7E8, 0x8429D231A8451DD1, 0x913530AA90D85AF3, 0xF94DCBEC958A6109, 0x3317C03389E49F7B, 0xAF62CB4F1ED43F55, 0x6A950738CF1FF1EA, 0x4B4DD49E7AFE0573, 0x8B0C936989AD4664, 0x2106DAD64FD44065, 0x352891D73D57861C, 0xF0D68E04DA8046DE, 0xEEA176E11B0653E2, 0x7F0A358062C900E8, 0x2A13629500051822, 0xDD23DF57216FC8EB, 0xE20B45D9AE94BDBD, 0x7FE63C5958022543, 0xD762467C50074356};
[./src/rc5-dc.cc:2951]  8R p(8000000000000000 8000000000000000 -> *) = 0.000001 2^-20.192645 | 7 2^23.000000

 */

/* --- */

// Archived fragment: choice of the two random words.
#if 1
  // Active variant: both words fully random.
  const WORD_T rand_L = xrandom() & MASK;
  const WORD_T rand_R = xrandom() & MASK;
#else
  //  uint32_t weight = 4;
  //  const WORD_T rand_L = 0;//gen_sparse(weight, WORD_SIZE);
  //  const WORD_T rand_R = xrandom() & MASK;
  //  const WORD_T rand_R = gen_sparse(weight, WORD_SIZE);
  // Disabled variant: the low 5 bits of the right word are cleared.
  const WORD_T rand_L = xrandom() & MASK;
  const WORD_T rand_R = (xrandom() & MASK) & ~(0x1F);
#endif // #if 1

/* --- */

/* Number of chosen plaintexts, selected by the number of rounds. */
#if (NROUNDS <= 8)
#define RC5_NTEXTS (1ULL << 23)
#elif (NROUNDS == 9)
#define RC5_NTEXTS (1ULL << 25)
#else /* NROUNDS >= 10: placeholder value */
#define RC5_NTEXTS (1ULL << 23)
#endif /* NROUNDS */


/* --- */
  // rc5_equal_rot_attack_first_pass_structures : first pass structures 
  // rc5_equal_rot_attack_second_pass : no struct, no oracles
  // rc5_equal_rot_attack_second_pass_structures : structures, no oracle
  // rc5_equal_rot_attack_second_pass_structures_oracle_bk
  // rc5_equal_rot_attack_second_pass_structures_oracle_km
  // rc5_equal_rot_attack_post_process



/* --- */
/*
 * Generate a random word from random(): a 32-bit value when
 * WORD_SIZE <= 32, a 64-bit value otherwise (the name random32 is kept
 * in both cases).
 *
 * Each 32-bit half is built from two random() calls as
 * first ^ (second << 16). The calls are made in separate statements so
 * that their order is fixed, and the results are converted to uint32_t
 * before shifting so that the shift cannot overflow a signed long.
 */
#if(WORD_SIZE <= 32)
uint32_t random32()
{
  const uint32_t lo = (uint32_t)random();
  const uint32_t hi = (uint32_t)random();
  return (lo ^ (hi << 16));
}
#else // #if(WORD_SIZE <= 32)
uint64_t random32()
{
  const uint32_t l_lo = (uint32_t)random();
  const uint32_t l_hi = (uint32_t)random();
  const uint32_t r_lo = (uint32_t)random();
  const uint32_t r_hi = (uint32_t)random();
  const uint32_t rand_L = (l_lo ^ (l_hi << 16)); // upper 32 bits of the result
  const uint32_t rand_R = (r_lo ^ (r_hi << 16)); // lower 32 bits of the result
  const uint64_t rand64 = ((uint64_t)rand_L << 32) | (rand_R);
  return rand64;
}
#endif // #if(WORD_SIZE <= 32)


/* --- */

#if 0 // DEBUG
			 // Disabled trace (enclosing function not part of this dump): for one
			 // hard-coded (depth, s, D[depth]) combination print the current
			 // go-up state before moving to depth - 1.
			 if(
				 //(depth == 9) && (s == 12) && (dx_vec[i] == 0x38000C00) && (ds_array_new.D[depth] == 0x30000)
				 ((depth == 5) && (s == 19) && (ds_array->D[depth] == 0xC0000000))
				 ) {
				printf("[%s:%d] depth %2d hw %2d pth 2^%4.2f s %2d v[%3d / %3d] %8X %8X => ", __FILE__, __LINE__, 
						 depth,  hw_thres, log2(p_thres), s, i, (uint32_t)dx_vec.size(), dx_vec[i], RC5_ROTR(dx, s));
				printf("Add D[%2d] s[%2d] %8X %2d | D[%2d] %8X\n", depth - 1, depth, 
						 ds_array_new.D[depth - 1], ds_array_new.S[depth], depth, ds_array_new.D[depth]);
				printf("[%s:%d] Go up to depth %2d\n", __FILE__, __LINE__, (depth - 1));
			 }
#endif // #if 0 // DEBUG


/* --- */
#if 0 // DEBUG
		  // Disabled trace (enclosing function not part of this dump).
		  // At depth 1: print the threshold, the difference D[depth], the
		  // number of candidate differences and the rotation amount s.
		  if(depth == 1) {
			 printf("[%s:%d] p_thres 2^%f CHECKPOINT 2 D %8X vec_size %d s = %d\n", __FILE__, __LINE__, 
					  log2(p_thres), ds_array->D[depth], (uint32_t)dx_vec.size(), s);
		  }

		  // At depth 1 with a zero difference: print the candidate count only.
		  if((depth == 1) && (ds_array->D[depth] == 0x0)) {
//		  if((depth == 1) && (s == 16) && (ds_array->D[depth] == 0x0)) {
			 printf("[%s:%d] vec size %d\n", __FILE__, __LINE__, (uint32_t)dx_vec.size());
			 //			 assert(0 == 1);
		  }

		  // Dump every candidate in dx_vec for the one hard-coded
		  // (depth, s, D[depth]) combination left enabled below.
		  for(uint32_t v = 0; v < (uint32_t)dx_vec.size(); v++) {
			 if(
				 //				 ((depth == 9) && (s == 12) && (dx_vec[v] == 0x38000C00)) ||
				 //				 ((depth == 8) && (s == 18) && (ds_array->D[depth] == 0xC0008000)) ||
				 //				 ((depth == 7) && (s == 0) && (ds_array->D[depth] == 0x40008000)) ||
				 //				 ((depth == 6) && (s == 17) && (ds_array->D[depth] == 0x8000))
				 //				 ((depth == 5) && (s == 19) && (ds_array->D[depth] == 0xC0000000))
				 //				 ((depth == 4) && (s == 16) && (ds_array->D[depth] == 0x0))
				 //				 ((depth == 3) && (s == 0) && (ds_array->D[depth] == 0x1800))
				 //				 ((depth == 2) && (s == 13) && (ds_array->D[depth] == 0x800))
				 ((depth == 1) && (s == 16) && (ds_array->D[depth] == 0x0))
				 ) {

				printf("[%s:%d] depth %2d hw %2d pth 2^%4.2f s %2d v[%3d / %3d] %8X\n", __FILE__, __LINE__, 
						 depth,  hw_thres, log2(p_thres), s, v, (uint32_t)dx_vec.size(), dx_vec[v]);
			 }
		  }
#endif // #if 0 // DEBUG


/* --- */

/*
 * Print per-position statistics collected over the good pairs: average
 * Hamming weight, average probability (as log2), the FIB array, the
 * maximum Hamming weight and the minimum probability (as log2).
 *
 * The sums in \p sum_hw_arr and \p sum_prob_arr are converted to
 * averages IN PLACE by dividing by cnt_good_all; when cnt_good_all is 0
 * both arrays are left unchanged, so no division by zero takes place.
 *
 * In the "max hw_arr" line only the last RC5_FIB_LEN positions are
 * printed; a value that exceeds the corresponding fib_array entry is
 * wrapped in braces.
 *
 * \param min_prob_arr minimum probability per position
 * \param sum_prob_arr sum of probabilities per position (overwritten by the average)
 * \param sum_hw_arr sum of Hamming weights per position (overwritten by the average)
 * \param max_hw_arr maximum Hamming weight per position
 */
void rc5_equal_rot_attack_averagre_stats_print(double min_prob_arr[(2*NROUNDS) + 3],
															  double sum_prob_arr[(2*NROUNDS) + 3],
															  double sum_hw_arr[(2*NROUNDS) + 3],
															  uint32_t max_hw_arr[(2*NROUNDS) + 3])
{
  const uint32_t arr_len = (2*NROUNDS) + 3;
  printf("\n");
  printf("[%s:%d]   average hw_arr = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < arr_len; i++) {
	 if(cnt_good_all > 0) {
		sum_hw_arr[i] = sum_hw_arr[i] / (double)cnt_good_all;
	 }
	 printf("%4.2f ", sum_hw_arr[i]);
  }
  printf("\n");
  printf("[%s:%d] average prob_arr = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < arr_len; i++) {
	 // Same guard as for the Hamming weights: never divide by a zero count.
	 if(cnt_good_all > 0) {
		sum_prob_arr[i] /= (double)cnt_good_all;
	 }
	 printf("%4.2f ", log2(sum_prob_arr[i]));
  }
  printf("\n");
  printf("[%s:%d]        FIB_ARRAY = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
	 printf("%2d ", fib_array[i]);
  }
  printf("\n");
  printf("[%s:%d]       max hw_arr = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < arr_len; i++) {
	 // Only the last RC5_FIB_LEN positions have a matching fib_array entry.
	 if((arr_len - i) <= RC5_FIB_LEN) {
		const uint32_t j = (RC5_FIB_LEN - (arr_len - i));
		if(max_hw_arr[i] > fib_array[j]) {
		  printf("{%2d} ", max_hw_arr[i]); // exceeds the FIB bound
		} else {
		  printf("%2d ", max_hw_arr[i]);
		}
	 }
  }
  printf("\n");
  printf("[%s:%d]     min prob_arr = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < arr_len; i++) {
	 printf("%4.2f ", log2(min_prob_arr[i]));
  }
  printf("\n");
}




/* --- */

// Disabled fragment (enclosing function not part of this dump): selects the
// values A_left / A_right from one of three sources -- the command line
// words rand_L / rand_R, the values stored with the pair cp_pair_j, or
// fresh random values -- depending on the configuration macros.
#if 0
#if RC5_BIN_READ_INPUT_ARGS_FROM_STDIN
	 WORD A_left_tmp = 0;
	 WORD A_right_tmp = 0;
	 if((rand_L == 0) && (rand_R == 0)) { // if 0 from input - get from the pair
		assert(RC5_LOG_FILE_CONTAINS_RAND_LR == 1);
		A_left_tmp = cp_pair_j.rand[left];
		A_right_tmp = cp_pair_j.rand[right];
		printf("[%s:%d] rand_L rand_R from stdin: %X %X\n", __FILE__, __LINE__, rand_L, rand_R);
		printf("[%s:%d] A_left A_right from pair: %8X %8X\n", __FILE__, __LINE__, A_left_tmp, A_right_tmp);
	 } else { // get from the input
		A_left_tmp = rand_L;
		A_right_tmp = rand_R;
		printf("[%s:%d] A_left A_right from stdin: %8X %8X\n", __FILE__, __LINE__, A_left_tmp, A_right_tmp);
	 }
	 const WORD A_left = A_left_tmp;
	 const WORD A_right = A_right_tmp;;
#else // !RC5_BIN_READ_INPUT_ARGS_FROM_STDIN
#if RC5_LOG_FILE_CONTAINS_RAND_LR // log file contains rand_L, rand_R values for the structure
	 const WORD A_left = cp_pair_j.rand[left];
	 const WORD A_right = cp_pair_j.rand[right];
	 printf("[%s:%d] A_left A_right from pair: %8X %8X\n", __FILE__, __LINE__, A_left, A_right);
#else // !RC5_LOG_FILE_CONTAINS_RAND_LR : generate random values
	 const WORD A_left = random32() & MASK;
	 const WORD A_right = random32() & MASK;
	 printf("[%s:%d] A_left A_right random: %8X %8X\n", __FILE__, __LINE__, A_left, A_right);
#endif // #if RC5_LOG_FILE_CONTAINS_RAND_LR
#endif //#if RC5_BIN_READ_INPUT_ARGS_FROM_STDIN
#endif // #if 0


/* --- */

// Disabled fragment (enclosing loop not part of this dump): skip pairs that
// are already stored in good_pairs_vec, either as-is or swapped.
#if 0
		  if(b_is_already_stored) {
			 printf("[%s:%d] DUPL GOOD SKIP: Duplicate pairs detected in good_pairs_vec: %d %d . Skipping filtration.\n", 
					  __FILE__, __LINE__, b_is_already_stored, b_is_already_stored_swapped);
			 continue;
			 // NOTE(review): the printf/continue below repeats the two statements
			 // above and is unreachable after the first continue.
			 printf("[%s:%d] DUPL GOOD SKIP: Duplicate pairs detected in good_pairs_vec: %d %d . Skipping filtration.\n", 
					  __FILE__, __LINE__, b_is_already_stored, b_is_already_stored_swapped);
			 continue;
		  }
		  if(b_is_already_stored_swapped) {
			 printf("[%s:%d] SWAP GOOD SKIP: Swapped pairs detected in good_pairs_vec: %d %d . Skipping filtration.\n", 
					  __FILE__, __LINE__, b_is_already_stored, b_is_already_stored_swapped);
			 continue;
		  }
#endif


// ---

// NOTE(review): archived signature only -- neither a body nor a terminating
// ';' follows in this dump, so this is not a valid declaration as it stands.
void rc5_equal_rot_attack_first_pass(const WORD S[RC5_STAB_LEN_T],
												 const uint32_t nrounds,
												 const WORD dx[2], // input difference
												 const gsl_matrix* A_last[2][2][2], // last round
												 const gsl_vector* L_last,
												 const gsl_vector* C_last,
												 const gsl_matrix* A_mid[2][2], // middle round
												 const gsl_vector* L_mid,
												 const gsl_vector* C_mid,
												 std::vector<std::pair<WORD, WORD>> pt_struct_vec, // structures of plaintexts
												 std::vector<std::vector<rc5_goup_diffs_t>>* goup_diff_vec_2d,
												 std::vector<pair_t>* good_pairs_vec,
												 const WORD rand_L, 
												 const WORD rand_R) // for debug

// ---
  // Archived call fragment: the matrix arrays are cast to pointer-to-array
  // form for the call; the result is stored in ret_ext and the last argument
  // is passed by address.
  ret_ext = rc5_filter_go_up_nl(
										  (const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
										  (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid, 
										  cp_pair, fib_array, p_thres_array, 
										  logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols,
										  &goup_diff_vec);

/* --- */


// NOTE(review): archived, syntactically incomplete data -- the brace groups on
// each row of g_good_pairs are not separated by commas, and the g_key
// initializer below is cut off in the middle of the list.
WORD g_good_pairs[2][2][2] = {
{{0x55FA1B30, 0x6E8BBABE}, {0x54FA1B30, 0x6F8BBABE}} {{0x9C1210D7, 0x70B19163}, {0x9C3210D7, 0x70B1A283}} ,
{{0xACCB4CB0, 0x97BAED3E}, {0x8CCB4CB0, 0xB7BAED3E}} {{0x57A6FD78, 0x6AE61961}, {0x8A90FC18, 0x8A5A5DD8}} };
const uint32_t g_key[16] = {0x6, 0x81, 0x29, 0x8C, 0x52, 0x91, 0x13, 0x7E, 0xE7, 0xE7, 0x48, 0x14, 0xD3, 0x4A, 0


// ---
  // Archived debug fragment: print RC5_FILTERED_PAIRS_FILE followed by
  // ".0x" and the 16 key bytes in hex, then stop via a failing assert.
  printf("%s.0x%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X", RC5_FILTERED_PAIRS_FILE, 
			 key[ 0], key[ 1], key[ 2], key[ 3], 
			 key[ 4], key[ 5], key[ 6], key[ 7], 
			 key[ 8], key[ 9], key[10], key[11], 
			 key[12], key[13], key[14], key[15]);
  assert(0 == 1);


// ---
      // Archived debug fragment: at depth 1 print the depth together with the
      // two bounds (RC5_FIB_LEN - 2) and RC5_GOUP_EXPAND_ADD_DEPTH.
      if(depth == 1) {
		    printf("[%s:%d] depth = %d\n", __FILE__, __LINE__, depth);
			 printf("[%s:%d] ((depth = %d < (RC5_FIB_LEN - 2) = %d) && (depth = %d >= RC5_GOUP_EXPAND_ADD_DEPTH = %d))\n", 
					  __FILE__, __LINE__, depth, (RC5_FIB_LEN - 2), depth, RC5_GOUP_EXPAND_ADD_DEPTH);
	   }

// ---
		// Archived debug fragment: report depth and s for one hard-coded
		// difference at depth 8.
		if((depth == 8) && (ds_array->D[depth] == 0xC0008000)) {
		  printf("[%s:%d] depth = %d s %2d\n", __FILE__, __LINE__, depth, s);
		}

// ---

/*
rc5-log.txt

2 A5 1C C E 12 8D CA 19 C AA 35 99 98 F0 64 
F3F99060 B0B899C1 E3F99060 A0B899C1 B3E15A22 A06E76A2 6EE17022 946E2EA0 1 b8107ca0 fb517501
*/

// ---

// Probability thresholds RC5_P_THRES_ARRAY, one entry per index of an array
// of length RC5_FULL_FIB_LEN. Every entry is written as 1 / (2^k + c).
// Two alternative tables are given; the "General Case" one is disabled by
// `#if 0`, so the "Special case" table is the one that gets compiled
// (and only when WORD_SIZE is 16 or 32).
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#if 0 // General Case
// determined as average of MINs over 32 keys 
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  // The last two entries depend on the number of rounds.
#if (NROUNDS >= 9) 
  ((double)1.0 / (double)((1U << 5) + 1)), // <---
  ((double)1.0 / (double)((1U << 8) + 1))
#else // original
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
#endif // #if (NROUNDS >= 9)
};
#else // Special case
// determined as average of MINs over 32 keys
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  // The last two entries are selected by NROUNDS. The three branches below
  // cover NROUNDS <= 8, == 9 and == 10 only: for NROUNDS > 10 none of them
  // applies and the initializer list ends after the 24 entries above, so any
  // remaining array elements are zero-initialized.
  // NOTE(review): presumably RC5_FULL_FIB_LEN is 26 -- confirm.
#if(NROUNDS <= 8)
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
#endif // NROUNDS <= 8
#if(NROUNDS == 9)
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1))
#endif // NROUNDS == 9
#if(NROUNDS == 10)
  // Alternative values that were tried are kept commented out.
  //  ((double)1.0 / (double)((1U <<  5) + 1)),
  //  ((double)1.0 / (double)((1U <<  5) + 1))
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
  //  ((double)1.0 / (double)((1U <<  10) + 1)),
  //  ((double)1.0 / (double)((1U <<  10) + 1))
#endif // #if(NROUNDS == 10)
};
#endif // #if 0 // General Case
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


// ---

#if RC5_FILTER_GOUP_LIMIT_NVARIANTS // Limit the number of stored variants for lower memory (for DEBUG)
		b_found = (goup_variants_hash_map->find(*ds_array) == goup_variants_hash_map->end());
#else
		rc5_goup_diffs_hash goup_variants_hash_function;
		uint32_t hash_val = goup_variants_hash_function(*ds_array);
		std::pair<rc5_goup_diffs_t, uint32_t> new_pair (*ds_array, hash_val);
		uint32_t old_size = goup_variants_hash_map->size();
		goup_variants_hash_map->insert(new_pair);
		uint32_t new_size = goup_variants_hash_map->size();
		b_found = (new_size == old_size);
#endif // #if RC5_FILTER_GOUP_LIMIT_NVARIANTS

// ---
#if RC5_FILTER_GOUP_LIMIT_NVARIANTS // Limit the number of stored variants for lower memory (for DEBUG)
		if(goup_variants_hash_map->size() < RC5_FILTER_GOUP_MAX_NVARIANTS) {
		  goup_variants_hash_map->insert(new_pair);
		}
#else // !RC5_FILTER_GOUP_LIMIT_NVARIANTS
		goup_variants_hash_map->insert(new_pair);
#endif // #if RC5_FILTER_GOUP_LIMIT_NVARIANTS


// ---
#if RC5_FILTER_GOUP_LIMIT_VARIANTS
			 uint32_t limit = RC5_FILTER_GOUP_LIMIT;
			 if(goup_variants_hash_map->size() > limit) {
				return true;
			 }
#endif // #if RC5_FILTER_GOUP_LIMIT_VARIANTS

// ---
#if RC5_FILTER_SECOND_PASS == 2
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  1) + 0)),
  ((double)1.0 / (double)((1U <<  1) + 0)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),
  ((double)1.0 / (double)((1U <<  10) + 1)),
  ((double)1.0 / (double)((1U <<  13) + 1)),
  ((double)1.0 / (double)((1U <<  12) + 1)),
  ((double)1.0 / (double)((1U <<  11) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1))
};
#endif // #if RC5_FILTER_SECOND_PASS == 2


/* --- */
/* 
[./src/rc5-dc.cc:5038] Unique variants = 2 (2^1.000000)
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -4.09 [ 8] -5.04 [ 9] -5.04 [10] -5.04 [11] -5.04 
        FIB_ARRAY = [ 0]  2 [ 1]  2 [ 2]  2 [ 3]  2 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  2 [ 8]  2 [ 9]  5 [10]  5 [11]  5 
        FIB_ARRAY = [ 0]  2 [ 1]  2 [ 2]  2 [ 3]  2 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  2 [ 8]  2 [ 9]  5 [10]  5 [11]  5 
[./tests/rc5-tests.cc:830] #GoUP sets of trails: 8 (2^3.000000)
WORD g_good_pairs[16][2][2] = {
{{0x94616D3A, 0x44ABEC77}, {0x9461653A, 0x44ABE477}} {{0xC57E7351, 0x2CEA01EB}, {0xC63D7351, 0x6BEA036D}} ,
{{0x6837B25A, 0xB8FD3317}, {0x6817B25A, 0xB8DD3317}} {{0x63A8AEBF, 0xD4E31119}, {0x675AA67F, 0xF1D13AB9}} ,
{{0x9461653A, 0x44ABE477}, {0x94616D3A, 0x44ABEC77}} {{0xC63D7351, 0x6BEA036D}, {0xC57E7351, 0x2CEA01EB}} ,
{{0x6817B25A, 0xB8DD3317}, {0x6837B25A, 0xB8FD3317}} {{0x675AA67F, 0xF1D13AB9}, {0x63A8AEBF, 0xD4E31119}} ,
{{0x9461653A, 0x44ABE477}, {0x94616D3A, 0x44ABEC77}} {{0xC63D7351, 0x6BEA036D}, {0xC57E7351, 0x2CEA01EB}} ,
{{0x6817B25A, 0xB8DD3317}, {0x6837B25A, 0xB8FD3317}} {{0x675AA67F, 0xF1D13AB9}, {0x63A8AEBF, 0xD4E31119}} ,
{{0x9461653A, 0x44ABE477}, {0x94616D3A, 0x44ABEC77}} {{0xC63D7351, 0x6BEA036D}, {0xC57E7351, 0x2CEA01EB}} ,
{{0x6837B25A, 0xB8FD3317}, {0x6817B25A, 0xB8DD3317}} {{0x63A8AEBF, 0xD4E31119}, {0x675AA67F, 0xF1D13AB9}} ,
{{0x9461653A, 0x44ABE477}, {0x94616D3A, 0x44ABEC77}} {{0xC63D7351, 0x6BEA036D}, {0xC57E7351, 0x2CEA01EB}} ,
{{0x6837B25A, 0xB8FD3317}, {0x6817B25A, 0xB8DD3317}} {{0x63A8AEBF, 0xD4E31119}, {0x675AA67F, 0xF1D13AB9}} ,
{{0x94616D3A, 0x44ABEC77}, {0x9461653A, 0x44ABE477}} {{0xC57E7351, 0x2CEA01EB}, {0xC63D7351, 0x6BEA036D}} ,
{{0x6817B25A, 0xB8DD3317}, {0x6837B25A, 0xB8FD3317}} {{0x675AA67F, 0xF1D13AB9}, {0x63A8AEBF, 0xD4E31119}} ,
{{0x9461653A, 0x44ABE477}, {0x94616D3A, 0x44ABEC77}} {{0xC63D7351, 0x6BEA036D}, {0xC57E7351, 0x2CEA01EB}} ,
{{0x6817B25A, 0xB8DD3317}, {0x6837B25A, 0xB8FD3317}} {{0x675AA67F, 0xF1D13AB9}, {0x63A8AEBF, 0xD4E31119}} ,
{{0x94616D3A, 0x44ABEC77}, {0x9461653A, 0x44ABE477}} {{0xC57E7351, 0x2CEA01EB}, {0xC63D7351, 0x6BEA036D}} ,
{{0x6817B25A, 0xB8DD3317}, {0x6837B25A, 0xB8FD3317}} {{0x675AA67F, 0xF1D13AB9}, {0x63A8AEBF, 0xD4E31119}} };
const uint32_t g_key[16] = {0x11, 0x52, 0xE6, 0x3C, 0x5E, 0x42, 0x28, 0x79, 0xA8, 0xCA, 0xBD, 0xFC, 0x2F, 0xAB, 0x2, 0x4C};
[./tests/rc5-tests.cc:924] key A_L A_R = 0x1152E63C5E422879A8CABDFC2FAB24C 0x3EA38EFA 0xEE690FB7


 */

// ---

/**
 * Check if a pair is present in a vector of pairs
 */
bool rc5_pair_is_in_vec(const std::vector<pair_t> good_pairs_vec, const pair_t cp_pair)
{

}



// ---

#if RC5_ORACLE_BK

	 assert(pairs_pool_vec->size() == 1); // if input from stdin => we expect a file with a single pair
	 WORD A_right_temp = random32() & MASK;

#if 0 // WARNING!!!
	 if(b_is_first_processed == false) {
		A_right_temp = cp_pair_j.rand[right]; // get from the file
		printf("[%s:%d] RC5_ORACLE_BK b_is_first_processed = %d => original A_right = %8X\n", __FILE__, __LINE__, b_is_first_processed, A_right_temp);
	 } else {
		printf("[%s:%d] RC5_ORACLE_BK b_is_first_processed = %d =>   random A_right = %8X\n", __FILE__, __LINE__, b_is_first_processed, A_right_temp);
	 }
#endif 
	 const WORD A_right = A_right_temp;

#else // !RC5_ORACLE_BK
#if RC5_LOG_FILE_CONTAINS_RAND_LR
	 const WORD A_left = cp_pair_j.rand[left];
	 const WORD A_right = cp_pair_j.rand[right];
	 printf("[%s:%d] Rand_LR from pair: %8X %8X\n", __FILE__, __LINE__, A_left, A_right);
#else
	 const WORD A_left = rand_L;
	 const WORD A_right = rand_R;
#endif // #if RC5_LOG_FILE_CONTAINS_RAND_LR
#endif // #if RC5_ORACLE_BK

// ---

#if 0 // DEBUG
	 printf("[%s:%d] WARNING!!! Readjust the start index for DEBUG: %d -> %d\n", __FILE__, __LINE__, lsb_start_idx, bit_idx_iter);
	 lsb_start_idx = bit_idx_iter;
#endif // DEBUG


// ---

#if RC5_BIN_READ_INPUT_ARGS_FROM_STDIN // read input arguments from file

  /* Decode filename argument argv[1] */
#if RC5_FILTER_SECOND_PASS // READ input file to get filtered pairs (from 1st pass)
  assert(RC5_ORACLE_BK == 1);
  assert(RC5_FILTER_USE_STRUCTURES == 1);
  assert(RC5_LOG_TO_FILE == 0); // no storing of pairs, just reading

  if (argc != 4) {
	 std::cerr << " Usage: " << argv[0] << " Bad number of arguments: must be 4 (filename, rand, rand)." << std::endl;
	 exit(-1);
  }

  for(uint32_t i = 0; i < RC5_FILENAME_LEN; i++) { // zero filename array
	 g_rc5_filtered_pairs_filename[i] = 0; // global
  }
  sprintf(g_rc5_filtered_pairs_filename, "%s", argv[1]);
  printf("[%s:%d] Filtered pairs filename from stdin: %s\n", __FILE__, __LINE__, g_rc5_filtered_pairs_filename);

  // get L random value from stdin
  WORD rand_L_temp = 0;
  parse_rand_val_from_stdin(argv[2], &rand_L_temp);
  // get R random value from stdin
  WORD rand_R_temp = 0;
  parse_rand_val_from_stdin(argv[3], &rand_R_temp);

  const WORD rand_L = rand_L_temp;
  const WORD rand_R = rand_R_temp;

  printf("[%s:%d] rand_L rand_R %8X %8X\n", __FILE__, __LINE__, rand_L, rand_R);

  // Dummy key since the actual ones are extracted later from the file
  for(uint32_t j = 0; j < RC5_KEY_NBYTES_B; j++) {
	 key[j] = 0;//random32() & 0xFF;
  }

#else // !RC5_FILTER_SECOND_PASS // WRITE input file to store filtered pairs
  if (argc != 5) {
	 std::cerr << " Usage: " << argv[0] << " Bad number of arguments: must be 4 (filename, key, rand, rand)." << std::endl;
	 exit(-1);
  }

  for(uint32_t i = 0; i < RC5_FILENAME_LEN; i++) { // zero filename array
	 g_filename[i] = 0; // global
  }
  sprintf(g_filename, "%s", argv[1]);
  printf("[%s:%d] Filename from stdin: %s\n", __FILE__, __LINE__, g_filename);

  /* Decode key argument */
  parse_key_from_stdin(argv[2], key);

  std::cout << "-- key is ";
  for (unsigned int i = 0; i < 16; ++i) {
	 //printf("%X ", key[i]);
	 std::cout << "0x" << FMT_HEX(2) << (unsigned int)key[i] << " ";
  }
  std::cout << std::endl;

  // get L random value from stdin
  WORD rand_L = 0;
  parse_rand_val_from_stdin(argv[3], &rand_L);

  // get R random value from stdin
  WORD rand_R = 0;
  parse_rand_val_from_stdin(argv[4], &rand_R);

  printf("[%s:%d] rand_L rand_R %8X %8X\n", __FILE__, __LINE__, rand_L, rand_R);
#if 1 // DEBUG print key as a full byte string
  printf("[%s:%d] key form stdin: A_L A_R = 0x", __FILE__, __LINE__);
  for(uint32_t j = 0; j < RC5_KEY_NBYTES_B; j++) {
	 printf("%X", key[j]);
  }
  printf(" 0x%X 0x%X\n", rand_L, rand_R);
  printf("\n");
  //  printf("[%s:%d] A_L A_R 0x%X 0x%X\n", __FILE__, __LINE__, rand_L, rand_R);
#endif // #if 1 // DEBUG print key as a full byte string

#endif // #if RC5_FILTER_SECOND_PASS

#else // #if RC5_BIN_READ_INPUT_ARGS_FROM_STDIN 

#if 1
  const WORD rand_L = random32() & MASK;
  const WORD rand_R = random32() & MASK;
#else
  //  uint32_t weight = 4;
  //  const WORD rand_L = 0;//gen_sparse(weight, WORD_SIZE);
  //  const WORD rand_R = random32() & MASK;
  //  const WORD rand_R = gen_sparse(weight, WORD_SIZE);
  const WORD rand_L = random32() & MASK;
  const WORD rand_R = (random32() & MASK) & ~(0x1F);
#endif // #if 1

  for(uint32_t j = 0; j < RC5_KEY_NBYTES_B; j++) {
#if RC5_FIXED_KEY
	 key[j] = g_key[j];
#else
	 key[j] = random32() & 0xFF;
#endif  // #if RC5_FIXED_KEY

// ---

#if 0 // General Case
// determined as average of MINs over 32 keys 
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  //#if (NROUNDS >= 9) 
  //  ((double)1.0 / (double)((1U << 5) + 1)), // <---
  //  ((double)1.0 / (double)((1U << 8) + 1))
  //#else // original
  ((double)1.0 / (double)((1U <<  7) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1))
  //#endif // #if (NROUNDS == 6) 
};
#else // Special case
// determined as average of MINs over 32 keys 
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  // ---
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)), // <---
  ((double)1.0 / (double)((1U <<  5) + 1))
};
#endif // #if 0 // General Case

// ---

// Special case tailored to a specific pair (for DEBUG)
// {{0x73B11631, 0x160BBB00}, {0xF3B11631, 0x960BBB00}},
#if 0//((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<   0) + 0)),
  ((double)1.0 / (double)((1U <<   0) + 0)),
  ((double)1.0 / (double)((1U <<   0) + 1)),
  ((double)1.0 / (double)((1U <<   0) + 1)),
  ((double)1.0 / (double)((1U <<   2) + 1)),
  ((double)1.0 / (double)((1U <<   3) + 1)),
  ((double)1.0 / (double)((1U <<   0) + 1)),
  ((double)1.0 / (double)((1U <<   2) + 1)),
  ((double)1.0 / (double)((1U <<   1) + 1)),
  ((double)1.0 / (double)((1U <<   3) + 1)),
  ((double)1.0 / (double)((1U <<   4) + 1)),
  ((double)1.0 / (double)((1U <<   5) + 1)),
  ((double)1.0 / (double)((1U <<   5) + 1)),
  ((double)1.0 / (double)((1U <<   4) + 1)),
  ((double)1.0 / (double)((1U <<   4) + 1)),
  ((double)1.0 / (double)((1U <<   9) + 1)),
  ((double)1.0 / (double)((1U <<   8) + 1)),
  ((double)1.0 / (double)((1U <<   9) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


// ---

// TEST
#if 0//((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  12) + 1)),
  ((double)1.0 / (double)((1U <<  11) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

// determined as average of MINs over 32 keys 
#if 0//((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),
  ((double)1.0 / (double)((1U <<  10) + 1)),
  ((double)1.0 / (double)((1U <<  13) + 1)),
  ((double)1.0 / (double)((1U <<  12) + 1)),
  ((double)1.0 / (double)((1U <<  11) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))




// ---

	 //	 for(uint32_t r_i = 0; r_i < (1U << RC5_LOG2W); r_i++) 
     { // left LSB
	 const WORD A_left = rand_L;//random32() & MASK;
		 //		const WORD A_left = (rand_L & ~(0x1F)) | r_i;
	    for(uint32_t r_j = 0; r_j < (1U << RC5_LOG2W); r_j++) { // right LSB
		  const WORD A_right = (rand_R & ~(0x1F)) | r_j;



// ---

  // {------------

  assert(RC5_STRUCTURES_NBITS > (RC5_LOG2W + RC5_LOG2W));
  uint32_t structures_nbits = RC5_STRUCTURES_NBITS - (RC5_LOG2W + RC5_LOG2W);

  // ------------}


// ---

#if RC5_STRUCTURE_USE_EQUAL_LR_RAND
  assert(0 == 1);
  const WORD rand_R = rand_L;
#else 
  const WORD rand_R = random32() & MASK;
#endif // #if RC5_STRUCTURE_USE_EQUAL_LR_RAND

/* --- */

  //  uint32_t i = 0;
  //  std::vector<uint32_t>::iterator X_first_iter = X_first.begin();
  //  std::vector<uint32_t>::iterator X_second_iter = X_second.begin();
  //  for(X_first_iter = X_first.begin(); X_first_iter != X_first.end(); X_first_iter++, X_second_iter++,i++) {
	 //	 uint32_t x = *X_first_iter;
	 //	 uint32_t xx = *X_second_iter;

// ---

/*
 * Read a key pre-stored in a file
 *
 * \see rc5_filtered_pairs_read_from_file
 */ 
void rc5_key_read_from_file(const char* filename, 
									 WORD k[16], 
									 std::vector<pair_t>* pair_vec, 
									 std::vector<bool>* b_pair_is_good_vec)
{
  assert(RC5_KEY_READ_FROM_FILE == 1);
  FILE* fp = fopen(RC5_KEY_FILE, "r");
  //FILE* fp = fopen(filename, "r");
  if(!fp) {
	 printf("[%s:%d] File %s does not exist. Exiting...\n", __FILE__, __LINE__, RC5_LOG_FILE);
	 exit(1);
	 //	 return;
  }

  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;

  char* line = NULL;
  size_t len = 0;
  ssize_t read;
  uint32_t nline = 0;
  //  WORD k[16] = {0};

  // extract the key
  read = getline(&line, &len, fp);
  sscanf(line, "%X %X %X %X %X %X %X %X %X %X %X %X %X %X %X %X\n", 
			&k[0], &k[1], &k[2], &k[3], &k[4], &k[5], &k[6], &k[7],  
			&k[8], &k[9], &k[10], &k[11], &k[12], &k[13], &k[14], &k[15]);

#if 0									  // DEBUG
  printf("[%s:%d] key = \n", __FILE__, __LINE__);
  for(uint32_t i = 0; i < 16; i++) {
	 printf("%X ", k[i]);
  }
  printf("\n");
#endif  // #if 1									  // DEBUG

  while((read = getline(&line, &len, fp)) != -1) {
	 nline++;
	 //	 int n = 0;
	 pair_t cp_pair = {{0,0},{0,0},{0,0},{0,0}};
	 uint32_t is_good = 0;
	 WORD x1_L, x1_R, x2_L, x2_R, y1_L, y1_R, y2_L, y2_R;

	 sscanf(line, "%X %X %X %X %X %X %X %X %d\n", 
			  &x1_L, &x1_R, &x2_L, &x2_R, &y1_L, &y1_R, &y2_L, &y2_R, &is_good);


	 bool b_is_good = (is_good == 1);
	 b_pair_is_good_vec->push_back(b_is_good);

	 // plaintexts
	 cp_pair.plaintext_first[left] = x1_L;
	 cp_pair.plaintext_first[right] = x1_R;
	 cp_pair.plaintext_second[left] = x2_L;
	 cp_pair.plaintext_second[right] = x2_R;

	 // ciphertexts
	 cp_pair.ciphertext_first[left] = y1_L;
	 cp_pair.ciphertext_first[right] = y1_R;
	 cp_pair.ciphertext_second[left] = y2_L;
	 cp_pair.ciphertext_second[right] = y2_R;

	 pair_vec->push_back(cp_pair);

#if 0									  // DEBUG
	 //	 if(is_good) 
	 {
		WORD dx_L = x1_L ^ x2_L;
		WORD dx_R = x1_R ^ x2_R;
		printf("\nline #%5d: length %zu \n%s", nline, read, line);
		printf("%X %X %X %X %X %X %X %X %d\n", 
				 x1_L, x1_R, x2_L, x2_R, y1_L, y1_R, y2_L, y2_R, is_good);
		printf("%X %X\n", dx_L, dx_R);
	 }
#endif  // #if 1									  // DEBUG

  }

  fclose(fp);
}

// ---

#if 1 // DEBUG: print FIB and PTH array
  printf("[%s:%d] FIB[%2d : %2d] = \n", __FILE__, __LINE__, 0, (uint32_t)fib_array.size());
  for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
	 printf("%2d ", fib_array[i]);
  }
  printf("\n");
  printf("[%s:%d] PTH[%2d : %2d] = \n", __FILE__, __LINE__, 0, (uint32_t)p_thres_array.size());
  for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
	 printf("%4.2f ", log2(p_thres_array[i]));
  }
  printf("\n");
#endif // #if 1 // DEBUG: print FIB and PTH array

// ---

#if 1 // Intervals: add an end-of-all-pairs sign
			 // Left: (int1, int2, ...); Right: (int1, ...)
			 std::vector<rc5_interval_t> end_interval_vec_L; // left
			 std::vector<rc5_interval_t> end_interval_vec_R; // right

			 // One interval per side, both spanning [0, WORD_SIZE - 1].
			 // The second one used to be declared as end_L again, which is a
			 // redeclaration error; it is the right-hand counterpart.
			 // NOTE(review): neither interval is appended to its vector, so the
			 // pair pushed below holds two empty vectors -- confirm whether
			 // end_L / end_R were meant to be pushed first.
				rc5_interval_t end_L = {0, (WORD_SIZE - 1)};
				rc5_interval_t end_R = {0, (WORD_SIZE - 1)};

			 std::pair<std::vector<rc5_interval_t>, std::vector<rc5_interval_t>> end_pair = 
				std::make_pair(end_interval_vec_L, end_interval_vec_R);
			 pair_interval_vec.push_back(end_pair);
#endif // #if 1 // Intervals: add an end-of-all-pairs sign



// ---

#if 0 // print neutral matrix
			 uint32_t cnt = 0;
			 for(uint32_t s = left; s <= right; s++) { // 1st left/right
				for(uint32_t t = left; t <= right; t++) { // 2nd left/right
				  for(uint32_t i = 0; i < WORD_SIZE; i++) {
					 printf("[%5d] ", cnt);
					 if(s == 0) {
						printf("L ");
					 } else {
						printf("R ");
					 }
					 if(t == 0) {
						printf("L ");
					 } else {
						printf("R ");
					 }
					 printf("i = %2d j[0:%2d] = ", i, (WORD_SIZE - 1));
					 for(uint32_t j = 0; j < WORD_SIZE; j++) {
						printf("%d", b_neutral[s][t][i][j]);
						cnt++;
					 }
					 printf("\n");
				  }
				  printf("\n");
				}
			 }
#endif // #if 0 // print neutral matrix


// ----

#if 0
			 std::vector<pair_t> new_pair_vec;
			 pair_t init_pair = pair_vec[0];
			 rc5_neutral_bits_generate_pairs(S, init_pair, b_neutral, &new_pair_vec);

			 uint32_t cnt_new = 0;
			 uint32_t cnt_good = 0;
			 for(std::vector<pair_t>::const_iterator vec_iter = new_pair_vec.begin(); 
				  vec_iter != new_pair_vec.end(); vec_iter++, cnt_new++) {

				pair_t cp_pair = *vec_iter;
				WORD x1_L = cp_pair.plaintext_first[left];
				WORD x1_R = cp_pair.plaintext_first[right];
				WORD x2_L = cp_pair.plaintext_second[left];
				WORD x2_R = cp_pair.plaintext_second[right];
				WORD dx_L = x1_L ^ x2_L;
				WORD dx_R = x1_R ^ x2_R;

				//	 printf("[%s:%d] %8X %8X\n", __FILE__, __LINE__, dx_L, dx_R);
				assert(dx_L == dx_R);

				bool b_good = rc5_pair_is_good(S, NROUNDS, cp_pair);
				if(b_good) {
				  cnt_good++;
				}
				printf("[%s:%d] new[%5d / %5d] %8X %8X %8X | %d\n", __FILE__, __LINE__, 
						 cnt_good, cnt_new, x1_L, x1_R, dx, b_good);

			 }
#endif


// ---

/**
 * Group the filtered pairs by input difference and run the neutral-bit
 * search on every group.
 *
 * The grouping is delegated to rc5_filtered_pairs_vec_1d_to_2d(). For each
 * group the difference and all of its plaintext pairs are printed, it is
 * asserted that left and right differences of every pair are equal and match
 * the difference of the group, and
 * rc5_filtered_pairs_single_diff_neutral_bits() is called with a table in
 * which all bits start out as neutral.
 *
 * \param S expanded key table, passed on to the helpers.
 * \param pair_vec the filtered pairs.
 * \param b_good_vec not used by this function.
 */
void rc5_filtered_pairs_neutral_bits(const WORD S[RC5_STAB_LEN_T], 
												 const std::vector<pair_t> pair_vec, const std::vector<bool> b_good_vec)
{
  const uint32_t left = RC5_FEISTEL_LEFT;
  const uint32_t right = RC5_FEISTEL_RIGHT;

  // one (difference, pairs with that difference) entry per distinct difference
  typedef std::pair<WORD, std::vector<pair_t>> dx_group_t;
  std::vector<dx_group_t> dx_groups;

  rc5_filtered_pairs_vec_1d_to_2d(S, pair_vec, &dx_groups);

#if 1 // DEBUG
  WORD group_idx = 0;
  for(const dx_group_t& group : dx_groups) {
	 const WORD dx = group.first;
	 const std::vector<pair_t>& group_pairs = group.second;

	 printf("[%s:%d] dx[%2d] %8X -> ", __FILE__, __LINE__, group_idx, dx);
	 for(const pair_t& cp_pair : group_pairs) {
		const WORD x1_L = cp_pair.plaintext_first[left];
		const WORD x1_R = cp_pair.plaintext_first[right];
		const WORD x2_L = cp_pair.plaintext_second[left];
		const WORD x2_R = cp_pair.plaintext_second[right];

		printf("(%8X %8X, %8X %8X) ", x1_L, x1_R, x2_L, x2_R);

		// both halves must carry the difference of the group
		const WORD dx_L = x1_L ^ x2_L;
		const WORD dx_R = x1_R ^ x2_R;
		assert(dx_L == dx_R);
		assert(dx == dx_L);
	 }
	 printf("\n");

	 if(group_pairs.empty()) {
		printf("[%s:%d] Not enough pairs: pair_vec size %d. Skipping...\n", __FILE__, __LINE__, (uint32_t)group_pairs.size());
	 } else {
		// every bit position is neutral until a pair proves otherwise
		bool b_neutral[2][WORD_SIZE] = {{true}};
		for(uint32_t bit = 0; bit < WORD_SIZE; bit++) {
		  b_neutral[left][bit] = true;
		  b_neutral[right][bit] = true;
		}

		rc5_filtered_pairs_single_diff_neutral_bits(S, group_pairs, b_neutral);

#if 0
		printf("[%s:%d] left ", __FILE__, __LINE__);
		for(uint32_t i = 0; i < WORD_SIZE; i++) {
		  printf("%d", b_neutral[left][i]);
		}
		printf(" ");
		printf(" right ");
		for(uint32_t i = 0; i < WORD_SIZE; i++) {
		  printf("%d", b_neutral[right][i]);
		}
		printf("\n");
#endif
	 }

	 group_idx++;
  }

#endif
  printf("[%s:%d] RC5_EQUAL_ROT_CONST_NHALF_ROUNDS %2d\n", 
			__FILE__, __LINE__, RC5_EQUAL_ROT_CONST_NHALF_ROUNDS);
  printf("[%s:%d] Out of %s()\n", __FILE__, __LINE__, __FUNCTION__);
}

// ----

/**
 * Neutral-bit search based on rotation constants.
 * All pairs in pair_vec have the same difference.
 *
 * For every pair, every half (left/right) and every bit position, the first
 * plaintext is copied with that single bit flipped, and original and modified
 * plaintext are passed to rc5_pair_rot_const_are_same(). A bit stays neutral
 * only if, for all pairs, the returned value is at least
 * RC5_EQUAL_ROT_CONST_NHALF_ROUNDS. Entries that are already false on entry
 * are never set back to true.
 *
 * \param S expanded key table, passed on to rc5_pair_rot_const_are_same().
 * \param pair_vec pairs to test; only the first plaintext of each is used.
 * \param b_neutral in/out table indexed [half][bit]; the caller initializes it.
 */
void rc5_filtered_pairs_single_diff_neutral_bits(const WORD S[RC5_STAB_LEN_T], const std::vector<pair_t> pair_vec, 
                                                 bool b_neutral[2][WORD_SIZE])
{
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;

  // Iterate by reference: avoids copying each pair and the size_t -> uint32_t
  // narrowing of the former index loop.
  for(std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin();
		vec_iter != pair_vec.end(); ++vec_iter) { // pairs
	 const pair_t& cp_pair = *vec_iter;
	 for(uint32_t j = left; j <= right; j++) { // left/right
		for(uint32_t i = 0; i < WORD_SIZE; i++) { // bit position

		  // Fresh arrays for every call, since the callee's parameter
		  // constness is not visible here.
		  WORD pt_original[2] = {cp_pair.plaintext_first[left], cp_pair.plaintext_first[right]};
		  WORD pt_modified[2] = {cp_pair.plaintext_first[left], cp_pair.plaintext_first[right]};
		  pt_modified[j] = cp_pair.plaintext_first[j] ^ (1U << i); // flip bit i of half j
		  uint32_t r = rc5_pair_rot_const_are_same(S, NROUNDS, pt_original, pt_modified);
#if 0 // DEBUG
		  printf("[%s:%d] %d %2d r %2d\n", __FILE__, __LINE__, j, i, r);
#endif // #if 0 // DEBUG
		  // once a bit has been found non-neutral it stays non-neutral
		  if(b_neutral[j][i]) {
			 b_neutral[j][i] = (r >= RC5_EQUAL_ROT_CONST_NHALF_ROUNDS);
		  }
		}
	 }
  }
}

/* --- */

/**
 * Neutral-bit search based on bit agreement.
 * All pairs in pair_vec have the same difference.
 *
 * A bit position of the left (right) half stays marked in \p b_neutral only
 * if the left (right) half of the first plaintext has the same value at that
 * position in every pair of \p pair_vec. Entries that are already false on
 * entry are left false. Afterwards all first plaintexts and the resulting
 * table are printed.
 *
 * \param S not used by this function.
 * \param pair_vec pairs to compare; only the first plaintext of each is used.
 * \param b_neutral in/out table indexed [half][bit]; the caller initializes it.
 */
void rc5_filtered_pairs_single_diff_neutral_bits(const WORD S[RC5_STAB_LEN_T], const std::vector<pair_t> pair_vec, 
                                                 bool b_neutral[2][WORD_SIZE])
{
  const uint32_t left = RC5_FEISTEL_LEFT;
  const uint32_t right = RC5_FEISTEL_RIGHT;

  if(!pair_vec.empty()) {
	 // the first pair is the reference every other pair is compared against
	 const pair_t& ref_pair = pair_vec.front();

	 for(uint32_t bit = 0; bit < WORD_SIZE; bit++) {
		const WORD ref_L_bit = (ref_pair.plaintext_first[left] >> bit) & 1;
		const WORD ref_R_bit = (ref_pair.plaintext_first[right] >> bit) & 1;

		for(size_t n = 1; n < pair_vec.size(); n++) {
		  const WORD cur_L_bit = (pair_vec[n].plaintext_first[left] >> bit) & 1;
		  const WORD cur_R_bit = (pair_vec[n].plaintext_first[right] >> bit) & 1;

		  // a single mismatch clears the flag for good
		  b_neutral[left][bit] = b_neutral[left][bit] && (ref_L_bit == cur_L_bit);
		  b_neutral[right][bit] = b_neutral[right][bit] && (ref_R_bit == cur_R_bit);
		}
	 }
  }

#if 1 // DEBUG
  // dump every first plaintext in hex and binary ...
  for(size_t n = 0; n < pair_vec.size(); n++) {
	 const WORD x1_L = pair_vec[n].plaintext_first[left];
	 const WORD x1_R = pair_vec[n].plaintext_first[right];
	 printf("[%s:%d] %8X ", __FILE__, __LINE__, x1_L);
	 print_binary(x1_L);
	 printf(" ");
	 printf("%8X ", x1_R);
	 print_binary(x1_R);
	 printf("\n");
  }
  // ... followed by the resulting flags, left half first
  printf("[%s:%d]  neutral ", __FILE__, __LINE__);
  for(uint32_t bit = 0; bit < WORD_SIZE; bit++) {
	 printf("%d", b_neutral[left][bit]);
  }
  printf("  neutral ");
  for(uint32_t bit = 0; bit < WORD_SIZE; bit++) {
	 printf("%d", b_neutral[right][bit]);
  }
  printf("\n");
#endif
}




/* --- */

/* *
 * Store the common bits between the two words \p a and \p b.
 */
void rc5_common_bits_count(const WORD a, bits_t* common_bits)
{
  WORD common_ones = (a & common_bits->val);
  WORD common_zeroes = (~a & ~(common_bits->val));

  common_bits->pos &= (common_ones | common_zeroes);

#if 0 // DEBUG
  printf(" v ");
  print_binary(common_bits->val);
  printf("\n");

  printf(" a ");
  print_binary(a);
  printf("\n");

  printf(" * ");
  print_binary(common_bits->pos);
  printf("\n");
#endif // #if 0 // DEBUG
}

void test_rc5_common_bits_count()
{
  WORD n = 0;

  WORD a = random32() & MASK;
  WORD val = a;
  WORD pos = 0xffffffff & MASK;
  bits_t common_bits = {val, pos};

#if 0
  WORD b = random32() & MASK;
  WORD common_ones = (a & b);
  WORD common_zeroes = (~a & ~b);
  WORD common_pos = common_ones | common_zeroes;

  printf(" a ");
  print_binary(a);
  printf("\n");

  printf(" b ");
  print_binary(b);
  printf("\n");

  printf(" * ");
  print_binary(common_bits.pos);
  printf("\n");
#endif

  while (n < 2) {
	 a = random32() & MASK;
	 rc5_common_bits_count(a, &common_bits);
	 n++;
  }
}

/* *
 * Build new candidate plaintext pairs from the bits shared by the
 * filtered pairs and report which of them are good.
 *
 * Pass 1 collects every distinct input XOR difference occurring in
 * \p pair_vec. Pass 2 handles each difference separately: the first
 * plaintexts of all pairs with that difference are compared to find the
 * bit positions on which they all agree; the remaining (free) positions
 * of the left and of the right word are then enumerated exhaustively,
 * and every resulting pair (second plaintext = first plaintext XOR
 * difference) is tested with rc5_pair_is_good().
 *
 * \param S table handed to rc5_pair_is_good() (presumably the expanded
 *        key -- confirm against rc5_setup()).
 * \param pair_vec filtered pairs; the left and right plaintext words of
 *        each pair must have the same XOR difference (asserted).
 * \param b_good_vec flags indexed in parallel with \p pair_vec; only
 *        printed in the debug output.
 */
void rc5_filtered_pairs_common_bits(const WORD S[RC5_STAB_LEN_T], const std::vector<pair_t> pair_vec, const std::vector<bool> b_good_vec)
{
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;
  std::vector<WORD> dx_vec;

  // Pass 1: collect the distinct input differences in order of first appearance.
  for(std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin();
      vec_iter != pair_vec.end(); vec_iter++) {

    pair_t cp_pair = *vec_iter;
    WORD x1_L = cp_pair.plaintext_first[left];
    WORD x1_R = cp_pair.plaintext_first[right];
    WORD x2_L = cp_pair.plaintext_second[left];
    WORD x2_R = cp_pair.plaintext_second[right];
    WORD dx_L = x1_L ^ x2_L;
    WORD dx_R = x1_R ^ x2_R;

    //	 printf("[%s:%d] %8X %8X\n", __FILE__, __LINE__, dx_L, dx_R);
    assert(dx_L == dx_R);

    std::vector<WORD>::const_iterator dx_vec_iter = dx_vec.begin();
    while((dx_vec_iter != dx_vec.end()) && (*dx_vec_iter != dx_R)) {
      dx_vec_iter++;
    }
    if(dx_vec_iter == dx_vec.end()) {
      dx_vec.push_back(dx_R);
      printf("[%s:%d] Add new %8X %8X dx_L dx_R %8X %8X\n", __FILE__, __LINE__, x1_L, x1_R, dx_L, dx_R);
    }
    bool b_good = rc5_pair_is_good(S, NROUNDS, cp_pair);
    if(!b_good) {
      printf("[%s:%d] WARNING! Pairs is not good! Continuing...\n", __FILE__, __LINE__);
    }
  }

  // Pass 2: cycle over unique diffs
  for(std::vector<WORD>::const_iterator dx_vec_iter = dx_vec.begin();
      dx_vec_iter != dx_vec.end(); dx_vec_iter++) {
    pair_t new_pair = {{0}};
    bits_t common_bits[2] = {{0, 0}, {0, 0}};
    bool b_first = true;
    printf("[%s:%d] --- dx %8X ---\n", __FILE__, __LINE__, *dx_vec_iter);
    WORD cnt = 0; // index of the current pair, parallel to b_good_vec

    // cycle over all filtered pairs
    for(std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin();
        vec_iter != pair_vec.end(); vec_iter++) {
      pair_t cp_pair = *vec_iter;
      WORD x1_L = cp_pair.plaintext_first[left];
      WORD x1_R = cp_pair.plaintext_first[right];
      WORD x2_L = cp_pair.plaintext_second[left];
      WORD x2_R = cp_pair.plaintext_second[right];
      WORD dx_L = x1_L ^ x2_L;
      WORD dx_R = x1_R ^ x2_R;
      assert(dx_L == dx_R);
      if(dx_R == *dx_vec_iter) {
#if 1 // DEBUG
        printf("[%s:%d] dx %8X BEFORE common %8X %8X\n", __FILE__, __LINE__,
               dx_R, common_bits[left].pos, common_bits[right].pos);
#endif // #if 1 // DEBUG
        if(b_first) {
          // The first matching pair becomes the reference: all positions
          // start out as common and this pair is the template for new pairs.
          common_bits[left].val = x1_L;
          common_bits[left].pos = 0xffffffff & MASK;

          common_bits[right].val = x1_R;
          common_bits[right].pos = 0xffffffff & MASK;

          new_pair = *vec_iter;
          b_first = false;
        } else {
          rc5_common_bits_count(x1_L, &common_bits[left]);
          rc5_common_bits_count(x1_R, &common_bits[right]);

#if 0 // DEBUG
          printf(" V ");
          print_binary(common_bits[left].val);
          printf(" ");
          print_binary(common_bits[right].val);
          printf("\n");

          printf(" X ");
          print_binary(x1_L);
          printf(" ");
          print_binary(x1_R);
          printf("\n");

          printf(" * ");
          print_binary(common_bits[left].pos);
          printf(" ");
          print_binary(common_bits[right].pos);
          printf("\n");
#endif // #if 0 // DEBUG
        }
#if 1 // DEBUG
        printf("[%s:%d] dx %8X  AFTER common %8X %8X ", __FILE__, __LINE__,
               dx_R, common_bits[left].pos, common_bits[right].pos);
        printf(" | x1 L R %8X %8X dx %8X %d\n", x1_L, x1_R, dx_R, b_good_vec[cnt]);
#endif // #if 1 // DEBUG
      }
      cnt++;
    }

    if((hw32(common_bits[left].pos) == WORD_SIZE) &&
       (hw32(common_bits[right].pos) == WORD_SIZE)) {
      printf("[%s:%d] WARNING! No freedom: %8X\n", __FILE__, __LINE__, common_bits[right].pos);
      continue; // no freedom
    }

#if 1 // DEBUG
    printf("[%s:%d] common L R val %8X %8X pos %8X %8X\n", __FILE__, __LINE__,
           common_bits[left].val, common_bits[right].val,
           common_bits[left].pos, common_bits[right].pos);
#endif // #if 1 // DEBUG

#if 1 // DEBUG
    // Sanity check on the template pair. It does not depend on the
    // enumeration below, so it is evaluated once per difference.
    {
      bits_t temp_bits_L = common_bits[left];
      bits_t temp_bits_R = common_bits[right];
      WORD temp_val_L = new_pair.plaintext_first[left];
      WORD temp_val_R = new_pair.plaintext_first[right];
      rc5_common_bits_count(temp_val_L, &temp_bits_L);
      assert(temp_bits_L.pos == common_bits[left].pos);
      rc5_common_bits_count(temp_val_R, &temp_bits_R);
      assert(temp_bits_R.pos == common_bits[right].pos);
      assert(!((temp_val_L != common_bits[left].val) && (temp_val_R != common_bits[right].val)));
    }
#endif

    // Number of free (non-common) bit positions in each word.
    const uint32_t n_left_msb = (WORD_SIZE - hw32(common_bits[left].pos));
    const uint32_t n_right_msb = (WORD_SIZE - hw32(common_bits[right].pos));

    // 64-bit counters: with all WORD_SIZE bits free a 32-bit shift by the
    // full width would be undefined.
    const uint64_t N_left = (1ULL << n_left_msb);
    const uint64_t N_right = (1ULL << n_right_msb);

    uint32_t cnt_pairs = 0;
    uint32_t cnt_good = 0;
    uint32_t cnt_bad = 0;

    for(uint64_t n_left = 0; n_left < N_left; n_left++) {
      // The right-hand counter restarts for every left-hand assignment so
      // that all N_left * N_right combinations are visited.
      for(uint64_t n_right = 0; n_right < N_right; n_right++) {

        pair_t temp_pair = new_pair;

        uint32_t n_left_i = 0;
        uint32_t n_right_i = 0;

        // Scatter the bits of the counters into the free positions.
        for(uint32_t i = 0; i < WORD_SIZE; i++) {
          uint32_t L = (common_bits[left].pos >> i) & 1;
          if(L == 0) {
            uint32_t r = (uint32_t)((n_left >> n_left_i) & 1);
            temp_pair.plaintext_first[left] &= ~(1U << i); // zero bit
            temp_pair.plaintext_first[left] |= (r << i);
            assert(n_left_i < n_left_msb);
            n_left_i++;
          }
          uint32_t R = (common_bits[right].pos >> i) & 1;
          if(R == 0) {
            uint32_t r = (uint32_t)((n_right >> n_right_i) & 1);
            temp_pair.plaintext_first[right] &= ~(1U << i); // zero bit
            temp_pair.plaintext_first[right] |= (r << i);
            assert(n_right_i < n_right_msb);
            n_right_i++;
          }
        }

        assert(n_left_i == n_left_msb);
        assert(n_right_i == n_right_msb);

        WORD dx = *dx_vec_iter;
        cnt_pairs++;

        // The second plaintext is fixed by the first one and the difference.
        temp_pair.plaintext_second[left] = temp_pair.plaintext_first[left] ^ dx;
        temp_pair.plaintext_second[right] = temp_pair.plaintext_first[right] ^ dx;

        WORD tmp_dx_L = temp_pair.plaintext_second[left] ^ temp_pair.plaintext_first[left];
        WORD tmp_dx_R = temp_pair.plaintext_second[right] ^ temp_pair.plaintext_first[right];

        bool b_good = rc5_pair_is_good(S, NROUNDS, temp_pair);
        if(!b_good) {
          cnt_bad++;
        } else {
          printf("[%s:%d] GOOD! [%5d / 2^%4.2f / 2^%4.2f] new pair %8X %8X | dx LR %8X %8X\n", __FILE__, __LINE__,
                 cnt_good, log2(cnt_bad), log2(cnt_pairs),
                 temp_pair.plaintext_first[left], temp_pair.plaintext_first[right], tmp_dx_L, tmp_dx_R);
          cnt_good++;
        }
        assert((cnt_good + cnt_bad) == cnt_pairs);
      }
    }
  }

  printf("[%s:%d] Out of %s()\n", __FILE__, __LINE__, __FUNCTION__);
}

/* *
 * Driver for rc5_filtered_pairs_common_bits(): load the filtered pairs
 * (and, presumably, the key they were produced under) from RC5_LOG_FILE,
 * derive the table S from that key with rc5_setup() and run the
 * common-bits analysis on the loaded pairs.
 */
void test_rc5_filtered_pairs_common_bits()
{
  //  uint32_t left = RC5_FEISTEL_LEFT;
  //  uint32_t right = RC5_FEISTEL_RIGHT;
  std::vector<pair_t> pair_vec;
  std::vector<bool> b_pair_is_good_vec;
  WORD key[16] = {0};
  rc5_filtered_pairs_read_from_file(RC5_LOG_FILE, key, &pair_vec, &b_pair_is_good_vec);

  // rc5_setup() takes the key as bytes: keep the low byte of every key word.
  uint8_t master_key[16] = {0};
  for(uint32_t byte_idx = 0; byte_idx < 16; byte_idx++) {
    master_key[byte_idx] = (uint8_t)key[byte_idx];
    //	 printf("[%s:%d] %8X %8X\n", __FILE__, __LINE__, master_key[byte_idx], key[byte_idx]);
  }

  WORD S[RC5_STAB_LEN_T] = {0};
  rc5_setup(master_key, S);

  rc5_filtered_pairs_common_bits(S, pair_vec, b_pair_is_good_vec);

#if 0
  // Disabled dump of all loaded pairs; needs the left/right indices above.
  WORD cnt = 0;
  std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin();
  for(vec_iter = pair_vec.begin(); vec_iter != pair_vec.end(); vec_iter++) {
    pair_t cp_pair = *vec_iter;
    WORD x1_L = cp_pair.plaintext_first[left];
    WORD x1_R = cp_pair.plaintext_first[right];
    WORD x2_L = cp_pair.plaintext_second[left];
    WORD x2_R = cp_pair.plaintext_second[right];
    bool b_good = b_pair_is_good_vec[cnt];
    cnt++;
    printf("[%s:%d] %8X %8X %8X %8X %d\n", __FILE__, __LINE__,
           x1_L, x1_R, x2_L, x2_R, b_good);
  }
#endif
}


void test_rc5_filtered_pairs_slide_win_vote()
{
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;
  WORD k[16] = {0};
  std::vector<pair_t> pair_vec;
  std::vector<bool> b_pair_is_good_vec;
  rc5_filtered_pairs_read_from_file(RC5_LOG_FILE, k, &pair_vec, &b_pair_is_good_vec);

  uint32_t slide_win_index = 0;
  uint32_t slide_win_value = 0;

  rc5_filtered_pairs_slide_win_vote(pair_vec, &slide_win_index, &slide_win_value);

  uint32_t mask_stride = (0xffffffff >> (WORD_SIZE - RC5_SLIDE_WIN_LEN));

  uint32_t slide_win_index_L = (slide_win_index >> RC5_LOG2W) & RC5_ROT_MASK;
  uint32_t slide_win_index_R = slide_win_index & RC5_ROT_MASK;

  uint32_t slide_win_value_L = (slide_win_value >> RC5_SLIDE_WIN_LEN) & mask_stride;
  uint32_t slide_win_value_R = slide_win_value & mask_stride;


  printf("[%s:%d] RC5_SLIDE_WIN_LEN %2d slide_win_index slide_win_value %2d %8X\n", 
			__FILE__, __LINE__, RC5_SLIDE_WIN_LEN, slide_win_index, slide_win_value);
  printf("[%s:%d] mask_stride %8X\n", __FILE__, __LINE__, mask_stride);

  //  slide_win_index = 14;
  //  slide_win_value = 0x2A;

  //  rc5_good_pairs_print(pair_vec);
#if 1
  uint32_t cnt = 0;
  uint32_t cnt_good_all = 0;
  uint32_t cnt_good = 0;
  uint32_t cnt_bad = 0;
  std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin();
  for(vec_iter = pair_vec.begin(); vec_iter != pair_vec.end(); vec_iter++) {
	 pair_t cp_pair = *vec_iter;
	 WORD x1_L = cp_pair.plaintext_first[left];
	 WORD x1_R = cp_pair.plaintext_first[right];
	 WORD x2_L = cp_pair.plaintext_second[left];
	 WORD x2_R = cp_pair.plaintext_second[right];
	 bool b_good = b_pair_is_good_vec[cnt];

	 uint32_t stride_R_i = (x1_R >> slide_win_index_R) & mask_stride;
	 uint32_t stride_L_i = (x1_L >> slide_win_index_L) & mask_stride;

	 printf("%2d %2d %8X %8X | {{0x%08X, 0x%08X}, {0x%08X, 0x%08X}, %d} ", 
			  stride_L_i, stride_R_i, slide_win_value_L, slide_win_value_R, x1_L, x1_R, x2_L, x2_R, b_good);

	 if(b_good) {
		cnt_good_all++;
	 }

	 if((stride_L_i == slide_win_value_L) && (stride_R_i == slide_win_value_R)) {
		if(b_good) {
		  cnt_good++;
		  printf(" <- %2d", cnt_good);
		} else {
		  cnt_bad++;
		  printf(" X %2d", cnt_bad);
		}
	 }
	 printf("\n");

#if 0 // print ciphertext
	 WORD y1_L = cp_pair.ciphertext_first[left];
	 WORD y1_R = cp_pair.ciphertext_first[right];
	 WORD y2_L = cp_pair.ciphertext_second[left];
	 WORD y2_R = cp_pair.ciphertext_second[right];
	 printf("{{0x%08X, 0x%08X}, {0x%08X, 0x%08X}} ", y1_L, y1_R, y2_L, y2_R);
	 if((vec_iter + 1) != pair_vec.end()) {
		printf(",\n");
	 }
#endif // #if 1 // print ciphertext
	 cnt++;
  }
  printf("[%s:%d] Final count: filt_all/good_all/cnt_good/cnt_bad =\n Vote: %2d %2d %2d %2d\n", __FILE__, __LINE__, 
			(uint32_t)b_pair_is_good_vec.size(), cnt_good_all, cnt_good, cnt_bad);
#endif
}


/* --- */

/* 
`/tmp/rc5-filtered-pairs.txt.1412011678325676' -> `/tmp/rc5-log.txt'
[./tests/rc5-tests.cc:2305] Update MAX votes[ 5][       3]     1
[./tests/rc5-tests.cc:2305] Update MAX votes[ 5][      13]     9
[./tests/rc5-tests.cc:2317] MAX  0:  votes[ 5][      13]     9
[./tests/rc5-tests.cc:2347] RC5_SLIDE_WIN_LEN  6 slide_win_index slide_win_value  5       13
[./tests/rc5-tests.cc:2348] mask_stride       3F
      23       13 | {{0xF605563B, 0x307D9C6C}, {0xF605543B, 0x307D9E6C}, 1} 
      23       13 | {{0xB41DBE3B, 0x7265746C}, {0xB41DBA3B, 0x7265706C}, 0} 
      23       13 | {{0x76C7163B, 0xB0BFDC6C}, {0x76C7063B, 0xB0BFCC6C}, 1} 
      33       13 | {{0xB286143B, 0x74FEDE6C}, {0xB286043B, 0x74FECE6C}, 1} 
      13       13 | {{0xA95AA03B, 0x6F226A6C}, {0xA95AE03B, 0x6F222A6C}, 1}  <-  1
      23       13 | {{0xFE45863B, 0x383D4C6C}, {0xFE45C63B, 0x383D0C6C}, 1} 
      13       13 | {{0xC0A5383B, 0x06DDF26C}, {0xC0A7383B, 0x06DFF26C}, 1}  <-  2
      33       13 | {{0x8B068C3B, 0x4D7E466C}, {0x8B168C3B, 0x4D6E466C}, 0} 
      13       13 | {{0x8866D83B, 0x4E1E126C}, {0x8826D83B, 0x4E5E126C}, 1}  <-  3
      13       13 | {{0xF558903B, 0x33205A6C}, {0xF518903B, 0x33605A6C}, 1}  <-  4
      13       13 | {{0xC0C1383B, 0x06B9F26C}, {0xC1C1383B, 0x07B9F26C}, 1}  <-  5
      13       13 | {{0xE665883B, 0x201D426C}, {0xF665883B, 0x301D426C}, 1}  <-  6
      13       13 | {{0xE665C83B, 0x201D026C}, {0xF665C83B, 0x301D026C}, 1}  <-  7
      13       13 | {{0xE625883B, 0x205D426C}, {0xF625883B, 0x305D426C}, 1}  <-  8
       3       13 | {{0xBA862A3B, 0x7CFEE06C}, {0x9A862A3B, 0x5CFEE06C}, 1} 
      13       13 | {{0xF267283B, 0x341FE26C}, {0xD267283B, 0x141FE26C}, 1}  <-  9
      23       13 | {{0x6538AE3B, 0xA340646C}, {0xE538AE3B, 0x2340646C}, 1} 
[./tests/rc5-tests.cc:2400] Final count: filt_all/good_all/cnt_good/cnt_bad =
 Vote: 17 15  9  0

 */

/* --- */

/* *
 * Variant of the common-bits experiment in which the common bit
 * positions are computed over ALL filtered pairs (the per-difference
 * filter is replaced by if(1)), while new pairs are still generated
 * separately for every distinct input XOR difference.
 *
 * For each difference the free (non-common) bit positions of the left
 * and of the right first-plaintext word are enumerated exhaustively;
 * each candidate pair (second = first XOR difference) is tested with
 * rc5_pair_is_good() and good ones are printed.
 *
 * \param S table handed to rc5_pair_is_good() (presumably the expanded
 *        key -- confirm against rc5_setup()).
 * \param pair_vec filtered pairs; left and right plaintext words of each
 *        pair must have the same XOR difference (asserted).
 * \param b_good_vec flags parallel to \p pair_vec; only referenced by
 *        disabled debug output.
 */
void rc5_filtered_pairs_common_bits(const WORD S[RC5_STAB_LEN_T], const std::vector<pair_t> pair_vec, const std::vector<bool> b_good_vec)
{
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;
  std::vector<WORD> dx_vec;

  // Pass 1: collect the distinct input differences in order of first appearance.
  for(std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin();
      vec_iter != pair_vec.end(); vec_iter++) {

    pair_t cp_pair = *vec_iter;
    WORD x1_L = cp_pair.plaintext_first[left];
    WORD x1_R = cp_pair.plaintext_first[right];
    WORD x2_L = cp_pair.plaintext_second[left];
    WORD x2_R = cp_pair.plaintext_second[right];
    WORD dx_L = x1_L ^ x2_L;
    WORD dx_R = x1_R ^ x2_R;

    //	 printf("[%s:%d] %8X %8X\n", __FILE__, __LINE__, dx_L, dx_R);
    assert(dx_L == dx_R);

    std::vector<WORD>::const_iterator dx_vec_iter = dx_vec.begin();
    while((dx_vec_iter != dx_vec.end()) && (*dx_vec_iter != dx_R)) {
      dx_vec_iter++;
    }
    if(dx_vec_iter == dx_vec.end()) {
      dx_vec.push_back(dx_R);
      printf("[%s:%d] Add new %8X %8X dx_L dx_R %8X %8X\n", __FILE__, __LINE__, x1_L, x1_R, dx_L, dx_R);
    }
    bool b_good = rc5_pair_is_good(S, NROUNDS, cp_pair);
    if(!b_good) {
      printf("[%s:%d] WARNING! Pairs is not good! Continuing...\n", __FILE__, __LINE__);
    }
  }

  // Pass 2: cycle over unique diffs
  for(std::vector<WORD>::const_iterator dx_vec_iter = dx_vec.begin();
      dx_vec_iter != dx_vec.end(); dx_vec_iter++) {
    pair_t new_pair = {{0}};
    bits_t common_bits[2] = {{0, 0}, {0, 0}};
    bool b_first = true;
    printf("[%s:%d] --- dx %8X ---\n", __FILE__, __LINE__, *dx_vec_iter);
    WORD cnt = 0; // index of the current pair (used by the disabled debug print)

    // cycle over all filtered pairs
    for(std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin();
        vec_iter != pair_vec.end(); vec_iter++) {
      pair_t cp_pair = *vec_iter;
      WORD x1_L = cp_pair.plaintext_first[left];
      WORD x1_R = cp_pair.plaintext_first[right];
      WORD x2_L = cp_pair.plaintext_second[left];
      WORD x2_R = cp_pair.plaintext_second[right];
      WORD dx_L = x1_L ^ x2_L;
      WORD dx_R = x1_R ^ x2_R;
      assert(dx_L == dx_R);
      //		if(dx_R == *dx_vec_iter) {
      if(1) { // deliberately uses every pair, whatever its difference
#if 0 // DEBUG
        printf("[%s:%d] dx %8X BEFORE common %8X %8X\n", __FILE__, __LINE__,
               dx_R, common_bits[left].pos, common_bits[right].pos);
#endif // #if 0 // DEBUG
        if(b_first) {
          // The first pair becomes the reference and the template for new pairs.
          common_bits[left].val = x1_L;
          common_bits[left].pos = 0xffffffff & MASK;

          common_bits[right].val = x1_R;
          common_bits[right].pos = 0xffffffff & MASK;

          new_pair = *vec_iter;
          b_first = false;
        } else {
          rc5_common_bits_count(x1_L, &common_bits[left]);
          rc5_common_bits_count(x1_R, &common_bits[right]);

#if 0 // DEBUG
          printf(" V ");
          print_binary(common_bits[left].val);
          printf(" ");
          print_binary(common_bits[right].val);
          printf("\n");

          printf(" X ");
          print_binary(x1_L);
          printf(" ");
          print_binary(x1_R);
          printf("\n");

          printf(" * ");
          print_binary(common_bits[left].pos);
          printf(" ");
          print_binary(common_bits[right].pos);
          printf("\n");
#endif // #if 0 // DEBUG
        }
#if 0 // DEBUG
        printf("[%s:%d] dx %8X  AFTER common %8X %8X ", __FILE__, __LINE__,
               dx_R, common_bits[left].pos, common_bits[right].pos);
        printf(" | x1 L R %8X %8X dx %8X %d\n", x1_L, x1_R, dx_R, b_good_vec[cnt]);
#endif // #if 0 // DEBUG
      }
      cnt++;
    }

    if((hw32(common_bits[left].pos) == WORD_SIZE) &&
       (hw32(common_bits[right].pos) == WORD_SIZE)) {
      continue; // no freedom
    }

#if 1 // DEBUG
    printf("[%s:%d] dx %8X common L R val %8X %8X pos %8X %8X\n", __FILE__, __LINE__,
           *dx_vec_iter, common_bits[left].val, common_bits[right].val,
           common_bits[left].pos, common_bits[right].pos);
#endif // #if 1 // DEBUG

#if 1 // DEBUG
    // Sanity check on the template pair. It does not depend on the
    // enumeration below, so it is evaluated once per difference.
    {
      bits_t temp_bits_L = common_bits[left];
      bits_t temp_bits_R = common_bits[right];
      WORD temp_val_L = new_pair.plaintext_first[left];
      WORD temp_val_R = new_pair.plaintext_first[right];
      rc5_common_bits_count(temp_val_L, &temp_bits_L);
      assert(temp_bits_L.pos == common_bits[left].pos);
      rc5_common_bits_count(temp_val_R, &temp_bits_R);
      assert(temp_bits_R.pos == common_bits[right].pos);
      assert(!((temp_val_L != common_bits[left].val) && (temp_val_R != common_bits[right].val)));
    }
#endif

    // Number of free (non-common) bit positions in each word.
    const uint32_t n_left_msb = (WORD_SIZE - hw32(common_bits[left].pos));
    const uint32_t n_right_msb = (WORD_SIZE - hw32(common_bits[right].pos));

    // 64-bit counters: with all WORD_SIZE bits free a 32-bit shift by the
    // full width would be undefined.
    const uint64_t N_left = (1ULL << n_left_msb);
    const uint64_t N_right = (1ULL << n_right_msb);

    for(uint64_t n_left = 0; n_left < N_left; n_left++) {
      // The right-hand counter restarts for every left-hand assignment so
      // that all N_left * N_right combinations are visited.
      for(uint64_t n_right = 0; n_right < N_right; n_right++) {

        pair_t temp_pair = new_pair;

        uint32_t n_left_i = 0;
        uint32_t n_right_i = 0;

        // Scatter the bits of the counters into the free positions.
        for(uint32_t i = 0; i < WORD_SIZE; i++) {
          uint32_t L = (common_bits[left].pos >> i) & 1;
          if(L == 0) {
            uint32_t r = (uint32_t)((n_left >> n_left_i) & 1);
            temp_pair.plaintext_first[left] &= ~(1U << i); // zero bit
            temp_pair.plaintext_first[left] |= (r << i);
            assert(n_left_i < n_left_msb);
            n_left_i++;
          }
          uint32_t R = (common_bits[right].pos >> i) & 1;
          if(R == 0) {
            uint32_t r = (uint32_t)((n_right >> n_right_i) & 1);
            temp_pair.plaintext_first[right] &= ~(1U << i); // zero bit
            temp_pair.plaintext_first[right] |= (r << i);
            assert(n_right_i < n_right_msb);
            n_right_i++;
          }
        }

        assert(n_left_i == n_left_msb);
        assert(n_right_i == n_right_msb);

        // The second plaintext is fixed by the first one and the difference.
        temp_pair.plaintext_second[left] = temp_pair.plaintext_first[left] ^ *dx_vec_iter;
        temp_pair.plaintext_second[right] = temp_pair.plaintext_first[right] ^ *dx_vec_iter;

        WORD tmp_dx_L = temp_pair.plaintext_second[left] ^ temp_pair.plaintext_first[left];
        WORD tmp_dx_R = temp_pair.plaintext_second[right] ^ temp_pair.plaintext_first[right];

        bool b_good = rc5_pair_is_good(S, NROUNDS, temp_pair);
        if(b_good) {
          printf("[%s:%d] new pair %8X %8X | dx LR %8X %8X\n", __FILE__, __LINE__,
                 temp_pair.plaintext_first[left], temp_pair.plaintext_first[right], tmp_dx_L, tmp_dx_R);
          printf("[%s:%d] GOOD! Pair is good! Continuing...\n", __FILE__, __LINE__);
          //		  assert(1 == 0);
        }
      }
    }
  }

  printf("[%s:%d] Out of %s()\n", __FILE__, __LINE__, __FUNCTION__);
}

/* --- */
	 // Excerpt (the enclosing function is not part of this file): run 10
	 // randomised trials. Each trial starts from a copy of new_pair, ORs a
	 // random bit into every position whose common_bits[..].pos bit is 0, and
	 // tests the resulting pair with rc5_pair_is_good().
	 // NOTE(review): the random bit is only OR-ed in, so a position that is
	 // already 1 in the template can never become 0; the same bit is OR-ed
	 // into both plaintexts (lines marked "<--- !"), and the recomputation of
	 // the second plaintext from the difference is disabled below (#if 0).
	 uint32_t n = 0;
	 while(n < 10) {

		pair_t temp_pair = new_pair;

		for(uint32_t i = 0; i < WORD_SIZE; i++) {
#if 1
		  // Left word: bit i is free when it is not marked as common.
		  uint32_t L = (common_bits[left].pos >> i) & 1;
		  if(L == 0) {
			 uint32_t r = random32() % 2; 
			 //			 uint32_t b = (temp_pair.plaintext_first[left] >> i) & 1;
			 //			 if(r != b) {
			 if(1) {
				temp_pair.plaintext_first[left] |= (r << i);
				temp_pair.plaintext_second[left] |= (r << i); // <--- !
				//	temp_pair.plaintext_first[left] ^= (1U << i);
				//	temp_pair.plaintext_second[left] ^= (1U << i); // <--- !
			 }
		  }
#endif
#if 1
		  // Right word: same treatment as the left word.
		  uint32_t R = (common_bits[right].pos >> i) & 1;
		  if(R == 0) {
			 uint32_t r = random32() % 2; 
			 //		 uint32_t b = (temp_pair.plaintext_first[right] >> i) & 1;
			 //			 if(r != b) {
			 if(1) {
				temp_pair.plaintext_first[right] |= (r << i);
		      temp_pair.plaintext_second[right] |= (r << i); // <--- !
				//	temp_pair.plaintext_first[right] ^= (1U << i);
				//	temp_pair.plaintext_second[right] ^= (1U << i); // <--- !
			 }
		  }
#endif
		}
#if 1 // DEBUG
		// Sanity check on the unmodified template new_pair (not on temp_pair):
		// folding its words into copies of the masks must leave them unchanged.
		bits_t temp_bits_L = common_bits[left];
		bits_t temp_bits_R = common_bits[right];
		WORD temp_val_L = new_pair.plaintext_first[left];
		WORD temp_val_R = new_pair.plaintext_first[right];
		rc5_common_bits_count(temp_val_L, &temp_bits_L);
		//		printf("[%s:%d] %8X %8X\n", __FILE__, __LINE__, temp_val_L, common_bits[left].val);
		assert(temp_bits_L.pos == common_bits[left].pos);
		rc5_common_bits_count(temp_val_R, &temp_bits_R);
		//		printf("[%s:%d] %8X %8X\n", __FILE__, __LINE__, temp_val_R, common_bits[right].val);
		assert(temp_bits_R.pos == common_bits[right].pos);
		assert(!((temp_val_L != common_bits[left].val) && (temp_val_R != common_bits[right].val)));
#endif

#if 0
		temp_pair.plaintext_second[left] = temp_pair.plaintext_first[left] ^ *dx_vec_iter;
		temp_pair.plaintext_second[right] = temp_pair.plaintext_first[right] ^ *dx_vec_iter;
#endif
		// Differences actually present in the modified pair (printed only).
		WORD tmp_dx_L = temp_pair.plaintext_second[left] ^ temp_pair.plaintext_first[left];
		WORD tmp_dx_R = temp_pair.plaintext_second[right] ^ temp_pair.plaintext_first[right];
		printf("[%s:%d] new pair %8X %8X | dx LR %8X %8X\n", __FILE__, __LINE__, 
				 temp_pair.plaintext_first[left], temp_pair.plaintext_first[right], tmp_dx_L, tmp_dx_R);

		bool b_good = rc5_pair_is_good(S, NROUNDS, temp_pair);
		if(!b_good) {
		  //		  printf("[%s:%d] WARNING! Pair is not good! Continuing...\n", __FILE__, __LINE__);
		} else {
		  printf("[%s:%d] GOOD! Pair is good! Continuing...\n", __FILE__, __LINE__);
		  //		  assert(1 == 0);
		}
		n++;
	 }

/* ---- */

/* *
 * A word together with a selection mask over its bits.
 *
 * Only those bits of the word \p val are selected for which the
 * corresponding bit of \p pos is equal to 1. All bits of \p val whose
 * corresponding bit of \p pos is equal to 0 are ignored.
 */
typedef struct {
  // Word holding the bit values.
  WORD val;
  // Selection mask: bit i set means bit i of val is selected.
  WORD pos;
} bits_t;



/* *
 * Store the common bits between the two words \p a and \p b.
 */
void rc5_common_bits_count(const WORD a, const WORD b, bits_t* common_bits)
{
  WORD common_ones = (a & b);
  WORD common_zeroes = (~a & ~b);
  WORD pos = common_ones | common_zeroes;
  WORD val = a;//(a & pos) | ();

  printf(" a ");
  print_binary(a);
  printf("\n");

  printf(" b ");
  print_binary(b);
  printf("\n");

  printf(" * ");
  print_binary(pos);
  printf("\n");

  printf(" 0 ");
  print_binary(common_zeroes);
  printf("\n");

  printf(" 1 ");
  print_binary(common_ones);
  printf("\n");
}

void test_rc5_common_bits_count()
{
  WORD a = random32() & MASK;
  WORD b = random32() & MASK;
  bits_t common_bits = {0, 0};
  rc5_common_bits_count(a, b, &common_bits);
}

/* --- */
/* *
 * Early variant of the common-bits experiment that keeps a single word
 * per side instead of a (val, pos) pair.
 *
 * Pass 1 collects the distinct input XOR differences found in \p pair_vec.
 * Pass 2 handles each difference separately: common_bits[side] starts as
 * the first-plaintext word of the first pair with that difference and is
 * then combined with the words of the other matching pairs. Afterwards 5
 * random trials are made: starting from the first matching pair, every
 * bit position where common_bits[side] holds a 0 is set to a random
 * value, the second plaintext is derived by XOR-ing the difference, and
 * the pair is tested with rc5_pair_is_good().
 *
 * NOTE(review): the update "c &= (c & x) | (~c & ~x)" reduces to
 * "c &= x", so the word only keeps positions where every plaintext has a
 * 1. Positions where all plaintexts have a 0 end up as 0 as well and are
 * therefore randomised like disagreeing positions -- confirm whether
 * that was intended.
 *
 * \param S table handed to rc5_pair_is_good().
 * \param pair_vec filtered pairs; left and right plaintext words of each
 *        pair must have the same XOR difference (asserted).
 * \param b_good_vec flags parallel to \p pair_vec; only printed.
 */
void rc5_filtered_pairs_common_bits(const WORD S[RC5_STAB_LEN_T], const std::vector<pair_t> pair_vec, const std::vector<bool> b_good_vec)
{
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;
  std::vector<WORD> dx_vec;

  // Pass 1: collect the distinct differences in order of first appearance.
  for(std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin(); 
		vec_iter != pair_vec.end(); vec_iter++) {

	 pair_t cp_pair = *vec_iter;
	 WORD x1_L = cp_pair.plaintext_first[left];
	 WORD x1_R = cp_pair.plaintext_first[right];
	 WORD x2_L = cp_pair.plaintext_second[left];
	 WORD x2_R = cp_pair.plaintext_second[right];
	 WORD dx_L = x1_L ^ x2_L;
	 WORD dx_R = x1_R ^ x2_R;

	 //	 printf("[%s:%d] %8X %8X\n", __FILE__, __LINE__, dx_L, dx_R);
	 assert(dx_L == dx_R);

	 // Linear search: is this difference already recorded?
	 std::vector<WORD>::const_iterator dx_vec_iter = dx_vec.begin();
	 while((dx_vec_iter != dx_vec.end()) && (*dx_vec_iter != dx_R)) {
		dx_vec_iter++;
	 }
	 if(dx_vec_iter == dx_vec.end()) {
		dx_vec.push_back(dx_R);
		printf("[%s:%d] Add new %8X %8X dx_L dx_R %8X %8X\n", __FILE__, __LINE__, x1_L, x1_R, dx_L, dx_R);
	 }
	 bool b_good = rc5_pair_is_good(S, NROUNDS, cp_pair);
	 if(!b_good) {
		printf("[%s:%d] WARNING! Pairs is not good! Continuing...\n", __FILE__, __LINE__);
	 }
  }

  // Pass 2: one round of analysis per distinct difference.
  for(std::vector<WORD>::const_iterator dx_vec_iter = dx_vec.begin();
		dx_vec_iter != dx_vec.end(); dx_vec_iter++) {
	 pair_t new_pair;
	 WORD common_bits[2] = {0, 0};
	 bool b_first = true;
	 printf("[%s:%d] --- dx %8X ---\n", __FILE__, __LINE__, *dx_vec_iter);
	 // Index of the current pair, parallel to b_good_vec.
	 WORD cnt = 0;
	 for(std::vector<pair_t>::const_iterator vec_iter = pair_vec.begin(); 
		  vec_iter != pair_vec.end(); vec_iter++) {
		pair_t cp_pair = *vec_iter;
		WORD x1_L = cp_pair.plaintext_first[left];
		WORD x1_R = cp_pair.plaintext_first[right];
		WORD x2_L = cp_pair.plaintext_second[left];
		WORD x2_R = cp_pair.plaintext_second[right];
		WORD dx_L = x1_L ^ x2_L;
		WORD dx_R = x1_R ^ x2_R;
		assert(dx_L == dx_R);
		// Only pairs with the difference currently being processed contribute.
		if(dx_R == *dx_vec_iter) {
		  printf("[%s:%d] dx %8X BEFORE common %8X %8X\n", __FILE__, __LINE__, dx_R, common_bits[left], common_bits[right]);

		  print_binary(common_bits[left]);
		  printf(" ");
		  print_binary(common_bits[right]);
		  printf("\n");
		  print_binary(x1_L);
		  printf(" ");
		  print_binary(x1_R);
		  printf("\n");

		  if(b_first) {
			 // First matching pair: seed the accumulators and remember the
			 // pair as the template for the random trials below.
			 common_bits[left] = x1_L;
			 common_bits[right] = x1_R;
			 new_pair = *vec_iter;
			 b_first = false;
		  } else {
			 // See the NOTE(review) in the function header about this update.
			 common_bits[left] &= ((common_bits[left] & x1_L) | (~common_bits[left] & ~x1_L));
			 common_bits[right] &= ((common_bits[right] & x1_R) | (~common_bits[right] & ~x1_R));

			 printf("[%s:%d] START --------------- \n", __FILE__, __LINE__);
			 print_binary(common_bits[left] & x1_L);
			 printf("\n");
			 print_binary(~common_bits[left] & ~x1_L);
			 printf("\n");
			 print_binary(common_bits[left]);
			 printf("\n");
			 printf("[%s:%d]   END --------------- \n", __FILE__, __LINE__);
		  }

		  printf("[%s:%d] dx %8X  AFTER common %8X %8X ", __FILE__, __LINE__, dx_R, common_bits[left], common_bits[right]);
		  printf(" | x1 L R %8X %8X dx %8X %d\n", x1_L, x1_R, dx_R, b_good_vec[cnt]);

		  print_binary(common_bits[left]);
		  printf(" ");
		  print_binary(common_bits[right]);
		  printf("\n");
		  print_binary(x1_L);
		  printf(" ");
		  print_binary(x1_R);
		  printf("\n");
		}
		//		printf("[%s:%d] common %8X %8X\n", __FILE__, __LINE__, common_bits[left], common_bits[right]);
		cnt++;
	 }
	 printf("[%s:%d] dx %8X common L R %8X %8X\n", __FILE__, __LINE__, *dx_vec_iter, common_bits[left], common_bits[right]);
#if 1
	 //	 pair_t cp_pair = *vec_iter;
	 //	 WORD rand_L = random32() & MASK;
	 //	 WORD rand_R = random32() & MASK;

	 //	 new_pair.plaintext_first[left]  = common_bits[left]  | rand_L;
	 //	 new_pair.plaintext_first[right] = common_bits[right] | rand_R;
	 //	 new_pair.plaintext_first[left]  = common_bits[left];
	 //	 new_pair.plaintext_first[right] = common_bits[right];

	 // Five random trials derived from the template pair.
	 uint32_t n = 0;
	 while(n < 5) {

		pair_t temp_pair = new_pair;

		//		printf("[%s:%d] temp pair %8X %8X\n", __FILE__, __LINE__, temp_pair.plaintext_first[left], temp_pair.plaintext_first[right]);
#if 1
		for(uint32_t i = 0; i < WORD_SIZE; i++) {
		  // A 0 in common_bits marks position i as free: give it a random
		  // value by flipping the template bit when it differs from r.
		  uint32_t L = (common_bits[left] >> i) & 1;
		  if(L == 0) {
			 uint32_t r = random32() % 2; 
			 uint32_t b = (temp_pair.plaintext_first[left] >> i) & 1;
			 if(r != b) 
			 {
				temp_pair.plaintext_first[left] ^= (1U << i);
				//				printf(" %2d ", i);
			 }
		  }
		  uint32_t R = (common_bits[right] >> i) & 1;
		  if(R == 0) {
			 uint32_t r = random32() % 2; 
			 uint32_t b = (temp_pair.plaintext_first[right] >> i) & 1;
			 if(r != b) 
			 {
				temp_pair.plaintext_first[right] ^= (1U << i);
			 }
		  }
		}
		//		printf("\n");
#endif

		// The second plaintext is fixed by the first one and the difference.
		temp_pair.plaintext_second[left] = temp_pair.plaintext_first[left] ^ *dx_vec_iter;
		temp_pair.plaintext_second[right] = temp_pair.plaintext_first[right] ^ *dx_vec_iter;

		printf("[%s:%d] new pair %8X %8X\n", __FILE__, __LINE__, temp_pair.plaintext_first[left], temp_pair.plaintext_first[right]);

		bool b_good = rc5_pair_is_good(S, NROUNDS, temp_pair);
		if(!b_good) {
		  printf("[%s:%d] WARNING! Pairs is not good! Continuing...\n", __FILE__, __LINE__);
		} else {
		  printf("[%s:%d] GOOD! Pair is good! Continuing...\n", __FILE__, __LINE__);
		}
		n++;
	 }
#endif
  }

  printf("[%s:%d] Out of %s()\n", __FILE__, __LINE__, __FUNCTION__);
}


/* --- */

		// Excerpt (the enclosing loop is not part of this file): cut a window
		// out of the right word of the first plaintext by keeping the bits
		// selected by mask and shifting the result down by i positions.
		// NOTE(review): mask and i are defined outside this excerpt; the
		// assert only holds if mask covers at most RC5_SLIDE_WIN_LEN bits
		// starting at bit i -- confirm against the original loop.
		pair_t cp_pair = *vec_iter;
		WORD x1_R = cp_pair.plaintext_first[right];
		//		printf("[%s:%d] before x1_R %8X\n", __FILE__, __LINE__, x1_R);
		x1_R = x1_R & mask;
		//		printf("[%s:%d] after1 x1_R %8X\n", __FILE__, __LINE__, x1_R);
		x1_R = x1_R >> i;
		//		printf("[%s:%d] after2 x1_R %8X\n", __FILE__, __LINE__, x1_R);
		assert(x1_R < (1U << RC5_SLIDE_WIN_LEN));

/* --- */

/* *
 * Exhaustive experiment on addition with a fixed random word k: for every
 * output difference dy and every input difference dx, print all inputs x
 * for which ADD(x, k) XOR ADD(x XOR dx, k) equals dy.
 */
void test_rc5_construct_good_pairs()
{
  const uint32_t k = random32() & MASK;
  //  uint32_t dy = random32() & MASK;
  //  uint32_t dx = dy;//(1U << (WORD_SIZE - 1));
  for(uint32_t out_diff = 0; out_diff < ALL_WORDS; out_diff++) {
    for(uint32_t in_diff = 0; in_diff < ALL_WORDS; in_diff++) {
      printf("[%s:%d] --- dy %X | dx %X ---\n", __FILE__, __LINE__, out_diff, in_diff);
      for(uint32_t x = 0; x < ALL_WORDS; x++) {
        const uint32_t x_partner = in_diff ^ x;
        const uint32_t sum = ADD(x, k);
        const uint32_t sum_partner = ADD(x_partner, k);
        if((sum ^ sum_partner) != out_diff) {
          continue; // this input does not follow the differential
        }
        printf("[%s:%d] %X %X %X | %X\n", __FILE__, __LINE__, in_diff, x, x_partner, out_diff);
      }
    }
  }
}


/* --- */

		// {--- comment 20140929 ---
		//		cp_pair.plaintext_first[left] = (A_left ^ d_j) & MASK;
		//		cp_pair.plaintext_second[left] = (A_left ^ dd_j) & MASK;
		//		cp_pair.plaintext_second[right] = (A_right ^ dd_j) & MASK;

/* --- */
/* 
Cases to check

./rc5-filtered-pairs.txt.1412016008692762:2:87A30752 122D05D6 87A30F52 122D0DD6 CBEC496E A9B0E8B8 CBEC48AE A3E8D8B8 1
	[./src/rc5-dc.cc:4668] Good #         0 ( 6581000, A9B0E8B8, A3E8D8B8, 14,  1, 1)
	[./src/rc5-dc.cc:4668] Good #         1 (80000238, 89AFB53B,  9AFB303, 11, 31, 1)
	[./src/rc5-dc.cc:4668] Good #         2 ( 17C0020, 68C3DD60, 69F7DD40, 12,  0, 1)


./rc5-filtered-pairs.txt.1412015817294290:4:9EAE7B22 CD5CC565 9EAA7B22 CD58C565 76ADF570 889EFE7A D6CFF570 889F9ED0 1
	[./src/rc5-dc.cc:4668] Good #         1 (    A05A, 889EFE7A, 889F9ED0, 16, 10, 1)

	[./src/rc5-dc.cc:4668] Good #         0 (  DF8400, B46FA567, B42C2967, 31,  0, 1)

	[./src/rc5-dc.cc:4668] Good #         1 (8000F001, 78174D9A, F817FD9B,  5, 13, 1)
	[./src/rc5-dc.cc:4668] Good #         2 (  100080, EFFA3614, EFEA3694, 13, 13, 1)
	[./src/rc5-dc.cc:4668] Good #         3 (23800003, 7FF9D22E, 9F79D22D, 10,  1, 1)

 */

/* --- */

	 // {-----
#if 0 // FIX <----
	 //    pair_t cp_pair = {{0,0}, {0,0}, {0,0}, {0,0}};
	 //    cp_pair.plaintext_first[0] = random32() & MASK;
	 //    cp_pair.plaintext_first[1] = random32() & MASK;
	 //    cp_pair.plaintext_first[0] = cp_pair_j.plaintext_first[0];
	 //    cp_pair.plaintext_first[1] = cp_pair_j.plaintext_first[1];
	 WORD A = random32() & MASK;
    for(uint32_t o = 0; o < RC5_STRUCTURES_NTEXTS; o++) { // delta_j : 2^k
	   assert(RC5_STRUCTURES_NBITS <= (WORD_SIZE - RC5_LOG2W));

		//	   WORD diff = (j << (WORD_SIZE - RC5_STRUCTURES_NBITS)); // <---
	   WORD diff = (o << (WORD_SIZE - RC5_STRUCTURES_NBITS));

		//      cp_pair.plaintext_second[0] = (cp_pair.plaintext_first[0] ^ diff) & MASK;
		//	   cp_pair.plaintext_second[1] = (cp_pair.plaintext_first[1] ^ diff) & MASK;
#endif
		// ---}


/* --- */

	 //	 ocounter++;
	 //	 printf("[%s:%d] Increment ocounter %d 2^%4.2f\n", __FILE__, __LINE__, ocounter, log2(ocounter));

#if 0 // DEBUG
	 if((cp_pair.plaintext_first[left] == cp_pair_j.plaintext_first[left]) &&
		 (cp_pair.plaintext_first[right] == cp_pair_j.plaintext_first[right]) &&
		 (cp_pair.plaintext_second[left] == cp_pair_j.plaintext_second[left]) &&
		 (cp_pair.plaintext_second[right] == cp_pair_j.plaintext_second[right])) {
		//		assert(0 == 1);
	 }
#endif


/* --- */

#if 0
	 if(pairs_pool_vec->size() >= 3) {
		oracle_size = (1U << 23);
	 }
	 if(pairs_pool_vec->size() == 2) {
		oracle_size = (1U << 24);
	 }
	 if(pairs_pool_vec->size() == 1) {
		oracle_size = (1U << 25);
	 }
#endif

/* --- */

/* --- */
#if 1 // DEBUG
  printf("[%s:%d]\n", __FILE__, __LINE__);
  for(uint32_t i = 0; i < arr_len; i++) {
	 printf("%4.2f ", log2(prob_arr[i]));
  }
  printf("\n");
  printf("\n -------- START CHECK -----\n");
  for(uint32_t i = 0; i < arr_len; i++) {
	 printf("[%s:%d] i = %2d | %f 2^%f\n", __FILE__, __LINE__, i, prob_arr[i], log2(prob_arr[i]));
	 if(prob_arr[i] == 0.0) {
		printf("[%s:%d] prob_arr[%d] is ZERO!\n", __FILE__, __LINE__, i);
	 }
	 assert(prob_arr[i] != 0.0);
  }
  printf("\n -------- END CHECK -----\n");
#endif // #if 1 // DEBUG


/* --- */
#if 1 // DEBUG
  for(uint32_t i = 0; i < nall_diffs; i++) {
	 printf("%2X ", D[i]);
	 print_binary(D[i], word_size);
	 printf("\n");
  }
  printf("\n");
#endif // #if 1 // DEBUG



/* --- */

/**
 * Build the 2^k XOR differences spanned by the k single-bit words
 * e[0..k-1] and print the basis words (DEBUG).
 *
 * D[i] is the XOR of all e[j] for which bit j of i is set. The array
 * sizes are derived from \p word_size and \p k so that changing either
 * constant cannot overflow \p e or \p D.
 */
void test_rc5_structures()
{
  const uint32_t word_size = 8;
  //  uint32_t mask = (0xffffffff >> (32 - word_size));
  const uint32_t k = 4;//8;
  const uint32_t nall_diffs = (1U << k);

  static_assert(k <= word_size, "k basis bits must fit in the word");

  // Basis: e[i] has only bit i set; entries k..word_size-1 stay zero.
  WORD e[word_size] = {0};
  for(uint32_t i = 0; i < k; i++) {
	 e[i] = (1U << i);
  }
#if 1 // DEBUG
  for(uint32_t i = 0; i < k; i++) {
	 printf("%2X\n", e[i]);
  }
#endif // #if 1 // DEBUG

  // All 2^k linear combinations of the basis words.
  WORD D[nall_diffs] = {0};
  for(uint32_t i = 0; i < nall_diffs; i++) {
	 uint32_t diff = 0;
	 // i < 2^k, so only bits 0..k-1 of i can be set
	 for(uint32_t j = 0; j < k; j++) {
		if((i >> j) & 1) {
		  diff ^= e[j];
		}
	 }
	 D[i] = diff;
  }

#if 0 // DEBUG
  for(uint32_t i = 0; i < k; i++) {
	 printf("cnt[%d] %d\n", i, cnt[i]);
  }
#endif // #if 0 // DEBUG

}

/* --- */
#if RC5_FILTER_USE_STRUCTURES
  // Fragment: fixes a random first plaintext and, for each j, derives
  // the second plaintext by XOR-ing both words with diff, where diff
  // holds j in the top RC5_STRUCTURES_NBITS bits of the word. The
  // for-loop and the #if opened here are not closed in this fragment.
  pair_t cp_pair = {{0,0}, {0,0}, {0,0}, {0,0}};
  cp_pair.plaintext_first[0] = random32() & MASK;
  cp_pair.plaintext_first[1] = random32() & MASK;
  for(uint32_t j = 0; j < RC5_STRUCTURES_NTEXTS; j++) { // delta_j : 2^k
	 //    cp_pair.plaintext_second[0] = (cp_pair.plaintext_first[0] ^ dx[0]) & MASK;
	 //	 cp_pair.plaintext_second[1] = (cp_pair.plaintext_first[1] ^ dx[1]) & MASK;
	 // the shifted index must not reach the low RC5_LOG2W bits
	 assert(RC5_STRUCTURES_NBITS <= (WORD_SIZE - RC5_LOG2W));
	 WORD diff = (j << (WORD_SIZE - RC5_STRUCTURES_NBITS));
#if 0 // DEBUG
	 printf("\r[%s:%d] [%10d / %10lld] %8X ", __FILE__, __LINE__, j, RC5_NTEXTS, diff);
	 fflush(stdout);
#endif // #if 0 // DEBUG
    cp_pair.plaintext_second[0] = (cp_pair.plaintext_first[0] ^ diff) & MASK;
	 cp_pair.plaintext_second[1] = (cp_pair.plaintext_first[1] ^ diff) & MASK;

#else // #if RC5_FILTER_USE_STRUCTURES

/* --- */

  WORD A = random32() & mask;
  //  WORD cnt[(1U << 4)] = {0};

  for(uint32_t i = 0; i < word_size; i++) {
	 //	 WORD S_i[(1U << 4)] = {0};
	 std::vector<WORD> S_i;
	 //	 cnt[j] = 0;
	 WORD x = A ^ e[i];
	 for(uint32_t j = 0; j < nall_diffs; j++) {
		//	 printf("D[%4d] %8X\n", i, D[i]);
		if((D[j] >> i) & 1) {
		//		if((D[j] & e[i]) != 0) {
		//		  cnt[j]++;
		  WORD xx = D[j];
		  S_i.push_back(xx);
		}
	 }
	 std::vector<WORD>::iterator iter = S_i.begin();
	 uint32_t k = 0;
	 while(iter != S_i.end()) {
		WORD xx = *iter;
		printf("%2d (%8X %8X) %8X %8X\n", k, x, xx, (x ^ xx), e[i]);
		iter++;
		k++;
	 }
	 printf("\n");
  }

/* --- */

/* 
59 + 10 + 20 + 56 + 3 + 20 + 4 + 283 + 18 + 11 + 6 + 6 + 20 + 2 + 10 + 12 + 40 + 4 + 35 + 18 + 15 + 9 + 25 + 23 + 35 + 2 + 83 + 11 + 5 + 3 + 58 + 40 + 8 + 22 + 17 + 109 + 11 + 10 + 33 + 5 + 16 + 18 + 24
1219

1219 / 45
27

1219/50
24
 */

/* --- */
/* 
   line # 
   Good pairs among filtered:
 */


/* --- */

		  //		  printf("[%s:%d] Before cnt %d\n", __FILE__, __LINE__, cnt);
		  //#if 1 // DEBUG
		  //		  //		  printf("[%s:%d] cnt %d (j %2d / %2d) | Add to new (%8X %8X %lld)\n", __FILE__, __LINE__, cnt, j, R[i].size(), y, yy, (long long int)params.nvariants);
		  //#endif // #if 1 // DEBUG
		  //		  j++;
		  //		  cnt++;
		  //		  printf("[%s:%d]  After cnt %d\n", __FILE__, __LINE__, cnt);


/* --- */

		  // Fragment: walk all entries of R[i], loading each into params and
		  // testing whether its (y, yy) equals the current pair. The match
		  // branch is empty, so the only lasting effect is that params ends
		  // up holding the last entry. The final '}' closes a scope opened
		  // outside this fragment.
		  j = 0;
		  uint32_t R_len = R[i].size();
		  while(j < R_len) {

				params = R[i][j];
				if((params.y == y) && (params.yy == yy)) {

				}

				j++;
		  }

		}


/* --- */

/* 
R[ 0] 0 |
R[ 1] 15 | (8BECAF9A, 8B23409A,  1, 25 |    3648) (8BECAF9A, 8B23409A,  1, 23 |    1690) (8BECAF9A, 8B23409A,  1, 24 |     800) (3D79BE3E, FB6E3E3D,  1, 11 |     212) (8BECAF9A, 8B23409A,  1, 26 |     200) (8BECAF9A, 8B23409A,  1, 27 |     133) (3D79BE3E, FB6E3E3D,  1, 12 |      48) (3D79BE3E, FB6E3E3D,  1, 10 |      40) (8BECAF9A, 8B23409A,  1, 21 |      18) (3D79BE3E, FB6E3E3D,  1, 13 |       8) (3D79BE3E, FB6E3E3D,  1,  7 |       8) (3D79BE3E, FB6E3E3D,  1,  4 |       4) (3D79BE3E, FB6E3E3D,  1, 17 |       2) (8BECAF9A, 8B23409A,  1, 28 |       1) (8BECAF9A, 8B23409A,  1, 18 |       1)
R[ 2] 0 |
R[ 3] 1 | (E90BCDD4, E844CAD3,  3, 29 |       8)
R[ 4] 55 | (41791F53, 41B8EF53,  4, 23 |    8565) (41791F53, 41B8EF53,  4, 21 |    7433) (41791F53, 41B8EF53,  4, 22 |    7101) (EB61AFC1, AB61AFBF,  4, 30 |    5813) (41791F53, 41B8EF53,  4, 24 |    5260) (EB61AFC1, AB61AFBF,  4,  0 |    5074) (EB61AFC1, AB61AFBF,  4, 31 |    4776) (EB61AFC1, AB61AFBF,  4, 28 |    3612) (41791F53, 41B8EF53,  4, 26 |    3453) (EB61AFC1, AB61AFBF,  4,  1 |    3154) (41791F53, 41B8EF53,  4, 20 |    3114) (41791F53, 41B8EF53,  4, 30 |    2390) (41791F53, 41B8EF53,  4, 25 |    2368) (41791F53, 41B8EF53,  4, 31 |    2192) (41791F53, 41B8EF53,  4, 19 |    1670) (EB61AFC1, AB61AFBF,  4,  2 |    1365) (EB61AFC1, AB61AFBF,  4, 13 |    1358) (EB61AFC1, AB61AFBF,  4, 16 |    1328) (EB61AFC1, AB61AFBF,  4, 17 |    1324) (EB61AFC1, AB61AFBF,  4, 15 |    1316) (EB61AFC1, AB61AFBF,  4, 18 |    1314) (EB61AFC1, AB61AFBF,  4, 19 |    1289) (41791F53, 41B8EF53,  4, 29 |    1276) (41791F53, 41B8EF53,  4, 18 |    1259) (EB61AFC1, AB61AFBF,  4, 14 |    1254) (41791F53, 41B8EF53,  4, 27 |    1231) (EB61AFC1, AB61AFBF,  4, 20 |    1226) (EB61AFC1, AB61AFBF,  4,  7 |    1218) (41791F53, 41B8EF53,  4, 28 |    1191) (EB61AFC1, AB61AFBF,  4,  8 |    1183) (41791F53, 41B8EF53,  4, 17 |    1180) (EB61AFC1, AB61AFBF,  4,  6 |    1133) (EB61AFC1, AB61AFBF,  4, 29 |    1113) (41791F53, 41B8EF53,  4,  0 |    1067) (EB61AFC1, AB61AFBF,  4,  5 |    1043) (41791F53, 41B8EF53,  4, 11 |    1042) (EB61AFC1, AB61AFBF,  4, 21 |    1018) (41791F53, 41B8EF53,  4, 12 |     995) (EB61AFC1, AB61AFBF,  4,  9 |     991) (41791F53, 41B8EF53,  4, 16 |     976) (EB61AFC1, AB61AFBF,  4,  4 |     969) (EB61AFC1, AB61AFBF,  4,  3 |     956) (41791F53, 41B8EF53,  4,  1 |     949) (41791F53, 41B8EF53,  4,  2 |     887) (41791F53, 41B8EF53,  4, 13 |     880) (EB61AFC1, AB61AFBF,  4, 12 |     868) (41791F53, 41B8EF53,  4, 14 |     857) (41791F53, 41B8EF53,  4, 15 |     856) (EB61AFC1, AB61AFBF,  4, 10 |     851) (EB61AFC1, AB61AFBF,  4, 11 |     821) (41791F53, 41B8EF53,  4, 10 |     808) 
(41791F53, 41B8EF53,  4,  3 |     641) (EB61AFC1, AB61AFBF,  4, 27 |      15) (41791F53, 41B8EF53,  4,  9 |      14) (6DCF57BF, A9D0B3BD,  4, 15 |       2)
R[ 5] 0 |
R[ 6] 0 |
R[ 7] 26 | (87FD7A85,  7FD7A83,  7, 30 |    9011) (87FD7A85,  7FD7A83,  7,  0 |    8334) (87FD7A85,  7FD7A83,  7, 31 |    7441) (87FD7A85,  7FD7A83,  7, 28 |    5550) (87FD7A85,  7FD7A83,  7,  1 |    3662) (87FD7A85,  7FD7A83,  7, 29 |    3373) (87FD7A85,  7FD7A83,  7, 27 |    2056) (87FD7A85,  7FD7A83,  7, 13 |    1630) (87FD7A85,  7FD7A83,  7, 14 |    1302) (87FD7A85,  7FD7A83,  7, 15 |    1281) (87FD7A85,  7FD7A83,  7, 16 |    1271) (87FD7A85,  7FD7A83,  7, 17 |    1246) (87FD7A85,  7FD7A83,  7, 18 |    1200) (87FD7A85,  7FD7A83,  7, 12 |    1069) (87FD7A85,  7FD7A83,  7,  2 |    1030) (87FD7A85,  7FD7A83,  7, 19 |    1019) (87FD7A85,  7FD7A83,  7,  6 |     914) (87FD7A85,  7FD7A83,  7,  7 |     902) (87FD7A85,  7FD7A83,  7,  3 |     885) (87FD7A85,  7FD7A83,  7,  5 |     882) (87FD7A85,  7FD7A83,  7,  4 |     882) (87FD7A85,  7FD7A83,  7, 11 |     877) (87FD7A85,  7FD7A83,  7, 10 |     806) (87FD7A85,  7FD7A83,  7,  8 |     797) (87FD7A85,  7FD7A83,  7, 26 |     760) (87FD7A85,  7FD7A83,  7,  9 |     757)
R[ 8] 38 | (C0DDDC8F, 69D9DC87,  8,  6 |    5390) (C0DDDC8F, 69D9DC87,  8,  7 |    4154) (C0DDDC8F, 69D9DC87,  8,  8 |    2706) (69D9DC87, C0DDDC8F,  8,  6 |    2695) (69D9DC87, C0DDDC8F,  8,  7 |    2077) (C0DDDC8F, 69D9DC87,  8,  9 |    1870) (69D9DC87, C0DDDC8F,  8,  8 |    1353) (69D9DC87, C0DDDC8F,  8,  9 |     935) (C0DDDC8F, 69D9DC87,  8, 28 |     854) (C0DDDC8F, 69D9DC87,  8, 30 |     620) (69D9DC87, C0DDDC8F,  8, 28 |     427) (C0DDDC8F, 69D9DC87,  8, 10 |     390) (C0DDDC8F, 69D9DC87,  8, 31 |     364) (69D9DC87, C0DDDC8F,  8, 30 |     310) (C0DDDC8F, 69D9DC87,  8, 11 |     220) (69D9DC87, C0DDDC8F,  8, 10 |     195) (C0DDDC8F, 69D9DC87,  8, 29 |     192) (69D9DC87, C0DDDC8F,  8, 31 |     182) (69D9DC87, C0DDDC8F,  8, 11 |     110) (69D9DC87, C0DDDC8F,  8, 29 |      96) (C0DDDC8F, 69D9DC87,  8,  2 |      46) (69D9DC87, C0DDDC8F,  8,  2 |      23) (C0DDDC8F, 69D9DC87,  8, 20 |      10) (C0DDDC8F, 69D9DC87,  8,  3 |       6) (C0DDDC8F, 69D9DC87,  8,  0 |       6) (C0DDDC8F, 69D9DC87,  8,  5 |       6) (69D9DC87, C0DDDC8F,  8, 20 |       5) (C0DDDC8F, 69D9DC87,  8, 22 |       4) (C0DDDC8F, 69D9DC87,  8,  4 |       4) (C0DDDC8F, 69D9DC87,  8, 21 |       4) (C0DDDC8F, 69D9DC87,  8,  1 |       4) (69D9DC87, C0DDDC8F,  8,  3 |       3) (69D9DC87, C0DDDC8F,  8,  0 |       3) (69D9DC87, C0DDDC8F,  8,  5 |       3) (69D9DC87, C0DDDC8F,  8,  4 |       2) (69D9DC87, C0DDDC8F,  8, 21 |       2) (69D9DC87, C0DDDC8F,  8,  1 |       2) (69D9DC87, C0DDDC8F,  8, 22 |       2)
R[ 9] 0 |
R[10] 0 |
R[11] 2 | (38840FC0, 988014FF, 11, 10 |      64) (38840FC0, 988014FF, 11, 31 |       1)
R[12] 0 |
R[13] 0 |
R[14] 0 |
R[15] 19 | (C18AB13A, D1BAD13A, 15, 31 |   57021) (C18AB13A, D1BAD13A, 15, 23 |   22087) (C18AB13A, D1BAD13A, 15,  7 |   19151) (C18AB13A, D1BAD13A, 15, 24 |   16233) (C18AB13A, D1BAD13A, 15,  0 |   14224) (C18AB13A, D1BAD13A, 15, 16 |    7133) (C18AB13A, D1BAD13A, 15,  8 |    5689) (C18AB13A, D1BAD13A, 15, 15 |    4307) (C18AB13A, D1BAD13A, 15, 25 |    3867) (C18AB13A, D1BAD13A, 15,  6 |    2314) ( 507BFAD, 1C87830D, 15,  4 |     724) (C18AB13A, D1BAD13A, 15, 14 |     625) (C18AB13A, D1BAD13A, 15, 17 |     289) ( 507BFAD, 1C87830D, 15,  5 |     159) (C18AB13A, D1BAD13A, 15, 22 |      82) (C18AB13A, D1BAD13A, 15, 19 |      12) (C18AB13A, D1BAD13A, 15, 21 |      12) (C18AB13A, D1BAD13A, 15, 20 |       8) (C18AB13A, D1BAD13A, 15, 18 |       8)
R[16] 7 | (8CDE2099, 929E11A9, 16, 29 |     548) (8CDE2099, 929E11A9, 16, 10 |     195) (8CDE2099, 929E11A9, 16, 28 |      78) (8CDE2099, 929E11A9, 16, 11 |      59) (8CDE2099, 929E11A9, 16, 24 |      35) (8CDE2099, 929E11A9, 16, 27 |      11) (8CDE2099, 929E11A9, 16, 30 |       1)
R[17] 2 | (1588A8B0, 958806A2, 17, 30 |       1) (1588A8B0, 958806A2, 17,  4 |       1)
R[18] 1 | (F3B4BF81, 47B4C13C, 18,  2 |       1)
R[19] 0 |
R[20] 0 |
R[21] 0 |
R[22] 0 |
R[23] 39 | (D7537539,  7517537, 23, 29 |   12277) (D7537539,  7517537, 23, 16 |   10439) (69981A3E, 69741A92, 23, 31 |    9565) (D7537539,  7517537, 23, 30 |    7520) (D7537539,  7517537, 23, 18 |    5459) (69981A3E, 69741A92, 23, 30 |    4052) (69981A3E, 69741A92, 23, 15 |    1993) (D7537539,  7517537, 23, 28 |    1545) (69981A3E, 69741A92, 23, 16 |    1545) (D7537539,  7517537, 23, 17 |    1515) (D7537539,  7517537, 23, 19 |    1157) (69981A3E, 69741A92, 23, 14 |    1090) (69981A3E, 69741A92, 23,  0 |     832) (D7537539,  7517537, 23, 31 |     813) (69981A3E, 69741A92, 23,  1 |     423) (D7537539,  7517537, 23, 15 |     416) (69981A3E, 69741A92, 23, 29 |     273) (D7537539,  7517537, 23, 20 |     222) (69981A3E, 69741A92, 23, 28 |     196) (D7537539,  7517537, 23,  8 |     183) (589305B9, 98C305E1, 23, 30 |     134) (589305B9, 98C305E1, 23, 31 |      99) (69981A3E, 69741A92, 23, 17 |      94) (D7537539,  7517537, 23, 10 |      27) (D7537539,  7517537, 23,  9 |      25) (69981A3E, 69741A92, 23,  2 |      16) (D7537539,  7517537, 23, 21 |      14) (589305B9, 98C305E1, 23,  1 |      14) (589305B9, 98C305E1, 23,  0 |      10) (D7537539,  7517537, 23,  0 |       7) (69981A3E, 69741A92, 23, 18 |       3) (69981A3E, 69741A92, 23, 22 |       3) (69981A3E, 69741A92, 23,  6 |       3) (69981A3E, 69741A92, 23,  5 |       2) (69981A3E, 69741A92, 23, 21 |       2) (69981A3E, 69741A92, 23,  4 |       1) (69981A3E, 69741A92, 23, 20 |       1) (69981A3E, 69741A92, 23, 19 |       1) (69981A3E, 69741A92, 23,  3 |       1)
R[24] 10 | (C7D255F6, C7D93582, 24,  4 |    1112) (C7D255F6, C7D93582, 24,  0 |     498) (C7D255F6, C7D93582, 24, 27 |     486) (C7D255F6, C7D93582, 24, 28 |      86) (C7D255F6, C7D93582, 24, 25 |      68) (C7D255F6, C7D93582, 24, 31 |      31) (C7D255F6, C7D93582, 24,  1 |       9) (C7D255F6, C7D93582, 24, 26 |       4) (C7D255F6, C7D93582, 24,  5 |       3) (C7D255F6, C7D93582, 24,  2 |       1)
R[25] 0 |
R[26] 9 | (428E4CEB, 42646C54, 26, 13 |    1960) (428E4CEB, 42646C54, 26, 14 |     770) (8118BD6A, 810B9D70, 26, 24 |      86) (8118BD6A, 810B9D70, 26, 23 |      36) (8118BD6A, 810B9D70, 26, 29 |      31) (3FBA26AB, 3CB22691, 26, 12 |      12) (3FBA26AB, 3CB22691, 26, 13 |       8) (3FBA26AB, 3CB22691, 26, 30 |       4) (428E4CEB, 42646C54, 26, 11 |       2)
R[27] 0 |
R[28] 7 | ( AF04B07,  F2F4B23, 28, 30 |     335) ( AF04B07,  F2F4B23, 28, 31 |     126) ( AF04B07,  F2F4B23, 28, 10 |     121) ( AF04B07,  F2F4B23, 28, 19 |      53) ( AF04B07,  F2F4B23, 28, 18 |      21) ( AF04B07,  F2F4B23, 28, 11 |       1) ( AF04B07,  F2F4B23, 28,  9 |       1)
R[29] 19 | (5E3BB440, 5E3CE5E8, 29, 22 |    8260) (5E3BB440, 5E3CE5E8, 29, 23 |    2633) (5E3BB440, 5E3CE5E8, 29, 28 |    2608) (5E3BB440, 5E3CE5E8, 29, 30 |    1414) (5E3BB440, 5E3CE5E8, 29, 24 |    1252) (5E3BB440, 5E3CE5E8, 29, 21 |     947) (5E3BB440, 5E3CE5E8, 29, 31 |     833) (5E3BB440, 5E3CE5E8, 29, 25 |     617) (5E3BB440, 5E3CE5E8, 29, 20 |     368) (5E3BB440, 5E3CE5E8, 29, 19 |     227) (5E3BB440, 5E3CE5E8, 29, 29 |      47) (5E3BB440, 5E3CE5E8, 29, 26 |      39) (5E3BB440, 5E3CE5E8, 29,  0 |      33) (5E3BB440, 5E3CE5E8, 29, 27 |       6) (5E3BB440, 5E3CE5E8, 29, 16 |       3) (5E3BB440, 5E3CE5E8, 29, 10 |       2) (5E3BB440, 5E3CE5E8, 29, 18 |       1) (5E3BB440, 5E3CE5E8, 29, 17 |       1) (5E3BB440, 5E3CE5E8, 29,  1 |       1)
R[30] 0 |
R[31] 0 |
[./src/rc5-dc.cc:1896] Enter rc5_last_round_rot_const_keyrec()
[./src/rc5-dc.cc:1899] R sizes [0 : 31] =   0  15   0   1  55   0   0  26  38   0   0   2   0   0   0  19   7   2   1   0   0   0   0  39  10   0   9   0   7  19   0   0
[./src/rc5-dc.cc:1945] R[28] size 7
[
 */

/* --- */
	 // Fragment: enumerate all 2^(2*RC5_LOG2W) values of o. The low
	 // RC5_LOG2W bits of o go to the left word and the next RC5_LOG2W
	 // bits to the right word, overwriting the low RC5_LOG2W bits of both
	 // plaintexts of a copy of cp_pair_j. The for-loop opened here is
	 // not closed within this fragment.
	 uint32_t oracle_size = (1U << (RC5_LOG2W + RC5_LOG2W));
	 for(uint32_t o = 0; o < oracle_size; o++) {


	 pair_t cp_pair = cp_pair_j;
	 uint32_t x_left  = o & RC5_ROT_MASK; // 5 LSB
	 uint32_t x_right = (o >> RC5_LOG2W) & RC5_ROT_MASK; // 5 MSB

	 printf("[%s:%d] Oracle %8X %8X\n", __FILE__, __LINE__, x_left, x_right);

	 // replace the low 5 bits by the oracle
	 // zero_lsb has the RC5_LOG2W least-significant bits cleared
    uint32_t zero_lsb = 0xffffffff << RC5_LOG2W;

    cp_pair.plaintext_first[left]  = (cp_pair.plaintext_first[left] & zero_lsb) | x_left;
    cp_pair.plaintext_first[right] = (cp_pair.plaintext_first[right] & zero_lsb) | x_right;

    cp_pair.plaintext_second[left]  = (cp_pair.plaintext_second[left] & zero_lsb) | x_left;
    cp_pair.plaintext_second[right] = (cp_pair.plaintext_second[right] & zero_lsb) | x_right;

	 // Disabled check whether the modified pair still equals cp_pair_j;
	 // its body is empty.
#if 0 // DEBUG
	 if((cp_pair.plaintext_first[left] == cp_pair_j.plaintext_first[left]) &&
		 (cp_pair.plaintext_first[right] == cp_pair_j.plaintext_first[right]) &&
		 (cp_pair.plaintext_second[left] == cp_pair_j.plaintext_second[left]) &&
		 (cp_pair.plaintext_second[right] == cp_pair_j.plaintext_second[right])) {
		//		assert(0 == 1);
	 }
#endif


/* --- */

/* 

E6 60 0 1B AA F6 EF 1E 8B F4 16 82 92 88 91 8E 
D3A69B4A D75E7FBC 53A69B4A 575E7FBC 2B4D8E2E 9BEC8E6E 1B4D20CE 7164866E 1
9E949CDE 7450D436 1E949CDE F450D436 1C9E2EA8 39929D67 1C7404A8 13999D67 1

sscanf(line, "%X %X %X %X %X %X %X %X %d\n", 
&x1_L, &x1_R, &x2_L, &x2_R, &y1_L, &y1_R, &y2_L, &y2_R, &is_good);

 */
/* 
dpt: 80000000 80000000

pt1: D3A69B4A D75E7FBC 
pt2: 53A69B4A 575E7FBC 

ct1: 2B4D8E2E 9BEC8E6E 
ct2: 1B4D20CE 7164866E

 */

/* --- */


/* 

	[./src/rc5-dc.cc:1739] R[ 5] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[10] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[15] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[20] #( 0/ 1) mask       1F key    9031E
	[./src/rc5-dc.cc:1739] R[25] #( 0/ 1) mask       1F key  179031E
	[./src/rc5-dc.cc:1739] R[30] #( 0/ 1) mask        3 key 3B79031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1912] WHILE OUT  4 >  0

	[./src/rc5-dc.cc:1739] R[ 4] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[ 9] #( 0/ 1) mask       1F key      11E
	[./src/rc5-dc.cc:1739] R[14] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[19] #( 0/ 1) mask       1F key    1031E
	[./src/rc5-dc.cc:1739] R[24] #( 0/ 1) mask       1F key   79031E
	[./src/rc5-dc.cc:1739] R[29] #( 0/ 1) mask        7 key 1B79031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1912] WHILE OUT  3 >  0

	[./src/rc5-dc.cc:1739] R[ 3] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[ 8] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[13] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[18] #( 0/ 1) mask       1F key    1031E
	[./src/rc5-dc.cc:1739] R[23] #( 0/ 1) mask       1F key   79031E
	[./src/rc5-dc.cc:1739] R[28] #( 0/ 1) mask        F key  B79031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1912] WHILE OUT  2 >  0

	[./src/rc5-dc.cc:1739] R[ 2] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[ 7] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[12] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[17] #( 0/ 1) mask       1F key    1031E
	[./src/rc5-dc.cc:1739] R[22] #( 0/ 1) mask       1F key   39031E
	[./src/rc5-dc.cc:1739] R[27] #( 0/ 1) mask       1F key  379031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1912] WHILE OUT  1 >  0

	[./src/rc5-dc.cc:1739] R[ 1] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[ 6] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[11] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[16] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[21] #( 0/ 1) mask       1F key   19031E
	[./src/rc5-dc.cc:1739] R[26] #( 0/ 1) mask       1F key  379031E
	[./src/rc5-dc.cc:1739] R[31] #( 0/ 1) mask        1 key 7B79031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E

	[./src/rc5-dc.cc:1942] Correct key cnt = 5 / 1
	[./src/rc5-dc.cc:177] Key vec size 1 (2^0.000000)

[    0] 7B79031E 5 (2^2.321928)  <-
[./src/rc5-dc.cc:1954] #Key candidates 1 (2^0.000000)
[./src/rc5-dc.cc:1966] FOUND: Correct key S[17] 7B79031E appears 5 (2^2.321928) times among  1 (2^0.000000) candidates

real    0m15.604s
user    0m15.561s
sys     0m0.012s


 */


/* --- */
/* 
	[./src/rc5-dc.cc:1868] R[ 0] size 1
	[./src/rc5-dc.cc:1878] Init LSB R[ 0] #( 0/ 1) (AF0C8937 AEA6BF37  0 25)
	[./src/rc5-dc.cc:1912] WHILE OUT  5 >  0
	[./src/rc5-dc.cc:1739] R[ 5] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[10] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[15] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[20] #( 0/ 1) mask       1F key    9031E
	[./src/rc5-dc.cc:1739] R[25] #( 0/ 1) mask       1F key  179031E
	[./src/rc5-dc.cc:1793] new_i 30 | 2 < 5
	[./src/rc5-dc.cc:1794] Shorten mask 3
	[./src/rc5-dc.cc:1739] R[30] #( 0/ 1) mask        3 key 3B79031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1912] WHILE OUT  4 >  0
	[./src/rc5-dc.cc:1739] R[ 4] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[ 9] #( 0/ 1) mask       1F key      11E
	[./src/rc5-dc.cc:1739] R[14] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[19] #( 0/ 1) mask       1F key    1031E
	[./src/rc5-dc.cc:1739] R[24] #( 0/ 1) mask       1F key   79031E
	[./src/rc5-dc.cc:1793] new_i 29 | 3 < 5
	[./src/rc5-dc.cc:1794] Shorten mask 7
	[./src/rc5-dc.cc:1739] R[29] #( 0/ 1) mask        7 key 1B79031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1912] WHILE OUT  3 >  0
	[./src/rc5-dc.cc:1739] R[ 3] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[ 8] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[13] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[18] #( 0/ 1) mask       1F key    1031E
	[./src/rc5-dc.cc:1739] R[23] #( 0/ 1) mask       1F key   79031E
	[./src/rc5-dc.cc:1793] new_i 28 | 4 < 5
	[./src/rc5-dc.cc:1794] Shorten mask F
	[./src/rc5-dc.cc:1739] R[28] #( 0/ 1) mask        F key  B79031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1912] WHILE OUT  2 >  0
	[./src/rc5-dc.cc:1739] R[ 2] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[ 7] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[12] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[17] #( 0/ 1) mask       1F key    1031E
	[./src/rc5-dc.cc:1739] R[22] #( 0/ 1) mask       1F key   39031E
	[./src/rc5-dc.cc:1739] R[27] #( 0/ 1) mask       1F key  379031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1912] WHILE OUT  1 >  0
	[./src/rc5-dc.cc:1739] R[ 1] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[ 6] #( 0/ 1) mask       1F key       1E
	[./src/rc5-dc.cc:1739] R[11] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[16] #( 0/ 1) mask       1F key      31E
	[./src/rc5-dc.cc:1739] R[21] #( 0/ 1) mask       1F key   19031E
	[./src/rc5-dc.cc:1739] R[26] #( 0/ 1) mask       1F key  379031E
	[./src/rc5-dc.cc:1793] new_i 31 | 1 < 5
	[./src/rc5-dc.cc:1794] Shorten mask 1
	[./src/rc5-dc.cc:1739] R[31] #( 0/ 1) mask        1 key 7B79031E
	[./src/rc5-dc.cc:1713] Found key 7B79031E
	[./src/rc5-dc.cc:1942] Correct key cnt = 5 / 1
	[./src/rc5-dc.cc:177] Key vec size 1 (2^0.000000)
[    0] 7B79031E 5 (2^2.321928)  <-
[./src/rc5-dc.cc:1954] #Key candidates 1 (2^0.000000)
[./src/rc5-dc.cc:1966] FOUND: Correct key S[17] 7B79031E appears 5 (2^2.321928) times among  1 (2^0.000000) candidates

real    0m15.604s
user    0m15.561s
sys     0m0.012s

 */

/* ---- */
/* 
201400910
 */


/**
 * Recursively recover the bits of the round key using rot constants
 * guessed from the GoUP filter. Called from \ref
 * rc5_last_round_rot_const_keyrec .
 *
 * Every entry of R[i_in] is used to derive the next chunk of key bits
 * (selected by \p i_rot_mask_in, starting at bit position \p i_in)
 * twice: once from the first ciphertext word (y) and once from the
 * second one (yy). The recursion continues only when both derivations
 * agree. When the bit position reaches \ref WORD_SIZE the assembled
 * key is recorded in \p key_set.
 *
 * \param i_in rotation constant = position of the LS key bit to recover
 * \param i_rot_mask_in mask selecting the key bits recovered at this
 *        step; can vary from 1 to RC5_ROT_MASK
 * \param key_in key bits recovered so far: k[i - 1 : 0]
 * \param key_correct_value the correct key (used for DEBUG output only)
 * \param R array of \ref WORD_SIZE vectors - one for each rotation
 *          constant
 * \param key_set stores the suggested key candidates
 *
 * \see rc5_last_round_rot_const_keyrec
 */
void rc5_last_round_rot_const_keyrec_i(const uint32_t i_in, // rot const
													const uint32_t i_rot_mask_in, // can vary from 1 to RC5_ROT_MASK
													const WORD key_in, // k[i - 1 : 0]
													const WORD key_correct_value, // for DEBUG
													const std::vector<eq_x_params_t> R[WORD_SIZE],
													std::set<rc5_key_t, rc5_compare_key_by_value>* key_set)
//													std::vector<WORD>* key_cand_vec)
{
  const uint32_t i = i_in;
  const WORD key = key_in;
  const uint32_t i_rot_mask = i_rot_mask_in;
  //  printf("[%s:%d] Enter %s() i = %2d\n", __FILE__, __LINE__, __FUNCTION__, i);

  // All bits recovered: record the candidate and stop the recursion.
  if(i >= WORD_SIZE) {
	 rc5_key_set_update((WORD)key, key_set);
#if 1 // DEBUG
	 if(key == key_correct_value) {
		printf("[%s:%d] Found key %8X\n", __FILE__, __LINE__, key);
	 }
#endif // #if 1 // DEBUG
	 //	 assert(key != key_correct_value);
	 //	 key_cand_vec->push_back(key);
	 return;
  }

  // Bind by reference: R is read-only here, so there is no need to copy
  // the whole vector on every level of the recursion.
  const std::vector<eq_x_params_t>& i_R = R[i];
  if(i_R.empty()) {
	 printf("[%s:%d] WARNING! No pair with rot const = %d .\n", __FILE__, __LINE__, i);
	 printf("[%s:%d] Returning... \n", __FILE__, __LINE__);
	 return;
  }
  //  assert(i_R.size() != 0);

  /**
   * Masks the i LS bits. A shift by 32 is undefined, so i == 0 is
   * handled explicitly (no bits selected).
   */
  const uint32_t i_mask = (i == 0) ? 0 : (0xffffffff >> (32 - i));

  /**
   * Cycle through all entries that have rotation constant i 
   */
  uint32_t R_cnt = 0;
  std::vector<eq_x_params_t>::const_iterator i_iter;
  for(i_iter = i_R.begin(); i_iter != i_R.end(); ++i_iter, ++R_cnt) {

	 //	 printf("[%s:%d] CHECKPOINT %s()\n", __FILE__, __LINE__, __FUNCTION__);
#if 1 // DEBUG
	 // size() is a size_t: cast so that it matches the %2d conversion
	 printf("[%s:%d] R[%2d] #(%2d/%2d) mask %8X key %8X \n", __FILE__, __LINE__, i, R_cnt, (uint32_t)i_R.size(), i_rot_mask, key);
#endif // #if 1 // DEBUG

	 const eq_x_params_t& i_params = *i_iter;
	 //	 WORD dx = i_params.dx;
	 WORD y = i_params.y;		  // right ciphertext-1
	 WORD yy = i_params.yy;		  // right ciphertext-2 (DEBUG)
	 WORD r = i_params.rot_const; // rot const from left ciphertext  (= r7)
	 WORD r_prev = i_params.rot_const_prev; // rot const from previous round (= r6)
	 assert(i == r);
	 /**
     * x[log2(w)+r7-1 : r7] = (r7 ^ r6) <<< r7;
     *
     * NOTE(review): the formula above is a rotation, the code below is
     * a plain left shift - confirm that this is intended.
     */
	 WORD x = ((r ^ r_prev) << r); // <---- CHECK
	 assert(((r ^ r_prev) << r) == ((r ^ r_prev) << i));

    /**
     * Check if the following subtraction generates a borrow:
     * y[i-1 : 0] - k[i-1 : 0]
     */
	 const uint32_t c = ((y & i_mask) < (key & i_mask)) ? 1 : 0;	  // borrow
	 const uint32_t cc = ((yy & i_mask) < (key & i_mask)) ? 1 : 0; // borrow (DEBUG)

	 /**
	  * y[log2(w)+i-1 : i], x[log2(w)+i-1 : i]
	  */
	 WORD y_log2w_i = (y >> i) & i_rot_mask;
	 WORD yy_log2w_i = (yy >> i) & i_rot_mask;
	 WORD x_log2w_i = (x >> i) & i_rot_mask;

	 // next chunk of key bits, derived independently from y and from yy
	 WORD key_log2w_i = (y_log2w_i - x_log2w_i - c) & i_rot_mask;
	 WORD kkey_log2w_i = (yy_log2w_i - x_log2w_i - cc) & i_rot_mask;

	 WORD key_rec = (key_log2w_i << i) | (key & i_mask);
	 WORD kkey_rec = (kkey_log2w_i << i) | (key & i_mask);

	 uint32_t new_i = i + hw32(i_rot_mask); // RC5_LOG2W = HW(RC5_ROT_MASK)
	 uint32_t new_i_rot_mask = RC5_ROT_MASK;//i_rot_mask;

	 if((new_i < WORD_SIZE) && ((WORD_SIZE - new_i) < hw32(RC5_ROT_MASK))) { // if less than log2(w) remain then shorten the mask 
		new_i_rot_mask = 0xffffffff >> (32 - (WORD_SIZE - new_i));
#if 1 // DEBUG
		printf("[%s:%d] new_i %d | %d < %d\n", __FILE__, __LINE__, new_i, (WORD_SIZE - new_i), hw32(RC5_ROT_MASK));
		printf("[%s:%d] Shorten mask %X\n", __FILE__, __LINE__, new_i_rot_mask);
#endif // #if 1 // DEBUG
	 }

	 // Follow this entry only if both ciphertexts suggest the same key bits.
	 if(key_rec == kkey_rec) {
		// new_i and new_i_rot_mask are passed by pointer and may be adjusted by the helper
		bool b_next = rc5_rot_const_array_get_next_nonzero_index(i , R, &new_i, &new_i_rot_mask);
		//	 if((b_next) && (key_rec == kkey_rec)) {	  // !!! <---
		if(b_next) {	  // !!! <---
		  rc5_last_round_rot_const_keyrec_i(new_i, new_i_rot_mask, key_rec, key_correct_value, R, key_set);
		} else {
#if 0 // DEBUG
		  printf("[%s:%d] b_next is false\n", __FILE__, __LINE__);
#endif // #if 0 // DEBUG
		}
	 } else {
#if 0 // DEBUG
		printf("[%s:%d] key_rec != kkey_rec %8X %8X\n", __FILE__, __LINE__, key_rec, kkey_rec);
#endif // #if 0 // DEBUG
	 }
  }

}

/**
 * From a set of candidate good chosen plaintext/ciphertext
 * (i.e. pairs that passed the filtering process), perform a key
 * recovery procedure of the last round key using knowledge of the
 * rotation constant in the last round and of the rotation constant in
 * the next to last round.
 *
 * The array \p R contains a set of (good or filtered) chosen
 * plaintext/ciphertext pairs subdivided into subsets according to the
 * rotation constant in the last round. The latter is determined by
 * the log2(WORD_SIZE) LS bits of the left part of the ciphertext.
 *
 * \p R is an array of vectors. The array has \p WORD_SIZE
 * slots - one for each rotation constant. Note that some of the
 * vectors in \p R may be empty vectors.
 *
 * The recovery starts from the entries with rotation constant 0; if
 * R[0] is empty the function returns false immediately.
 *
 * \param S expanded key (for DEBUG purposes); the correct last round
 *          key is taken from S[2 + 2*nrounds - 1]
 * \param nrounds number of attacked rounds.
 * \param R array of \ref WORD_SIZE vectors - one for each rotation
 *          constant
 *
 * \return true if the correct key is among the recovered key
 *         candidates, false otherwise.
 * \see rc5_pairs_classify_by_last_round_rot_const
 */
bool rc5_last_round_rot_const_keyrec(const WORD S[RC5_STAB_LEN_T], 
												 const uint32_t nrounds,
												 const std::vector<eq_x_params_t> R[WORD_SIZE])
{
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

#if 1	 // DEBUG
  printf("[%s:%d] R sizes [0 : %2d] = ", __FILE__, __LINE__, WORD_SIZE - 1);  
  for(uint32_t i = 0; i < WORD_SIZE; i++) {
	 printf(" %2d ", (uint32_t)R[i].size());
	 //	 printf("R[%2d] size %d\n", i, R[i].size());
  }
  printf("\n");
#endif // #if 1 // DEBUG

  const uint32_t i = 0;
  // Bind by reference: R is only read, no need to copy the vector.
  const std::vector<eq_x_params_t>& i_R = R[i];
  if(i_R.empty()) {
	 printf("[%s:%d] No pair with rot const = %d . Returning...\n", __FILE__, __LINE__, i);
	 return false;
  }

  const WORD key_correct_value = S[2 + (2*nrounds) - 1];
  std::set<rc5_key_t, rc5_compare_key_by_value> key_set;
  printf("[%s:%d] R[%2d] size %d\n", __FILE__, __LINE__, i, (uint32_t)i_R.size());

  // Start one recursive search from every entry with rotation constant 0.
  uint32_t R_cnt = 0;
  std::vector<eq_x_params_t>::const_iterator i_iter;
  for(i_iter = i_R.begin(); i_iter != i_R.end(); ++i_iter, ++R_cnt) {
	 const eq_x_params_t& i_params = *i_iter;
	 WORD y = i_params.y;		  // R_n
	 WORD yy = i_params.yy;		  // RR_n
	 WORD r = i_params.rot_const; // r_n = L_n[log2(w) - 1 : 0]
	 WORD r_prev = i_params.rot_const_prev; // r_{n-1} = L_{n-1}[log2(w) - 1 : 0]
#if 1 // DEBUG
	 // size() is a size_t: cast so that it matches the %2d conversion
	 printf("[%s:%d] Init LSB R[%2d] #(%2d/%2d) (%8X %8X %2d %2d)\n", __FILE__, __LINE__, i, R_cnt, (uint32_t)i_R.size(), y, yy, r, r_prev);
#endif // #if 1 // DEBUG
	 assert(i == r);
	 assert(i == 0);
	 WORD x = (r ^ r_prev); // x[log2(w) + r_n : r_n] = ((r_{n-1} ^ r_{n}) <<< r_{n});
#if 0
	 if(i == 0) {
		assert((y & RC5_ROT_MASK) == (yy & RC5_ROT_MASK));
	 }
#endif
	 // LS key bits as suggested by each of the two ciphertexts
	 WORD key_rec = (y - x) & RC5_ROT_MASK;
	 WORD kkey_rec = (yy - x) & RC5_ROT_MASK;

	 if(key_rec != kkey_rec) {
		// if we get here, then the guess for r and r_prev must have been wrong
		printf("[%s:%d] key_rec != kkey_rec %8X %8X\n", __FILE__, __LINE__, key_rec, kkey_rec);
		continue;
	 }
	 assert(key_rec == kkey_rec);
#if 0									  // DEBUG
	 if(key_rec == (key_correct_value & RC5_ROT_MASK)) {
		printf("[%s:%d] Last %2d bits MATCH: %X %X\n", __FILE__, __LINE__,
				 hw32(RC5_ROT_MASK), key_rec, (key_correct_value & RC5_ROT_MASK));
	 }
#endif
	 uint32_t i_rot_mask = RC5_ROT_MASK;
	 uint32_t new_i = i + hw32(i_rot_mask); // RC5_LOG2W = HW(RC5_ROT_MASK)
	 uint32_t new_i_rot_mask = RC5_ROT_MASK;//i_rot_mask;

	 // new_i and new_i_rot_mask are passed by pointer and may be adjusted by the helper
	 bool b_next = rc5_rot_const_array_get_next_nonzero_index(i , R, &new_i, &new_i_rot_mask);

	 //	 printf("[%s:%d] CHECKPOINT %s()\n", __FILE__, __LINE__, __FUNCTION__);
	 if(b_next) {
		//		rc5_last_round_rot_const_keyrec_i(new_i, new_i_rot_mask, key_rec, R, &key_cand_vec);
		rc5_last_round_rot_const_keyrec_i(new_i, new_i_rot_mask, key_rec, key_correct_value, R, &key_set);
	 } else {
		printf("[%s:%d] Could not find a non-zero slot in R[%2d : %2d] (b_next = FALSE (%d)). Exiting...\n", __FILE__, __LINE__, i, (i+RC5_LOG2W), b_next);
		//		printf("[%s:%d] All first %d slotes of R are empty. Terminating...\n", __FILE__, __LINE__, RC5_LOG2W);
	 }
  }

  rc5_key_t key_correct = {key_correct_value, 1};

  // Copy the candidates into a vector (to be sorted by counter below) and
  // remember the counter of the correct key, if it is present.
  uint32_t cnt = 0;
  std::vector<rc5_key_t> key_vec;
  key_vec.reserve(key_set.size());
  std::set<rc5_key_t, rc5_compare_key_by_value>::const_iterator vec_iter;
  for(vec_iter = key_set.begin(); vec_iter != key_set.end(); ++vec_iter) {
	 const rc5_key_t& key = *vec_iter;
	 key_vec.push_back(key);
	 if(key.value == key_correct_value) {
		cnt = key.counter;
	 }
  }

  printf("[%s:%d] Correct key cnt = %d / %d\n", __FILE__, __LINE__, cnt, (uint32_t)key_set.size());

  std::sort(key_vec.begin(), key_vec.end(), rc5_struct_key_compare_by_counter);

  // Print at most the first k candidates after sorting.
  uint32_t k = 8;
  uint32_t N = k;
  if(key_vec.size() < k) {
    N = (uint32_t)key_vec.size();
  }
  rc5_key_vec_print(key_vec, key_correct_value, N);

  printf("[%s:%d] #Key candidates %d (2^%f)\n", 
			__FILE__, __LINE__, (uint32_t)key_set.size(), log2(key_set.size()));

  //  uint32_t cnt_correct = 0;
  //  std::vector<WORD> key_cand_vec;
  //  std::vector<rc5_key_t> key_cand_vec;

  std::set<rc5_key_t, rc5_compare_key_by_value>::iterator set_iter = 
	 key_set.lower_bound(key_correct);

  // lower_bound() may return end(): test for that BEFORE dereferencing.
  const bool b_found = ((set_iter != key_set.end()) && (set_iter->value == key_correct.value));
  if(b_found) {
	 printf("[%s:%d] FOUND: Correct key S[%d] %8X appears %lld (2^%f) times among  %d (2^%f) candidates\n", 
			  __FILE__, __LINE__, (2 + (2*nrounds) - 1), S[2 + (2*nrounds) - 1], 
			  (long long int)set_iter->counter,  log2(set_iter->counter),
			  (uint32_t)key_set.size(), log2(key_set.size()));
	 assert(set_iter->counter == cnt);
  } else {
	 printf("[%s:%d] NOT found: Correct key S[%d] %8X NOT found among  %d (2^%f) candidates\n", 
			  __FILE__, __LINE__, (2 + (2*nrounds) - 1), S[2 + (2*nrounds) - 1], 
			  (uint32_t)key_set.size(), log2(key_set.size()));
  }

#if 0

  std::vector<rc5_key_t>::iterator key_cand_iter = key_cand_vec.begin();
  for(key_cand_iter = key_cand_vec.begin(); key_cand_iter != key_cand_vec.end(); key_cand_iter++) {

	 WORD key_cand = key_cand_iter->value;
	 WORD key_correct = S[2 + (2*nrounds) - 1];
	 if(key_cand == key_correct) {
		cnt_correct++;
#if 0
		printf("[%s:%d] %8X = %8X = S[%2d] cnt %d / %d\n", __FILE__, __LINE__, 
				 key_cand, key_correct, (2 + (2*nrounds) - 1), cnt_correct, key_cand_vec.size());
#endif
	 }
  }
  printf("[%s:%d] #Key candidates %d (2^%f)\n", 
			__FILE__, __LINE__, (uint32_t)key_cand_vec.size(), log2(key_cand_vec.size()));
  printf("[%s:%d] Correct key S[%d] %8X appears %d (2^%f) times among  %d (2^%f)\n", 
			__FILE__, __LINE__, (2 + (2*nrounds) - 1), S[2 + (2*nrounds) - 1], 
			cnt_correct,  log2(cnt_correct),
			(uint32_t)key_cand_vec.size(), log2(key_cand_vec.size()));
#endif
#if 0									  // DEBUG
  printf("[%s:%d] RC5_FIXED_KEY %d | Expanded key[%d] = {", __FILE__, __LINE__, RC5_FIXED_KEY, RC5_STAB_LEN_T);
  for(uint32_t j = 0; j < RC5_STAB_LEN_T; j++) {
	 printf("0x%8X, ", S[j]);
  }
  printf("};\n");
#endif  // #if 0 // DEBUG
  return b_found;
}


/* --- */
#if 0 // bit by bit
		new_i_rot_mask = 1;
		new_i = i + 1;
#endif // #if 1 // bit by bit
#else // try bit by bit
	 uint32_t i_rot_mask = 1; // try next bit
	 uint32_t new_i = i + hw32(i_rot_mask); // RC5_LOG2W = HW(RC5_ROT_MASK)
	 uint32_t new_i_rot_mask = 1;
#endif // #if 0 // try by strides of 5 bits (original)
#else
		bool b_end = false;
	 if((new_i < WORD_SIZE) && ((WORD_SIZE - new_i) < hw32(new_i_rot_mask))) { // if less than log2(w) remain then shorten the mask 
		b_end = true;
#endif // #if 0 // original

/* --- */

		bool b_all_noise = true;
		  if((b_enough) && (params.dx == 1)) {
			 b_all_noise = false;
		  }
		// be sure not all is noise
		if((b_enough) && (b_all_noise)) {
		  b_enough = false;
		}


/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
	 assert((WORD_SIZE == 16) || (WORD_SIZE == 32));
	 uint32_t bit_seq_len = (uint32_t)log2(WORD_SIZE);
 	 assert((bit_seq_len == 4) || (bit_seq_len == 5));
#if 1
	 uint32_t bit_seq = 0; // 00...0
	 b_match = rc5_last_round_eq_x_bit_seq_match_bit_i(i, dx_i, rot_const_prev, bit_seq, bit_seq_len);
#else // NEW!!! 
	 // {!!!!!!!!
	 uint32_t bit_seq = 0;
	 uint32_t dx_next_i = dx_i ^ ((dx_prev >> i) & 1);
	 //	 printf("[%s:%d] %d %d %d\n", __FILE__, __LINE__, dx_next_i, dx_i, ((dx_prev >> i) & 1));
	 b_match = rc5_last_round_eq_x_bit_seq_match_bit_i(i, dx_next_i, rot_const_prev, bit_seq, bit_seq_len);
	 // !!!!!!!!}
#endif // #if 0 // NEW
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

/* --- */
/* 
20140909
 */
uint32_t rc5_filter_go_up_nl_i(const uint32_t depth, 
										 const pair_t pc_pair,
										 const gsl_matrix* A_last[2][2][2],
										 const gsl_vector* L_last,
										 const gsl_vector* C_last,
										 const gsl_matrix* A_mid[2][2],
										 const gsl_vector* L_mid,
										 const gsl_vector* C_mid,
										 uint32_t* count, 
										 const std::vector<uint32_t> fib_array, 
										 const std::vector<double> p_thres_array,
										 WORD** logp2hw_arr,
										 const uint32_t logp2hw_arr_rows,
										 const uint32_t logp2hw_arr_cols,
										 const rc5_goup_diffs_t* ds_array,
										 boost::unordered_map<rc5_goup_diffs_t, WORD, rc5_goup_diffs_hash, rc5_goup_diffs_equal_to>* goup_variants_hash_map)
{

  //  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);
#if RC5_FILTER_GOUP_DIFF_SET
  uint32_t left = RC5_FEISTEL_LEFT;
#endif // #if RC5_FILTER_GOUP_DIFF_SET
#if 1									  // DEBUG
  assert(ds_array->D.size() == (RC5_FIB_LEN  + 1));
  assert(ds_array->D.size() == ds_array->S.size());
  assert(ds_array->D.size() == ds_array->len);
#endif

#if 1
  if(hw32(ds_array->D[depth]) > fib_array[depth + 1]) {
	 bool b_ret = (goup_variants_hash_map->size() != 0);
	 return b_ret;
  }
#endif
#if 0 // single variant
  if(goup_variants_hash_map->size() != 0) {
	 bool b_ret = (goup_variants_hash_map->size() != 0);
	 return b_ret;
  }
#endif
  uint32_t s = 0;

#if RC5_FILTER_CUT_HW1
  if((depth != 0) && (hw32(ds_array->D[depth]) > 1)) {
#else
  if(depth != 0) {
#endif // #if RC5_FILTER_CUT_HW1
	 //	 bool b_passed = false;
	 //	 bool b_passed = (goup_variants_hash_map->size() == 0);
	 //	 while((!b_passed) && (s < WORD_SIZE)) {
    for(s = 0; s < WORD_SIZE; s++) {
	 //	 s = random32() % WORD_SIZE;
	 //	 {
		std::vector<uint32_t> dx_vec;
		const uint32_t rot_const_prev = s;
#if RC5_FILTER_GOUP_DIFF_SET // depth = 6
		double p_thres = p_thres_array[depth];
		const uint32_t hw_thres = fib_array[depth]; // WORD_SIZE;
		//		printf("[%s:%d]  p_thres[%2d] %f\n", __FILE__, __LINE__, depth, log2(p_thres));
		//		printf("[%s:%d] hw_thres[%2d] %2d\n", __FILE__, __LINE__, depth, hw_thres);
		//		const uint32_t hw_thres = fib_array[depth - 1]; // WORD_SIZE;
		//		const uint32_t hw_thres = fib_array[depth - 2]; // WORD_SIZE;
		const uint32_t dx_prev = RC5_ROTL(ds_array->D[depth], s); // D[5] <<< S[5]
		//		printf("[%s:%d] CHECKPOINT! depth %d (depth - 2) %d\n", __FILE__, __LINE__, depth, depth - 2);
		//		assert((int32_t)(depth - 2) >= 0);
		//		assert(depth > 0);
		//		printf("[%s:%d] i %2d |  p_thres[%2d] %f\n", __FILE__, __LINE__, depth, depth, log2(p_thres));
		//		printf("[%s:%d] i %2d | hw_thres[%2d] %2d\n", __FILE__, __LINE__, depth, depth - 2, hw_thres);
		WORD y = 0;
		WORD yy = 0;
		// Generate a set of diffs dx
		if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
		  pair_t pc_pair = ds_array->pc_pair;
		  y = pc_pair.ciphertext_first[left]; // y[7] = left ciphertext 1
		  yy = pc_pair.ciphertext_second[left]; // yy[7] = left ciphertext 2
		  assert((y ^ yy) == ds_array->D[ds_array->len - 2]);
		  // (y[7], yy[7] -> {dx[5]})
		  rc5_xdp_add_last_round_diff_set_out(A_last, L_last, C_last, 
														  y, yy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#if 0 // go upper than the bottom two
		WORD dy = 0;
		//		if((depth < (RC5_FIB_LEN - 2)) && (depth >= (RC5_FIB_LEN - 7))) { // depth = 5
		//		if((depth < (RC5_FIB_LEN - 2)) && (depth >= (RC5_FIB_LEN - 5))) { // depth = 5
		//		if(depth == (RC5_FIB_LEN - 3)) {
		if((depth < (RC5_FIB_LEN - 2)) && (depth >= (RC5_FIB_LEN - RC5_FIB_LEN))) { // depth = 12
		//		if((depth < (RC5_FIB_LEN - 2)) && (depth >= (RC5_FIB_LEN - 4))) { // depth = 12
		  dy = ds_array->D[depth + 1]; // D[6]
		  rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
														 dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#endif // #if 0 // go upper than the bottom two
#endif // #if RC5_FILTER_GOUP_DIFF_SET
#if 1 // add also the input difference (if ADD is XOR)
		/*
		 * When the ADD is approximated as XOR, the output difference is
		 * the same as the input difference \p ds_array->D[depth + 1] so
		 * add it to the list if it has zeros in the places of the
		 * rotation constants for the enxt round.
		 */
		//		if((rot_seq == 0) && (hw32(RC5_ROTR(ds_array->D[depth + 1], s) ^ ds_array->D[depth]) <= hw_thres)) {
		uint32_t rot_seq = RC5_ROTR(ds_array->D[depth + 1], rot_const_prev) & RC5_ROT_MASK;
		if(rot_seq == 0) {
		  dx_vec.push_back(ds_array->D[depth + 1]); // dx[5] == D[7]
		}
#endif
		for(uint32_t i = 0; i < dx_vec.size(); i++) {
		  WORD dx = dx_vec[i]; // dx[5]

#if RC5_FLEX_FIB
		  double p_i = 1.0;
		  if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
			 p_i = rc5_xdp_add_last_round(A_last, L_last, C_last, y, yy, dx);
		  } else {
			 p_i = rc5_xdp_add_mid_round(A_mid, L_mid, C_mid, dy, dx);
		  }
		  uint32_t log2p = (uint32_t)std::abs(log2(p_i));
		  uint32_t i_round = depth + (RC5_FULL_FIB_LEN - (RC5_FIB_LEN - 2)) - 1;
		  if((i_round > ((2*NROUNDS) + 2))) {
			 printf("[%s:%d] i_round %2d depth %2d\n", __FILE__, __LINE__, i_round, depth);
		  }
		  assert(i_round <= ((2*NROUNDS) + 2));
		  //		  uint32_t hw_thres_flex = LOGP2HW_ARRAY[i_round][log2p];
		  uint32_t hw_thres_flex = logp2hw_arr[i_round][log2p];
		  //		  printf("[%s:%d] i_round %2d depth %2d p 2^%f | LOGP2HW_ARRAY[%2d][%2d] = %2d\n", __FILE__, __LINE__, i_round, depth, log2(p_i), i_round, log2p, hw_thres_flex);
		  uint32_t hw = hw32(dx ^ dx_prev);
		  if(hw > hw_thres_flex) {
			 continue;
		  }
#endif // #if RC5_FLEX_FIB

		  if((RC5_ROTR(dx, s) & RC5_ROT_MASK) == 0) { // if (dx[5] >>> S[6]) = 0
			 rc5_goup_diffs_t ds_array_new = *ds_array;
			 ds_array_new.D[depth - 1] =  RC5_ROTR(dx, s) ^ ds_array_new.D[depth]; // D[5] = (dx[5] >>> S[6]) ^ D[6]		 
			 ds_array_new.S[depth] = s; // S[6]
			 //			 b_passed = true;

			 // recursive call
			 rc5_filter_go_up_nl_i(depth - 1, pc_pair, A_last, L_last, C_last, A_mid, L_mid, C_mid, count, fib_array, p_thres_array, logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols, &ds_array_new, goup_variants_hash_map);

#if RC5_FILTER_GOUP_LIMIT_VARIANTS
			 uint32_t limit = RC5_FILTER_GOUP_LIMIT;
			 if(goup_variants_hash_map->size() > limit) {
				return true;
			 }
#endif // #if RC5_FILTER_GOUP_LIMIT_VARIANTS
		  }
		}
		//		s++;
	 }
  } else {							  // reached the top

	 /**
	  * If the GoUP filter covers all half rounds but the first
	  */
	 bool b_match_input_diff = true;
	 if(RC5_GOUP_LEVEL == (2*NROUNDS)) {
		assert(depth == 0);
		b_match_input_diff = rc5_is_goup_diffs_match_inputs(pc_pair, *ds_array, A_last, L_last, C_last, A_mid, L_mid, C_mid);
	 }
#if RC5_FILTER_CUT_HW1
	 if((b_match_input_diff) && (hw32(ds_array->D[depth]) <= 1)) {
#else // #if RC5_FILTER_CUT_HW1
	 if(b_match_input_diff) {
#endif // #if RC5_FILTER_CUT_HW1
		bool b_found = false;
		rc5_goup_diffs_hash goup_variants_hash_function;
		uint32_t hash_val = goup_variants_hash_function(*ds_array);
		std::pair<rc5_goup_diffs_t, uint32_t> new_pair (*ds_array, hash_val);
		uint32_t old_size = goup_variants_hash_map->size();
#if 1 // do not store variant
		goup_variants_hash_map->insert(new_pair);
#endif
		uint32_t new_size = goup_variants_hash_map->size();
		b_found = (new_size == old_size);
		if(!b_found) {
		  (*count)++;					  // accumulate num. of variants
#if 0 // DEBUG
		  //		  printf("[%s:%d] Add variant #%10d | dx[%2d / %10d] pair# %10d\n", __FILE__, __LINE__, *count, g_index, g_size, g_pair_i);
		  printf("[%s:%d] Add variant #%10d\n", __FILE__, __LINE__, *count);
		  for(uint32_t i = 0; i < ds_array->D.size(); i++) {
			 printf("D[%2d] %8X s %2d HW %2d\n", i, ds_array->D[i], ds_array->S[i], 
					  hw32(ds_array->D[i]));
		  }
		  printf("\n");
#endif // #if 0 // DEBUG
		}
		bool b_ret = (goup_variants_hash_map->size() != 0);
		return b_ret;
	 }
  }
  bool b_ret = (goup_variants_hash_map->size() != 0);
  return b_ret;
}


/* --- */
/* 
86 8F 39 16 7 CE B 9C BA 32 6B 2F 3A B 43 F2 
FD1F4557 3F7EAED7 7D1F4557 BF7EAED7 BB55D062 33DEC46A BB55C162 33DF07EA 1
244E81E0 83076FFF A44E81E0 3076FFF 30F8AB60 DE07EAA1 2C38AB60 D947EAA1 1
DA0D586E 6B048016 5A0D586E EB048016 F7AC0187 3EB0201E 17C40107 6EACE02E 1
342E591B 1CBAB3BF B42E591B 9CBAB3BF 2F4D68CC F921021D EFCD74CC D8DAFE25 1
 */
/* --- */
// determined as average of MINs over 32 keys 
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
/*
 * Probability thresholds of the form 1 / (2^k + 1). Entry i is written
 * as RC5_P_THRES_FROM_LOG2(k_i); the helper macro is local to this table.
 */
#define RC5_P_THRES_FROM_LOG2(k) ((double)1.0 / (double)((1U << (k)) + 1))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  RC5_P_THRES_FROM_LOG2(0),
  RC5_P_THRES_FROM_LOG2(0),
  RC5_P_THRES_FROM_LOG2(0),
  RC5_P_THRES_FROM_LOG2(2),
  RC5_P_THRES_FROM_LOG2(2),
  RC5_P_THRES_FROM_LOG2(2),
  RC5_P_THRES_FROM_LOG2(3),
  RC5_P_THRES_FROM_LOG2(4),
  RC5_P_THRES_FROM_LOG2(4),
  RC5_P_THRES_FROM_LOG2(5),
  RC5_P_THRES_FROM_LOG2(6),
  RC5_P_THRES_FROM_LOG2(6),
  RC5_P_THRES_FROM_LOG2(8),
  RC5_P_THRES_FROM_LOG2(10),
  RC5_P_THRES_FROM_LOG2(13),
  RC5_P_THRES_FROM_LOG2(12),
  RC5_P_THRES_FROM_LOG2(11),
  RC5_P_THRES_FROM_LOG2(14)
};
#undef RC5_P_THRES_FROM_LOG2
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

/* --- */

#if 1
		for(std::vector<pair_t>::iterator vec_iter = dx_set_all.begin(); 
			 vec_iter != dx_set_all.end(); vec_iter++) {

		}
#endif

/* --- */

/* 
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -4.09 [ 8] -5.04 [ 9] -6.02 [10] -7.01 [11] -8.01
        FIB_ARRAY = [ 0]  1 [ 1]  0 [ 2]  2 [ 3]  2 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  2 [ 8]  2 [ 9]  5 [10]  5 [11]  5

- The GoUP filter covers the bottom 11 half-rounds
- Only the ADDs in the bottom 2 rounds are expanded (with log2 prob. thresholds -7 and -8, resp.)

- 50 experiments (= 50 keys chosen at random)
- 38% filter zero (18 of 50)
- 14% have noise (7 of 50)

Good (G) / Filtered (F) / Good filtered (GF) / Variants (V)

 G / F / GF / V 
---------------

 8 / 0 / 0 / 0
 8 / 0 / 0 / 0
 5 / 1 / 1 / 3
 9 / 1 / 1 / 22
 9 / 2 / 2 / 14
10 / 3 / 2 / 15
 2 / 1 / 1 / 12
 9 / 2 / 2 / 8
 3 / 0 / 0 / 0
13 / 4 / 4 / 25
 3 / 1 / 1 / 5
10 / 3 / 2 / 15
 9 / 2 / 1 / 7
 9 / 1 / 0 / 1
 1 / 0 / 0 / 0
 3 / 0 / 0 / 0
 3 / 2 / 0 / 3
 8 / 0 / 0 / 0 
 7 / 2 / 2 / 16
 6 / 0 / 0 / 0
 4 / 2 / 1 / 3
 5 / 0 / 0 / 0
 6 / 2 / 2 / 20
16 / 1 / 1 / 26
 6 / 1 / 1 / 4
 4 / 2 / 2 / 7
11 / 2 / 2 / 6
16 / 3 / 3 / 7
 3 / 0 / 0 / 0
 3 / 0 / 0 / 0
 6 / 1 / 1 / 14
 4 / 0 / 0 / 0
10 / 1 / 1 / 27
 3 / 0 / 0 / 0
 6 / 0 / 0 / 0
10 / 2 / 2 / 25
 3 / 1 / 1 / 3
 9 / 0 / 0 / 0
 7 / 0 / 0 / 0
 9 / 0 / 0 / 0
 4 / 2 / 1 / 21
 5 / 1 / 1 / 4
 7 / 2 / 2 / 4
26 / 4 / 4 / 46
 3 / 0 / 0 / 0
 7 / 1 / 1 / 22
 6 / 0 / 0 / 0
13 / 4 / 2 / 13
 9 / 1 / 1 / 4
 5 / 0 / 0 / 0

 */


/* --- */

 #if 0 // ----------- DEBUG -----------------
  std::vector<uint32_t>::iterator X_first_iter = X_first->begin();
  std::vector<uint32_t>::iterator X_second_iter = X_second->begin();
#if 1 // DEBUG
  printf("[%s:%d] Good pair intermediate values:\n", __FILE__, __LINE__);
  uint32_t i = 0;
#endif // #if 1 // DEBUG
  for(X_first_iter = X_first->begin(); X_first_iter != X_first->end(); X_first_iter++, X_second_iter++) {
#if 1
	 uint32_t x = *X_first_iter;
	 uint32_t xx = *X_second_iter;
	 uint32_t dx = (x ^ xx);
#if 1	// DEBUG
	 uint32_t hwx = hw32(dx);
	 printf("X[%2d] %8X %8X %8X s %2d HW %2d ", i, x, xx, dx, (x & RC5_ROT_MASK), hwx);
	 //	 if((((2*NROUNDS) + 3) - i) <= RC5_FIB_LEN) {
	 if((i >= (((2*NROUNDS) + 3) - RC5_FIB_LEN - 2)) && (i <= ((2*NROUNDS)))) {
		//		uint32_t j = i-1;//(RC5_FIB_LEN - (((2*NROUNDS) + 3) - i));
		//		uint32_t j = i+1;
		uint32_t j = (i+1) - 2;
		printf("F[%2d] %2d", j, FIB[j]);
		if(hwx > FIB[j]) {
		  printf(" . ");
		}
	 }
	 printf("\n");
#endif  // #if 0	// DEBUG
	 if(i == 0) {  // plaintext: X[0]
		assert(x == pt_first[left]);
		assert(xx == pt_second[left]);
#if 0	// DEBUG
		printf(" left PT");
#endif  // #if 0	// DEBUG
	 }
	 if(i == 1) { // plaintext: X[1]
		assert(x == pt_first[right]);
		assert(xx == pt_second[right]);
#if 0	// DEBUG
		printf(" right PT");
#endif  // #if 0	// DEBUG
	 }
	 if(i == ((2*nrounds) + 2 - 1)) { // left ciphertext: X[(2*nrounds) + 1]
		assert(x == ct_first[left]);
		assert(xx == ct_second[left]);
#if 0	// DEBUG
		printf(" left CT");
#endif  // #if 0	// DEBUG
	 }
	 if(i == ((2 * nrounds) + 2)) { // right ciphertext: X[(2*nrounds) + 2]
		assert(x == ct_first[right]);
		assert(xx == ct_second[right]);
#if 0	// DEBUG
		printf(" right CT");
#endif  // #if 0	// DEBUG
	 }
	 // rot consts for x and xx must be equal for all except the last
	 // round i.e. (dx & RC5_ROT_MASK) == 0
	 if(i < ((2 * nrounds) + 2)) { 
		assert((dx & RC5_ROT_MASK) == 0);
	 }
#if 0	// DEBUG
	 printf("\n");
#endif  // #if 0	// DEBUG
	 i++;
#endif  // #if 0
  }
#endif //#if 0 // ----------- DEBUG -----------------

/* --- */

#if RC5_PROB_SCORE // score
			 double p_i = 1.0;
			 if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
				p_i = rc5_xdp_add_last_round(A_last, L_last, C_last, y, yy, dx);
			 } else {
				p_i = rc5_xdp_add_mid_round(A_mid, L_mid, C_mid, dy, dx);
			 }
			 ds_array_new.p[depth] = p_i;

			 double score = 0.0;
			 double thres_score = 0.0;
			 // [ 1] -2.32, [ 2] -4.64, [ 3] -6.97, [ 4] -9.29, [ 5] -11.61, [ 6] -13.93, [ 7] -18.02, [ 8] -23.06, [ 9] -29.09, [10] -36.10, [11] -44.10, };
			 //			 printf("[%s:%d] double P_SCORE_ARR[] = { ", __FILE__, __LINE__);
			 for(uint32_t i = depth; i <= (ds_array_new.len - 2); i++) {
				score += log2(ds_array_new.p[i]);
				thres_score += log2(p_thres_array[i]);
				//				if(depth == 1) {
				//				  printf("[%2d] %4.2f, ", i, thres_score);
				//				}
#if 0 // DEBUG
				printf("[%s:%d] %2d | this %4.2f %4.2f | thres  %4.2f %4.2f\n", __FILE__, __LINE__, i, log2(ds_array_new.p[i]), score, log2(p_thres_array[i]), thres_score);
#endif
			 }
			 //			 if(depth == 1) {
			 //				printf("};\n");
			 //			 }

			 //			 double min_score = logp_score_array[depth];
			 //			 printf("[%s:%d] depth %2d score < min_score: %4.2f %4.2f\n", __FILE__, __LINE__, depth, score, min_score);
			 if(score < thres_score) {
#if 1 // DEBUG
				for(uint32_t i = depth; i <= (ds_array_new.len - 2); i++) {
				  printf("[%s:%d] %2d | this %4.2f | thres  %4.2f\n", __FILE__, __LINE__, i, log2(ds_array_new.p[i]), log2(p_thres_array[i]));
				}
#endif
				printf("[%s:%d] Low score at depth %2d score < thres_score: %4.2f %4.2f Skipping\n", __FILE__, __LINE__, depth, score, thres_score);
				continue;
			 }
#endif // #if RC5_PROB_SCORE


#if RC5_PROB_SCORE
	 double p_i = 1.0;
	 p_i = rc5_xdp_add_last_round(A_last, L_last, C_last, y, yy, dx_i);
	 ds_array.p[ds_array.len - 2] = p_i;
#endif // #if RC5_PROB_SCORE




/* ---- */

/*
 * --- WITH A score functionality ---
 *
 * Non-lnear (w.r.t. XOR) version of the goUP filter for good pairs
 * for RC5 proposed by [Biryukov, Kushilevitz]
 * 
 * \note (depth + 1) must be equal to the index of the right
 *       ciphertext: \p depth = (RC5_FIB_LEN - 1) = 7 so that D[depth
 *       + 1] = D[RC5_FIB_LEN] = D[8] . For example for \ref
 *       RC5_GOUP_LEVEL = RC5_FIB_LEN - 2 = 6, the filter starts at
 *       depth = 6.
 *
 * \see rc5_filter_go_up_ext_i
 */
uint32_t rc5_filter_go_up_nl_i(const uint32_t depth, 
										 const pair_t pc_pair,
										 const gsl_matrix* A_last[2][2][2],
										 const gsl_vector* L_last,
										 const gsl_vector* C_last,
										 const gsl_matrix* A_mid[2][2],
										 const gsl_vector* L_mid,
										 const gsl_vector* C_mid,
										 uint32_t* count, 
										 const std::vector<uint32_t> fib_array, 
										 const std::vector<double> p_thres_array,
										 WORD** logp2hw_arr,
										 const uint32_t logp2hw_arr_rows,
										 const uint32_t logp2hw_arr_cols,
										 const rc5_goup_diffs_t* ds_array,
										 boost::unordered_map<rc5_goup_diffs_t, WORD, rc5_goup_diffs_hash, rc5_goup_diffs_equal_to>* goup_variants_hash_map)
{

  //  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);
#if RC5_FILTER_GOUP_DIFF_SET
  uint32_t left = RC5_FEISTEL_LEFT;
#endif // #if RC5_FILTER_GOUP_DIFF_SET
#if 1									  // DEBUG
  assert(ds_array->D.size() == (RC5_FIB_LEN  + 1));
  assert(ds_array->D.size() == ds_array->S.size());
  assert(ds_array->D.size() == ds_array->len);
#endif

#if 1
  if(hw32(ds_array->D[depth]) > fib_array[depth + 1]) {
	 bool b_ret = (goup_variants_hash_map->size() != 0);
	 return b_ret;
  }
#endif
  uint32_t s;

  //  if(depth != 0) {
  if((depth != 0) && (hw32(ds_array->D[depth]) > 1)) {
	 for(s = 0; s < WORD_SIZE; s++) {
		std::vector<uint32_t> dx_vec;
		const uint32_t rot_const_prev = s;
#if RC5_FILTER_GOUP_DIFF_SET // depth = 6
		double p_thres = p_thres_array[depth];
		const uint32_t dx_prev = RC5_ROTL(ds_array->D[depth], s); // D[5] <<< S[5]
		//		const uint32_t hw_thres = fib_array[depth - 1]; // WORD_SIZE;
		const uint32_t hw_thres = fib_array[depth]; // WORD_SIZE;
		WORD y = 0;
		WORD yy = 0;
		// Generate a set of diffs dx
		if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
		  pair_t pc_pair = ds_array->pc_pair;
		  y = pc_pair.ciphertext_first[left]; // y[7] = left ciphertext 1
		  yy = pc_pair.ciphertext_second[left]; // yy[7] = left ciphertext 2
		  assert((y ^ yy) == ds_array->D[ds_array->len - 2]);
		  // (y[7], yy[7] -> {dx[5]})
		  rc5_xdp_add_last_round_diff_set_out(A_last, L_last, C_last, 
														  y, yy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#if 1 // go upper than the bottom two
		WORD dy = 0;
		//		if((depth < (RC5_FIB_LEN - 2)) && (depth >= (RC5_FIB_LEN - 5))) { // depth = 5
		if((depth < (RC5_FIB_LEN - 2)) && (depth >= (RC5_FIB_LEN - RC5_FIB_LEN))) { // depth = 12
		  dy = ds_array->D[depth + 1]; // D[6]
		  rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
														 dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#endif // #if 0 // go upper than the bottom two
#endif // #if RC5_FILTER_GOUP_DIFF_SET
#if 0 // add also the input difference (if ADD is XOR)
		/*
		 * When the ADD is approximated as XOR, the output difference is
		 * the same as the input difference \p ds_array->D[depth + 1] so
		 * add it to the list if it has zeros in the places of the
		 * rotation constants for the enxt round.
		 */
		//		if((rot_seq == 0) && (hw32(RC5_ROTR(ds_array->D[depth + 1], s) ^ ds_array->D[depth]) <= hw_thres)) {
		uint32_t rot_seq = RC5_ROTR(ds_array->D[depth + 1], rot_const_prev) & RC5_ROT_MASK;
		if(rot_seq == 0) {
		  dx_vec.push_back(ds_array->D[depth + 1]); // dx[5] == D[7]
		}
#endif
		for(uint32_t i = 0; i < dx_vec.size(); i++) {
		  WORD dx = dx_vec[i]; // dx[5]
#if RC5_FLEX_FIB
		  double p_i = 1.0;
		  if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
			 p_i = rc5_xdp_add_last_round(A_last, L_last, C_last, y, yy, dx);
		  } else {
			 p_i = rc5_xdp_add_mid_round(A_mid, L_mid, C_mid, dy, dx);
		  }
		  uint32_t log2p = (uint32_t)std::abs(log2(p_i));
		  uint32_t i_round = depth + (RC5_FULL_FIB_LEN - (RC5_FIB_LEN - 2)) - 1;
		  if((i_round > ((2*NROUNDS) + 2))) {
			 printf("[%s:%d] i_round %2d depth %2d\n", __FILE__, __LINE__, i_round, depth);
		  }
		  assert(i_round <= ((2*NROUNDS) + 2));
		  //		  uint32_t hw_thres_flex = LOGP2HW_ARRAY[i_round][log2p];
		  uint32_t hw_thres_flex = logp2hw_arr[i_round][log2p];
		  //		  printf("[%s:%d] i_round %2d depth %2d p 2^%f | LOGP2HW_ARRAY[%2d][%2d] = %2d\n", __FILE__, __LINE__, i_round, depth, log2(p_i), i_round, log2p, hw_thres_flex);
		  uint32_t hw = hw32(dx ^ dx_prev);
		  if(hw > hw_thres_flex) {
			 continue;
		  }
#endif // #if RC5_FLEX_FIB

		  if((RC5_ROTR(dx, s) & RC5_ROT_MASK) == 0) { // if (dx[5] >>> S[6]) = 0
			 rc5_goup_diffs_t ds_array_new = *ds_array;
			 ds_array_new.D[depth - 1] =  RC5_ROTR(dx, s) ^ ds_array_new.D[depth]; // D[5] = (dx[5] >>> S[6]) ^ D[6]		 
			 ds_array_new.S[depth] = s; // S[6]

#if RC5_PROB_SCORE // score
			 double p_i = 1.0;
			 if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
				p_i = rc5_xdp_add_last_round(A_last, L_last, C_last, y, yy, dx);
			 } else {
				p_i = rc5_xdp_add_mid_round(A_mid, L_mid, C_mid, dy, dx);
			 }
			 ds_array_new.p[depth] = p_i;

			 double score = 0.0;
			 double thres_score = 0.0;
			 // [ 1] -2.32, [ 2] -4.64, [ 3] -6.97, [ 4] -9.29, [ 5] -11.61, [ 6] -13.93, [ 7] -18.02, [ 8] -23.06, [ 9] -29.09, [10] -36.10, [11] -44.10, };
			 //			 printf("[%s:%d] double P_SCORE_ARR[] = { ", __FILE__, __LINE__);
			 for(uint32_t i = depth; i <= (ds_array_new.len - 2); i++) {
				score += log2(ds_array_new.p[i]);
				thres_score += log2(p_thres_array[i]);
				//				if(depth == 1) {
				//				  printf("[%2d] %4.2f, ", i, thres_score);
				//				}
#if 0 // DEBUG
				printf("[%s:%d] %2d | this %4.2f %4.2f | thres  %4.2f %4.2f\n", __FILE__, __LINE__, i, log2(ds_array_new.p[i]), score, log2(p_thres_array[i]), thres_score);
#endif
			 }
			 //			 if(depth == 1) {
			 //				printf("};\n");
			 //			 }

			 //			 double min_score = logp_score_array[depth];
			 //			 printf("[%s:%d] depth %2d score < min_score: %4.2f %4.2f\n", __FILE__, __LINE__, depth, score, min_score);
			 if(score < thres_score) {
#if 1 // DEBUG
				for(uint32_t i = depth; i <= (ds_array_new.len - 2); i++) {
				  printf("[%s:%d] %2d | this %4.2f | thres  %4.2f\n", __FILE__, __LINE__, i, log2(ds_array_new.p[i]), log2(p_thres_array[i]));
				}
#endif
				printf("[%s:%d] Low score at depth %2d score < thres_score: %4.2f %4.2f Skipping\n", __FILE__, __LINE__, depth, score, thres_score);
				continue;
			 }
#endif // #if RC5_PROB_SCORE


			 // recursive call
			 rc5_filter_go_up_nl_i(depth - 1, pc_pair, A_last, L_last, C_last, A_mid, L_mid, C_mid, count, fib_array, p_thres_array, logp2hw_arr, logp2hw_arr_rows, logp2hw_arr_cols, &ds_array_new, goup_variants_hash_map);

#if RC5_FILTER_GOUP_LIMIT_VARIANTS
			 uint32_t limit = RC5_FILTER_GOUP_LIMIT;
			 if(goup_variants_hash_map->size() > limit) {
				return true;
			 }
#endif // #if RC5_FILTER_GOUP_LIMIT_VARIANTS
		  }
		}
	 }
  } else {							  // reached the top

	 /**
	  * If the GoUP filter covers all half rounds but the first
	  */
	 bool b_match_input_diff = true;
	 if(RC5_GOUP_LEVEL == (2*NROUNDS)) {
		assert(depth == 0);
		b_match_input_diff = rc5_is_goup_diffs_match_inputs(pc_pair, *ds_array, A_last, L_last, C_last, A_mid, L_mid, C_mid);
	 }
	 //	 if(b_match_input_diff) {
	 if((b_match_input_diff) && (hw32(ds_array->D[depth]) <= 1)) {
		bool b_found = false;
		rc5_goup_diffs_hash goup_variants_hash_function;
		uint32_t hash_val = goup_variants_hash_function(*ds_array);
		std::pair<rc5_goup_diffs_t, uint32_t> new_pair (*ds_array, hash_val);
		uint32_t old_size = goup_variants_hash_map->size();
#if 1 // do not store variant
		goup_variants_hash_map->insert(new_pair);
#endif
		uint32_t new_size = goup_variants_hash_map->size();
		b_found = (new_size == old_size);
		if(!b_found) {
		  (*count)++;					  // accumulate num. of variants
#if 0 // DEBUG
		  //		  printf("[%s:%d] Add variant #%10d | dx[%2d / %10d] pair# %10d\n", __FILE__, __LINE__, *count, g_index, g_size, g_pair_i);
		  printf("[%s:%d] Add variant #%10d\n", __FILE__, __LINE__, *count);
		  for(uint32_t i = 0; i < ds_array->D.size(); i++) {
			 printf("D[%2d] %8X s %2d HW %2d\n", i, ds_array->D[i], ds_array->S[i], 
					  hw32(ds_array->D[i]));
		  }
		  printf("\n");
#endif // #if 0 // DEBUG
		}
		bool b_ret = (goup_variants_hash_map->size() != 0);
		return b_ret;
	 }
  }
  bool b_ret = (goup_variants_hash_map->size() != 0);
  return b_ret;
}

/* --- */

#if 0//RC5_PROB_SCORE // score
  std::vector<double> logp_score_array;
  double logp_score = log2(p_thres_array[0]);
  logp_score_array.push_back(logp_score);
  for(uint32_t i = 1; i < p_thres_array.size(); i++) {
	 logp_score = logp_score_array[i-1] + log2(p_thres_array[i]);
	 logp_score_array.push_back(logp_score);
	 //	 printf("%4.2f\n", logp_score_array[i]);
  }
#endif // #if RC5_PROB_SCORE
#if 0//RC5_PROB_SCORE
		  printf("[%s:%d] Score = \n", __FILE__, __LINE__);
		  for(uint32_t i = 1; i < p_thres_array.size(); i++) {
			 printf("%4.2f ", logp_score_array[i]);
		  }
		  printf("\n");
#endif // #if RC5_PROB_SCORE


/* --- */

/* 
#Rounds 8
WORD_SIZE 32
RC5_FILTER_CUT_HW1  0
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 262186 (2^18.000231)
#Filtered pairs all: 13 (2^3.700440)
#Good pairs among filtered: 2
#Good pairs among filtered f1: 6
#Good pairs total: 6
#GoUP sets of trails: 13 (2^3.700440)

 */


/* 
#Rounds 8
WORD_SIZE 32
RC5_FILTER_CUT_HW1  0
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 262514 (2^18.002035)
#Filtered pairs all: 6 (2^2.584963)
#Good pairs among filtered: 3
#Good pairs among filtered f1: 8
#Good pairs total: 8
#GoUP sets of trails: 6 (2^2.584963)
RC5_P_THRES_ARRAY = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -2.32 [ 7] -4.09 [ 8] -5.04 [ 9] -6.02 [10] -7.01 [11] -8.01
        FIB_ARRAY = [ 0]  2 [ 1]  2 [ 2]  2 [ 3]  2 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  4 [ 8]  4 [ 9]  5 [10] 15 [11] 16
        FIB_ARRAY = [ 0]  2 [ 1]  2 [ 2]  2 [ 3]  2 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  4 [ 8]  4 [ 9]  5 [10] 15 [11] 16
		  [./tests/rc5-tests.cc:631] #GoUP sets of trails: 6 (2^2.584963)
const uint32_t g_key[16] = {0x12, 0x24, 0x65, 0x46, 0xCB, 0xCB, 0x27, 0x49, 0x8C, 0x68, 0x2B, 0x23, 0xCA, 0x61, 0xB1, 0xCE};
[./tests/rc5-tests.cc:695] Test OK!

real    13m57.608s
user    4m29.829s
sys     0m6.440s

 */

/* 

- "very good" pairs key 0:

const uint32_t g_key[16] = {0xF0, 0x26, 0xF, 0x7E, 0x33, 0x66, 0xB3, 0xD8, 0x1D, 0xB3, 0x2C, 0x73, 0xEB, 0xEF, 0x2F, 0xFB};

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -2.00 -1.00 -3.00 0.00 -2.00 -4.00 -6.00 -8.00 -8.00 -4.00 -13.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  2  1  1  0  2  3  4  6  8  7 14 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -2.00 -2.00 0.00 -2.00 -1.00 -2.00 0.00 -2.00 -2.00 -3.00 -3.00 -8.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  2  2  0  1  1  1  0  2  1  2  6 11 

- "very good" pairs key 1: 

const uint32_t g_key[16] = {0x24, 0xF0, 0x81, 0x43, 0x84, 0x8B, 0x67, 0xE0, 0xCA, 0x63, 0xCD, 0x3B, 0x52, 0x1D, 0xE1, 0xF1};

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 -2.00 0.00 -2.00 -2.00 0.00 -2.00 -2.00 -2.00 -4.00 -9.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  1  0  2  2  0  2  1  3  6 10 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 -4.00 -6.00 -6.00 -7.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  0  1  1  5  4  7 12 

- "very good" pairs key 2:

const uint32_t g_key[16] = {0xC2, 0x47, 0x8C, 0x7, 0x99, 0xCA, 0xD1, 0xDD, 0x2E, 0x57, 0x95, 0x7F, 0xDC, 0x23, 0x0, 0xC1};

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 -5.00 -6.00 -6.00 -9.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  0  1  1  4  4  7  8 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 -2.00 0.00 -1.00 -1.00 0.00 -2.00 -2.00 -6.00 -5.00 -6.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  1  0  1  1  0  2  2  5  5  8 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 0.00 0.00 0.00 -1.00 -1.00 -2.00 -3.00 -2.00 -4.00 -4.00 -8.00 -7.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  1  2  2  4  4  8  8 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -2.00 -2.00 -3.00 -7.00 -4.00 -11.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  0  2  1  3  5  6 11 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 0.00 -1.00 -1.00 -3.00 -5.00 -6.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  0  1  2  1  4  3  8 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -2.00 -2.00 0.00 -2.00 -4.00 -5.00 -2.00 0.00 -5.00 -6.00 -9.00 -5.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  2  2  0  1  4  2  2  0  5  6  4  8 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -3.00 -3.00 -1.00 -3.00 0.00 -1.00 -1.00 0.00 0.00 -2.00 -3.00 -5.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  3  2  1  1  0  1  1  0  1  2  3  8 

- "very good" pairs key 3:

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 -2.00 -1.00 -1.00 0.00 -1.00 -2.00 -4.00 -6.00 -8.00 -10.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  2  1  1  0  1  2  3  5 11 10 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -2.00 -2.00 0.00 -2.00 -1.00 0.00 -1.00 0.00 -1.00 0.00 -1.00 -7.00 -8.00 -6.00 -10.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  2  2  0  1  1  0  1  1  2  1  1  7  5 11 17 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 -3.00 -5.00 -5.00 -10.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  0  1  1  3  5 11 17 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 0.00 -4.00 -5.00 -4.00 -13.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  0  1  1  4  4 11 15 

[./src/rc5-dc.cc:3315]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 0.00 0.00 0.00 -1.00 -1.00 -2.00 -4.00 -6.00 -9.00 -10.00 -6.00 -13.00 
[./src/rc5-dc.cc:3324]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  2  4  3  6  6  9 10  <- special

 */

/* --- */
	 //	 if(hw32(ds_array->D[depth]) <= 1) { // limit HW to 1
	 //		return false;
	 //	 }


//WORD g_dx = 0;
//uint32_t g_index = 0;
//uint32_t g_size = 0;
//uint32_t g_pair_i = 0;

	 //	 g_index = cnt_diff;
	 //	 g_dx = dx_i;
	 //	 g_size = dx_set_all.size();

	 //	 uint32_t hw_thres_flex = LOGP2HW_ARRAY[i_round][log2p];
	 //	 printf("[%s:%d] dx_i %8X i_round %2d depth %2d | p 2^%4.2f LOGP2HW_ARRAY[%2d][%2d] = %2d\n", __FILE__, __LINE__, dx_i, i_round, depth, log2(p_i), i_round, log2p, hw_thres_flex);


/* ---- */

#if RC5_FLEX_FIB_MAX // max
		  LOGP2HW_ARRAY[i][log2p] = std::max(hw32(diff_arr[i-2]), LOGP2HW_ARRAY[i][log2p]);
#else // average
		  LOGP2HW_ARRAY[i][log2p] = (hw32(diff_arr[i-2]) + LOGP2HW_ARRAY[i][log2p]);
		  LOGP2HW_ARRAY_AVRG_CNT[i][log2p]++;
#endif // #if RC5_FLEX_FIB_MAX // max


/* --- */

	 /*
	  * Orphaned fragment: for every index i of a trail of length arr_len,
	  * bucket the Hamming weight of diff_arr[i-2] by the absolute value of
	  * log2(prob_arr[i]) and accumulate it in LOGP2HW_ARRAY[i][log2p]
	  * (maximum or running sum, depending on RC5_FLEX_FIB_MAX).
	  *
	  * NOTE(review): the loop starts at i = 3, so the (i < 2) and (i == 2)
	  * branches below can never execute.
	  * NOTE(review): if prob_arr[i] is 0, log2() yields -inf and the cast to
	  * uint32_t is not meaningful -- confirm that p > 0 is guaranteed.
	  */
	 for(uint32_t i = 3; i < arr_len; i++) {

		if(i < 2) {
		  ;
		}

		if(i == 2) {
		  // do nothing;
		  // note: i = 2 is a copy
		}

		if((i >= 3) && (i < (arr_len - 2))) { // intermediate rounds

		  double p = prob_arr[i];
		  // bucket index: |log2(p)| truncated to an integer
		  uint32_t log2p = (uint32_t)std::abs(log2(p));
		  // note: (log2p >= 0) is always true for an unsigned value
		  assert((log2p < WORD_SIZE) && (log2p >= 0));
		  //		LOGP2HW_ARRAY[i][log2p] = hw32(diff_arr[i-2]);
#if RC5_FLEX_FIB_MAX // max
		  LOGP2HW_ARRAY[i][log2p] = std::max(hw32(diff_arr[i-2]), LOGP2HW_ARRAY[i][log2p]);
#else // average
		  // running sum + sample counter; the division is done elsewhere
		  LOGP2HW_ARRAY[i][log2p] = (hw32(diff_arr[i-2]) + LOGP2HW_ARRAY[i][log2p]);
		  LOGP2HW_ARRAY_AVRG_CNT[i][log2p]++;
#endif // #if RC5_FLEX_FIB_MAX // max

		} 

		if(i >= (arr_len - 2)) { // last two rounds
		  double p = prob_arr[i];
		  uint32_t log2p = (uint32_t)std::abs(log2(p));
		  assert((log2p < WORD_SIZE) && (log2p >= 0));

		  // NOTE(review): both auxiliary tables are derived from the current
		  // LOGP2HW_ARRAY entry, not from their own previous value -- confirm
		  // that this is intended.
		  logp2hw_arr_max[i][log2p] = std::max(hw32(diff_arr[i-2]), LOGP2HW_ARRAY[i][log2p]);
		  logp2hw_arr[i][log2p] = (hw32(diff_arr[i-2]) + LOGP2HW_ARRAY[i][log2p]);
		  avrg_cnt[i][log2p]++;

#if RC5_FLEX_FIB_MAX // max
		  LOGP2HW_ARRAY[i][log2p] = std::max(hw32(diff_arr[i-2]), LOGP2HW_ARRAY[i][log2p]);
#else // average
		  LOGP2HW_ARRAY[i][log2p] = (hw32(diff_arr[i-2]) + LOGP2HW_ARRAY[i][log2p]);
		  LOGP2HW_ARRAY_AVRG_CNT[i][log2p]++;
#endif // #if RC5_FLEX_FIB_MAX // max
		}

	 }

  }


/* --- */
#if 0//RC5_FLEX_FIB // compute LOGP2HW_ARRAY
		uint32_t log2p = (uint32_t)std::abs(log2(p));
		assert((log2p < WORD_SIZE) && (log2p >= 0));
		//		LOGP2HW_ARRAY[i][log2p] = hw32(diff_arr[i-2]);
#if RC5_FLEX_FIB_MAX // max
		LOGP2HW_ARRAY[i][log2p] = std::max(hw32(diff_arr[i-2]), LOGP2HW_ARRAY[i][log2p]);
#else // average
		LOGP2HW_ARRAY[i][log2p] = (hw32(diff_arr[i-2]) + LOGP2HW_ARRAY[i][log2p]);
		LOGP2HW_ARRAY_AVRG_CNT[i][log2p]++;
#endif // #if RC5_FLEX_FIB_MAX // max
#endif // #if RC5_FLEX_FIB // compute LOGP2HW_ARRAY

#if 0//RC5_FLEX_FIB // compute LOGP2HW_ARRAY
		uint32_t log2p = (uint32_t)std::abs(log2(p));
		assert((log2p < WORD_SIZE) && (log2p >= 0));
		//		LOGP2HW_ARRAY[i][log2p] = hw32(diff_arr[i-2]);
#if RC5_FLEX_FIB_MAX // max
		LOGP2HW_ARRAY[i][log2p] = std::max(hw32(diff_arr[i-2]), LOGP2HW_ARRAY[i][log2p]);
#else // average
		LOGP2HW_ARRAY[i][log2p] = (hw32(diff_arr[i-2]) + LOGP2HW_ARRAY[i][log2p]);
		LOGP2HW_ARRAY_AVRG_CNT[i][log2p]++;
#endif // #if RC5_FLEX_FIB_MAX // max
		//		printf("[%s:%d] y %8X yy %8X dx %8X hw %2d diff[%2d] %8X hw %2d\n", 
		//				 __FILE__, __LINE__, y, yy, dx, hw32(dx), i-2, diff_arr[i-2], hw32(diff_arr[i-2]));
#endif // #if RC5_FLEX_FIB // compute LOGP2HW_ARRAY
#if 0//!RC5_FLEX_FIB_MAX // average
  for(uint32_t i = 0; i < ((2*NROUNDS) + 3); i++) {
	 for(uint32_t j = 0; j < WORD_SIZE; j++) {
		printf("before %d ", LOGP2HW_ARRAY[i][j]);
		double a = ((double)LOGP2HW_ARRAY[i][j] / (double)LOGP2HW_ARRAY_AVRG_CNT[i][j]);
		LOGP2HW_ARRAY[i][j] = (uint32_t)a;
		printf(" after %d %f %d\n", LOGP2HW_ARRAY[i][j], a, LOGP2HW_ARRAY_AVRG_CNT[i][j]);
	 }
  }
#endif // #if RC5_FLEX_FIB_MAX // max



/* --- */

/* 
RC5_P_THRES_ARRAY = [ 0] -1.00 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -3.17 [ 5] -4.09 [ 6] -4.09 [ 7] -5.04 [ 8] -6.02 [ 9] -6.02 [10] -8.01 [11] -10.00
        FIB_ARRAY = [ 0]  0 [ 1]  1 [ 2]  1 [ 3]  0 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  2 [ 8]  2 [ 9]  2 [10]  2 [11]  2
        FIB_ARRAY = [ 0]  0 [ 1]  1 [ 2]  1 [ 3]  0 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  2 [ 8]  2 [ 9]  2 [10]  2 [11]  2
		  [./tests/rc5-tests.cc:628] #GoUP sets of trails: 0 (2^-inf)
		  [./tests/rc5-tests.cc:633] Print LOGP2HW_ARRAY
LOGP2HW_ARRAY[ 0] =  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 1] =  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 2] =  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 3] =  1  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 4] =  1  1  1  1  1  1  1  1  .  1  1  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 5] =  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 6] =  1  1  1  1  1  1  2  2  2  2  3  3  5  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 7] =  1  1  1  1  1  1  2  2  2  2  3  2  3  1  6  .  4  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 8] =  1  .  .  1  1  1  2  2  3  3  3  3  4  4  5  4  4  5  6  .  .  9  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[ 9] =  1  1  1  1  1  2  2  2  3  3  4  4  5  4  5  6  5  7  7  8  7  9  3  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[10] =  1  1  1  1  1  2  2  3  3  4  4  5  5  5  6  6  7  6  7  8  . 11  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[11] =  2  .  1  2  2  2  3  3  4  4  4  5  5  6  6  6  7  7  8  7  8  9  8  7  .  6  .  .  .  .  .  .
LOGP2HW_ARRAY[12] =  1  .  1  1  2  3  3  4  4  5  5  6  6  6  6  7  7  8  8  8  9  9  8 11  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[13] =  3  1  2  2  3  4  5  6  6  7  7  8  8  9  9  9  9 10 10 12  7 12  .  .  .  .  .  .  .  .  .  .
LOGP2HW_ARRAY[14] =  .  .  2  2  3  4  5  6  7  7  8  9  9  9 10 10 10 10 10 11 13 12  .  .  .  .  .  .  .  .  .  .
const uint32_t g_key[16] = {0xC4, 0x39, 0xBB, 0x95, 0xA6, 0xCF, 0xD9, 0x12, 0xA, 0x78, 0x12, 0xB8, 0xB0, 0xF2, 0xC7, 0xCC};
[./tests/rc5-tests.cc:710] Test OK!

real    0m2.822s
user    0m2.752s
sys     0m0.032s

 */

/* --- */

/**
 * Sample RC5_NTEXTS random plaintext pairs, keep the good ones, run the
 * equal-rotation trail computation on each of them and finally print the
 * global table LOGP2HW_ARRAY.
 *
 * For every sampled pair accepted by rc5_pair_is_good() the intermediate
 * encryption values of both texts are collected, the per-position XOR
 * differences and rotation amounts are extracted from them and handed to
 * rc5_equal_rot_trail_dp() together with the two ciphertext words.
 *
 * \param S key table passed on to the encryption helpers (presumably the
 *        expanded RC5 key -- confirm).
 * \param good_pairs_vec not used any more: the loop over this vector was
 *        disabled in favour of random sampling. Kept so that callers do
 *        not have to change.
 * \param AA_last last-round matrices including values for x.
 * \param A_last last-round matrices.
 * \param L_last last-round vector, forwarded unchanged.
 * \param C_last last-round vector, forwarded unchanged.
 * \param A_mid middle-round matrices.
 * \param L_mid middle-round vector, forwarded unchanged.
 * \param C_mid middle-round vector, forwarded unchanged.
 *
 * NOTE(review): the input difference \p dx used to derive the second
 * plaintext is not declared in this function; presumably it is a
 * file-scope array -- confirm.
 * NOTE(review): LOGP2HW_ARRAY is only read here; presumably it is filled
 * as a side effect of rc5_equal_rot_trail_dp() -- confirm.
 *
 * \see rc5_good_pairs_goup_filter_debug
 */
void rc5_compute_flex_fib_array(const WORD S[RC5_STAB_LEN_T],
										  const std::vector<pair_t> good_pairs_vec,
										  const gsl_matrix* AA_last[2][2][2][2], // last round including values for x
										  const gsl_matrix* A_last[2][2][2], // last round
										  const gsl_vector* L_last,
										  const gsl_vector* C_last,
										  const gsl_matrix* A_mid[2][2], // middle round
										  const gsl_vector* L_mid,
										  const gsl_vector* C_mid)
{
  assert(RC5_FLEX_FIB == 1);
  (void)good_pairs_vec; // see the parameter note above

  const uint32_t left = RC5_FEISTEL_LEFT;
  const uint32_t right = RC5_FEISTEL_RIGHT;
  const uint32_t arr_len = (2*NROUNDS) + 3;
  uint32_t cnt_good = 0;

  for(uint32_t j = 0; j < RC5_NTEXTS; j++) {
    /*
     * Draw a random first plaintext and derive the second one from the
     * input difference.
     */
    pair_t cp_pair;
    cp_pair.plaintext_first[0] = random32() & MASK;
    cp_pair.plaintext_first[1] = random32() & MASK;
    cp_pair.plaintext_second[0] = (cp_pair.plaintext_first[0] ^ dx[0]) & MASK;
    cp_pair.plaintext_second[1] = (cp_pair.plaintext_first[1] ^ dx[1]) & MASK;

    // Was `nrounds`, which is not declared anywhere in this function.
    bool b_good = rc5_pair_is_good(S, NROUNDS, cp_pair);
    if(!b_good)
      continue;

#if 0 // DEBUG: statistics + counting averages
	 rc5_good_pair_debug_statistics(S, nrounds, cp_pair, fib_array,
											  (const gsl_matrix*(*)[2][2][2])AA_last, (const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
											  (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid,
											  min_prob_arr, sum_prob_arr, sum_hw_arr, max_hw_arr);
#endif // #if 1 // DEBUG

    cnt_good++;
    printf("-------------------- [%5d] --------------------\n", cnt_good);

    /*
     * The pair sampled above is used directly. The former
     * `pair_t cp_pair = *vec_iter;` redeclared cp_pair in the same scope
     * and referred to an iterator that no longer exists.
     */
    std::vector<uint32_t> X_first;		  // intermediate values from encryption
    std::vector<uint32_t> X_second;
    rc5_encrypt_pair_get_intermediate_values(S, NROUNDS, cp_pair, &X_first, &X_second);

    /**
     * Fill in the ciphertexts.
     */
    cp_pair.ciphertext_first[left] = X_first[((2 * NROUNDS) + 1)];
    cp_pair.ciphertext_first[right] = X_first[((2 * NROUNDS) + 2)];
    cp_pair.ciphertext_second[left] = X_second[((2 * NROUNDS) + 1)];
    cp_pair.ciphertext_second[right] = X_second[((2 * NROUNDS) + 2)];

    // Re-check with the ciphertexts filled in (reuses b_good instead of
    // declaring it a second time).
    b_good = rc5_pair_is_good(S, NROUNDS, cp_pair);
    assert(b_good);

    /**
     * Intermediate values.
     */
    WORD diff_arr[(2*NROUNDS) + 3] = {0};
    WORD rot_arr[(2*NROUNDS) + 3] = {0};
    double prob_arr[(2*NROUNDS) + 3] = {0.0};
    uint32_t set_size_arr[(2*NROUNDS) + 3] = {0};
    WORD y_last_right = 0;
    WORD yy_last_right = 0;
    WORD y_last_left = 0;
    WORD yy_last_left = 0;

    // X_first and X_second are walked in lock-step, as before.
    for(uint32_t i = 0; i < X_first.size(); i++) {
      assert(i < ((2*NROUNDS) + 3));
      const WORD x = X_first[i];
      const WORD xx = X_second[i];
      diff_arr[i] = (x ^ xx); // XOR difference at position i

      // Every position from 2 on, except the very last one, supplies a
      // rotation amount, which must be equal for both texts.
      if((i >= 2) && (i != ((2*NROUNDS) + 2))) {
        rot_arr[i] = (x & RC5_ROT_MASK);
        assert((x & RC5_ROT_MASK) == (xx & RC5_ROT_MASK));
      }
      if(i == ((2*NROUNDS) + 1)) {
        y_last_left = x; // left ciphertext
        yy_last_left = xx;
      }
      if(i == ((2*NROUNDS) + 2)) {
        y_last_right = x; // right ciphertext
        yy_last_right = xx;
      }
    }

    rc5_equal_rot_trail_dp((const gsl_matrix*(*)[2][2][2])AA_last, (const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
                           (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid,
                           diff_arr, rot_arr, prob_arr, set_size_arr, arr_len,
                           y_last_left, yy_last_left, y_last_right, yy_last_right);
  }

  // Dump the table; zero entries are shown as a dot.
  printf("[%s:%d] Print LOGP2HW_ARRAY\n", __FILE__, __LINE__);
  for(uint32_t i = 0; i < ((2*NROUNDS) + 3); i++) {
    printf("LOGP2HW_ARRAY[%2d] = ", i);
    for(uint32_t j = 0; j < WORD_SIZE; j++) {
      if(LOGP2HW_ARRAY[i][j]) {
        printf("%2d ", LOGP2HW_ARRAY[i][j]);
      } else {
        printf(" . ");
      }
    }
    printf("\n");
  }
}

/* --- */
#if 1 // DEBUG
	 //	 printf("[%s:%d]    prob_arr = ", __FILE__, __LINE__);
	 printf("     Score = ");
	 double score_log = 0.0;
	 for(uint32_t i = 0; i < arr_len; i++) {
		if(i >= (arr_len - RC5_FIB_LEN)) {
		  score_log = score_log + log2(prob_arr[i]);
		  printf("%4.2f ", score_log);
		}
	 }
	 printf("\n");
#endif // #if 1 // DEBUG

/* --- */
 else {
		//		printf("[%s:%d] Does not b_match_input_diff %d. Exiting...\n", __FILE__, __LINE__, b_match_input_diff);
		assert(goup_variants_hash_map->size() == 0);
		return false;
	 }

/* --- */
			 /*
			  * The function will return FALSE only if the GoUP filter
			  * covers the full trail up to the top and it has found that
			  * this variant does not match the input difference.
			  */
			 if(b_ret == false) {
				//				printf("[%s:%d] Return false! Exiting...\n", __FILE__, __LINE__);
				assert(RC5_GOUP_LEVEL == (2*NROUNDS));
				return false;
			 } else {
				assert(b_ret == true);
			 }

/* --- */

/*
 * Build a textual representation of the current local date and time.
 *
 * The returned pointer is whatever asctime() hands back, so the caller
 * must not free it.
 */
char* get_timestamp ()
{
  const time_t t_now = time (NULL);
  struct tm* tm_local = localtime (&t_now);
  char* str_time = asctime (tm_local);
  return str_time;
}


/* --- */

/* 
128 keys, 20140715

	[./tests/rc5-tests.cc:1337] Average g_max hw_arr = 0.99 0.99 0.99 0.00 2.11 2.20 2.71 3.86 4.55 5.58 6.64 7.68 8.47 9.63 10.66 11.88 12.84 13.83 15.70
	[./tests/rc5-tests.cc:1337] Average g_max hw_arr = 1 1 1 0 2 2 3 4 5 6 7 8 9 10 11 12 13 14 16
                              1, 1, 1, 0, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16

 */

/* 

32 keys, 20140715

	[./tests/rc5-tests.cc:1337] Average g_max hw_arr = 1.00 1.00 1.00 0.00 2.00 2.19 2.50 3.34 3.91 4.72 6.25 6.91 7.75 9.19 10.38 11.59 13.06 13.69 16.91
	[./tests/rc5-tests.cc:1344] Average g_min prob_arr = 0.00 0.00 0.00 0.00 -1.72 -2.12 -1.95 -2.81 -3.10 -3.34 -4.81 -5.31 -6.63 -8.57 -8.30 -11.00 -12.66 -11.58 -14.45

real    2m45.449s
user    2m44.970s
sys     0m0.004s

 */

/*
Average of MAX HW and MIN prob over 32 keys
  [./tests/rc5-tests.cc:1128] Average g_max hw_arr = 
1.00 1.00 1.00 0.00 1.97 2.28 2.56 3.50 | 4.47 4.84 6.56 7.31 8.12 9.06 10.19 12.50 13.03 14.31 15.75
  [./tests/rc5-tests.cc:1135] Average g_min prob_arr = 
0.00 0.00 0.00 0.00 -1.74 -2.00 -1.54 -2.56 -3.02 -3.51 -4.22 -5.63 -5.95 -7.52 -9.40 -12.37 -11.99 -10.71 -13.52
*/


/* --- */

/* 

Store at most one VARIANT:

[./src/rc5-dc.cc:3332] False positive: #filtered [          15775 (2^13.95) /         8388557 (2^23.00)]
[./src/rc5-dc.cc:3347]   average hw_arr = 1.00 1.00 1.00 0.00 1.00 1.00 0.60 1.40 1.80 0.40 1.40 2.40 2.40 5.00 4.60 7.20 8.20 10.20 14.00
[./src/rc5-dc.cc:3358] average prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 -0.57 -1.07 -1.68 -0.23 -1.00 -1.68 -1.45 -3.06 -2.69 -5.74 -9.12 -6.96 -9.21
[./src/rc5-dc.cc:3366]        FIB_ARRAY =  4  5  5  7  8  9 10 11 13 13 13 13
[./src/rc5-dc.cc:3371]       max hw_arr =  3  3  2  2  4  6  9  7 {15} 10 {16} {20}
[./src/rc5-dc.cc:3388]     min prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 -3.00 -3.00 -4.00 -2.00 -3.00 -4.00 -6.00 -12.00 -8.00 -18.00 -14.00 -12.00 -17.00
[./src/rc5-dc.cc:rc5_equal_rot_attack():3401] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_FILTER_CUT_HW1  0
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 261916 (2^17.998745)
#Filtered pairs all: 15775 (2^13.945352)
#Good pairs among filtered: 5
#Good pairs among filtered f1: 5
#Good pairs total: 5
#GoUP sets of trails: 15775 (2^13.945352)
RC5_P_THRES_ARRAY = [ 0] -3.17 [ 1] -4.09 [ 2] -4.09 [ 3] -5.04 [ 4] -6.02 [ 5] -6.02 [ 6] -8.01 [ 7] -10.00 [ 8] -13.00 [ 9] -14.00 [10] -14.00 [11] -16.00
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
		  [./tests/rc5-tests.cc:594] #GoUP sets of trails: 15775 (2^13.945352)
WORD g_good_pairs[5][2][2] = {
{{0x8A8F7BB4, 0x864713B6}, {0x0A8F7BB4, 0x064713B6}},
{{0x360D3656, 0x0DCF294E}, {0xB60D3656, 0x8DCF294E}},
{{0x554019D7, 0x98B8EE6C}, {0xD54019D7, 0x18B8EE6C}},
{{0xDB640C31, 0xDF08491C}, {0x5B640C31, 0x5F08491C}},
{{0xBCACB047, 0xDAE842D6}, {0x3CACB047, 0x5AE842D6}}};
const uint32_t g_key[16] = {0x5A, 0xFF, 0xFD, 0x4E, 0x27, 0x3B, 0x83, 0x1A, 0x18, 0xB5, 0x5F, 0xE4, 0x2E, 0x5A, 0xC0, 0xB0};
[./tests/rc5-tests.cc:661] Test OK!

real    207m37.455s
user    207m1.868s
sys     0m0.036s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$



[./src/rc5-dc.cc:3332] False positive: #filtered [          15909 (2^13.96) /         8387543 (2^23.00)]
[./src/rc5-dc.cc:3347]   average hw_arr = 1.00 1.00 1.00 0.00 1.00 1.38 1.75 2.12 1.25 1.62 2.50 2.88 2.88 3.62 4.38 8.25 10.88 9.25 15.00
  [./src/rc5-dc.cc:3358] average prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.25 -0.81 -1.35 -0.91 -1.30 -1.41 -1.58 -2.24 -2.14 -3.49 -5.47 -9.63 -6.48 -11.75
  [./src/rc5-dc.cc:3366]        FIB_ARRAY =  4  5  5  7  8  9 10 11 13 13 13 13
  [./src/rc5-dc.cc:3371]       max hw_arr = { 7}  3  3  7  7  8 {11}  7 {16} {16} {14} {18}
[./src/rc5-dc.cc:3388]     min prob_arr = 0.00 0.00 0.00 0.00 -1.00 -3.00 -8.00 -7.00 -3.00 -4.00 -8.00 -7.00 -12.00 -14.00 -9.00 -18.00 -19.00 -11.00 -14.00
																													  [./src/rc5-dc.cc:rc5_equal_rot_attack():3401] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_FILTER_CUT_HW1  0
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 262709 (2^18.003106)
#Filtered pairs all: 15909 (2^13.957556)
#Good pairs among filtered: 8
#Good pairs among filtered f1: 8
#Good pairs total: 8
#GoUP sets of trails: 15909 (2^13.957556)
RC5_P_THRES_ARRAY = [ 0] -3.17 [ 1] -4.09 [ 2] -4.09 [ 3] -5.04 [ 4] -6.02 [ 5] -6.02 [ 6] -8.01 [ 7] -10.00 [ 8] -13.00 [ 9] -14.00 [10] -14.00 [11] -16.00
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
  [./tests/rc5-tests.cc:594] #GoUP sets of trails: 15909 (2^13.957556)
  WORD g_good_pairs[8][2][2] = {
  {{0xBA227BC2, 0x1F315C1D}, {0x3A227BC2, 0x9F315C1D}},
  {{0x4231624D, 0x2158B928}, {0xC231624D, 0xA158B928}},
  {{0x0AA5E77F, 0x6B505C3D}, {0x8AA5E77F, 0xEB505C3D}},
  {{0x5CE47329, 0xBCC2E3C4}, {0xDCE47329, 0x3CC2E3C4}},
  {{0xC066F41F, 0xAE017BAC}, {0x4066F41F, 0x2E017BAC}},
  {{0x5BF6E45E, 0x6E6A5095}, {0xDBF6E45E, 0xEE6A5095}},
  {{0xF35BE761, 0xA8AADA68}, {0x735BE761, 0x28AADA68}},
  {{0x1F116FCF, 0x943D1D0E}, {0x9F116FCF, 0x143D1D0E}}};
const uint32_t g_key[16] = {0x6F, 0x2B, 0x60, 0x4D, 0xAD, 0xE3, 0xD8, 0x5, 0x72, 0x63, 0x94, 0x17, 0x12, 0xEC, 0xD4, 0x8E};
[./tests/rc5-tests.cc:661] Test OK!

real    216m14.364s
user    215m37.301s
sys     0m0.036s


[./src/rc5-dc.cc:3332] False positive: #filtered [          15902 (2^13.96) /         8387261 (2^23.00)]
[./src/rc5-dc.cc:3347]   average hw_arr = 1.00 1.00 1.00 0.00 1.20 1.80 1.00 1.20 0.60 1.00 1.20 0.80 0.60 2.00 3.00 4.20 5.00 5.40 9.00
  [./src/rc5-dc.cc:3358] average prob_arr = 0.00 0.00 0.00 0.00 -1.15 -1.62 -0.80 -1.06 -0.32 -0.83 -1.00 -0.62 -0.86 -1.50 -1.30 -2.30 -2.86 -2.19
  [./src/rc5-dc.cc:3366]        FIB_ARRAY =  4  5  5  7  8  9 10 11 13 13 13 13
  [./src/rc5-dc.cc:3371]       max hw_arr =  3  1  2  2  2  1  5  9 11  9 12 {14}
  [./src/rc5-dc.cc:3388]     min prob_arr = 0.00 0.00 0.00 0.00 -2.00 -3.00 -3.00 -5.00 -1.00 -4.00 -2.00 -2.00 -2.00 -6.00 -10.00 -16.00 -15.00 -10
  [./src/rc5-dc.cc:rc5_equal_rot_attack():3401] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_FILTER_CUT_HW1  0
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 261259 (2^17.995121)
#Filtered pairs all: 15902 (2^13.956921)
#Good pairs among filtered: 5
#Good pairs among filtered f1: 5
#Good pairs total: 5
#GoUP sets of trails: 15902 (2^13.956921)
RC5_P_THRES_ARRAY = [ 0] -3.17 [ 1] -4.09 [ 2] -4.09 [ 3] -5.04 [ 4] -6.02 [ 5] -6.02 [ 6] -8.01 [ 7] -10.00 [ 8] -13.00 [ 9] -14.00 [10] -14.00 [
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
		  [./tests/rc5-tests.cc:594] #GoUP sets of trails: 15902 (2^13.956921)
		  WORD g_good_pairs[5][2][2] = {
	 {{0x2298DB65, 0x6A0C55AE}, {0xA298DB65, 0xEA0C55AE}},
	 {{0x86F2C6EB, 0xC1194603}, {0x06F2C6EB, 0x41194603}},
	 {{0xC5B82202, 0xF8B85FEF}, {0x45B82202, 0x78B85FEF}},
	 {{0xD958A2F4, 0x0505D5F4}, {0x5958A2F4, 0x8505D5F4}},
	 {{0x28A4B37F, 0xDFF007CF}, {0xA8A4B37F, 0x5FF007CF}}};
		  const uint32_t g_key[16] = {0xB7, 0xCA, 0xD5, 0xBC, 0xDC, 0xB9, 0xA8, 0x19, 0x83, 0xA7, 0xCA, 0xDC, 0x4F, 0x66, 0xC9, 0x89};
		  [./tests/rc5-tests.cc:661] Test OK!

real    358m2.893s
user    357m1.535s
sys     0m0.052s

		  [./src/rc5-dc.cc:3332] False positive: #filtered [          15876 (2^13.95) /         8388510 (2^23.00)]
		  [./src/rc5-dc.cc:3347]   average hw_arr = 1.00 1.00 1.00 0.00 1.12 1.00 0.25 1.25 1.88 2.88 2.00 2.38 3.38 3.75 5.25 6.88 8.12 8.88 8.88
		  [./src/rc5-dc.cc:3358] average prob_arr = 0.00 0.00 0.00 0.00 -0.91 -0.91 -0.33 -1.25 -1.37 -1.53 -1.18 -1.71 -1.52 -2.12 -2.66 -3.81 -2.95 -5.74 -6.26
		  [./src/rc5-dc.cc:3366]        FIB_ARRAY =  4  5  5  7  8  9 10 11 13 13 13 13
		  [./src/rc5-dc.cc:3371]       max hw_arr =  3  4 { 7}  7  6  8  7 {14} 12 {16} {15} {14}
		  [./src/rc5-dc.cc:3388]     min prob_arr = 0.00 0.00 0.00 0.00 -2.00 -2.00 -3.00 -3.00 -5.00 -8.00 -10.00 -11.00 -13.00 -11.00 -14.00 -15.00 -18.00 -16.00 -14.00
		  [./src/rc5-dc.cc:rc5_equal_rot_attack():3401] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_FILTER_CUT_HW1  0
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 262657 (2^18.002821)
#Filtered pairs all: 15876 (2^13.954560)
#Good pairs among filtered: 8
#Good pairs among filtered f1: 8
#Good pairs total: 8
#GoUP sets of trails: 15876 (2^13.954560)
RC5_P_THRES_ARRAY = [ 0] -3.17 [ 1] -4.09 [ 2] -4.09 [ 3] -5.04 [ 4] -6.02 [ 5] -6.02 [ 6] -8.01 [ 7] -10.00 [ 8] -13.00 [ 9] -14.00 [10] -14.00 [11] -16.00
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
		  [./tests/rc5-tests.cc:594] #GoUP sets of trails: 15876 (2^13.954560)
		  WORD g_good_pairs[8][2][2] = {
			 {{0xBBD65F1A, 0x567503E8}, {0x3BD65F1A, 0xD67503E8}},
			 {{0xDF1D874D, 0xCC6DEB9D}, {0x5F1D874D, 0x4C6DEB9D}},
			 {{0xD2AE4349, 0xFB35F972}, {0x52AE4349, 0x7B35F972}},
			 {{0x8A7E0C5D, 0x5DE6B591}, {0x0A7E0C5D, 0xDDE6B591}},
			 {{0x5409F7F2, 0xDF1227BB}, {0xD409F7F2, 0x5F1227BB}},
			 {{0x18BD30A4, 0x283C592A}, {0x98BD30A4, 0xA83C592A}},
			 {{0x52E4E228, 0x515733D8}, {0xD2E4E228, 0xD15733D8}},
			 {{0xBAD92DB0, 0x01665060}, {0x3AD92DB0, 0x81665060}}};
		  const uint32_t g_key[16] = {0xC3, 0xB8, 0x74, 0x5A, 0x24, 0x5, 0x6D, 0xC3, 0x1C, 0x1C, 0xC2, 0x45, 0xE1, 0x12, 0xFB, 0x2E};
		  [./tests/rc5-tests.cc:661] Test OK!

real    313m10.141s
user    312m16.375s
sys     0m0.028s


		  [./src/rc5-dc.cc:3332] False positive: #filtered [          15983 (2^13.96) /         8387975 (2^23.00)]
		  [./src/rc5-dc.cc:3347]   average hw_arr = 1.00 1.00 1.00 0.00 1.67 1.83 1.00 2.83 2.33 1.50 3.17 2.33 2.83 3.17 4.50 5.50 6.67 6.83 9.00
		  [./src/rc5-dc.cc:3358] average prob_arr = 0.00 0.00 0.00 0.00 -1.42 -1.54 -0.49 -1.68 -1.19 -0.41 -1.68 -1.26 -0.87 -1.57 -2.58 -5.13 -8.39 -6.78 -6.20
		  [./src/rc5-dc.cc:3366]        FIB_ARRAY =  4  5  5  7  8  9 10 11 13 13 13 13
		  [./src/rc5-dc.cc:3371]       max hw_arr = { 9} { 8} { 7} {11}  8 {11} {11} {15}  8 10  9 13
		  [./src/rc5-dc.cc:3388]     min prob_arr = 0.00 0.00 0.00 0.00 -2.00 -4.00 -6.00 -12.00 -9.00 -11.00 -12.00 -8.00 -14.00 -15.00 -20.00 -10.00 -16.00 -14.00 -11.00
		  [./src/rc5-dc.cc:rc5_equal_rot_attack():3401] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_FILTER_CUT_HW1  0
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 261434 (2^17.996087)
#Filtered pairs all: 15983 (2^13.964251)
#Good pairs among filtered: 6
#Good pairs among filtered f1: 6
#Good pairs total: 6
#GoUP sets of trails: 15983 (2^13.964251)
RC5_P_THRES_ARRAY = [ 0] -3.17 [ 1] -4.09 [ 2] -4.09 [ 3] -5.04 [ 4] -6.02 [ 5] -6.02 [ 6] -8.01 [ 7] -10.00 [ 8] -13.00 [ 9] -14.00 [10] -14.00 [11] -16.00
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
		  [./tests/rc5-tests.cc:594] #GoUP sets of trails: 15983 (2^13.964251)
		  WORD g_good_pairs[6][2][2] = {
			 {{0x38724F99, 0xED77339F}, {0xB8724F99, 0x6D77339F}},
			 {{0xDAC2CB5F, 0x92CBEEC4}, {0x5AC2CB5F, 0x12CBEEC4}},
			 {{0x1939A9D1, 0xF35EE156}, {0x9939A9D1, 0x735EE156}},
			 {{0xCC1B1FEF, 0xAFCD636F}, {0x4C1B1FEF, 0x2FCD636F}},
			 {{0x7E3986B7, 0x7DFC4DB7}, {0xFE3986B7, 0xFDFC4DB7}},
			 {{0xA5DB9564, 0xF1591B0B}, {0x25DB9564, 0x71591B0B}}};
		  const uint32_t g_key[16] = {0x49, 0x7D, 0xA2, 0xE, 0xAA, 0x80, 0xED, 0xE0, 0xB7, 0x88, 0x92, 0xF6, 0x85, 0xD, 0xFC, 0xB4};
		  [./tests/rc5-tests.cc:661] Test OK!

real    361m7.899s
user    360m6.022s
sys     0m0.040s


 */

/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
/*
 * Table of probability thresholds. Every entry has the form
 * 1 / (2^k + c); the trailing comment gives the denominator and the
 * approximate log2 of the resulting value.
 */
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / 1.0,      // [ 0] 2^0  + 0 : 2^0
  1.0 / 1.0,      // [ 1] 2^0  + 0 : 2^0
  1.0 / 2.0,      // [ 2] 2^0  + 1 : 2^-1.00
  1.0 / 3.0,      // [ 3] 2^1  + 1 : 2^-1.58
  1.0 / 3.0,      // [ 4] 2^1  + 1 : 2^-1.58
  1.0 / 2.0,      // [ 5] 2^0  + 1 : 2^-1.00
  1.0 / 3.0,      // [ 6] 2^1  + 1 : 2^-1.58
  1.0 / 3.0,      // [ 7] 2^1  + 1 : 2^-1.58
  1.0 / 2.0,      // [ 8] 2^0  + 1 : 2^-1.00
  1.0 / 3.0,      // [ 9] 2^1  + 1 : 2^-1.58
  1.0 / 3.0,      // [10] 2^1  + 1 : 2^-1.58
  1.0 / 2.0,      // [11] 2^0  + 1 : 2^-1.00
  1.0 / 3.0,      // [12] 2^1  + 1 : 2^-1.58
  1.0 / 5.0,      // [13] 2^2  + 1 : 2^-2.32
  1.0 / 17.0,     // [14] 2^4  + 1 : 2^-4.09
  1.0 / 16385.0,  // [15] 2^14 + 1 : 2^-14.00
  1.0 / 257.0,    // [16] 2^8  + 1 : 2^-8.01
  1.0 / 2049.0    // [17] 2^11 + 1 : 2^-11.00
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


/* --- */
	 if(dx_i == 0x9A007307) {
		assert(0 == 1);
	 }



/* --- */
#if 0
  gsl_vector* L_last;
  gsl_vector* C_last;
  gsl_matrix* A_last[2][2][2];
  gsl_matrix* AA_last[2][2][2][2];

  L_last = gsl_vector_calloc(RC5_LAST_ROUND_MSIZE);
  C_last = gsl_vector_calloc(RC5_LAST_ROUND_MSIZE);

  gsl_vector_set_all(L_last, 1.0);
  gsl_vector_set_all(C_last, 0.0);
  gsl_vector_set(C_last, RC5_LAST_ROUND_ISTATE, 1.0);

  rc5_last_round_eq_alloc_matrices_3d(A_last);
  rc5_last_round_eq_alloc_matrices_4d(AA_last);

  rc5_last_round_eq_x_sf(AA_last);

  rc5_last_round_eq_add_matrices(A_last, AA_last);
  rc5_last_round_eq_normalize_matrices(A_last); 

  // matrices for the mid round
  gsl_vector* L_mid;
  gsl_vector* C_mid;
  gsl_matrix* A_mid[2][2];
  gsl_matrix* AA_mid[2][2][2][2];

  L_mid = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);
  C_mid = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);

  gsl_vector_set_all(L_mid, 1.0);
  gsl_vector_set_all(C_mid, 0.0);
  gsl_vector_set(C_mid, RC5_MID_ROUND_ISTATE, 1.0);

  rc5_mid_round_eq_alloc_matrices_2d(A_mid);
  rc5_mid_round_eq_alloc_matrices_4d(AA_mid);

  rc5_mid_round_eq_xy_sf(AA_mid);
  rc5_mid_round_eq_add_matrices(A_mid, AA_mid);
  rc5_mid_round_eq_normalize_matrices(A_mid); 

#endif

#if 0
  rc5_mid_round_eq_free_matrices_2d(A_mid);
  rc5_mid_round_eq_free_matrices_4d(AA_mid);
  gsl_vector_free(C_mid);
  gsl_vector_free(L_mid);

  gsl_vector_free(C_last);
  gsl_vector_free(L_last);
  rc5_last_round_eq_free_matrices_3d(A_last);
  rc5_last_round_eq_free_matrices_4d(AA_last);
#endif



/* ---- */

/*
 * Generate the rot const for the next round
 * NOTE: not used
 *
 * Exhaustively enumerates all pairs (x, y) of "order" bits taken from bit
 * position i_lo upwards. For every pair whose difference after the
 * subtraction z = x - y (mod 2^order) matches the corresponding bits of dz,
 * the candidate rotation amount of the next round is derived from z and
 * the current rotation amount, and the matching bit is set in
 * *rot_const_next_bitmask.
 *
 * \param i bit position; must be either WORD_SIZE - 1 or
 *        log2(WORD_SIZE) + rot_const - 1 (asserted).
 * \param dx XOR difference of the first operand.
 * \param dy XOR difference of the second operand.
 * \param dz XOR difference of the result.
 * \param order_in order of the approximation; must be equal to
 *        RC5_ADD_APPROX_ORDER (asserted).
 * \param rot_const rotation amount of the current round.
 * \param rot_const_next_bitmask in/out: bit r is set for every possible
 *        next rotation amount r that is found; bits are only ever added.
 *
 * \see rc5_last_round_add_approx_rot_const_next , rc5_last_round_add_approx_match
 */
void rc5_mid_round_add_approx_rot_const_next(const uint32_t i, const WORD dx, 
															const WORD dy, const WORD dz, const uint32_t order_in,
															const uint32_t rot_const, WORD* rot_const_next_bitmask)
{
  //  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);
  assert(order_in == RC5_ADD_APPROX_ORDER);

  // only word sizes of 16 and 32 bits pass the assert below
  const uint32_t logn = (uint32_t)log2(WORD_SIZE);
  assert((logn == 4) || (logn == 5));

  assert(logn <= RC5_ADD_APPROX_ORDER);

  const uint32_t w = WORD_SIZE;
  const uint32_t r = rot_const;
  const uint32_t L = logn; // log2(n) 
  assert((i == (w - 1)) || (i == (L + r - 1)));

  // i_lo is the lowest bit of the "order"-bit window that ends at bit i
  uint32_t order = 0;//order_in;
  uint32_t i_lo = 0;
  if(i < order_in) {
	 // a window shorter than order_in is not supported: hard failure
	 i_lo = 0;
	 order = i + 1;
	 assert(0 == 1);
  } else { // i >= order_in
	 i_lo = (i - order_in + 1);
	 order = order_in;
  }
  uint32_t N = (1U << order);
  uint32_t mask_order = (0xffffffff >> (32 - order)); 
  //  bool b_match = false;
  WORD x_seq = 0;
  while(x_seq < N) {
	 //	 printf("[%s:%d] x_seq %d\n", __FILE__, __LINE__, x_seq);
	 // xx is the partner of x under the input difference dx (window bits only)
	 WORD x = (x_seq) & mask_order;
	 WORD xx = (x ^ (dx >> i_lo)) & mask_order;
	 WORD y_seq = 0;
	 while(y_seq < N) {
		//		printf("[%s:%d] y_seq %d\n", __FILE__, __LINE__, y_seq);
		WORD y = y_seq & mask_order;
		WORD yy = (y ^ (dy >> i_lo)) & mask_order;
		WORD z = (x - y) & mask_order; // (x - y) mod 2^{order}
		WORD zz = (xx - yy) & mask_order; 
#if 1 // DEBUG
		// cross-check: adding N before masking must give the same residue
		WORD z_mod = (WORD)(x - y + N) & mask_order; // (x - y) mod 2^{order}
		WORD zz_mod = (WORD)(xx - yy + N) & mask_order; 
		assert(z == z_mod);
		assert(zz == zz_mod);
#endif // #if 1 // DEBUG
		// the pair matches if its output difference equals the window of dz
		WORD diff = (z ^ zz) & mask_order;
		WORD dz_lo = (dz >> i_lo) & mask_order;
		bool b_match = (diff == dz_lo);

		if(b_match) {
		  // Case 1: the log2(n)-bit field starting at bit r does not wrap
		  // around the word and the window ends exactly at its top bit.
		  if(((L + r - 1) < WORD_SIZE) && (i == (L + r - 1))) {
			 // NOTE(review): here i_lo - r == L - order, which wraps around
			 // for an unsigned value if order > L -- confirm that order == L
			 // is the only configuration used.
			 uint32_t dz_logn = (dz_lo >> (i_lo - r));
			 if(dz_logn != 0) {
				printf("[%s:%d] i %2d i_lo %2d r %2d dz_lo %8X dz_logn %8X\n", __FILE__, __LINE__, i, i_lo, r, dz_lo, dz_logn);
			 }
			 assert(dz_logn == 0);
			 /*
			  * Shift z right by the difference between the approx order
			  * and the rotation constant r
			  */
			 uint32_t z_logn = (z >> (order - L));
			 uint32_t zz_logn = (zz >> (order - L));
			 uint32_t r_next = (z_logn ^ r) & RC5_ROT_MASK;
			 uint32_t rr_next = (zz_logn ^ r) & RC5_ROT_MASK;
			 // NOTE(review): (1 << r_next) shifts a signed int literal; for
			 // r_next == 31 an unsigned literal (1U) would be the safer form.
			 if(r_next == rr_next) {
				(*rot_const_next_bitmask) |= (1 << r_next);
			 }
#if 1 // DEBUG
			 if(r_next != rr_next) {
				printf("[%s:%d] i_lo %2d r %2d z %8X zz %8X | z_logn %8X zz_logn %8X r_next %2d %2d\n", 
						 __FILE__, __LINE__, i_lo, r, z, zz, z_logn, zz_logn, r_next, rr_next);
			 }
#endif // #if 1 // DEBUG
			 // both texts must yield the same next rotation amount
			 assert(r_next == rr_next);
		  }

		  // Case 2: the log2(n)-bit field starting at bit r wraps around the
		  // word: nbits_hi bits come from the top of the window and nbits_lo
		  // bits are enumerated separately below.
		  if(((L + r - 1) >= WORD_SIZE) && (i == (WORD_SIZE - 1))) {
			 uint32_t nbits_lo = (L + r - w); 
			 uint32_t mask_lo = (0xffffffff >> (32 - nbits_lo));
			 uint32_t nbits_hi = L - nbits_lo;
			 // mask_hi is only used by the sanity checks that follow
			 uint32_t mask_hi = (0xffffffff << (32 - nbits_hi)) & MASK;

			 if((mask_hi == 0) || (mask_lo == 0)) {
				printf("[%s:%d] i %2d (L + r - 1) %2d mask_hi %8X mask_lo %8X\n", __FILE__, __LINE__, i, (L + r - 1), mask_hi, mask_lo);
			 }
			 assert(mask_hi != 0);
			 assert(mask_lo != 0);

			 uint32_t N_lo = (1U << nbits_lo);

			 //			 assert(nbits_lo == (order - L));

			 /*
			  * Note: x,y contain "order" LS bits => select the high
			  * nbits_hi bits of them
			  */
			 uint32_t x_seq_hi = (x >> (order - nbits_hi));
			 uint32_t xx_seq_hi = (xx >> (order - nbits_hi));

			 uint32_t y_seq_hi = (y >> (order - nbits_hi));
			 uint32_t yy_seq_hi = (yy >> (order - nbits_hi));

#if 0 // DEBUG
			 printf("[%s:%d] x %8X x_seq_hi %8X xx_seq_hi %8X\n", __FILE__, __LINE__, x, x_seq_hi, xx_seq_hi);
			 printf("[%s:%d] y %8X y_seq_hi %8X yy_seq_hi %8X\n", __FILE__, __LINE__, y, y_seq_hi, yy_seq_hi);
#endif // #if 1 // DEBUG

			 // enumerate every value of the nbits_lo low bits of both operands
			 uint32_t x_seq_lo = 0;
			 while(x_seq_lo < N_lo) {
				//				printf("[%s:%d] x_seq_lo %d\n", __FILE__, __LINE__, x_seq_lo);
				uint32_t xx_seq_lo = (x_seq_lo ^ dx) & mask_lo;

				uint32_t y_seq_lo = 0;
				while(y_seq_lo < N_lo) {
				  //				  printf("[%s:%d] y_seq_lo %d\n", __FILE__, __LINE__, y_seq_lo);
				  uint32_t yy_seq_lo = (y_seq_lo ^ dy) & mask_lo;

				  // concatenate the high and the low part into one value
				  uint32_t x_seq_logn = (x_seq_hi << nbits_lo) | (x_seq_lo);
				  uint32_t xx_seq_logn = (xx_seq_hi << nbits_lo) | (xx_seq_lo);

				  uint32_t y_seq_logn = (y_seq_hi << nbits_lo) | (y_seq_lo);
				  uint32_t yy_seq_logn = (yy_seq_hi << nbits_lo) | (yy_seq_lo);

				  uint32_t z_logn = (x_seq_logn - y_seq_logn) & RC5_ROT_MASK;
				  uint32_t zz_logn = (xx_seq_logn - yy_seq_logn) & RC5_ROT_MASK;

				  uint32_t r_next = (z_logn ^ r) & RC5_ROT_MASK;
				  uint32_t rr_next = (zz_logn ^ r) & RC5_ROT_MASK;


				  // NOTE(review): same signed-literal shift as in case 1
				  if(r_next == rr_next) {
					 (*rot_const_next_bitmask) |= (1 << r_next);
				  }
#if 1 // DEBUG
				  if(r_next != rr_next) {
					 printf("[%s:%d] i_lo %2d r %2d z %8X zz %8X | z_logn %8X zz_logn %8X r_next %2d %2d\n", 
							  __FILE__, __LINE__, i_lo, r, z, zz, z_logn, zz_logn, r_next, rr_next);
					 printf("[%s:%d] x %8X x_seq_hi %8X xx_seq_hi %8X\n", __FILE__, __LINE__, x, x_seq_hi, xx_seq_hi);
					 printf("[%s:%d] y %8X y_seq_hi %8X yy_seq_hi %8X\n", __FILE__, __LINE__, y, y_seq_hi, yy_seq_hi);
					 printf("[%s:%d] x_seq_logn %8X y_seq_logn %8X z_logn %8X r_next %2d\n", __FILE__, __LINE__, x_seq_logn, y_seq_logn, z_logn, r_next);
					 printf("[%s:%d] xx_seq_logn %8X yy_seq_logn %8X zz_logn %8X rr_next %2d\n", __FILE__, __LINE__, xx_seq_logn, yy_seq_logn, zz_logn, rr_next);
				  }
#endif // #if 1 // DEBUG
				  assert(r_next == rr_next);

				  y_seq_lo++;
				}
				x_seq_lo++;
			 }
		  } // if
		} // if b_match
		y_seq++;
	 }
	 x_seq++;
  }
}

/*
 * Generate the rot const for the next round
 * NOTE: not used
 * \see rc5_last_round_add_approx_match
 */
void rc5_last_round_add_approx_rot_const_next(const uint32_t i, const WORD x_in, const WORD xx_in, 
															 const WORD dy, const WORD dz, const uint32_t order, 
															 const uint32_t rot_const, WORD* rot_const_next_bitmask)
{
  assert(0 == 1);
  //  printf("[%s:%d] %8X\n", __FILE__, __LINE__, *rot_const_next_bitmask);
  assert((*rot_const_next_bitmask == 0xffffffff) || (*rot_const_next_bitmask == 0));
  uint32_t r_next = 0;

  uint32_t logn = (uint32_t)log2(WORD_SIZE);
  assert((logn == 4) || (logn == 5));
  assert(logn <= order);

  const uint32_t w = WORD_SIZE;
  const uint32_t r = rot_const;
  const uint32_t L = logn; // log2(n) 
  assert((i == (w - 1)) || (i == (L + r - 1)));

  uint32_t N = (1U << order);
  uint32_t mask_stride = (0xffffffff >> (32 - order)) << (i + 1 - order); 

  if(!((((L + r - 1) < WORD_SIZE) && (i == (L + r - 1))) || 
		 (((L + r - 1) >= WORD_SIZE) && (i == (w - 1))))) {
	 assert(0 == 1);
	 return;
  }

  WORD seq = 0;

  while(seq < N) {
	 WORD y = (seq << (i + 1 - order)) & mask_stride; // ...000***000...
	 WORD yy = (y ^ dy) & mask_stride;
	 WORD x = x_in & mask_stride;
	 WORD xx = xx_in  & mask_stride;
	 WORD diff_stride = ((x - y) ^ (xx - yy)) & mask_stride;
	 bool b_match = (((diff_stride >> i) & 1) == ((dz >> i) & 1)); // diff[i] ?= dz[i]

	 if(b_match) {

		if(((L + r - 1) < WORD_SIZE) && (i == (L + r - 1))) {
		  r_next = ((((x - y) & mask_stride) >> r) ^ r) & RC5_ROT_MASK; // (y - key) xor r_prev
		  uint32_t rr_next = ((((xx - yy) & mask_stride) >> r) ^ r) & RC5_ROT_MASK; // (y - key) xor r_prev
		  if(r_next == rr_next) {
			 //			 rot_const_next_bitmask->push_back(r_next); // add rot const
			 //			 printf("[%s:%d] Before: %8X\n", __FILE__, __LINE__, *rot_const_next_bitmask);
			 (*rot_const_next_bitmask) |= (1 << r_next);
			 //			 printf("[%s:%d]  After: %8X\n", __FILE__, __LINE__, *rot_const_next_bitmask);
#if 0 // DEBUG
			 printf("[%s:%d] L %2d r %2d i %2d x %8X xx %8X y %8X r_next %2d rr_next %2d\n", __FILE__, __LINE__, L, r, i, x, xx, y, r_next, rr_next);
#endif // #if 0 // DEBUG
		  }
		  assert(r_next < WORD_SIZE);
		  //		  if(hw32(*rot_const_next_bitmask) > 3)
		  //			 return;	// <--- !
		}

		if(((L + r - 1) >= WORD_SIZE) && (i == (w - 1))) {
		  uint32_t nbits_lo = (L + r - w); 
		  uint32_t mask_lo = (0xffffffff >> (32 - nbits_lo));
		  uint32_t nbits_hi = L - nbits_lo;
		  uint32_t mask_hi = (0xffffffff << (32 - nbits_hi)) & MASK;

		  uint32_t N_lo = (1U << nbits_lo);
		  uint32_t seq_lo = 0;
		  while(seq_lo < N_lo) {

			 uint32_t y_aug = y | seq_lo;
			 uint32_t yy_aug = (y_aug ^ dy);

			 r_next = (((((x_in - y_aug) & mask_lo) << nbits_hi) | (((x_in - y_aug) & mask_hi) >> (32 - nbits_hi))) ^ r) & RC5_ROT_MASK;
			 uint32_t rr_next = (((((xx_in - yy_aug) & mask_lo) << nbits_hi) | (((xx_in - yy_aug) & mask_hi) >> (32 - nbits_hi))) ^ r) & RC5_ROT_MASK;
			 if(r_next == rr_next) {
				//				rot_const_next_bitmask->push_back(r_next); // add rot const
				(*rot_const_next_bitmask) |= (1 << r_next);
#if 0 // DEBUG
				printf("[%s:%d] L %2d r %2d i %2d x %8X xx %8X y %8X yy %8X r_next %2d rr_next %2d mask_hi %8X mask_lo %8X\n", 
						 __FILE__, __LINE__, L, r, i, x_in, xx_in, y_aug, yy_aug, r_next, rr_next, mask_hi, mask_lo);
#endif // #if 0 // DEBUG
				//				if(hw32(*rot_const_next_bitmask) > 3)
				//				  return;	// <--- !
			 }
			 seq_lo++;
		  }
		}

	 } // if b_match
	 seq++;
  }
}

/* --- */
/*
 * Table of RC5_FULL_FIB_LEN probability thresholds.
 * Every entry has the form 1 / (2^e + 1).
 * Determined as average of MINs over 32 keys.
 */
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / (double)((1U <<  0) + 1), // [ 0] 1/2
  1.0 / (double)((1U <<  0) + 1), // [ 1] 1/2
  1.0 / (double)((1U <<  0) + 1), // [ 2] 1/2
  1.0 / (double)((1U <<  1) + 1), // [ 3] 1/3
  1.0 / (double)((1U <<  1) + 1), // [ 4] 1/3
  1.0 / (double)((1U <<  0) + 1), // [ 5] 1/2
  1.0 / (double)((1U <<  1) + 1), // [ 6] 1/3
  1.0 / (double)((1U <<  1) + 1), // [ 7] 1/3
  1.0 / (double)((1U <<  0) + 1), // [ 8] 1/2
  1.0 / (double)((1U <<  1) + 1), // [ 9] 1/3
  1.0 / (double)((1U <<  1) + 1), // [10] 1/3
  1.0 / (double)((1U <<  0) + 1), // [11] 1/2
  1.0 / (double)((1U <<  1) + 1), // [12] 1/3
  1.0 / (double)((1U <<  2) + 1), // [13] 1/5
  1.0 / (double)((1U <<  4) + 1), // [14] 1/17
  1.0 / (double)((1U << 14) + 1), // [15] 1/16385
  1.0 / (double)((1U <<  8) + 1), // [16] 1/257
  1.0 / (double)((1U << 11) + 1)  // [17] 1/2049
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


/* --- */
/*
 * Detached fragment (not inside any function at this point of the
 * file): the `continue` and `vec_iter++` below need an enclosing loop
 * over a vector iterator. The same statements appear in the
 * RC5_FLEX_FIB section of the dx_set_all loop in rc5_filter_go_up_nl.
 *
 * It computes the last-round probability p_i of dx_i, looks up a
 * Hamming weight limit in LOGP2HW_ARRAY[i_round][|log2(p_i)|] and
 * skips dx_i when HW(dx_i ^ dx_prev) exceeds that limit.
 */
#if RC5_FLEX_FIB
	 double p_i = rc5_xdp_add_last_round((const gsl_matrix*(*)[2][2])A_last, L_last, C_last, y, yy, dx_i);
	 uint32_t log2p = (uint32_t)std::abs(log2(p_i));
	 uint32_t i_round = depth + (RC5_FULL_FIB_LEN - (RC5_FIB_LEN - 2));
	 // Report the offending values before the assertion below fires
	 if((i_round != ((2*NROUNDS) + 2))) {
		printf("[%s:%d] i_round %2d depth %2d\n", __FILE__, __LINE__, i_round, depth);
	 }
	 assert(i_round == ((2*NROUNDS) + 2));
	 uint32_t hw_thres_flex = LOGP2HW_ARRAY[i_round][log2p];
#if 0
	 if(hw_thres_flex) {
		printf("[%s:%d] hw_thres_flex %d log2p %2d 2^%f y %8X yy %8X dx_i %8X\n", __FILE__, __LINE__, hw_thres_flex, log2p, log2(p_i), y, yy, dx_i);
	 }
#endif
	 uint32_t hw = hw32(dx_i ^ dx_prev);
	 //	 if(0x47D9F20 == (dx_i ^ dx_prev)) {
	 // NOTE(review): hard-coded trap that aborts on one specific difference -- looks like a debugging leftover
	 if(0x47D9F20 == dx_i) {
		printf("[%s:%d] hw %2d hw_thres_flex %2d dx_i %8X dx_prev %8X (dx_i ^ dx_prev) %8X\n", __FILE__, __LINE__, hw, hw_thres_flex, dx_i, dx_prev, (dx_i ^ dx_prev));
		assert(0 == 1);
	 }
	 // Skip this difference: its Hamming distance to dx_prev is above the limit
	 if(hw > hw_thres_flex) {
		//		printf("[%s:%d] Last round skipped! p_i 2^%4.2f hw_thres_flex %2d hw %2d\n", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		vec_iter++;
		continue;
	 } 
#endif

/* --- */

#if RC5_FLEX_FIB
/* Flexible variant: every entry of FIB is set to 32. */
uint32_t FIB[RC5_FULL_FIB_LEN] = {
  32, 32, 32, 32, 32, 32,
  32, 32, 32, 32, 32, 32,
  32, 32, 32, 32, 32, 32
};
/*
 * Hamming weight limits. The filter reads this table as
 * LOGP2HW_ARRAY[i_round][log2p], where log2p is the absolute value of
 * the log2 of a probability. All entries are zero-initialized here.
 *
 * Earlier one-dimensional experiments, kept for reference:
 *   FLEX_FIB_LOGP2HW[RC5_FULL_FIB_LEN] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17};
 *                                        { 0,  0,  1,  0,  1,  0,  0,  0,  3,  0,  0, 12,  0,  0,  2,  0,  0,  0};
 */
uint32_t LOGP2HW_ARRAY[(2*NROUNDS) + 3][WORD_SIZE] = {{0}};
#else // !RC5_FLEX_FIB
/*
 * Fixed variant of FIB.
 *
 * Alternative settings, kept for reference:
 *   { 1, 1, 0, 1, 1, 0, 1, 2, 5, 7, 8, 9, 10, 11, 13, 13, 13, 13}
 *   { 1, 1, 0, 1, 1, 0, 2, 2, 2, 2, 2,  2,  2,  2,  2,  2,  2, 2}  <--- !!! to be fast (for computing averages in DEBUG)
 */
uint32_t FIB[RC5_FULL_FIB_LEN] = {
   1,  1,  0,  1,  1,  0,
   1,  1,  1,  1,  1,  1,
   1,  2,  3, 12, 12, 12
};
#endif // #if RC5_FLEX_FIB
#endif  // #if(WORD_SIZE == 32)

/* --- */

/*
 * Table of RC5_FULL_FIB_LEN probability thresholds used when
 * RC5_FLEX_FIB is enabled.
 * Determined as average of MINs over 32 keys.
 */
#if RC5_FLEX_FIB //((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / (double)((1U <<  0) + 0), // [ 0] 1
  1.0 / (double)((1U <<  0) + 0), // [ 1] 1
  1.0 / (double)((1U <<  1) + 1), // [ 2] 1/3
  1.0 / (double)((1U <<  1) + 1), // [ 3] 1/3
  1.0 / (double)((1U <<  1) + 1), // [ 4] 1/3
  1.0 / (double)((1U <<  1) + 1), // [ 5] 1/3
  1.0 / (double)((1U <<  1) + 1), // [ 6] 1/3
  1.0 / (double)((1U <<  1) + 1), // [ 7] 1/3
  1.0 / (double)((1U <<  1) + 1), // [ 8] 1/3
  1.0 / (double)((1U <<  2) + 1), // [ 9] 1/5
  1.0 / (double)((1U <<  2) + 1), // [10] 1/5
  1.0 / (double)((1U <<  3) + 1), // [11] 1/9
  1.0 / (double)((1U <<  4) + 1), // [12] 1/17
  1.0 / (double)((1U <<  4) + 1), // [13] 1/17
  1.0 / (double)((1U <<  5) + 1), // [14] 1/33
  1.0 / (double)((1U << 14) + 1), // [15] 1/16385 (exponent was  6)
  1.0 / (double)((1U << 14) + 1), // [16] 1/16385 (exponent was  8)
  1.0 / (double)((1U << 14) + 1)  // [17] 1/16385 (exponent was 13)
};
#endif // #if RC5_FLEX_FIB


/* --- 20140712 --- */

/*
 * Non-linear (w.r.t. XOR) version of the goUP filter for good pairs
 * for RC5 proposed by [Biryukov, Kushilevitz]
 *
 * Recursive step of the filter. At level \p depth it
 *  - stops early when HW(D[depth]) exceeds fib_array[depth];
 *  - for depth != 0: picks rotation amounts s, builds a list of
 *    candidate differences dx, and for every dx whose rotation bits
 *    (dx >>> s) & RC5_ROT_MASK are zero copies the (D, S) array, fills
 *    in D[depth - 1] and S[depth] and recurses with depth - 1;
 *  - for depth == 0: inserts the (D, S) array into
 *    \p goup_variants_hash_map and increments \p count when the entry
 *    is new.
 * 
 * \note (depth + 1) must be equal to the index of the right
 *       ciphertext: \p depth = (RC5_FIB_LEN - 1) = 7 so that D[depth
 *       + 1] = D[RC5_FIB_LEN] = D[8] . For example for \ref
 *       RC5_GOUP_LEVEL = RC5_FIB_LEN - 2 = 6, the filter starts at
 *       depth = 6.
 *
 * \param depth current level; index into D, S, fib_array and p_thres_array
 * \param pc_pair plaintext/ciphertext pair; passed on to the recursion
 *        and to rc5_is_goup_diffs_match_inputs at depth 0
 * \param A_last, L_last, C_last arguments forwarded to the last-round routines
 * \param A_mid, L_mid, C_mid arguments forwarded to the middle-round routines
 * \param count incremented once for every new variant stored
 * \param fib_array Hamming weight limits per level
 * \param p_thres_array probability thresholds per level
 * \param ds_array current (D, S) array; must have RC5_FIB_LEN + 1 entries
 * \param goup_variants_hash_map receives the accepted variants
 * \param goup_diff_vec not modified here; only passed on to the recursion
 * \param b_found_hw1 written only when RC5_FILTER_CUT_HW1 is enabled:
 *        set when a difference of Hamming weight 1 is reached
 * \return non-zero when the hash map is non-empty (or 1 on the
 *         RC5_FILTER_CUT_HW1 early exits), 0 otherwise
 *
 * \see rc5_filter_go_up_ext_i
 */
uint32_t rc5_filter_go_up_nl_i(const uint32_t depth, 
										 const pair_t pc_pair,
										 const gsl_matrix* A_last[2][2][2],
										 const gsl_vector* L_last,
										 const gsl_vector* C_last,
										 const gsl_matrix* A_mid[2][2],
										 const gsl_vector* L_mid,
										 const gsl_vector* C_mid,
										 uint32_t* count, 
										 const std::vector<uint32_t> fib_array, 
										 const std::vector<double> p_thres_array,
										 const rc5_goup_diffs_t* ds_array,
										 boost::unordered_map<rc5_goup_diffs_t, WORD, rc5_goup_diffs_hash, rc5_goup_diffs_equal_to>* goup_variants_hash_map,
										 std::vector<rc5_goup_diffs_t>* goup_diff_vec,
										 bool* b_found_hw1)
{
#if (RC5_FILTER_CUT_HW1 == 1)
  assert(*b_found_hw1 == false);
#endif // #if (RC5_FILTER_CUT_HW1 == 1)
  // NOTE(review): flag is set below but never returned (see the commented-out "return flag")
  uint32_t flag = 0;
  uint32_t s;
#if RC5_FILTER_GOUP_DIFF_SET
  uint32_t left = RC5_FEISTEL_LEFT;
  //  uint32_t right = RC5_FEISTEL_RIGHT;
#endif // #if RC5_FILTER_GOUP_DIFF_SET
#if 1									  // DEBUG
  assert(ds_array->D.size() == (RC5_FIB_LEN  + 1));
  assert(ds_array->D.size() == ds_array->S.size());
  assert(ds_array->D.size() == ds_array->len);
#endif
#if 1
  // Prune: the difference at this level is heavier than the limit allows
  if(hw32(ds_array->D[depth]) > fib_array[depth]) {
	 bool b_ret = (goup_variants_hash_map->size() != 0);
	 //	 printf("[%s:%d] ret %d size %d\n", __FILE__, __LINE__, b_ret, goup_variants_hash_map->size());
	 return b_ret;
  }
#endif

  if(depth != 0) {
  //	 for(s = 0; s < WORD_SIZE; s++) {
	 // NOTE(review): s is the loop counter AND is overwritten with a random
	 // value on every pass, so the number of iterations is itself random --
	 // presumably experimental sampling of rotation amounts; confirm intent
	 for(s = 0; s < 16; s++) {
  //	 {
		s = random32() % WORD_SIZE;
		std::vector<uint32_t> dx_vec;
		// NOTE(review): rot_const_prev, dx_prev, y, yy and dy are declared inside
		// this conditional section but are also used outside of it below
#if RC5_FILTER_GOUP_DIFF_SET // depth = 6
		double p_thres = p_thres_array[depth];
		const uint32_t dx_prev = RC5_ROTL(ds_array->D[depth], s); // D[5] <<< S[5]
		const uint32_t rot_const_prev = s;
		const uint32_t hw_thres = fib_array[depth - 1]; // WORD_SIZE;
		WORD y = 0;
		WORD yy = 0;
		WORD dy = 0;
		// Generate a set of diffs dx
		if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
		  // NOTE(review): this local shadows the pc_pair parameter
		  pair_t pc_pair = ds_array->pc_pair;
		  y = pc_pair.ciphertext_first[left]; // y[7] = left ciphertext 1
		  yy = pc_pair.ciphertext_second[left]; // yy[7] = left ciphertext 2
		  assert((y ^ yy) == ds_array->D[ds_array->len - 2]);
		  // (y[7], yy[7] -> {dx[5]})
		  rc5_xdp_add_last_round_diff_set_out(A_last, L_last, C_last, 
														  y, yy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#if 1
		//		if((depth < (RC5_FIB_LEN - 2)) && (depth >= (RC5_FIB_LEN - 5))) { // depth = 5
		// The second condition compares an unsigned value with 0, so it always holds
		if((depth < (RC5_FIB_LEN - 2)) && (depth >= (RC5_FIB_LEN - RC5_FIB_LEN))) { // depth = 12
		  dy = ds_array->D[depth + 1]; // D[6]
		  rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
														 dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#endif
#endif // #if RC5_FILTER_GOUP_DIFF_SET
#if 1 // add also the input difference (if ADD is XOR)
		/*
		 * When the ADD is approximated as XOR, the output difference is
		 * the same as the input difference \p ds_array->D[depth + 1] so
		 * add it to the list if it has zeros in the places of the
		 * rotation constants for the next round.
		 */
		uint32_t rot_seq = RC5_ROTR(ds_array->D[depth + 1], rot_const_prev) & RC5_ROT_MASK;
		if(rot_seq == 0) {
		  dx_vec.push_back(ds_array->D[depth + 1]); // dx[5] == D[7]
		}
#endif

		for(uint32_t i = 0; i < dx_vec.size(); i++) {
		  WORD dx = dx_vec[i]; // dx[5]
#if RC5_FLEX_FIB
		  // Flexible limit: the allowed Hamming weight depends on the probability of dx
		  double p_i = 1.0;
		  if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
			 p_i = rc5_xdp_add_last_round(A_last, L_last, C_last, y, yy, dx);
		  } else {
			 p_i = rc5_xdp_add_mid_round(A_mid, L_mid, C_mid, dy, dx);
		  }
		  uint32_t log2p = (uint32_t)std::abs(log2(p_i));
		  uint32_t i_round = depth + (RC5_FULL_FIB_LEN - (RC5_FIB_LEN - 2));
		  if((i_round > ((2*NROUNDS) + 2))) {
			 printf("[%s:%d] i_round %2d depth %2d\n", __FILE__, __LINE__, i_round, depth);
		  }
		  assert(i_round <= ((2*NROUNDS) + 2));
		  uint32_t hw_thres_flex = LOGP2HW_ARRAY[i_round][log2p];
		  uint32_t hw = hw32(dx ^ dx_prev);
		  if(hw > hw_thres_flex) {
			 continue;
		  }
#endif // #if RC5_FLEX_FIB
		  //		  if(((RC5_ROTR(dx, s) ^ ds_array->D[depth]) & RC5_ROT_MASK) == 0) { // if (dx[5] >>> S[6]) = 0
		  if((RC5_ROTR(dx, s) & RC5_ROT_MASK) == 0) { // if (dx[5] >>> S[6]) = 0

			 // Work on a copy so that sibling candidates start from the same state
			 rc5_goup_diffs_t ds_array_new = *ds_array;
			 ds_array_new.D[depth - 1] =  RC5_ROTR(dx, s) ^ ds_array_new.D[depth]; // D[5] = (dx[5] >>> S[6]) ^ D[6]		 
			 ds_array_new.S[depth] = s; // S[6]
#if 0 // DEBUG
			 if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
				printf("[%s:%d] dx[%2d] %8X | depth %2d (s %2d y %8X yy %8X D[%2d] %8X) -> D[%2d] %8X #var %15d\n", 
						 __FILE__, __LINE__, i, dx, depth, s, y, yy, depth, ds_array_new.D[depth], depth-1, ds_array_new.D[depth-1], *count);
			 } else {
				printf("[%s:%d] dx[%2d] %8X | depth %2d (s %2d dy %8X D[%2d] %8X) -> D[%2d] %8X #var %15d\n", 
						 __FILE__, __LINE__, i, dx, depth, s, dy, depth, ds_array_new.D[depth], depth-1, ds_array_new.D[depth-1], *count);
			 }
#endif // #if 0 // DEBUG
			 // recursive call for correct count of variants

			 if(rc5_filter_go_up_nl_i(depth - 1, pc_pair, A_last, L_last, C_last, A_mid, L_mid, C_mid, count, fib_array, p_thres_array, &ds_array_new, goup_variants_hash_map, goup_diff_vec, b_found_hw1)) {
				flag = 1;
			 } 
			 /*
			  * Cut the search tree in the GoUP filter as soon as a difference of
			  * Hamming weight 1 is found
			  */
#if (RC5_FILTER_CUT_HW1 == 1)
			 if(*b_found_hw1) {
				return 1; // return if we have found a difference with HW 1
			 }
			 if(hw32(ds_array_new.D[depth - 1]) == 1) {
#if 0 // DEBUG
				printf("[%s:%d] D[%2d] %8X HW %2d\n", __FILE__, __LINE__, (depth - 1), ds_array_new.D[depth - 1], hw32(ds_array_new.D[depth - 1]));
#endif
				// {--- ADD variant to hash table
				// The entry is new exactly when the insert changed the size of the map
				bool b_found = false;
				rc5_goup_diffs_hash goup_variants_hash_function;
				uint32_t hash_val = goup_variants_hash_function(ds_array_new);
				std::pair<rc5_goup_diffs_t, uint32_t> new_pair (ds_array_new, hash_val);
				uint32_t old_size = goup_variants_hash_map->size();
				goup_variants_hash_map->insert(new_pair);
				uint32_t new_size = goup_variants_hash_map->size();
				b_found = (new_size == old_size);
				if(!b_found) {
				  (*count)++;					  // accumulate num. of variants
#if 1 // DEBUG
				  printf("[%s:%d] Hamming Weight = 1 CUT: Add variant #%10d\n", __FILE__, __LINE__, *count);
				  for(uint32_t i = 0; i < ds_array_new.D.size(); i++) {
					 printf("D[%2d] %8X %2d\n", i, ds_array_new.D[i], ds_array_new.S[i]);
				  }
				  printf("\n");
#endif // #if 1 // DEBUG
				}
				// --- ADD variant to hash table ---}
				*b_found_hw1 = true;
				flag = 1;
				return 1;
			 }
#endif // #if (RC5_FILTER_CUT_HW1 == 1)
		  }
		}
	 }
  } else {							  // reached the top
#if RC5_FILTER_CUT_HW1
	 *b_found_hw1 = (hw32(ds_array->D[depth]) == 1);
#if 0 // DEBUG
	 printf("\n[%s:%d] Reached depth zero: depth %2d b_found_hw1 %d\n", __FILE__, __LINE__, depth, *b_found_hw1);
	 printf("[%s:%d] depth = %d D[%2d] %8X HW %2d\n", __FILE__, __LINE__, depth, depth, ds_array->D[depth], hw32(ds_array->D[depth]));
#endif // #if 0 // DEBUG
#endif // #if RC5_FILTER_CUT_HW1
	 /**
	  * If the GoUP filter covers all half rounds but the first
	  */
	 bool b_match_input_diff = true;
	 if(RC5_GOUP_LEVEL == (2*NROUNDS)) {
		assert(depth == 0);
		b_match_input_diff = rc5_is_goup_diffs_match_inputs(pc_pair, *ds_array, A_last, L_last, C_last, A_mid, L_mid, C_mid);
	 }

	 // With the HW1 cut enabled the result of the input match above is overridden
#if RC5_FILTER_CUT_HW1
	 b_match_input_diff = *b_found_hw1;
#endif // #if RC5_FILTER_CUT_HW1

	 if(!b_match_input_diff) {
		// NOTE(review): a mismatch aborts in builds with assertions enabled;
		// the return below is reached only when assertions are compiled out
		assert(0 == 1);
#if 0 // DEBUG
		printf("[%s:%d] Skip variant #%10d\n", __FILE__, __LINE__, *count);
		for(uint32_t i = 0; i < ds_array->D.size(); i++) {
		  printf("D[%2d] %8X %2d\n", i, ds_array->D[i], ds_array->S[i]);
		}
		printf("\n");
#endif // #if 0 // DEBUG
		return 0;
	 }

	 if(b_match_input_diff) {
#if (RC5_FILTER_CUT_HW1 == 1)
		assert(hw32(ds_array->D[depth]) == 1);
#endif // #if (RC5_FILTER_CUT_HW1 == 1)
		// The entry is new exactly when the insert changed the size of the map
		bool b_found = false;
		rc5_goup_diffs_hash goup_variants_hash_function;
		uint32_t hash_val = goup_variants_hash_function(*ds_array);
		std::pair<rc5_goup_diffs_t, uint32_t> new_pair (*ds_array, hash_val);
		uint32_t old_size = goup_variants_hash_map->size();
#if 1 // do not store variant
		goup_variants_hash_map->insert(new_pair);
#endif
		uint32_t new_size = goup_variants_hash_map->size();

		b_found = (new_size == old_size);

		if(!b_found) {

		  (*count)++;					  // accumulate num. of variants
#if 1 // DEBUG
		  printf("[%s:%d] Add variant #%10d\n", __FILE__, __LINE__, *count);
		  for(uint32_t i = 0; i < ds_array->D.size(); i++) {
			 printf("D[%2d] %8X %2d\n", i, ds_array->D[i], ds_array->S[i]);
		  }
		  printf("\n");
#endif // #if 1 // DEBUG
#if 0 // DEBUG
		  printf("\r[%s:%d] Add variant #%10d ", __FILE__, __LINE__, *count);
		  fflush(stdout);
#endif // #if 0 // DEBUG
		}
		//		flag = 1;
		//		printf("[%s:%d] ret %d size %d\n", __FILE__, __LINE__, 1, goup_variants_hash_map->size());
		bool b_ret = (goup_variants_hash_map->size() != 0);
		//		printf("[%s:%d] ret %d size %d\n", __FILE__, __LINE__, b_ret, goup_variants_hash_map->size());
		return b_ret;
  //		return 1;
		//		return (goup_variants_hash_map->size() > 0);
	 }
  }
  // Fall-through for depth != 0: report whether any variant has been stored so far
  bool b_ret = (goup_variants_hash_map->size() != 0);
  //  printf("[%s:%d] ret %d size %d\n", __FILE__, __LINE__, b_ret, goup_variants_hash_map->size());
  return b_ret;
  //  return flag;
}

/*
 * Wrapper for \ref rc5_filter_go_up_nl_i
 *
 * \param A transition matrix A[x[i]]|[y[i]][yy[i]][dx[i]]
 * \param ndiff number of differences to store
 * \param goup_diff_vec vector of filtered pairs with corresponding
 *                      diffreneces for \p ndiff rounds
 * \param pc_pair candidate good pair of chosen plaintexts/ciphertexts
 *                that will be tested against the filter
 * \note \p fib_array is of length fib_array_len
 *
 */
uint32_t rc5_filter_go_up_nl(const gsl_matrix* A_last[2][2][2], // last round
									  const gsl_vector* L_last,
									  const gsl_vector* C_last,
									  const gsl_matrix* A_mid[2][2], // middle round
									  const gsl_vector* L_mid,
									  const gsl_vector* C_mid,
									  const pair_t pc_pair,
									  const std::vector<uint32_t> fib_array, 
									  std::vector<double> p_thres_array,
									  std::vector<rc5_goup_diffs_t>* goup_diff_vec)
{
#if 0
  printf("\r[%s:%d] Enter %s()", __FILE__, __LINE__, __FUNCTION__);
  fflush(stdout);
#endif
  assert(fib_array.size() == RC5_FIB_LEN); // = 8
  uint32_t ret = 0;
  uint32_t count = 0;
  uint32_t rot_mask = RC5_ROT_MASK;
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;
  //  const uint32_t depth = (RC5_FIB_LEN - 1); // (= 7) so that D[depth + 1] = D[RC5_FIB_LEN ] = D[8] <--- !
  const uint32_t depth = (RC5_FIB_LEN - 2); // (= 6) so that D[depth + 1] = D[7] <--- 

  assert(rot_mask == RC5_ROT_MASK);

  //  rc5_goup_diffs_t ds_array;	  // (D[0:(RC5_FIB_LEN - 1), S[0:(RC5_FIB_LEN - 1)])
  rc5_goup_diffs_t ds_array;	  // (D[0...RC5_FIB_LEN], S[0...RC5_FIB_LEN])
  ds_array.len = (RC5_FIB_LEN  + 1); // = 9
  ds_array.pc_pair = pc_pair;
  for(uint32_t i = 0; i < ds_array.len; i++) {
	 ds_array.D.push_back(0);
	 ds_array.S.push_back(0);
  }

  // last rot const must be the same
  assert((pc_pair.ciphertext_first[left] & rot_mask) == (pc_pair.ciphertext_second[left] & rot_mask));

  // D[8], S[8] = \empty
  ds_array.D[ds_array.len - 1] = pc_pair.ciphertext_first[right] ^ pc_pair.ciphertext_second[right]; // D[i+1], D[8]
  ds_array.S[ds_array.len - 1] = 0;	  // don't care about rot const S[i+1], S[8]

  // D[7], S[7]
  ds_array.D[ds_array.len - 2] = pc_pair.ciphertext_first[left] ^ pc_pair.ciphertext_second[left]; // D[i], D[7]
  ds_array.S[ds_array.len - 2] = (pc_pair.ciphertext_first[left] & rot_mask); // S[i], S[7]

  std::vector<uint32_t> dx_set_all;
  uint32_t y = pc_pair.ciphertext_first[right]; // y[8] = right ciphertext 1
  uint32_t yy = pc_pair.ciphertext_second[right]; // yy[8] = right ciphertext 2
#if RC5_FILTER_GOUP_DIFF_SET
  const double p_thres = p_thres_array[RC5_FIB_LEN - 1];
  const uint32_t hw_thres = fib_array[depth - 1]; // WORD_SIZE;
  const uint32_t rot_const_prev = ds_array.S[ds_array.len - 2]; // S[7]
  //  const uint32_t dx_prev = RC5_ROTL(ds_array.D[depth - 2], ds_array.S[ds_array.len - 2]);
  const uint32_t dx_prev = RC5_ROTL(ds_array.D[ds_array.len - 2], ds_array.S[ds_array.len - 2]);

  rc5_xdp_add_last_round_diff_set_out((const gsl_matrix*(*)[2][2])A_last, L_last, C_last, 
												  y, yy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_set_all);
#endif // #if RC5_FILTER_GOUP_DIFF_SET
#if 1 // add also the input difference (if ADD is XOR)
  /*
   * When the ADD is approximated as XOR, the output difference is the
	* same as the input difference \p ds_array->D[depth + 1] so add
	* it to the list if it has zeros in the places of the rotation
	* constants for the enxt round.
	*/
  WORD dy = (y ^ yy);
  uint32_t rot_seq = RC5_ROTR(dy, rot_const_prev) & RC5_ROT_MASK;
  if(rot_seq == 0) {
	 dx_set_all.push_back(dy);
  }
#endif
#if 0 // DEBUG
  printf("\r[%s:%d] Depth %2d y yy %8X %8X #dx %d", 
			__FILE__, __LINE__, depth, y, yy, dx_set_all.size());
  fflush(stdout);
#endif
#if 0 // DEBUG
  printf("[%s:%d] Depth %2d p_thres 2^%4.2f y yy %8X %8X #dx %d\n", 
			__FILE__, __LINE__, depth, log2(p_thres), y, yy, dx_set_all.size());
#endif

  bool b_found_hw1 = false;
  uint32_t cnt_diff = 0;
  std::vector<uint32_t>::iterator vec_iter = dx_set_all.begin();
  while((vec_iter != dx_set_all.end()) && (!b_found_hw1)) {
	 WORD dx_i = *vec_iter;
#if RC5_FLEX_FIB
	 double p_i = rc5_xdp_add_last_round((const gsl_matrix*(*)[2][2])A_last, L_last, C_last, y, yy, dx_i);
	 uint32_t log2p = (uint32_t)std::abs(log2(p_i));
	 uint32_t i_round = depth + (RC5_FULL_FIB_LEN - (RC5_FIB_LEN - 2));
	 if((i_round != ((2*NROUNDS) + 2))) {
		printf("[%s:%d] i_round %2d depth %2d\n", __FILE__, __LINE__, i_round, depth);
	 }
	 assert(i_round == ((2*NROUNDS) + 2));
	 uint32_t hw_thres_flex = LOGP2HW_ARRAY[i_round][log2p];
#if 0
	 if(hw_thres_flex) {
		printf("[%s:%d] hw_thres_flex %d log2p %2d 2^%f y %8X yy %8X dx_i %8X\n", __FILE__, __LINE__, hw_thres_flex, log2p, log2(p_i), y, yy, dx_i);
	 }
#endif
	 uint32_t hw = hw32(dx_i ^ dx_prev);
	 //	 if(0x47D9F20 == (dx_i ^ dx_prev)) {
	 if(0x47D9F20 == dx_i) {
		printf("[%s:%d] hw %2d hw_thres_flex %2d dx_i %8X dx_prev %8X (dx_i ^ dx_prev) %8X\n", __FILE__, __LINE__, hw, hw_thres_flex, dx_i, dx_prev, (dx_i ^ dx_prev));
		assert(0 == 1);
	 }
	 if(hw > hw_thres_flex) {
		//		printf("[%s:%d] Last round skipped! p_i 2^%4.2f hw_thres_flex %2d hw %2d\n", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		vec_iter++;
		continue;
	 } 
#endif
	 cnt_diff++;
#if 0									  // DEBUG
	 printf("\r[%s:%d] p_thres 2^%4.2f [%5d] dx %8X set size %10d", 
			  __FILE__, __LINE__, log2(p_thres), cnt_diff, dx_i, (uint32_t)dx_set_all.size());
	 fflush(stdout);
#endif // #if 0
#if 0									  // DEBUG
	 printf("[%s:%d] p_thres 2^%4.2f [%5d] dx %8X set size %10d\n", 
			  __FILE__, __LINE__, log2(p_thres), cnt_diff, dx_i, (uint32_t)dx_set_all.size());
#endif // #if 0

	 // D[6] = (D[8] >>> S[7]) ^ D[7], S[6] = empty
	 ds_array.D[ds_array.len - 3] = // D[6] 
		RC5_ROTR(dx_i, ds_array.S[ds_array.len - 2]) ^ ds_array.D[ds_array.len - 2];
	 ds_array.S[ds_array.len - 3] = 0; // S[6] : unknown - to be computed

#if 0									  // DEBUG
	 printf("[%s:%d] D[%2d] %8X S[%2d] %2d\n", 
			  __FILE__, __LINE__, 
			  (ds_array.len - 3), ds_array.D[ds_array.len - 3],
			  (ds_array.len - 2), ds_array.S[ds_array.len - 2]);
#endif // #if 0

	 //	 assert((ds_array.D[ds_array.len - 3] & RC5_ROT_MASK) == 0);
	 assert((ds_array.D[ds_array.len - 2] & RC5_ROT_MASK) == 0);

	 if((ds_array.D[ds_array.len - 3] & RC5_ROT_MASK) == 0) {

#if 1
		boost::unordered_map<rc5_goup_diffs_t, WORD, rc5_goup_diffs_hash, rc5_goup_diffs_equal_to> goup_variants_hash_map;
#endif
		ret = rc5_filter_go_up_nl_i(depth, pc_pair,
											 (const gsl_matrix*(*)[2][2])A_last, L_last, C_last,
											 (const gsl_matrix*(*)[2])A_mid, L_mid, C_mid, &count, 
											 fib_array, p_thres_array, &ds_array, &goup_variants_hash_map, goup_diff_vec, &b_found_hw1);
		if(goup_variants_hash_map.size() > 0) {
		  printf("[%s:%d] ret %2d size %2d\n", __FILE__, __LINE__, ret, goup_variants_hash_map.size());
		  assert(ret > 0);
		}
#if (RC5_FILTER_CUT_HW1 == 0)
		assert(b_found_hw1 == false);
#endif // #if (RC5_FILTER_CUT_HW1 == 0)
		//		printf("[%s:%d] ret %2d\n", __FILE__, __LINE__, ret);

		//	store the all the GoUp variants from the hash table to the goup array
		boost::unordered_map<rc5_goup_diffs_t, WORD, rc5_goup_diffs_hash, rc5_goup_diffs_equal_to>::iterator ds_variants_iter = goup_variants_hash_map.begin();
		while(ds_variants_iter != goup_variants_hash_map.end()) {
		  rc5_goup_diffs_t ds_array =  ds_variants_iter->first;
		  goup_diff_vec->push_back(ds_array);
		  ds_variants_iter++;
		}
	 }

#if 1 // do not store them
	 if(!(goup_diff_vec->size() == count)) {
		printf("[%s:%d] count %d g_count = %d\n", __FILE__, __LINE__, count, g_count);
	 }
	 assert(goup_diff_vec->size() == count);
#endif
#if 0 // DEBUG
	 printf("\r[%s:%d] (%10d / %10d) #Variants %d 2^%4.2f", __FILE__, __LINE__, 
			  cnt_diff, (uint32_t)dx_set_all.size(), (uint32_t)goup_diff_vec->size(), log2(goup_diff_vec->size()));
	 fflush(stdout);
#endif // #if 1 // DEBUG
	 vec_iter++;
  }
  return ret;
}


/* --- */

/*
 * Detached fragment (not inside any function at this point of the
 * file), middle-round variant: append dx to dx_vec only if
 * HW(dx ^ dx_prev) does not exceed a fixed limit of 2.
 * NOTE(review): C_tmp is allocated with gsl_vector_calloc but never
 * freed in this fragment.
 */
#if RC5_FLEX_FIB
	 gsl_vector* C_tmp = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);
	 gsl_vector_set(C_tmp, RC5_MID_ROUND_ISTATE, 1.0);
	 double p_i = rc5_xdp_add_mid_round((const gsl_matrix*(*)[2])A, L, C_tmp, dy, dx);
	 assert(RC5_FLEX_FACT >= (uint32_t)std::abs(log2(p_i)));
	 // The limit is hard-coded; the probability-dependent formula is commented out
	 uint32_t hw_thres_flex = 2;//RC5_FLEX_FACT - (uint32_t)std::abs(log2(p_i));
	 uint32_t hw = hw32(dx ^ dx_prev);
	 if(hw > hw_thres_flex) {
		//		printf("\r[%s:%d] Skipped! p_i 2^%4.2f hw_thres_flex %2d hw %2d ", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		//		fflush(stdout);
	 } else {
		//		printf("\r[%s:%d]   Added. p_i 2^%4.2f hw_thres_flex %2d hw %2d ", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		//		fflush(stdout);
		dx_vec->push_back(dx);
	 }
#else
	 dx_vec->push_back(dx);
#endif
/*
 * Detached fragment, last-round variant of the one above: the limit
 * on HW(dx ^ dx_prev) is fixed to 12.
 * NOTE(review): C_tmp is allocated with gsl_vector_calloc but never
 * freed in this fragment.
 */
#if RC5_FLEX_FIB
	 gsl_vector* C_tmp = gsl_vector_calloc(RC5_LAST_ROUND_MSIZE);
	 gsl_vector_set(C_tmp, RC5_LAST_ROUND_ISTATE, 1.0);
	 double p_i = rc5_xdp_add_last_round((const gsl_matrix*(*)[2][2])A, L, C_tmp, y, yy, dx);
	 assert(RC5_FLEX_FACT >= (uint32_t)std::abs(log2(p_i)));
	 //	 uint32_t logp = (uint32_t)std::abs(log2(p_i));
	 // The limit is hard-coded; the probability-dependent formula is commented out
	 uint32_t hw_thres_flex = 12;//RC5_FLEX_FACT - (uint32_t)std::abs(log2(p_i));
	 uint32_t hw = hw32(dx ^ dx_prev);
	 if(hw > hw_thres_flex) {
		//		printf("\r[%s:%d] Skipped! p_i 2^%4.2f hw_thres_flex %2d hw %2d ", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		//		fflush(stdout);
	 } else {
		//		printf("\r[%s:%d]   Added. p_i 2^%4.2f hw_thres_flex %2d hw %2d ", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		//		fflush(stdout);
		dx_vec->push_back(dx);
	 }
#else
	 dx_vec->push_back(dx);
#endif

/* --- */
/*
 * Detached, disabled debug fragment: prints i_round and hw_thres_flex
 * whenever the flexible Hamming weight limit is non-zero.
 */
#if 0
		  if(hw_thres_flex) {
			 printf("[%s:%d] i_round %2d hw_thres_flex %d\n", __FILE__, __LINE__, i_round, hw_thres_flex);
		  }
#endif


/* --- */

/*
 * For fixed y,yy generate all output differences dx for which the
 * probability xdp^{+}_LR(y, yy -> dx) over all keys is above a
 * certain threshold.
 *
 * Recursive bit-by-bit search: at bit \p i both values of dx[i] are
 * tried. A value is followed further only if
 *  - it is compatible with the all-zero bit sequence of length
 *    log2(WORD_SIZE) required at position \p rot_const_prev (checked
 *    for WORD_SIZE 16 and 32 only),
 *  - the probability L * A[y[i]][yy[i]][dx[i]] * C of the extended
 *    prefix is above \p p_thres,
 *  - HW((dx ^ dx_prev) restricted to bits 0..i) <= \p hw_thres,
 *  - and, if RC5_LAST_ROUND_ADD_APPROX is enabled, the ADD
 *    approximation check passes.
 * When \p i reaches WORD_SIZE the completed dx is appended to
 * \p dx_vec (under RC5_FLEX_FIB only if HW(dx ^ dx_prev) <= 2).
 *
 * \param i index of the bit processed by this call (0 ... WORD_SIZE)
 * \param p_thres probability threshold; prefixes must be strictly above it
 * \param hw_thres limit on the Hamming weight of (dx ^ dx_prev)
 * \param A transition matrices A[y[i]][yy[i]][dx[i]]
 * \param L final vector of the matrix product
 * \param C state vector for the bits processed so far
 * \param y first input value
 * \param yy second input value
 * \param dx_prev D[6] <<< S[6]
 * \param rot_const_prev S[6]
 * \param dx bits 0..(i-1) of the difference built so far
 * \param p probability of the prefix \p dx; must be above \p p_thres
 *          when a complete difference is reached
 * \param dx_vec receives the generated differences
 */
void rc5_xdp_add_last_round_diff_set_out_i(const uint32_t i, 
														 const double p_thres, const uint32_t hw_thres,
														 const gsl_matrix* A[2][2][2], 
														 const gsl_vector* L, const gsl_vector* C, 
														 const uint32_t y, const uint32_t yy, 
														 const uint32_t dx_prev, // D[6] <<< S[6]
														 const uint32_t rot_const_prev, // S[6]
														 const uint32_t dx, 
														 const double p, 
														 std::vector<uint32_t>* dx_vec)
{
  //  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);
  if(i == WORD_SIZE) {			  // all bits are fixed: dx is complete
#if RC5_FLEX_FIB
	 gsl_vector* C_tmp = gsl_vector_calloc(RC5_LAST_ROUND_MSIZE);
	 gsl_vector_set(C_tmp, RC5_LAST_ROUND_ISTATE, 1.0);
	 double p_i = rc5_xdp_add_last_round((const gsl_matrix*(*)[2][2])A, L, C_tmp, y, yy, dx);
	 // Release the temporary state vector: it was leaked once per generated difference before
	 gsl_vector_free(C_tmp);
	 assert(RC5_FLEX_FACT >= (uint32_t)std::abs(log2(p_i)));
	 //	 uint32_t logp = (uint32_t)std::abs(log2(p_i));
	 // The limit is hard-coded; the probability-dependent formula is commented out
	 uint32_t hw_thres_flex = 2;//RC5_FLEX_FACT - (uint32_t)std::abs(log2(p_i));
	 uint32_t hw = hw32(dx ^ dx_prev);
	 if(hw > hw_thres_flex) {
		printf("\r[%s:%d] Skipped! p_i 2^%4.2f hw_thres_flex %2d hw %2d ", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		fflush(stdout);
	 } else {
		printf("\r[%s:%d]   Added. p_i 2^%4.2f hw_thres_flex %2d hw %2d ", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		fflush(stdout);
		dx_vec->push_back(dx);
	 }
#else
	 dx_vec->push_back(dx);
#endif
	 assert(p > p_thres);
#if 0 // DEBUG
	 // Cross-check the accumulated probability against a direct computation
	 gsl_vector* C_dbg = gsl_vector_calloc(RC5_LAST_ROUND_MSIZE);
	 gsl_vector_set(C_dbg, RC5_LAST_ROUND_ISTATE, 1.0);
	 double p_th = rc5_xdp_add_last_round((const gsl_matrix*(*)[2][2])A, L, C_dbg, y, yy, dx);
	 //	 printf("[%s:%d] (%8X, %8X -> %8X) 2^%4.2f 2^%4.2f\n", 
	 //			  __FILE__, __LINE__, y, yy, dx, log2(p_th), log2(p));
	 assert(p == p_th);
	 gsl_vector_free(C_dbg);
#endif // #if 0 // DEBUG
	 return;
  }

  uint32_t y_i = (y >> i) & 1;
  uint32_t yy_i = (yy >> i) & 1;

  for(uint32_t dx_i = 0; dx_i < 2; dx_i++) {

	 bool b_match = true;
	 /**
     * Check if a sequence of log2(n) bits of dx starting from
     * position \p rot_const_prev is equal to the 0 bit sequence. This
     * check is relevant only for RC5 and is related to the equal
     * rotation constant requirement.
     */
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
	 assert((WORD_SIZE == 16) || (WORD_SIZE == 32));
	 uint32_t bit_seq = 0; // 00...0
	 uint32_t bit_seq_len = (uint32_t)log2(WORD_SIZE);
 	 assert((bit_seq_len == 4) || (bit_seq_len == 5));
	 b_match = rc5_last_round_eq_x_bit_seq_match_bit_i(i, dx_i, rot_const_prev, bit_seq, bit_seq_len);
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
	 if(b_match) {

		// R is the state vector after bit i; it becomes C of the next level
		gsl_vector* R = gsl_vector_calloc(RC5_LAST_ROUND_MSIZE);
		double new_p = 0;

		gsl_blas_dgemv(CblasNoTrans, 1.0, A[y_i][yy_i][dx_i], C, 0.0, R); // R <- A C
		gsl_blas_ddot(L, R, &new_p);

		assert(new_p <= 1.0);
		assert(new_p >= 0.0);

		if(new_p > p_thres) {
		  uint32_t new_dx = (dx_i << i) | dx;
		  // Mask of the (i + 1) least significant bits: bit i itself is included
		  uint32_t mask_lsb_i = (0xffffffff >> (32 - (i + 1))); // <--- (i+1) !!
		  uint32_t hw = hw32((new_dx ^ dx_prev) & mask_lsb_i); // D[5] = (dx ^ (D[6] <<< S[6]))
		  if(hw <= hw_thres) {
			 bool b_approx_match = true;
#if RC5_LAST_ROUND_ADD_APPROX//RC5_ADD_APPROX
			 uint32_t order = RC5_ADD_APPROX_ORDER;
			 uint32_t dk = 0;
			 b_approx_match = rc5_last_round_add_approx_match(i, y, yy, dk, new_dx, order);
#endif
			 if(b_approx_match) {
				rc5_xdp_add_last_round_diff_set_out_i(i+1, p_thres, hw_thres, A, L, R, y, yy, dx_prev, 
																  rot_const_prev, new_dx, new_p, dx_vec);
			 }
		  }
		}

		gsl_vector_free(R);
	 } // b_match
  }

  return;
}

/*
 * For fixed dy generate all output differences dx for which the
 * probability xdp^{+}_MR(dy -> dx) over all keys is above a
 * certain threshold.
 *
 * The set of output differences satisfies the following conditions:
 *
 * - xdp^{+}_MR(dy -> dx) > p_thres
 * - HW(dx ^ dx_prev) <= hw_thres (HW = Hamming Weight)
 *
 * The search is a recursion over the bit positions of dx, starting
 * from the least significant bit: at position \p i both values of the
 * i-th bit of dx are tried, the partial probability is updated by one
 * matrix-vector multiplication and the branch is abandoned as soon as
 * one of the two conditions above is violated on the bits fixed so far.
 *
 * \param i index of the bit of dx that is fixed by this call; the
 *        recursion terminates when i == WORD_SIZE.
 * \param p_thres probability threshold; a branch is followed only
 *        while its partial probability is strictly bigger.
 * \param hw_thres upper bound on the Hamming weight of
 *        (dx ^ dx_prev) restricted to the (i + 1) least significant bits.
 * \param A transition matrices, indexed as A[dy[i]][dx[i]].
 * \param L row vector applied last (new_p = L * R).
 * \param C column vector accumulating the product of the matrices
 *        of the bits fixed so far (R = A * C).
 * \param dy fixed input difference.
 * \param dx_prev difference of the previous round (D[6] <<< S[6]).
 * \param rot_const_prev rotation constant of the previous round (S[6]).
 * \param dx the i least significant bits of the output difference.
 * \param p probability of the partial difference \p dx.
 * \param dx_vec output: the complete differences dx that were found.
 *
 * \see rc5_xdp_add_last_round_diff_set_out_i
 */
void rc5_xdp_add_mid_round_diff_set_out_i(const uint32_t i, 
														const double p_thres, const uint32_t hw_thres,
														const gsl_matrix* A[2][2], 
														const gsl_vector* L, const gsl_vector* C, 
														const uint32_t dy, 
														const uint32_t dx_prev, // D[6] <<< S[6]
														const uint32_t rot_const_prev, // S[6]
														const uint32_t dx,
														const double p, 
														std::vector<uint32_t>* dx_vec)
{
  // Base case: all WORD_SIZE bits of dx have been fixed.
  if(i == WORD_SIZE) {
#if RC5_FLEX_FIB
	 // Recompute the probability of the complete difference from the
	 // initial state; it is used for the sanity check and the printout.
	 gsl_vector* C_tmp = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);
	 gsl_vector_set(C_tmp, RC5_MID_ROUND_ISTATE, 1.0);
	 double p_i = rc5_xdp_add_mid_round((const gsl_matrix*(*)[2])A, L, C_tmp, dy, dx);
	 // C_tmp is not needed any more: release it here, otherwise one
	 // vector is leaked for every difference reaching the base case.
	 gsl_vector_free(C_tmp);
	 assert(RC5_FLEX_FACT >= (uint32_t)std::abs(log2(p_i)));
	 // The flexible bound is currently hard-coded to 2.
	 uint32_t hw_thres_flex = 2;//RC5_FLEX_FACT - (uint32_t)std::abs(log2(p_i));
	 uint32_t hw = hw32(dx ^ dx_prev);
	 if(hw > hw_thres_flex) {
		printf("\r[%s:%d] Skipped! p_i 2^%4.2f hw_thres_flex %2d hw %2d ", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		fflush(stdout);
	 } else {
		printf("\r[%s:%d]   Added. p_i 2^%4.2f hw_thres_flex %2d hw %2d ", __FILE__, __LINE__, log2(p_i), hw_thres_flex, hw);
		fflush(stdout);
		dx_vec->push_back(dx);
	 }
#else
	 dx_vec->push_back(dx);
#endif
	 assert(p > p_thres);
#if 0 // DEBUG
	 gsl_vector* C_tmp = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);
	 gsl_vector_set(C_tmp, RC5_MID_ROUND_ISTATE, 1.0);
	 double p_th = rc5_xdp_add_mid_round((const gsl_matrix*(*)[2])A, L, C_tmp, dy, dx);
	 //	 printf("[%s:%d] (%8X, %8X -> %8X) 2^%4.2f 2^%4.2f\n", 
	 //			  __FILE__, __LINE__, y, yy, dx, log2(p_th), log2(p));
	 assert(p == p_th);
	 gsl_vector_free(C_tmp);
#endif // #if 0 // DEBUG
	 return;
  }

  uint32_t dy_i = (dy >> i) & 1;

  // Try both values of the i-th bit of dx.
  for(uint32_t dx_i = 0; dx_i < 2; dx_i++) {

	 bool b_match = true;
	 /**
     * Check if a sequence of log2(n) bits of dx starting from
     * position \p rot_const_prev is equal to the 0 bit sequence. This
     * check is relevant only for RC5 and is related to the equal
     * rotation constant requirement.
     */
	 //#if 1 // RC5 relevant
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
	 assert((WORD_SIZE == 16) || (WORD_SIZE == 32));
	 uint32_t bit_seq = 0; // 00...0
	 uint32_t bit_seq_len = (uint32_t)log2(WORD_SIZE);
	 assert((bit_seq_len == 4) || (bit_seq_len == 5));
	 b_match = rc5_last_round_eq_x_bit_seq_match_bit_i(i, dx_i, rot_const_prev, bit_seq, bit_seq_len);
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
	 //	 if(!b_match)
	 //		continue;
	 if(b_match) {

		// R holds the updated state vector; it becomes the C of the
		// next recursion level and is released after the recursion.
		gsl_vector* R = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);
		double new_p = 0;

		gsl_blas_dgemv(CblasNoTrans, 1.0, A[dy_i][dx_i], C, 0.0, R); // R <- A C
		gsl_blas_ddot(L, R, &new_p);

		assert(new_p <= 1.0);
		assert(new_p >= 0.0);

		if(new_p > p_thres) {
		  uint32_t new_dx = (dx_i << i) | dx;
		  //		uint32_t mask_lsb_i = (0xffffffff >> (32 - i));
		  // Mask of the (i + 1) bits fixed so far, bit i included.
		  uint32_t mask_lsb_i = (0xffffffff >> (32 - (i + 1))); // <--- (i+1) !!
		  uint32_t hw = hw32((new_dx ^ dx_prev) & mask_lsb_i); // D[5] = (dx ^ (D[6] <<< S[6]))
		  if(hw <= hw_thres) {
			 bool b_approx_match = true;
			 /**
			  * apply approximation only if the prob threshold is very low
			  * (e.g. below 2^-5)
			  */
#if RC5_ADD_APPROX // ADD-APPROX
			 uint32_t order = RC5_ADD_APPROX_ORDER;
			 uint32_t dk = 0;
			 b_approx_match = rc5_mid_round_add_approx_match(i, dy, dk, new_dx, order);
#endif // #if RC5_ADD_APPROX // ADD-APPROX
			 if(b_approx_match) {
				rc5_xdp_add_mid_round_diff_set_out_i(i+1, p_thres, hw_thres, 
																 A, L, R, dy, dx_prev, rot_const_prev, new_dx, new_p, dx_vec);
			 } 
		  }
		}

		gsl_vector_free(R);
	 } // b_match
  }

  return;
}

/* 
FIB HW 1 CUTS over many keys

[./src/rc5-dc.cc:3415]   average hw_arr = 1.00 1.00 1.00 0.00 1.25 1.00 1.50 3.50 3.00 5.00 5.00 4.50 5.75 6.25 9.50 8.00 11.00 12.00 14.50
[./src/rc5-dc.cc:3426] average prob_arr = 0.00 0.00 0.00 0.00 -1.19 -1.19 -0.75 -1.83 -1.00 -1.64 -2.30 -1.30 -3.00 -3.66 -7.66 -9.71 -10.38 -12.48 -12.83
[./src/rc5-dc.cc:3434]        FIB_ARRAY =  1  2  5  7  8  9 10 11 13 13 13 13
[./src/rc5-dc.cc:3439]       max hw_arr = {10} {10} {13} {14} {14} {16} 10 {13} 11 {15} {17} {18}
[./src/rc5-dc.cc:3456]     min prob_arr = 0.00 0.00 0.00 0.00 -2.00 -2.00 -3.00 -11.00 -13.00 -15.00 -16.00 -18.00 -18.00 -12.00 -14.00 -13.00 -18.00 -15.00 -14.00
[./src/rc5-dc.cc:rc5_equal_rot_attack():3469] Exit statistics:

#Rounds 8
WORD_SIZE 32
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 262448 (2^18.001672)
#Filtered pairs all: 1321 (2^10.367415)
#Good pairs among filtered: 2
#Good pairs among filtered f1: 4
#Good pairs total: 4
#GoUP sets of trails: 1321 (2^10.367415)
RC5_P_THRES_ARRAY = [ 0] -1.58 [ 1] -1.58 [ 2] -1.58 [ 3] -2.32 [ 4] -2.32 [ 5] -3.17 [ 6] -4.09 [ 7] -4.09 [ 8] -5.04 [ 9] -7.01 [10] -8.01 [11] -9.00
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
		  [./tests/rc5-tests.cc:532] #GoUP sets of trails: 1321 (2^10.367415)
WORD g_good_pairs[4][2][2] = {
{{0x329C8E4E, 0xD863A22B}, {0xB29C8E4E, 0x5863A22B}},
{{0x5A4ED76B, 0x6CB15AF7}, {0xDA4ED76B, 0xECB15AF7}},
{{0x1B5D180A, 0x1F2AE0E4}, {0x9B5D180A, 0x9F2AE0E4}},
{{0x8A233999, 0xA51516F3}, {0x0A233999, 0x251516F3}}};
const uint32_t g_key[16] = {0xF2, 0xB8, 0x32, 0xF9, 0x92, 0xF9, 0x59, 0xC5, 0x95, 0xE8, 0xE, 0x5, 0x48, 0x36, 0xC0, 0x5A};
[./tests/rc5-tests.cc:599] Test OK!

real    700m54.642s
user    698m5.470s
sys     0m47.895s


[./src/rc5-dc.cc:3415]   average hw_arr = 1.00 1.00 1.00 0.00 1.17 1.00 0.50 1.50 1.83 1.50 1.83 2.33 3.50 4.17 3.83 5.17 6.33 8.67 10.33
  [./src/rc5-dc.cc:3426] average prob_arr = 0.00 0.00 0.00 0.00 -0.88 -0.88 -0.34 -1.09 -1.41 -0.58 -0.78 -1.41 -1.58 -2.73 -1.96 -1.56 -3.24 -3.98 -5.25
  [./src/rc5-dc.cc:3434]        FIB_ARRAY =  1  2  5  7  8  9 10 11 13 13 13 13
  [./src/rc5-dc.cc:3439]       max hw_arr = { 4} { 6} { 7} { 8}  8 {12} {12}  8 10 13 {16} {14}
[./src/rc5-dc.cc:3456]     min prob_arr = 0.00 0.00 0.00 0.00 -2.00 -2.00 -2.00 -4.00 -6.00 -10.00 -13.00 -11.00 -14.00 -13.00 -9.00 -12.00 -18.00 -12.00 -15.00
[./src/rc5-dc.cc:rc5_equal_rot_attack():3469] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 262985 (2^18.004621)
#Filtered pairs all: 1291 (2^10.334273)
#Good pairs among filtered: 5
#Good pairs among filtered f1: 6
#Good pairs total: 6
#GoUP sets of trails: 1291 (2^10.334273)
RC5_P_THRES_ARRAY = [ 0] -1.58 [ 1] -1.58 [ 2] -1.58 [ 3] -2.32 [ 4] -2.32 [ 5] -3.17 [ 6] -4.09 [ 7] -4.09 [ 8] -5.04 [ 9] -7.01 [10] -8.01 [11] -9.00
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
  [./tests/rc5-tests.cc:532] #GoUP sets of trails: 1291 (2^10.334273)
  WORD g_good_pairs[6][2][2] = {
  {{0x781CA44F, 0xE5AE3837}, {0xF81CA44F, 0x65AE3837}},
  {{0x64604642, 0xD23926AE}, {0xE4604642, 0x523926AE}},
  {{0xD0BC5699, 0x5E46BBC2}, {0x50BC5699, 0xDE46BBC2}},
  {{0x12990EC5, 0x482ECAFF}, {0x92990EC5, 0xC82ECAFF}},
  {{0xE58F8887, 0x4E9280A6}, {0x658F8887, 0xCE9280A6}},
  {{0xA771D4CB, 0xFE4270BE}, {0x2771D4CB, 0x7E4270BE}}};
const uint32_t g_key[16] = {0xCD, 0x20, 0xAB, 0x76, 0xD9, 0xDE, 0xA7, 0x29, 0x53, 0xF9, 0x3F, 0x3A, 0xFF, 0x50, 0x5C, 0x38};
[./tests/rc5-tests.cc:599] Test OK!

real    468m35.568s
user    467m14.676s
sys     0m0.340s


[./src/rc5-dc.cc:3415]   average hw_arr = 1.00 1.00 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.50 1.00 2.50 2.50 5.50 5.50 9.00 10.50 11.00 14.50
  [./src/rc5-dc.cc:3426] average prob_arr = 0.00 0.00 0.00 0.00 -0.42 -0.42 0.00 -1.00 -1.00 -0.68 -0.68 -1.83 -2.42 -4.96 -9.68 -11.42 -7.00 -11.83 -10.91
  [./src/rc5-dc.cc:3434]        FIB_ARRAY =  1  2  5  7  8  9 10 11 13 13 13 13
  [./src/rc5-dc.cc:3439]       max hw_arr =  1  1  1  2  4  3  7  6  9 {15} 13 {15}
[./src/rc5-dc.cc:3456]     min prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 -2.00 -2.00 -4.00 -3.00 -9.00 -11.00 -12.00 -20.00 -14.00 -14.00
																										[./src/rc5-dc.cc:rc5_equal_rot_attack():3469] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 261951 (2^17.998937)
#Filtered pairs all: 1356 (2^10.405141)
#Good pairs among filtered: 9
#Good pairs among filtered f1: 13
#Good pairs total: 13
#GoUP sets of trails: 1356 (2^10.405141)
RC5_P_THRES_ARRAY = [ 0] -1.58 [ 1] -1.58 [ 2] -1.58 [ 3] -2.32 [ 4] -2.32 [ 5] -3.17 [ 6] -4.09 [ 7] -4.09 [ 8] -5.04 [ 9] -7.01 [10] -8.01 [11] -9.00
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
  [./tests/rc5-tests.cc:532] #GoUP sets of trails: 1356 (2^10.405141)
  WORD g_good_pairs[13][2][2] = {
  {{0x18A35AB2, 0xB8B5A403}, {0x98A35AB2, 0x38B5A403}},
  {{0xDD82A7C2, 0x82D681DA}, {0x5D82A7C2, 0x02D681DA}},
  {{0x1370B0F0, 0x3EDDE253}, {0x9370B0F0, 0xBEDDE253}},
  {{0x9DF12493, 0x4F1324BA}, {0x1DF12493, 0xCF1324BA}},
  {{0x47DFB488, 0xC5C69C9A}, {0xC7DFB488, 0x45C69C9A}},
  {{0x463741E9, 0xAD2C46EC}, {0xC63741E9, 0x2D2C46EC}},
  {{0x705A2834, 0x5806BFF9}, {0xF05A2834, 0xD806BFF9}},
  {{0x0F05DD54, 0xEA67AD50}, {0x8F05DD54, 0x6A67AD50}},
  {{0xD126B268, 0x63F650B3}, {0x5126B268, 0xE3F650B3}},
  {{0x520709BF, 0x65212DEF}, {0xD20709BF, 0xE5212DEF}},
  {{0x01359336, 0x7C6D0A7A}, {0x81359336, 0xFC6D0A7A}},
  {{0xCC72E41D, 0xDACE48FE}, {0x4C72E41D, 0x5ACE48FE}},
  {{0x739C5DA3, 0xF35C3073}, {0xF39C5DA3, 0x735C3073}}};
const uint32_t g_key[16] = {0x1E, 0x6B, 0xC2, 0xB9, 0xA0, 0xD0, 0xFD, 0x3C, 0x7A, 0xA7, 0xF1, 0x9F, 0x39, 0xAD, 0x89, 0x24};
[./tests/rc5-tests.cc:599] Test OK!

real    481m28.259s
user    480m5.804s
sys     0m0.004s

[./src/rc5-dc.cc:3415]   average hw_arr = 1.00 1.00 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.50 1.00 2.50 2.50 5.50 5.50 9.00 10.50 11.00 14.50
  [./src/rc5-dc.cc:3426] average prob_arr = 0.00 0.00 0.00 0.00 -0.42 -0.42 0.00 -1.00 -1.00 -0.68 -0.68 -1.83 -2.42 -4.96 -9.68 -11.42 -7.00 -11.83 -10.91
  [./src/rc5-dc.cc:3434]        FIB_ARRAY =  1  2  5  7  8  9 10 11 13 13 13 13
  [./src/rc5-dc.cc:3439]       max hw_arr =  1  1  1  2  4  3  7  6  9 {15} 13 {15}
[./src/rc5-dc.cc:3456]     min prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 -2.00 -2.00 -4.00 -3.00 -9.00 -11.00 -12.00 -20.00 -14.00 -14.00
																										[./src/rc5-dc.cc:rc5_equal_rot_attack():3469] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 262106 (2^17.999791)
#Filtered pairs all: 1294 (2^10.337622)
#Good pairs among filtered: 1
#Good pairs among filtered f1: 2
#Good pairs total: 2
#GoUP sets of trails: 1294 (2^10.337622)
RC5_P_THRES_ARRAY = [ 0] -1.58 [ 1] -1.58 [ 2] -1.58 [ 3] -2.32 [ 4] -2.32 [ 5] -3.17 [ 6] -4.09 [ 7] -4.09 [ 8] -5.04 [ 9] -7.01 [10] -8.01 [11] -9.00
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
  [./tests/rc5-tests.cc:532] #GoUP sets of trails: 1294 (2^10.337622)
  WORD g_good_pairs[2][2][2] = {
  {{0x260FBF88, 0xF27070BE}, {0xA60FBF88, 0x727070BE}},
  {{0xB6816BB8, 0x0E83A3E1}, {0x36816BB8, 0x8E83A3E1}}};
const uint32_t g_key[16] = {0x56, 0x49, 0x8E, 0x35, 0xD4, 0x64, 0x26, 0x2E, 0x4D, 0xCB, 0x20, 0x26, 0xE8, 0xB8, 0xD0, 0x1};
[./tests/rc5-tests.cc:599] Test OK!

real    667m41.071s
user    665m46.777s
sys     0m0.000s

[./src/rc5-dc.cc:3415]   average hw_arr = 1.00 1.00 1.00 0.00 1.00 1.00 0.20 0.80 1.00 3.00 3.20 3.20 2.80 4.20 4.20 7.00 9.00 10.60 12.40
  [./src/rc5-dc.cc:3426] average prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 -0.23 -0.74 -1.00 -2.93 -4.15 -2.07 -2.00 -5.22 -3.13 -5.99 -5.31 -6.22 -8.29
  [./src/rc5-dc.cc:3434]        FIB_ARRAY =  1  2  5  7  8  9 10 11 13 13 13 13
  [./src/rc5-dc.cc:3439]       max hw_arr =  1  1  5  4  4  5  8  7 11 {15} {17} {17}
[./src/rc5-dc.cc:3456]     min prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 -2.00 -1.00 -1.00 -5.00 -5.00 -5.00 -5.00 -10.00 -9.00 -14.00 -16.00 -17.00 -16.00																											  [./src/rc5-dc.cc:rc5_equal_rot_attack():3469] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 263191 (2^18.005751)
#Filtered pairs all: 1327 (2^10.373953)
#Good pairs among filtered: 3
#Good pairs among filtered f1: 5
#Good pairs total: 5
#GoUP sets of trails: 1327 (2^10.373953)
RC5_P_THRES_ARRAY = [ 0] -1.58 [ 1] -1.58 [ 2] -1.58 [ 3] -2.32 [ 4] -2.32 [ 5] -3.17 [ 6] -4.09 [ 7] -4.09 [ 8] -5.04 [ 9] -7.01 [10] -8.01 [11] -9.00
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
  [./tests/rc5-tests.cc:532] #GoUP sets of trails: 1327 (2^10.373953)
  WORD g_good_pairs[5][2][2] = {
  {{0x983E000F, 0x94F4FD55}, {0x183E000F, 0x14F4FD55}},
  {{0x13CE424C, 0x4B7C9DB1}, {0x93CE424C, 0xCB7C9DB1}},
  {{0x94EE93C3, 0xD099AAE2}, {0x14EE93C3, 0x5099AAE2}},
  {{0x77F8B35C, 0xB954D503}, {0xF7F8B35C, 0x3954D503}},
  {{0xC73927F7, 0x1EDDFD12}, {0x473927F7, 0x9EDDFD12}}};
const uint32_t g_key[16] = {0xE0, 0xA6, 0x39, 0x41, 0xB, 0x77, 0x48, 0xF0, 0x6, 0xE9, 0x83, 0xE5, 0xF7, 0x8, 0x71, 0xBB};
[./tests/rc5-tests.cc:599] Test OK!

real    482m0.462s
user    480m37.886s
sys     0m0.000s




 */


/* --- */

/*
  [./tests/rc5-tests.cc:526] RC5_FIXED_KEY 1 | Master key[16] = {0xF0, 0xCC, 0x3A, 0x11, 0xA7, 0x31, 0x 2, 0x9D, 0x68, 0x82, 0xA9, 0xE0, 0x2B, 0xB8, 0x62, 0x22, };
  [./tests/rc5-tests.cc:536] RC5_FIXED_KEY 1 | Expanded key[26] = {0x353FB2E2, 0x7D87DB44, 0x76B5B07C, 0xFF48FE12, 0xEF0BDE27, 0x356C277C, 0xCB3805B7, 0x42256E09, 0x259AF064, 0xF2857C31, 0xCE913D8A, 0x20E94310, 0xBC797D98, 0x79A866ED, 0xA9D0E7AF, 0x9E98AF01, 0x6D29B552, 0x45ED39F4, 0x206D6647, 0xC2550209, 0xBE0FB8AF, 0x38315D0E, 0x49E2CE3E, 0x73911F33, 0xCC7E5120, 0x5E7B0C3C, };
#Rounds 8
WORD_SIZE 32
RC5_NTEXTS 2^25.00
RC5_FIXED_KEY 1
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
#Filtered pairs: 3 (2^1.584963)
#Good pairs among filtered: 2
#Good pairs total: 22
#GoUP sets of trails: 3 (2^1.584963)
[./tests/rc5-tests.cc:546] #GoUP sets of trails: 3 (2^1.584963)
[./src/rc5-dc.cc:454] Enter rc5_last_round_eq_x_params_hash_map()
[./src/rc5-dc.cc:546] Unique variants = 45 (2^5.491853), #All GoUP variants = 714194 (2^19.445956), cnt_vec_1d = 3 (2^1.584963)
[./src/rc5-dc.cc:398] Enter rc5_last_round_eq_x_params_hash_map_count_good()
[./src/rc5-dc.cc:399] Filtered params hash map size 45
[./src/rc5-dc.cc:425] Good # 1 (    C000, 6E1135E0, 6E1035E0, 17,  0, 1)
[./src/rc5-dc.cc:425] Good # 2 ( 1830000, 7C6F1672, 7B945672,  1, 30, 1)
*/
/*
 * Hard-coded test data for a fixed RC5 master key.
 *
 * - g_master_key holds the same 16 bytes as the "Master key[16]" line of
 *   the run log quoted in the comment directly above.
 * - g_good_pairs_len is the number of entries of g_good_pairs; it must be
 *   kept equal to the first dimension of the array (22, which is also the
 *   "#Good pairs total" value of that log).
 * - g_good_pairs[n][m][w]: n selects the pair, m the member of the pair
 *   and w one of its two 32-bit words. In every entry the two members
 *   differ only in the most significant bit of each word, i.e. their XOR
 *   is (0x80000000, 0x80000000).
 *   NOTE(review): presumably these are the plaintext pairs that follow the
 *   differential for g_master_key -- confirm against the code that uses them.
 */
#if 1 
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
unsigned char g_master_key[RC5_KEY_NBYTES_B] = {0xF0, 0xCC, 0x3A, 0x11, 0xA7, 0x31, 0x02, 0x9D, 0x68, 0x82, 0xA9, 0xE0, 0x2B, 0xB8, 0x62, 0x22};
uint32_t g_good_pairs_len = 22;
uint32_t g_good_pairs[22][2][2] = {
  {{0x764DCBBE, 0xC45EC8DB}, {0xF64DCBBE, 0x445EC8DB}},
  {{0x56130D03, 0x7040A5A3}, {0xD6130D03, 0xF040A5A3}},
  {{0x2F765F52, 0x4F788887}, {0xAF765F52, 0xCF788887}},
  {{0xB5EC63C3, 0x0D28CF77}, {0x35EC63C3, 0x8D28CF77}},
  {{0x8C9E2CA9, 0xAF4A9268}, {0x0C9E2CA9, 0x2F4A9268}},
  {{0x3A9751B8, 0x0FBC428A}, {0xBA9751B8, 0x8FBC428A}},
  {{0x904C284E, 0x12E3795C}, {0x104C284E, 0x92E3795C}},
  {{0xF50BC088, 0x4AABC993}, {0x750BC088, 0xCAABC993}},
  {{0xCB7728CF, 0x41DA9BD5}, {0x4B7728CF, 0xC1DA9BD5}},
  {{0x6D892F7E, 0x05953255}, {0xED892F7E, 0x85953255}},
  {{0x952B5379, 0x529EB6DA}, {0x152B5379, 0xD29EB6DA}},
  {{0xCEA93661, 0x8B66F542}, {0x4EA93661, 0x0B66F542}},
  {{0x1421F45E, 0x3AC665FF}, {0x9421F45E, 0xBAC665FF}},
  {{0x81ED9479, 0x1615012A}, {0x01ED9479, 0x9615012A}},
  {{0x1C0585E0, 0x189CA49E}, {0x9C0585E0, 0x989CA49E}},
  {{0xF45920F5, 0xD910A6CB}, {0x745920F5, 0x5910A6CB}},
  {{0xA077915F, 0x6E11CBBA}, {0x2077915F, 0xEE11CBBA}},
  {{0x16901F3B, 0x6E94C5AA}, {0x96901F3B, 0xEE94C5AA}},
  {{0x72DD2FB9, 0xC13E8643}, {0xF2DD2FB9, 0x413E8643}},
  {{0xADA4F4A3, 0xEC4DCF68}, {0x2DA4F4A3, 0x6C4DCF68}},
  {{0xCC5430B9, 0xCF0259AB}, {0x4C5430B9, 0x4F0259AB}},
  {{0xA457088F, 0x0FF5075E}, {0x2457088F, 0x8FF5075E}}};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#endif // #if 1


/* --- */
  // for(uint32_t i = ((2*NROUNDS) + 1); i >= (((2*NROUNDS) + 1) - RC5_FIB_LEN); i--) {
  //  for(uint32_t i = (((2*NROUNDS) + 1) - RC5_FIB_LEN); i <= ((2*NROUNDS) + 1) ; i++) {

  //		  printf("\n-------------------- [%5d] --------------------\n", cnt_good_all);
	 //			 printf("[%s:%d] CHECKPOINT i %2d\n", __FILE__, __LINE__, i);
	 //			 sum_hw_arr[i] = sum_hw_arr[i] + (double)hwx;

/* ---- */

#if 0 // print intermediate values
	 if(i >= 2) {
		printf("X[%2d] %8X %8X %8X %2d (%2d) ", i, x, xx, dx, rot_arr[i], hwx);
	 } else {
		printf("X[%2d] %8X %8X %8X (%2d) ", i, x, xx, dx, hwx);
	 }
	 if((((2*NROUNDS) + 3) - i) <= RC5_FIB_LEN) {
		uint32_t j = (RC5_FIB_LEN - (((2*NROUNDS) + 3) - i));
		printf("F %2d", fib_array[j]);
		if(hwx > fib_array[j]) {
		  printf(" . ");
		}
	 }
	 printf("\n");
#endif // #if 0 // print intermediate values
		 //		  printf(" %2d ", hw32(dx));

/* --- */
/* 
#Rounds 8
WORD_SIZE 32
RC5_ADD_APPROX   0
RC5_LAST_ROUND_ADD_APPROX   0
RC5_ADD_APPROX_ORDER   5
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 0
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs f1: 261859 (2^17.998431)
#Filtered pairs all: 1321 (2^10.367415)
#Good pairs among filtered: 12
#Good pairs among filtered f1: 16
#Good pairs total: 16
#GoUP sets of trails: 1321 (2^10.367415)
RC5_P_THRES_ARRAY = [ 0] -1.58 [ 1] -1.58 [ 2] -1.58 [ 3] -2.32 [ 4] -2.32 [ 5] -3.17 [ 6] -4.09 [ 7] -4.09 [ 8] -5.04 [ 9] -7.01 [10] -8.01 [11] -9.00
        FIB_ARRAY = [ 0]  1 [ 1]  2 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 13 [10] 13 [11] 13
		  [./tests/rc5-tests.cc:522] #GoUP sets of trails: 1321 (2^10.367415)
		  [./tests/rc5-tests.cc:577] Test OK!

real    482m4.057s
user    480m38.434s
sys     0m3.076s

 */

/* ---- */

// determined as average of MINs over 32 keys 
//
// Table of probability thresholds with 18 initializers; every entry has
// the form 1 / (2^k + 1), where k is the shift amount written in the
// initializer. The array length RC5_FULL_FIB_LEN is defined elsewhere.
// Note that the exponents are not monotone at the end (13, 12, 11, 14).
// This dump file contains several alternative definitions of
// RC5_P_THRES_ARRAY under the same preprocessor guard; only one of them
// can be active in a translation unit.
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),
  ((double)1.0 / (double)((1U <<  10) + 1)),
  ((double)1.0 / (double)((1U <<  13) + 1)),
  ((double)1.0 / (double)((1U <<  12) + 1)),
  ((double)1.0 / (double)((1U <<  11) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

// Alternative table of probability thresholds with 18 initializers. The
// first two entries are 1 / (2^0 + 0) = 1; all other entries have the
// form 1 / (2^k + 1), where k is the shift amount in the initializer.
// The trailing comments on the last three entries keep the tails of
// earlier initializers with other shift amounts (6, 8, 13).
// NOTE(review): the last 12 entries (log2 = -1.58 ... -9.00) appear to be
// the RC5_P_THRES_ARRAY values printed in the run logs quoted earlier in
// this file -- confirm.
// This dump file contains several alternative definitions of
// RC5_P_THRES_ARRAY under the same preprocessor guard; only one of them
// can be active in a translation unit.
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1)),//<<  6) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),// << 8) + 1)),
  ((double)1.0 / (double)((1U <<  9) + 1))//<< 13) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

/* --- */

// determined as average of MINs over 32 keys 
//
// Table of probability thresholds with 18 initializers; every entry has
// the form 1 / (2^k + 1), where k is the shift amount written in the
// initializer. The array length RC5_FULL_FIB_LEN is defined elsewhere.
// The initializers are the same as in the first "average of MINs over
// 32 keys" table that appears earlier in this dump file.
// This dump file contains several alternative definitions of
// RC5_P_THRES_ARRAY under the same preprocessor guard; only one of them
// can be active in a translation unit.
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),
  ((double)1.0 / (double)((1U <<  10) + 1)),
  ((double)1.0 / (double)((1U <<  13) + 1)),
  ((double)1.0 / (double)((1U <<  12) + 1)),
  ((double)1.0 / (double)((1U <<  11) + 1)),
  ((double)1.0 / (double)((1U <<  14) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


/* --- */

// Alternative table of probability thresholds with 18 initializers; every
// entry has the form 1 / (2^k + 1), where k is the shift amount written
// in the initializer. The exponents of this variant are small (at most 7)
// and not monotone. The trailing comments on the last three entries keep
// the tails of earlier initializers with other shift amounts (6, 8, 13).
// This dump file contains several alternative definitions of
// RC5_P_THRES_ARRAY under the same preprocessor guard; only one of them
// can be active in a translation unit.
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  0) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  1) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),//<<  6) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),// << 8) + 1)),
  ((double)1.0 / (double)((1U <<  7) + 1))//<< 13) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

/* --- */
#if 0
  if(hw32(ds_array->D[depth]) == 1) {
	 g_hw_flag = 1;
  }
  if(g_hw_flag == 1) {
	 return 0;
  }
#endif
#if 0
  if(hw32(ds_array->D[depth]) == 1) {
	 return 0;
  }
#endif
				//				if(g_hw_flag == 1) {
				//				  return 0;
				//				}

/* --- */
void rc5_xdp_add_mid_round_diff_set_out_i(const uint32_t i, 
														const double p_thres, const uint32_t hw_thres,
														const gsl_matrix* A[2][2], 
														const gsl_vector* L, const gsl_vector* C, 
														const uint32_t dy, 
														const uint32_t dx_prev, // D[6] <<< S[6]
														const uint32_t rot_const_prev, // S[6]
														const uint32_t dx,
														const double p, 
														std::vector<uint32_t>* dx_vec)
{
  if(i == WORD_SIZE) {
	 dx_vec->push_back(dx);
	 assert(p > p_thres);
#if 0 // DEBUG
	 gsl_vector* C_tmp = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);
	 gsl_vector_set(C_tmp, RC5_MID_ROUND_ISTATE, 1.0);
	 double p_th = rc5_xdp_add_mid_round((const gsl_matrix*(*)[2])A, L, C_tmp, dy, dx);
	 //	 printf("[%s:%d] (%8X, %8X -> %8X) 2^%4.2f 2^%4.2f\n", 
	 //			  __FILE__, __LINE__, y, yy, dx, log2(p_th), log2(p));
	 assert(p == p_th);
	 gsl_vector_free(C_tmp);
#endif // #if 1 // DEBUG
	 return;
  }

  uint32_t dy_i = (dy >> i) & 1;

  for(uint32_t dx_i = 0; dx_i < 2; dx_i++) {

	 bool b_match = true;
	 /**
     * Check if a sequence of log2(n) bits of dx starting from
     * position \p rot_const_prev is equal to the 0 bit sequence. This
     * check is relevant only for RC5 and is related to the equal
     * rotation constant requirement.
     */
	 //#if 1 // RC5 relevant
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
	 assert((WORD_SIZE == 16) || (WORD_SIZE == 32));
	 uint32_t bit_seq = 0; // 00...0
	 uint32_t bit_seq_len = (uint32_t)log2(WORD_SIZE);
	 assert((bit_seq_len == 4) || (bit_seq_len == 5));
	 b_match = rc5_last_round_eq_x_bit_seq_match_bit_i(i, dx_i, rot_const_prev, bit_seq, bit_seq_len);
#endif
	 //	 if(!b_match)
	 //		continue;
	 if(b_match) {

		gsl_vector* R = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);
		double new_p = 0;

		gsl_blas_dgemv(CblasNoTrans, 1.0, A[dy_i][dx_i], C, 0.0, R); // R <- A C
		gsl_blas_ddot(L, R, &new_p);

		assert(new_p <= 1.0);
		assert(new_p >= 0.0);

		if(new_p > p_thres) {
		  uint32_t new_dx = (dx_i << i) | dx;
		  //		uint32_t mask_lsb_i = (0xffffffff >> (32 - i));
		  uint32_t mask_lsb_i = (0xffffffff >> (32 - (i + 1))); // <--- (i+1) !!
		  uint32_t hw = hw32((new_dx ^ dx_prev) & mask_lsb_i); // D[5] = (dx ^ (D[6] <<< S[6]))
		  if(hw <= hw_thres) {
			 bool b_approx_match = true;
			 /**
			  * apply approximation only if the prob threshold is very low
			  * (e.g. below 2^-5)
			  */
#if RC5_ADD_APPROX // ADD-APPROX
			 uint32_t order = RC5_ADD_APPROX_ORDER;
			 uint32_t dk = 0;
			 b_approx_match = rc5_mid_round_add_approx_match(i, dy, dk, new_dx, order);
#endif // #if 0 // ROT-const
#if 0 // NEXT CONST
			 if(b_approx_match) {
				const uint32_t logn = (uint32_t)log2(WORD_SIZE);
				const uint32_t w = WORD_SIZE;
				const uint32_t r = rot_const_prev;
				const uint32_t L = logn; // log2(n) 
				if((i >= RC5_ADD_APPROX_ORDER) && ((i == (w - 1)) || (i == (L + r - 1)))) {
				  uint32_t order = RC5_ADD_APPROX_ORDER;
				  uint32_t dk = 0;
				  rc5_mid_round_add_approx_rot_const_next(i, dy, dk, new_dx, order, rot_const_prev, &g_rot_const_next_bitmask);
				}
			 }
#endif
			 if(b_approx_match) {
				rc5_xdp_add_mid_round_diff_set_out_i(i+1, p_thres, hw_thres, 
																 A, L, R, dy, dx_prev, rot_const_prev, new_dx, new_p, dx_vec);
			 } 
		  }
		}

		gsl_vector_free(R);
	 } // b_match
  }

  return;
}

/* --- */


#if 0
  uint32_t D2 = ds_array.D[0];
  WORD dy = D2;
  WORD dk = 0;
  double p = xdp_add_lm(dx, dk, dy);

  b_match = (p > 0.0);
  if(b_match) {
	 uint32_t D3 = ds_array.D[1];
	 uint32_t s = 0;
	 b_match = false;
	 while((!b_match) && (s < WORD_SIZE)) {
		double p_thres = 0.5;//1.0;//p_thres_array[depth];
		const uint32_t dx_prev = RC5_ROTL(D2, s);
		const uint32_t rot_const_prev = s;
		const uint32_t hw_thres = 1;
		std::vector<uint32_t> dx_vec;
		WORD dy = D3;
		rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
													  dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		uint32_t j = 0;
		while((!b_match) && (j < dx_vec.size())) {
		  WORD dx = 0x80000000;
		  WORD dy = RC5_ROTR(dx_vec[j], s) ^ D2;
		  double p = xdp_add_lm(dx, dk, dy);
		  b_match = (p > 0.0);
		  j++;
		}
		s++;
	 }
  }
#endif

/* --- */

/*
 * Fill the transition matrices of the S-function for the equation
 * from the first round of RC5:
 *
 * \f$ y - x = (y ^ dy) - x* = k\f$
 *
 * One matrix exists for every combination of the i-th bits of
 * y, x, x* and dy: \f$A[2][2][2][2] = A[y[i]][x[i]][xx[i]][dy[i]]\f$, where 
 *
 *   - \f$y[i]\f$ : the i-th bit of y (the second value is yy = y ^ dy).
 *   - \f$x[i]\f$ : the i-th bit of the first input value.
 *   - \f$xx[i]\f$ : the i-th bit of the second input value.
 *   - \f$dy[i]\f$ : the i-th bit of the output difference.
 *
 * A state is the pair (s2, s1) of the signed carries of the two
 * subtractions (yy - xx) and (y - x); each carry is 0 or -1 and the
 * state index is 2 * (s2 + 1) + (s1 + 1). The entry (row, col) of a
 * matrix is increased by 1 when the transition from the input state
 * col to the output state row produces the same bit of k in both
 * subtractions; otherwise it is left unchanged.
 *
 * NOTE(review): the previous header described this as counting the
 * "solutions x" for "fixed x, x* and dy"; as the matrices are indexed
 * by the bits of y, x, x* and dy, the free quantity looks like the
 * word k -- confirm.
 *
 * \param A[y[i]][x[i]][x*[i]][dy[i]]: zero-initialized set of matrices
 * \returns Transition probability matrices A for \f$ y - x = (y ^ dy) - x* = k\f$.
 *
 * \see rc5_last_round_eq_x_sf
 */
void rc5_first_round_eq_x_sf(gsl_matrix* A[2][2][2][2])
{
  const uint32_t n_combos = (1U << 3);				// bit combinations of x, xx, dy
  const uint32_t n_y_vals = 2;						// values of the bit of y: 0,1
  const uint32_t n_states = RC5_LAST_ROUND_MSIZE;

  for(uint32_t combo = 0; combo < n_combos; combo++) {
	 const uint32_t x = (combo >> 0) & 1;
	 const uint32_t xx = (combo >> 1) & 1;
	 const uint32_t dy = (combo >> 2) & 1;

	 for(int32_t state_in = 0; state_in < (int)n_states; state_in++) {
		// decode the input state (s2, s1): bit 0 is s1 + 1, bit 1 is s2 + 1
		const int32_t in_s1 = (state_in & 1) - 1;
		const int32_t in_s2 = ((state_in / 2) & 1) - 1;
#if 0									  // DEBUG
		printf("S[%2d] (s2 s1) %2d%2d\n", state_in, in_s2, in_s1);
#endif

		for(uint32_t y = 0; y < n_y_vals; y++) {
		  const uint32_t yy = (y ^ dy);

		  // k = y - x : result bit and outgoing carry
		  const uint32_t k = y ^ x ^ (in_s1 & 1);
		  const int32_t out_s1 = (int32_t)(y - x + in_s1) >> 1; // signed shift i.e. -1 >> 1 == -1

		  // k = yy - xx : result bit and outgoing carry
		  const uint32_t kk = yy ^ xx ^ (in_s2 & 1);
		  const int32_t out_s2 = (int32_t)(yy - xx + in_s2) >> 1;

		  // the transition is valid only if both subtractions agree on k
		  const uint32_t r = (k == kk) ? 1 : 0;

		  // checks
		  assert((out_s1 == 0) || (out_s1 == -1));
		  assert((out_s2 == 0) || (out_s2 == -1));

		  // encode the output state in the same way as the input state
		  const uint32_t state_out = (2 * (out_s2 + 1)) + (out_s1 + 1);

		  // Link the input state (column) to the output state (row)
		  // in the adjacency matrix:
		  // 
		  //                 input state
		  //                     |
		  //                     V
		  //              [x] [x] [x] [x]  
		  // output   <-  [x] [x] [x] [x]  
		  // state        [x] [x] [x] [x]  
		  // 
		  const uint32_t col = (uint32_t)state_in;
		  const uint32_t row = state_out;

		  // the current entry is read back as an unsigned counter
		  const uint32_t count = gsl_matrix_get(A[y][x][xx][dy], row, col);
		  gsl_matrix_set(A[y][x][xx][dy], row, col, count + r);

		} // values of y

	 }	// input states

  } // bit combinations
}

/* --- */
bool rc5_is_goup_diffs_match_inputs(const pair_t pc_pair,
												const rc5_goup_diffs_t ds_array,
												const gsl_matrix* A_last[2][2][2],
												const gsl_vector* L_last,
												const gsl_vector* C_last,
												const gsl_matrix* A_mid[2][2],
												const gsl_vector* L_mid,
												const gsl_vector* C_mid)
{
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;
  pair_t pc_pair = ds_array->pc_pair;

  WORD x = pc_pair.plaintext_first[left];
  WORD xx = pc_pair.plaintext_second[left];
  WORD dx = = (x ^ xx);
  assert(dx == 0x80000000);

  bool b_match = false;

  uint32_t D2 = ds_array.D[0];
  WORD dy = D2;
  WORD dk = 0;
  double p = xdp_add_lm(dx, dk, dy);
  b_match = (p > 0.0);
  if(b_match) {
	 uint32_t D3 = ds_array.D[1];
	 uint32_t s = 0;
	 b_match = false;
	 while((!b_match) && (s < WORD_SIZE)) {
		double p_thres = 0.5;//1.0;//p_thres_array[depth];
		const uint32_t dx_prev = RC5_ROTL(D2, s);
		const uint32_t rot_const_prev = s;
		const uint32_t hw_thres = 1;
		std::vector<uint32_t> dx_vec;
		WORD dy = D3;
		rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
													  dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		uint32_t j = 0;
		while((!b_match) && (j < dx_vec.size())) {
		  WORD dx = 0x80000000;
		  WORD dy = RC5_ROTR(dx_vec[j], s) ^ D2;
		  double p = xdp_add_lm(dx, dk, dy);
		  //				printf("[%s:%d] dx %8X -> dy %8X s %2d\n", __FILE__, __LINE__, dx, dy, s);
		  b_match = (p > 0.0);
		  j++;
		}
		s++;
	 }
  }
  return b_match;
}


/* --- */

/*
 * Generate the rot const for the next round
 * NOTE: not used
 * \see rc5_last_round_add_approx_rot_const_next , rc5_last_round_add_approx_match
 */
void rc5_mid_round_add_approx_rot_const_next(const uint32_t i, const WORD dx, 
															const WORD dy, const WORD dz, const uint32_t order,
															const uint32_t rot_const, WORD* rot_const_next_bitmask)
{
  assert(0 == 1);
  uint32_t r_next = 0;
  uint32_t logn = (uint32_t)log2(WORD_SIZE);
  assert((logn == 4) || (logn == 5));
  assert(logn <= order);

  const uint32_t w = WORD_SIZE;
  const uint32_t r = rot_const;
  const uint32_t L = logn; // log2(n) 
  assert((i == (w - 1)) || (i == (L + r - 1)));

  uint32_t N = (1U << order);
  uint32_t mask_stride = (0xffffffff >> (32 - order)) << (i + 1 - order); 

  if(!((((L + r - 1) < WORD_SIZE) && (i == (L + r - 1))) || 
		 (((L + r - 1) >= WORD_SIZE) && (i == (w - 1))))) {
	 assert(0 == 1);
	 return;
  }

  WORD y_seq = 0;
  WORD x_seq = 0;
  while(x_seq < N) {
	 WORD x = (x_seq << (i + 1 - order)) & mask_stride; // ...000***000...
	 WORD xx = (x ^ dx) & mask_stride;
	 while(y_seq < N) {
		WORD y = (y_seq << (i + 1 - order)) & mask_stride; // ...000***000...
		WORD yy = (y ^ dy) & mask_stride;
		WORD diff_stride = ((x - y) ^ (xx - yy)) & mask_stride;
		bool b_match = (((diff_stride >> i) & 1) == ((dz >> i) & 1)); // diff[i] ?= dz[i]

		if(b_match) {

		  if(((L + r - 1) < WORD_SIZE) && (i == (L + r - 1))) {
			 r_next = ((((x - y) & mask_stride) >> r) ^ r) & RC5_ROT_MASK; // (y - key) xor r_prev
			 uint32_t rr_next = ((((xx - yy) & mask_stride) >> r) ^ r) & RC5_ROT_MASK; // (y - key) xor r_prev
			 if(r_next == rr_next) {
				//				rot_const_next_vec->push_back(r_next);
				(*rot_const_next_bitmask) |= (1 << r_next);
#if 0 // DEBUG
				printf("[%s:%d] L %2d r %2d i %2d x %8X xx %8X y %8X r_next %2d rr_next %2d\n", __FILE__, __LINE__, L, r, i, x, xx, y, r_next, rr_next);
#endif // #if 0 // DEBUG
			 }
			 assert(r_next < WORD_SIZE);
			 //			 if(hw32(*rot_const_next_bitmask) > 3)
			 //				return;	// <--- !
		  }

		  if(((L + r - 1) >= WORD_SIZE) && (i == (w - 1))) {
			 uint32_t nbits_lo = (L + r - w); 
			 uint32_t mask_lo = (0xffffffff >> (32 - nbits_lo));
			 uint32_t nbits_hi = L - nbits_lo;
			 uint32_t mask_hi = (0xffffffff << (32 - nbits_hi)) & MASK;

			 uint32_t N_lo = (1U << nbits_lo);

			 uint32_t x_seq_lo = 0;
			 while(x_seq_lo < N_lo) {
				uint32_t x_aug = x | x_seq_lo;
				uint32_t xx_aug = (x_aug ^ dx);

				uint32_t y_seq_lo = 0;
				while(y_seq_lo < N_lo) {
				  uint32_t y_aug = y | y_seq_lo;
				  uint32_t yy_aug = (y_aug ^ dy);

				  r_next = (((((x_aug - y_aug) & mask_lo) << nbits_hi) | (((x_aug - y_aug) & mask_hi) >> (32 - nbits_hi))) ^ r) & RC5_ROT_MASK;
				  uint32_t rr_next = (((((xx_aug - yy_aug) & mask_lo) << nbits_hi) | (((xx_aug - yy_aug) & mask_hi) >> (32 - nbits_hi))) ^ r) & RC5_ROT_MASK;
				  if(r_next == rr_next) {
					 //					 rot_const_next_vec->push_back(r_next); // add rot const
					 (*rot_const_next_bitmask) |= (1 << r_next);
#if 0 // DEBUG
					 printf("[%s:%d] L %2d r %2d i %2d x %8X xx %8X y %8X yy %8X r_next %2d rr_next %2d mask_hi %8X mask_lo %8X\n", 
							  __FILE__, __LINE__, L, r, i, x_aug, xx_aug, y_aug, yy_aug, r_next, rr_next, mask_hi, mask_lo);
#endif // #if 0 // DEBUG
					 //					 if(hw32(*rot_const_next_bitmask) > 3)
					 //						return;	// <--- !
				  }
				  y_seq_lo++;
				}
				x_seq_lo++;
			 }
			 //		b_match = b_match_lo;
		  }

		} // if b_match
		y_seq++;
	 }
	 x_seq++;
  }
}

/* --- */
// Fragment (no enclosing function or loop is present here): appends one
// entry to fib_array and one to p_thres_array for index i.
// The enabled branch copies FIB[i] and RC5_P_THRES_ARRAY[i]; the disabled
// branch would instead append WORD_SIZE and a probability threshold of 0.0.
#if 1 // original thresholds
	 fib_array.push_back(FIB[i]);
	 p_thres_array.push_back(RC5_P_THRES_ARRAY[i]);
#else // maximum thresholds
	 double hw_thres = WORD_SIZE;
	 fib_array.push_back(hw_thres);
	 double p_thres = 0.0;
	 p_thres_array.push_back(p_thres);
#endif

/* --- */
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// Table of probability thresholds. Apart from the first two entries
// (exactly 1), each entry has the form 1 / (2^k + 1).
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / 1.0,    // 1 / (2^0 + 0)
  1.0 / 1.0,    // 1 / (2^0 + 0)
  1.0 / 3.0,    // 1 / (2^1 + 1)
  1.0 / 3.0,    // 1 / (2^1 + 1)
  1.0 / 3.0,    // 1 / (2^1 + 1)
  1.0 / 3.0,    // 1 / (2^1 + 1)
  1.0 / 3.0,    // 1 / (2^1 + 1)
  1.0 / 3.0,    // 1 / (2^1 + 1)
  1.0 / 3.0,    // 1 / (2^1 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 9.0,    // 1 / (2^3 + 1)
  1.0 / 17.0,   // 1 / (2^4 + 1)
  1.0 / 17.0,   // 1 / (2^4 + 1)
  1.0 / 33.0,   // 1 / (2^5 + 1)
  1.0 / 129.0,  // 1 / (2^7 + 1); original trailing note: << 6
  1.0 / 257.0,  // 1 / (2^8 + 1); original trailing note: << 8
  1.0 / 513.0   // 1 / (2^9 + 1); original trailing note: << 13
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))



/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// Table of probability thresholds. Apart from the first two entries
// (exactly 1), each entry has the form 1 / (2^k + 1).
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / 1.0,    // 1 / (2^0 + 0)
  1.0 / 1.0,    // 1 / (2^0 + 0)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 5.0,    // 1 / (2^2 + 1)
  1.0 / 9.0,    // 1 / (2^3 + 1)
  1.0 / 17.0,   // 1 / (2^4 + 1)
  1.0 / 33.0,   // 1 / (2^5 + 1)
  1.0 / 65.0,   // 1 / (2^6 + 1); original trailing note: << 6
  1.0 / 257.0,  // 1 / (2^8 + 1); original trailing note: << 8
  1.0 / 513.0   // 1 / (2^9 + 1); original trailing note: << 13
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


/* 
#if 0
		printf("[%s:%d] Inputs to rc5_xdp_add_last_round_diff_set_out()\n", __FILE__, __LINE__);
		printf("y %8X yy %8X dx_prev %8X rotconst_prev %2d p_thres 2^%f hw_thres %2d dx_set_all.size() %d\n",
				 y, yy, dx_prev, rot_const_prev, log2(p_thres), hw_thres, (uint32_t)dx_set_all.size());
#endif

 */

/* --- */

		// Debug trace fragment (no enclosing function is present here):
		// prints the index i and the value of mask_lsb_i.
		printf("[%s:%d] i %2d mask_i %8X\n", __FILE__, __LINE__, i, mask_lsb_i);


/* --- */

		// Debug trace fragment (no enclosing function is present here):
		// prints hw, hw_thres, dx_prev, the bits in which new_dx and dx_prev
		// differ under mask_lsb_i, and mask_lsb_i itself.
		printf("[%s:%d] hw %d hw_thres %d dx_prev %8X (new_dx ^ dx_prev) & mask_lsb_i = %8X mask_lsb_i %8X\n", 
				 __FILE__, __LINE__, hw, hw_thres, dx_prev, (new_dx ^ dx_prev) & mask_lsb_i, mask_lsb_i);


/* --- */

/* 
	[./tests/rc5-tests.cc:1559] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF, RC5_XOR = 0
	[./tests/rc5-tests.cc:1560] RC5_ADD_APPROX  0
	[./tests/rc5-tests.cc:1561] RC5_ADD_APPROX_ORDER  5
	[./tests/rc5-tests.cc:1562] RC5_ADD_APPROX_P_THRES 1.000000 2^0.000000
	[./tests/rc5-tests.cc:488] RC5_FIXED_KEY 1 | Master key[16] = {0x8F, 0x4E, 0xE8, 0xCD, 0x3F, 0x8A, 0x8B, 0x65, 0x4E, 0x9B, 0x9E, 0x74, 0x71, 0xCB, 0x38, 0x75, };
	[./tests/rc5-tests.cc:498] RC5_FIXED_KEY 1 | Expanded key[26] = {0xF67C047E, 0xC9550B9E, 0x2740CDBD, 0xF98D79A4, 0x18543AF9, 0x2722AAED, 0x D8DD454, 0x3FDAAA73, 0x81FC0514, 0xC09E9564, 0x1CA415AC, 0xD724C686, 0xF3445171, 0xD61C71CF, 0x50D5028D, 0x7E215363, 0x83457D50, 0xD689B160, 0x4FAA3982, 0x2748F92F, 0x73C54120, 0x750AB740, 0x4CB425F1, 0xA037AC80, 0xDD5B6761, 0xB86C353D, };
	[./src/rc5-dc.cc:1840]  8R p(80000000 80000000 -> *) = 0.000000 2^-21.000000 | 4 2^23.000000
	[./src/rc5-dc.cc:2932] Enter rc5_equal_rot_attack()
	[./src/rc5-dc.cc:3002] RC5_FIB_LEN 12 fib_array.size() = 12
	[./src/rc5-dc.cc:3004] fib_array = [ 0]  2 [ 1]  2 [ 2]  2 [ 3]  2 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  2 [ 8]  2 [ 9]  2 [10]  2 [11]  2
	[./src/rc5-dc.cc:3009] p_thres_array = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -3.17 [ 7] -4.09 [ 8] -5.04 [ 9] -6.02 [10] -8.01 [11] -13.00


R[ 1]: A 6C4B4C8D B D7CA5960 | S[ 2] 2740CDBD S[ 3] F98D79A4
R[ 2]: A D3D550E6 B 2EE50C6E | S[ 4] 18543AF9 S[ 5] 2722AAED
R[ 3]: A 24B013A0 B 4A2FCA41 | S[ 6] D8DD454 S[ 7] 3FDAAA73
R[ 4]: A 5F3BB8D6 B 6663DA80 | S[ 8] 81FC0514 S[ 9] C09E9564
R[ 5]: A 55FC7802 B A5A3508E | S[10] 1CA415AC S[11] D724C686
R[ 6]: A BD678D88 B 9AF977E7 | S[12] F3445171 S[13] D61C71CF
R[ 7]: A 20523A20 B 38CCA12A | S[14] 50D5028D S[15] 7E215363
R[ 8]: A FDB1A5B2 B E8ECC754 | S[16] 83457D50 S[17] D689B160
R[ 1]: A 6C4B4C8D B D7CA4960 | S[ 2] 2740CDBD S[ 3] F98D79A4
R[ 2]: A D3D540E6 B 2EE50C6E | S[ 4] 18543AF9 S[ 5] 2722AAED
R[ 3]: A 20B013A0 B 4E2FCA41 | S[ 6] D8DD454 S[ 7] 3FDAAA73
R[ 4]: A 5F3BB8D6 B 6662DA80 | S[ 8] 81FC0514 S[ 9] C09E9564
R[ 5]: A 55FD7802 B A5A3508E | S[10] 1CA415AC S[11] D724C686
R[ 6]: A 7D678D88 B 9AF978A7 | S[12] F3445171 S[13] D61C71CF
R[ 7]: A 204F9A80 B 38D8358A | S[14] 50D5028D S[15] 7E215363
R[ 8]: A E201A5B2 B 176D1CC6 | S[16] 83457D50 S[17] D689B160
[./src/rc5-dc.cc:3442] HRound#[ 5] | 0 0 0 1           key 2722AAED y 2EE50C6E yy 2EE50C6E x  7C26181 xx  7C26181 dx        0
[./src/rc5-dc.cc:3442] HRound#[ 6] | 0 0 0 0 1         key  D8DD454 y 24B013A0 yy 20B013A0 x 17223F4C xx 13223F4C dx  4000000
[./src/rc5-dc.cc:3442] HRound#[ 7] | 0 0 0 0 0 0 0 0 1 key 3FDAAA73 y 4A2FCA41 yy 4E2FCA41 x  A551FCE xx  E551FCE dx  4000000  <- X
[./src/rc5-dc.cc:3442] HRound#[ 8] | 0 0 0 0 1         key 81FC0514 y 5F3BB8D6 yy 5F3BB8D6 x DD3FB3C2 xx DD3FB3C2 dx        0
[./src/rc5-dc.cc:3442] HRound#[ 9] | 0 0 0 1           key C09E9564 y 6663DA80 yy 6662DA80 x A5C5451C xx A5C4451C dx    10000
[./src/rc5-dc.cc:3442] HRound#[10] | 0 0 0 1           key 1CA415AC y 55FC7802 yy 55FD7802 x 39586256 xx 39596256 dx    10000
[./src/rc5-dc.cc:3442] HRound#[11] | 0 0 0 0 0 1       key D724C686 y A5A3508E yy A5A3508E x CE7E8A08 xx CE7E8A08 dx        0  <- X
[./src/rc5-dc.cc:3442] HRound#[12] | 0 0 0 1           key F3445171 y BD678D88 yy 7D678D88 x CA233C17 xx 8A233C17 dx 40000000
[./src/rc5-dc.cc:3442] HRound#[13] | 0 0 0 1           key D61C71CF y 9AF977E7 yy 9AF978A7 x C4DD0618 xx C4DD06D8 dx       C0
[./src/rc5-dc.cc:3442] HRound#[14] | 0 0 0 0 0 0 0 0 1 key 50D5028D y 20523A20 yy 204F9A80 x CF7D3793 xx CF7A97F3 dx    7A060  <- X
[./src/rc5-dc.cc:3442] HRound#[15] | 0 0 0 0 1         key 7E215363 y 38CCA12A yy 38D8358A x BAAB4DC7 xx BAB6E227 dx   1DAFE0
[./src/rc5-dc.cc:3442] HRound#[16] | 0 0 0 0 0 1       key 83457D50 y FDB1A5B2 yy E201A5B2 x 7A6C2862 xx 5EBC2862 dx 24D00000  <- X
[./src/rc5-dc.cc:3442] HRound#[17] | 0 0 0 0 1         key D689B160 y E8ECC754 yy 176D1CC6 x 126315F4 xx 40E36B66 dx 52807E92

X[ 0] 552917A6 D52917A6 80000000 ( 1)
X[ 1] C858B1CD 4858B1CD 80000000 ( 1)
X[ 2] 91ADBD6B 11ADBD6B 80000000 11 ( 1)
X[ 3] 6C4B4C8D 6C4B4C8D        0 13 ( 0)
X[ 4] D7CA5960 D7CA4960     1000  0 ( 1)
X[ 5] D3D550E6 D3D540E6     1000  6 ( 1)
X[ 6] 2EE50C6E 2EE50C6E        0 14 ( 0)
X[ 7] 24B013A0 20B013A0  4000000  0 ( 1) F  2
X[ 8] 4A2FCA41 4E2FCA41  4000000  1 ( 1) F  2
X[ 9] 5F3BB8D6 5F3BB8D6        0 22 ( 0) F  2
X[10] 6663DA80 6662DA80    10000  0 ( 1) F  2
X[11] 55FC7802 55FD7802    10000  2 ( 1) F  2
X[12] A5A3508E A5A3508E        0 14 ( 0) F  2
X[13] BD678D88 7D678D88 C0000000  8 ( 2) F  2
X[14] 9AF977E7 9AF978A7      F40  7 ( 5) F  2 .
X[15] 20523A20 204F9A80   1DA0A0  0 ( 8) F  2 .
X[16] 38CCA12A 38D8358A   1494A0 10 ( 7) F  2 .
X[17] FDB1A5B2 E201A5B2 1FB00000 18 ( 8) F  2 .
X[18] E8ECC754 176D1CC6 FF81DB92  0 (19) F  2 .
[./src/rc5-dc.cc:3361] [ 3]        0 -> (([ 1] 80000000 ^ [ 2] 80000000) <<< [ 2] 11) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3361] [ 4]     1000 -> (([ 2] 80000000 ^ [ 3]        0) <<< [ 3] 13) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3361] [ 5]     1000 -> (([ 3]        0 ^ [ 4]     1000) <<< [ 4]  0) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3361] [ 6]        0 -> (([ 4]     1000 ^ [ 5]     1000) <<< [ 5]  6) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3361] [ 7]  4000000 -> (([ 5]     1000 ^ [ 6]        0) <<< [ 6] 14) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3361] [ 8]  4000000 -> (([ 6]        0 ^ [ 7]  4000000) <<< [ 7]  0) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3361] [ 9]        0 -> (([ 7]  4000000 ^ [ 8]  4000000) <<< [ 8]  1) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3361] [10]    10000 -> (([ 8]  4000000 ^ [ 9]        0) <<< [ 9] 22) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3361] [11]    10000 -> (([ 9]        0 ^ [10]    10000) <<< [10]  0) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3361] [12]        0 -> (([10]    10000 ^ [11]    10000) <<< [11]  2) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3361] [13] C0000000 -> (([11]    10000 ^ [12]        0) <<< [12] 14) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  1 #set 2
[./src/rc5-dc.cc:3361] [14]      F40 -> (([12]        0 ^ [13] C0000000) <<< [13]  8) 0.015625 2^-6.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3361] [15]   1DA0A0 -> (([13] C0000000 ^ [14]      F40) <<< [14]  7) 0.000977 2^-10.00 | p_th 2^-inf hw_thres  2 #set 0
[./src/rc5-dc.cc:3361] [16]   1494A0 -> (([14]      F40 ^ [15]   1DA0A0) <<< [15]  0) 0.000061 2^-14.00 | p_th 2^-inf hw_thres  5 #set 0

[./src/rc5-dc.cc:3392] Inputs to rc5_xdp_add_last_round_diff_set_out()

y FDB1A5B2 yy E201A5B2 dx_prev 52528000 rotconst_prev 10 p_thres 2^-inf hw_thres  8 dx_set_all.size() 68
[./src/rc5-dc.cc:3399] [17] 1FB00000 = (FDB1A5B2 ^ E201A5B2) -> (([15]   1DA0A0 ^ [16]   1494A0) <<< [16] 10) 0.031250 2^-5.00 | p_th 2^-inf hw_thres  8 #set 68

[./src/rc5-dc.cc:3392] Inputs to rc5_xdp_add_last_round_diff_set_out()

y E8ECC754 yy 176D1CC6 dx_prev     7EC0 rotconst_prev 18 p_thres 2^-inf hw_thres  7 dx_set_all.size() 0
[./src/rc5-dc.cc:3399] [18] FF81DB92 = (E8ECC754 ^ 176D1CC6) -> (([16]   1494A0 ^ [17] 1FB00000) <<< [17] 18) 0.000122 2^-13.00 | p_th 2^-inf hw_thres  7 #set 0

[./src/rc5-dc.cc:2878]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -1.00 0.00 -1.00 -6.00 -10.00 -14.00 -5.00 -13.00
[./src/rc5-dc.cc:2887]    hw_arr =  1  1  1  0  1  1  0  1  1  0  1  1  0  2  5  8  7  8 19
[./src/rc5-dc.cc:2894] set sizes =     0     0     0     1     1     0     1     1     0     1     1     0     1     2     0     0     0    68     0

 */



/* ------------------ */



/* 

	[./tests/rc5-tests.cc:1560] RC5_ADD_APPROX  0
	[./tests/rc5-tests.cc:1561] RC5_ADD_APPROX_ORDER  5
	[./tests/rc5-tests.cc:1562] RC5_ADD_APPROX_P_THRES 1.000000 2^0.000000
	[./tests/rc5-tests.cc:488] RC5_FIXED_KEY 1 | Master key[16] = {0x8F, 0x4E, 0xE8, 0xCD, 0x3F, 0x8A, 0x8B, 0x65, 0x4E, 0x9B, 0x9E, 0x74, 0x71, 0xCB, 0x38, 0x75, };
	[./tests/rc5-tests.cc:498] RC5_FIXED_KEY 1 | Expanded key[26] = {0xF67C047E, 0xC9550B9E, 0x2740CDBD, 0xF98D79A4, 0x18543AF9, 0x2722AAED, 0x D8DD454, 0x3FDAAA73, 0x81FC0514, 0xC09E9564, 0x1CA415AC, 0xD724C686, 0xF3445171, 0xD61C71CF, 0x50D5028D, 0x7E215363, 0x83457D50, 0xD689B160, 0x4FAA3982, 0x2748F92F, 0x73C54120, 0x750AB740, 0x4CB425F1, 0xA037AC80, 0xDD5B6761, 0xB86C353D, };
	[./src/rc5-dc.cc:1839]  8R p(80000000 80000000 -> *) = 0.000001 2^-20.678072 | 5 2^23.000000
	[./src/rc5-dc.cc:2931] Enter rc5_equal_rot_attack()
	[./src/rc5-dc.cc:3001] RC5_FIB_LEN 12 fib_array.size() = 12
	[./src/rc5-dc.cc:3003] fib_array = [ 0]  2 [ 1]  2 [ 2]  2 [ 3]  2 [ 4]  2 [ 5]  2 [ 6]  2 [ 7]  2 [ 8]  2 [ 9]  2 [10]  2 [11]  2
	[./src/rc5-dc.cc:3008] p_thres_array = [ 0] -2.32 [ 1] -2.32 [ 2] -2.32 [ 3] -2.32 [ 4] -2.32 [ 5] -2.32 [ 6] -3.17 [ 7] -4.09 [ 8] -5.04 [ 9] -6.02 [10] -8.01 [11] -13.00


key schedule (A B values are irrelevant)

R[ 1]: A D8D55E48 B 572EFD00 | S[ 2] 2740CDBD S[ 3] F98D79A4
R[ 2]: A A84FDE41 B 25E4F170 | S[ 4] 18543AF9 S[ 5] 2722AAED
R[ 3]: A 3CBF61FF B CC8872BA | S[ 6] D8DD454 S[ 7] 3FDAAA73
R[ 4]: A 99BCE161 B 6B07BD1A | S[ 8] 81FC0514 S[ 9] C09E9564
R[ 5]: A  C6F031D B C411DE46 | S[10] 1CA415AC S[11] D724C686
R[ 6]: A 12FBA863 B 8D7022FD | S[12] F3445171 S[13] D61C71CF
R[ 7]: A 24C673E0 B 27D7A480 | S[14] 50D5028D S[15] 7E215363
R[ 8]: A 865754B0 B C6BA52E0 | S[16] 83457D50 S[17] D689B160
R[ 1]: A D8D55E48 B 572EFD80 | S[ 2] 2740CDBD S[ 3] F98D79A4
R[ 2]: A A84FDEC1 B 25E4F170 | S[ 4] 18543AF9 S[ 5] 2722AAED
R[ 3]: A 3D3F61FF B CC4872BA | S[ 6] D8DD454 S[ 7] 3FDAAA73
R[ 4]: A 99C1E161 B 6BB1BD1A | S[ 8] 81FC0514 S[ 9] C09E9564
R[ 5]: A  C6DD71D B C42053C6 | S[10] 1CA415AC S[11] D724C686
R[ 6]: A  6A58863 B EA4B4EFD | S[12] F3445171 S[13] D61C71CF
R[ 7]: A 2E72DB60 B 425AE900 | S[14] 50D5028D S[15] 7E215363
R[ 8]: A EF6DAFB0 B 1D3A5E97 | S[16] 83457D50 S[17] D689B160


	[./src/rc5-dc.cc:3058] Pair is good # 2
	[./src/rc5-dc.cc:3436] HRound#[ 5] | 0 0 0 0 1           key 2722AAED y 1C01DE0D yy 1C01DE0D x F4DF3320 xx F4DF3320 dx        0
	[./src/rc5-dc.cc:3436] HRound#[ 6] | 0 0 0 1             key  D8DD454 y 7BD787F3 yy 7BDF87F3 x 6E49B39F xx 6E51B39F dx   180000
	[./src/rc5-dc.cc:3436] HRound#[ 7] | 0 0 0 0 0 0 0 0 1   key 3FDAAA73 y  FCDE925 yy  FCDE965 x CFF33EB2 xx CFF33EF2 dx       40  <- X
	[./src/rc5-dc.cc:3436] HRound#[ 8] | 0 0 0 1             key 81FC0514 y  549DFE2 yy  449D7E2 x 834DDACE xx 824DD2CE dx  1000800
	[./src/rc5-dc.cc:3436] HRound#[ 9] | 0 0 0 0 1           key C09E9564 y EAAF7080 yy EEAF8F80 x 2A10DB1C xx 2E10FA1C dx  4002100
	[./src/rc5-dc.cc:3436] HRound#[10] | 0 0 0 0 0 0 0 1     key 1CA415AC y  C8AC50E yy  78A6E0E x EFE6AF62 xx EAE65862 dx  500F700  <- X
	[./src/rc5-dc.cc:3436] HRound#[11] | 0 0 0 0 1           key D724C686 y 4488800F yy 4F8880CF x 6D63B989 xx 7863BA49 dx 150003C0
	[./src/rc5-dc.cc:3436] HRound#[12] | 0 0 0 1             key F3445171 y 15C4F572 yy 6AA4F572 x 2280A401 xx 7760A401 dx 55E00000
	[./src/rc5-dc.cc:3436] HRound#[13] | 0 0 0 0 0 0 1       key D61C71CF y AC11B700 yy AD110680 x D5F54531 xx D6F494B1 dx  301D180  <- X
	[./src/rc5-dc.cc:3436] HRound#[14] | 0 0 0 0 0 1         key 50D5028D y  AAA44FF yy 188AF67F x B9D54272 xx C7B5F3F2 dx 7E60B180  <- X
	[./src/rc5-dc.cc:3436] HRound#[15] | 0 0 0 0 0 0 0 0 0 1 key 7E215363 y 517F4D62 yy 58EF4BE2 x D35DF9FF xx DACDF87F dx  9900180  <- X
	[./src/rc5-dc.cc:3436] HRound#[16] | 0 0 0 0 0 1         key 83457D50 y F299A3C5 yy 84DC73C5 x 6F542675 xx  196F675 dx 6EC2D000  <- X
	[./src/rc5-dc.cc:3436] HRound#[17] | 0 0 0 0 0 1         key D689B160 y 53678654 yy 5CF0B65B x 7CDDD4F4 xx 866704FB dx FABAD00F  <- X

X[ 0] 6C5D85E0 EC5D85E0 80000000 ( 1)
X[ 1] 9B21D2CE 1B21D2CE 80000000 ( 1)
X[ 2] 6476DE6C E476DE6C 80000000 12 ( 1)
X[ 3] 1C83EE27 1C83EE27        0  7 ( 0)
X[ 4] 74259F60 74259FA0       C0  0 ( 2)
X[ 5] 80FAAC40 80FAAC80       C0  0 ( 2)
X[ 6] 1C01DE0D 1C01DE0D        0 13 ( 0)
X[ 7] 7BD787F3 7BDF87F3    80000 19 ( 1) F  2
X[ 8]  FCDE925  FCDE965       40  5 ( 1) F  2
X[ 9]  549DFE2  449D7E2  1000800  2 ( 2) F  2
X[10] EAAF7080 EEAF8F80  400FF00  0 ( 9) F  2 .
X[11]  C8AC50E  78A6E0E  B00AB00 14 ( 8) F  2 .
X[12] 4488800F 4F8880CF  B0000C0 15 ( 5) F  2 .
X[13] 15C4F572 6AA4F572 7F600000 18 ( 9) F  2 .
X[14] AC11B700 AD110680  100B180  0 ( 6) F  2 .
X[15]  AAA44FF 188AF67F 1220B280 31 ( 8) F  2 .
X[16] 517F4D62 58EF4BE2  9900680  2 ( 7) F  2 .
X[17] F299A3C5 84DC73C5 7645D000  5 (11) F  2 .
X[18] 53678654 5CF0B65B  F97300F  0 (15) F  2 .
[./src/rc5-dc.cc:3360] [ 3]        0 -> (([ 1] 80000000 ^ [ 2] 80000000) <<< [ 2] 12) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [ 4]       C0 -> (([ 2] 80000000 ^ [ 3]        0) <<< [ 3]  7) 0.250000 2^-2.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [ 5]       C0 -> (([ 3]        0 ^ [ 4]       C0) <<< [ 4]  0) 0.250000 2^-2.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3360] [ 6]        0 -> (([ 4]       C0 ^ [ 5]       C0) <<< [ 5]  0) 1.000000 2^0.00 | p_th 2^-inf hw_thres  2 #set 1
[./src/rc5-dc.cc:3360] [ 7]    80000 -> (([ 5]       C0 ^ [ 6]        0) <<< [ 6] 13) 0.250000 2^-2.00 | p_th 2^-inf hw_thres  2 #set 2
[./src/rc5-dc.cc:3360] [ 8]       40 -> (([ 6]        0 ^ [ 7]    80000) <<< [ 7] 19) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3360] [ 9]  1000800 -> (([ 7]    80000 ^ [ 8]       40) <<< [ 8]  5) 0.250000 2^-2.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [10]  400FF00 -> (([ 8]       40 ^ [ 9]  1000800) <<< [ 9]  2) 0.001953 2^-9.00 | p_th 2^-inf hw_thres  1 #set 0
[./src/rc5-dc.cc:3360] [11]  B00AB00 -> (([ 9]  1000800 ^ [10]  400FF00) <<< [10]  0) 0.000244 2^-12.00 | p_th 2^-inf hw_thres  2 #set 0
[./src/rc5-dc.cc:3360] [12]  B0000C0 -> (([10]  400FF00 ^ [11]  B00AB00) <<< [11] 14) 0.001953 2^-9.00 | p_th 2^-inf hw_thres  9 #set 247
[./src/rc5-dc.cc:3360] [13] 7F600000 -> (([11]  B00AB00 ^ [12]  B0000C0) <<< [12] 15) 0.000977 2^-10.00 | p_th 2^-inf hw_thres  8 #set 524
[./src/rc5-dc.cc:3360] [14]  100B180 -> (([12]  B0000C0 ^ [13] 7F600000) <<< [13] 18) 0.001953 2^-9.00 | p_th 2^-inf hw_thres  5 #set 0
[./src/rc5-dc.cc:3360] [15] 1220B280 -> (([13] 7F600000 ^ [14]  100B180) <<< [14]  0) 0.000061 2^-14.00 | p_th 2^-inf hw_thres  9 #set 2453
[./src/rc5-dc.cc:3360] [16]  9900680 -> (([14]  100B180 ^ [15] 1220B280) <<< [15] 31) 0.003906 2^-8.00 | p_th 2^-inf hw_thres  6 #set 0
[./src/rc5-dc.cc:3393] [17] 7645D000 = (F299A3C5 ^ 84DC73C5) -> (([15] 1220B280 ^ [16]  9900680) <<< [16]  2) 0.000488 2^-11.00 | p_th 2^-inf hw_thres  8 #set 264
[./src/rc5-dc.cc:3393] [18]  F97300F = (53678654 ^ 5CF0B65B) -> (([16]  9900680 ^ [17] 7645D000) <<< [17]  5) 0.000008 2^-17.00 | p_th 2^-inf hw_thres  7 #set 0
[./src/rc5-dc.cc:2877]  prob_arr = 0.00 0.00 0.00 0.00 -2.00 -2.00 0.00 -2.00 -1.00 -2.00 -9.00 -12.00 -9.00 -10.00 -9.00 -14.00 -8.00 -11.00 -17.00
[./src/rc5-dc.cc:2886]    hw_arr =  1  1  1  0  2  2  0  1  1  2  9  8  5  9  6  8  7 11 15
[./src/rc5-dc.cc:2893] set sizes =     0     0     0     1     1     0     1     2     0     1     0     0   247   524     0  2453     0   264     0
[./src/rc5-dc.cc:3108] Good filtered f1: 2
[./src/rc5-dc.cc:2434] p_thres 2^-13.00 [    1] dx 328818B7 set size          1
[
 */



/* --- */


/* 
X[ 0] 5DCE3F21 DDCE3F21 80000000 ( 1)
X[ 1] A0E344AF 20E344AF 80000000 ( 1)
X[ 2] 6A38504D EA38504D 80000000 13 ( 1)
X[ 3] 69BB158B 69BB158B        0 11 ( 0)
X[ 4] 13BBA9C0 13BBADC0      400  0 ( 1)
X[ 5] 9254F744 9254F344      400  4 ( 1)
X[ 6] 46189335 46189335        0 21 ( 0)
X[ 7] 9BC85DE0 1BC85DE0 80000000  0 ( 1) F  2
X[ 8] 1DAB7948 9DAB7948 80000000  8 ( 1) F  2
X[ 9] E520AD9A E520AD9A        0 26 ( 0) F  2
X[10]  C80C4B7  A80C4B7  6000000 23 ( 2) F  2
X[11] B398E5E0 B39BE5E0    30000  0 ( 2) F  2
X[12] 963CE7DD 903FE7DD  6030000 29 ( 4) F  2 .
X[13] 97F8D1B8 97B8D1B8   400000 24 ( 1) F  2
X[14] 3B1E3605 3B23F905   3DCF00  5 (11) F  2 .
X[15] EDB1FA42 E43A1A42  98BE000  2 ( 9) F  2 .
X[16] D8E08482 FA88E082 22686400  2 ( 8) F  2 .
X[17] 588B7850 FE116850 A69A1000 16 ( 9) F  2 .
X[18] D35C31CB 5F5BB5F9 8C078432  0 (11) F  2 .
[./src/rc5-dc.cc:3360] [ 3]        0 -> (([ 1] 80000000 ^ [ 2] 80000000) <<< [ 2] 13) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [ 4]      400 -> (([ 2] 80000000 ^ [ 3]        0) <<< [ 3] 11) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [ 5]      400 -> (([ 3]        0 ^ [ 4]      400) <<< [ 4]  0) 0.500000 2^-1.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3360] [ 6]        0 -> (([ 4]      400 ^ [ 5]      400) <<< [ 5]  4) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [ 7] 80000000 -> (([ 5]      400 ^ [ 6]        0) <<< [ 6] 21) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [ 8] 80000000 -> (([ 6]        0 ^ [ 7] 80000000) <<< [ 7]  0) 1.000000 2^0.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3360] [ 9]        0 -> (([ 7] 80000000 ^ [ 8] 80000000) <<< [ 8]  8) 1.000000 2^0.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [10]  6000000 -> (([ 8] 80000000 ^ [ 9]        0) <<< [ 9] 26) 0.250000 2^-2.00 | p_th 2^-inf hw_thres  1 #set 1
[./src/rc5-dc.cc:3360] [11]    30000 -> (([ 9]        0 ^ [10]  6000000) <<< [10] 23) 0.250000 2^-2.00 | p_th 2^-inf hw_thres  0 #set 0
[./src/rc5-dc.cc:3360] [12]  6030000 -> (([10]  6000000 ^ [11]    30000) <<< [11]  0) 0.062500 2^-4.00 | p_th 2^-inf hw_thres  2 #set 5
[./src/rc5-dc.cc:3360] [13]   400000 -> (([11]    30000 ^ [12]  6030000) <<< [12] 29) 0.250000 2^-2.00 | p_th 2^-inf hw_thres  2 #set 0
[./src/rc5-dc.cc:3360] [14]   3DCF00 -> (([12]  6030000 ^ [13]   400000) <<< [13] 24) 0.000244 2^-12.00 | p_th 2^-inf hw_thres  4 #set 127
[./src/rc5-dc.cc:3360] [15]  98BE000 -> (([13]   400000 ^ [14]   3DCF00) <<< [14]  5) 0.000122 2^-13.00 | p_th 2^-inf hw_thres  1 #set 0
[./src/rc5-dc.cc:3360] [16] 22686400 -> (([14]   3DCF00 ^ [15]  98BE000) <<< [15]  2) 0.000061 2^-14.00 | p_th 2^-inf hw_thres 11 #set 11638
[./src/rc5-dc.cc:3393] [17] A69A1000 = (588B7850 ^ FE116850) -> (([15]  98BE000 ^ [16] 22686400) <<< [16]  2) 0.000977 2^-10.00 | p_th 2^-inf hw_thres  9 #set 579
[./src/rc5-dc.cc:3393] [18] 8C078432 = (D35C31CB ^ 5F5BB5F9) -> (([16] 22686400 ^ [17] A69A1000) <<< [17] 16) 0.000488 2^-11.00 | p_th 2^-inf hw_thres  8 #set 0
[./src/rc5-dc.cc:2877]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 0.00 0.00 0.00 -2.00 -2.00 -4.00 -2.00 -12.00 -13.00 -14.00 -10.00 -11.00
[./src/rc5-dc.cc:2886]    hw_arr =  1  1  1  0  1  1  0  1  1  0  2  2  4  1 11  9  8  9 11
[./src/rc5-dc.cc:2893] set sizes =     0     0     0     1     1     0     1     1     0     1     1     0     5     0   127     0 11638   579     0
[./src/rc5-dc.cc:3108] Good filtered f1: 1

 */


/* --- */

/* 
X[ 0] B817C4F3 3817C4F3 80000000 ( 1)
X[ 1]  294DAE5 8294DAE5 80000000 ( 1)
X[ 2] CBE9E683 4BE9E683 80000000  3 ( 1)
X[ 3] 53124D50 53124D50        0 16 ( 0)
X[ 4] A561129F A560929F    18000 31 ( 2)
X[ 5] 138DEAE0 138DAAE0     4000  0 ( 1)
X[ 6] DE0FA36C DE0FE36C     4000 12 ( 1)
X[ 7] 3226A12C 3226A12C        0 12 ( 0) F  2
X[ 8] CFFEB935 D3FEB935 1C000000 21 ( 3) F  2 .
X[ 9] 853BC017 85384017    38000 23 ( 3) F  2 .
X[10] 51C3F820 51C9F8E0    A00C0  0 ( 4) F  2 .
X[11] F19C4DE3 F195CEA3    98340  3 ( 6) F  2 .
X[12] DA2274A3 DA0678A3   240C00  3 ( 4) F  2 .
X[13] 51361B72 4FE20172 1ED41A00 18 (11) F  2 .
X[14] 95629E20 BD62C960 28005740  0 ( 8) F  2 .
X[15] 152987DF 4355CA9F 567C4D40 31 (14) F  2 .
X[16] 3E46E062 7D3CD562 437A3500  2 (12) F  2 .
X[17] 31031C44 7CE9FD44 4DEAE100  4 (13) F  2 .
X[18] CAE973C0 F3DC33C0 39354000  0 ( 9) F  2 .
[./src/rc5-dc.cc:3360] [ 3]        0 -> (([ 1] 80000000 ^ [ 2] 80000000) <<< [ 2]  3) 1.000000 2^0.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [ 4]    18000 -> (([ 2] 80000000 ^ [ 3]        0) <<< [ 3] 16) 0.250000 2^-2.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [ 5]     4000 -> (([ 3]        0 ^ [ 4]    18000) <<< [ 4] 31) 0.250000 2^-2.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [ 6]     4000 -> (([ 4]    18000 ^ [ 5]     4000) <<< [ 5]  0) 0.125000 2^-3.00 | p_th 2^-inf #set 3
[./src/rc5-dc.cc:3360] [ 7]        0 -> (([ 5]     4000 ^ [ 6]     4000) <<< [ 6] 12) 1.000000 2^0.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [ 8] 1C000000 -> (([ 6]     4000 ^ [ 7]        0) <<< [ 7] 12) 0.125000 2^-3.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [ 9]    38000 -> (([ 7]        0 ^ [ 8] 1C000000) <<< [ 8] 21) 0.125000 2^-3.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [10]    A00C0 -> (([ 8] 1C000000 ^ [ 9]    38000) <<< [ 9] 23) 0.015625 2^-6.00 | p_th 2^-inf #set 11
[./src/rc5-dc.cc:3360] [11]    98340 -> (([ 9]    38000 ^ [10]    A00C0) <<< [10]  0) 0.007812 2^-7.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [12]   240C00 -> (([10]    A00C0 ^ [11]    98340) <<< [11]  3) 0.007812 2^-7.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [13] 1ED41A00 -> (([11]    98340 ^ [12]   240C00) <<< [12]  3) 0.000015 2^-16.00 | p_th 2^-inf #set 293
[./src/rc5-dc.cc:3360] [14] 28005740 -> (([12]   240C00 ^ [13] 1ED41A00) <<< [13] 18) 0.000122 2^-13.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [15] 567C4D40 -> (([13] 1ED41A00 ^ [14] 28005740) <<< [14]  0) 0.000015 2^-16.00 | p_th 2^-inf #set 310430
[./src/rc5-dc.cc:3360] [16] 437A3500 -> (([14] 28005740 ^ [15] 567C4D40) <<< [15] 31) 0.000004 2^-18.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3393] [17] 4DEAE100 = (31031C44 ^ 7CE9FD44) -> (([15] 567C4D40 ^ [16] 437A3500) <<< [16]  2) 0.000977 2^-10.00 | p_th 2^-12.00 #set 338
[./src/rc5-dc.cc:3393] [18] 39354000 = (CAE973C0 ^ F3DC33C0) -> (([16] 437A3500 ^ [17] 4DEAE100) <<< [17]  4) 0.003906 2^-8.00 | p_th 2^-10.00 #set 0
[./src/rc5-dc.cc:2877]  prob_arr = 0.00 0.00 0.00 0.00 -2.00 -2.00 -3.00 0.00 -3.00 -3.00 -6.00 -7.00 -7.00 -16.00 -13.00 -16.00 -18.00 -10.00 -8.00
[./src/rc5-dc.cc:2886]    hw_arr =  1  1  1  0  2  1  1  0  3  3  4  6  4 11  8 14 12 13  9
[./src/rc5-dc.cc:2893] set sizes =     0     0     0     1     1     0     3     1     1     0    11     0     0   293     0 310430     0   338     0
[./src/rc5-dc.cc:3108] Good filtered f1: 2

 */


 /* 
X[ 0] A112E8F6 2112E8F6 80000000 ( 1)
X[ 1] CC4CA0B2 4C4CA0B2 80000000 ( 1)
X[ 2] 95A1AC50 15A1AC50 80000000 16 ( 1)
X[ 3] 6864CFEC 6864CFEC        0 12 ( 0)
X[ 4] 4FC94980 4FC94180      800  0 ( 1)
X[ 5] 4001C165 4001C965      800  5 ( 1)
X[ 6] 2033C78E 2033C78E        0 14 ( 0)
X[ 7] 8F48AC60 9148AC60 1E000000  0 ( 4) F  2 .
X[ 8] EF561661 F1561661 1E000000  1 ( 4) F  2 .
X[ 9] 42397916 42397916        0 22 ( 0) F  2
X[10] 9E89F13F 9E8B713F    28000 31 ( 2) F  2
X[11]  AFC59C0  AFD19C0    14000  0 ( 2) F  2
X[12] 6B9A6F85 6B9B2F85    14000  5 ( 2) F  2
X[13] 200B1A1D 200B1A1D        0 29 ( 0) F  2
X[14] DF8EA082 DF8E7882     D800  2 ( 4) F  2 .
X[15] 4EEBED0C 4EEA8D0C    16000 12 ( 3) F  2 .
X[16] D2FA3C79 CD7A3C79 1F800000 25 ( 6) F  2 .
X[17] 6E7DA0F2 6E4C9EB2   313E40 18 ( 9) F  2 .
X[18] 48B8A37E 61B83E3A 29009D44  0 (10) F  2 .
[./src/rc5-dc.cc:3360] [ 3]        0 -> (([ 1] 80000000 ^ [ 2] 80000000) <<< [ 2] 16) 1.000000 2^0.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [ 4]      800 -> (([ 2] 80000000 ^ [ 3]        0) <<< [ 3] 12) 0.500000 2^-1.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [ 5]      800 -> (([ 3]        0 ^ [ 4]      800) <<< [ 4]  0) 0.500000 2^-1.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [ 6]        0 -> (([ 4]      800 ^ [ 5]      800) <<< [ 5]  5) 1.000000 2^0.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [ 7] 1E000000 -> (([ 5]      800 ^ [ 6]        0) <<< [ 6] 14) 0.062500 2^-4.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [ 8] 1E000000 -> (([ 6]        0 ^ [ 7] 1E000000) <<< [ 7]  0) 0.062500 2^-4.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [ 9]        0 -> (([ 7] 1E000000 ^ [ 8] 1E000000) <<< [ 8]  1) 1.000000 2^0.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [10]    28000 -> (([ 8] 1E000000 ^ [ 9]        0) <<< [ 9] 22) 0.062500 2^-4.00 | p_th 2^-inf #set 8
[./src/rc5-dc.cc:3360] [11]    14000 -> (([ 9]        0 ^ [10]    28000) <<< [10] 31) 0.250000 2^-2.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [12]    14000 -> (([10]    28000 ^ [11]    14000) <<< [11]  0) 0.062500 2^-4.00 | p_th 2^-inf #set 6
[./src/rc5-dc.cc:3360] [13]        0 -> (([11]    14000 ^ [12]    14000) <<< [12]  5) 1.000000 2^0.00 | p_th 2^-inf #set 1
[./src/rc5-dc.cc:3360] [14]     D800 -> (([12]    14000 ^ [13]        0) <<< [13] 29) 0.031250 2^-5.00 | p_th 2^-inf #set 2
[./src/rc5-dc.cc:3360] [15]    16000 -> (([13]        0 ^ [14]     D800) <<< [14]  2) 0.062500 2^-4.00 | p_th 2^-inf #set 0
[./src/rc5-dc.cc:3360] [16] 1F800000 -> (([14]     D800 ^ [15]    16000) <<< [15] 12) 0.015625 2^-6.00 | p_th 2^-inf #set 54
[./src/rc5-dc.cc:3393] [17]   313E40 = (6E7DA0F2 ^ 6E4C9EB2) -> (([15]    16000 ^ [16] 1F800000) <<< [16] 25) 0.001953 2^-9.00 | p_th 2^-11.00 #set 0
[./src/rc5-dc.cc:3393] [18] 29009D44 = (48B8A37E ^ 61B83E3A) -> (([16] 1F800000 ^ [17]   313E40) <<< [17] 18) 0.000061 2^-14.00 | p_th 2^-16.00 #set 0
[./src/rc5-dc.cc:2877]  prob_arr = 0.00 0.00 0.00 0.00 -1.00 -1.00 0.00 -4.00 -4.00 0.00 -4.00 -2.00 -4.00 0.00 -5.00 -4.00 -6.00 -9.00 -14.00
[./src/rc5-dc.cc:2886]    hw_arr =  1  1  1  0  1  1  0  4  4  0  2  2  2  0  4  3  6  9 10
[./src/rc5-dc.cc:2893] set sizes =     0     0     0     1     1     0     1     1     0     1     8     0     6     1     2     0    54     0     0
[./src/rc5-dc.cc:3108] Good filtered f1: 1

 */

 /* 
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  0) + 0)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  2) + 1)),
  ((double)1.0 / (double)((1U <<  3) + 1)),
  ((double)1.0 / (double)((1U <<  4) + 1)),
  ((double)1.0 / (double)((1U <<  5) + 1)),
  ((double)1.0 / (double)((1U <<  6) + 1)),//<<  6) + 1)),
  ((double)1.0 / (double)((1U <<  8) + 1)),// << 8) + 1)),
  ((double)1.0 / (double)((1U <<  13) + 1))//<< 13) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

 */


/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// Threshold table with RC5_FULL_FIB_LEN slots (18 initializers are given).
// Every entry has the form 1 / (2^k + c); the trailing comment on each
// line gives the array index, the fraction and its approximate log2.
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / (double)((1U <<  0) + 0), // [ 0] 1/1    = 2^0
  1.0 / (double)((1U <<  0) + 0), // [ 1] 1/1    = 2^0
  1.0 / (double)((1U <<  2) + 1), // [ 2] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 3] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 4] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 5] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 6] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 7] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 8] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 9] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [10] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [11] 1/5    ~ 2^-2.32
  1.0 / (double)((1U <<  3) + 1), // [12] 1/9    ~ 2^-3.17
  1.0 / (double)((1U <<  4) + 1), // [13] 1/17   ~ 2^-4.09
  1.0 / (double)((1U <<  5) + 1), // [14] 1/33   ~ 2^-5.04
  1.0 / (double)((1U <<  6) + 1), // [15] 1/65   ~ 2^-6.02
  1.0 / (double)((1U <<  8) + 1), // [16] 1/257  ~ 2^-8.01
  1.0 / (double)((1U << 13) + 1)  // [17] 1/8193 ~ 2^-13.00
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))



/* --- */

bool rc5_mid_round_add_approx_match(const uint32_t i, const WORD dx, 
												const WORD dy, const WORD dz, const uint32_t order_in)
{
  assert(order_in == RC5_ADD_APPROX_ORDER);
  uint32_t order = 0;//order_in;
  uint32_t i_lo = 0;
  if(i < order_in) {
	 i_lo = 0;
	 order = i + 1;
  } else { // i >= order_in
	 i_lo = (i - order_in + 1);
	 order = order_in;
  }
  uint32_t mask_order = (0xffffffff >> (32 - order)); 
  uint32_t N = (1U << order);
  //  uint32_t i_lo = (i + 1 - order);
  //  uint32_t N = (1U << order);
  //  uint32_t mask_order = (0xffffffff >> (32 - order)); 
  //  uint32_t mask_stride = mask_order << i_lo; 
  //  uint32_t mask_stride = (0xffffffff >> (32 - order)) << (i + 1 - order); 

#if 0 // DEBUG
  printf("[%s:%d] i %2d order %2d mask_stride %8X dz %8X\n", 
			__FILE__, __LINE__, i, order, mask_stride, dz);
#endif // #if 0 // DEBUG

  bool b_match = false;
  WORD y_seq = 0;
  WORD x_seq = 0;
  while((x_seq < N) && (!b_match)) {
	 //	 WORD x = (x_seq << i); // ...000***000...
#if 0 // original
	 WORD x = (x_seq << (i + 1 - order)) & mask_stride; // ...000***000...
	 WORD xx = (x ^ dx) & mask_stride;
#else
	 WORD x = (x_seq) & mask_order;
	 WORD xx = (x ^ (dx >> i_lo)) & mask_order;
#endif
	 while((y_seq < N) && (!b_match)) {
#if 0 // original
		//		WORD y = (y_seq << i); // ...000***000...
		WORD y = (y_seq << (i + 1 - order)) & mask_stride; // ...000***000...
		WORD yy = (y ^ dy) & mask_stride;
		//		WORD diff_stride = ((x - y) ^ (xx - yy)) & mask_stride;
		//		b_match = (((diff_stride >> i) & 1) == ((dz >> i) & 1)); // diff[i] ?= dz[i]
		WORD diff_stride = ((((x >> (i + 1 - order)) & mask_order) - ((y >> (i + 1 - order)) & mask_order)) ^ 
								  (((xx >> (i + 1 - order)) & mask_order) - ((yy >> (i + 1 - order)) & mask_order)));
		b_match = ((diff_stride & mask_order) == ((dz >> (i + 1 - order)) & mask_order));
#else
		WORD y = y_seq & mask_order;
		WORD yy = (y ^ (dy >> i_lo)) & mask_order;

		//		WORD z = (WORD)(x - y + N) & mask_order; // (x - y) mod 2^{order}
		//		WORD zz = (WORD)(xx - yy + N) & mask_order; 
		WORD z_mod = (WORD)(x - y + N) & mask_order; // (x - y) mod 2^{order}
		WORD zz_mod = (WORD)(xx - yy + N) & mask_order; 
		WORD z = (x - y) & mask_order; // (x - y) mod 2^{order}
		WORD zz = (xx - yy) & mask_order; 
		if(!(z == z_mod)) {
		  printf("[%s:%d] z %8X z_mod %8X\n", __FILE__, __LINE__, z, z_mod);
		}
		assert(z == z_mod);
		if(!(zz == zz_mod)) {
		  printf("[%s:%d] zz %8X zz_mod %8X\n", __FILE__, __LINE__, zz, zz_mod);
		}
		assert(zz == zz_mod);
		WORD diff = (z ^ zz) & mask_order;
		WORD dz_lo = (dz >> i_lo) & mask_order;
		b_match = (diff == dz_lo);
#endif
		y_seq++;
	 }
	 x_seq++;
  }
  return b_match;
}

/**
 * Last-round counterpart of the windowed subtraction check: the first
 * operand pair (x_in, xx_in) is known, only the second operand is searched.
 *
 * Within the window of at most order_in bits ending at bit i, every value y
 * is tried together with yy = y ^ dy, and the function reports whether
 *   ((x - y) ^ (xx - yy)) == dz   (all restricted to the window)
 * holds for at least one y.
 *
 * \param i        index of the most significant bit of the window.
 * \param x_in     first operand of the first pair member.
 * \param xx_in    first operand of the second pair member.
 * \param dy       XOR difference of the searched operand.
 * \param dz       expected XOR difference of the result.
 * \param order_in window width; must equal RC5_ADD_APPROX_ORDER.
 * \return true iff a matching y exists.
 */
bool rc5_last_round_add_approx_match(const uint32_t i, const WORD x_in, const WORD xx_in, 
                                     const WORD dy, const WORD dz, const uint32_t order_in)
{
  assert(order_in == RC5_ADD_APPROX_ORDER);

  // Shrink the window to (i + 1) bits when fewer than order_in bits exist.
  const uint32_t order = (i < order_in) ? (i + 1) : order_in;
  const uint32_t i_lo = (i + 1) - order;
  const uint32_t mask_order = (0xffffffff >> (32 - order)); 
  const uint32_t N = (1U << order);

  // Window contents of the known operands and of the target difference.
  const WORD x = (x_in >> i_lo) & mask_order;
  const WORD xx = (xx_in >> i_lo) & mask_order;
  const WORD dz_lo = (dz >> i_lo) & mask_order;

  for(WORD seq = 0; seq < N; seq++) {
    const WORD y = seq & mask_order;
    const WORD yy = (y ^ (dy >> i_lo)) & mask_order;

    // Two ways of reducing modulo 2^{order}; they are asserted to agree.
    const WORD z_mod = (WORD)(x - y + N) & mask_order;
    const WORD zz_mod = (WORD)(xx - yy + N) & mask_order; 
    const WORD z = (x - y) & mask_order;
    const WORD zz = (xx - yy) & mask_order; 
    if(z != z_mod) {
      printf("[%s:%d] z %8X z_mod %8X\n", __FILE__, __LINE__, z, z_mod);
    }
    assert(z == z_mod);
    if(zz != zz_mod) {
      printf("[%s:%d] zz %8X zz_mod %8X\n", __FILE__, __LINE__, zz, zz_mod);
    }
    assert(zz == zz_mod);

    if(((z ^ zz) & mask_order) == dz_lo) {
      return true;
    }
  }
  return false;
}


/* --- */

void test_add_sub_approx()
{
  assert(WORD_SIZE <= 16);
  assert(WORD_SIZE >= 6);
  // add
  uint32_t add_nequal_o1 = 0;
  uint32_t add_nequal_o2 = 0;
  uint32_t add_nequal_o3 = 0;
  uint32_t add_nequal_o4 = 0;
  uint32_t add_nequal_o5 = 0;
  uint32_t add_nequal_o6 = 0;
  // sub
  uint32_t sub_nequal_o1 = 0;
  uint32_t sub_nequal_o2 = 0;
  uint32_t sub_nequal_o3 = 0;
  uint32_t sub_nequal_o4 = 0;
  uint32_t sub_nequal_o5 = 0;
  uint32_t sub_nequal_o6 = 0;
  for(WORD x = 0; x < ALL_WORDS; x++) {
	 for(WORD y = 0; y < ALL_WORDS; y++) {
		if(1) { // ADD
		  WORD z = ADD(x, y);
		  WORD z_o1 = add_approx_o1(x, y);
		  if(z == z_o1) {
			 add_nequal_o1++;
		  }
		  WORD z_o2 = add_approx_o2(x, y);
		  if(z == z_o2) {
			 add_nequal_o2++;
		  }
		  WORD z_o3 = add_approx_o3(x, y);
		  if(z == z_o3) {
			 add_nequal_o3++;
		  }
		  WORD z_o4 = add_approx_o4(x, y);
		  if(z == z_o4) {
			 add_nequal_o4++;
		  }
		  WORD z_o5 = add_approx_o5(x, y);
		  if(z == z_o5) {
			 add_nequal_o5++;
		  }
		  WORD z_o6 = add_approx_o6(x, y);
		  if(z == z_o6) {
			 add_nequal_o6++;
		  }
		}
		if(1) { // SUB
		  WORD z = SUB(x, y);
		  WORD z_o1 = sub_approx_o1(x, y);
		  if(z == z_o1) {
			 sub_nequal_o1++;
		  }
		  WORD z_o2 = sub_approx_o2(x, y);
		  if(z == z_o2) {
			 sub_nequal_o2++;
		  }
		  WORD z_o3 = sub_approx_o3(x, y);
		  if(z == z_o3) {
			 sub_nequal_o3++;
		  }
		  WORD z_o4 = sub_approx_o4(x, y);
		  if(z == z_o4) {
			 sub_nequal_o4++;
		  }
		  WORD z_o5 = sub_approx_o5(x, y);
		  if(z == z_o5) {
			 sub_nequal_o5++;
		  }
		  WORD z_o6 = sub_approx_o6(x, y);
		  if(z == z_o6) {
			 sub_nequal_o6++;
		  }
		}
	 }
  }
  uint64_t nall = (ALL_WORDS * ALL_WORDS);

  double add_ratio_o1 = ((double)add_nequal_o1 / (double)nall);
  double sub_ratio_o1 = ((double)sub_nequal_o1 / (double)nall);
  printf("[%s:%d] #equal o1: [%5d %5d / %5lld] %10.9f %10.9f %4.2f %% %4.2f %%\n", 
			__FILE__, __LINE__,  add_nequal_o1, sub_nequal_o1, (long long int)nall, 
			add_ratio_o1, sub_ratio_o1, (add_ratio_o1 * 100.00), (sub_ratio_o1 * 100.00));
  assert(add_nequal_o1 == sub_nequal_o1);

  double add_ratio_o2 = ((double)add_nequal_o2 / (double)nall);
  double sub_ratio_o2 = ((double)sub_nequal_o2 / (double)nall);
  printf("[%s:%d] #equal o2: [%5d %5d / %5lld] %10.9f %10.9f %4.2f %% %4.2f %%\n", 
			__FILE__, __LINE__,  add_nequal_o2, sub_nequal_o2, (long long int)nall, 
			add_ratio_o2, sub_ratio_o2, (add_ratio_o2 * 100.00), (sub_ratio_o2 * 100.00));
  assert(add_nequal_o2 == sub_nequal_o2);

  double add_ratio_o3 = ((double)add_nequal_o3 / (double)nall);
  double sub_ratio_o3 = ((double)sub_nequal_o3 / (double)nall);
  printf("[%s:%d] #equal o3: [%5d %5d / %5lld] %10.9f %10.9f %4.2f %% %4.2f %%\n", 
			__FILE__, __LINE__,  add_nequal_o3, sub_nequal_o3, (long long int)nall, 
			add_ratio_o3, sub_ratio_o3, (add_ratio_o3 * 100.00), (sub_ratio_o3 * 100.00));
  assert(add_nequal_o3 == sub_nequal_o3);

  double add_ratio_o4 = ((double)add_nequal_o4 / (double)nall);
  double sub_ratio_o4 = ((double)sub_nequal_o4 / (double)nall);
  printf("[%s:%d] #equal o4: [%5d %5d / %5lld] %10.9f %10.9f %4.2f %% %4.2f %%\n", 
			__FILE__, __LINE__,  add_nequal_o4, sub_nequal_o4, (long long int)nall, 
			add_ratio_o4, sub_ratio_o4, (add_ratio_o4 * 100.00), (sub_ratio_o4 * 100.00));
  assert(add_nequal_o4 == sub_nequal_o4);

  double add_ratio_o5 = ((double)add_nequal_o5 / (double)nall);
  double sub_ratio_o5 = ((double)sub_nequal_o5 / (double)nall);
  printf("[%s:%d] #equal o5: [%5d %5d / %5lld] %10.9f %10.9f %4.2f %% %4.2f %%\n", 
			__FILE__, __LINE__,  add_nequal_o5, sub_nequal_o5, (long long int)nall, 
			add_ratio_o5, sub_ratio_o5, (add_ratio_o5 * 100.00), (sub_ratio_o5 * 100.00));
  assert(add_nequal_o5 == sub_nequal_o5);

  double add_ratio_o6 = ((double)add_nequal_o6 / (double)nall);
  double sub_ratio_o6 = ((double)sub_nequal_o6 / (double)nall);
  printf("[%s:%d] #equal o6: [%5d %5d / %5lld] %10.9f %10.9f %4.2f %% %4.2f %%\n", 
			__FILE__, __LINE__,  add_nequal_o6, sub_nequal_o6, (long long int)nall, 
			add_ratio_o6, sub_ratio_o6, (add_ratio_o6 * 100.00), (sub_ratio_o6 * 100.00));
  assert(add_nequal_o6 == sub_nequal_o6);
}

/* --- */
#if 0
/**
 * Report, for half-round (2*NROUNDS - 1), which of the six subtraction
 * approximations sub_approx_o1..o6 reproduce the exact value for each of
 * the two executions X_first / X_second.
 *
 * The exact value x is rebuilt as ROTL(X[2R-1] ^ X[2R], X[2R] & RC5_ROT_MASK)
 * and is asserted to equal SUB(X[2R+1], S[2R]) (R = NROUNDS). One line per
 * execution is printed with a 1 for every order whose approximation equals
 * the exact value and a 0 otherwise.
 *
 * \param S        expanded key table; entry S[2*NROUNDS] is used.
 * \param X_first  state words of the first execution (2*NROUNDS + 3 entries).
 * \param X_second state words of the second execution (same length).
 */
void rc5_last_but_one_round_add_approx(const WORD S[RC5_STAB_LEN_T], std::vector<uint32_t> X_first, std::vector<uint32_t> X_second)
{
  assert(X_first.size() == ((2*NROUNDS) + 3));
  assert(X_first.size() == X_second.size());

  const uint32_t rot_const = X_first[((2*NROUNDS) + 0)] & RC5_ROT_MASK;
  const uint32_t rrot_const = X_second[((2*NROUNDS) + 0)] & RC5_ROT_MASK;
  const WORD x = RC5_ROTL((X_first[(2*NROUNDS) - 1] ^ X_first[((2*NROUNDS) + 0)]), rot_const);
  const WORD xx = RC5_ROTL((X_second[(2*NROUNDS) - 1] ^ X_second[((2*NROUNDS) + 0)]), rrot_const);
  const WORD y = X_first[((2*NROUNDS) + 1)];
  const WORD yy = X_second[((2*NROUNDS) + 1)];
  const WORD key = S[((2*NROUNDS) + 0)];

  // y - key = x
#if 1 // DEBUG
  const WORD x_tmp = SUB(y, key);
  const WORD xx_tmp = SUB(yy, key);
  assert(x_tmp == x);
  assert(xx_tmp == xx);
#endif // #if 1 // DEBUG

  WORD approx[6];
  uint32_t nequal_x[6];
  uint32_t nequal_xx[6];

  // First execution: orders 1..6.
  approx[0] = sub_approx_o1(y, key);
  approx[1] = sub_approx_o2(y, key);
  approx[2] = sub_approx_o3(y, key);
  approx[3] = sub_approx_o4(y, key);
  approx[4] = sub_approx_o5(y, key);
  approx[5] = sub_approx_o6(y, key);
  for(uint32_t k = 0; k < 6; k++) {
    nequal_x[k] = (x == approx[k]) ? 1 : 0;
  }
  printf("[%s:%d] HRound#%2d  nequal_x %3d %3d %3d %3d %3d %3d\n", 
         __FILE__, __LINE__, (2*NROUNDS) - 1, nequal_x[0], nequal_x[1], nequal_x[2], nequal_x[3], nequal_x[4], nequal_x[5]);

  // Second execution: orders 1..6.
  approx[0] = sub_approx_o1(yy, key);
  approx[1] = sub_approx_o2(yy, key);
  approx[2] = sub_approx_o3(yy, key);
  approx[3] = sub_approx_o4(yy, key);
  approx[4] = sub_approx_o5(yy, key);
  approx[5] = sub_approx_o6(yy, key);
  for(uint32_t k = 0; k < 6; k++) {
    nequal_xx[k] = (xx == approx[k]) ? 1 : 0;
  }
  printf("[%s:%d] HRound#%2d nequal_xx %3d %3d %3d %3d %3d %3d\n", 
         __FILE__, __LINE__, (2*NROUNDS) - 1, nequal_xx[0], nequal_xx[1], nequal_xx[2], nequal_xx[3], nequal_xx[4], nequal_xx[5]);
}
/**
 * Report, for half-round (2*NROUNDS), which of the six subtraction
 * approximations sub_approx_o1..o6 reproduce the exact value for each of
 * the two executions X_first / X_second.
 *
 * The exact value x is rebuilt as ROTL(X[2R] ^ X[2R+1], X[2R+1] & RC5_ROT_MASK)
 * and is asserted to equal SUB(X[2R+2], S[2R+1]) (R = NROUNDS). One line per
 * execution is printed with a 1 for every order whose approximation equals
 * the exact value and a 0 otherwise.
 *
 * \param S        expanded key table; entry S[2*NROUNDS + 1] is used.
 * \param X_first  state words of the first execution (2*NROUNDS + 3 entries).
 * \param X_second state words of the second execution (same length).
 */
void rc5_last_round_add_approx(const WORD S[RC5_STAB_LEN_T], std::vector<uint32_t> X_first, std::vector<uint32_t> X_second)
{
  assert(X_first.size() == ((2*NROUNDS) + 3));
  assert(X_first.size() == X_second.size());

  const uint32_t rot_const = X_first[((2*NROUNDS) + 1)] & RC5_ROT_MASK;
  const uint32_t rrot_const = X_second[((2*NROUNDS) + 1)] & RC5_ROT_MASK;
  const WORD x = RC5_ROTL((X_first[(2*NROUNDS)] ^ X_first[((2*NROUNDS) + 1)]), rot_const);
  const WORD xx = RC5_ROTL((X_second[(2*NROUNDS)] ^ X_second[((2*NROUNDS) + 1)]), rrot_const);
  const WORD y = X_first[((2*NROUNDS) + 2)];
  const WORD yy = X_second[((2*NROUNDS) + 2)];
  const WORD key = S[((2*NROUNDS) + 1)];

  // y - key = x
#if 1 // DEBUG
  const WORD x_tmp = SUB(y, key);
  const WORD xx_tmp = SUB(yy, key);
  assert(x_tmp == x);
  assert(xx_tmp == xx);
#endif // #if 1 // DEBUG

  WORD approx[6];
  uint32_t nequal_x[6];
  uint32_t nequal_xx[6];

  // First execution: orders 1..6.
  approx[0] = sub_approx_o1(y, key);
  approx[1] = sub_approx_o2(y, key);
  approx[2] = sub_approx_o3(y, key);
  approx[3] = sub_approx_o4(y, key);
  approx[4] = sub_approx_o5(y, key);
  approx[5] = sub_approx_o6(y, key);
  for(uint32_t k = 0; k < 6; k++) {
    nequal_x[k] = (x == approx[k]) ? 1 : 0;
  }
  printf("[%s:%d] HRound#%2d  nequal_x %3d %3d %3d %3d %3d %3d\n", 
         __FILE__, __LINE__, (2*NROUNDS), nequal_x[0], nequal_x[1], nequal_x[2], nequal_x[3], nequal_x[4], nequal_x[5]);

  // Second execution: orders 1..6.
  approx[0] = sub_approx_o1(yy, key);
  approx[1] = sub_approx_o2(yy, key);
  approx[2] = sub_approx_o3(yy, key);
  approx[3] = sub_approx_o4(yy, key);
  approx[4] = sub_approx_o5(yy, key);
  approx[5] = sub_approx_o6(yy, key);
  for(uint32_t k = 0; k < 6; k++) {
    nequal_xx[k] = (xx == approx[k]) ? 1 : 0;
  }
  printf("[%s:%d] HRound#%2d nequal_xx %3d %3d %3d %3d %3d %3d\n", 
         __FILE__, __LINE__, (2*NROUNDS), nequal_xx[0], nequal_xx[1], nequal_xx[2], nequal_xx[3], nequal_xx[4], nequal_xx[5]);
}
#endif


/* --- */
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// determined as average of MINs over 32 keys 
//
// Threshold table with RC5_FULL_FIB_LEN slots (18 initializers are given).
// Every entry has the form 1 / (2^k + c); the trailing comment on each
// line gives the array index, the fraction and its approximate log2.
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / (double)((1U <<  0) + 0), // [ 0] 1/1     = 2^0
  1.0 / (double)((1U <<  0) + 0), // [ 1] 1/1     = 2^0
  1.0 / (double)((1U <<  0) + 1), // [ 2] 1/2     = 2^-1
  1.0 / (double)((1U <<  2) + 1), // [ 3] 1/5     ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 4] 1/5     ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 5] 1/5     ~ 2^-2.32
  1.0 / (double)((1U <<  3) + 1), // [ 6] 1/9     ~ 2^-3.17
  1.0 / (double)((1U <<  4) + 1), // [ 7] 1/17    ~ 2^-4.09
  1.0 / (double)((1U <<  4) + 1), // [ 8] 1/17    ~ 2^-4.09
  1.0 / (double)((1U <<  5) + 1), // [ 9] 1/33    ~ 2^-5.04
  1.0 / (double)((1U <<  6) + 1), // [10] 1/65    ~ 2^-6.02
  1.0 / (double)((1U <<  6) + 1), // [11] 1/65    ~ 2^-6.02
  1.0 / (double)((1U <<  8) + 1), // [12] 1/257   ~ 2^-8.01
  1.0 / (double)((1U << 10) + 1), // [13] 1/1025  ~ 2^-10.00
  1.0 / (double)((1U << 13) + 1), // [14] 1/8193  ~ 2^-13.00
  1.0 / (double)((1U << 13) + 1), // [15] 1/8193  ~ 2^-13.00 (old trailing note: << 6)
  1.0 / (double)((1U << 14) + 1), // [16] 1/16385 ~ 2^-14.00 (old trailing note: << 8)
  1.0 / (double)((1U << 16) + 1)  // [17] 1/65537 ~ 2^-16.00 (old trailing note: << 13)
  //  ((double)1.0 / (double)((1U <<  8) + 1)),// << 8) + 1)),
  //  ((double)1.0 / (double)((1U <<  9) + 1))//<< 13) + 1))
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// Threshold table with RC5_FULL_FIB_LEN slots (18 initializers are given).
// Every entry has the form 1 / (2^k + c); the trailing comment on each
// line gives the array index, the fraction and its approximate log2.
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / (double)((1U <<  0) + 0), // [ 0] 1/1     = 2^0
  1.0 / (double)((1U <<  0) + 0), // [ 1] 1/1     = 2^0
  1.0 / (double)((1U <<  1) + 1), // [ 2] 1/3     ~ 2^-1.58
  1.0 / (double)((1U <<  1) + 1), // [ 3] 1/3     ~ 2^-1.58
  1.0 / (double)((1U <<  1) + 1), // [ 4] 1/3     ~ 2^-1.58
  1.0 / (double)((1U <<  1) + 1), // [ 5] 1/3     ~ 2^-1.58
  1.0 / (double)((1U <<  1) + 1), // [ 6] 1/3     ~ 2^-1.58
  1.0 / (double)((1U <<  1) + 1), // [ 7] 1/3     ~ 2^-1.58
  1.0 / (double)((1U <<  1) + 1), // [ 8] 1/3     ~ 2^-1.58
  1.0 / (double)((1U <<  2) + 1), // [ 9] 1/5     ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [10] 1/5     ~ 2^-2.32
  1.0 / (double)((1U <<  3) + 1), // [11] 1/9     ~ 2^-3.17
  1.0 / (double)((1U <<  4) + 1), // [12] 1/17    ~ 2^-4.09
  1.0 / (double)((1U <<  4) + 1), // [13] 1/17    ~ 2^-4.09
  1.0 / (double)((1U <<  5) + 1), // [14] 1/33    ~ 2^-5.04
  1.0 / (double)((1U <<  7) + 1), // [15] 1/129   ~ 2^-7.01  (old trailing note: << 6)
  1.0 / (double)((1U << 14) + 1), // [16] 1/16385 ~ 2^-14.00 (old trailing note: << 8)
  1.0 / (double)((1U << 16) + 1)  // [17] 1/65537 ~ 2^-16.00 (old trailing note: << 13)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

/*
RC5_P_THRES_ARRAY = [ 0] -1.58 [ 1] -1.58 [ 2] -1.58 [ 3] -2.32 [ 4] -2.32 [ 5] -3.17 [ 6] -4.09 [ 7] -4.09 [ 8] -5.04 [ 9] -7.01 [10] -14.00 [11] -16.00 
        FIB_ARRAY = [ 0]  4 [ 1]  4 [ 2]  5 [ 3]  6 [ 4]  7 [ 5]  8 [ 6]  9 [ 7] 10 [ 8] 12 [ 9] 13 [10] 14 [11] 16 
		  [./tests/rc5-tests.cc:514] #GoUP sets of trails: 259 (2^8.016808)
*/

/* --- */
#if 0 // ROT-const
				// Disabled fragment (its enclosing function is not part of this dump).
				// After an approximate match, once bit index i reaches the top of the
				// log2(WORD_SIZE)-bit field that starts at bit rot_const_prev (or the
				// last bit w - 1 when that field would run past the word), call
				// rc5_last_round_add_approx_rot_const_next() with logn as the order.
				if(b_approx_match) {
				  uint32_t logn = (uint32_t)log2(WORD_SIZE);
				  // only word sizes 16 and 32 are expected here
				  assert((logn == 4) || (logn == 5));
				  const uint32_t w = WORD_SIZE;
				  const uint32_t r = rot_const_prev;
				  const uint32_t L = logn; // log2(n) 
				  // (L + r - 1) is the index of the top bit of the L-bit field starting at r
				  if((((L + r - 1) < WORD_SIZE) && (i == (L + r - 1))) || 
					  (((L + r - 1) >= WORD_SIZE) && (i == (w - 1)))) {
					 //					 rc5_last_round_add_approx_rot_const_next(i, y, yy, dk, new_dx, order, rot_const_prev, &new_rot_const_next_bitmask);
					 rc5_last_round_add_approx_rot_const_next(i, y, yy, dk, new_dx, logn, rot_const_prev, &new_rot_const_next_bitmask);
				  }
				}
#endif // #if 0 // ROT-const

/* --- */

	 //	 // Guess S[6] over all possibilities 0..31
	 //	 std::vector<uint32_t>::const_iterator vec_iter = rot_const_next_vec.begin();
	 //	 while(vec_iter != rot_const_next_vec.end()) {
	 //		s = *vec_iter;
		//		printf("[%s:%d] s %2d\n", __FILE__, __LINE__, s);
	 //		vec_iter++;

/* --- */

	 printf("\n");
	 std::vector<uint32_t>::iterator rot_const_iter = rot_const_next_vec.begin();
	 while(rot_const_iter != rot_const_next_vec.end()) {
		uint32_t r = *rot_const_iter;
		printf("%2d ", r);
		rot_const_iter++;
	 }
	 printf("\n");


/* --- */
  //  const uint32_t rot_const_dummy = 0;
  //  uint32_t rot_const_next_dummy = 0;
  //  std::vector<uint32_t> rot_const_next_vec_dummy;

			 //			 b_approx_match = rc5_last_round_add_approx_match(i, y, yy, dk, dx, order, rot_const_dummy, &rot_const_next_dummy);

/* --- */
#if 0
  // Disabled fragment: walk dx_vec and rot_const_next_vec in lock-step and
  // print a progress line (overwritten in place via '\r') for every entry
  // whose rotation constant is non-zero.
  //
  // Changes w.r.t. the previous revision:
  //  - size() returns size_t and is now printed with %zu instead of %d;
  //  - the loop also stops when dx_vec runs out, so dx_vec_iter is never
  //    dereferenced at end() if the two vectors differ in length.
  std::vector<uint32_t>::iterator dx_vec_iter = dx_vec->begin();
  std::vector<uint32_t>::iterator rot_vec_iter = rot_const_next_vec->begin();
  i = 0;
  while((rot_vec_iter != rot_const_next_vec->end()) && (dx_vec_iter != dx_vec->end())) {
	 uint32_t r = *rot_vec_iter;
	 uint32_t dx = *dx_vec_iter;
	 if(r) {
		printf("\r[%s:%d] %5d / %5zu | dx %8X r %2d", __FILE__, __LINE__, i, rot_const_next_vec->size(), dx, r);
		fflush(stdout);
	 }
	 //	 assert(r == 0);
	 rot_vec_iter++;
	 dx_vec_iter++;
	 i++;
  }
#endif

/* --- */
  //  std::vector<uint32_t>::iterator rot_vec_iter = rot_const_next_vec.begin();
  //  while(rot_vec_iter != rot_const_next_vec.end()) {
  //	 uint32_t r = *rot_vec_iter;
	 //	 printf("[%s:%d] %d\n", __FILE__, __LINE__, r);
  //	 assert(r == 0);
  //	 rot_vec_iter++;
  //  }
  //  printf("[%s:%d] diffs %d rconst %d\n", __FILE__, __LINE__, dx_set_all.size(), rot_const_next_vec.size());
  //  assert(dx_set_all.size() == rot_const_next_vec.size());

/* --- */
			 //			 printf("[%s:%d] %8X %8X %8X (%8X | %8X) = %d \n", __FILE__, __LINE__, x_in, y_aug, (x_in - y_aug), (((x_in - y_aug) & mask_lo) << nbits_hi), (((x_in - y_aug) & mask_hi) >> (32 - nbits_hi)), r_next);

/* --- */

		  //		  printf("[%s:%d] %8X %8X %8X %8X | %8X\n", __FILE__, __LINE__, x_in, y_aug, (x_in - y_aug), (((x_in - y_aug) & mask_lo) << nbits_hi), (((x_in - y_aug) & mask_hi) >> (32 - nbits_hi)));


/* --- */

	 // Fragment (enclosing function not part of this dump): when the current
	 // bit index i equals r and a match was found, derive r_next from the
	 // bits of y that start at position i, masked with RC5_ROT_MASK.
	 //  - case 1: the L-bit field starting at i fits inside the w-bit word;
	 //  - case 2: the field runs past bit w - 1, so the low bits of y are
	 //    moved up by (w - i) and OR-ed with the high bits moved down by i.
	 // The DEBUG sections assert that r_next is 0 in both cases.
	 if((i == r) && (b_match)) {

		assert(y == 0);
		assert(seq == 0);

		if((i + L - 1) < w) { // case 1
		  r_next = (y >> i) & RC5_ROT_MASK;
#if 1 // DEBUG
		  // NOTE(review): r_next_tmp is computed from y exactly like r_next, so
		  // the first assert cannot fail; case 2 below uses yy instead -- confirm
		  // which one was intended.
		  uint32_t r_next_tmp = (y >> i) & RC5_ROT_MASK;
		  assert(r_next == r_next_tmp);
		  if(r_next != 0) {
			 printf("[%s:%d] rot_const %d r_next %2d %2d y %X\n", __FILE__, __LINE__, rot_const, r_next, r_next_tmp, y);
		  }
		  //		  printf("[%s:%d] i %d diff_stride %X y %X yy %X x %X xx %X\n", __FILE__, __LINE__, i, diff_stride, y, yy, x, xx);
		  assert(r_next == 0);
#endif // #if 1 // DEBUG
		}

		if((i + L - 1) >= w) { // case 2
		  uint32_t mask_hi = (0xffffffff << i) & MASK; // 111000...00
		  // NOTE(review): for (i + L - 1) == w the shift amount below is 32,
		  // which is undefined for a 32-bit operand -- confirm the intended
		  // width of mask_lo.
		  uint32_t mask_lo = (0xffffffff >> (32 - (i + L - 1 - w))); // 00...00011
		  r_next = (((y & mask_lo) << (w - i)) | ((y & mask_hi) >> i)) & RC5_ROT_MASK;
#if 1 // DEBUG
		  // same extraction applied to yy; asserted to agree with the one from y
		  uint32_t r_next_tmp = (((yy & mask_lo) << (w - i)) | ((yy & mask_hi) >> i)) & RC5_ROT_MASK;
		  assert(r_next == r_next_tmp);
		  if(r_next != 0) {
			 printf("[%s:%d] rot_const %d r_next %2d %2d y %X\n", __FILE__, __LINE__, rot_const, r_next, r_next_tmp, y);
		  }
		  //		  printf("[%s:%d] i %d diff_stride %X y %X yy %X x %X xx %X\n", __FILE__, __LINE__, i, diff_stride, y, yy, x, xx);
		  assert(r_next == 0);
#endif // #if 1 // DEBUG
		}

	 }

  // NOTE(review): the condition tests r_next but the value printed is
  // *rot_const_next -- presumably r_next is stored there elsewhere; verify.
  if(r_next != 0)
	 printf("[%s:%d] rot_const_next %2d\n", __FILE__, __LINE__, *rot_const_next);

/* --- */

/*
 * See \ref xdp_sub_fixed_x_approx_rec
 *
 * Decide whether bit i of the output difference dz can be produced by the
 * subtraction, given the two known first operands x, xx and the XOR
 * difference dy of the second operand. Only the window of at most `order`
 * bits ending at bit i is searched: every window value y is tried together
 * with yy = y ^ dy, and a match is reported when bit i of
 * ((x - y) ^ (xx - yy)) equals bit i of dz.
 *
 * \param i     index of the bit being checked (top bit of the window).
 * \param x     first operand of the first pair member.
 * \param xx    first operand of the second pair member.
 * \param dy    XOR difference of the searched operand.
 * \param dz    expected XOR difference of the result.
 * \param order window width in bits; the shifts are defined for 1..31.
 * \return true iff some window value y gives the expected bit i.
 *
 * Changes w.r.t. the previous revision:
 *  - the candidate was built as (seq << i), i.e. above the window, so only
 *    bit 0 of seq had any effect and just two distinct candidates were ever
 *    tried; it is now placed at the bottom of the window (seq << i_lo);
 *  - for i + 1 < order the expression (i + 1 - order) wrapped around and
 *    produced an undefined shift; the window is now shortened to the
 *    (i + 1) available bits.
 */
bool rc5_last_round_add_approx_match(const uint32_t i, const WORD x, const WORD xx, 
                                     const WORD dy, const WORD dz, const uint32_t order)

{
  // Effective window width and the position of its lowest bit.
  const uint32_t order_eff = (i < order) ? (i + 1) : order;
  const uint32_t i_lo = (i + 1) - order_eff;
  const uint32_t N = (1U << order_eff);
  const uint32_t mask_stride = (0xffffffff >> (32 - order_eff)) << i_lo; // ...000***000...
#if 0 // DEBUG
  printf("[%s:%d] i %2d order %2d mask_stride %8X dz %8X\n", 
         __FILE__, __LINE__, i, order, mask_stride, dz);
#endif // #if 0 // DEBUG
  bool b_match = false;
  WORD seq = 0;
  while((seq < N) && (!b_match)) {
    const WORD y = (seq << i_lo) & mask_stride; // ...000***000...
    const WORD yy = (y ^ dy) & mask_stride;
    const WORD diff_stride = ((x - y) ^ (xx - yy)) & mask_stride;
    b_match = (((diff_stride >> i) & 1) == ((dz >> i) & 1)); // diff[i] ?= dz[i]
    seq++;
  }
  return b_match;
}

/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// determined as average of MINs over 32 keys 
//
// Threshold table with RC5_FULL_FIB_LEN slots (18 initializers are given).
// Every entry has the form 1 / (2^k + c); the trailing comment on each
// line gives the array index, the fraction and its approximate log2.
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / (double)((1U <<  0) + 0), // [ 0] 1/1     = 2^0
  1.0 / (double)((1U <<  0) + 0), // [ 1] 1/1     = 2^0
  1.0 / (double)((1U <<  0) + 1), // [ 2] 1/2     = 2^-1
  1.0 / (double)((1U <<  2) + 1), // [ 3] 1/5     ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 4] 1/5     ~ 2^-2.32
  1.0 / (double)((1U <<  2) + 1), // [ 5] 1/5     ~ 2^-2.32
  1.0 / (double)((1U <<  3) + 1), // [ 6] 1/9     ~ 2^-3.17
  1.0 / (double)((1U <<  4) + 1), // [ 7] 1/17    ~ 2^-4.09
  1.0 / (double)((1U <<  4) + 1), // [ 8] 1/17    ~ 2^-4.09
  1.0 / (double)((1U <<  5) + 1), // [ 9] 1/33    ~ 2^-5.04
  1.0 / (double)((1U <<  6) + 1), // [10] 1/65    ~ 2^-6.02
  1.0 / (double)((1U <<  6) + 1), // [11] 1/65    ~ 2^-6.02
  1.0 / (double)((1U <<  8) + 1), // [12] 1/257   ~ 2^-8.01
  1.0 / (double)((1U << 10) + 1), // [13] 1/1025  ~ 2^-10.00
  1.0 / (double)((1U << 13) + 1), // [14] 1/8193  ~ 2^-13.00
  1.0 / (double)((1U << 12) + 1), // [15] 1/4097  ~ 2^-12.00 (old trailing note: << 6)
  1.0 / (double)((1U << 11) + 1), // [16] 1/2049  ~ 2^-11.00 (old trailing note: << 8)
  1.0 / (double)((1U << 14) + 1)  // [17] 1/16385 ~ 2^-14.00 (old trailing note: << 13)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))


/* --- */

/* 
RC5 Averages over 32 random keys

RC5_P_THRES_ARRAY = [ 0] -3.17 [ 1] -4.09 [ 2] -4.09 [ 3] -5.04 [ 4] -6.02 [ 5] -6.02 [ 6] -8.01 [ 7] -10.00 [ 8] -13.00 [ 9] -12.00 [10] -11.00 [11] -14.00
        FIB_ARRAY = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  7 [ 4]  8 [ 5]  9 [ 6] 10 [ 7] 11 [ 8] 13 [ 9] 14 [10] 15 [11] 16
		  [./tests/rc5-tests.cc:514] #GoUP sets of trails: 391 (2^8.611025)
		  [./tests/rc5-tests.cc:569] Test OK!
		  [./tests/rc5-tests.cc:1148] Average g_max hw_arr = 1.00 1.00 1.00 0.00 2.03 2.44 3.22 4.69 5.06 6.03 6.72 7.97 9.41 10.62 11.38 13.09 13.72 14.28 16.03
		  [./tests/rc5-tests.cc:1155] Average g_min prob_arr = 0.00 0.00 0.00 0.00 -1.81 -2.14 -2.25 -3.18 -3.62 -4.87 -5.27 -7.24 -7.51 -10.17 -10.11 -9.90 -15.37 -12.15 -12.79
		  [./tests/rc5-tests.cc:1168] Add approx orders
HR[ 0] =   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 1] =   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 2] =   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 3] =   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 4] =   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 5] =   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 6] =   .   .   .  17  45  57  42  24  15   6   .   2   2   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 7] =   .   .   .  13  40  67  45  24   9   4   2   5   .   .   .   .   .   .   .   1   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 8] =   .   .   4  19  58  57  30  19  13   8   1   1   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[ 9] =   .   .   .  20  50  52  44  18  11   4   6   1   3   1   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[10] =   .   .   1  13  47  66  36  17  14  11   3   1   .   .   .   .   1   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[11] =   .   .   1   9  41  59  40  35  16   5   4   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[12] =   .   .   .  12  36  54  44  26  19   7   4   5   1   1   1   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[13] =   .   .   1  19  44  48  37  28  18   6   4   3   2   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[14] =   .   .   .  11  33  58  33  38  21   6   2   4   2   1   1   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[15] =   .   .   .   3  39  52  54  36  15   8   .   2   .   .   1   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[16] =   .   .   .   7  36  52  54  27  15   9   6   4   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[17] =   .   .   .   5  29  56  51  35  19   7   2   3   1   1   .   1   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
HR[18] =   .   .   .   8  37  58  44  28  19  11   3   .   1   1   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .   .
[./tests/rc5-tests.cc:1186] Best approx orders
HR[ 6] cnt  57 order  5
HR[ 7] cnt  67 order  5
HR[ 8] cnt  58 order  4
HR[ 9] cnt  52 order  5
HR[10] cnt  66 order  5
HR[11] cnt  59 order  5
HR[12] cnt  54 order  5
HR[13] cnt  48 order  5
HR[14] cnt  58 order  5
HR[15] cnt  54 order  6
HR[16] cnt  54 order  6
HR[17] cnt  56 order  5
HR[18] cnt  58 order  5

real    16m10.165s
user    16m7.368s
sys     0m0.020s


 */

/* --- */

  WORD x_o1 = sub_approx_o1(y, key);
  if(x == x_o1) {
	 nequal_x_o1++;
  }
  WORD x_o2 = sub_approx_o2(y, key);
  if(x == x_o2) {
	 nequal_x_o2++;
  }
  WORD x_o3 = sub_approx_o3(y, key);
  if(x == x_o3) {
	 nequal_x_o3++;
  }
  WORD x_o4 = sub_approx_o4(y, key);
  if(x == x_o4) {
	 nequal_x_o4++;
  }
  WORD x_o5 = sub_approx_o5(y, key);
  if(x == x_o5) {
	 nequal_x_o5++;
  }
  WORD x_o6 = sub_approx_o6(y, key);
  if(x == x_o6) {
	 nequal_x_o6++;
  }
  printf("[%s:%d] HRound#%2d  nequal_x %3d %3d %3d %3d %3d %3d\n", 
			__FILE__, __LINE__, i, nequal_x_o1, nequal_x_o2, nequal_x_o3, nequal_x_o4, nequal_x_o5, nequal_x_o6);

  WORD xx_o1 = sub_approx_o1(yy, key);
  if(xx == xx_o1) {
	 nequal_xx_o1++;
  }
  WORD xx_o2 = sub_approx_o2(yy, key);
  if(xx == xx_o2) {
	 nequal_xx_o2++;
  }
  WORD xx_o3 = sub_approx_o3(yy, key);
  if(xx == xx_o3) {
	 nequal_xx_o3++;
  }
  WORD xx_o4 = sub_approx_o4(yy, key);
  if(xx == xx_o4) {
	 nequal_xx_o4++;
  }
  WORD xx_o5 = sub_approx_o5(yy, key);
  if(xx == xx_o5) {
	 nequal_xx_o5++;
  }
  WORD xx_o6 = sub_approx_o6(yy, key);
  if(xx == xx_o6) {
	 nequal_xx_o6++;
  }
  printf("[%s:%d] HRound#%2d nequal_xx %3d %3d %3d %3d %3d %3d\n", 
			__FILE__, __LINE__, i, nequal_xx_o1, nequal_xx_o2, nequal_xx_o3, nequal_xx_o4, nequal_xx_o5, nequal_xx_o6);

/* --- */

/**
 * For one RC5 half-round and a pair of encryptions, report which of the six
 * subtraction approximations sub_approx_o1() ... sub_approx_o6() reproduce
 * the exact half-round input.
 *
 * For each encryption the half-round input is recomputed as
 * RC5_ROTL(X[i-1] ^ X[i], X[i] & RC5_ROT_MASK) and compared against
 * sub_approx_oK(X[i+1], S[i]) for K = 1..6. Two lines of 0/1 hit flags are
 * printed: one for the first encryption, one for the second.
 *
 * @param i        half-round index. X[i-1], X[i] and X[i+1] are all read, so
 *                 i must satisfy 1 <= i and (i + 1) < X_first.size().
 * @param S        key table; only S[i] is read.
 * @param X_first  words of the first encryption, (2*NROUNDS) + 3 entries
 *                 (presumably the per-half-round states; confirm with caller).
 * @param X_second words of the second encryption, same length as X_first.
 */
void rc5_single_round_add_approx(const uint32_t i, const WORD S[RC5_STAB_LEN_T], 
											std::vector<uint32_t> X_first, std::vector<uint32_t> X_second)
{
  assert(X_first.size() == ((2*NROUNDS) + 3));
  assert(X_first.size() == X_second.size());
  // Both neighbours of X[i] are accessed below, so i has to be an interior
  // index. The previous check (i < size) let i == 0 and i == size - 1 through
  // and those read outside the vectors.
  assert(i >= 1);
  assert((i + 1) < X_first.size());

  const uint32_t rot_const = X_first[i] & RC5_ROT_MASK;
  const uint32_t rrot_const = X_second[i] & RC5_ROT_MASK;
  const WORD x = RC5_ROTL((X_first[i-1] ^ X_first[i]), rot_const);
  const WORD xx = RC5_ROTL((X_second[i-1] ^ X_second[i]), rrot_const);
  const WORD y = X_first[i+1];
  const WORD yy = X_second[i+1];
  const WORD key = S[i];

  // y - key = x
#if 1 // DEBUG
  const WORD x_tmp = SUB(y, key);
  const WORD xx_tmp = SUB(yy, key);
  assert(x_tmp == x);
  assert(xx_tmp == xx);
#endif // #if 1 // DEBUG

  // First encryption: one 0/1 flag per approximation order.
  WORD x_approx[6];
  x_approx[0] = sub_approx_o1(y, key);
  x_approx[1] = sub_approx_o2(y, key);
  x_approx[2] = sub_approx_o3(y, key);
  x_approx[3] = sub_approx_o4(y, key);
  x_approx[4] = sub_approx_o5(y, key);
  x_approx[5] = sub_approx_o6(y, key);
  uint32_t nequal_x[6];
  for(uint32_t k = 0; k < 6; k++) {
    nequal_x[k] = (x == x_approx[k]) ? 1 : 0;
  }
  printf("[%s:%d] HRound#%2d  nequal_x %3d %3d %3d %3d %3d %3d\n", 
			__FILE__, __LINE__, i, nequal_x[0], nequal_x[1], nequal_x[2], nequal_x[3], nequal_x[4], nequal_x[5]);

  // Second encryption: same comparison against the same key word.
  WORD xx_approx[6];
  xx_approx[0] = sub_approx_o1(yy, key);
  xx_approx[1] = sub_approx_o2(yy, key);
  xx_approx[2] = sub_approx_o3(yy, key);
  xx_approx[3] = sub_approx_o4(yy, key);
  xx_approx[4] = sub_approx_o5(yy, key);
  xx_approx[5] = sub_approx_o6(yy, key);
  uint32_t nequal_xx[6];
  for(uint32_t k = 0; k < 6; k++) {
    nequal_xx[k] = (xx == xx_approx[k]) ? 1 : 0;
  }
  printf("[%s:%d] HRound#%2d nequal_xx %3d %3d %3d %3d %3d %3d\n", 
			__FILE__, __LINE__, i, nequal_xx[0], nequal_xx[1], nequal_xx[2], nequal_xx[3], nequal_xx[4], nequal_xx[5]);
}

/* --- */
#if 0
	 WORD minus_y  = (MOD - y); // -k = 2^n - k
	 WORD minus_yy = (MOD - yy); // -k = 2^n - k
	 uint32_t order = 5;
	 double p_ap = xdp_add_fixed_x_approx_rec(minus_y, minus_yy, 0, dx_i, order);
	 if(p_ap == 0) {
		//		printf("[%s:%d] Skipped %8X %f\n", __FILE__, __LINE__, dx_i, p_ap);
		continue;
	 }
#endif

/* --- */

  // y + minus_key = x
#if 0
  WORD minus_key = (MOD - key); // -k = 2^n - k
  assert(ADD(key, minus_key) == 0);
  printf("[%s:%d] (k,-k) %8X %8X (x,xx) %8X %8X (y,yy) %8X %8X\n", 
			__FILE__, __LINE__, key, minus_key, x, xx, y, yy);
#endif
#if 0 // DEBUG
  WORD x_tmp = ADD(minus_key, y);
  WORD xx_tmp = ADD(minus_key, yy);
  //  printf("[%s:%d] %8X %8X %8X %8X\n", __FILE__, __LINE__, x, x_tmp, xx, xx_tmp);
  assert(x_tmp == x);
  assert(xx_tmp == xx);
#endif // #if 1 // DEBUG

/* --- */
/*
Average of MAX HW and MIN prob over 32 keys
  [./tests/rc5-tests.cc:1128] Average g_max hw_arr = 
1.00 1.00 1.00 0.00 1.97 2.28 2.56 3.50 4.47 4.84 6.56 7.31 8.12 9.06 10.19 12.50 13.03 14.31 15.75
  [./tests/rc5-tests.cc:1135] Average g_min prob_arr = 
0.00 0.00 0.00 -1.74 -2.00 -1.54 -2.56 -3.02 -3.51 -4.22 -5.63 -5.95 -7.52 -9.40 -12.37 -11.99 -10.71 -13.52
*/

//double RC5_PTHRES[RC5_FIB_LEN] = { 2,  3,  4,  5,  8, 13, 13, 13};
// p_thres: -12, -8, -6, -5, -4

/* --- */

/* 
0.00 
0.00 
0.00 
-1.74 
-2.00 

-1.54 
-2.56 
-3.02 
-3.51 
-4.22 
-5.63 
-5.95 
-7.52 
-9.40 
-12.37 
-11.99 
-10.71 
-13.52
 */

/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// Table of RC5_FULL_FIB_LEN probability thresholds. Every entry has the form
// 1 / (2^k + c); the exponent k grows with the index.
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  1.0 / (double)((1U << 0) + 0),   // [ 0] 1
  1.0 / (double)((1U << 0) + 0),   // [ 1] 1
  1.0 / (double)((1U << 5) + 1),   // [ 2] 1/(2^5 + 1)
  1.0 / (double)((1U << 5) + 1),   // [ 3]
  1.0 / (double)((1U << 5) + 1),   // [ 4]
  1.0 / (double)((1U << 5) + 1),   // [ 5]
  1.0 / (double)((1U << 5) + 1),   // [ 6]
  1.0 / (double)((1U << 7) + 1),   // [ 7] 1/(2^7 + 1)
  1.0 / (double)((1U << 7) + 1),   // [ 8]
  1.0 / (double)((1U << 7) + 1),   // [ 9]
  1.0 / (double)((1U << 7) + 1),   // [10]
  1.0 / (double)((1U << 10) + 1),  // [11] 1/(2^10 + 1)
  1.0 / (double)((1U << 10) + 1),  // [12]
  1.0 / (double)((1U << 10) + 1),  // [13]
  1.0 / (double)((1U << 10) + 1),  // [14]
  1.0 / (double)((1U << 12) + 1),  // [15] (leftover note in the original: "<< 6")
  1.0 / (double)((1U << 14) + 1),  // [16] (leftover note in the original: "<< 8")
  1.0 / (double)((1U << 16) + 1)   // [17] (leftover note in the original: "<< 13")
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

/* --- */

// FIB: table of per-index cut values for the search tree ("Fib. cuts"), chosen
// at compile time:
//   - RC5_XOR set        : RC5_FIB_LEN entries (XOR-linear version of RC5);
//   - otherwise, 32-bit  : RC5_FULL_FIB_LEN entries;
//   - otherwise, 16-bit  : RC5_FULL_FIB_LEN entries.
// For any other WORD_SIZE in the non-XOR build no FIB array is defined here.
// The commented-out initializers are earlier tunings kept for reference,
// together with the measurements that motivated them.
#if RC5_XOR																		  // XOR-linear version of RC5
uint32_t FIB[RC5_FIB_LEN] = { 1,  2,  3,  5,  8, 13, 13, 13}; // Fib. cuts for a search tree
#else																				  // original version
//uint32_t FIB[RC5_FIB_LEN] = { 5,  8,  8,  8, 12, 13, 13, 15};
//uint32_t FIB[RC5_FIB_LEN] = { 4,  7,  7,  8, 11, 12, 12, 14};
//uint32_t FIB[RC5_FIB_LEN] = { 3,  3,  4,  6,  8, 13, 13, 14};
#if(WORD_SIZE == 32)
//uint32_t FIB[RC5_FIB_LEN] = { 3,  3,  5,  6,  8, 13, 13, 14};
//uint32_t FIB[RC5_FIB_LEN] = { 4,  4,  6,  7,  9, 14, 15, 16};

// Active 32-bit table.
uint32_t FIB[RC5_FULL_FIB_LEN] = { 1, 1, 0, 2, 3, 3, 4, 5, 5, 7, 8, 9, 10, 12, 15, 15, 18, 18};
//uint32_t FIB[RC5_FULL_FIB_LEN] = { 1, 1, 0, 1, 1, 0, 3, 3, 4, 4, 5, 5,  5,  8, 10, 15, 20, 20};
//uint32_t FIB[RC5_FULL_FIB_LEN] = { 1, 1, 0, 1, 1, 0, 3, 3, 4, 4, 5, 5, 5, 8, 10, 15, 16, 16};
//uint32_t FIB[RC5_FULL_FIB_LEN] = { 1, 1, 0, 2, 2, 3, 4, 4, 5, 6, 7,  8,  9, 10,  12, 13, 14, 16}; // <- original
// 1.00 1.00 1.00 0.00 1.97 2.28 2.56 3.50 4.47 4.84 6.56 7.31 8.12 9.06 10.19 12.50 13.03 14.31 15.75
//uint32_t FIB[RC5_FIB_LEN] = { 1,  2,  3,  5,  8, 13, 13, 13}; // Fib. cuts for a search tree - original XOR version
//uint32_t FIB[RC5_FIB_LEN] = { 1,  2,  3,  5,  8, 13, 15, 16};
//uint32_t FIB[RC5_FIB_LEN] = { 2,  3,  4, 5,  8, 15, 16, 16}; // f/g/t = 6/4/9
//uint32_t FIB[RC5_FIB_LEN] = {5, 5, 5, 6, 7, 9, 9, 10, 13, 17, 17, 17};
//uint32_t FIB[RC5_FIB_LEN] = { 2,  3,  4, 5,  8, 13, 13, 13}; // <- original
/*
Average of MAX HW and MIN prob over 32 keys
  [./tests/rc5-tests.cc:1128] Average g_max hw_arr = 
1.00 1.00 1.00 0.00 1.97 2.28 2.56 3.50 | 4.47 4.84 6.56 7.31 8.12 9.06 10.19 12.50 13.03 14.31 15.75
  [./tests/rc5-tests.cc:1135] Average g_min prob_arr = 
0.00 0.00 0.00 0.00 -1.74 -2.00 -1.54 -2.56 -3.02 -3.51 -4.22 -5.63 -5.95 -7.52 -9.40 -12.37 -11.99 -10.71 -13.52
*/
/*
  Recorded runs (f/g/t as printed by the tool) with the fib_array and
  p_thres_array values that were in effect:

  f/g/t = 198/5/6
  [./src/rc5-dc.cc:2866] fib_array = [ 0]  5 [ 1]  5 [ 2]  5 [ 3]  8 [ 4] 10 [ 5] 15 [ 6] 16 [ 7] 16
  [./src/rc5-dc.cc:2871] p_thres_array = [ 0] -1.00 [ 1] -1.00 [ 2] -2.00 [ 3] -4.00 [ 4] -5.00 [ 5] -6.00 [ 6] -9.00 [ 7] -10.00
  f/g/t = 66/3/7
  [./src/rc5-dc.cc:2866] fib_array = [ 0]  4 [ 1]  4 [ 2]  5 [ 3]  8 [ 4] 10 [ 5] 15 [ 6] 16 [ 7] 16
  [./src/rc5-dc.cc:2871] p_thres_array = [ 0] -1.00 [ 1] -1.00 [ 2] -2.00 [ 3] -4.00 [ 4] -5.00 [ 5] -6.00 [ 6] -9.00 [ 7] -10.00
  f/g/t = 113/3/8
  [./src/rc5-dc.cc:2866] fib_array = [ 0]  4 [ 1]  5 [ 2]  5 [ 3]  8 [ 4] 10 [ 5] 15 [ 6] 16 [ 7] 16
  [./src/rc5-dc.cc:2871] p_thres_array = [ 0] -1.00 [ 1] -1.00 [ 2] -2.00 [ 3] -4.00 [ 4] -5.00 [ 5] -6.00 [ 6] -9.00 [ 7] -10.00
*/
//#endif // #if RC5_FULL_FIB
#endif  // #if(WORD_SIZE == 32)
#if(WORD_SIZE == 16)
//uint32_t FIB[RC5_FIB_LEN] = { 2,  3,  4,  4,  5,  9,  9, 10};
//uint32_t FIB[RC5_FIB_LEN] = { 3,  3,  4,  4,  5,  5,  6, 7};
//uint32_t FIB[RC5_FIB_LEN] = { 2,  2,  2,  2,  2,  2,  3, 6};
//uint32_t FIB[RC5_FIB_LEN] = { 0,  2,  2,  2,  2,  2,  3, 6};
//uint32_t FIB[RC5_FIB_LEN] = { 2,  2,  2,  2,  2,  2,  2, 4};
// Active 16-bit table.
uint32_t FIB[RC5_FULL_FIB_LEN] = { 1, 1, 0, 1, 1, 0, 2, 2, 2, 2, 2,  2,  2,  2,  2,  2,  2, 4};
#endif  // #if(WORD_SIZE == 16)
#endif  // #if RC5_XOR ... #else


/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// Probability thresholds, RC5_FULL_FIB_LEN entries of the form 1 / (2^k + c).
// The values step through k = 0, 5, 7, 10, 12, 14, 16 as the index increases.
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  // k = 0 (threshold 1)
  1.0 / (double)((1U << 0) + 0),
  1.0 / (double)((1U << 0) + 0),
  // k = 5
  1.0 / (double)((1U << 5) + 1),
  1.0 / (double)((1U << 5) + 1),
  1.0 / (double)((1U << 5) + 1),
  1.0 / (double)((1U << 5) + 1),
  1.0 / (double)((1U << 5) + 1),
  // k = 7
  1.0 / (double)((1U << 7) + 1),
  1.0 / (double)((1U << 7) + 1),
  1.0 / (double)((1U << 7) + 1),
  1.0 / (double)((1U << 7) + 1),
  // k = 10
  1.0 / (double)((1U << 10) + 1),
  1.0 / (double)((1U << 10) + 1),
  1.0 / (double)((1U << 10) + 1),
  1.0 / (double)((1U << 10) + 1),
  // last three entries; the original carried leftover notes "<< 6", "<< 8", "<< 13"
  1.0 / (double)((1U << 12) + 1),
  1.0 / (double)((1U << 14) + 1),
  1.0 / (double)((1U << 16) + 1)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))



/* --- */

#if ((WORD_SIZE == 16) || (WORD_SIZE == 32))
// Variant of the threshold table with much looser values in the middle:
// entries [2..11] are all 1 / (2^2 + 1), then k = 3, 4, 5, 6, 14, 16.
double RC5_P_THRES_ARRAY[RC5_FULL_FIB_LEN] = {
  // k = 0 (threshold 1)
  1.0 / (double)((1U << 0) + 0),
  1.0 / (double)((1U << 0) + 0),
  // k = 2, ten entries
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  1.0 / (double)((1U << 2) + 1),
  // k = 3, 4, 5
  1.0 / (double)((1U << 3) + 1),
  1.0 / (double)((1U << 4) + 1),
  1.0 / (double)((1U << 5) + 1),
  // last three entries; the original carried leftover notes "<< 6", "<< 8", "<< 13"
  1.0 / (double)((1U << 6) + 1),
  1.0 / (double)((1U << 14) + 1),
  1.0 / (double)((1U << 16) + 1)
};
#endif // #if ((WORD_SIZE == 16) || (WORD_SIZE == 32))

/* --- */

/* 
i 15 order  5 mask_stride     F800 dz     44EC

 */

/* --- */

void test_xdp_sub_fixed_x_approx()
{
  printf("[%s:%d] Enter %s() ...\n", __FILE__, __LINE__, __FUNCTION__);

  uint32_t x = random32() & MASK;
  uint32_t xx = random32() & MASK;
  uint32_t db = 0;//random32() & MASK;
  uint32_t dc = (x ^ xx ^ db);//random32() & MASK;

  //  uint32_t order = 2;
  uint32_t order_max = std::min(6, WORD_SIZE);
  for(uint32_t order = 2; order <= order_max; order++) {
	 double p_ap = xdp_sub_fixed_x_approx_rec(x, xx, db, dc, order);

	 printf("[%s:%d] Order %2d ", __FILE__, __LINE__, order);
	 printf("XDP_SUB_FIXED_X_AP[(%8X,%8X,%8X)->%8X] = %6.5f 2^%4.2f\n", x, xx, db, dc, p_ap, log2(p_ap));
#if (WORD_SIZE <= 10)
	 double p_ex = xdp_add_fixed_x_approx_exper(x, xx, db, dc, order);
	 if(p_ex != p_ap) {
		printf("Error! XDP_SUB_FIXED_X_EX[(%8X,%8X,%8X)->%8X] = %6.5f 2^%4.2f\n", x, xx, db, dc, p_ex, log2(p_ex));
	 }
	 assert(p_ex == p_ap);
#endif // #if (WORD_SIZE <= 10)
  }
  printf("[%s:%d] Test OK\n", __FILE__, __LINE__);
}

/**
 * Exhaustive cross-check of xdp_sub_fixed_x_approx_rec() against
 * xdp_sub_fixed_x_approx_exper(): every order in [2, WORD_SIZE] and every
 * combination (x, xx, db, dc) of words below ALL_WORDS is evaluated with both
 * routines and the two results must be exactly equal. On a mismatch both
 * values are printed before the assertion fires.
 */
void test_xdp_sub_fixed_x_approx_all()
{
  printf("[%s:%d] Enter %s() ...\n", __FILE__, __LINE__, __FUNCTION__);

  for(uint32_t order = 2; order <= WORD_SIZE; order++) {
    for(WORD x = 0; x < ALL_WORDS; x++) {
      for(WORD xx = 0; xx < ALL_WORDS; xx++) {
        for(WORD db = 0; db < ALL_WORDS; db++) {
          for(WORD dc = 0; dc < ALL_WORDS; dc++) {
            const double p_rec = xdp_sub_fixed_x_approx_rec(x, xx, db, dc, order);
            const double p_cnt = xdp_sub_fixed_x_approx_exper(x, xx, db, dc, order);
            const bool b_agree = (p_cnt == p_rec);

            if(!b_agree) {
              printf("[%s:%d] XDP_SUB_FIXED_X_EX[(%8X,%8X,%8X)->%8X] = %6.5f\n", 
                     __FILE__, __LINE__, x, xx, db, dc, p_cnt);
              printf("[%s:%d] XDP_SUB_FIXED_X_AP[(%8X,%8X,%8X)->%8X] = %6.5f\n", 
                     __FILE__, __LINE__, x, xx, db, dc, p_rec);
            }
            assert(b_agree);
          }
        }
      }
    }
  }
  printf("[%s:%d] Test OK\n", __FILE__, __LINE__);
}


/* --- */

void xdp_add_fixed_x_all_dz_approx_rec_i(const uint32_t i, const uint32_t order,
													  const WORD dy, const WORD x, const WORD xx, 
													  const WORD dz, const WORD y, uint64_t* cnt_dz);
uint64_t xdp_add_fixed_x_all_dz_approx_rec(const WORD x, const WORD xx, const WORD dy, uint32_t order);

void xdp_add_fixed_x_all_dz_approx_rec_i(const uint32_t i, const uint32_t order,
													  const WORD dy, const WORD x, const WORD xx, 
													  const WORD dz, const WORD y, uint64_t* cnt_dz)
{
  if(i == WORD_SIZE) {
	 (*cnt_dz)++;
#if 0 // DEBUG
	 double p_ap = xdp_add_fixed_x_approx_rec(x, xx, dy, dz, order);
	 printf("[%s:%d] XDP_ADD_FIXED_X_AP[(%8X,%8X,%8X)->%8X] = %6.5f\n", 
			  __FILE__, __LINE__, x, xx, dy, dz, p_ap);
	 assert(p_ap != 0.0);
#endif
	 return;
  }

  //  uint32_t mask_order = (0xffffffff >> (32 - (order - 1)));
  uint32_t mask_stride = (0xffffffff >> (32 - order)) << (i + 1 - order); 
  for(uint32_t dz_i = 0; dz_i < 2; dz_i++) {
	 WORD new_dz = ((dz_i << i) | dz);
	 for(uint32_t y_i = 0; y_i < 2; y_i++) {
		WORD new_x = x & mask_stride; 
		WORD new_xx = xx & mask_stride;
		WORD new_y = ((y_i << i) | y) & mask_stride; 
		WORD new_yy = (new_y ^ dy) & mask_stride;

		//		printf("[%s:%d] %8X %8X %8X %8X | %8X\n", __FILE__, __LINE__, new_x, new_y, new_xx, new_yy, diff_stride);
		WORD diff_stride = ((new_x + new_y) ^ (new_xx + new_yy)) & mask_stride;
		//		bool b_match = (((diff_stride >> i) & 1) == ((dz >> i) & 1)); // diff[i] ?= dz[i]
		bool b_match = (((diff_stride >> i) & 1) == ((new_dz >> i) & 1)); // diff[i] ?= dz[i]
		if(b_match) {
		  xdp_add_fixed_x_all_dz_approx_rec_i(i+1, order, dy, x, xx, new_dz, new_y, cnt_dz);
		}
	 }
  }
}

/**
 * Count the (dz, y) bit assignments accepted by the windowed check in
 * xdp_add_fixed_x_all_dz_approx_rec_i() for fixed operands x, xx and fixed
 * difference dy.
 *
 * The (order - 1) least significant bits of dz and y are enumerated here. A
 * pair is handed to the recursive step when the low bits of
 * (x + y) ^ (xx + (y ^ dy)) equal the low bits of dz. The running total is
 * printed after each dz value.
 *
 * @param x, xx  the two fixed first operands.
 * @param dy     XOR difference between the two second operands.
 * @param order  window width, 2 <= order <= WORD_SIZE.
 * @return total number of complete assignments counted.
 */
uint64_t xdp_add_fixed_x_all_dz_approx_rec(const WORD x, const WORD xx, const WORD dy, uint32_t order)
{
  assert(order >= 2);
  assert(order <= WORD_SIZE);

  const uint32_t N = (1U << (order - 1)); // x[i-1:0]
  const uint32_t mask_lsb = (0xffffffff >> (32 - (order - 1))); // mask order-1 LS bits

  uint64_t cnt_dz = 0;
  for(WORD dz = 0; dz < N; dz++) {
    for(WORD y = 0; y < N; y++) {
      const WORD yy = (y ^ dy) & mask_lsb;
      const WORD diff_lsb = (((x + y) & mask_lsb) ^ ((xx + yy) & mask_lsb)) & mask_lsb;
      if(diff_lsb == (dz & mask_lsb)) {
        const uint32_t i = order - 1; // next bit index to be assigned
        xdp_add_fixed_x_all_dz_approx_rec_i(i, order, dy, x, xx, dz, y, &cnt_dz);
      }
    }
    // %lld needs a long long argument; uint64_t is not guaranteed to be one.
    printf("[%s:%d] cnt_dz %lld\n", __FILE__, __LINE__, (long long int)cnt_dz);
  }
  return cnt_dz;
}

/**
 * Smoke test for xdp_add_fixed_x_all_dz_approx_rec(): draws one random pair
 * (x, xx), fixes db = 0 and prints the count returned for every order from 2
 * to min(6, WORD_SIZE). No value is asserted; the experimental comparison
 * below is disabled.
 */
void test_xdp_add_fixed_x_all_dz_approx()
{
  printf("[%s:%d] Enter %s() ...\n", __FILE__, __LINE__, __FUNCTION__);

  const uint32_t x = random32() & MASK;
  const uint32_t xx = random32() & MASK;
  const uint32_t db = 0;//random32() & MASK;
  // An exhaustive variant would loop x, xx and db over all words here.

  const uint32_t order_max = std::min(6, WORD_SIZE);
  for(uint32_t order = 2; order <= order_max; order++) {
    const uint64_t cnt_dz = xdp_add_fixed_x_all_dz_approx_rec(x, xx, db, order);

    printf("[%s:%d] Order %2d ", __FILE__, __LINE__, order);
    // Cast so that the argument type matches %lld.
    printf("XDP_ADD_FIXED_X_AP[(%8X,%8X,%8X)-> *] = %lld\n", x, xx, db, (long long int)cnt_dz);
#if 0//(WORD_SIZE <= 10)
    // Disabled: needs dc and p_ap, which this test no longer computes.
    double p_ex = xdp_add_fixed_x_approx_exper(x, xx, db, dc, order);
    if(p_ex != p_ap) {
      printf("Error! XDP_ADD_FIXED_X_EX[(%8X,%8X,%8X)->%8X] = %6.5f 2^%4.2f\n", x, xx, db, dc, p_ex, log2(p_ex));
    }
    assert(p_ex == p_ap);
#endif // #if (WORD_SIZE <= 10)
  }
  printf("[%s:%d] Test OK\n", __FILE__, __LINE__);
}

  //  test_xdp_add_fixed_x_all_dz_approx();

/* --- */
  //  printf("[%s:%d] i = %2d mask %8X\n", __FILE__, __LINE__, i, mask_stride);
		//		uint32_t nbits = (1U << order);
		//		WORD diff_stride = (((new_x + new_y) & mask_stride) ^ ((new_xx + new_yy) & mask_stride)) & mask_stride;
		//		bool b_match = (diff_stride == (dz & mask_stride));
  //  printf("[%s:%d] Enter %s() ...\n", __FILE__, __LINE__, __FUNCTION__);
	 //	 printf("[%s:%d] Add pair %2lld\n", __FILE__, __LINE__, *cnt_xy);


/* --- */

void xdp_add_approx_rec_i(const uint32_t i, const uint32_t order,
								  const WORD dx, const WORD dy, const WORD dz,
								  const WORD x, const WORD y, uint64_t* cnt_xy)
{
  //  printf("[%s:%d] Enter %s() ...\n", __FILE__, __LINE__, __FUNCTION__);
  if(i == WORD_SIZE) {
	 (*cnt_xy)++;
	 return;
  }

  //  uint32_t mask_lsb = (0xffffffff >> (32 - (i+1))); 
  uint32_t mask_lsb = (0xffffffff >> (32 - order)) << (i + 1 - order); 
  printf("[%s:%d] i = %2d mask %8X\n", __FILE__, __LINE__, i, mask_lsb);
  for(uint32_t x_i = 0; x_i < 2; x_i++) {
	 for(uint32_t y_i = 0; y_i < 2; y_i++) {
		WORD new_x = (x_i << i) | x; 
		WORD new_y = (y_i << i) | y; 
		WORD new_xx = (new_x ^ dx) & mask_lsb;
		WORD new_yy = (new_y ^ dy) & mask_lsb;
		WORD diff_lsb = ((new_x + new_y) ^ (new_xx + new_yy)) & mask_lsb;
		bool b_match_lsb = (diff_lsb == (dz & mask_lsb));
		if(b_match_lsb) {
		  xdp_add_approx_rec_i(i+1, order, dx, dy, dz, new_x, new_y, cnt_xy);
		}
	 }
  }
}

/**
 * Approximate probability of the XOR differential (dx, dy -> dz) of modular
 * addition, obtained by counting operand pairs with a windowed bit-by-bit
 * check and dividing by ALL_WORDS * ALL_WORDS.
 *
 * The (order - 1) least significant bits of x and y are enumerated here;
 * pairs whose low bits already satisfy the differential are extended one bit
 * at a time by xdp_add_approx_rec_i().
 *
 * @param dx, dy input XOR differences.
 * @param dz     output XOR difference.
 * @param order  window width, 1 <= order <= WORD_SIZE.
 * @return number of accepted pairs divided by ALL_WORDS * ALL_WORDS.
 */
double xdp_add_approx_rec(const WORD dx, const WORD dy, const WORD dz, uint32_t order)
{
  printf("[%s:%d] Enter %s() ...\n", __FILE__, __LINE__, __FUNCTION__);
  assert(order >= 1);
  assert(order <= WORD_SIZE);

  const uint32_t N = (1U << (order - 1)); // x[i-1:0]
  // Mask of the order-1 LS bits. For order == 1 the mask is empty; computing
  // it as 0xffffffff >> 32 would be an undefined shift, so handle it apart.
  const uint32_t mask_lsb = (order > 1) ? (0xffffffff >> (32 - (order - 1))) : 0;
  uint64_t cnt_xy = 0;

  printf("[%s:%d] order % 2d mask_lsb %8X\n", __FILE__, __LINE__, order, mask_lsb);

  for(WORD x = 0; x < N; x++) {
    for(WORD y = 0; y < N; y++) {
      const WORD xx = (x ^ dx) & mask_lsb;
      const WORD yy = (y ^ dy) & mask_lsb;
      const WORD diff_lsb = ((x + y) ^ (xx + yy)) & mask_lsb;
      if(diff_lsb == (dz & mask_lsb)) {
        const uint32_t i = order - 1; // next bit index to be assigned
        xdp_add_approx_rec_i(i, order, dx, dy, dz, x, y, &cnt_xy);
      }
    }
  }
  const uint64_t all = (ALL_WORDS * ALL_WORDS);
  const double p = (double)cnt_xy / (double)all;
  return p;
}


/* --- */


/**
 * Allocate a cube of zero-initialised square GSL matrices.
 *
 * On return (*A)[i][j][k] is an A_dim_two x A_dim_two matrix for all
 * 0 <= i, j, k < 2^A_dim_one. Release with yaarx_free_matrices_3d(), which
 * walks the same 2^A_dim_one index range.
 *
 * Fixes relative to the previous version:
 *  - the pointer tables were allocated with A_dim_one entries but indexed up
 *    to 2^A_dim_one, writing past the end of every table;
 *  - each matrix was passed to gsl_matrix_free() straight after allocation
 *    (through a wrongly typed expression), leaving nothing usable.
 *
 * NOTE(review): test_xdp_add_approx() passes (1U << order) as A_dim_one, i.e.
 * an already exponentiated value. Confirm whether the first argument is meant
 * to be a bit count (as implemented here and in the free routine) or a count.
 *
 * @param A          out: receives the newly allocated cube.
 * @param A_dim_one  log2 of the extent of each of the three cube dimensions.
 * @param A_dim_two  row/column size of every matrix.
 */
void yaarx_alloc_matrices_3d(gsl_matrix***** A, 
									  uint32_t A_dim_one, uint32_t A_dim_two)
{
  assert(A != NULL);
  assert(A_dim_one < 31); // keep 2^A_dim_one representable in the loop counters

  const uint32_t nvals = (1U << A_dim_one);

  (*A) = (gsl_matrix ****)calloc(nvals, sizeof(gsl_matrix ***));
  assert((*A) != NULL);
  for(uint32_t i = 0; i < nvals; i++) {
    (*A)[i] = (gsl_matrix ***)calloc(nvals, sizeof(gsl_matrix **));
    assert((*A)[i] != NULL);
    for(uint32_t j = 0; j < nvals; j++) {
      (*A)[i][j] = (gsl_matrix **)calloc(nvals, sizeof(gsl_matrix *));
      assert((*A)[i][j] != NULL);
      for(uint32_t k = 0; k < nvals; k++) {
        (*A)[i][j][k] = gsl_matrix_calloc(A_dim_two, A_dim_two);
      }
    }
  }
}

/**
 * Release a matrix cube: every matrix (*A)[i][j][k] for
 * 0 <= i, j, k < 2^A_dim_one, then the pointer tables that hold them, and
 * finally reset *A to NULL.
 *
 * The previous version freed only the matrices and leaked all three levels of
 * pointer tables (the free() calls were commented out).
 *
 * @param A          in/out: cube to release; *A is NULL afterwards.
 * @param A_dim_one  log2 of the extent of each cube dimension; must match the
 *                   value used at allocation.
 * @param A_dim_two  matrix size; not needed for freeing, kept for symmetry
 *                   with the allocation routine.
 */
void yaarx_free_matrices_3d(gsl_matrix***** A, 
									  uint32_t A_dim_one, uint32_t A_dim_two)
{
  (void)A_dim_two;
  if((A == NULL) || ((*A) == NULL)) {
    return; // nothing to release
  }

  const uint32_t nvals = (1U << A_dim_one);
  for(uint32_t i = 0; i < nvals; i++) {
    for(uint32_t j = 0; j < nvals; j++) {
      for(uint32_t k = 0; k < nvals; k++) {
        gsl_matrix_free((*A)[i][j][k]);
      }
      free((*A)[i][j]);
    }
    free((*A)[i]);
  }
  free(*A);
  (*A) = NULL;
}

//void xdp_add_approx_sf(gsl_matrix* A[2][2][2], const uint32_t A_len, const uint32_t order)
/**
 * Fill the transition-count matrices A[da][db][dc] for an order-`order`
 * approximation of the XOR differential of addition.
 *
 * For all order-bit values da, db, x, y the output difference
 * dc = ((x + y) ^ ((x ^ da) + (y ^ db))) mod 2^order is computed and the
 * entry (row, col) of A[da][db][dc] is incremented, where
 *   col = (yy | xx | y | x)[order-2:0]  - the low order-1 bits of each word,
 *   row = (yy | xx | y | x)[order-1:1]  - the high order-1 bits of each word,
 * each packed into four adjacent fields of order-1 bits.
 *
 * Fix: the field packing was written as `w & (mask) << shift`, which C++
 * parses as `w & (mask << shift)`; the word was masked in place and never
 * shifted into its field. The expressions are now `(w & mask) << shift`.
 *
 * The matrices are only incremented, never cleared, so they are expected to
 * be zero on entry. NOTE(review): row/col can be as large as
 * 2^(4*(order-1)) - 1; confirm the matrices are allocated with that size.
 *
 * @param A      cube of matrices indexed [da][db][dc], each index < 2^order.
 * @param A_len  must equal 2^order.
 * @param order  approximation order; 4 * (order - 1) must be below 32.
 */
void xdp_add_approx_sf(gsl_matrix**** A, const uint32_t A_len, const uint32_t order)
{
  assert(order >= 1);
  const uint32_t stride = (order - 1);          // width of one packed field
  assert((uint64_t)(4 * stride) < 32);          // make sure we have space

  const uint32_t N = (1U << order); // 2^ord
  assert(A_len == N);

  const uint32_t mask_order = (0xffffffff >> (32 - order)); // mask order LS bits
  const uint32_t mask_stride = (mask_order >> 1);           // mask order-1 LS bits

  printf("mask_order %X\n", mask_order);

  for(WORD da = 0; da < N; da++) {
    for(WORD db = 0; db < N; db++) {
      for(WORD x = 0; x < N; x++) {
        for(WORD y = 0; y < N; y++) {
          const WORD xx = (x ^ da);
          const WORD yy = (y ^ db);
          const WORD z = (x + y) & mask_order;
          const WORD zz = (xx + yy) & mask_order;
          const WORD dc = (z ^ zz);

          const uint32_t s_in = // (yy xx y x)[ord-2:0]
            ((yy & mask_stride) << (stride * 3)) | 
            ((xx & mask_stride) << (stride * 2)) | 
            (( y & mask_stride) << (stride * 1)) | 
            (( x & mask_stride) << (stride * 0)); 
          const uint32_t s_out = // (yy xx y x)[ord-1:1]
            (((yy >> 1) & mask_stride) << (stride * 3)) | 
            (((xx >> 1) & mask_stride) << (stride * 2)) | 
            ((( y >> 1) & mask_stride) << (stride * 1)) | 
            ((( x >> 1) & mask_stride) << (stride * 0)); 

          const uint32_t col = s_in;  // u
          const uint32_t row = s_out; // v

          // Increment the transition count stored as a double in the matrix.
          uint32_t e = (uint32_t)gsl_matrix_get(A[da][db][dc], row, col);
          e += 1;
          gsl_matrix_set(A[da][db][dc], row, col, e);
        }
      }
    }
  }
}

//double xdp_add_approx(gsl_matrix* A[2][2][2], const uint32_t A_len, 
/**
 * Unfinished matrix-based evaluation of the approximate XOR differential
 * probability of addition. In its current state it only prints, for every bit
 * position from order-1 up to WORD_SIZE-1, the order-bit slices of da, db and
 * dc starting at that position. The whole sweep is repeated order-1 times and
 * the matrices in A are not consulted.
 *
 * @param A      cube of transition matrices (unused for now).
 * @param A_len  must equal 2^order.
 * @param da, db input XOR differences.
 * @param dc     output XOR difference.
 * @param order  approximation order.
 * @return always 1.0.
 */
double xdp_add_approx(gsl_matrix**** A, const uint32_t A_len, 
							 const WORD da, const WORD db, const WORD dc, 
							 uint32_t order)
{
  const uint32_t N = (1U << order);
  assert(A_len == N);

  const uint32_t slice_mask = (0xffffffff >> (32 - order)); // mask order LS bits
  const uint32_t first_bit = (order - 1);
  double cnt = 1.0;

  for(uint32_t pass = 0; pass < first_bit; pass++) {
    for(uint32_t i = first_bit; i < WORD_SIZE; i++) {
      const WORD da_i = (da >> i) & slice_mask;
      const WORD db_i = (db >> i) & slice_mask;
      const WORD dc_i = (dc >> i) & slice_mask;
      printf("[%s:%d] i = %2d | %X %X %X | cnt %f\n", __FILE__, __LINE__, i, da_i, db_i, dc_i, cnt);
    }
  }

  return (double)cnt;
}

/**
 * Work-in-progress driver for the matrix-based approximation with order 2.
 * At the moment it only allocates the matrix cube; the comparison between the
 * experimental and the matrix-based probability is disabled, and so is the
 * release of the cube (the allocation is therefore never freed).
 */
void test_xdp_add_approx()
{
  const uint32_t order = 2;
  const uint32_t stride = (order - 1);
  assert((uint64_t)(4 * stride) < 32); // make sure we have space 

  const uint32_t A_dim_two = (4 * stride);
  const uint32_t A_dim_one = (1U << order);

  gsl_matrix**** A = NULL;
  yaarx_alloc_matrices_3d(&A, A_dim_one, A_dim_two);

#if 0
  // Disabled comparison; refers to A_len, which is not defined here.
  xdp_add_approx_sf(A, A_len, order);

  WORD da = 0;//random32() & MASK;
  WORD db = 2;//random32() & MASK;
  WORD dc = 2;//da ^ db;//random32() & MASK;
  double p_exp = xdp_add_approx_exper(da, db, dc, order);
  double p_the = xdp_add_approx(A, A_len, da, db, dc, order);
  printf("[%s:%d] order %d: XDP_ADD_EX[(%8X,%8X)->%8X] = %6.5f\n", __FILE__, __LINE__, 
         order, da, db, dc, p_exp);
  printf("[%s:%d] order %d: XDP_ADD_TH[(%8X,%8X)->%8X] = %6.5f\n", __FILE__, __LINE__, 
         order, da, db, dc, p_the);
#endif

  //  yaarx_free_matrices_3d(&A, A_dim_one, A_dim_two);
}

/**
 * Exhaustive comparison of xdp_add_approx() against xdp_add_approx_exper()
 * over all (da, db, dc) for order 3.
 *
 * The entire body is compiled out with #if 0, so calling this function does
 * nothing. The disabled code uses the fixed-size gsl_matrix* A[2][2][2]
 * interface and two-argument alloc/free calls, which do not match the
 * yaarx_alloc_matrices_3d()/yaarx_free_matrices_3d() signatures defined in
 * this file; it would need updating before being re-enabled.
 */
void test_xdp_add_approx_all()
{
#if 0
  assert(WORD_SIZE <= 16);
  assert(WORD_SIZE >= 6);
  uint32_t order = 3;
  uint32_t A_len = (1U << order);
  gsl_matrix* A[2][2][2];
  //  uint32_t*** A;

  yaarx_alloc_matrices_3d(A, A_len);

  // Build the transition matrices once, then query them for every differential.
  xdp_add_approx_sf(A, A_len, order);

  for(WORD da = 0; da < ALL_WORDS; da++) {
	 for(WORD db = 0; db < ALL_WORDS; db++) {
		for(WORD dc = 0; dc < ALL_WORDS; dc++) {
		  double p_exp = xdp_add_approx_exper(da, db, dc, order);
		  double p_the = xdp_add_approx(A, A_len, da, db, dc, order);
		  printf("[%s:%d] order %d: XDP_ADD_EX[(%8X,%8X)->%8X] = %6.5f\n", __FILE__, __LINE__, 
					order, da, db, dc, p_exp);
		  printf("[%s:%d] order %d: XDP_ADD_TH[(%8X,%8X)->%8X] = %6.5f\n", __FILE__, __LINE__, 
					order, da, db, dc, p_the);
		  // Experimental and matrix-based values are required to agree exactly.
		  assert(p_exp == p_the);
		}
	 }
  }
  yaarx_free_matrices_3d(A, A_len);
#endif
}

/* --- */
void xdp_add_approx_sf(uint32_t*** A, const uint32_t A_len, const uint32_t order)
{
  uint32_t N = (1U << order); // 2^ord
  assert(A_len == N);

  for(uint32_t i = 0; i < A_len; i++) {
	 for(uint32_t j = 0; j < A_len; j++) {
		for(uint32_t k = 0; k < A_len; k++) {
		  A[i][j][k] = 0;
		}
	 }
  }

  uint32_t mask_order = (0xffffffff >> (32 - order)); // mask order LS bits
  printf("mask_order %X\n", mask_order);
  for(WORD da = 0; da < N; da++) {
	 for(WORD db = 0; db < N; db++) {
		for(WORD x = 0; x < N; x++) {
		  for(WORD y = 0; y < N; y++) {
			 WORD xx = (x ^ da);
			 WORD yy = (y ^ db);
			 WORD z = (x + y) & mask_order;
			 WORD zz = (xx + yy) & mask_order;
			 WORD dc = (z ^ zz);

			 A[da][db][dc]++;
		  }
		}
	 }
  }

#if 1 // print matrices
  for(uint32_t i = 0; i < A_len; i++) {
	 for(uint32_t j = 0; j < A_len; j++) {
		for(uint32_t k = 0; k < A_len; k++) {
		  //		  A[i][j][k] = 0;
		  if(A[i][j][k]) {
			 printf("[%s:%d] A[%5d][%5d][%5d] = %5d\n", __FILE__, __LINE__, i, j, k, A[i][j][k]);
		  }
		}
	 }
  }
#endif // #if 0 // print matrices
}


/**
 * Approximate XOR differential probability of addition as a product of
 * order-bit window probabilities.
 *
 * For every bit offset i in [0, WORD_SIZE - order] the order-bit slices of
 * da, db, dc starting at bit i are looked up in A and the running product is
 * multiplied by A[da_i][db_i][dc_i] / 2^(2*order). The intermediate product
 * is printed after each window.
 *
 * Fixes: the divisor was computed as the 32-bit product N * N, which wraps to
 * 0 for order == 16 (division by zero); it is now formed in double. The
 * order is also range-checked, because order == 0 makes the mask shift
 * undefined and order > WORD_SIZE makes the loop bound wrap around.
 *
 * @param A      count table as filled by xdp_add_approx_sf().
 * @param A_len  must equal 2^order.
 * @param da, db input XOR differences.
 * @param dc     output XOR difference.
 * @param order  window width, 1 <= order <= WORD_SIZE and order < 32.
 * @return the product of the window probabilities.
 */
double xdp_add_approx(uint32_t*** A, const uint32_t A_len, 
							 const WORD da, const WORD db, const WORD dc, 
							 uint32_t order)
{
  assert(order >= 1);
  assert(order <= WORD_SIZE);
  assert(order < 32);

  const uint32_t N = (1U << order);
  assert(A_len == N);

  const uint32_t mask_order = (0xffffffff >> (32 - order)); // mask order LS bits
  const double npairs = (double)N * (double)N;              // 2^(2*order), no 32-bit wrap

  double cnt = 1.0;
  for(uint32_t i = 0; i <= (WORD_SIZE - order); i++) {
    const WORD da_i = (da >> i) & mask_order;
    const WORD db_i = (db >> i) & mask_order;
    const WORD dc_i = (dc >> i) & mask_order;
    cnt *= (double)(A[da_i][db_i][dc_i]) / npairs;
    printf("[%s:%d] i = %2d | %X %X %X | cnt %f\n", __FILE__, __LINE__, i, da_i, db_i, dc_i, cnt);
  }
  const double p = (double)cnt;
  return p;
}


/* --- */
/**
 * Skeleton of the order-2 matrix construction. It walks every one-bit
 * difference triple (da, db, dc), every one-bit value pair (x, y) and every
 * combination of previous bits of x and y, unpacking the previous bits into
 * scratch arrays. Nothing is written to A yet.
 *
 * @param A      2x2x2 array of matrices (not modified).
 * @param A_len  unused.
 * @param order  must be 2.
 */
void xdp_add_approx_sf(gsl_matrix* A[2][2][2], const uint32_t A_len, const uint32_t order)
{
  assert(order == 2);

  const uint32_t ndiffs = (1U << 3);           // (dc, db, da) one bit each
  const uint32_t nvals = 4;                    // (y, x) one bit each
  const uint32_t nprev = (order - 1);          // number of previous bits per word
  const uint32_t nbits = (1U << nprev);        // number of previous-bit patterns

  // Scratch arrays for the unpacked previous bits.
  WORD* X = (WORD *)calloc(nbits, sizeof(WORD));
  WORD* XX = (WORD *)calloc(nbits, sizeof(WORD));
  WORD* Y = (WORD *)calloc(nbits, sizeof(WORD));
  WORD* YY = (WORD *)calloc(nbits, sizeof(WORD));

  for(uint32_t d = 0; d < ndiffs; d++) {
    const WORD da_i = (d >> 0) & 1;
    const WORD db_i = (d >> 1) & 1;
    const WORD dc_i = (d >> 2) & 1;

    for(uint32_t v = 0; v < nvals; v++) {
      const WORD x_i = (v >> 0) & 1;
      const WORD y_i = (v >> 1) & 1;

      for(uint32_t x_prev = 0; x_prev < nbits; x_prev++) {
        for(uint32_t b = 0; b < nprev; b++) {
          X[b] = (x_prev >> b) & 1;
        }
        for(uint32_t y_prev = 0; y_prev < nbits; y_prev++) {
          for(uint32_t b = 0; b < nprev; b++) {
            Y[b] = (y_prev >> b) & 1;
          }
          // Placeholder: the matrix update for this state is not written yet.
        }
      }
    }
  }

  free(X);
  free(XX);
  free(Y);
  free(YY);
}

/**
 * Placeholder for the order-2 matrix-based probability: ignores all of its
 * arguments and reports probability zero.
 *
 * @return always 0.0.
 */
double xdp_add_approx(gsl_matrix* A[2][2][2], const uint32_t A_len, 
							 const WORD da, const WORD db, const WORD dc, 
							 uint32_t order)
{
  const double p_stub = 0.0; // not implemented yet
  return p_stub;
}


/* --- */
void xdp_add_approx_sf(uint32_t*** A, const uint32_t A_len, const uint32_t order)
{
  uint32_t N = (1U << order); // 2^ord
  assert(A_len == N);

  for(uint32_t i = 0; i < A_len; i++) {
	 for(uint32_t j = 0; j < A_len; j++) {
		for(uint32_t k = 0; k < A_len; k++) {
		  A[i][j][k] = 0;
		}
	 }
  }

  uint32_t mask_order = (0xffffffff >> (32 - order)); // mask order LS bits
  printf("mask_order %X\n", mask_order);
  for(WORD da = 0; da < N; da++) {
	 for(WORD db = 0; db < N; db++) {
		for(WORD x = 0; x < N; x++) {
		  for(WORD y = 0; y < N; y++) {
			 WORD xx = (x ^ da);
			 WORD yy = (y ^ db);
			 WORD z = (x + y) & mask_order;
			 WORD zz = (xx + yy) & mask_order;
			 WORD dc = (z ^ zz);

			 A[da][db][dc]++;
		  }
		}
	 }
  }

#if 0 // print matrices
  for(uint32_t i = 0; i < A_len; i++) {
	 for(uint32_t j = 0; j < A_len; j++) {
		for(uint32_t k = 0; k < A_len; k++) {
		  //		  A[i][j][k] = 0;
		  if(A[i][j][k]) {
			 printf("[%s:%d] A[%5d][%5d][%5d] = %5d\n", __FILE__, __LINE__, i, j, k, A[i][j][k]);
		  }
		}
	 }
  }
#endif // #if 0 // print matrices
}


/**
 * Approximate XOR differential probability of addition from a product of
 * order-bit window counts.
 *
 * For every bit offset i in [0, WORD_SIZE - order] the order-bit slices of
 * da, db, dc starting at bit i are looked up in A and the integer product of
 * the counts is accumulated (and printed). The product is finally divided by
 * ALL_WORDS * ALL_WORDS.
 *
 * Fixes: the running count is a uint64_t but was printed with %lld without a
 * cast; the argument is now converted to match the format. The order is also
 * range-checked, because order == 0 makes the mask shift undefined and
 * order > WORD_SIZE makes the loop bound wrap around.
 *
 * NOTE(review): the 64-bit product can wrap when many windows have large
 * counts; this is unchanged from the original.
 *
 * @param A      count table as filled by xdp_add_approx_sf().
 * @param A_len  must equal 2^order.
 * @param da, db input XOR differences.
 * @param dc     output XOR difference.
 * @param order  window width, 1 <= order <= WORD_SIZE and order < 32.
 * @return product of the window counts divided by ALL_WORDS * ALL_WORDS.
 */
double xdp_add_approx(uint32_t*** A, const uint32_t A_len, 
							 const WORD da, const WORD db, const WORD dc, 
							 uint32_t order)
{
  assert(order >= 1);
  assert(order <= WORD_SIZE);
  assert(order < 32);

  const uint32_t N = (1U << order);
  assert(A_len == N);

  const uint32_t mask_order = (0xffffffff >> (32 - order)); // mask order LS bits
  uint64_t cnt = 1;
  for(uint32_t i = 0; i <= (WORD_SIZE - order); i++) {
    const WORD da_i = (da >> i) & mask_order;
    const WORD db_i = (db >> i) & mask_order;
    const WORD dc_i = (dc >> i) & mask_order;
    cnt *= A[da_i][db_i][dc_i];
    printf("[%s:%d] i = %2d | %X %X %X | cnt %lld\n", __FILE__, __LINE__, i, da_i, db_i, dc_i, (long long int)cnt);
  }
  const uint64_t all = (ALL_WORDS * ALL_WORDS);
  const double p = (double)cnt / (double)all;
  return p;
}


/* --- */

void test_sub_approx()
{
  assert(WORD_SIZE <= 16);
  uint32_t sub_nequal_o1 = 0;
  uint32_t sub_nequal_o2 = 0;
  for(WORD x = 0; x < ALL_WORDS; x++) {
	 for(WORD y = 0; y < ALL_WORDS; y++) {
		WORD z = SUB(x, y);
		WORD z_o1 = sub_approx_ord_1(x, y);
		if(z == z_o1) {
		  sub_nequal_o1++;
		}
		WORD z_o2 = sub_approx_ord_2(x, y);
		if(z == z_o2) {
		  sub_nequal_o2++;
		}
	 }
  }
  uint64_t nall = (ALL_WORDS * ALL_WORDS);
  double sub_ratio_o1 = ((double)sub_nequal_o1 / (double)nall);
  printf("[%s:%d] #equal o1: [%5d / %5lld] %10.9f %4.2f %%\n", __FILE__, __LINE__, sub_nequal_o1, (long long int)nall, sub_ratio_o1, (sub_ratio_o1 * 100.00));
  double sub_ratio_o2 = ((double)sub_nequal_o2 / (double)nall);
  printf("[%s:%d] #equal o2: [%5d / %5lld] %10.9f %4.2f %%\n", __FILE__, __LINE__, sub_nequal_o2, (long long int)nall, sub_ratio_o2, (sub_ratio_o2 * 100.00));
}

/* --- */
#if 0									  // DEBUG
		  for(uint32_t i = 0; i < ds_array_new.len; i++) {
			 assert(ds_array_new.D[i] == ds_array->D[i]);
			 assert(ds_array_new.S[i] == ds_array->S[i]);
		  }
		  for(uint32_t i = 0; i < 2; i++) {
			 assert(ds_array_new.pc_pair.plaintext_first[i] == ds_array->pc_pair.plaintext_first[i]);
			 assert(ds_array_new.pc_pair.plaintext_second[i] == ds_array->pc_pair.plaintext_second[i]);
			 assert(ds_array_new.pc_pair.ciphertext_first[i] == ds_array->pc_pair.ciphertext_first[i]);
			 assert(ds_array_new.pc_pair.ciphertext_second[i] == ds_array->pc_pair.ciphertext_second[i]);
		  }
#endif  // #if 1

/* --- */

	 //#if RC5_FULL_FIB
	 //	 b_found = rc5_ds_array_is_found(*ds_array, *goup_diff_vec);
	 //	 assert(b_found == false);
	 //#endif // #if RC5_FULL_FIB
	 //	 boost::unordered_map<rc5_goup_diffs_t, WORD, rc5_goup_diffs_hash, rc5_goup_diffs_equal_to>::iterator variants_iter = goup_variants_hash_map->find(*ds_array);
	 //	 b_found = (variants_iter != goup_variants_hash_map->end());

/* --- */
#if RC5_FULL_FIB
  if(depth == 2){
	 if(ds_array->D[depth] != 0) {
		return 0;
	 }
  }
  if(depth == 1) {
	 if(ds_array->D[depth] != 0x80000000) {
		return 0;
	 }
  }
  if(depth == 0) {
	 if(ds_array->D[depth] != 0x80000000) {
		return 0;
	 }
  }
#endif
#if RC5_FULL_FIB
	 assert(ds_array->D[depth] == 0x80000000);
	 assert(ds_array->D[depth + 1] == 0x80000000);
	 assert(ds_array->D[depth + 2] == 0);
#endif

/* --- */
/*

Average probabilities and hamming weights of good pairs over 2^27 CP: 

  sum_hw = 1.00 1.00 1.00 0.00 1.15 1.27 0.75 1.41 1.48 1.99 2.81 3.59 4.31 6.00 7.88 8.95 9.47 10.61 12.19
[./src/rc5-dc.cc:3113] sum_prob_arr = 0.00 0.00 0.00 0.00 -0.92 -1.05 -0.48 -1.32 -1.14 -1.21 -2.01 -2.64 -2.94 -3.82 -4.88 -6.38 -7.81 -6.22 -7.21

Average over 2^25

hw 1.00 1.00 1.00 0.00 1.50 1.11 1.06 1.56 1.89 2.44 3.06 3.06 5.00 5.89 6.78 9.22 10.00 11.22 12.50
[./src/rc5-dc.cc:3113] sum_prob_arr = 0.00 0.00 0.00 0.00 -1.32 -1.35 -0.63 -1.37 -1.47 -1.33 -1.94 -1.87 -3.29 -4.57 -5.86 -7.64 -7.69 -8.06 -10.07

hw 1.00 1.00 1.00 0.00 1.44 1.38 0.81 1.44 1.75 1.81 3.00 4.12 4.56 5.06 5.69 6.06 8.31 10.00 11.94
[./src/rc5-dc.cc:3113] sum_prob_arr = 0.00 0.00 0.00 0.00 -1.25 -1.27 -0.33 -1.09 -1.26 -1.20 -1.85 -2.42 -2.43 -2.43 -4.08 -5.28 -4.79 -6.94 -9.56


 */

/* --- */

/*
 * Disabled experiment (the whole fragment is inside #if 0): per-depth
 * probability thresholds used when generating the mid-round difference
 * set. The concrete depth / D[] indices quoted in the comments assume
 * RC5_FIB_LEN = 8.
 */
#if 0
#if 1 // depth = 5
		if(depth == (RC5_FIB_LEN - 3)) { // depth = 5
		  p_thres = ((double)1.0 / (double)(1U << 6)); // 2^-6
		  //		  p_thres = ((double)1.0 / (double)(1U << 3));
		  const uint32_t dy = ds_array->D[depth + 1]; // D[6]
		  rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
														 dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#endif // #if 1 // depth = 5
#if 1 // depth = 4
		if(depth == (RC5_FIB_LEN - 4)) { // depth = 4
		  p_thres = ((double)1.0 / (double)(1U << 6)); // 2^-6
		  //		  p_thres = ((double)1.0 / (double)(1U << 3));
		  const uint32_t dy = ds_array->D[depth + 1]; // D[5]
		  rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
														 dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#endif // #if 1 // depth = 4
#if 1 // depth = 3
		if(depth == (RC5_FIB_LEN - 5)) { // depth = 3
		  p_thres = ((double)1.0 / (double)(1U << 5)); // 2^-5
		  //		  p_thres = ((double)1.0 / (double)(1U << 2));
		  const uint32_t dy = ds_array->D[depth + 1]; // D[4]
		  rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
														 dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); 
		}
#endif // #if 1 // depth = 3
#endif // #if 0

/* --- */

/* 
Adding XOR at the end only, and NOT in the middle, is better than adding XOR everywhere.

XOR at MIDDLE and END: 15 filtered /  3 good (76 variants)  / 46 good total / 2^27 CP
XOR at MIDDLE        : 30 filtered / 15 good (235 variants) / 68 good total / 2^27 CP
XOR at END           : 10 filtered /  7 good (? variants - killed)   / 131 good total / 2^27 CP
XOR at END           :  7 filtered /  4 good (? variants - killed)   / 170 good total / 2^27 CP

 */

/* 
	[./src/rc5-dc.cc:2897] fib_array = [0] 2 [1] 3 [2] 4 [3] 5 [4] 8 [5] 13 [6] 13 [7] 13
ADD XOR Always
middle
[./src/rc5-dc.cc:3029] (   1614147 /  134217728) #Filtered          1
[./src/rc5-dc.cc:3048] Good #         1 (44000000, 3038E054, 9038EF57, 14, 21, 1)
[./src/rc5-dc.cc:3029] (   1640770 /  134217728) #Filtered          2
[./src/rc5-dc.cc:3048] Good #         2 ( 40000A0, CD22CD76, 4882C179,  5, 22, 1)
[./src/rc5-dc.cc:3029] (   2446301 /  134217728) #Filtered          3
[./src/rc5-dc.cc:3029] (   8197484 /  134217728) #Filtered          4
[./src/rc5-dc.cc:3048] Good #         3 (1A000000, A3F91C04, A2D8FC04, 25,  0, 1)
[./src/rc5-dc.cc:3029] (   8685832 /  134217728) #Filtered          5
[./src/rc5-dc.cc:3048] Good #         4 (  320040, 65753CC0, E6094840,  1,  3, 1)
[./src/rc5-dc.cc:3029] (  12597103 /  134217728) #Filtered          6
[./src/rc5-dc.cc:3048] Good #         5 ( 232A000, 760C56CA, 760C5AB5, 22,  0, 1)
[./src/rc5-dc.cc:3029] (  15369635 /  134217728) #Filtered          7
[./src/rc5-dc.cc:3029] (  17129586 /  134217728) #Filtered          8
[./src/rc5-dc.cc:3029] (  19603782 /  134217728) #Filtered          9
[./src/rc5-dc.cc:3048] Good #         6 ( 7FC1800, 30939ED6, 2BE1AAD6,  5, 27, 1)
[./src/rc5-dc.cc:3029] (  23805353 /  134217728) #Filtered         10
[./src/rc5-dc.cc:3029] (  28346658 /  134217728) #Filtered         11
[./src/rc5-dc.cc:3029] (  28439083 /  134217728) #Filtered         12
[./src/rc5-dc.cc:3029] (  29623476 /  134217728) #Filtered         13
[./src/rc5-dc.cc:3048] Good #         7 (1ED00000,  1C9FA8C, 3D75FA8D,  5, 25, 1)
[./src/rc5-dc.cc:3029] (  33303932 /  134217728) #Filtered         14
[./src/rc5-dc.cc:3048] Good #         8 ( 5020000, C3C77275, 43C4BA69, 19, 30, 1)


 */

/* 
#Rounds 8
WORD_SIZE 32
RC5_NTEXTS 2^23.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
RC5_XDP_ADD_MID_ROUND_P_THRES 2^-7.64
RC5_XDP_ADD_LAST_ROUND_P_THRES 2^-7.64
#Filtered pairs: 2 (2^1.000000)
#Good pairs among filtered: 2
#Good pairs total: 8
#GoUP sets of trails: 2 (2^1.000000)
[./tests/rc5-tests.cc:487] #GoUP sets of trails: 2 (2^1.000000)
[./src/rc5-dc.cc:479] Enter rc5_last_round_eq_x_params_hash_map()
[./src/rc5-dc.cc:571] Unique variants = 33 (2^5.044394), #All GoUP variants = 10024081 (2^23.256967), cnt_vec_1d = 2 (2^1.000000)
[./src/rc5-dc.cc:423] Enter rc5_last_round_eq_x_params_hash_map_count_good()
[./src/rc5-dc.cc:424] Filtered params hash map size 33
[./src/rc5-dc.cc:450] Good # 1 ( 5801800, 5A0F4AC7, 5D093E87, 27, 31, 1)
[./src/rc5-dc.cc:450] Good # 2 (38008000, 792B1ADE, CB2EFADD,  3, 27, 1)
[./src/rc5-dc.cc:970] Enter rc5_pairs_classify_by_last_round_rot_const() Hash map size 33
[./src/rc5-dc.cc:1258] Enter rc5_last_round_rot_const_keyrec()
[./src/rc5-dc.cc:1261] R sizes [0 : 31] =   0   0   0  20   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  13   0   0   0   0
[./src/rc5-dc.cc:1272] No pair with rot const = 0 . Returning...
[./src/rc5-dc.cc:1414] Enter rc5_last_round_rot_const_keyrec_bf()
terminate called after throwing an instance of 'std::bad_alloc'
what():  std::bad_alloc
Aborted

real    9m14.333s
user    9m11.042s
sys     0m1.776s

 */


/* --- */

/* 
#Rounds 8
WORD_SIZE 32
RC5_NTEXTS 2^27.00
RC5_FIXED_KEY 1
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
RC5_FILTER_GOUP_DIFF_SET 1
#Filtered pairs: 25 (2^4.643856)
#Good pairs among filtered: 11
#Good pairs total: 86
#GoUP sets of trails: 25 (2^4.643856)
[./tests/rc5-tests.cc:487] #GoUP sets of trails: 25 (2^4.643856)
[./src/rc5-dc.cc:479] Enter rc5_last_round_eq_x_params_hash_map()
[./src/rc5-dc.cc:571] Unique variants = 182 (2^7.507795), #All GoUP variants = 691103 (2^19.398541), cnt_vec_1d = 25 (2^4.643856)
[./src/rc5-dc.cc:423] Enter rc5_last_round_eq_x_params_hash_map_count_good()
[./src/rc5-dc.cc:424] Filtered params hash map size 182
[./src/rc5-dc.cc:450] Good # 1 (F7000E80,  D2EA318, 8DAE4919, 16, 17, 1)
[./src/rc5-dc.cc:450] Good # 2 (177C0000, 8FC563CA, 8FC547A6, 17, 29, 1)
[./src/rc5-dc.cc:450] Good # 3 (2BE00000, 684CA773, ADB4A773,  1, 30, 1)
[./src/rc5-dc.cc:450] Good # 4 (    8200, CA8E82B1, E28E82B1,  8,  0, 1)
[./src/rc5-dc.cc:450] Good # 5 ( C690000, EA81928D, EA80DC8D, 19,  1, 1)
[./src/rc5-dc.cc:450] Good # 6 (1F005080, 8D4A22EE, 754B02ED, 20,  1, 1)
[./src/rc5-dc.cc:450] Good # 7 ( 9000900, C3B861C6, C2FC604A, 10,  3, 1)
[./src/rc5-dc.cc:450] Good # 8 (  A66800, B98406A9, B8C60929,  1, 27, 1)
[./src/rc5-dc.cc:450] Good # 9 ( 6D00180, A78926FA, A629112E, 14,  1, 1)
[./src/rc5-dc.cc:450] Good #10 (   10740, 29CCF44C, 354CF44E, 17, 30, 1)
[./src/rc5-dc.cc:450] Good #11 (   38000, 318A9296, 300A9296,  5,  0, 1)
[./tests/rc5-tests.cc:542] Test OK!

real    5m55.399s
user    5m54.366s
sys     0m0.068s
 */

/* --- */

  // Debug fragment: compares D[depth] against fixed values at depths 5, 4
  // and 3. The early returns are commented out, so as written these checks
  // have no effect.
  if(depth == 5) {
	 if(ds_array->D[depth] != 0) {
		//		return 0;
	 }
  }
  if(depth == 4){
	 if(ds_array->D[depth] != 0x4000) {
		//		return 0;
	 }
  }
  if(depth == 3){
	 if(ds_array->D[depth] != 0x4000) {
		//		return 0;
	 }
  }


/* --- */

		  //		if(depth == (RC5_FIB_LEN - 3)) { // depth = 5
		//		if((depth == (RC5_FIB_LEN - 3)) || (depth == (RC5_FIB_LEN - 4))) { // depth = 5, 4
		//		if((depth == (RC5_FIB_LEN - 3)) || (depth == (RC5_FIB_LEN - 4)) || (depth == (RC5_FIB_LEN - 5))) { // depth = 5, 4, 3
		  //		  p_thres = 0.1;


/* --- */

/*
 * Similar to \ref rc5_filter_go_up_ext_i but uses the Fibonacci
 * filter over the whole trail (as opposed to only the last 8 rounds)
 * and propagates differences all the way up to the input differces.
 * 
 * \see rc5_filter_go_up_ext_i
 */
uint32_t rc5_filter_go_up_nl_fullfib_i(const uint32_t depth, 
													const gsl_matrix* A_last[2][2][2],
													const gsl_vector* L_last,
													const gsl_vector* C_last,
													const gsl_matrix* A_mid[2][2],
													const gsl_vector* L_mid,
													const gsl_vector* C_mid,
													uint32_t* count, 
													const std::vector<uint32_t> fib_array, 
													const rc5_goup_diffs_t* ds_array,
													std::vector<rc5_goup_diffs_t>* goup_diff_vec)
{
  uint32_t flag = 0;
  uint32_t s;
  uint32_t left = RC5_FEISTEL_LEFT;
  //  uint32_t right = RC5_FEISTEL_RIGHT;
#if 1									  // DEBUG
  assert(ds_array->D.size() == ((2*NROUNDS) + 3));
  assert(ds_array->D.size() == ds_array->S.size());
  assert(ds_array->D.size() == ds_array->len);
#endif
#if 1
  if(hw32(ds_array->D[depth]) > fib_array[depth]) {
	 return 0;						  // cut the search tree
  }
#endif
  if(depth != 0) {
	 for(s = 0; s < WORD_SIZE; s++) {	  // Guess S[6] over all possibilities 0..31

		std::vector<uint32_t> dx_vec;
		double p_thres = RC5_XDP_ADD_MID_ROUND_P_THRES;
		const uint32_t dx_prev = RC5_ROTL(ds_array->D[depth], s); // D[5] <<< S[5]
		const uint32_t rot_const_prev = s;
		const uint32_t hw_thres = WORD_SIZE;//fib_array[depth - 1]; // WORD_SIZE;
		// Generate a set of diffs dx
#if 1 // if(depth == (RC5_FIB_LEN - 2))
		if(depth == (RC5_FIB_LEN - 2)) { // depth = 6
		  pair_t pc_pair = ds_array->pc_pair;
		  WORD y = pc_pair.ciphertext_first[left]; // y[7] = left ciphertext 1
		  WORD yy = pc_pair.ciphertext_second[left]; // yy[7] = left ciphertext 2
		  assert((y ^ yy) == ds_array->D[ds_array->len - 2]);
		  rc5_xdp_add_last_round_diff_set_out(A_last, L_last, C_last, 
														  y, yy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); // (y[7], yy[7] -> {dx[5]})
#if 0 // DEBUG
		  if(dx_vec.size()) {
			 printf("\r[%s:%d] depth %2d p_thres 2^%4.2f (y %8X yy %8X) -> dx set size %10d #variants %15d", 
					  __FILE__, __LINE__, depth, log2(p_thres), y, yy, dx_vec.size(), *count);
			 fflush(stdout);
		  }
#endif // #if 0 // EDBUG
		}
#endif // #if 1 // if(depth == (RC5_FIB_LEN - 2))
		//		if(depth >= 3) {
#if 0 // if(depth == (RC5_FIB_LEN - 3)) {// depth = 5
		//		if((depth == (RC5_FIB_LEN - 3)) || (depth == (RC5_FIB_LEN - 4))) { // depth = 5, 4
		if((depth == (RC5_FIB_LEN - 3)) || (depth == (RC5_FIB_LEN - 4)) || (depth == (RC5_FIB_LEN - 5))) { // depth = 5, 4, 3
		  p_thres = 0.01;
		  const uint32_t dy = ds_array->D[depth + 1]; // D[6]
		  rc5_xdp_add_mid_round_diff_set_out(A_mid, L_mid, C_mid, 
														 dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec); // (D[5] -> {dx[4]})
#if 0 // DEBUG
		  if(dx_vec.size()) {
			 printf("\r[%s:%d] depth %2d p_thres 2^%4.2f hw_thres %2d dy %8X -> dx set size %10d #variants %15d", 
					  __FILE__, __LINE__, depth, log2(p_thres), hw_thres, dy, dx_vec.size(), *count);
			 fflush(stdout);
		  }
#endif // #if 0 // EDBUG
		}
#endif // #if 1 // if(depth == (RC5_FIB_LEN - 3))
		dx_vec.push_back(ds_array->D[depth + 1]); // dx[5] == D[7]

		for(uint32_t i = 0; i < dx_vec.size(); i++) {
		  WORD dx = dx_vec[i]; // dx[5]
		  //		  assert((RC5_ROTR(dx, s) & RC5_ROT_MASK) == 0); // <-- !
		  if((RC5_ROTR(dx, s) & RC5_ROT_MASK) == 0) { // if (dx[5] >>> S[6]) = 0

			 rc5_goup_diffs_t ds_array_new = *ds_array;
			 ds_array_new.D[depth - 1] =  RC5_ROTR(dx, s) ^ ds_array_new.D[depth]; // D[5] = (dx[5] >>> S[6]) ^ D[6]		 
			 ds_array_new.S[depth] = s; // S[6]
			 // recursive call for correct count of variants
			 if(rc5_filter_go_up_nl_fullfib_i(depth - 1, A_last, L_last, C_last, A_mid, L_mid, C_mid, count, fib_array, &ds_array_new, goup_diff_vec)) {
				flag = 1;
			 }
		  }
		}
	 }

  } else {							  // reached the top
	 (*count)++;					  // accumulate num. of variants
	 goup_diff_vec->push_back(*ds_array);
#if 0 // DEBUG
	 printf("\r[%s:%d] depth %2d #variants %15d", __FILE__, __LINE__, depth, *count);
	 fflush(stdout);
#endif // #if 0 // DEBUG
	 return 1;
  }
  return flag;
}


/* --- */
/* 
#Rounds 8
WORD_SIZE 32
RC5_NTEXTS 2^25.00
RC5_FIXED_KEY 1
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
#Filtered pairs: 1 (2^0.000000)
#Good pairs among filtered: 1
#Good pairs total: 21
#GoUP sets of trails: 1 (2^0.000000)
[./tests/rc5-tests.cc:547] #GoUP sets of trails: 1 (2^0.000000)
WORD g_good_pairs[21][2][2] = {
{{0x B3C3666, 0xA30B9391}, {0x8B3C3666, 0x230B9391}},
{{0x335EC39D, 0x97BA8289}, {0xB35EC39D, 0x17BA8289}},
{{0x9E18AA25, 0x1D634D6E}, {0x1E18AA25, 0x9D634D6E}},
{{0x595C87D4, 0xD77958BC}, {0xD95C87D4, 0x577958BC}},
{{0x1E6033D1, 0x2C078E2A}, {0x9E6033D1, 0xAC078E2A}},
{{0xE621AC1E, 0xFD6AA6BA}, {0x6621AC1E, 0x7D6AA6BA}},
{{0xCFAFC08C, 0xD1764EEA}, {0x4FAFC08C, 0x51764EEA}},
{{0x658C48D3, 0xDD5EF362}, {0xE58C48D3, 0x5D5EF362}},
{{0x825B07B2, 0x302CA6E1}, {0x 25B07B2, 0xB02CA6E1}},
{{0x3DA47F65, 0xF4BF38B3}, {0xBDA47F65, 0x74BF38B3}},
{{0x890DCD44, 0xF0D54844}, {0x 90DCD44, 0x70D54844}},
{{0x202E86E9, 0x327617EA}, {0xA02E86E9, 0xB27617EA}},
{{0x34CA0D5B, 0xD75DEBD1}, {0xB4CA0D5B, 0x575DEBD1}},
{{0x1629A38E, 0xFCA2F2EA}, {0x9629A38E, 0x7CA2F2EA}},
{{0x5408E12F, 0x4284B12F}, {0xD408E12F, 0xC284B12F}},
{{0xFCF003D9, 0xCE9E451F}, {0x7CF003D9, 0x4E9E451F}},
{{0xF914E538, 0x75811EFE}, {0x7914E538, 0xF5811EFE}},
{{0xD38D94F2, 0x7A3D49B3}, {0x538D94F2, 0xFA3D49B3}},
{{0x28F86DB1, 0xA83D5350}, {0xA8F86DB1, 0x283D5350}},
{{0xF0DDA1D2, 0x7F07CDCA}, {0x70DDA1D2, 0xFF07CDCA}},
{{0x18C0DB4B, 0x EAD7D1E}, {0x98C0DB4B, 0x8EAD7D1E}}};
[./src/rc5-dc.cc:454] Enter rc5_last_round_eq_x_params_hash_map()
[./src/rc5-dc.cc:546] Unique variants = 22 (2^4.459432), #All GoUP variants = 725519 (2^19.468654), cnt_vec_1d = 1 (2^0.000000)
  [./src/rc5-dc.cc:398] Enter rc5_last_round_eq_x_params_hash_map_count_good()
  [./src/rc5-dc.cc:399] Filtered params hash map size 22
  [./src/rc5-dc.cc:425] Good # 1 (    4000, 1777644F, E77764C7, 21, 25, 1)
  [./src/rc5-dc.cc:945] Enter rc5_pairs_classify_by_last_round_rot_const() Hash map size 22
  [./src/rc5-dc.cc:1233] Enter rc5_last_round_rot_const_keyrec()
  [./src/rc5-dc.cc:1236] R sizes [0 : 31] =   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  22   0   0   0   0   0   0   0   0   0   0
  [./src/rc5-dc.cc:1247] No pair with rot const = 0 . Returning...
  [./tests/rc5-tests.cc:602] Test OK!
 */

/* --- */

uint32_t rc5_filter_go_up_nl_i(const uint32_t depth, 
										 const gsl_matrix* A[2][2],
										 const gsl_vector* L,
										 const gsl_vector* C,
										 uint32_t* count, 
										 const std::vector<uint32_t> fib_array, 
										 const rc5_goup_diffs_t* ds_array,
										 std::vector<rc5_goup_diffs_t>* goup_diff_vec)
{
  uint32_t flag = 0;
  uint32_t s;
#if 1									  // DEBUG
  assert(ds_array->D.size() == (RC5_FIB_LEN  + 1));
  assert(ds_array->D.size() == ds_array->S.size());
  assert(ds_array->D.size() == ds_array->len);
#endif
#if 1
  if(hw32(ds_array->D[depth]) > fib_array[depth]) {
	 return 0;						  // cut the search tree
  }
#endif
  if(depth != 0) {
	 for(s = 0; s < WORD_SIZE; s++) {	  // Guess S[6] over all possibilities 0..31

		std::vector<uint32_t> dx_vec;
		// Generate a set of diffs dx
#if 1
		if(depth == (RC5_FIB_LEN - 2)) {
		//		if((depth == (RC5_FIB_LEN - 2)) || (depth == (RC5_FIB_LEN - 3))) {
		//		if(depth >= 3) {
#if 0 // p_thres
		  //		  double p_thres_arr[RC5_FIB_LEN] = {0.1, 0.1, 0.08, 0.05, 0.01, 0.005, 0.001, 0.001};
#if (WORD_SIZE == 16)
		  double p_thres_arr[RC5_FIB_LEN] = {0.0, 0.0, 0.0, 0.3, 0.3, 0.2, 0.01, 0.0};
#endif // #if (WORD_SIZE == 16)
#if (WORD_SIZE == 32)
		  double p_thres_arr[RC5_FIB_LEN] = {0.0, 0.0, 0.0, 0.3, 0.3, 0.2, 0.01, 0.0};
#endif // #if (WORD_SIZE == 16)
		  double p_thres = p_thres_arr[depth];
#else // #if 1 // p_thres
		  double p_thres = RC5_XDP_ADD_MID_ROUND_P_THRES;
#endif // #if 1 // p_thres

		  const uint32_t dy = ds_array->D[depth + 1]; // D[7]
		  const uint32_t dx_prev = RC5_ROTL(ds_array->D[depth], s); // D[6] <<< S[6]
		  const uint32_t rot_const_prev = s;
		  const uint32_t hw_thres = WORD_SIZE;//fib_array[depth - 1];
		  rc5_xdp_add_mid_round_diff_set_out(A, L, C, dy, dx_prev, rot_const_prev, p_thres, hw_thres, &dx_vec);
#if 0 // EDBUG
		  if(dx_vec.size()) {
			 printf("\r[%s:%d] depth %2d p_thres 2^%4.2f hw_thres %2d dy %8X -> dx set size %10d #variants %15d", 
					  __FILE__, __LINE__, depth, log2(p_thres), hw_thres, dy, dx_vec.size(), *count);
			 fflush(stdout);
		  }
#endif // #if 0 // EDBUG
		} //else {
#endif
		dx_vec.push_back(ds_array->D[depth + 1]);
		  //		}
		//#endif // Generate a set of diffs dx

		for(uint32_t i = 0; i < dx_vec.size(); i++) {
		  WORD dx = dx_vec[i];
		  //		  assert((RC5_ROTR(dx, s) & RC5_ROT_MASK) == 0); // <-- !
		  if((RC5_ROTR(dx, s) & RC5_ROT_MASK) == 0) { // if (D[7] >>> S[6]) = 0

			 rc5_goup_diffs_t ds_array_new = *ds_array;
			 ds_array_new.D[depth - 1] =  RC5_ROTR(dx, s) ^ ds_array_new.D[depth]; // D[5] = (D[7] >>> S[6]) ^ D[6]		 
			 ds_array_new.S[depth] = s; // S[6]
			 // recursive call for correct count of variants
			 if(rc5_filter_go_up_nl_i(depth - 1, A, L, C, count, fib_array, &ds_array_new, goup_diff_vec)) {
				flag = 1;
			 }
		  }
		}
	 }

  } else {							  // reached the top
	 (*count)++;					  // accumulate num. of variants
	 goup_diff_vec->push_back(*ds_array);
#if 0 // DEBUG
	 printf("\r[%s:%d] depth %2d #variants %15d", __FILE__, __LINE__, depth, *count);
	 fflush(stdout);
#endif // #if 0 // DEBUG
	 return 1;
  }
  return flag;
}

/* --- */
/*
 * Print a vector of key candidates sorted by counter.
 */
/* 
void rc5_key_candidates_print(const std::vector<rc5_key_t> key_vec, WORD key_correct)
{
  std::vector<rc5_key_t>::const_iterator key_cand_iter = key_vec.begin();
  uint32_t i_key = 0;
  for(key_cand_iter = key_vec.begin(); key_cand_iter != key_vec.end(); key_cand_iter++) {
	 WORD key_cand = key_cand_iter->value;
	 uint64_t key_cnt = key_cand_iter->counter;
	 printf("[%5d] %8X %lld ", i_key, key_cand, key_cnt);
	 if(key_cand == key_correct) {
		printf("<-");
	 }
	 printf("\n");
	 i_key++;
  }
}

 */

/* 
Average Hamming weights WORD_SIZE 16, 8 rounds

0.00 1.00 1.00 1.00 0.00 1.32 1.43 1.22 1.66 2.24 1.99 | 2.75 2.69 2.29 3.00 3.84 4.51 4.16 4.96
0.00 1.00 1.00 1.00 0.00 1.34 1.60 1.12 1.79 2.30 2.15 | 2.71 2.83 2.61 3.00 3.77 4.17 4.41 4.84
0.00 1.00 1.00 1.00 0.00 1.32 1.35 0.83 1.36 1.82 1.58 | 2.65 2.50 2.56 2.73 3.79 4.00 4.55 4.89
1.00 1.00 1.00 0.00 1.38 1.68 1.25 1.72 2.25 2.30 3.17 | 2.82 2.38 3.00 3.73 4.13 4.33 4.67 5.80
1.00 1.00 1.00 0.00 1.35 1.56 1.23 1.68 2.06 1.81 2.29 | 2.42 2.42 2.88 3.50 4.03 4.56 4.81 5.92

*/

/* --- */

		  //		  rc5_xdp_add_mid_round_diff_set_out(dy, dx_prev, p_thres, hw_thres, &dx_vec);

/* --- */

/*
 * Fixed key: ~= 15 good pairs over 2^24 CP
 * [./src/rc5-dc.cc:202] key[16] = {0x18, 0xF1, 0x9D, 0x37, 0xA7, 0xDD, 0xEB, 0x98, 0x72, 0x81, 0xAE, 0x73, 0x4F, 0xA0, 0xCB, 0x19};
 * [./src/rc5-dc.cc:307]  8R p(80000000 80000000 -> *) = 0.000001 2^-20.093109 | 15 2^24.000000
 */
//const uint32_t g_key[16] = {0x18, 0xF1, 0x9D, 0x37, 0xA7, 0xDD, 0xEB, 0x98, 0x72, 0x81, 0xAE, 0x73, 0x4F, 0xA0, 0xCB, 0x19};
// keys with solutions
//const uint32_t g_key[16] = {0x8C, 0x8C, 0x13, 0x40, 0xC9, 0x70, 0x77, 0x70, 0xD, 0x86, 0x60, 0x30, 0x5D, 0xFA, 0x92, 0xE0};
//const uint32_t g_key[16] = {0x7D, 0xF3, 0x5F, 0xB6, 0xF2, 0xCB, 0x87, 0x58, 0x98, 0xBF, 0x5A, 0xCD, 0x4, 0x22, 0x16, 0xC6};


/* --- */
/*
 * Check a go-up variant for consistency: walks depth from
 * (RC5_FIB_LEN - 2) down to 1 and, for each level, evaluates
 * rc5_xdp_add_mid_round() on D[depth + 1].
 *
 * NOTE(review): D[depth + 1] is passed for both of the last two
 * arguments of rc5_xdp_add_mid_round() -- confirm that this is intended.
 *
 * \param A, L, C forwarded to rc5_xdp_add_mid_round().
 * \param ds variant to check; its len must equal RC5_FIB_LEN + 1.
 * \return false as soon as one level yields probability exactly 0.0,
 *         true otherwise.
 */
bool rc5_goup_variant_has_solutions(const gsl_matrix* A[2][2], const gsl_vector* L, const gsl_vector* C,
												const rc5_goup_diffs_t ds)
{
  const uint32_t len = ds.len;
  assert(len == (RC5_FIB_LEN  + 1));

  // Start at RC5_FIB_LEN - 2 so that the first difference read is D[RC5_FIB_LEN - 1].
  for(uint32_t depth = (RC5_FIB_LEN - 2); depth != 0; --depth) {
	 const uint32_t dy = ds.D[depth + 1];
	 const double prob = rc5_xdp_add_mid_round((const gsl_matrix*(*)[2])A, L, C, dy, dy);
	 if(prob == 0.0) {
		return false;
	 }
  }
  return true;
}

void rc5_ds_vec_2d_remove_inconsistent(const std::vector<std::vector<rc5_goup_diffs_t>> ds_vec_2d,
													std::vector<std::vector<rc5_goup_diffs_t>>* ds_vec_2d_out)
{
  gsl_vector* L;
  gsl_vector* C;

  L = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);
  C = gsl_vector_calloc(RC5_MID_ROUND_MSIZE);

  gsl_vector_set_all(L, 1.0);
  gsl_vector_set_all(C, 0.0);
  gsl_vector_set(C, RC5_MID_ROUND_ISTATE, 1.0);

  gsl_matrix* A[2][2];
  gsl_matrix* AA[2][2][2][2];

  rc5_mid_round_eq_alloc_matrices_2d(A);
  rc5_mid_round_eq_alloc_matrices_4d(AA);

  rc5_mid_round_eq_xy_sf(AA);
  rc5_mid_round_eq_add_matrices(A, AA);
  rc5_mid_round_eq_normalize_matrices(A); 

  std::vector<std::vector<rc5_goup_diffs_t>>::const_iterator vec_iter_2d = ds_vec_2d.begin();
  while(vec_iter_2d != ds_vec_2d.end()) {
	 const std::vector<rc5_goup_diffs_t> ds_vec = *vec_iter_2d;
	 std::vector<rc5_goup_diffs_t> ds_vec_out;

	 std::vector<rc5_goup_diffs_t>::const_iterator vec_iter = ds_vec.begin();
	 while(vec_iter != ds_vec.end()) {
		rc5_goup_diffs_t ds = *vec_iter;

		bool b_has_solution = rc5_goup_variant_has_solutions((const gsl_matrix*(*)[2])A, L, C, ds);
		if(b_has_solution) {
		  ds_vec_out.push_back(ds);
		} else {
		  printf("[%s:%d] Erased variant!\n", __FILE__, __LINE__);
		  assert(1 == 0);
		}
		vec_iter++;
	 }
	 if(ds_vec_out.size() != 0) {
		ds_vec_2d_out->push_back(ds_vec_out);
	 }
#if 1 // DEBUG
	 if(ds_vec_out.size() != ds_vec.size()) {
		printf("[%s:%d] Shrink size OLD %5d NEW %5d\n", __FILE__, __LINE__, ds_vec.size(), ds_vec_out.size());
		assert(1 == 0);
	 }
#endif
	 vec_iter_2d++;
  }
  rc5_mid_round_eq_free_matrices_2d(A);
  rc5_mid_round_eq_free_matrices_4d(AA);
  gsl_vector_free(C);
  gsl_vector_free(L);
}


/* --- */
	 //	 std::vector<uint32_t> dx_vec;
	 //	 const double p_thres = RC5_XDP_ADD_MID_ROUND_P_THRES;
	 //	 rc5_xdp_add_mid_round_diff_set_out(dy, p_thres, &dx_vec);
	 //	 for(uint32_t i = 0; i < dx_vec.size(); i++) {
	 //		WORD dx = dx_vec[i];
	 //	 }

/* --- */

/*
 * Non-lnear (w.r.t. XOR) version of the goUP filter for good pairs
 * for RC5 proposed by [Biryukov, Kushilevitz]
 * 
 * \note (depth + 1) must be equal to the index of the right
 *       ciphertext: \p depth = (RC5_FIB_LEN - 1) = 7 so that D[depth
 *       + 1] = D[RC5_FIB_LEN] = D[8] . For example for \ref
 *       RC5_GOUP_LEVEL = RC5_FIB_LEN - 2 = 6, the filter starts at
 *       depth = 6.
 *
 * \see rc5_filter_go_up_ext_i
 */
uint32_t rc5_filter_go_up_nl_i(const uint32_t depth, 
										 const gsl_matrix* A[2][2][2][2],
										 const gsl_vector* L,
										 const gsl_vector* C,
										 uint32_t* count, 
										 const std::vector<uint32_t> fib_array, 
										 const rc5_goup_diffs_t* ds_array,
										 std::vector<rc5_goup_diffs_t>* goup_diff_vec)
{
  uint32_t flag = 0;
  uint32_t s;
#if 1									  // DEBUG
  assert(ds_array->D.size() == (RC5_FIB_LEN  + 1));
  assert(ds_array->D.size() == ds_array->S.size());
  assert(ds_array->D.size() == ds_array->len);
#endif
#if 1
  if(hw32(ds_array->D[depth]) > fib_array[depth]) {
	 return 0;						  // cut the search tree
  }
#endif
  if(depth != 0) {
	 for(s = 0; s < WORD_SIZE; s++) {	  // Guess S[6] over all possibilities 0..31
		if((RC5_ROTR(ds_array->D[depth + 1], s) & RC5_ROT_MASK) == 0) { // if (D[7] >>> S[6]) = 0

		  rc5_goup_diffs_t ds_array_new = *ds_array;
		  ds_array_new.D[depth - 1] = 
			 RC5_ROTR(ds_array_new.D[depth + 1], s) ^ ds_array_new.D[depth]; // D[5] = (D[7] >>> S[6]) ^ D[6]		  
		  ds_array_new.S[depth] = s; // S[6]
        // recursive call for correct count of variants
		  if(rc5_filter_go_up_nl_i(depth - 1, A, L, C, count, fib_array, &ds_array_new, goup_diff_vec)) {
			 flag = 1;
		  }
		}
	 }

  } else {							  // reached the top
	 (*count)++;					  // accumulate num. of variants
	 goup_diff_vec->push_back(*ds_array);
	 return 1;
  }
  return flag;
}

/* --- */
/*
 * For fixed y,yy generate all output differences dx for which the
 * probability xdp^{+}_LR(y, yy -> dx) over all keys is above a
 * certain threshold.
 *
 * In addition, each solution x satisfies the condition (x >>> r) mod
 * log2(n) for a given rotation constant. 
 *
 * NOTE(review): the description mentions y, yy while this signature
 * takes a single difference dy -- presumably the text was copied from
 * the last-round variant; confirm.
 *
 * Prototype only; no body is present in this fragment.
 *
 * \see rc5_xdp_add_last_round_diff_set_out_i
 */
void rc5_xdp_add_mid_round_diff_set_out_i(const uint32_t i, 
														const double p_thres, 
														const uint32_t rot_const,
														const gsl_matrix* A[2][2], 
														const gsl_vector* L, const gsl_vector* C, 
														const uint32_t dy, const uint32_t dx, const double p,
														std::vector<uint32_t>* dx_vec);


/* --- */

/* 

- 20140526

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/rc5-tests;
[./tests/rc5-tests.cc:864] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF, RC5_XOR = 0
[./tests/rc5-tests.cc:384] RC5_FIXED_KEY 0 | Master key[16] = {0xBC, 0xB9, 0xD1, 0xE2, 0x5C, 0x6A, 0x67, 0xC6, 0x F, 0x55, 0x6A, 0x4B, 0xC3, 0x3E, 0xD9, 0x6F, };
[./tests/rc5-tests.cc:394] RC5_FIXED_KEY 0 | Expanded key[26] = {0xF9637FB9, 0x43A13ED1, 0xE04AA34F, 0x941C36B3, 0xCC04B987, 0x9A8175EE, 0xD4B8175B, 0xFA3E98D7, 0x3E460314, 0x391F677D, 0xDA9C4DA3, 0xB10CAD06, 0xBAADF1D1, 0xAB710DD2, 0x6AB3E9A0, 0xF2AC13E0, 0x7209AB2F, 0x759CE125, 0x85D17C88, 0xC5853938, 0x175F99E9, 0x54F849A0, 0x3C399F0E, 0x B546A06, 0xA5927E57, 0x276DA6DC, };
[./src/rc5-dc.cc:2421] Enter rc5_equal_rot_attack()
[./src/rc5-dc.cc:2454] RC5_FIB_LEN 8 fib_array.size() = 8
[./src/rc5-dc.cc:2456] fib_array = [0] 2 [1] 3 [2] 4 [3] 5 [4] 8 [5] 13 [6] 13 [7] 13
[./src/rc5-dc.cc:2534] (   1450970 /  134217728) #Filtered          1
[./src/rc5-dc.cc:2554] Good #         1 (    7400, A478E729, A45D0729,  6, 30, 1)
[./src/rc5-dc.cc:2534] (   3545213 /  134217728) #Filtered          2
[./src/rc5-dc.cc:2554] Good #         2 (    3600, 73AD331A, 5AAD391A, 15, 15, 1)
[./src/rc5-dc.cc:2534] (   7525508 /  134217728) #Filtered          3
[./src/rc5-dc.cc:2554] Good #         3 (EA000100, 6CC281ED, 41A281AD, 30, 29, 1)
[./src/rc5-dc.cc:2534] (  22213209 /  134217728) #Filtered          4
[./src/rc5-dc.cc:2554] Good #         4 (7C000000, 2A2D3BD6, 2A2D0426, 16,  0, 1)
[./src/rc5-dc.cc:2534] (  28815070 /  134217728) #Filtered          6
[./src/rc5-dc.cc:2554] Good #         5 ( 3CCB800, 7564F630, 1564F5F2, 15, 31, 1)
[./src/rc5-dc.cc:2534] (  37831588 /  134217728) #Filtered          7
[./src/rc5-dc.cc:2554] Good #         6 ( 6800000, CE231303, CE233FB3, 20, 28, 1)
[./src/rc5-dc.cc:2534] (  40870674 /  134217728) #Filtered          8
[./src/rc5-dc.cc:2554] Good #         7 (  6B5000, D7A2FB55, D7CF9955, 31, 31, 1)
[./src/rc5-dc.cc:2534] (  41850663 /  134217728) #Filtered          9
[./src/rc5-dc.cc:2554] Good #         8 ( 6DC0000, 614F1628, 614EC108, 19,  5, 1)
[./src/rc5-dc.cc:2534] (  51764205 /  134217728) #Filtered         11
[./src/rc5-dc.cc:2554] Good #         9 (   1AA00, 7C472E45, 7C461A45, 31, 31, 1)
[./src/rc5-dc.cc:2534] (  53736803 /  134217728) #Filtered         12
[./src/rc5-dc.cc:2554] Good #        10 (     400, 63C7C4D5, 47C7C8D5,  0, 16, 1)
[./src/rc5-dc.cc:2534] (  56685026 /  134217728) #Filtered         13
[./src/rc5-dc.cc:2554] Good #        11 (44000000, C4BF8896, A6BF0096,  0, 17, 1)
[./src/rc5-dc.cc:2534] (  63413741 /  134217728) #Filtered         14
[./src/rc5-dc.cc:2554] Good #        12 ( 15C0000, ADB265A5, B0BBE5A5, 31, 31, 1)
[./src/rc5-dc.cc:2534] (  66922247 /  134217728) #Filtered         16
[./src/rc5-dc.cc:2554] Good #        13 (  2E7000, 75FFF315, 7601CB15, 31, 31, 1)
[./src/rc5-dc.cc:2534] (  80959315 /  134217728) #Filtered         18
[./src/rc5-dc.cc:2554] Good #        14 ( 5360000, 9908ECF5, 9A342CF5, 31, 31, 1)
[./src/rc5-dc.cc:2534] (  84941085 /  134217728) #Filtered         20
[./src/rc5-dc.cc:2554] Good #        15 (   1B800, 7A5495F5, 7A54DDF5, 31, 31, 1)
[./src/rc5-dc.cc:2534] ( 100148370 /  134217728) #Filtered         22
[./src/rc5-dc.cc:2554] Good #        16 (24800000, 8548E085, AD48E085, 31, 31, 1)
[./src/rc5-dc.cc:2534] ( 114450101 /  134217728) #Filtered         24
[./src/rc5-dc.cc:2554] Good #        17 (  716500, BDEA7C3D, 69EA7C17, 18,  1, 1)
[./src/rc5-dc.cc:2534] ( 122856573 /  134217728) #Filtered         27
[./src/rc5-dc.cc:2554] Good #        18 (    1F80, 98F67CD5, 98F6CAC5, 31, 31, 1)
[./src/rc5-dc.cc:rc5_equal_rot_attack():2571] Exit statistics:
#Rounds 8
WORD_SIZE 32
RC5_NTEXTS 2^27.00
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
RC5_DEBUG_HAVE_MEMORY 1
#Filtered pairs: 27 (2^4.754888)
#Good pairs among filtered: 18
#Good pairs total: 240
#GoUP sets of trails: 27 (2^4.754888)
[./tests/rc5-tests.cc:403] #GoUP sets of trails: 27 (2^4.754888)
[./src/rc5-dc.cc:444] Enter rc5_last_round_eq_x_params_hash_map()
[./src/rc5-dc.cc:536] Unique variants = 304 (2^8.247928), #All GoUP variants = 347024 (2^18.404676), cnt_vec_1d = 27 (2^4.754888)
[./src/rc5-dc.cc:388] Enter rc5_last_round_eq_x_params_hash_map_count_good()
[./src/rc5-dc.cc:389] Filtered params hash map size 304
[./src/rc5-dc.cc:415] Good # 1 (    7400, A478E729, A45D0729,  6, 30, 1)
[./src/rc5-dc.cc:415] Good # 2 (EA000100, 6CC281ED, 41A281AD, 30, 29, 1)
[./src/rc5-dc.cc:415] Good # 3 (24800000, 8548E085, AD48E085, 31, 31, 1)
[./src/rc5-dc.cc:415] Good # 4 (44000000, C4BF8896, A6BF0096,  0, 17, 1)
[./src/rc5-dc.cc:415] Good # 5 ( 3CCB800, 7564F630, 1564F5F2, 15, 31, 1)
[./src/rc5-dc.cc:415] Good # 6 (  2E7000, 75FFF315, 7601CB15, 31, 31, 1)
[./src/rc5-dc.cc:415] Good # 7 (   1AA00, 7C472E45, 7C461A45, 31, 31, 1)
[./src/rc5-dc.cc:415] Good # 8 (  716500, BDEA7C3D, 69EA7C17, 18,  1, 1)
[./src/rc5-dc.cc:415] Good # 9 ( 5360000, 9908ECF5, 9A342CF5, 31, 31, 1)
[./src/rc5-dc.cc:415] Good #10 ( 15C0000, ADB265A5, B0BBE5A5, 31, 31, 1)
[./src/rc5-dc.cc:415] Good #11 (    1F80, 98F67CD5, 98F6CAC5, 31, 31, 1)
[./src/rc5-dc.cc:415] Good #12 ( 6800000, CE231303, CE233FB3, 20, 28, 1)
[./src/rc5-dc.cc:415] Good #13 ( 6DC0000, 614F1628, 614EC108, 19,  5, 1)
[./src/rc5-dc.cc:415] Good #14 (    3600, 73AD331A, 5AAD391A, 15, 15, 1)
[./src/rc5-dc.cc:415] Good #15 (     400, 63C7C4D5, 47C7C8D5,  0, 16, 1)
[./src/rc5-dc.cc:415] Good #16 (   1B800, 7A5495F5, 7A54DDF5, 31, 31, 1)
[./src/rc5-dc.cc:415] Good #17 (  6B5000, D7A2FB55, D7CF9955, 31, 31, 1)
[./src/rc5-dc.cc:415] Good #18 (7C000000, 2A2D3BD6, 2A2D0426, 16,  0, 1)
[./tests/rc5-tests.cc:440] Test OK!

real    0m55.501s
user    0m55.319s
sys     0m0.032s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$
v

 */

/* --- */

/*
 * Recursive "go up" filter, variant that derives the candidate
 * differences at each level from an XOR-differential set.
 *
 * Prunes when the Hamming weight of D[depth] exceeds fib_array[depth].
 * For depth != 0 every rotation amount 0 .. WORD_SIZE-1 is tried; for
 * each one a list of candidate differences is produced from D[depth + 1]
 * via xdp_add_input_diff_to_output_dset() and
 * rc5_xdp_add_dset_gen_diff_hamming_limit(). A candidate is followed
 * only if its right-rotation by the guessed amount has no bits inside
 * RC5_ROT_MASK and, at depth == RC5_FIB_LEN - 2, only if
 * rc5_last_round_eq_x_has_solution() returns true for it.
 *
 * \param depth current level; the recursion stops at 0.
 * \param A, L, C passed to rc5_last_round_eq_x_has_solution().
 * \param count incremented once for every variant that reaches depth 0.
 * \param fib_array per-depth Hamming-weight bounds.
 * \param ds_array current differences D[], rotation amounts S[] and the
 *        plaintext/ciphertext pair.
 * \param goup_diff_vec receives a copy of every variant that reaches depth 0.
 * \return 1 if at least one variant below this node reached depth 0,
 *         0 otherwise.
 */
uint32_t rc5_filter_go_up_nl_i(const uint32_t depth, 
										 const gsl_matrix* A[2][2][2][2],
										 const gsl_vector* L,
										 const gsl_vector* C,
										 uint32_t* count, 
										 const std::vector<uint32_t> fib_array, 
										 const rc5_goup_diffs_t* ds_array,
										 std::vector<rc5_goup_diffs_t>* goup_diff_vec)
{
  uint32_t flag = 0;
  uint32_t s;
#if 1									  // DEBUG
  // Sanity checks on the layout of the difference array.
  assert(ds_array->D.size() == (RC5_FIB_LEN  + 1));
  assert(ds_array->D.size() == ds_array->S.size());
  assert(ds_array->D.size() == ds_array->len);
#endif
#if 1
  if(hw32(ds_array->D[depth]) > fib_array[depth]) {
	 return 0;						  // cut the search tree
  }
#endif
  if(depth != 0) {
  //  if(depth != 5) {						 // !!!
	 for(s = 0; s < WORD_SIZE; s++) {	  // Guess S[6] over all possibilities 0..31

#if 0														 // just the maximum
		// Disabled alternative: use only the single difference returned by max_xdp_add_lm().
		uint32_t dy = ds_array->D[depth + 1]; // D[7]
		uint32_t dk = 0;
		uint32_t dx_max = 0;
		max_xdp_add_lm(dy, dk, &dx_max);
		//		if((RC5_ROTR(ds_array->D[depth + 1], s) & RC5_ROT_MASK) == 0) { // if (D[7] >>> S[6]) = 0
		//		if((RC5_ROTR(dx_max, s) & RC5_ROT_MASK) == 0) { // if (D[7] >>> S[6]) = 0
#endif
		uint32_t dy = ds_array->D[depth + 1]; // D[7]
		uint32_t dk = 0;
		diff_set_t dx_set = {0,0};
		std::vector<uint32_t> dx_set_all; // candidate differences for this (depth, s)

#if 1									  // generate all
		xdp_add_input_diff_to_output_dset(dy, dk, &dx_set);
		//		xdp_add_dset_gen_diff_all(dx_set, &dx_set_all);
		uint32_t hw_limit = WORD_SIZE;//fib_array[depth - 1];//WORD_SIZE;
		//		xdp_add_dset_gen_diff_hamming_limit(dx_set, hw_limit, &dx_set_all);
		rc5_xdp_add_dset_gen_diff_hamming_limit(dx_set, hw_limit, ds_array->D[depth], &dx_set_all);
#endif
		uint32_t cnt = 0;
		// N equals the full list size, so the (cnt < N) test below never
		// stops the loop early; the commented-out code capped N instead.
		uint32_t N = dx_set_all.size();//(1U << 1);
		//		if(N > 5) {
		//		  N = 5;							  // limit size to 10 entries
		//		}

		std::vector<uint32_t>::iterator vec_iter = dx_set_all.begin();
		while((vec_iter != dx_set_all.end()) && (cnt < N)) {
		  WORD dx_i = *vec_iter;

#if 0									  // DEBUG
		  printf("[%s:%d] Inside rec dx_i %8X (%d / %d) goup_diff_vec->size() %d\n", 
					__FILE__, __LINE__, dx_i, cnt, N, goup_diff_vec->size());
#endif

		  if((RC5_ROTR(dx_i, s) & RC5_ROT_MASK) == 0) { // if (D[7] >>> S[6]) = 0

			 // Stays true at every level except RC5_FIB_LEN - 2, where it
			 // is set by the last-round solution check below.
			 bool b_sol = true;

			 if(depth == (RC5_FIB_LEN - 2)) { // = 6
			 //			 if(0) {
				assert(ds_array->len == (RC5_FIB_LEN + 1)); // = 9
				uint32_t i_init = 0;
				uint32_t x_sol = 0;
				uint32_t sol = 0;
				// Parameters of the last-round equation: right ciphertext
				// words, the candidate difference and both rotation amounts.
				eq_x_params_t eq_params = {{{0}}};
				uint32_t right = RC5_FEISTEL_RIGHT;
				eq_params.y = ds_array->pc_pair.ciphertext_first[right];
				eq_params.yy = ds_array->pc_pair.ciphertext_second[right];
				eq_params.dx = dx_i;
				eq_params.cp_pair = ds_array->pc_pair;
				eq_params.rot_const = ds_array->S[ds_array->len - 2];
				assert((ds_array->len - 2) == (depth + 1));
				eq_params.rot_const_prev = s;
				b_sol = rc5_last_round_eq_x_has_solution(i_init, A, L, C, eq_params, x_sol, &sol);
				//				printf("\r[%s:%d] b_sol %10d x %8X ", __FILE__, __LINE__, b_sol, sol);
				//				fflush(stdout);
#if 0 // DEBUG
				// Disabled cross-check against the recursive solution finder.
				std::vector<uint32_t> sol_vec;
				bool b_sol_tmp = rc5_last_round_eq_x_find_solutions_rec(A, eq_params, &sol_vec);
				assert(b_sol == b_sol_tmp);
#endif
			 }

			 if(b_sol) {
				rc5_goup_diffs_t ds_array_new = *ds_array;
				assert(ds_array_new.len == ds_array->len);

				ds_array_new.D[depth - 1] = 
				  RC5_ROTR(dx_i, s) ^ ds_array_new.D[depth]; // D[5] = (D[7] >>> S[6]) ^ D[6]		  
				ds_array_new.S[depth] = s; // S[6]
				if(rc5_filter_go_up_nl_i(depth - 1, A, L, C, count, fib_array, &ds_array_new, goup_diff_vec)) {  // recursive call for correct count of variants
				  flag = 1;
				}
			 }
		  } // if()

		  cnt++;
		  vec_iter++;
		} // while

	 }	// for s
	 

  } else {							  // reached the top
	 (*count)++;					  // accumulate num. of variants
	 goup_diff_vec->push_back(*ds_array);
	 return 1;
  }
  return flag;
}

/* --- */

	 // Random subsampling of the iterated vector: the current element is
	 // processed only when the low 12 bits of random32() are all zero;
	 // otherwise the iterator is advanced and the rest of the loop body
	 // is skipped.
	 // choose some random N pairs
#if 1
	 uint32_t dice = (random32() & 0xfff);
	 if(dice != 0) {
		vec_iter++;
		continue;
	 }
#endif // #if 1


		  // Same random subsampling as above, at a deeper nesting level.
		  // choose some random N pairs
#if 1
		  uint32_t dice = (random32() & 0xfff);
		  if(dice != 0) {
			 vec_iter++;
			 continue;
		  }
#endif


/* --- */

/*
 * Disabled test snippet: draws a random triple (y, yy, dx), masked to
 * the word size, and prints the number of solutions reported by
 * rc5_last_round_eq_x_count_solutions_wrapper().
 *
 * NOTE(review): the call below passes two matrix arguments (A and AA),
 * i.e. five arguments in total, whereas the wrapper declared further
 * down in this dump takes a single matrix array plus (y, yy, dx) --
 * this snippet presumably targets an older signature; confirm before
 * re-enabling. Also note that log2(nsol) is evaluated even when
 * nsol == 0.
 */
#if 0
  uint32_t y = random32() & MASK;//0;
  uint32_t yy = random32() & MASK;//1;
  uint32_t dx = random32() & MASK;//7;
  uint32_t nsol = rc5_last_round_eq_x_count_solutions_wrapper((const gsl_matrix*(*)[2][2])A, (const gsl_matrix*(*)[2][2][2])AA, y, yy, dx);
  printf("[%s:%d] #Solutions %d 2^%f\n", __FILE__, __LINE__, nsol, log2(nsol));
#endif

/* --- */

uint32_t rc5_last_round_eq_x_count_solutions_wrapper(const gsl_matrix* A[2][2][2], 
																	  const uint32_t y, const uint32_t yy, const uint32_t dx);

/*
 * Wrapper for rc5_last_round_eq_x_count_solutions
 *
 * A = AA[0] + AA[1], where AA[x[i]]|[y[i]][yy[i]][dx[i]]
 */
uint32_t rc5_last_round_eq_x_count_solutions_wrapper(const gsl_matrix* A[2][2][2],
																	  const uint32_t y, const uint32_t yy, const uint32_t dx)
{
  set_t sol_set = {0, 0}; 
  uint32_t nsol = 0;

  gsl_vector* L;
  gsl_vector* C;

  L = gsl_vector_calloc(RC5_LAST_ROUND_MSIZE);
  C = gsl_vector_calloc(RC5_LAST_ROUND_MSIZE);

  gsl_vector_set_all(L, 1.0);
  gsl_vector_set_all(C, 0.0);
  gsl_vector_set(C, RC5_LAST_ROUND_ISTATE, 1.0);

  bool b_has_solution = rc5_last_round_eq_x_count_solutions(A, L, C, y, yy, dx, &sol_set, &nsol);
  if(b_has_solution) {
	 printf("Sol (y, yy, dx) = (%8X %8X %8X) #sol %d sol_set %8X %8X\n", y, yy, dx, nsol, sol_set.val, sol_set.fixed);
  } else {
#if 1
	 printf("No solution (y, yy, dx) = (%8X %8X %8X) #sol %d sol_set %8X %8X\n", y, yy, dx, nsol, sol_set.val, sol_set.fixed);
#endif
  }

#if(WORD_SIZE <= 8)
  std::vector<uint32_t> sol_vec;
  uint32_t nsol_exper = rc5_last_round_eq_x_find_solutions_exper(y, yy, dx, &sol_vec);
  if(nsol != nsol_exper) {
	 printf("[%s:%d] Error: nsol %d nsol_exper %d\n", __FILE__, __LINE__, nsol, nsol_exper);
  }
  assert(nsol == nsol_exper);
#endif  // #if(WORD_SIZE <= 8)

  gsl_vector_free(C);
  gsl_vector_free(L);
  return nsol;
}

/* --- */
	 // Fragment: decide what values bit position `pos` of the solution may
	 // take. The vector T is multiplied once by the matrix for bit value 0
	 // (AA[0][i][j][k]) and once by the matrix for bit value 1
	 // (AA[1][i][j][k]); a null product is recorded in the corresponding
	 // b_*_is_null flag.
	 gsl_blas_dgemv(CblasNoTrans, 1.0, AA[0][i][j][k], T, 0.0, R); // R <- A T
	 bool b_zero_is_null = (1 == gsl_vector_isnull(R));

	 gsl_blas_dgemv(CblasNoTrans, 1.0, AA[1][i][j][k], T, 0.0, R); // R <- A T
	 bool b_one_is_null = (1 == gsl_vector_isnull(R));

	 // NOTE(review): the assert below requires both flags to be equal, so
	 // in builds with assertions enabled the two "exactly one is null"
	 // branches further down (marked "// 1" and "// 0") cannot be reached.
	 if(b_zero_is_null != b_one_is_null) {
		printf("[%d]", pos);
	 }
	 assert(b_zero_is_null == b_one_is_null);

	 //	 printf("[%s] --- checkpoint line #%d --- j = %d\n", __FILE__, __LINE__, pos);
	 // only the product for bit value 1 is non-null
	 if((b_zero_is_null) && (!b_one_is_null)) { // 1
		sol_set->val |= (one << pos);
		sol_set->fixed |= (fixed << pos);
	 }
	 // only the product for bit value 0 is non-null
	 if((!b_zero_is_null) && (b_one_is_null)) { // 0
		sol_set->val |= (zero << pos);
		sol_set->fixed |= (fixed << pos);
	 }
	 // both products are non-null
	 if((!b_zero_is_null) && (!b_one_is_null)) { // *
		sol_set->val |= (zero << pos);
		sol_set->fixed |= (star << pos);
	 }
	 // both products are null: reset the outputs, release the work
	 // vectors and report failure
	 if((b_zero_is_null) && (b_one_is_null)) { // no solution
		sol_set->val = 0;
		sol_set->fixed = 0;
		*nsol = 0;
		b_has_solution = false;
		gsl_vector_free(R);
		gsl_vector_free(T);
		return b_has_solution;
	 }


/* --- */
  // Fragment: compute the probability returned by rc5_xdp_add_last_round()
  // for (y, yy, dx), print it together with its base-2 logarithm and
  // compare it with p_ex (defined outside this fragment; presumably the
  // experimentally measured value -- confirm).
  double p_th = rc5_xdp_add_last_round(y, yy, dx);
  printf("[%s:%d] P_TH (%8X, %8X -> %8X) %f 2^%4.2f\n", 
			__FILE__, __LINE__, y, yy, dx, p_th, log2(p_th));


  // NOTE(review): exact floating-point equality is required here.
  assert(p_th == p_ex);

#define RC5_XDP_ADD_LR_DEBUG 1

/*
 * Bit-by-bit computation of a probability for the triple (y, yy, dx).
 *
 * The bits are scanned from position 0 up to WORD_SIZE - 1. At every
 * position the running probability is either left unchanged, halved,
 * or set to zero, depending on the bits of y, yy and dx at the current
 * and at the previous position. The scan stops as soon as the
 * probability becomes zero.
 *
 * NOTE(review): presumably this is the probability related to the
 * last-round equation (y - x) = (yy - (x ^ dx)) -- confirm against
 * the accompanying documentation.
 *
 * \param y first fixed word.
 * \param yy second fixed word.
 * \param dx fixed difference.
 * \return the computed probability (1.0, a negative power of two, or 0.0).
 */
double rc5_xdp_add_last_round(uint32_t y, uint32_t yy, uint32_t dx)
{
  double prob = 1.0;

  for(uint32_t pos = 0; (pos < WORD_SIZE) && (prob != 0.0); pos++) {

#if RC5_XDP_ADD_LR_DEBUG
	 printf("[%s:%d] i = %2d\n", __FILE__, __LINE__, pos);
#endif
	 // bits at the current position
	 const uint32_t y_cur = (y >> pos) & 1;
	 const uint32_t yy_cur = (yy >> pos) & 1;
	 const uint32_t dx_cur = (dx >> pos) & 1;
	 const uint32_t xor_cur = (y_cur ^ yy_cur);

#if RC5_XDP_ADD_LR_DEBUG
	 printf("[%s:%d] [%2d] %d %d %d\n", __FILE__, __LINE__, pos, y_cur, yy_cur, dx_cur);
#endif

	 if(pos == 0) {
		// least significant bit: dx must equal y ^ yy
		if(dx_cur != xor_cur) {
		  prob = 0.0;
		}
	 } else {
		// bits at the previous position
		const uint32_t y_prev = (y >> (pos - 1)) & 1;
		const uint32_t yy_prev = (yy >> (pos - 1)) & 1;
		const uint32_t dx_prev = (dx >> (pos - 1)) & 1;
		const uint32_t xor_prev = (y_prev ^ yy_prev ^ dx_prev) & 1;
		const bool b_prev_equal = (y_prev == yy_prev);

#if RC5_XDP_ADD_LR_DEBUG
		printf("[%s:%d] [%2d] %d %d %d\n", __FILE__, __LINE__, pos - 1, y_prev, yy_prev, dx_prev);
		printf("[%s:%d] xor[%2d] = %d\n", __FILE__, __LINE__, pos - 1, xor_prev);
#endif

		if(xor_prev == 0) {
#if RC5_XDP_ADD_LR_DEBUG
		  printf("[%s:%d] xor == 0\n", __FILE__, __LINE__);
#endif
		  if(b_prev_equal) {
			 if(dx_cur != xor_cur) {
				prob = 0.0;
			 }
		  } else {
			 prob *= 0.5;
		  }
		} else {
#if RC5_XDP_ADD_LR_DEBUG
		  printf("[%s:%d] xor == 1\n", __FILE__, __LINE__);
#endif
		  if(b_prev_equal) {
			 prob *= 0.5;
		  } else if(dx_cur != (y_prev ^ xor_cur)) {
			 // (y_prev, yy_prev) = (0, 1): dx must equal y ^ yy
			 // (y_prev, yy_prev) = (1, 0): dx must equal 1 ^ y ^ yy
			 prob = 0.0;
		  }
		}
	 }
#if RC5_XDP_ADD_LR_DEBUG
	 printf("[%s:%d] p %f\n", __FILE__, __LINE__, prob);
#endif
  }
  return prob;
}


/* --- */

/*
 * Disabled snippet: instead of exploring a set of differences, take only
 * the single difference dx_max returned by max_xdp_add_lm() for the last
 * element of the D array, and derive D[len - 3] from it by rotating
 * right by S[len - 2] and XOR-ing with D[len - 2]. S[len - 3] is reset
 * to 0 as a placeholder.
 */
#if 0														 // just the maximum
  uint32_t dy = ds_array.D[ds_array.len - 1]; // D[8]
  uint32_t dk = 0;
  uint32_t dx_max = 0;
  max_xdp_add_lm(dy, dk, &dx_max);
  ds_array.D[ds_array.len - 3] = 
	 RC5_ROTR(dx_max, ds_array.S[ds_array.len - 2]) ^ ds_array.D[ds_array.len - 2];
  ds_array.S[ds_array.len - 3] = 0; // S[6] : unknown - to be computed
#endif


/* --- */

/*
 * Non-linear version of \ref rc5_filter_go_up
 *
 * Recursively walks "up" the global array of differences g_D, starting
 * at index \p depth. At every level the branch is abandoned if the
 * Hamming weight of g_D[depth] exceeds FIB[depth]. Otherwise every
 * rotation amount s for which the rotated g_D[depth + 1] has zero bits
 * under RC5_ROT_MASK is tried: g_D[depth - 1] is derived and the
 * function recurses with depth - 1. Reaching depth 0 increments the
 * global counter g_count.
 *
 * \param depth index in g_D of the current level.
 * \return 1 if at least one branch reached depth 0, 0 otherwise.
 */
uint32_t rc5_filter_go_up_nl(uint32_t depth)
{
  //  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

  assert(RC5_XOR == 0);

#if 0									  // DEBUG
  //  printf("[%s:%d] g_D[%d] %8X\n", __FILE__, __LINE__, depth, g_D[depth]);
  printf("\rg_D[%d] %8X", depth, g_D[depth]);
  fflush(stdout);
#endif

#if 1
  // cut the search tree
  if(hw32(g_D[depth]) > FIB[depth]) {
	 return 0;
  }
#endif

  // reached the top: accumulate num. of variants
  if(depth == 0) {
	 g_count++;
	 return 1;
  }

  uint32_t b_reached_top = 0;

  // try all rotations 0..31
  for(uint32_t rot = 0; rot < WORD_SIZE; rot++) {

	 if((RC5_ROTR(g_D[depth + 1], rot) & RC5_ROT_MASK) != 0) {
		continue;					  // rotation amount not compatible
	 }

	 uint32_t dx = g_D[depth + 1];
	 uint32_t dk = 0;

#if 1	 // explore only the max
	 uint32_t dy_best = 0;
	 max_xdp_add_lm(dx, dk, &dy_best);
	 // g_D[5] = (g_D[7] >>> s) ^ g_D[6]
	 g_D[depth - 1] = RC5_ROTR(dy_best, rot) ^ g_D[depth];

	 // recursive call for correct g_count of variants
	 if(rc5_filter_go_up_nl(depth - 1)) {
		b_reached_top = 1;
	 }
#else  // explore a set of output differences
	 diff_set_t dy_set = {0,0};
	 std::vector<uint32_t> dy_candidates;

	 xdp_add_input_diff_to_output_dset(dx, dk, &dy_set);
	 xdp_add_dset_gen_diff_all(dy_set, &dy_candidates);

	 const uint32_t n_candidates = dy_candidates.size();//(1U << 1);
	 for(uint32_t i_cand = 0; i_cand < n_candidates; i_cand++) {
		// g_D[5] = (g_D[7] >>> s) ^ g_D[6]
		g_D[depth - 1] = RC5_ROTR(dy_candidates[i_cand], rot) ^ g_D[depth];

		// recursive call for correct g_count of variants
		if(rc5_filter_go_up_nl(depth - 1)) {
		  b_reached_top = 1;
		}
	 }
#endif  // #if 1	 // explore only the max

  }	// next rot const.

  return b_reached_top;
}

/* --- */

/*
 * Check if a given \ref eq_x_params_t structure originates from a
 * list of good plaintext/ciphertext pairs \p good_pairs_vec . The \ref
 * eq_x_params_t contains the triple (dx, y, yy) representing the
 * parameters for the last round equation in x: 
 * \f$(y - x) = (yy - (x \oplus dx))\f$ where \f$y, yy, dx\f$ are fixed. 
 *
 * The function goes through the list of good pairs \p good_pairs_vec
 * in lock-step with the intermediate encryption values of each pair.
 * For each pair it recomputes, from the intermediate values, the
 * input difference dx to the addition of the last round and the two
 * last rotation constants, and then checks whether y, yy, dx and both
 * rotation constants stored in \p x_params all match. If yes, then the
 * corresponding good pair is copied in \p x and the search stops.
 *
 * NOTE: the whole definition is currently compiled out (#if 0).
 *
 * \param nrounds number of rounds; each vector of intermediate values
 *                is asserted to hold (2 * nrounds) + 3 elements.
 * \param feistel_branch the left (= 0) or the right (= 1) branch of
 *                       the Feistel network (see \ref
 *                       RC5_FEISTEL_LEFT, \ref RC5_FEISTEL_RIGHT);
 *                       asserted to be \ref RC5_FEISTEL_RIGHT.
 * \param x_params the triple (dx, y, yy) together with the rotation
 *                 constants rot_const and rot_const_prev.
 * \param x stores the corresponding good pair (if found).
 * \param good_pairs_vec vector of good pairs determined experimentally.
 * \param values_first_pair intermediate values for the first text of
 *                          every good pair (same order as \p good_pairs_vec).
 * \param values_second_pair intermediate values for the second text of
 *                           every good pair (same order as \p good_pairs_vec).
 * \return true if a matching good pair was found, false otherwise.
 */
#if 0
bool rc5_ciphertext_is_good(const uint32_t nrounds, const uint32_t feistel_branch, 
									 const eq_x_params_t x_params, pair_t* x,
									 const std::vector<pair_t> good_pairs_vec,
									 const std::vector<std::vector<uint32_t>> values_first_pair,
									 const std::vector<std::vector<uint32_t>> values_second_pair)
{
  //  assert((feistel_branch == RC5_FEISTEL_LEFT) || (feistel_branch == RC5_FEISTEL_RIGHT));
  assert(feistel_branch == RC5_FEISTEL_RIGHT);
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t right = RC5_FEISTEL_RIGHT;
  uint32_t rot_mask = (WORD_SIZE - 1);
  bool b_is_good = false;

  // init x
  x->plaintext_first[left] = 0; x->plaintext_first[right] = 0;
  x->plaintext_second[left] = 0; x->plaintext_second[right] = 0;
  x->ciphertext_first[left] = 0; x->ciphertext_first[right] = 0;
  x->ciphertext_second[left] = 0; x->ciphertext_second[right] = 0;
  // copy (y,yy) in x
  x->ciphertext_first[feistel_branch] = x_params.y;
  x->ciphertext_second[feistel_branch] = x_params.yy;

  // the three vectors are traversed in parallel, one entry per good pair
  std::vector<pair_t>::const_iterator good_pairs_iter = good_pairs_vec.begin();
  std::vector<std::vector<uint32_t>>::const_iterator values_first_pair_iter = values_first_pair.begin();
  std::vector<std::vector<uint32_t>>::const_iterator values_second_pair_iter = values_second_pair.begin();

  assert(good_pairs_vec.size() == values_first_pair.size());
  assert(good_pairs_vec.size() == values_second_pair.size());
  // stop at the first matching good pair
  while(!b_is_good && (good_pairs_iter != good_pairs_vec.end())) {
	 assert(values_first_pair_iter != values_first_pair.end());
	 assert(values_second_pair_iter != values_second_pair.end());

	 pair_t i_good_pair = *good_pairs_iter;
	 std::vector<uint32_t> X_first = *values_first_pair_iter;
	 std::vector<uint32_t> X_second = *values_second_pair_iter;
	 assert(X_first.size() == X_second.size());
	 assert(X_first.size() == (((2*nrounds) + 3)));

	 //	 uint32_t n = ((2*nrounds) + 2); // index of the last element of X
	 uint32_t n = nrounds;
	 assert(X_first.size() == (2*n + 3));

	 // rotation constants and ADD input recomputed from the first text
	 uint32_t r1_prev = (X_first[(2*n)] & rot_mask);
	 uint32_t r1 = (X_first[(2*n) + 1] & rot_mask);
	 uint32_t x1 = RC5_ROTL((X_first[(2*n) + 1] ^ X_first[(2*n)]), r1); // ((x[5] ^ x[6]) <<< (X[6] & RC5_ROT_MASK))

	 // the same quantities recomputed from the second text
	 uint32_t r2_prev = (X_second[(2*n)] & rot_mask);
	 uint32_t r2 = (X_second[(2*n) + 1] & rot_mask);
	 uint32_t x2 = RC5_ROTL((X_second[(2*n) + 1] ^ X_second[(2*n)]), r2); // ((x[5] ^ x[6]) <<< (X[6] & RC5_ROT_MASK))

	 uint32_t dx = (x1 ^ x2);	  // input difference to the ADD operation of the last round

	 // both texts of a good pair must use equal rotation amounts
	 assert(r1 == r2);
	 assert(r1_prev == r2_prev);

	 uint32_t rot_const = r1;
	 uint32_t rot_const_prev = r1_prev;

#if 0									  // DEBUG
	 printf("[%s:%d] dx %8X %8X (!) hw %2d %2d (!) | \n", __FILE__, __LINE__, x_params.dx, dx, hw32(x_params.dx), hw32(dx));
#endif

	 assert(x_params.b_aux_data == true);

#if 1 // original
	 b_is_good = ((x_params.y == i_good_pair.ciphertext_first[feistel_branch]) && // y
					  (x_params.yy == i_good_pair.ciphertext_second[feistel_branch]) && // yy
					  (x_params.dx == dx) && // dx
					  (x_params.rot_const == rot_const) && // r7
					  (x_params.rot_const_prev == rot_const_prev)); // r6
#else	// debug: same check without the comparison of dx
	 b_is_good = ((x_params.y == i_good_pair.ciphertext_first[feistel_branch]) && // y
					  (x_params.yy == i_good_pair.ciphertext_second[feistel_branch]) && // yy
					  //					  (x_params.dx == dx) && // dx
					  (x_params.rot_const == rot_const) && // r7
					  (x_params.rot_const_prev == rot_const_prev)); // r6
#endif
	 if(b_is_good) {
#if 0	// DEBUG
		printf("[%s:%d] Ciphertext pair is good: (%8X %8X)\n", __FILE__, __LINE__, 
				 x->ciphertext_first[feistel_branch], x->ciphertext_second[feistel_branch]);
#endif  // #if 0

		(*x) = i_good_pair;		  // copy all fields of the found pair

#if 0	// DEBUG
		rc5_print_pair(*x);
#endif  // #if 0
	 }

	 good_pairs_iter++;
	 values_first_pair_iter++;
	 values_second_pair_iter++;
  }
  return b_is_good;
}
#endif


/* --- */
void rc5_good_pairs_get_intermediate_values(const WORD S[RC5_STAB_LEN_T], const uint32_t nrounds,
														  const std::vector<pair_t> good_pairs_vec,
														  std::vector<std::vector<uint32_t>>* values_first_pair,
														  std::vector<std::vector<uint32_t>>* values_second_pair);

/*
 * Compute the intermediate values from the encryption of the good
 * pairs.
 *
 * For every pair in \p good_pairs_vec the function calls
 * rc5_encrypt_pair_get_intermediate_values() and appends the two
 * resulting vectors to \p values_first_pair and \p values_second_pair
 * respectively, so both outputs keep the order of \p good_pairs_vec .
 *
 * NOTE: the definition below is currently compiled out (#if 0); only
 * the declaration above is active in this dump.
 *
 * \param S the expanded key.
 * \param nrounds number of rounds.
 * \param good_pairs_vec vector of good pairs determined
 *                       experimentally by using the "secret key" (for DEBUG purposes)
 * \param values_first_pair intermediate values for all first good pairs
 * \param values_second_pair intermediate values for all second good pairs
 */
#if 0
void rc5_good_pairs_get_intermediate_values(const WORD S[RC5_STAB_LEN_T], const uint32_t nrounds,
														  const std::vector<pair_t> good_pairs_vec,
														  std::vector<std::vector<uint32_t>>* values_first_pair,
														  std::vector<std::vector<uint32_t>>* values_second_pair)
{
#if 0	// DEBUG
  uint32_t cnt_good = 0;
#endif  // #if 0	// DEBUG
  std::vector<pair_t>::const_iterator pairs_iter = good_pairs_vec.begin();
  for(pairs_iter = good_pairs_vec.begin(); pairs_iter != good_pairs_vec.end(); pairs_iter++) {
  //  for(pairs_iter = good_pairs_vec.begin(); pairs_iter != good_pairs_vec.end(), cnt_good < 1; pairs_iter++) {

	 std::vector<uint32_t> X_first;		  // intermediate values from encryption
	 std::vector<uint32_t> X_second;

#if 0	// DEBUG
	 cnt_good++;
	 printf("[%s:%d] good pair# %d\n", __FILE__, __LINE__, cnt_good);
#endif  // #if 0	// DEBUG

	 pair_t pair = *pairs_iter;

	 // encrypt both texts of the pair and collect the intermediate values
	 rc5_encrypt_pair_get_intermediate_values(S, nrounds, pair, &X_first, &X_second);

	 // store values
	 values_first_pair->push_back(X_first);
	 values_second_pair->push_back(X_second);
  }
}
#endif


/* --- */

/*
 * Convert the good pairs into a 2D vector of \ref rc5_goup_diffs_t
 * elements.
 *
 * The intermediate values of the encryption of every good pair are
 * obtained with \ref rc5_good_pairs_get_intermediate_values . For each
 * pair one \ref rc5_goup_diffs_t is filled as follows:
 *
 *   - D[i] is the XOR difference of the i-th intermediate values;
 *   - S[i] is the i-th intermediate value of the first text masked
 *     with RC5_ROT_MASK, for 2 <= i < (2 * nrounds) + 2 (0 otherwise);
 *   - the plaintexts are taken from indices 0 (left) and 1 (right);
 *   - the ciphertexts are taken from indices (2 * nrounds) + 1 (left)
 *     and (2 * nrounds) + 2 (right).
 *
 * Each filled element is wrapped in a one-element vector and appended
 * to \p goup_diff_vec_2d .
 *
 * \param S the expanded key.
 * \param nrounds number of rounds.
 * \param good_pairs_vec vector of good pairs determined
 *                       experimentally by using the "secret key" (for DEBUG purposes)
 * \param goup_diff_vec_2d output: one single-element vector per good pair.
 *
 * \see rc5_good_pairs_get_intermediate_values
 */
void rc5_good_pairs_to_diff_vec_2d(const WORD S[RC5_STAB_LEN_T],
											  const uint32_t nrounds,
											  const std::vector<pair_t> good_pairs_vec,
											  std::vector<std::vector<rc5_goup_diffs_t>>* goup_diff_vec_2d)
{
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

  // intermediate values for the good pairs: 1D = the pair, 2D = the corresponding trail
  std::vector<std::vector<uint32_t>> values_first_pair;
  std::vector<std::vector<uint32_t>> values_second_pair;
  rc5_good_pairs_get_intermediate_values(S, nrounds, good_pairs_vec, &values_first_pair, &values_second_pair);

  assert(good_pairs_vec.size() == values_first_pair.size());
  assert(good_pairs_vec.size() == values_second_pair.size());

  const uint32_t left = RC5_FEISTEL_LEFT;
  const uint32_t right = RC5_FEISTEL_RIGHT;
  // indices of the ciphertext words among the intermediate values
  const uint32_t idx_ct_right = (2 * nrounds) + 2;
  const uint32_t idx_ct_left = idx_ct_right - 1;

  for(size_t i_pair = 0; i_pair < values_first_pair.size(); i_pair++) {

#if 0	// DEBUG
	 printf("Pair #%2d\n", (uint32_t)(i_pair + 1));
#endif  // #if 0	// DEBUG

	 const std::vector<uint32_t>& X_first = values_first_pair[i_pair];
	 const std::vector<uint32_t>& X_second = values_second_pair[i_pair];
	 assert(X_first.size() == X_second.size());

	 // initialize array: all texts zero, D and S extended by len zero entries
	 rc5_goup_diffs_t ds_array;
	 for(uint32_t k = 0; k < 2; k++) {
		ds_array.pc_pair.plaintext_first[k] = 0;
		ds_array.pc_pair.plaintext_second[k] = 0;
		ds_array.pc_pair.ciphertext_first[k] = 0;
		ds_array.pc_pair.ciphertext_second[k] = 0;
	 }
	 ds_array.len = X_first.size();
	 for(uint32_t k = 0; k < ds_array.len; k++) {
		ds_array.D.push_back(0);
		ds_array.S.push_back(0);
	 }

	 for(uint32_t i = 0; i < X_first.size(); i++) {

		const uint32_t x = X_first[i];
		const uint32_t xx = X_second[i];
		const uint32_t dx = (x ^ xx);
		// rot consts for x and xx must be equal for all except the last
		// round i.e. (dx & RC5_ROT_MASK) == 0
		const bool b_equal_rot_expected = (i < idx_ct_right);

		if((i >= 2) && b_equal_rot_expected) {
		  ds_array.S[i] = (WORD)(x & RC5_ROT_MASK);
		  assert(ds_array.S[i] == (WORD)(xx & RC5_ROT_MASK));
		}
		ds_array.D[i] = (WORD)dx;
		if(b_equal_rot_expected) { 
		  assert((ds_array.D[i] & RC5_ROT_MASK) == 0);
		}

#if 0	// DEBUG
		printf("X[%2d] %8X %8X | S %2d D %8X", i, x, xx, ds_array.S[i], ds_array.D[i]);
#endif  // #if 0	// DEBUG

		// The four checks below are deliberately independent (not an
		// else-if chain): for small nrounds the indices may coincide.
		if(i == 0) {  // plaintext: X[0]
		  ds_array.pc_pair.plaintext_first[left] = x;
		  ds_array.pc_pair.plaintext_second[left] = xx;
#if 0	// DEBUG
		  printf(" left PT");
#endif  // #if 0	// DEBUG
		}
		if(i == 1) { // plaintext: X[1]
		  ds_array.pc_pair.plaintext_first[right] = x;
		  ds_array.pc_pair.plaintext_second[right] = xx;
#if 0	// DEBUG
		  printf(" right PT");
#endif  // #if 0	// DEBUG
		}
		if(i == idx_ct_left) { // ciphertext: X[(2*nrounds) + 1]
		  ds_array.pc_pair.ciphertext_first[left] = x;
		  ds_array.pc_pair.ciphertext_second[left] = xx;
#if 0	// DEBUG
		  printf(" left CT");
#endif  // #if 0	// DEBUG
		}
		if(i == idx_ct_right) { // ciphertext: X[(2*nrounds) + 2]
		  ds_array.pc_pair.ciphertext_first[right] = x;
		  ds_array.pc_pair.ciphertext_second[right] = xx;
#if 0	// DEBUG
		  printf(" right CT");
#endif  // #if 0	// DEBUG
		}

		if(b_equal_rot_expected) { 
		  assert((dx & RC5_ROT_MASK) == 0);
		}
#if 0	// DEBUG
		printf("\n");
#endif  // #if 0	// DEBUG
	 }

	 // wrap the element in a dummy one-element vector to make it 2d
	 goup_diff_vec_2d->push_back(std::vector<rc5_goup_diffs_t>(1, ds_array));
  }
#if 1
  assert(good_pairs_vec.size() == goup_diff_vec_2d->size());
#endif
}

/* --- */

  /*
   * This vector stores those filtered pairs that are good, i.e. that
   * appear in the \ref good_pairs_vec vector. Note that the vector
   * \ref good_pairs_vec is generated by using knowledge of the
   * "secret" key. Therefore \p good_among_filtered_pairs_vec is used
   * only for DEBUG purposes.
   */
  std::vector<pair_t> good_among_filtered_pairs_vec;

/* --- */

/*
 * Forward declaration of rc5_equal_rot_attack(): takes the expanded key
 * \p S, the number of rounds, the input difference \p dx, the vectors of
 * chosen-plaintext pairs and of good pairs, and two output vectors (the
 * good pairs found among the filtered ones and the 2D vector of goUP
 * trails).
 */
void rc5_equal_rot_attack(const WORD S[RC5_STAB_LEN_T],
								  const uint32_t nrounds,
								  const WORD dx[2], 
								  const std::vector<pair_t> cptext_pairs_vec, 
								  const std::vector<pair_t> good_pairs_vec,
								  std::vector<pair_t>* good_among_filtered_pairs_vec,
								  std::vector<std::vector<rc5_goup_diffs_t>>* goup_diff_vec_2d);


/* --- */

  // Print the value of RC5_FIXED_KEY followed by all RC5_STAB_LEN_T words
  // of the expanded key S, formatted like a C initializer list.
#if 1									  // DEBUG
  printf("[%s:%d] RC5_FIXED_KEY %d | Expanded key[%d] = {", __FILE__, __LINE__, RC5_FIXED_KEY, RC5_STAB_LEN_T);
  for(uint32_t j = 0; j < RC5_STAB_LEN_T; j++) {
	 printf("0x%X, ", S[j]);
  }
  printf("};\n\n");
#endif  // #if 1


/* --- */

  /*
   * Keeps all plaintext/ciphertext pairs that pass the filters.
   */ 
#if RC5_DEBUG_HAVE_MEMORY
  std::vector<pair_t> filtered_pairs_vec;
#endif // #if RC5_DEBUG_HAVE_MEMORY

/* --- */
/* 
 * \param good_among_filtered_pairs_vec This vector stores those
 *        filtered pairs that are good, i.e. that appear in the \ref
 *        good_pairs_vec vector. Note that the vector \ref
 *        good_pairs_vec is generated by using knowledge of the
 *        "secret" key. Therefore \p good_among_filtered_pairs_vec is
 *        used only for DEBUG purposes.
 */

/* --- */
      /*
       * Count the number of actual good pairs among the filtered ones
       * (for DEBUG purposes)
       */
#if RC5_DEBUG_HAVE_MEMORY
		bool b_is_good = rc5_pair_is_good(new_pair, good_pairs_vec);
		if(b_is_good) {			  // store good pair
		  good_among_filtered_pairs_vec->push_back(new_pair);
		}
#endif  // #if RC5_DEBUG_HAVE_MEMORY
	 }


/* --- */
#if RC5_DEBUG_HAVE_MEMORY
		// assemble a pair_t from the current plaintexts and ciphertexts
		pair_t new_pair;
		for(uint32_t i = 0; i < 2; i++) {
		  new_pair.plaintext_first[i] = plaintext_first[i];
		  new_pair.ciphertext_first[i] = ciphertext_first[i];
		  new_pair.plaintext_second[i] = plaintext_second[i];
		  new_pair.ciphertext_second[i] = ciphertext_second[i];
		}
      /**
       * Store the filtered plaintext/ciphertext pair. It is a
       * candidate good pair. Used for DEBUG only.
       */
		filtered_pairs_vec.push_back(new_pair);
		// one trail set is expected per stored filtered pair
		assert(goup_diff_vec_2d->size() == filtered_pairs_vec.size());
#endif  // #if RC5_DEBUG_HAVE_MEMORY

#if 0														 // DEBUG
		printf("\r[%s:%d] #filtered %5d (2^%f) / %5d (2^%f) | r 2^%f", __FILE__, __LINE__, filtered_pairs_vec.size(), log2(filtered_pairs_vec.size()), cnt_cptext_pairs, log2(cnt_cptext_pairs), log2((double)filtered_pairs_vec.size() / (double)cnt_cptext_pairs));
		fflush(stdout);
#endif  // #if 0


/* --- */

/*
 * Disabled debug fragment: builds a hash map of last-round equation
 * parameters from the good pairs and prints how many entries
 * rc5_params_count_good() reports for it against params_hash_map
 * (which is defined outside this fragment), together with the sizes of
 * both maps.
 */
#if 0	 // for DEBUG
  assert(RC5_DEBUG_HAVE_MEMORY == 1);
  std::vector<std::vector<rc5_goup_diffs_t>> good_pairs_ds_vec_2d;
  boost::unordered_map<eq_x_params_t, uint32_t, rc5_eq_x_params_hash, rc5_eq_x_params_equal_to> good_params_hash_map;
  rc5_good_pairs_to_diff_vec_2d(S, nrounds, good_pairs_vec, &good_pairs_ds_vec_2d);
  rc5_last_round_eq_x_params_hash_map(good_pairs_ds_vec_2d, &good_params_hash_map);

  //  rc5_params_hash_map_print(params_hash_map);
  //  rc5_params_hash_map_print(good_params_hash_map);
  uint32_t cnt_good = rc5_params_count_good(good_params_hash_map, params_hash_map);
  printf("[%s:%d] Good filtered %d, All filtered %d, Good total %d\n", __FILE__, __LINE__, cnt_good, (uint32_t)params_hash_map.size(), (uint32_t)good_params_hash_map.size());
#endif // #if 0


/* --- */


#if RC5_FILTER_GOUP_DEBUG
  assert(RC5_DEBUG_HAVE_MEMORY == 1);
  rc5_equal_rot_attack_debug(S, nrounds, dx, cptext_pairs_vec, good_pairs_vec);
#endif // #if RC5_FILTER_GOUP_DEBUG


/* --- */

  /*
   * Generate a list of chosen plaintexts selected uniformly at random
   * and, experimentally for a known key, store the resulting good
   * pairs. This is done only when RC5_DEBUG_HAVE_MEMORY is set.
   */
#if RC5_DEBUG_HAVE_MEMORY
  rc5_equal_rot_differential(S, nrounds, dx, &cptext_pairs_vec, &good_pairs_vec);
  printf("[%s:%d] Found %d good pairs out of 2^%f by experiment\n", 
			__FILE__, __LINE__, (uint32_t)good_pairs_vec.size(), log2(cptext_pairs_vec.size()));
#if 0 // DEBUG
  rc5_print_pairs(good_pairs_vec);
#endif //#if 0 // DEBUG
#endif // #if RC5_DEBUG_HAVE_MEMORY

  /*
   * Disabled variant of the attack that works directly on the good
   * pairs: they are converted to trails, hashed by their last-round
   * equation parameters, classified into the WORD_SIZE buckets of R
   * by rc5_pairs_classify_by_last_round_rot_const(), and passed to
   * rc5_last_round_rot_const_keyrec().
   */
#if 0 // GOOD pairs attack
  std::vector<std::vector<rc5_goup_diffs_t>> good_pairs_ds_vec_2d;
  std::vector<eq_x_params_t> R[WORD_SIZE];

  rc5_good_pairs_to_diff_vec_2d(S, nrounds, good_pairs_vec, &good_pairs_ds_vec_2d);
  rc5_last_round_eq_x_params_hash_map(good_pairs_ds_vec_2d, &params_hash_map);
  rc5_pairs_classify_by_last_round_rot_const(params_hash_map, R);
  //  rc5_goup_diff_vec_2d_debug(S, nrounds, good_pairs_vec, good_pairs_ds_vec_2d);
  //  rc5_goup_diff_vec_2d_print(good_pairs_ds_vec_2d);
  //  printf("[%s:%d] Hash map size BEFORE: %d (2^%f)\n", __FILE__, __LINE__, 
  //			(uint32_t)params_hash_map.size(), log2(params_hash_map.size()));
  //  rc5_last_round_eq_x_keyrec(S, nrounds, &params_hash_map, good_pairs_vec);
  //  printf("[%s:%d] Hash map size AFTER: %d (2^%f)\n", __FILE__, __LINE__, 
  //			(uint32_t)params_hash_map.size(), log2(params_hash_map.size()));
  rc5_last_round_rot_const_keyrec(S, nrounds, R);
#endif // GOOD pairs attack


/* --- */

/*
 * Filtering phase of the equal-rotation attack on RC5.
 *
 * Every chosen-plaintext pair is encrypted under the expanded key
 * \p S and passed through two filters: the last-round filter
 * (rc5_filter_last_round()) and the non-linear goUP filter
 * (rc5_filter_go_up_nl()). For every pair that passes both, the set of
 * trails produced by the goUP filter is appended to
 * \p goup_diff_vec_2d . Statistics are printed on exit.
 *
 * The source of the pairs depends on RC5_DEBUG_HAVE_MEMORY:
 *  - if set, the pairs are read from \p cptext_pairs_vec, every
 *    filtered pair is additionally kept locally and compared against
 *    \p good_pairs_vec ;
 *  - if not set (low memory), RC5_NTEXTS pairs with plaintext
 *    difference \p dx are generated on the fly and both input vectors
 *    must be empty.
 *
 * \param S the expanded key.
 * \param nrounds number of rounds.
 * \param dx plaintext difference (left, right); used only in low
 *           memory mode.
 * \param cptext_pairs_vec pre-stored chosen-plaintext pairs (must be
 *                         empty in low memory mode).
 * \param good_pairs_vec good pairs determined experimentally (must be
 *                       empty in low memory mode).
 * \param good_among_filtered_pairs_vec output: the filtered pairs that
 *        also appear in \p good_pairs_vec (filled only when
 *        RC5_DEBUG_HAVE_MEMORY is set; for DEBUG purposes).
 * \param goup_diff_vec_2d output: one vector of goUP trails per
 *                         filtered pair.
 */
void rc5_equal_rot_attack(const WORD S[RC5_STAB_LEN_T],
								  const uint32_t nrounds,
								  const WORD dx[2], 
								  const std::vector<pair_t> cptext_pairs_vec, 
								  const std::vector<pair_t> good_pairs_vec,
								  std::vector<pair_t>* good_among_filtered_pairs_vec,
								  std::vector<std::vector<rc5_goup_diffs_t>>* goup_diff_vec_2d)
{
  printf("[%s:%d] Enter %s()\n", __FILE__, __LINE__, __FUNCTION__);

#if !RC5_DEBUG_HAVE_MEMORY // if low memory
  // in low memory mode neither the CP pairs nor the good pairs are pre-stored
  assert(cptext_pairs_vec.size() == 0);
  assert(good_pairs_vec.size() == 0);
#endif // #if !RC5_DEBUG_HAVE_MEMORY

  uint64_t cnt = 0;
  uint64_t ret_cnt = 0;
  uint32_t fib_array_len = RC5_FIB_LEN;//= RC5_GOUP_LEVEL + 1;
  /**
   * Initialize the Fibonacci array.
   */
  std::vector<uint32_t> fib_array;
  for(uint32_t i = 0; i < fib_array_len; i++) {
	 fib_array.push_back(FIB[i]);
  }

  WORD plaintext_first[2] = {0, 0};
  WORD plaintext_second[2] = {0, 0};

  WORD ciphertext_first[2] = {0, 0};
  WORD ciphertext_second[2] = {0, 0};

  uint32_t cnt_cptext_pairs = 0;

#if 1									  // DEBUG
  printf("[%s:%d] RC5_FIB_LEN %d fib_array.size() = %d\n", __FILE__, __LINE__, RC5_FIB_LEN, (uint32_t)fib_array.size());
  assert(fib_array.size() == RC5_FIB_LEN);
  printf("[%s:%d] fib_array = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < RC5_FIB_LEN; i++) {
	 printf("[%d] %d ", i, fib_array[i]);
  }
  printf("\n");
#endif								  // #if 1

#if RC5_FILTER_GOUP_DEBUG
  uint32_t cnt_vec_2d = 0;
#endif // #if RC5_FILTER_GOUP_DEBUG

  /**
   * Keeps all plaintext/ciphertext pairs that pass the filters.
   */ 
#if RC5_DEBUG_HAVE_MEMORY
  std::vector<pair_t> filtered_pairs_vec;
#endif // #if RC5_DEBUG_HAVE_MEMORY
  /**
   * Keeps the full trail from the goUP filter that corresponds to a
   * filtered pair. This is one element of the \ref goup_diff_vec_2d
   * array.
   */
  std::vector<rc5_goup_diffs_t> goup_diff_vec;

  // Pool of chosen plaintexts
#if RC5_DEBUG_HAVE_MEMORY // have memory : plaintexts are pre-stored
  uint32_t j = 0;
  std::vector<pair_t>::const_iterator cptext_pairs_iter = cptext_pairs_vec.begin();
  for(cptext_pairs_iter = cptext_pairs_vec.begin(); cptext_pairs_iter != cptext_pairs_vec.end(); cptext_pairs_iter++) {
	 j++;
	 // extract the chosen plaintext (CP) pair
	 pair_t cp_pair = *cptext_pairs_iter;
#else // low memory : chosen plaintexts are generated on the fly
  for(uint32_t j = 0; j < RC5_NTEXTS; j++) {
    pair_t cp_pair;
    cp_pair.plaintext_first[0] = random32() & MASK;
    cp_pair.plaintext_first[1] = random32() & MASK;
    cp_pair.plaintext_second[0] = (cp_pair.plaintext_first[0] ^ dx[0]) & MASK;
	 cp_pair.plaintext_second[1] = (cp_pair.plaintext_first[1] ^ dx[1]) & MASK;
#endif // #if RC5_DEBUG_HAVE_MEMORY

	 goup_diff_vec.clear();		  // init the goup vector

	 cnt_cptext_pairs++;

	 for(uint32_t i = 0; i < 2; i++) { // left pt = 0, right pt = 1
		plaintext_first[i] = cp_pair.plaintext_first[i];
		plaintext_second[i] = cp_pair.plaintext_second[i];
	 }

	 // encrypt pairs of texts
#if RC5_XOR  // XOR-linear
	 rc5_xor_encrypt(nrounds, S, plaintext_first, ciphertext_first);  
	 rc5_xor_encrypt(nrounds, S, plaintext_second, ciphertext_second);  
#else	 // original
	 rc5_encrypt(nrounds, S, plaintext_first, ciphertext_first);  
	 rc5_encrypt(nrounds, S, plaintext_second, ciphertext_second);  
#endif  // #if RC5_XOR

	 // fill the ciphertexts into the pair
	 for(uint32_t i = 0; i < 2; i++) { // left ct = 0, right ct = 1
		cp_pair.ciphertext_first[i] = ciphertext_first[i];
		cp_pair.ciphertext_second[i] = ciphertext_second[i];
	 }

	 // left and right ciphertext words of the two texts
	 WORD L[2] = {ciphertext_first[0], ciphertext_second[0]}; 
	 WORD R[2] = {ciphertext_first[1], ciphertext_second[1]};

	 // filter 1: last round; filter 2: goUP (run only if filter 1 passes)
	 bool b_is_good_pair_f1 = rc5_filter_last_round(L, R);
	 bool b_is_good_pair_f2_ext = true;

	 uint32_t ret_ext = 0;
	 if(b_is_good_pair_f1) {
#if 0 // linear (extended)
		ret_ext = rc5_filter_go_up_ext(cp_pair, fib_array, &goup_diff_vec);
#else // non-linear
		// the cast keeps the argument in sync with the %lld conversion
		// whatever the integer type of RC5_NTEXTS is
		printf("\r[%s:%d] Filter pair # (%d / %lld) goup_diff_vec_2d->size() %d", 
				 __FILE__, __LINE__, j, (long long)RC5_NTEXTS, (uint32_t)goup_diff_vec_2d->size());
		fflush(stdout);
		ret_ext = rc5_filter_go_up_nl(cp_pair, fib_array, &goup_diff_vec);
#endif
		ret_cnt += ret_ext;
	 }
	 b_is_good_pair_f2_ext = (ret_ext != 0); 

	 if(b_is_good_pair_f1 && b_is_good_pair_f2_ext) {

#if RC5_FILTER_GOUP_DEBUG
		assert(RC5_DEBUG_HAVE_MEMORY == 1);
		assert(goup_diff_vec.size() == g_goup_diff_vec_2d[cnt_vec_2d].size());
		bool b_are_equal = rc5_goup_diff_vecs_are_equal(goup_diff_vec, g_goup_diff_vec_2d[cnt_vec_2d]);
		assert(b_are_equal);
		cnt_vec_2d++;
#endif  // #if RC5_FILTER_GOUP_DEBUG

		cnt++;

      /**
       * Store the equal-rot trail corresponding to the filtered pair
       * (i.e. to the candidate good pair)
       */
		goup_diff_vec_2d->push_back(goup_diff_vec);

#if RC5_DEBUG_HAVE_MEMORY
		pair_t new_pair;
		for(uint32_t i = 0; i < 2; i++) {
		  new_pair.plaintext_first[i] = plaintext_first[i];
		  new_pair.ciphertext_first[i] = ciphertext_first[i];
		  new_pair.plaintext_second[i] = plaintext_second[i];
		  new_pair.ciphertext_second[i] = ciphertext_second[i];
		}
      /**
       * Store the filtered plaintext/ciphertext pair. It is a
       * candidate good pair. Used for DEBUG only.
       */
		filtered_pairs_vec.push_back(new_pair);
		assert(goup_diff_vec_2d->size() == filtered_pairs_vec.size());
#endif  // #if RC5_DEBUG_HAVE_MEMORY

#if 0														 // DEBUG
		printf("\r[%s:%d] #filtered %5d (2^%f) / %5d (2^%f) | r 2^%f", __FILE__, __LINE__, filtered_pairs_vec.size(), log2(filtered_pairs_vec.size()), cnt_cptext_pairs, log2(cnt_cptext_pairs), log2((double)filtered_pairs_vec.size() / (double)cnt_cptext_pairs));
		fflush(stdout);
#endif  // #if 0

      /**
       * Count the number of actual good pairs among the filtered (for
       * DEBUG purposes)
       */
#if RC5_DEBUG_HAVE_MEMORY
		bool b_is_good = rc5_pair_is_good(new_pair, good_pairs_vec);
		if(b_is_good) {			  // store good pair
		  good_among_filtered_pairs_vec->push_back(new_pair);
		}
#endif  // #if RC5_DEBUG_HAVE_MEMORY
	 }

  } // for() CP texts pool

#if 0									  // DEBUG
  rc5_print_pairs(filtered_pairs_vec);
#endif

#if 1									  // DEBUG
  printf("[%s:%d] RC5_FIXED_KEY %d | Expanded key[%d] = {", __FILE__, __LINE__, RC5_FIXED_KEY, RC5_STAB_LEN_T);
  for(uint32_t j = 0; j < RC5_STAB_LEN_T; j++) {
	 printf("0x%X, ", S[j]);
  }
  printf("};\n\n");
#endif  // #if 1

  printf("[%s:%s():%d] Exit statistics:\n", __FILE__, __FUNCTION__, __LINE__);
  printf("#Rounds %d\n", nrounds);
  printf("WORD_SIZE %d\n", WORD_SIZE);
  printf("RC5_NTEXTS 2^%4.2f\n", log2((double)RC5_NTEXTS));
#if RC5_DEBUG_HAVE_MEMORY
  printf("#CP: %d (2^%f)\n", (uint32_t)cptext_pairs_vec.size(), log2(cptext_pairs_vec.size()));
#endif  // #if RC5_DEBUG_HAVE_MEMORY
  printf("RC5_FIXED_KEY %d\n", RC5_FIXED_KEY);
  printf("RC5_FILTER_LAST_ROUND %d\n", RC5_FILTER_LAST_ROUND);
  printf("RC5_FILTER_ONETOLAST_ROUND %d\n", RC5_FILTER_ONETOLAST_ROUND);
  printf("RC5_FILTER_GOUP %d\n", RC5_FILTER_GOUP);
  printf("RC5_FILTER_GOUP_DEBUG %d\n", RC5_FILTER_GOUP_DEBUG);
  printf("RC5_DEBUG_HAVE_MEMORY %d\n", RC5_DEBUG_HAVE_MEMORY);
#if RC5_DEBUG_HAVE_MEMORY
  printf("#Filtered pairs: %d (2^%f)\n", (uint32_t)filtered_pairs_vec.size(), log2(filtered_pairs_vec.size()));
  printf("#Good pairs among filtered: %d\n", (uint32_t)good_among_filtered_pairs_vec->size());
  assert(goup_diff_vec_2d->size() == filtered_pairs_vec.size());
  printf("#Good pairs total: %d\n", (uint32_t)good_pairs_vec.size());
#else
  assert(good_among_filtered_pairs_vec->size() == 0);
#endif  // #if RC5_DEBUG_HAVE_MEMORY
  printf("#GoUP sets of trails: %d (2^%f)\n", (uint32_t)goup_diff_vec_2d->size(), log2(goup_diff_vec_2d->size()));
}


/* --- */

/**
 * Recursive step of the RC5 "go up" filter (non-linear variant).
 *
 * At level \p depth the function guesses the rotation amount s for this
 * level, derives the difference one level up, D[depth - 1], and recurses
 * until \p depth reaches 0. A branch is abandoned as soon as the Hamming
 * weight of D[depth] exceeds the bound fib_array[depth].
 *
 * \param depth index of the level being processed in ds_array->D / ds_array->S.
 * \param count incremented once for every complete assignment that reaches depth 0.
 * \param fib_array per-level upper bounds on the Hamming weight of D[depth].
 * \param ds_array differences (D) and guessed rotation amounts (S) fixed so far.
 * \param goup_diff_vec receives a copy of \p ds_array for every complete assignment.
 * \return 1 if at least one complete assignment was found below this node, 0 otherwise.
 */
uint32_t rc5_filter_go_up_nl_i(const uint32_t depth, uint32_t* count, 
										  const std::vector<uint32_t> fib_array, const rc5_goup_diffs_t* ds_array,
										  std::vector<rc5_goup_diffs_t>* goup_diff_vec)
{
  // sanity checks on the layout of the difference/rotation arrays
  assert(ds_array->D.size() == (RC5_FIB_LEN  + 1));
  assert(ds_array->D.size() == ds_array->S.size());
  assert(ds_array->D.size() == ds_array->len);

  if(hw32(ds_array->D[depth]) > fib_array[depth]) {
	 return 0;						  // cut the search tree
  }

  if(depth == 0) {				  // reached the top
	 (*count)++;					  // accumulate num. of variants
	 goup_diff_vec->push_back(*ds_array);
	 return 1;
  }

  /**
   * The difference fed to the rotation depends only on D[depth + 1] and
   * not on the guessed rotation amount, so it is computed once instead of
   * once per guess.
   * NOTE(review): assumes max_xdp_add_lm() has no side effects other than
   * writing dx_max -- confirm.
   */
  uint32_t dy = ds_array->D[depth + 1];
  uint32_t dk = 0;
  uint32_t dx_max = 0;
  max_xdp_add_lm(dy, dk, &dx_max);

  uint32_t flag = 0;
  for(uint32_t s = 0; s < WORD_SIZE; s++) { // guess S[depth] over all rotation amounts

	 // keep only guesses for which the rotated difference is zero under RC5_ROT_MASK
	 if((RC5_ROTR(dx_max, s) & RC5_ROT_MASK) != 0) {
		continue;
	 }

	 rc5_goup_diffs_t ds_array_new = *ds_array;
	 assert(ds_array_new.len == ds_array->len);

	 // D[depth - 1] = (dx_max >>> S[depth]) ^ D[depth]
	 ds_array_new.D[depth - 1] = RC5_ROTR(dx_max, s) ^ ds_array_new.D[depth];
	 ds_array_new.S[depth] = s;

	 // always recurse (no short-circuit) so that *count covers every variant
	 if(rc5_filter_go_up_nl_i(depth - 1, count, fib_array, &ds_array_new, goup_diff_vec)) {
		flag = 1;
	 }
  }
  return flag;
}
/* --- */

// Disabled debug shortcut: discards the contents of goup_diff_vec_2d and
// refills it via rc5_good_pairs_to_diff_vec_2d() from the good pairs found
// among the filtered ones, then prints the resulting number of trail sets.
#if 0	 // cheating for DEBUG
  goup_diff_vec_2d.clear();
  rc5_good_pairs_to_diff_vec_2d(S, nrounds, good_among_filtered_pairs_vec, &goup_diff_vec_2d);
  //  rc5_good_pairs_to_diff_vec_2d(S, nrounds, good_pairs_vec, &goup_diff_vec_2d);
  printf("[%s:%d] #GoUP sets of trails: %d (2^%f)\n", __FILE__, __LINE__, 
			(uint32_t)goup_diff_vec_2d.size(), log2(goup_diff_vec_2d.size()));
#endif // #if 0

/* --- */

/* 
X[ 0] 4F03FB89 CF03FB89 80000000 left PT
X[ 1] F2A08F88 72A08F88 80000000 right PT
X[ 2] 8FB44E6A  FB44E6A 80000000
X[ 3] 2952B9F4 2952B9F4        0
X[ 4] 6D2CCFA0 6D24CFA0    80000
X[ 5] 16AEDAB1 16A6DAB1    80000
X[ 6] 4C20BD46 4C20BD46        0
X[ 7] EDB9AABD EBB9AABD  6000000
X[ 8] A5FC78B8 A6BC78B8  3400000
X[ 9] 4AF712F7 4AFBD2F7    CC000
X[10] 2657B902 26565722    1EE20
X[11] 88456C61 8878D3E1   3DBF80
X[12] 4243B8E0 427B17A0   38AF40
X[13] F921CBBD F91EBB7D   3F70C0 left CT
X[14] A7F40165 A7F46895     69F0 right CT
 */

/* ---- */

// Disabled experiment: look up a key candidate in the vector key_vec with
// std::lower_bound() and append it when it is not present.
#if 0

  rc5_key_t key = {0x7, 13};

  printf("[%s:%d] Search for key (%X %lld)\n", __FILE__, __LINE__, key.value, key.counter);

  //  bool b_found = std::binary_search(key_vec.begin(), key_vec.end(), rc5_struct_key_compare_by_value);
  std::vector<rc5_key_t>::iterator vec_iter = 
	 std::lower_bound(key_vec.begin(), key_vec.end(), key, rc5_struct_key_compare_by_value);
  // lower_bound() returns end() when no element qualifies, so the iterator
  // must be checked before it is dereferenced.
  bool b_found = ((vec_iter != key_vec.end()) && (vec_iter->value == key.value));

  if(b_found) {
	 printf("[%s:%d] Found!\n", __FILE__, __LINE__);
  } else {
	 printf("[%s:%d] Not found.\n", __FILE__, __LINE__);
	 printf("[%s:%d] Insert (%8X, %lld)\n", __FILE__, __LINE__, key.value, key.counter);
	 // NOTE(review): appending at the back does not keep key_vec ordered for
	 // the next lower_bound() call -- confirm that the caller re-sorts.
	 key_vec.push_back(key);
	 rc5_key_set_print(key_vec);
  }
#endif


/* --- */
// Disabled experiment: look up a key candidate in the set key_set; when it
// is present its counter is incremented by re-inserting an updated copy,
// otherwise the candidate is inserted.
#if 0
	 std::set<rc5_key_t, rc5_compare_key_by_value>::iterator set_iter = 
		key_set.lower_bound(key);

	 // test against end() first: the iterator may only be dereferenced when it is valid
	 bool b_found = ((set_iter != key_set.end()) && (set_iter->value == key.value));
	 if(b_found) {
		assert(set_iter != key_set.end());
		assert(set_iter->value == key.value);
		printf("[%s:%d] Found!\n", __FILE__, __LINE__);
		printf("[%s:%d] Update counter (%8X, %lld)\n", __FILE__, __LINE__, set_iter->value, set_iter->counter);
		// set elements are immutable: copy, modify the copy, then replace the element
		rc5_key_t new_key = *set_iter;
		printf("[%s:%d] BEFORE Counter (%8X, %lld)\n", __FILE__, __LINE__, new_key.value, new_key.counter);
		new_key.counter++;
		key_set.erase(key);
		key_set.insert(new_key);
		printf("[%s:%d]  AFTER Counter (%8X, %lld)\n", __FILE__, __LINE__, new_key.value, new_key.counter);
	 } else {
		printf("[%s:%d] Not found.\n", __FILE__, __LINE__);
		printf("[%s:%d] Insert (%8X, %lld)\n", __FILE__, __LINE__, key.value, key.counter);
		key_set.insert(key);
	 }
	 //	 rc5_key_set_print(key_vec);
#else


/* --- */

/**
 * Ordering predicate on key values.
 *
 * \param first left-hand key.
 * \param second right-hand key.
 * \return true when the value of \p first is strictly larger than the
 *         value of \p second, false otherwise.
 */
bool rc5_struct_key_compare_by_value(rc5_key_t first, rc5_key_t second)
{
  return (first.value > second.value);
}

/* --- */

struct rc5_struct_key_compare_by_value
  : std::binary_function<rc5_key_t, rc5_key_t, bool>
{
  inline bool operator()(rc5_key_t const& first, rc5_key_t const& second) const
  {
	 bool b_more = (first.value > second.value);
	 return b_more;
  }
};



/* --- */

struct rc5_struct_key_compare_by_value
  : std::binary_function<rc5_key_t, rc5_key_t, bool>
{
  bool operator()(rc5_key_t const& a, rc5_key_t const& b) const
  {
	 bool b_equal = (a.value == b.value);
	 return b_equal;
  }
};


/* --- */

struct rc5_struct_key_compare_by_counter
  : std::binary_function<rc5_key_t, rc5_key_t, bool>
{
  bool operator()(rc5_key_t const& first, rc5_key_t const& second) const
  {
	 bool b_more = (first.counter > second.counter);
	 return b_more;
  }
};



/* --- */

/**
 * Structure for storing suggested key candidates: the candidate value
 * together with the number of times it has been suggested.
 */
struct rc5_key_t
{
  WORD value; /**< value of the key */
  uint64_t counter; /**< how many times the key was suggested */
};

/**
 * Compare two keys by their counters. Bigger counter values are
 * listed first.
 */
struct rc5_compare_struct_key : public std::binary_function<rc5_key_t, rc5_key_t, bool>
{
  inline bool operator()(rc5_key_t first, rc5_key_t second)
  {
	 bool b_more = (first.counter > second.counter);
	 return b_more;
  }
};

void rc5_key_set_print(const std::set<rc5_key_t, rc5_compare_struct_key> key_set)
{
  printf("[%s:%d] Key set size %d (2^%f)\n", __FILE__, __LINE__, (uint32_t)key_set.size(), log2(key_set.size()));
  uint64_t i = 0;
  std::set<rc5_key_t, rc5_compare_struct_key>::const_iterator set_iter;
  for(set_iter = key_set.begin(); set_iter != key_set.end(); set_iter++) {
	 i++;
	 rc5_key_t key = *set_iter;
	 printf("[%5lld] %8X %lld (2^%f)\n", i, key.value, key.counter, log2(key.counter));
  }
}


/* --- */

/**
 * Compare two keys by their counters. Bigger counter values are
 * listed first.
 */
struct rc5_struct_key_equal_to
  : std::binary_function<rc5_key_t, rc5_key_t, bool>
{
  bool operator()(rc5_key_t const& a, rc5_key_t const& b) const
  {

	 bool b_equal = (a.value == b.value);
	 return b_equal;
  }
};

struct rc5_compare_struct_key : public std::binary_function<rc5_key_t, rc5_key_t, bool>
{
  inline bool operator()(rc5_key_t first, rc5_key_t second)
  {
	 bool b_equal = (first.value == second.value);
	 return b_equal;
  }
};



/* --- */

// Disabled early-exit checks: for a fixed list of rotation constants i
// (one list per word size) return when i_R holds no entry for that constant.
#if 0//(WORD_SIZE == 16)
  if((i == 0) || (i == 4) || (i == 8) || (i == 12)) {
	 if(i_R.size() == 0) {
		printf("[%s:%d] No pair with rot const = %d . Returning...\n", __FILE__, __LINE__, i);
		return;
	 }
  }
#endif  // #if (WORD_SIZE == 16)
#if 0//(WORD_SIZE == 32)
  if((i == 0) || (i == 5) || (i == 10) || (i == 15) || (i == 20) || (i == 25) || (i == 27)) {
	 if(i_R.size() == 0) {
		printf("[%s:%d] No pair with rot const = %d . Returning...\n", __FILE__, __LINE__, i);
		return;
	 }
  }
#endif  // #if (WORD_SIZE == 32)

/* --- */
	 /**
	  * Starting from new_i, walk the index j downwards (but not below i)
	  * until a non-empty bucket R[j] is found. If one is found above i it
	  * becomes the new position new_i and the rotation mask is shortened
	  * when fewer than hw32(RC5_ROT_MASK) bits remain up to WORD_SIZE.
	  *
	  * NOTE(review): j may start at WORD_SIZE (see the asserts below);
	  * confirm that R has an element at that index.
	  */
	 uint32_t j = new_i;

#if 1
	 printf("[%s:%d] Recovered key[%2d : %2d]\n", __FILE__, __LINE__, i, new_i - 1);
	 printf("[%s:%d] Next j %2d new_i_rot_mask %X\n", __FILE__, __LINE__, j, new_i_rot_mask);
#endif

	 assert(new_i <= WORD_SIZE);
	 assert(i > 0);
	 assert(new_i > 0);
	 assert(j <= WORD_SIZE);

	 /**
	  * Compute the next index s.t. R[j].size() != 0
	  */
	 //	while((R[j].size() == 0) && (j != (new_i - 1))) {
	 while((R[j].size() == 0) && (j > i)) {
#if 1
		// size() returns size_t: cast before passing it to a %d conversion
		printf("[%s:%d] R[%2d].size() = %d. Trying j - 1 = %d ...\n", __FILE__, __LINE__, j, (uint32_t)R[j].size(), j-1);
#endif
		j--;
#if 1
		printf("[%s:%d] j %d R[%2d].size() = %d i %d\n", __FILE__, __LINE__, j, j, (uint32_t)R[j].size(), i);
#endif
	 }
	 bool b_next = (j > i);
	 if(b_next) {
		assert((R[j].size() != 0));
		new_i = j;
		assert(new_i <= WORD_SIZE);
		new_i_rot_mask = RC5_ROT_MASK;//RC5_ROT_MASK >> (new_i - j);
		if((new_i < WORD_SIZE) && (WORD_SIZE - new_i) < hw32(RC5_ROT_MASK)) { // if less than log2(w) remain then shorten the mask 
		  new_i_rot_mask = 0xffffffff >> (32 - (WORD_SIZE - new_i));
#if 1
		  printf("[%s:%d] new_i %d | %d < %d\n", __FILE__, __LINE__, new_i, (WORD_SIZE - new_i), hw32(RC5_ROT_MASK));
		  printf("[%s:%d] Shorten mask %X\n", __FILE__, __LINE__, new_i_rot_mask);
#endif
		}
		printf("[%s:%d] Found new_i %2d new_i_rot_mask %X\n", __FILE__, __LINE__, new_i, new_i_rot_mask);
	 }

/* --- */

  // Fallback for an empty bucket: when no pair has rotation constant i, retry
  // one position lower (i - 1) with the mask shifted right by one bit, provided
  // i > 0, the mask is not already 1 and R[i-1] is not empty; otherwise return.
  // NOTE(review): the retry branch does not return after the recursive call,
  // so control continues past this block with the empty i_R -- confirm intent.
  if(i_R.size() == 0) {
  //  if(0) {
	 printf("[%s:%d] WARNING! No pair with rot const = %d .\n", __FILE__, __LINE__, i);
	 if((i > 0) && (i_rot_mask != 1) && (R[i-1].size() != 0)) {
		printf("[%s:%d] Trying out rot const i = %d i_rot_mask %X\n", __FILE__, __LINE__, i-1, (i_rot_mask >> 1));
		rc5_last_round_rot_const_keyrec_i(i-1, (i_rot_mask >> 1), key, R, key_cand_vec);
	 } else {
		printf("[%s:%d] Returning \n", __FILE__, __LINE__);
		return;
	 }
  }


/* ---- */


// BUGGED!!!!! DONT USE!!!!!!
/**
 * Abandoned version of the recursive last-round key recovery. It is kept
 * for reference only: the #error directives make any translation unit that
 * contains it fail to compile.
 *
 * For every entry of R[i] the next log2(w) bits of the key, starting at bit
 * position i, are computed twice -- once from each right ciphertext word
 * (y and yy) -- and the recursion continues at position i + RC5_LOG2W only
 * when both computations agree. When i reaches WORD_SIZE the accumulated
 * key is appended to key_cand_vec.
 *
 * \param i current bit position (equal to the rotation constant of the entries in R[i]).
 * \param key the key bits recovered so far, k[i - 1 : 0].
 * \param R entries grouped by rotation constant.
 * \param key_cand_vec output vector of full key candidates.
 */
void xxxx_rc5_last_round_rot_const_keyrec_i(const uint32_t i, // rot const
													const uint32_t key, // k[i - 1 : 0]
													const std::vector<eq_x_params_t> R[WORD_SIZE],
													std::vector<WORD>* key_cand_vec)
{
#error 								  // BUGGED!!!
  //  printf("[%s:%d] Enter %s() i = %2d\n", __FILE__, __LINE__, __FUNCTION__, i);
  // base case: all bit positions processed
  if(i >= WORD_SIZE) {
	 //	 printf("[%s:%d] Key candidate %8X\n", __FILE__, __LINE__, key);
	 key_cand_vec->push_back(key);
	 return;
  }

#error 								  // BUGGED!!!
  const std::vector<eq_x_params_t> i_R = R[i];
  assert(i_R.size() != 0);
  // only reachable when assertions are compiled out
  if(i_R.size() == 0) {
	 printf("[%s:%d] WARNING! No pair with rot const = %d . Returning...\n", __FILE__, __LINE__, i);
	 return;
  }

#error 								  // BUGGED!!!
  std::vector<eq_x_params_t>::const_iterator i_iter = i_R.begin();

#error 								  // BUGGED!!!
  /**
   * Cycle through all entries that have rotation constant i 
   */
#error 								  // BUGGED!!!
  for(i_iter = i_R.begin(); i_iter != i_R.end(); i_iter++) {
	 eq_x_params_t i_params = *i_iter;
	 WORD dx = i_params.dx;
	 WORD y = i_params.y;		  // right ciphertext-1
	 WORD yy = i_params.yy;		  // right ciphertext-2 (DEBUG)
	 WORD r = i_params.rot_const; // rot const from left ciphertext  (= r7)
	 WORD r_prev = i_params.rot_const_prev; // rot const from previous round (= r6)
	 assert(i == r);
	 assert(r <= (WORD_SIZE - RC5_LOG2W));
	 assert(r <= RC5_ROT_MASK);
	 assert(r_prev <= RC5_ROT_MASK);
	 /**
	  * L_{n-1}[log2(w) - 1 : 0] ^ L_{n}[log2(w) - 1 : 0]
	  */
	 WORD x = (r ^ r_prev) & RC5_ROT_MASK;		  // log2w bits
	 assert(x <= RC5_ROT_MASK);

	 /**
	  * Masks the i LS bits.
	  * NOTE(review): for i == 0 the shift count is 32, which is undefined
	  * for a 32-bit operand.
	  */
	 uint32_t i_mask = (0xffffffff >> (32 - i));
	 assert(key <= i_mask);

	 WORD c_s = 0;  // carry
	 WORD cc_s = 0; // carry

	 if(i > 0) {
		/**
		 * if S_n[s-1 : 0] > R_n[s-1 : 0] then carry[s] = 1
		 */
		int32_t y_sub_k = (y & i_mask) - (key & i_mask);
		if(y_sub_k < 0) {
		  c_s = 1;
		}
		int32_t yy_sub_k = (yy & i_mask) - (key & i_mask);
		if(yy_sub_k < 0) {
		  cc_s = 1;
		}
#if 0									  // DEBUG
		printf("[%s:%d] %d - %d = %d %d\n", 
				 __FILE__, __LINE__, (y & i_mask), (key & i_mask), ((y & i_mask) - (key & i_mask)), y_sub_k);
		printf("[%s:%d] %d - %d = %d %d\n", 
				 __FILE__, __LINE__, (yy & i_mask), (key & i_mask), ((yy & i_mask) - (key & i_mask)), yy_sub_k);
#endif
	 }
#error 								  // BUGGED!!!
	 /**
	  * Compute the next log2(w) bits of S_n i.e. bits S_n[s+log2(w)-1 : s] :
	  *
	  * S_n[log2(w) + s - 1 + i : s] = 
	  * (R_n[s + log2(w) - 1  : s] - (L_{n-1}[log2(w) - 1 : 0] ^ L_{n}[log2(w) - 1 : 0]) + c_s) mod 2^{log2(w)} 
	  */
#error 								  // BUGGED!!!
	 int32_t k = (((y >> i) & RC5_ROT_MASK) - (x & RC5_ROT_MASK) + c_s);
	 int32_t kk = (((yy >> i) & RC5_ROT_MASK) - (((dx >> r) ^ x) & RC5_ROT_MASK) + cc_s);
	 // NOTE(review): a negative result is lifted by RC5_ROT_MASK, whereas the
	 // reduction below uses the modulus (1 + RC5_ROT_MASK) -- possibly one of
	 // the defects this function is flagged for; confirm.
	 if(k < 0) {
		k += RC5_ROT_MASK;
		assert(k >= 0);
	 }
	 if(kk < 0) {
		kk += RC5_ROT_MASK;
		assert(kk >= 0);
	 }
	 assert((x & RC5_ROT_MASK) == (((dx >> r) ^ x) & RC5_ROT_MASK));

	 WORD key_next_log2w_bits = k % (1 + RC5_ROT_MASK);
	 WORD kkey_next_log2w_bits = kk % (1 + RC5_ROT_MASK);

	 //	 WORD key_next_log2w_bits = (((y >> i) & RC5_ROT_MASK) - (x & RC5_ROT_MASK) + c_s) & RC5_ROT_MASK;
	 //	 WORD kkey_next_log2w_bits = (((yy >> i) & RC5_ROT_MASK) - (((dx >> r) ^ x) & RC5_ROT_MASK) + cc_s) & RC5_ROT_MASK;
	 //	 WORD kkey_next_log2w_bits = (((yy >> i) & RC5_ROT_MASK) - (x & RC5_ROT_MASK) + cc_s) & RC5_ROT_MASK;

	 // the two ciphertexts must suggest the same key bits; otherwise give up
	 // (this returns from the whole call, skipping the remaining entries of R[i])
	 if(key_next_log2w_bits != kkey_next_log2w_bits) { // no solution
		printf("[%s:%d] c_s %2d cc_s %2d x %d y %d yy %d\n", __FILE__, __LINE__, c_s, cc_s, x, (y >> i) & RC5_ROT_MASK, (yy >> i) & RC5_ROT_MASK);
		printf("[%s:%d] Before k %d %X %d %X\n", __FILE__, __LINE__, k, k, kk, kk);
		printf("[%s:%d] After k %d %X %d %X\n", __FILE__, __LINE__, key_next_log2w_bits, key_next_log2w_bits, kkey_next_log2w_bits, kkey_next_log2w_bits);
		printf("[%s:%d]  k %8X != kk %8X. Returning...\n", __FILE__, __LINE__, key_next_log2w_bits, kkey_next_log2w_bits);
		return;
	 }
	 assert(key_next_log2w_bits == kkey_next_log2w_bits);

	 // place the new bits above the i bits recovered so far
	 WORD key_new =  ((key_next_log2w_bits << i) | (key & i_mask));
	 WORD kkey_new =  ((kkey_next_log2w_bits << i) | (key & i_mask));
	 assert(key_new == kkey_new);

	 assert((key & i_mask) == key);
	 assert(i <= (WORD_SIZE - RC5_LOG2W));

	 uint32_t new_i = i + RC5_LOG2W;

#if(WORD_SIZE == 32) 			  // reset i pos as log2w is not multiple of 32
	 if(new_i == 30) {
		new_i = 27;
	 }
#endif  // #if(WORD_SIZE == 32) 

#error 								  // BUGGED!!!
	 xxxx_rc5_last_round_rot_const_keyrec_i(new_i, key_new, R, key_cand_vec);

	 assert(i <= (WORD_SIZE - RC5_LOG2W));
  }
}

/* --- */

		//		if((key & i_mask) > (y & i_mask)) {
		//		if(((y & i_mask) - (key & i_mask)) < 0) {
		//		if((key & i_mask) > (yy & i_mask)) {
		//		if(((yy & i_mask) - (key & i_mask)) < 0) {

/* --- */

/**
 * For every good pair, encrypt both plaintexts and collect the
 * intermediate values produced during the two encryptions.
 *
 * While collecting, the values are cross-checked with assertions: the
 * first two values must be the plaintext words, the values at indices
 * (2 * nrounds) + 1 and (2 * nrounds) + 2 must be the left and right
 * ciphertext words, and for every index below (2 * nrounds) + 2 the XOR
 * difference of the two encryptions must be zero under RC5_ROT_MASK.
 *
 * \param S the expanded key.
 * \param nrounds number of rounds.
 * \param good_pairs_vec vector of good pairs (determined experimentally
 *        with the "secret key", for DEBUG purposes).
 * \param values_first_pair receives, per good pair, the intermediate
 *        values of the encryption of the first plaintext.
 * \param values_second_pair receives, per good pair, the intermediate
 *        values of the encryption of the second plaintext.
 *
 * \see rc5_good_pairs_get_intermediate_values
 */
void rc5_good_pairs_to_diff_vec_2d(const WORD S[RC5_STAB_LEN_T], const uint32_t nrounds,
											  const std::vector<pair_t> good_pairs_vec,
											  std::vector<std::vector<uint32_t>>* values_first_pair,
											  std::vector<std::vector<uint32_t>>* values_second_pair)
{
  const uint32_t left = RC5_FEISTEL_LEFT;
  const uint32_t right = RC5_FEISTEL_RIGHT;

  for(size_t n_pair = 0; n_pair < good_pairs_vec.size(); n_pair++) {
	 const pair_t pair = good_pairs_vec[n_pair];

	 // first text of the pair
	 WORD pt_first[2] = {0, 0};
	 WORD ct_first[2] = {0, 0};
	 pt_first[left] = pair.plaintext_first[left];
	 pt_first[right] = pair.plaintext_first[right];
	 std::vector<uint32_t> X_first; // intermediate values from encryption
	 rc5_encrypt_get_intermediate_values(S, nrounds, pt_first, ct_first, &X_first);

	 // second text of the pair
	 WORD pt_second[2] = {0, 0};
	 WORD ct_second[2] = {0, 0};
	 pt_second[left] = pair.plaintext_second[left];
	 pt_second[right] = pair.plaintext_second[right];
	 std::vector<uint32_t> X_second;
	 rc5_encrypt_get_intermediate_values(S, nrounds, pt_second, ct_second, &X_second);

	 assert(X_first.size() == X_second.size());

	 // consistency checks on the collected values
	 for(uint32_t i = 0; i < X_first.size(); i++) {
		const uint32_t x = X_first[i];
		const uint32_t xx = X_second[i];
		const uint32_t dx = (x ^ xx);

		if(i == 0) {				  // plaintext: X[0]
		  assert(x == pt_first[left]);
		  assert(xx == pt_second[left]);
		}
		if(i == 1) {				  // plaintext: X[1]
		  assert(x == pt_first[right]);
		  assert(xx == pt_second[right]);
		}
		if(i == ((2*nrounds) + 2 - 1)) { // ciphertext: X[(2*nrounds) + 1]
		  assert(x == ct_first[left]);
		  assert(xx == ct_second[left]);
		}
		if(i == ((2 * nrounds) + 2)) { // ciphertext: X[(2*nrounds) + 2]
		  assert(x == ct_first[right]);
		  assert(xx == ct_second[right]);
		}
		// the rotation amounts of both texts must agree everywhere except
		// at the last value, i.e. (dx & RC5_ROT_MASK) == 0
		if(i < ((2 * nrounds) + 2)) { 
		  assert((dx & RC5_ROT_MASK) == 0);
		}
	 }

	 // store values
	 values_first_pair->push_back(X_first);
	 values_second_pair->push_back(X_second);
  }
}

/* --- */

/**
 * Re-encrypt every good pair and append one trail vector per pair to
 * \p goup_diff_vec_2d.
 *
 * NOTE(review): as written, the appended trail vector is always empty and
 * neither \p dx, the computed intermediate values nor the ciphertexts are
 * used afterwards -- the function looks unfinished; confirm before use.
 *
 * \param S the expanded key.
 * \param nrounds number of rounds.
 * \param dx input difference (currently unused).
 * \param good_pairs_vec vector of good pairs.
 * \param goup_diff_vec_2d receives one (empty) trail vector per good pair.
 */
void rc5_good_pairs_to_diff_vec_2d(const WORD S[RC5_STAB_LEN_T],
											  const uint32_t nrounds,
											  const WORD dx[2], 
											  const std::vector<pair_t> good_pairs_vec,
											  std::vector<std::vector<rc5_goup_diffs_t>>* goup_diff_vec_2d)
{
  printf("[%s:%s():%d]\n", __FILE__, __FUNCTION__, __LINE__);

  // intermediate values for the good pairs
  std::vector<std::vector<uint32_t>> values_first_pair;
  std::vector<std::vector<uint32_t>> values_second_pair;
  rc5_good_pairs_get_intermediate_values(S, nrounds, good_pairs_vec, &values_first_pair, &values_second_pair);
  assert(good_pairs_vec.size() == values_first_pair.size());
  assert(good_pairs_vec.size() == values_second_pair.size());

  // working buffers for the texts of the pair being processed
  WORD plaintext_first[2] = {0, 0};
  WORD plaintext_second[2] = {0, 0};
  WORD ciphertext_first[2] = {0, 0};
  WORD ciphertext_second[2] = {0, 0};

  for(size_t n_pair = 0; n_pair < good_pairs_vec.size(); n_pair++) {

	 // trail of the current pair; one element of goup_diff_vec_2d
	 std::vector<rc5_goup_diffs_t> goup_diff_vec;

	 // extract the plaintexts (index 0 = left word, index 1 = right word)
	 pair_t cp_pair = good_pairs_vec[n_pair];
	 for(uint32_t w = 0; w < 2; w++) {
		plaintext_first[w] = cp_pair.plaintext_first[w];
		plaintext_second[w] = cp_pair.plaintext_second[w];
	 }

	 // encrypt both texts of the pair
#if RC5_XOR  // XOR-linear
	 rc5_xor_encrypt(nrounds, S, plaintext_first, ciphertext_first);  
	 rc5_xor_encrypt(nrounds, S, plaintext_second, ciphertext_second);  
#else	 // original
	 rc5_encrypt(nrounds, S, plaintext_first, ciphertext_first);  
	 rc5_encrypt(nrounds, S, plaintext_second, ciphertext_second);  
#endif  // #if RC5_XOR

	 // record the ciphertexts in the local copy of the pair
	 for(uint32_t w = 0; w < 2; w++) {
		cp_pair.ciphertext_first[w] = ciphertext_first[w];
		cp_pair.ciphertext_second[w] = ciphertext_second[w];
	 }

	 // store the trail that corresponds to this pair
	 goup_diff_vec_2d->push_back(goup_diff_vec);
  }
}

/* ---- */
	 // Fragment: recover log2(w) key bits at bit position i from the right
	 // ciphertext words y and yy, then advance the position by RC5_LOG2W.
	 WORD x = (r ^ r_prev) << r;		  // log2w bits
	 /**
	  * Masks the i LS bits.
	  * NOTE(review): for i == 0 the shift count is 32, which is undefined
	  * for a 32-bit operand.
	  */
	 uint32_t i_mask = (0xffffffff >> (32 - i));
	 uint32_t c = 0;				  // borrow
	 uint32_t cc = 0;				  // borrow (DEBUG)
    /**
     * Check if the following subtraction generates a borrow:
     * y[i-1 : 0] - k[i-1 : 0]
     *
     * NOTE(review): if the operands are unsigned the difference can never
     * be negative, so c and cc would always stay 0 -- confirm the type of WORD.
     */
	 if(((y & i_mask) - (key & i_mask)) < 0) {
		c = 1;						  // -1
	 }
	 if(((yy & i_mask) - (key & i_mask)) < 0) {
		cc = 1;						  // -1
	 }
	 /**
	  * y[log2(w)+i-1 : i], x[log2(w)+i-1 : i]
	  */
	 WORD y_log2w_i = (y >> i) & RC5_ROT_MASK;
	 WORD yy_log2w_i = (yy >> i) & RC5_ROT_MASK;
	 WORD x_log2w_i = (x >> i) & RC5_ROT_MASK;
	 assert(x_log2w_i == (r ^ r_prev));

	 // key bits suggested by each of the two ciphertexts; they must agree
	 //	 WORD key_log2w_i = (y_log2w_i - x_log2w_i - c) & RC5_ROT_MASK;
	 //	 WORD kkey_log2w_i = (yy_log2w_i - x_log2w_i - cc) & RC5_ROT_MASK;
	 WORD key_log2w_i = (y_log2w_i - x_log2w_i + c) & RC5_ROT_MASK;
	 WORD kkey_log2w_i = (yy_log2w_i - x_log2w_i + cc) & RC5_ROT_MASK;
	 assert(key_log2w_i == kkey_log2w_i);

	 // place the new bits above the i bits recovered so far
	 WORD key_rec = (key_log2w_i << i) | (key & i_mask);
	 WORD kkey_rec = (kkey_log2w_i << i) | (key & i_mask);
	 assert(key_rec == kkey_rec);

	 assert(i <= (WORD_SIZE - RC5_LOG2W));

	 uint32_t new_i = i + RC5_LOG2W;

#if(WORD_SIZE == 32) 			  // reset i pos as log2w is not multiple of 32
	 if(new_i == 30) {
		new_i = 27;
	 }

/* --- */

	 //	 WORD y_log2w_i = (y >> i) & RC5_ROT_MASK;
	 //	 WORD yy_log2w_i = (yy >> i) & RC5_ROT_MASK;
	 //	 WORD x_log2w_i = (x) & RC5_ROT_MASK;
	 //	 assert(x_log2w_i == (r ^ r_prev));

	 //	 WORD key_log2w_i = (y_log2w_i - x_log2w_i + c_s) & RC5_ROT_MASK;
	 //	 WORD kkey_log2w_i = (yy_log2w_i - x_log2w_i + cc_s) & RC5_ROT_MASK;
	 //	 assert(key_log2w_i == kkey_log2w_i);



/* --- */

/**
 * Recursive last-round key recovery, experimental variant that recovers
 * ONE key bit per recursion level (the log2(w)-bits-per-level computation
 * is kept in the disabled #else branch).
 *
 * For at most the first three entries of R[i], bit i of the key is
 * computed from bit i of the right ciphertext word y, the low bit of
 * (rot_const ^ rot_const_prev) and the carry out of the i low bits, and
 * the recursion continues at position i + 1. When i reaches WORD_SIZE the
 * accumulated key is appended to \p key_cand_vec.
 *
 * \param i current bit position (equal to the rotation constant of the entries in R[i]).
 * \param key the key bits recovered so far, k[i - 1 : 0].
 * \param R entries grouped by rotation constant.
 * \param key_cand_vec output vector of full key candidates.
 */
void rc5_last_round_rot_const_keyrec_i(const uint32_t i, // rot const
													const uint32_t key, // k[i - 1 : 0]
													const std::vector<eq_x_params_t> R[WORD_SIZE],
													std::vector<WORD>* key_cand_vec)
{
  if(i >= WORD_SIZE) {			  // all bit positions processed
	 key_cand_vec->push_back(key);
	 return;
  }

  // R[i] is only read: bind it by reference instead of copying the whole
  // vector on every recursive call.
  const std::vector<eq_x_params_t>& i_R = R[i];
  assert(i_R.size() != 0);
  if(i_R.size() == 0) {			  // only reachable when assertions are compiled out
	 printf("[%s:%d] WARNING! No pair with rot const = %d . Returning...\n", __FILE__, __LINE__, i);
	 return;
  }

  /**
   * Masks the i LS bits. For i == 0 the mask is empty; the shift is not
   * evaluated in that case because a shift by 32 is undefined for a 32-bit
   * operand.
   */
  const uint32_t i_mask = (i == 0) ? 0 : (0xffffffff >> (32 - i));

  /**
   * Cycle through the entries that have rotation constant i 
   * (at most the first three of them).
   */
  uint32_t i_cnt = 0;
  std::vector<eq_x_params_t>::const_iterator i_iter;
  for(i_iter = i_R.begin(); (i_iter != i_R.end()) && (i_cnt < 3); ++i_iter) {
	 i_cnt++;
	 const eq_x_params_t& i_params = *i_iter;
	 WORD y = i_params.y;		  // right ciphertext-1
	 WORD r = i_params.rot_const; // rot const from left ciphertext  (= r7)
	 WORD r_prev = i_params.rot_const_prev; // rot const from previous round (= r6)
	 assert(i == r);

#if 1
	 /**
	  * Low bit of L_{n-1}[log2(w) - 1 : 0] ^ L_{n}[log2(w) - 1 : 0]
	  */
	 WORD x = (r ^ r_prev) & 1;

	 /**
	  * if S_n[s-1 : 0] > R_n[s-1 : 0] then carry[s] = 1
	  */
	 WORD c_s = 0;					  // carry
	 if((i > 0) && ((key & i_mask) > (y & i_mask))) {
		c_s = 1;
	 }

	 // next key bit: bit i of y, corrected by x and the carry
	 WORD key_next_bit = (((y >> i) & 1) ^ x ^ c_s) & 1;
	 WORD key_new =  ((key_next_bit << i) | key);

	 /**
	  * Advance by one bit. The "30 -> 27" position reset used by the
	  * log2(w)-bit variant for WORD_SIZE == 32 is intentionally not applied
	  * here: with a step of 1 the position reaches WORD_SIZE exactly, and
	  * resetting 30 to 27 would keep the recursion cycling through 27..29
	  * without ever reaching the base case.
	  */
	 uint32_t new_i = i + 1;

	 rc5_last_round_rot_const_keyrec_i(new_i, key_new, R, key_cand_vec);

	 // -------------------------------------------
#else //#if 0

	 WORD yy = i_params.yy;		  // right ciphertext-2 (DEBUG)
	 WORD x = (r ^ r_prev) << r;		  // log2w bits
	 uint32_t c = 0;				  // borrow
	 uint32_t cc = 0;				  // borrow (DEBUG)
    /**
     * Check if the following subtraction generates a borrow:
     * y[i-1 : 0] - k[i-1 : 0]
     *
     * NOTE(review): if the operands are unsigned the difference can never
     * be negative, so c and cc would always stay 0 -- revisit together with
     * the sign of c below before enabling this branch.
     */
	 if(((y & i_mask) - (key & i_mask)) < 0) {
		c = 1;						  // -1
	 }
	 if(((yy & i_mask) - (key & i_mask)) < 0) {
		cc = 1;						  // -1
	 }
	 /**
	  * y[log2(w)+i-1 : i], x[log2(w)+i-1 : i]
	  */
	 WORD y_log2w_i = (y >> i) & RC5_ROT_MASK;
	 WORD yy_log2w_i = (yy >> i) & RC5_ROT_MASK;
	 WORD x_log2w_i = (x >> i) & RC5_ROT_MASK;
	 assert(x_log2w_i == (r ^ r_prev));

	 //	 WORD key_log2w_i = (y_log2w_i - x_log2w_i - c) & RC5_ROT_MASK;
	 //	 WORD kkey_log2w_i = (yy_log2w_i - x_log2w_i - cc) & RC5_ROT_MASK;
	 WORD key_log2w_i = (y_log2w_i - x_log2w_i + c) & RC5_ROT_MASK;
	 WORD kkey_log2w_i = (yy_log2w_i - x_log2w_i + cc) & RC5_ROT_MASK;

	 WORD key_rec = (key_log2w_i << i) | (key & i_mask);
	 WORD kkey_rec = (kkey_log2w_i << i) | (key & i_mask);
	 assert(key_rec == kkey_rec);

	 assert(i <= (WORD_SIZE - RC5_LOG2W));

	 uint32_t new_i = i + RC5_LOG2W;

#if(WORD_SIZE == 32) 			  // reset i pos as log2w is not multiple of 32
	 if(new_i == 30) {
		new_i = 27;
	 }
#endif  // #if(WORD_SIZE == 32) 

	 rc5_last_round_rot_const_keyrec_i(new_i, key_rec, R, key_cand_vec);
#endif // #if 0
	 // -------------------------------------------
  }

}

/* --- */

/**
 * Recursive last-round key recovery, log2(w) key bits per recursion level.
 *
 * For every entry of R[i] the key bits [i + log2(w) - 1 : i] are computed
 * from the right ciphertext word y as
 *
 *   (y[i + log2(w) - 1 : i] - (rot_const ^ rot_const_prev) - borrow) mod 2^{log2(w)}
 *
 * where borrow is 1 when the already recovered low key bits k[i-1 : 0]
 * exceed y[i-1 : 0]. The same computation is repeated for the second
 * ciphertext word yy and both results are asserted to be equal. The
 * recursion continues at position i + RC5_LOG2W; when i reaches WORD_SIZE
 * the accumulated key is appended to \p key_cand_vec.
 *
 * \param i current bit position (equal to the rotation constant of the entries in R[i]).
 * \param key the key bits recovered so far, k[i - 1 : 0].
 * \param R entries grouped by rotation constant.
 * \param key_cand_vec output vector of full key candidates.
 */
void rc5_last_round_rot_const_keyrec_i(const uint32_t i, // rot const
													const uint32_t key, // k[i - 1 : 0]
													const std::vector<eq_x_params_t> R[WORD_SIZE],
													std::vector<WORD>* key_cand_vec)
{
  if(i >= WORD_SIZE) {			  // all bit positions processed
	 key_cand_vec->push_back(key);
	 return;
  }

  // R[i] is only read: bind it by reference instead of copying the whole
  // vector on every recursive call.
  const std::vector<eq_x_params_t>& i_R = R[i];
  assert(i_R.size() != 0);
  if(i_R.size() == 0) {			  // only reachable when assertions are compiled out
	 printf("[%s:%d] WARNING! No pair with rot const = %d . Returning...\n", __FILE__, __LINE__, i);
	 return;
  }

  /**
   * Masks the i LS bits. For i == 0 the mask is empty; the shift is not
   * evaluated in that case because a shift by 32 is undefined for a 32-bit
   * operand.
   */
  const uint32_t i_mask = (i == 0) ? 0 : (0xffffffff >> (32 - i));
  const uint32_t key_low = (key & i_mask); // k[i-1 : 0]

  /**
   * Cycle through all entries that have rotation constant i 
   */
  std::vector<eq_x_params_t>::const_iterator i_iter;
  for(i_iter = i_R.begin(); i_iter != i_R.end(); ++i_iter) {

	 const eq_x_params_t& i_params = *i_iter;
	 WORD y = i_params.y;		  // right ciphertext-1
	 WORD yy = i_params.yy;		  // right ciphertext-2 (DEBUG)
	 WORD r = i_params.rot_const; // rot const from left ciphertext  (= r7)
	 WORD r_prev = i_params.rot_const_prev; // rot const from previous round (= r6)
	 assert(i == r);
	 /**
     * x[log2(w)+r7-1 : r7] = (r7 ^ r6) <<< r7;
     */
	 assert(r <= (WORD_SIZE - RC5_LOG2W));
	 WORD x = ((r ^ r_prev) << r);

    /**
     * Check if the following subtraction generates a borrow:
     * y[i-1 : 0] - k[i-1 : 0]
     *
     * The operands are compared directly: testing the sign of their
     * difference does not work on unsigned operands, whose difference
     * wraps around instead of becoming negative.
     */
	 const uint32_t c = ((y & i_mask) < key_low) ? 1 : 0;   // borrow
	 const uint32_t cc = ((yy & i_mask) < key_low) ? 1 : 0; // borrow (DEBUG)

	 /**
	  * y[log2(w)+i-1 : i], x[log2(w)+i-1 : i]
	  */
	 WORD y_log2w_i = (y >> i) & RC5_ROT_MASK;
	 WORD yy_log2w_i = (yy >> i) & RC5_ROT_MASK;
	 WORD x_log2w_i = (x >> i) & RC5_ROT_MASK;
	 assert(x_log2w_i == (r ^ r_prev));

	 // key bits suggested by each of the two ciphertexts
	 WORD key_log2w_i = (y_log2w_i - x_log2w_i - c) & RC5_ROT_MASK;
	 WORD kkey_log2w_i = (yy_log2w_i - x_log2w_i - cc) & RC5_ROT_MASK;

	 // place the new bits above the i bits recovered so far
	 WORD key_rec = (key_log2w_i << i) | key_low;
	 WORD kkey_rec = (kkey_log2w_i << i) | key_low;
	 assert(key_rec == kkey_rec);

	 assert(i <= (WORD_SIZE - RC5_LOG2W));

	 uint32_t new_i = i + RC5_LOG2W;

#if(WORD_SIZE == 32) 			  // reset i pos as log2w is not multiple of 32
	 if(new_i == 30) {
		new_i = 27;
	 }
#endif  // #if(WORD_SIZE == 32) 

	 rc5_last_round_rot_const_keyrec_i(new_i, key_rec, R, key_cand_vec);
  }

}

/* --- */

#if 0 // DEBUG
  /*
   * Disabled, orphaned debug snippet: it refers to locals (i_params, S,
   * nrounds, r, r_prev, y, yy, key_rec) that are not declared here.
   *
   * Prints the parameters of one entry together with the recovered key
   * candidate key_rec and the subkey S[2 + 2*nrounds - 1] masked with
   * RC5_ROT_MASK; the line is marked with " <- " when the two are equal.
   */
  WORD dx = i_params.dx;
  uint32_t key = S[2 + (2*nrounds) - 1] & RC5_ROT_MASK;
  printf("r = %2d: (%8X, %8X, %8X, %2d, %2d) ", r, dx, y, yy, r, r_prev);
  printf(" k %8X S[%2d] %8X  ", key_rec, (2 + (2*nrounds) - 1), key);
  if(key_rec == key) {
	 printf(" <- ");				  // recovered candidate equals the masked subkey
  }
  printf("\n");
#endif // #if 0 // DEBUG 



/* --- */
	 /*
	  * Mask selecting the RC5_LOG2W least-significant bits of a 32-bit
	  * value (per the original note: log2(w) bits, w - word size in bits).
	  * NOTE(review): the shift count is 32 when RC5_LOG2W is 0, which is
	  * undefined for a 32-bit operand -- presumably RC5_LOG2W is 4 or 5;
	  * confirm.
	  */
	 uint32_t log2w_mask = (0xffffffff >> (32 - RC5_LOG2W));


/* --- */

#if 0
  /*
   * Disabled, orphaned snippet (pairs_vec and R are declared elsewhere).
   *
   * Distributes the pairs of pairs_vec into buckets: every pair is appended
   * to the vector R[i], where i is the value of the left word of the first
   * ciphertext masked with RC5_ROT_MASK. Both ciphertexts of a pair are
   * asserted to give the same masked value.
   */
  printf("[%s:%d] %s()\n", __FILE__, __LINE__, __FUNCTION__);
  uint32_t left = RC5_FEISTEL_LEFT;
  uint32_t c = 0;					  // number of pairs processed so far
  std::vector<pair_t>::iterator pairs_iter = pairs_vec.begin();
  for(pairs_iter = pairs_vec.begin(); pairs_iter != pairs_vec.end(); pairs_iter++) {
	 c++;
	 pair_t pair = *pairs_iter;

	 // masked left ciphertext words of the first and of the second text
	 uint32_t rot_const[2] = {(pair.ciphertext_first[left] & RC5_ROT_MASK), ((pair.ciphertext_second[left] & RC5_ROT_MASK))};
	 assert(rot_const[0] == rot_const[1]);

	 uint32_t i = rot_const[0];  // bucket index

	 R[i]->push_back(pair);
  }
#endif

/* --- */
/* 

Test vpv 2014.05.10

[./src/rc5-dc.cc:1591] RC5_FIXED_KEY 1 | Expanded key[26] = {0xC8BE, 0xB38D, 0x321D, 0x8C2B, 0xF5C, 0x4E07, 0x36EE, 0x67AD, 0xC6EB, 0x7A9F, 0x686C, 0xF3BA, 0x8D7A, 0x210F, 0x28F9, 0x5D6, 0xC9AA, 0x69AF, 0xD7BC, 0x3A90, 0xD5E8, 0xC76D, 0x992F, 0xF50F, 0x590F, 0x611F, };

[./src/rc5-dc.cc:rc5_equal_rot_attack():1598] Exit statistics:
#Rounds 6
WORD_SIZE 16
#CP: 524288 (2^19.000000)
RC5_FIXED_KEY 1
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
#Filtered pairs: 1283 (2^10.325305)
#GoUP sets of trails: 1283 (2^10.325305)
#Good pairs among filtered: 37
#Good pairs total: 67
[./tests/rc5-tests.cc:385] #GoUP sets of trails: 1283 (2^10.325305)
[./src/rc5-dc.cc:393] Enter rc5_last_round_eq_x_keyrec() good_pairs_vec size = 67 (2^6.066089)
[./src/rc5-dc.cc:148] Hashtable size = 2782 (2^11.441907), All vals = 1756866 (2^20.744573)
[./src/rc5-dc.cc:425] rc5_last_round_eq_x_keyrec() Hash map size 2782
[./src/rc5-dc.cc:359] Ciphertext pair is good: (    7DBF     7D3F)
--- [./src/rc5-dc.cc:670] rc5_print_pair() CP pair: ---
Plaintext diff: DX (    8000     8000)
First plaintext: X1 (    11C0     B866)
Second plaintext: X2 (    91C0     3866)
Ciphertext diff: DY (    6080       80)
First ciphertext: Y1 (    1AE0     7DBF)
Second ciphertext: Y2 (    7A60     7D3F)
[./src/rc5-dc.cc:473] Input (y, yy, dx) = (    7DBF     7D3F       80) b_has_sol 1 | #solutions 2^11.000000 of 2^16.000000 possible
[./src/rc5-dc.cc:499]/---------------------------------------------------------------/
[./src/rc5-dc.cc:501] RIGHT KEY 210F = 210F = S[13] is 1 out of 2048 (2^11.000000) solutions.
[./src/rc5-dc.cc:502] Total number of round keys: 65536 (2^16.000000).
[./src/rc5-dc.cc:503]/---------------------------------------------------------------/
[./src/rc5-dc.cc:359] Ciphertext pair is good: (    B71B     B51B)
--- [./src/rc5-dc.cc:670] rc5_print_pair() CP pair: ---
Plaintext diff: DX (    8000     8000)
First plaintext: X1 (    6E1F     2835)
Second plaintext: X2 (    EE1F     A835)
Ciphertext diff: DY (      10      200)
First ciphertext: Y1 (    55B5     B71B)
Second ciphertext: Y2 (    55A5     B51B)
[./src/rc5-dc.cc:473] Input (y, yy, dx) = (    B71B     B51B      200) b_has_sol 1 | #solutions 2^11.000000 of 2^16.000000 possible
[./src/rc5-dc.cc:499]/---------------------------------------------------------------/
[./src/rc5-dc.cc:501] RIGHT KEY 210F = 210F = S[13] is 1 out of 2048 (2^11.000000) solutions.
[./src/rc5-dc.cc:502] Total number of round keys: 65536 (2^16.000000).
[./src/rc5-dc.cc:503]/---------------------------------------------------------------/
[./src/rc5-dc.cc:514] #Good ciphertexts = 2
[./tests/rc5-tests.cc:395] Test OK!
vpv@igor:~/skcrypto/trunk/work/src/yaarx$


 */
/* --- */

#if 1									  // DEBUG TEST
		/*
		 * Orphaned debug snippet (dx, y, yy, s, s_prev, params_hash_map and
		 * cnt_hash_vals are declared elsewhere).
		 *
		 * Builds a parameter entry that differs from (dx, y, yy, s, s_prev)
		 * in the LS bit of the previous rotation constant (s_prev ^ 1) and,
		 * if it is not yet present in the hash map, inserts it together with
		 * its hash value, counts it and prints it.
		 */
		{
		  eq_x_params_t i_params_temp = {dx, y, yy, s, s_prev ^ 1, true};
		  bool b_new_entry = (params_hash_map->find(i_params_temp) == params_hash_map->end());
		  if(b_new_entry) {
			 uint32_t i_params_temp_hash_val = params_hash_function(i_params_temp);
			 std::pair<eq_x_params_t, uint32_t> new_pair (i_params_temp, i_params_temp_hash_val);
			 params_hash_map->insert(new_pair);
			 cnt_hash_vals++;
#if 1									  // DEBUG
			 printf("[%s:%d] #%10d New hash: H[%8X] = (%8X, %8X, %8X, %2d, %2d, %d)\n", __FILE__, __LINE__, 
					  cnt_hash_vals, i_params_temp_hash_val, 
					  i_params_temp.dx, i_params_temp.y, i_params_temp.yy, 
					  i_params_temp.rot_const, i_params_temp.rot_const_prev, i_params_temp.b_aux_data);
#endif  // #if 1 // DEBUG
		  }
		}
#endif  // #if 1 // DEBUG TEST

/* --- */
/* 
For test:

[./src/rc5-dc.cc:1590] RC5_FIXED_KEY 1 | Expanded key[26] = {0xC8BE, 0xB38D, 0x321D, 0x8C2B, 0xF5C, 0x4E07, 0x36EE, 0x67AD, 0xC6EB, 0x7A9F, 0x686C, 0xF3BA, 0x8D7A, 0x210F, 0x28F9, 0x5D6, 0xC9AA, 0x69AF, 0xD7BC, 0x3A90, 0xD5E8, 0xC76D, 0x992F, 0xF50F, 0x590F, 0x611F, };

[./src/rc5-dc.cc:rc5_equal_rot_attack():1597] Exit statistics:
#Rounds 6
WORD_SIZE 16
#CP: 262144 (2^18.000000)
RC5_FIXED_KEY 1
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
#Filtered pairs: 677 (2^9.403012)
#GoUP sets of trails: 677 (2^9.403012)
#Good pairs among filtered: 26
#Good pairs total: 39
[./tests/rc5-tests.cc:385] #GoUP sets of trails: 677 (2^9.403012)
[./src/rc5-dc.cc:393] Enter rc5_last_round_eq_x_keyrec() good_pairs_vec size = 39 (2^5.285402)
[./src/rc5-dc.cc:148] Hashtable size = 1485 (2^10.536247), All vals = 2062482 (2^20.975950)
[./src/rc5-dc.cc:425] rc5_last_round_eq_x_keyrec() Hash map size 1485
[./src/rc5-dc.cc:359] Ciphertext pair is good: (    BD10     6510)
--- [./src/rc5-dc.cc:669] rc5_print_pair() CP pair: ---
Plaintext diff: DX (    8000     8000)
First plaintext: X1 (    36B8     607E)
Second plaintext: X2 (    B6B8     E07E)
Ciphertext diff: DY (     800     D800)
First ciphertext: Y1 (    8E01     BD10)
Second ciphertext: Y2 (    8601     6510)
[./src/rc5-dc.cc:473] Input (y, yy, dx) = (    BD10     6510     D800) b_has_sol 1 | #solutions 2^9.000000 of 2^16.000000 possible
[./src/rc5-dc.cc:499]/---------------------------------------------------------------/
[./src/rc5-dc.cc:501] RIGHT KEY 210F suggested among 512 (2^9.000000) candidates out of 65536 (2^16.000000).
[./src/rc5-dc.cc:502]/---------------------------------------------------------------/
[./src/rc5-dc.cc:513] #Good ciphertexts = 1
[./tests/rc5-tests.cc:395] Test OK!

--- Send test ---

#Rounds 6
WORD_SIZE 16
#CP: 16384 (2^14.000000)
RC5_FIXED_KEY 1
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
#Filtered pairs: 38 (2^5.247928)
#GoUP sets of trails: 38 (2^5.247928)
#Good pairs among filtered: 1
#Good pairs total: 3
[./tests/rc5-tests.cc:385] #GoUP sets of trails: 38 (2^5.247928)
[./src/rc5-dc.cc:382] Enter rc5_last_round_eq_x_keyrec() good_pairs_vec size = 3 (2^1.584963)
[./src/rc5-dc.cc:147] Hashtable size = 84 (2^6.392317), All vals = 7895 (2^12.946724)
[./src/rc5-dc.cc:414] rc5_last_round_eq_x_keyrec() Hash map size 84
[./src/rc5-dc.cc:502] #Good ciphertexts = 0
[./tests/rc5-tests.cc:395] Test OK!


 */

/* --- */

/* vpv 2014.05.10
vpv@igor:~/skcrypto/trunk/work/src/yaarx$ ./bin/rc5-tests
[./tests/rc5-tests.cc:743] Tests, WORD_SIZE  = 16, MASK =     FFFF, RC5_XOR = 0
[./tests/rc5-tests.cc:349] RC5_FIXED_KEY 0 | Master key[16] = {0x26, 0xF9, 0x36, 0xDE, 0xB3, 0x7D, 0x67, 0xAC, 0x41, 0xCB, 0x11, 0xCE, 0xB1, 0xF2, 0xEE, 0xF0, };
[./tests/rc5-tests.cc:359] RC5_FIXED_KEY 0 | Expanded key[26] = {0x     DCF, 0x    C99F, 0x    DAB9, 0x    9EBF, 0x    5A9C, 0x     13A, 0x    E9F7, 0x    6F3B, 0x    AB1D, 0x    D9FA, 0x    7378, 0x    268D, 0x    CCB7, 0x    F13A, 0x    5A9C, 0x    6136, 0x    1FAA, 0x    7EF1, 0x    FA4B, 0x    E674, 0x    D89B, 0x    9FBD, 0x    F0EA, 0x    818F, 0x    188D, 0x    8AFE, };
[./src/rc5-dc.cc:1015]  6R p(    8000     8000 -> *) = 0.000183 2^-12.415037 | 3 2^14.000000
[./tests/rc5-tests.cc:373] Found 3 good pairs out of 2^14.000000 by experiment
[./src/rc5-dc.cc:rc5_equal_rot_attack():1427]
[./src/rc5-dc.cc:1457] RC5_FIB_LEN 8 fib_array.size() = 8
[./src/rc5-dc.cc:1459] fib_array =
[0] 2
[1] 3
[2] 4
[3] 4
[4] 5
[5] 9
[6] 9
[7] 10
[./src/rc5-dc.cc:1579] RC5_FIXED_KEY 0 | Expanded key[26] = {0xDCF, 0xC99F, 0xDAB9, 0x9EBF, 0x5A9C, 0x13A, 0xE9F7, 0x6F3B, 0xAB1D, 0xD9FA, 0x7378, 0x268D, 0xCCB7, 0xF13A, 0x5A9C, 0x6136, 0x1FAA, 0x7EF1, 0xFA4B, 0xE674, 0xD89B, 0x9FBD, 0xF0EA, 0x818F, 0x188D, 0x8AFE, };

[./src/rc5-dc.cc:rc5_equal_rot_attack():1586] Exit statistics:
#Rounds 6
WORD_SIZE 16
#CP: 16384 (2^14.000000)
RC5_FIXED_KEY 0
RC5_FILTER_LAST_ROUND 1
RC5_FILTER_ONETOLAST_ROUND 1
RC5_FILTER_GOUP 1
RC5_FILTER_GOUP_DEBUG 0
#Filtered pairs: 52 (2^5.700440)
#GoUP sets of trails: 52 (2^5.700440)
#Good pairs among filtered: 2
#Good pairs total: 3
[./tests/rc5-tests.cc:385] #GoUP sets of trails: 52 (2^5.700440)
[./src/rc5-dc.cc:382] Enter rc5_last_round_eq_x_keyrec() good_pairs_vec size = 3 (2^1.584963)
[./src/rc5-dc.cc:147] Hashtable size = 109 (2^6.768184), All vals = 1106799 (2^20.077962)
[./src/rc5-dc.cc:414] rc5_last_round_eq_x_keyrec() Hash map size 109
[./src/rc5-dc.cc:348] Ciphertext pair is good: (    B5AA     35AA)
--- [./src/rc5-dc.cc:658] rc5_print_pair() CP pair: ---
Plaintext diff: DX (    8000     8000)
First plaintext: X1 (    F9B3     F4D2)
Second plaintext: X2 (    79B3     74D2)
Ciphertext diff: DY (    8000     8000)
First ciphertext: Y1 (    FCD0     B5AA)
Second ciphertext: Y2 (    7CD0     35AA)
[./src/rc5-dc.cc:462] Input (y, yy, dx) = (    B5AA     35AA     8000) b_has_sol 1 | #solutions 2^12.000000 of 2^16.000000 possible
[./src/rc5-dc.cc:488]/---------------------------------------------------------------/
[./src/rc5-dc.cc:490] RIGHT KEY F13A suggested among 4096 (2^12.000000) candidates out of 65536 (2^16.000000).
[./src/rc5-dc.cc:491]/---------------------------------------------------------------/
[./src/rc5-dc.cc:502] #Good ciphertexts = 1
[./tests/rc5-tests.cc:395] Test OK!
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
vpv@igor:~/skcrypto/trunk/work/src/yaarx$


 */
/* --- */

#if 0
	 /*
	  * Disabled, orphaned snippet (eq_params, i and x_i are declared
	  * elsewhere; the `continue` statements belong to an enclosing loop).
	  *
	  * When eq_params.b_aux_data is set, bit x_i at position i is compared
	  * against the corresponding bit of the fixed sequence
	  * (rot_const ^ rot_const_prev), which is expected at the
	  * fixed_bits_len positions of x starting at rot_const. If the bits
	  * differ, the current candidate is skipped.
	  *
	  * Two cases are handled: the sequence fits below WORD_SIZE, or it wraps
	  * around the MS bit of the word.
	  *
	  * NOTE(review): in the wrap-around case the low part uses
	  * `i <= remainder` and `fixed_bits_seq >> i`, i.e. no offset of
	  * (WORD_SIZE - rot_const) is applied -- this looks inconsistent with
	  * rc5_last_round_eq_x_bit_seq_match_bit_i(); confirm before re-enabling.
	  */
	 if((eq_params.b_aux_data == true)) {					
		WORD rot_const = eq_params.rot_const; // r7 = (ciphertext_left & rot_mask)
		WORD rot_const_prev = eq_params.rot_const_prev; // r6
		WORD fixed_bits_len = 4;//RC5_LOG2_WORD_SIZE; // log2(word_size) = 4 or 5
		if(WORD_SIZE == 32) {
		  fixed_bits_len = 5;
		}

		// w=16 bits, len = 4: x[r7, r7+1, r7+2, r7+3]

		// Case 1: the whole sequence lies below the MS bit of the word
		if((rot_const + fixed_bits_len) < WORD_SIZE) {
		  if((i >= rot_const) && (i < (rot_const + fixed_bits_len))) {
			 // (r6 ^ r7)= (X[6] & rot_mask) ^ X[7] & rot_mask)
			 WORD fixed_bits_seq = (rot_const ^ rot_const_prev); 
			 // extract j-th fixed bit: j = 0, 1, ..., (len-1)
			 WORD fixed_bit_j = (fixed_bits_seq >> (i - rot_const)) & 1; 
			 // if the i-th bit of x does not match the fixed bit continue
			 if(x_i != fixed_bit_j) {
				continue;
			 }
		  }
		}

		// Case 2: the sequence reaches or passes the MS bit of the word
		if((rot_const + fixed_bits_len) >= WORD_SIZE) {

		  // low part: positions up to the remainder modulo WORD_SIZE
		  if(i <= ((rot_const + fixed_bits_len) % WORD_SIZE)) { // remainder
			 // (r6 ^ r7)= (X[6] & rot_mask) ^ X[7] & rot_mask)
			 WORD fixed_bits_seq = (rot_const ^ rot_const_prev); 
			 // extract j-th fixed bit: j = 0, 1, ..., (len-1)
			 WORD fixed_bit_j = (fixed_bits_seq >> i) & 1; 
			 // if the i-th bit of x does not match the fixed bit continue
			 if(x_i != fixed_bit_j) {
				continue;
			 }
		  }

		  // high part: positions rot_const .. WORD_SIZE - 1
		  //		  if((i >= rot_const) && (i < ((rot_const + fixed_bits_len) / WORD_SIZE))) {
		  if((i >= rot_const) && (i < WORD_SIZE)) {
			 // (r6 ^ r7)= (X[6] & rot_mask) ^ X[7] & rot_mask)
			 WORD fixed_bits_seq = (rot_const ^ rot_const_prev); 
			 // extract j-th fixed bit: j = 0, 1, ..., (len-1)
			 WORD fixed_bit_j = (fixed_bits_seq >> (i - rot_const)) & 1; 
			 // if the i-th bit of x does not match the fixed bit continue
			 if(x_i != fixed_bit_j) {
				continue;
			 }
		  }

		}
	 }
#endif  // #if 0

/* --- */
/**
 * Check whether the word \p x contains the bit sequence \p bit_seq at
 * the bit positions rot_const, rot_const + 1, ..., rot_const +
 * bit_seq_len - 1, where the positions are taken modulo WORD_SIZE
 * (i.e. the sequence wraps around from the MS bit to the LS bit of x).
 *
 * \param x word that is checked.
 * \param rot_const position in \p x of the LS bit of the sequence;
 *        must be at most WORD_SIZE.
 * \param bit_seq the bit sequence, stored in the \p bit_seq_len LS bits.
 * \param bit_seq_len length of the sequence in bits; must be at most
 *        WORD_SIZE. An empty sequence (length 0) matches any \p x.
 * \return true if bit j of \p bit_seq is equal to bit
 *         ((rot_const + j) mod WORD_SIZE) of \p x for every
 *         0 <= j < bit_seq_len; false otherwise.
 */
bool rc5_last_round_eq_x_bit_seq_match_bit_i(const uint32_t x, const uint32_t rot_const, 
															  const uint32_t bit_seq, const uint32_t bit_seq_len)
{
  const uint32_t w = WORD_SIZE;
  const uint32_t r = rot_const; // r7
  const uint32_t L = bit_seq_len; // l 

  assert(L <= w);
  assert(r <= w);

  // Mask of the L LS bits. A shift by the full width of the operand is
  // undefined, so the empty sequence is handled explicitly.
  const uint32_t mask_L = (L == 0) ? 0U : (0xffffffffU >> (32 - L));
  assert((bit_seq & mask_L) == bit_seq); // make sure the bit seq is not longer than L bits
  (void)mask_L;					  // only used by the assertion above

  // Bit j of the sequence is expected at position (r + j) mod w of x:
  // for (L + r) <= w the sequence is x[L+r-1 : r]; otherwise its low part
  // is x[w-1 : r] and the remaining bits continue at x[L+r-w-1 : 0].
  for(uint32_t j = 0; j < L; j++) {
	 const uint32_t i = (r + j) % w;
	 const uint32_t bit_seq_j = (bit_seq >> j) & 1;
	 const uint32_t x_i = (x >> i) & 1;
	 if(bit_seq_j != x_i) {
		return false;
	 }
  }
  return true;
}

/* --- */

		// Orphaned debug snippet (b_fixed_seq, i, rot_const and fixed_bits_len
		// are declared elsewhere): when b_fixed_seq is set, prints the current
		// position i together with (rot_const + fixed_bits_len) mod WORD_SIZE.
		//		if((i >= rot_const) && (i < ((rot_const + fixed_bits_len) % WORD_SIZE))) {
		if(b_fixed_seq) {

		  printf("[%s:%d] i %d rot_const %d fixed_bits_len %d (rot_const + fixed_bits_len) mod %d = %d\n", 
					__FILE__, __LINE__, i, rot_const, fixed_bits_len, WORD_SIZE, (rot_const + fixed_bits_len) % WORD_SIZE);

		  //		  assert(((rot_const + fixed_bits_len) < WORD_SIZE));
		}

/* --- */

#if 0									  // no, this is not necessary since here we set S[6]
      /**
       * If at the last round, check if the guessed rotation constant
       * \p s matches the rotation const. defined from the left
       * ciphertext X[7] .
       *
       * Orphaned snippet: depth, s and the global array g_S are declared
       * elsewhere and the `continue` belongs to an enclosing loop. A guess
       * that differs from g_S[(RC5_FIB_LEN + 1) - 2] is skipped.
       */
		if(depth == (RC5_FIB_LEN - 1)) { // last round
		  uint32_t rconst_last_round = g_S[(RC5_FIB_LEN + 1) - 2];
		  if(s != rconst_last_round) {
			 continue;
		  } else {
#if 0									  // DEBUG
			 printf("[%s:%d] Match! s %d = %d\n", __FILE__, __LINE__, s, rconst_last_round);
#endif								  // #if 0 // DEBUG
		  }
		}
#endif

/* ---- */

#if 0									  // no, this is not necessary since here we set S[6]
      /**
       * If we are at the last round, then check if the guessed
       * rotation constant \p s matches the rotation const. defined
       * from the left ciphertext X[7] .
       *
       * Orphaned snippet: depth, s and ds_array are declared elsewhere and
       * the `continue` belongs to an enclosing loop. Same check as the
       * variant based on the global g_S, but the reference constant is read
       * from ds_array->S[ds_array->len - 2]; the assertions pin the indices
       * to 7.
       */
		assert((RC5_FIB_LEN - 1) == 7);
		if(depth == (RC5_FIB_LEN - 1)) { // last (i.e. bottom) round
		  uint32_t rconst_last_round = ds_array->S[ds_array->len - 2];
		  assert((ds_array->len - 2) == 7);
		  if(s != rconst_last_round) {
			 continue;
		  } else {
#if 0									  // DEBUG
			 printf("[%s:%d] Match! s %d = %d\n", __FILE__, __LINE__, s, rconst_last_round);
#endif								  // #if 0 // DEBUG
		  }
		}
#endif



/* ---- */
/*
 * Extended version of the goUP filter for good pairs for RC5
 * proposed by [Biryukov, Kushilevitz]
 * 
 * \see rc5_filter_go_up
 *
 * Example: For \ref RC5_GOUP_LEVEL = 7, the filter starts at depth = 7 (previously was 6).
 */
uint32_t rc5_filter_go_up_ext_i(const uint32_t depth, uint32_t* count, 
										  const std::vector<uint32_t> fib_array, const rc5_goup_diffs_t* ds_array,
										  std::vector<rc5_goup_diffs_t>* goup_diff_vec)
{
  uint32_t flag = 0;
  uint32_t s;
#if 1
  if(hw32(ds_array->D[depth]) > fib_array[depth]) {
	 return 0;						  // cut the search tree
  }
#endif
  if(depth != 0) {
	 for(s = 0; s < 32; s++) {	  // try all rotations 0..31
		//		if((RC5_ROTR(ds_array->D[depth + 1], s) & 0x1F) == 0) { // D[7] <<< s
		if((RC5_ROTR(ds_array->D[depth + 1], s) & 0x1F) == 0) { // D[8] <<< (X[7] & 0x1F)

		  rc5_goup_diffs_t ds_array_new = *ds_array;
#if 1									  // DEBUG
		  assert(ds_array_new.len == ds_array->len);
#endif  // #if 1
		  //		  ds_array_new.D[depth - 1] = RC5_ROTR(ds_array_new.D[depth + 1], s) ^ ds_array_new.D[depth]; // ds_array.D[5] = (ds_array.D[7] >>> s) ^ ds_array.D[6]		  
		  ds_array_new.D[depth - 1] = RC5_ROTR(ds_array_new.D[depth + 1], s) ^ ds_array_new.D[depth]; // ds_array.D[6] = (ds_array.D[8] >>> (X[7] & 0x1F)) ^ ds_array.D[7]		  
		  //		  ds_array_new.S[depth - 1] = s; // S[5]
		  ds_array_new.S[depth - 1] = s; // S[6]
		  if(rc5_filter_go_up_ext_i(depth - 1, count, fib_array, &ds_array_new, goup_diff_vec)) {  // recursive call for correct count of variants
			 flag = 1;
		  }
		}
	 }

  } else {							  // reached the top
	 (*count)++;					  // accumulate num. of variants
	 goup_diff_vec->push_back(*ds_array);
	 return 1;
  }
  return flag;
}

/* --- */
	 // fix 2014.05.08
	 //	 g_D[depth + 1] = ciphertext_first[0] ^ ciphertext_second[0];
	 //	 g_D[depth + 2] = ciphertext_first[1] ^ ciphertext_second[1];
	 //	 g_S[depth + 1] = ciphertext_first[0] & 0x1F;
	 //	 g_S[depth + 2] = 0;	  // don't care for last round const

/* --- */

  // Orphaned fragment: a parameter declaration followed by the debug-only
  // check (active when RC5_FILTER_GOUP_DEBUG is non-zero) that the goUP
  // filter depth is equal to LEVEL.
  const uint32_t goup_filter_depth,
#if RC5_FILTER_GOUP_DEBUG
	 assert(goup_filter_depth == LEVEL);
#endif  // #if RC5_FILTER_GOUP_DEBUG


/* --- */
/*
 * \param goup_filter_depth number of rounds from the bottom up over
 *                           which the goUP filter will be applied.
 */
/* --- */

  //  assert(goup_filter_depth == (LEVEL - 1));
  //  uint32_t fib_array_len = goup_filter_depth + 2;// = LEVEL + 1;

/* --- */
//uint32_t FIB_NL[FIB_NL_LEN] = {1, 1, 0, 1, 1, 0, 3, 3, 5, 5, 5, 8, 8, 8, 12, 13, 13, 15};
//uint32_t FIB_NL[FIB_NL_LEN] = {5, 5, 5, 8, 8, 8, 12, 13, 13, 15};
//uint32_t FIB_NL[FIB_NL_LEN] = {5, 5, 5, 8, 8, 8, 20, 20, 20, 20};

/* --- */

  // Orphaned snippet (S and nrounds are declared elsewhere): calls
  // rc5_encrypt_get_intermediate_values() on the all-zero plaintext, passing
  // ct and the vector X to be filled by the callee -- presumably with the
  // ciphertext and the intermediate values of the encryption; confirm
  // against its definition.
  WORD pt[2] = {0, 0};
  WORD ct[2] = {0, 0};
  std::vector<uint32_t> X;
  rc5_encrypt_get_intermediate_values(S, nrounds, pt, ct, &X);


/* --- */

#if 0
/*
 * (Disabled, older version.) Encrypt two plaintexts in parallel for
 * \p nrounds rounds of RC5 under the expanded key \p S, asserting that
 * before every half-round the bits selected by (WORD_SIZE - 1) -- the
 * ones that determine the rotation amount -- are equal in the two
 * encryptions.
 *
 * Both plaintext words and the right word after the initial key
 * addition are appended to \p X_first_vec (resp. \p X_second_vec);
 * nothing is stored inside the round loop.
 */
void rc5_encrypt_pair(const WORD S[RC5_STAB_LEN_T],
							 const uint32_t nrounds,
							 const WORD plaintext_first[2], std::vector<uint32_t>* X_first_vec,
							 const WORD plaintext_second[2], std::vector<uint32_t>* X_second_vec)
{
  const uint32_t rot_mask = (WORD_SIZE - 1);
  const uint32_t i_left = RC5_FEISTEL_LEFT;
  const uint32_t i_right = RC5_FEISTEL_RIGHT;

  // First text: store X[0], X[1], add the first two subkeys, store X[2]
  X_first_vec->push_back(plaintext_first[i_left]);
  X_first_vec->push_back(plaintext_first[i_right]);
  WORD left_1 = plaintext_first[i_left] + S[0];
  WORD right_1 = plaintext_first[i_right] + S[1];
  X_first_vec->push_back(right_1);

  // Second text: store XX[0], XX[1], add the first two subkeys, store XX[2]
  X_second_vec->push_back(plaintext_second[i_left]);
  X_second_vec->push_back(plaintext_second[i_right]);
  WORD left_2 = plaintext_second[i_left] + S[0];
  WORD right_2 = plaintext_second[i_right] + S[1];
  X_second_vec->push_back(right_2);

  for(WORD round = 1; round <= nrounds; round++) { 

	 // the rotation amounts of the first half-round must be equal
	 assert((right_1 & rot_mask) == (right_2 & rot_mask));

	 // both half-rounds of the first text ...
	 left_1 = RC5_ROTL(left_1^right_1, right_1) + S[2*round];
	 right_1 = RC5_ROTL(right_1^left_1, left_1) + S[2*round+1];

	 // ... and of the second text
	 left_2 = RC5_ROTL(left_2^right_2, right_2) + S[2*round];
	 right_2 = RC5_ROTL(right_2^left_2, left_2) + S[2*round+1];

	 // the new left words must again agree on the rotation bits
	 assert((left_1 & rot_mask) == (left_2 & rot_mask));
  }
}
#endif

/* --- */

// {------- START NEW rc5_encrypt_pair() -----------------

/*
 * Generate RC5_NTEXTS random chosen-plaintext pairs with XOR input
 * difference \p dx and partially encrypt them for \p nrounds rounds of
 * RC5 under the expanded key \p S.
 *
 * Every generated pair is appended (with zeroed ciphertexts) to
 * \p cptext_pairs_vec. The encryption of a pair is abandoned as soon
 * as the bits selected by (WORD_SIZE - 1) -- the ones that determine
 * the rotation amount -- differ between the two texts before a
 * half-round. Pairs that survive all rounds are appended, together
 * with their ciphertexts, to \p good_pairs_vec.
 *
 * At the end the fraction of surviving pairs is printed.
 */
void rc5_encrypt_pair(const WORD S[RC5_STAB_LEN_T],
							 const uint32_t nrounds,
							 const WORD dx[2], 
							 std::vector<pair_t>* cptext_pairs_vec,
							 std::vector<pair_t>* good_pairs_vec)
{
#define PRINT_DDIFF 0

  const uint32_t rot_mask = (WORD_SIZE - 1);
  uint64_t n_good = 0;			  // number of pairs with equal rotations in all rounds

  WORD pt_first[2] = {0, 0};
  WORD pt_second[2] = {0, 0};

  for(uint32_t n = 0; n < RC5_NTEXTS; n++) {

#if PRINT_DDIFF			  // DEBUG: print differences
    WORD DD[2 + 24] = {0}; // Array of differences
	 DD[0] = dx[0];
	 DD[1] = dx[1];
#endif  // #if PRINT_DDIFF

	 // Draw a random first plaintext, derive the second one from dx and
	 // record the pair with empty ciphertexts.
	 pair_t pair;
	 for(uint32_t k = 0; k < 2; k++) {
		pt_first[k] = random32() & MASK;
		pt_second[k] = pt_first[k] ^ dx[k];

		pair.plaintext_first[k] = pt_first[k];
		pair.plaintext_second[k] = pt_second[k];
		pair.ciphertext_first[k] = 0;
		pair.ciphertext_second[k] = 0;
	 }
	 cptext_pairs_vec->push_back(pair);

	 // Initial key mixing
#if !RC5_XOR						  // original cipher
	 WORD A1 = pt_first[0] + S[0];
	 WORD B1 = pt_first[1] + S[1];
	 WORD A2 = pt_second[0] + S[0];
	 WORD B2 = pt_second[1] + S[1];
#else	 // XOR-linear
	 WORD A1 = pt_first[0] ^ S[0];
	 WORD B1 = pt_first[1] ^ S[1];
	 WORD A2 = pt_second[0] ^ S[0];
	 WORD B2 = pt_second[1] ^ S[1];
#endif

	 bool b_rot_equal = true;	  // cleared at the first unequal rotation amount

	 for(WORD r = 1; r <= nrounds; r++) { 

		// first half-round: rotation amounts come from the right words
		if((B1 & rot_mask) != (B2 & rot_mask)) {
		  b_rot_equal = false;
		  break;
		}
#if !RC5_XOR						  // original cipher
		A1 = RC5_ROTL(A1^B1, B1) + S[2*r]; 
		A2 = RC5_ROTL(A2^B2, B2) + S[2*r]; 
#else	 // XOR-linear
		A1 = RC5_ROTL(A1^B1, B1) ^ S[2*r]; 
		A2 = RC5_ROTL(A2^B2, B2) ^ S[2*r]; 
#endif

#if PRINT_DDIFF				  // store the difference after the half-round
		DD[2 + (2*(r-1))] = A1 ^ A2;
#endif  // #if PRINT_DDIFF

		// second half-round: rotation amounts come from the new left words
		if((A1 & rot_mask) != (A2 & rot_mask)) {
		  b_rot_equal = false;
		  break;
		}
#if !RC5_XOR						  // original cipher
		B1 = RC5_ROTL(B1^A1, A1) + S[2*r+1]; 
		B2 = RC5_ROTL(B2^A2, A2) + S[2*r+1]; 
#else	 // XOR-linear
		B1 = RC5_ROTL(B1^A1, A1) ^ S[2*r+1]; 
		B2 = RC5_ROTL(B2^A2, A2) ^ S[2*r+1]; 
#endif

#if PRINT_DDIFF				  // store the difference after the half-round
		DD[2 + (2*(r-1)) + 1] = B1 ^ B2;
#endif  // #if PRINT_DDIFF
	 }

	 if(!b_rot_equal) {
		continue;					  // not a good pair: try the next one
	 }
	 n_good++;

	 // Good pair: complete the record with the ciphertexts and store it.
	 pair.ciphertext_first[0] = A1;
	 pair.ciphertext_first[1] = B1;
	 pair.ciphertext_second[0] = A2;
	 pair.ciphertext_second[1] = B2;
	 good_pairs_vec->push_back(pair);

#if PRINT_DDIFF
	 printf("\n--------------------------\n");
	 for(WORD i = 0; i < (2 + (2*nrounds)); i++) { 
		  printf("DD[%2d]: %8X | HW %2d \n", i, DD[i], hw32(DD[i]));
	 }
#endif  // #if PRINT_DDIFF

  }	// ntexts

  // Report the experimental probability of a good pair.
  const uint64_t nall = RC5_NTEXTS;
  const double p = (double)n_good / (double)nall;
  printf("[%s:%d] %2dR p(%8X %8X -> *) = %f 2^%f | %lld 2^%f\n", __FILE__, __LINE__, nrounds, dx[0],  dx[1], p, log2(p), n_good, log2(nall));
}

// -------- END NEW rc5_encrypt_pair() ------------------}

/* --- */

struct rc5_eq_x_params_hash
  : std::unary_function<eq_x_params_t, std::size_t>
{
  std::size_t operator()(eq_x_params_t const& params) const
  {
	 std::size_t seed = 0;
	 boost::hash_combine(seed, params);
	 return seed;
  }
};


/* --- */

#if 0
/*
 * Disabled definition of a chosen plaintext pair with the short field
 * names pt1/pt2/ct1/ct2: the two plaintexts and the two ciphertexts, each
 * stored as {left, right} words.
 */
typedef struct {					  // 0 = left part, 1 = right part (after the Feistel swap!)
  WORD pt1[2];						  //  plaintext-1[0, 1] = {left, right}
  WORD pt2[2];						  //  plaintext-2[0, 1] = {left, right}
  WORD ct1[2];						  // ciphertext-1[0, 1] = {left, right}
  WORD ct2[2];						  // ciphertext-2[0, 1] = {left, right}
} pair_t;
#endif


/* --- */
#if 0
  /*
   * Disabled, orphaned test snippet (AA is declared elsewhere): for 2^20
   * random triples (y, yy, dx), masked with MASK, calls
   * rc5_last_round_eq_x_find_solutions_rec() and prints the triple and
   * the number of collected solutions whenever the call reports that a
   * solution exists.
   */
  uint32_t N = (1U << 20);

  for(uint32_t i = 0; i < N; i++) {

	 uint32_t y = random32() & MASK;
	 uint32_t yy = random32() & MASK;
	 uint32_t dx = random32() & MASK;

	 std::vector<uint32_t> sol_vec;  // filled by the call below
	 bool b_has_sol = 
		rc5_last_round_eq_x_find_solutions_rec((const gsl_matrix*(*)[2][2][2])AA, y, yy, dx, &sol_vec);

#if 1									  // DEBUG
	 if(b_has_sol) {
		printf("Input (y, yy, dx) = (%8X %8X %8X) b_has_sol %d #sol %d\n", y, yy, dx, b_has_sol, sol_vec.size());
	 }
#endif  // #if 1
  }
#endif


/* --- */
#if 1	 // DEBUG
	 /*
	  * Orphaned debug snippet (y, yy, dx and s are declared elsewhere).
	  * Remembers the first (y, yy, dx, s) that is seen and asserts that the
	  * current values are equal to the remembered ones. The declarations
	  * and the check are at different indentation levels -- presumably
	  * they were cut from outside and inside of a loop, respectively.
	  */
	 bool b_first_is_stored = false;
	 uint32_t y_first = 0;
	 uint32_t yy_first = 0;
	 uint32_t dx_first = 0;
	 uint32_t s_first = 0;

		// store the reference values on the first pass only
		if(!b_first_is_stored) {
		  b_first_is_stored = true;
		  y_first = y;
		  yy_first = yy;
		  dx_first = dx;
		  s_first = s;
		}
		assert(y == y_first);
		assert(yy == yy_first);
		assert(s == s_first);
		assert(dx == dx_first);
#endif



/* --- */
/**
 * Filtering phase of the equal-rotations attack on RC5: encrypt every
 * chosen-plaintext pair of \p cptext_pairs_vec for \p nrounds rounds
 * under the expanded key \p S, apply the filters to the resulting
 * ciphertext pair and print summary statistics.
 *
 * A pair is kept if it passes rc5_filter_last_round() and, when
 * RC5_FILTER_GOUP is non-zero, if rc5_filter_go_up_ext() returns a
 * non-zero number of variants for it. Every kept pair is additionally
 * looked up in \p good_pairs_vec with rc5_pair_is_good().
 *
 * \param S expanded key.
 * \param nrounds number of rounds.
 * \param dx input difference (not used by this function).
 * \param goup_filter_depth depth of the goUP filter; asserted to be
 *        equal to (LEVEL - 1).
 * \param cptext_pairs_vec pool of chosen-plaintext pairs (only the
 *        plaintexts pt1, pt2 are read).
 * \param good_pairs_vec reference list of known good pairs.
 *
 * Side effects: clears the global g_goup_diff_vec for every pair and
 * writes progress and statistics to stdout.
 */
void rc5_equal_rot_attack(const WORD S[RC5_STAB_LEN_T],
								  const uint32_t nrounds,
								  const WORD dx[2], 
								  const uint32_t goup_filter_depth,
								  std::vector<pair_t> cptext_pairs_vec, 
								  std::vector<pair_t> good_pairs_vec)
{
#if 1									  // DEBUG
  printf("[%s:%s():%d]\n", __FILE__, __FUNCTION__, __LINE__);
#endif

  assert(goup_filter_depth == (LEVEL - 1));

  std::vector<pair_t> filtered_pairs_vec;		// pairs that passed all filters
  std::vector<pair_t> filtered_good_pairs_vec; // filtered pairs that are also known to be good

  WORD pt1[2] = {0, 0};
  WORD pt2[2] = {0, 0};

  WORD ct1[2] = {0, 0};
  WORD ct2[2] = {0, 0};

  uint32_t ncp = 0;				  // number of chosen-plaintext pairs processed so far

  // Pool of chosen plaintexts
  for(std::vector<pair_t>::const_iterator cp_iter = cptext_pairs_vec.begin();
		cp_iter != cptext_pairs_vec.end(); ++cp_iter) {

	 g_goup_diff_vec.clear();	  // clear the global vector

	 ncp++;

	 pt1[0] = cp_iter->pt1[0];	  // left pt 1
	 pt1[1] = cp_iter->pt1[1];	  // right pt 1

	 pt2[0] = cp_iter->pt2[0];	  // left pt 2
	 pt2[1] = cp_iter->pt2[1];	  // right pt 2

	 // encrypt pairs of texts
#if RC5_XOR  // XOR-linear
	 rc5_xor_encrypt(nrounds, S, pt1, ct1);  
	 rc5_xor_encrypt(nrounds, S, pt2, ct2);  
#else	 // original
	 rc5_encrypt(nrounds, S, pt1, ct1);  
	 rc5_encrypt(nrounds, S, pt2, ct2);  
#endif  // #if RC5_XOR

	 // First filter: works on the left and on the right ciphertext words
	 WORD L[2] = {ct1[0], ct2[0]}; 
	 WORD R[2] = {ct1[1], ct2[1]};

	 const bool b_is_good_pair_f1 = rc5_filter_last_round(L, R);

	 // The encrypted pair; used by the goUP filter and stored if the pair survives
	 pair_t cur_pair;
	 for(uint32_t i = 0; i < 2; i++) {
		cur_pair.pt1[i] = pt1[i];
		cur_pair.ct1[i] = ct1[i];
		cur_pair.pt2[i] = pt2[i];
		cur_pair.ct2[i] = ct2[i];
	 }

	 bool b_is_good_pair_f2 = true;

#if RC5_FILTER_GOUP // apply goUP filter (extended version)
	 uint32_t nvariants = 0;	  // stays 0 if the first filter rejected the pair
	 if(b_is_good_pair_f1) {
		// Hamming weight thresholds: the first (LEVEL + 1) entries of FIB_NL
		const uint32_t ndiff = LEVEL + 1;
		std::vector<uint32_t> fib_array;
		fib_array.reserve(ndiff);
		for(uint32_t i = 0; i < ndiff; i++) {
		  fib_array.push_back(FIB_NL[i]);
		}

		std::vector<rc5_goup_diffs_t> goup_diff_vec;
		nvariants = rc5_filter_go_up_ext(ndiff, cur_pair, fib_array, &goup_diff_vec);

#if 0									  // DEBUG
		if(goup_diff_vec.size() > 0) {
		  printf("\n--- [%s:%d] goup_diff_vec.size() = %2d | PT %8X %8X | %8X %8X ", __FILE__, __LINE__, (int)goup_diff_vec.size(), pt1[0], pt1[1], pt2[0], pt2[1]);
		  printf("CT %8X %8X | %8X %8X ---\n", ct1[0], ct1[1], ct2[0], ct2[1]);
		  rc5_goup_diff_vec_print(goup_diff_vec);
		}
#endif
	 }
	 b_is_good_pair_f2 = (nvariants != 0); 
#endif  // #if RC5_FILTER_GOUP

	 if(b_is_good_pair_f1 && b_is_good_pair_f2) {

		filtered_pairs_vec.push_back(cur_pair); // store filtered pair
#if 1
		// size_t values are converted explicitly: passing them to %d is undefined
		const double nfiltered = (double)filtered_pairs_vec.size();
		printf("\r[%s:%d] #filtered %5d (2^%f) / %5d (2^%f) | r 2^%f", __FILE__, __LINE__, (int)filtered_pairs_vec.size(), log2(nfiltered), ncp, log2((double)ncp), log2(nfiltered / (double)ncp));
		fflush(stdout);
#endif

		if(rc5_pair_is_good(cur_pair, good_pairs_vec)) { // store good pair
		  filtered_good_pairs_vec.push_back(cur_pair);
		}
	 }
  }

#if 0									  // DEBUG
  rc5_print_pairs(filtered_pairs_vec);
#endif

#if 1									  // DEBUG
  printf("[%s:%d] RC5_FIXED_KEY %d | Expanded key[%d] = {", __FILE__, __LINE__, RC5_FIXED_KEY, RC5_STAB_LEN_T);
  for(uint32_t j = 0; j < RC5_STAB_LEN_T; j++) {
	 printf("0x%8X, ", S[j]);
  }
  printf("};\n\n");
#endif  // #if 1
  printf("[%s:%s():%d] Exit statistics:\n", __FILE__, __FUNCTION__, __LINE__);
  printf("#CP: %d (2^%f)\n", (int)cptext_pairs_vec.size(), log2((double)cptext_pairs_vec.size()));
  printf("RC5_FILTER_LAST_ROUND %d\n", RC5_FILTER_LAST_ROUND);
  printf("RC5_FILTER_ONETOLAST_ROUND %d\n", RC5_FILTER_ONETOLAST_ROUND);
  printf("RC5_FILTER_GOUP %d\n", RC5_FILTER_GOUP);
  printf("#Filtered pairs: %d (2^%f)\n", (int)filtered_pairs_vec.size(), log2((double)filtered_pairs_vec.size()));
  printf("#Good pairs among filtered: %d\n", (int)filtered_good_pairs_vec.size());
  printf("#Good pairs total: %d\n", (int)good_pairs_vec.size());
}

/* --- */

uint32_t rc5_filter_go_up_ext_i(const uint32_t depth, uint32_t* count, 
										  const uint32_t* fib_array, const rc5_goup_diffs_t* ds_array,
										  std::vector<rc5_goup_diffs_t>* goup_diff_vec)
{
  uint32_t flag = 0;
  uint32_t s;
#if 0									  // DEBUG
  printf("[%s:%d] depth %d\n", __FILE__, __LINE__, depth);
#endif  // #if 0
  if(hw32(ds_array->D[depth]) > fib_array[depth]) {
	 return 0;						  // cut the search tree
  }

  if(depth != 0) {
	 for(s = 0; s < 32; s++) {	  // try all rotations 0..31
#if 0									  // DEBUG
		printf("[%s:%d] s %2d\n", __FILE__, __LINE__, s);
#endif  // #if 0
		if((RC5_ROTR(ds_array->D[depth + 1], s) & 0x1F) == 0) {

#if 1									  // DEBUG
		  rc5_goup_diffs_t ds_array_new = *ds_array;
		  assert(ds_array_new.len == ds_array->len);
		  for(uint32_t i = 0; i < ds_array_new.len; i++) {
			 assert(ds_array_new.D[i] == ds_array->D[i]);
			 assert(ds_array_new.S[i] == ds_array->S[i]);
		  }
		  ds_array_new.D[depth - 1] = RC5_ROTR(ds_array_new.D[depth + 1], s) ^ ds_array_new.D[depth]; // ds_array.D[5] = (ds_array.D[7] >>> s) ^ ds_array.D[6]		  
		  ds_array_new.S[depth - 1] = s;
		  if(rc5_filter_go_up_ext_i(depth - 1, count, fib_array, &ds_array_new, goup_diff_vec)) {  // recursive call for correct count of variants
			 flag = 1;
		  }
#endif  // #if 1
		}
	 }

  } else {							  // reached the top
#if 0									  // DEBUG
	 printf("-- [%s:%d] BEFORE count %10d ---\n", __FILE__, __LINE__, *count);
	 rc5_goup_diff_vec_print(*goup_diff_vec);
#endif  // #if 0

	 (*count)++;					  // accumulate num. of variants
	 //	 rc5_goup_diffs_t ds_array_new = *ds_array;
	 //	 rc5_goup_diff_print(ds_array_new);
	 //	 goup_diff_vec->push_back(ds_array_new);
	 goup_diff_vec->push_back(*ds_array);

#if 0									  // DEBUG
	 printf("-- [%s:%d] AFTER  count %10d ---\n", __FILE__, __LINE__, *count);
	 rc5_goup_diff_vec_print(*goup_diff_vec);
#endif  // #if 0
	 return 1;
  }
  return flag;
}

/* --- */
#if 1									  // DEBUG
	 // Orphaned debug snippet (no enclosing function in this dump): prints the
	 // current value of *count and then hands *goup_diff_vec to
	 // rc5_print_goup_diff_vec().
	 printf("-- [%s:%d] START  count %10d ---\n", __FILE__, __LINE__, *count);
	 rc5_print_goup_diff_vec(*goup_diff_vec);
#endif  // #if 1

  // FIX THE RECURSION!!! <-- copy input parameter goup_diff_vec!!!

/* --- */
  //  ds_array.D = (uint32_t *)calloc((size_t)ndiff, sizeof(uint32_t)); // differences
  //  ds_array.S = (uint32_t *)calloc((size_t)ndiff, sizeof(uint32_t)); // rotation constants
  //  free(fib_array);
  //  free(ds_array.S);
  //  free(ds_array.D);

/* --- */

#if 0
/*
 * Disabled (compiled out) draft of the record passed around by the RC5
 * "go up" filter: two arrays of equal length plus the text pair they
 * belong to. In this draft D and S are raw pointers, so copying the struct
 * copies the pointers, not the arrays.
 */
struct rc5_goup_diffs_t
{
  uint32_t* D;						  // array of differences
  uint32_t* S;						  // array of rotation constants (same length as D)
  uint32_t len;					  // number of elements in each of D and S
  pair_t pc_pair;					  // chosen plaintext/ciphertext pairs
};
#endif

/* --- */

  // Orphaned sanity check: the local record ds_array must agree with the
  // input record *ds_array_in in length and, element by element, in both the
  // differences D[] and the rotation constants S[].
  assert(ds_array.len == ds_array_in->len);
  for(uint32_t i = 0; i < ds_array.len; i++) {
	 //	 printf("D[%2d] %8X (s %2d)\n", i, ds.D[i], ds.S[i]);
	 assert(ds_array.D[i] == ds_array_in->D[i]);
	 assert(ds_array.S[i] == ds_array_in->S[i]);
  }

/* --- */
/*
 * \param D array of differences for rounds 0, 1, ..., D_len
 * \param D_len length of \p D
 * \param S array of rotation constants
 * \param S_len length of \p S (normally S_len = D_len)
 * \param fib array of Fibonacci coefficients
 * \param fib_len length of \p fib
 */

/* --- */
		  //		  printf("in_s1 %8X ", in_s1);
		  //		  print_binary(in_s1);
		  //		  printf("\n");
		  //		  printf("out_s1 %8X ", out_s1);
		  //		  print_binary(out_s1);
		  //		  printf("\n");

/* --- */
		  uint32_t r_zero = 0;
		  uint32_t r_one = 0;

		  if(k == kk) {
			 if(k == 0) {			  // k = kk = 0
				r_zero = 1;
				r_one = 0;
			 }
			 if(k == 1) {			  // k = kk = 1
				r_zero = 0;
				r_one = 1;
			 } 
		  } else {					  // k != kk
				r_zero = 0;
				r_one = 0;
		  }

/* --- */
/**
 * Recursive "go up" filter working on the global difference array D.
 *
 * At index \p depth the branch is cut when hw32(D[depth]) exceeds the bound
 * for that index (FIB[depth] when RC5_XOR is non-zero, FIB_NL[depth]
 * otherwise). For every rotation amount s in 0..31 for which the low 5 bits
 * of (D[depth + 1] >>> s) are zero, D[depth - 1] is overwritten with
 * (D[depth + 1] >>> s) ^ D[depth] and the search continues at depth - 1.
 * Every time index 0 is reached the global counter g_count is incremented.
 *
 * \param depth index currently examined; for depth != 0 both D[depth] and
 *        D[depth + 1] are read.
 * \return 1 if at least one branch reached index 0, 0 otherwise.
 */
uint32_t rc5_filter_go_up(uint32_t depth)
{
  // Cut the search tree when the weight bound for this index is exceeded
#if RC5_XOR							  // XOR-linear
  if(hw32(D[depth]) > FIB[depth]) {
	 return 0;
  }
#else									  // original
  if(hw32(D[depth]) > FIB_NL[depth]) {
	 return 0;
  }
#endif

  // Top reached: accumulate the number of variants
  if(depth == 0) {
	 g_count++;
	 return 1;
  }

  uint32_t b_found = 0;
  for(uint32_t rot = 0; rot < 32; rot++) { // try all rotations 0..31
	 // Skip rotations that leave any of the 5 least-significant bits set
	 if((RC5_ROTR(D[depth + 1], rot) & 0x1F) != 0) {
		continue;
	 }

	 // e.g. D[5] = (D[7] >>> s) ^ D[6]
	 D[depth - 1] = RC5_ROTR(D[depth + 1], rot) ^ D[depth];

	 // Always recurse so that g_count covers all variants
	 if(rc5_filter_go_up(depth - 1) != 0) {
		b_found = 1;
	 }
  }

  return b_found;
}

/* --- */

#if 0									  // DEBUG
		  // Disabled debug snippet: for a non-zero dx, calls max_xdp_add_lm()
		  // on (dx, dk) with dy_max as output argument and prints the triple
		  // together with the current depth.
		  if(dx != 0) {
			 max_xdp_add_lm(dx, dk, &dy_max);
			 printf("[%s:%d] depth %d | %8X %8X -> %8X\n", __FILE__, __LINE__, depth, dx, dk, dy_max);
		  }
#endif


/* --- */
#ifndef XDP_ADD_H
#include "xdp-add.hh"
#endif
#ifndef MAX_XDP_ADD_H
#include "max-xdp-add.hh"
#endif
#ifndef XDP_ADD_DIFF_SET_H
#include "xdp-add-diff-set.hh"
#endif


/* --- */
/*
 * Non-linear version of \ref rc5_filter_go_up
 *
 * Same recursion over the global array D, with two differences: the bound
 * is always taken from FIB_NL, and the value that is rotated and XOR-ed
 * into D[depth - 1] is dy_max as filled in by max_xdp_add_lm(dx, dk, &dy_max)
 * with dx = D[depth + 1] and dk = 0 (dy_max stays 0 when dx is 0).
 * The admissibility test on the rotation amount still uses D[depth + 1].
 *
 * Asserts that RC5_XOR is 0. Returns 1 if at least one branch reached
 * index 0 (incrementing g_count each time), 0 otherwise.
 */
uint32_t rc5_filter_go_up_nl(uint32_t depth)
{
  assert(RC5_XOR == 0);

  // Cut the search tree when the weight bound for this index is exceeded
  if(hw32(D[depth]) > FIB_NL[depth]) {
	 return 0;
  }

  // Top reached: accumulate the number of variants
  if(depth == 0) {
	 g_count++;
	 return 1;
  }

  uint32_t b_found = 0;
  for(uint32_t rot = 0; rot < 32; rot++) { // try all rotations 0..31

	 // Skip rotations that leave any of the 5 least-significant bits set
	 if((RC5_ROTR(D[depth + 1], rot) & 0x1F) != 0) {
		continue;
	 }

	 uint32_t d_in = D[depth + 1];
	 uint32_t d_key = 0;
	 uint32_t d_out_max = 0;

	 if(d_in != 0) {
		//		xdp_add_input_diff_to_output_dset(da, db, &dc_set);
		max_xdp_add_lm(d_in, d_key, &d_out_max);
#if 0									  // DEBUG
		printf("[%s:%d] depth %d | %8X %8X -> %8X\n", __FILE__, __LINE__, depth, d_in, d_key, d_out_max);
#endif
	 }

	 // Linear variant was: D[depth - 1] = RC5_ROTR(D[depth + 1], s) ^ D[depth]
	 D[depth - 1] = RC5_ROTR(d_out_max, rot) ^ D[depth];

	 // Always recurse so that g_count covers all variants
	 if(rc5_filter_go_up_nl(depth - 1) != 0) {
		b_found = 1;
	 }
  }	// next rot const.

  return b_found;
}


/* --- */

  uint32_t c = 0;
  std::vector<pair_t>::iterator filtered_pairs_iter = filtered_pairs_vec.begin();
  std::vector<pair_t>::iterator good_pairs_iter = good_pairs_vec.begin();
  for(filtered_pairs_iter = filtered_pairs_vec.begin(); filtered_pairs_iter != filtered_pairs_vec.end(); filtered_pairs_iter++) {
	 pair_t p = *filtered_pairs_iter;
	 pair_t good_p = *good_pairs_iter;

	 bool b_are_equal = rc5_pairs_are_equal(p, good_p);

	 while((!b_are_equal) && (good_pairs_iter != good_pairs_vec.end())) {
		good_pairs_iter++;
		good_p = *good_pairs_iter;
		b_are_equal = (
							(p.pt1[0] == good_p.pt1[0]) &&
							(p.pt1[1] == good_p.pt1[1]) &&
							(p.pt2[0] == good_p.pt2[0]) &&
							(p.pt2[1] == good_p.pt2[1]) //&&
							//							(p.ct1[0] == good_p.ct1[0]) &&
							//							(p.ct1[1] == good_p.ct1[1]) &&
							//							(p.ct2[0] == good_p.ct2[0]) &&
							//							(p.ct2[1] == good_p.ct2[1])
							);

		if(b_are_equal) {
		  //	 if(good_pairs_iter != good_pairs_vec.end()) {
		  c++;
		  printf("---[ filtered pair #%2d ]---\n", c);
		  printf("pt1(%8X %8X) -> ct1(%8X %8X)\n", p.pt1[0], p.pt1[1], p.ct1[0], p.ct1[1]);
		  printf("pt2(%8X %8X) -> ct2(%8X %8X)\n", p.pt2[0], p.pt2[1], p.ct2[0], p.ct2[1]);
		}
	 }

#if 0
	 if(b_are_equal) {
	 //	 if(good_pairs_iter != good_pairs_vec.end()) {
		c++;
		printf("---[ good pair #%2d ]---\n", c);
		printf("pt1(%8X %8X) -> ct1(%8X %8X)\n", p.pt1[0], p.pt1[1], p.ct1[0], p.ct1[1]);
		printf("pt2(%8X %8X) -> ct2(%8X %8X)\n", p.pt2[0], p.pt2[1], p.ct2[0], p.ct2[1]);
	 }
#endif  // #if 0
  }

/* --- */

#if 0
  // Disabled driver loop: for every i in [0, ALL_WORDS) sets the input
  // difference to dx = (i, 0), skips the all-zero difference and calls
  // rc5_equal_rot_differential(nrounds, dx). A progress line is printed
  // every 1024 iterations.
  //  uint32_t hw = 1;
  uint64_t N = ALL_WORDS;//WORD_SIZE;//(1UL << 20);
  for(uint64_t i = 0; i < N; i++) {

	 if((i % 1024) == 0) {
		printf("[%s:%d] %8X\n", __FILE__, __LINE__, (uint32_t)i);
	 }

	 dx[0] = (uint32_t)i;//gen_sparse(hw, WORD_SIZE);// & (0xFFFFFFFF << 5);
	 dx[1] = 0;//gen_sparse(hw, WORD_SIZE);// & (0xFFFFFFFF << 5);

	 if((dx[0] == 0) && (dx[1] == 0)) {
		continue;
	 }

	 rc5_equal_rot_differential(nrounds, dx);
   //	 rc5_equal_rot_differential(nrounds, dx);
  }
#endif

/* --- */

#if 0
	 // Disabled inline encryption of pt1 into ct1 with the expanded key S:
	 // add S[0], S[1] to the two input words, then for each round
	 // i = 1..nrounds apply
	 //   A = ROTL(A ^ B, B) + S[2i],  B = ROTL(B ^ A, A) + S[2i + 1].
	 WORD A1 = pt1[0] + S[0];
	 WORD B1 = pt1[1] + S[1];
	 for(WORD i = 1; i <= nrounds; i++) { 
		A1 = RC5_ROTL(A1^B1, B1) + S[2*i]; 
		B1 = RC5_ROTL(B1^A1, A1) + S[2*i+1]; 
	 }
	 ct1[0] = A1; 
	 ct1[1] = B1;  
#endif
#if 0
	 // Same disabled computation for the second text of the pair (pt2 -> ct2)
	 WORD A2 = pt2[0] + S[0];
	 WORD B2 = pt2[1] + S[1];
	 for(WORD i = 1; i <= nrounds; i++) { 
		A2 = RC5_ROTL(A2^B2, B2) + S[2*i]; 
		B2 = RC5_ROTL(B2^A2, A2) + S[2*i+1]; 
	 }
	 ct2[0] = A2; 
	 ct2[1] = B2;  
#endif


/* --- */
/* 
SPECK64, Clustering 14R

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/speck-xor-threshold-search-tests
#--- [./tests/speck-xor-threshold-search-tests.cc:1320] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./tests/speck-xor-threshold-search-tests.cc:1330] WORD_SIZE 32 NROUNDS 14 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-5.00
Input diffs:        9  1000000
B[ 0] 2^0.000000 |  8000000        0 2^-2.000000
B[ 1] 2^-1.000000 |    80000    80000 2^-1.000000
B[ 2] 2^-3.000000 |    80800   480800 2^-2.000000
B[ 3] 2^-7.000000 |   480008  2084008 2^-4.000000
B[ 4] 2^-13.000000 |  6080808 164A0848 2^-7.000092
B[ 5] 2^-21.000000 | F2400040 40104200 2^-13.000831
B[ 6] 2^-27.000000 |   820200     1202 2^-8.000092
B[ 7] 2^-32.000000 |     9000       10 2^-4.000000
B[ 8] 2^-36.000000 |       80        0 2^-2.000000
B[ 9] 2^-40.000000 | 80000000 80000000 2^0.000000
B[10] 2^-44.000000 | 80800000 80800004 2^-1.000000
B[11] 2^-47.000000 | 80008004 84008020 2^-3.000000
B[12] 2^-52.000000 | 808080A0 A08481A4 2^-5.000000
B[13] 2^-60.000000 |    40024  4200D01 2^-8.000092
[./src/speck-xor-threshold-search.cc:684] HWay table: p_thres = 0.031250 (2^-5.000000), hw_thres = 9, n = 32, #diffs = 4194304 2^22.00
[./src/speck-xor-threshold-search.cc:2874] trail_len 14
[./src/speck-xor-threshold-search.cc:2930] Add initial trail: 2^-60.001108 | 0
[./src/speck-xor-threshold-search.cc:2935] Initial trail: 14 R (       9  1000000) -> (   40024  4200D01) : [         1 trails]  2^-60.001108
[./src/speck-xor-threshold-search.cc:2846] this: 2^-76.000000 (best: 2^-60.001108)4200D01) : [         7 trails]  2^-59.7851937558

 */

/* --- */
/* 
	[./tests/speck-xor-threshold-search-tests.cc:1055] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.007812 2^-7.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00
	[./tests/speck-xor-threshold-search-tests.cc:1055] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.007812 2^-7.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00


 */



/* --- */

  // Orphaned consistency check: the run-time threshold p_thres must equal the
  // compile-time constant SPECK_P_THRES. Both values are printed first so the
  // mismatch is visible before the assert fires.
  // NOTE(review): this is an exact floating-point comparison (p_thres is
  // printed with %f) -- it only holds if p_thres was set from the very same
  // constant; confirm against the caller.
  if(p_thres != SPECK_P_THRES) {
	 printf("[%s:%d] %f %f\n", __FILE__, __LINE__, p_thres, SPECK_P_THRES);
  }
  assert(p_thres == SPECK_P_THRES);

/* --- */

/*
 * Draft allocator for a len x len, zero-initialised (calloc) table of
 * differential_3d_t: one array of len row pointers, then len rows of len
 * elements each.
 *
 * NOTE(review): this draft is broken as written (see the original "!!!" mark):
 *  - T is declared as differential_3d_t* but is assigned a
 *    differential_3d_t** and each T[i] is assigned a row pointer;
 *  - T is passed by value, so the allocated table never reaches the caller
 *    and the memory is leaked;
 *  - the results of calloc() are not checked;
 *  - the loop counter is 32-bit while len is 64-bit.
 * Fixing it requires a signature change (e.g. returning the table or taking
 * differential_3d_t***), so the code is left untouched here.
 */
void speck_array_differential_3d_alloc(differential_3d_t* T, const uint64_t len)
{
  T = (differential_3d_t **)calloc(len, sizeof(differential_3d_t *)); // !!! type mismatch; result lost to the caller
  for(uint32_t i = 0; i < len; i++) {
	 T[i] = (differential_3d_t *)calloc(len, sizeof(differential_3d_t));
  }
}

/*
 * Release a table made of \p len separately allocated rows plus the array of
 * row pointers itself.
 *
 * \param T array of \p len row pointers; each T[i] and then T are passed to
 *        free(). A NULL \p T is accepted and ignored.
 * \param len number of row pointers stored in \p T.
 */
void speck_array_differential_3d_free(differential_3d_t** T, const uint64_t len)
{
  // Nothing was allocated: avoid indexing through a null pointer
  if(T == NULL) {
	 return;
  }
  // 64-bit counter to match len; a 32-bit one would wrap and never reach
  // a len above 2^32
  for(uint64_t i = 0; i < len; i++) {
	 free(T[i]);					  // free(NULL) is a no-op, so unset rows are fine
  }
  free(T);
}


/* ---- */

/*

SPECK48, HW 5, P_thres 2^-7, memory 183 GB

[./tests/speck-xor-threshold-search-tests.cc:1243] WORD_SIZE 24 NROUNDS 10 SPECK_P_THRES 0.007812 2^-7.000000 SPECK_MAX_DIFF_CNT 4294967296 2^32.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 7  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

->! Found 3:

->! Found 2:

-> ! Found 0:
[./src/speck-xor-threshold-search.cc:660] p_thres = 0.007812 (2^-7.000000), n = 24, #diffs = 3075257025 2^31.52
	  [./src/speck-xor-threshold-search.cc:3000] nrounds = 1, Bn_init = 2^-inf : key   8B98E2   616342   A9867F   355298

B[ 0] = 2^0.000000
	  0:        0 ->        4 1.000000 (2^0.000000)
	  p_tot = 1.000000000000000 = 2^0.000000, Bn = 1.000000 = 2^0.000000
	  [./src/speck-xor-threshold-search.cc:3000] nrounds = 2, Bn_init = 2^-1.000000 : key   8B98E2   616342   A9867F   355298

D USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND                                                                                                                                                                          
11154 vvelichk  20   0  183g 183g 1096 R  100 18.2  98:03.04 speck-xor-thres   

*/

/* --- */
/* 
Hi Arnab,

Please see a list of TODO tasks for the FSE paper, based on the discussion that I had with Alex today.

1. In the description of the ciphers, for SIMON/SPECK-32/48/64 add a table with columns: block size, key size, number of rounds.
2. In the conclusions summarize how many rounds are attacked out of how many rounds in total.
3. Add a new table summarizing attack complexities and number of rounds attacked (including total number of rounds). Put this table before Table 1 (summary of trails/differentials).
4. Replace Fig. 5 (clustering of trails on 8 rounds) with the corresponding figure on 21 rounds that has > 200,000 trails.
5. Do we still show the figure with the expanded graph - Fig. 7? (Personally I am in favor of keeping it).
6. In the attack descriptions change the numbering of rounds to start from 1 and not from 0. E.g. in the attack on 18 rounds the 4 additional rounds at the bottom will be R15-R17 and not R14-R16.
7. Add a figure showing the propagation of truncated differences in the bottom 4 rounds of the 18R attack on SIMON32 (Sect. 7.1)
8. The paper will contain only one fully described attack for each cipher: one attack on 18R of SIMON32 and one attack on SPECK. All other attacks will be briefly outlined as extensions of the latter and their complexities will be given in the table in the beginning.
9. Update the differentials on SIMON and SPECK (resp. Table 2 and 3) with the latest results. Make the corresponding changes in the table summarizing the trails/differentials.

Currently I am working on 9.

Let's try to finish as much as possible from the above during the weekend and on Monday morning we can meet again! 

Vesselin

 */
/* --- */

/*
As we wrote you in a previous mail, we are currently adjusting the parameters of our tool in order to find trails on SPECK48 that are at least as good as the one that we sent to you (found using your start-in-the-middle approach). 

The problem is that the top transition in those trails is of very low probability (2^-7), while our tool uses a probability threshold 2^-5 and misses such low probability transitions when they appear in the first round (for the rounds after the first this is not a problem).

Our question is: in case we are unable to find a better trail on SPECK48 using our original start-from-the-top technique, 

would it be acceptable to you if we use the trails that we sent you in our attacks? Of course, in that case we shall give credit to your work and also explain the above problem (cf. the probability threshold issue).

are currently working on the analysis and the complexity estimations of these attacks for both SIMON and SPECK. o

This is also the reason why we think that merging the two papers for ePrint would not be a good idea: duplicate attacks will be described for the same versions of the ciphers.

for SIMON-32/48/64 and SPECK-32/64 we have trails/differentials that were found by our search method as described in the original FSE submission i.e. by starting the search from the top and not from the middle. Regarding SPECK48, 

(for us this is the probability computation and the search technique; for you this is the extensive attack analysis)

We also have a related question: would it be acceptable for you if we decide to use the trails that we sent you (found using your start-in-the-middle approach) in our analysis? Of course, in that case we shall give credit to your work.

*/

/* ---- */

/** 
 * Placeholder for the recursive construction of a pDDT over the input XOR
 * differences da, db of modular addition (ADD).
 *
 * Intended behaviour (per the original note): compute a pDDT of input XOR
 * differences da, db limited only by their maximum Hamming weight. The
 * output difference dc is left blank (zero) and will be explored during
 * the search. Compare to \ref xdp_add_dx_dy_pddt and
 * \ref xdp_add_dx_dy_pddt_simple, where dc is also computed.
 *
 * NOTE: the body is empty -- nothing is computed and none of the parameters
 * is read or written.
 *
 * \see xdp_add_dx_dy_pddt, xdp_add_dx_dy_pddt_simple
 */ 
void speck_xdp_add_pddt_dx_dy_i(const uint32_t k, const uint32_t n, const double p_thres, const uint32_t hw_thres, 
										  gsl_matrix* A[2][2][2], gsl_vector* C, 
										  uint32_t* da, uint32_t* db, uint32_t* dc, double* p, 
										  std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>* diff_set_dx_dy_dz,
										  std::multiset<differential_3d_t, struct_comp_diff_3d_p>* diff_mset_p,
										  uint64_t max_size)
{

}



/* --- */
	 //	 if(!b_flag) {
	 //		da = RROT(0x480901, SPECK_RIGHT_ROT_CONST);
	 //		db = 0x94009;
	 //		b_flag = true;
	 //	 }


/* --- */

/* 
Best 15R, -67

[./tests/speck-xor-threshold-search-tests.cc:764] End search
[./tests/speck-xor-threshold-search-tests.cc:766] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-13.000000
B[ 5] = 2^-21.000000
B[ 6] = 2^-27.000000
B[ 7] = 2^-32.000000
B[ 8] = 2^-36.000000
B[ 9] = 2^-40.000000
B[10] = 2^-44.000000
B[11] = 2^-47.000000
B[12] = 2^-52.000000
B[13] = 2^-60.000000
B[14] = 2^-67.000000
[./tests/speck-xor-threshold-search-tests.cc:773] Final trail:
0:        9 ->  1000000 1.000000
1:  8000000 ->        0 0.250000 (2^-2.000000)
2:    80000 ->    80000 0.500000 (2^-1.000000)
3:    80800 ->   480800 0.250000 (2^-2.000000)
4:   480008 ->  2084008 0.062500 (2^-4.000000)
5:  6080808 -> 164A0848 0.007812 (2^-7.000000)
6: F2400040 -> 40104200 0.000122 (2^-13.000000)
7:   820200 ->     1202 0.003906 (2^-8.000000)
8:     9000 ->       10 0.062500 (2^-4.000000)
9:       80 ->        0 0.250000 (2^-2.000000)
10: 80000000 -> 80000000 1.000000 (2^0.000000)
11: 80800000 -> 80800004 0.500000 (2^-1.000000)
12: 80008004 -> 84008020 0.125000 (2^-3.000000)
13: 808080A0 -> A08481A4 0.031250 (2^-5.000000)
14:    40024 ->  4200D01 0.003906 (2^-8.000000)
15: 20200101 ->  1206909 0.007812 (2^-7.000000)
p_tot = 0.000000000000000 = 2^-67.000000

[./tests/speck-xor-threshold-search-tests.cc:1259] WORD_SIZE 32 NROUNDS 15 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 268435456 2^28.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

real    258m8.996s
user    257m14.305s
sys     0m20.909s
vvelichkov@r-cluster1-1:~/skcrypto/trunk/work/src/yaarx$

 */

/* 
Best 14R, -60

[./tests/speck-xor-threshold-search-tests.cc:764] End search
[./tests/speck-xor-threshold-search-tests.cc:766] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-13.000000
B[ 5] = 2^-21.000000
B[ 6] = 2^-27.000000
B[ 7] = 2^-32.000000
B[ 8] = 2^-36.000000
B[ 9] = 2^-40.000000
B[10] = 2^-44.000000
B[11] = 2^-47.000000
B[12] = 2^-52.000000
B[13] = 2^-60.000000
[./tests/speck-xor-threshold-search-tests.cc:773] Final trail:
0:        9 ->  1000000 1.000000
1:  8000000 ->        0 0.250000 (2^-2.000000)
2:    80000 ->    80000 0.500000 (2^-1.000000)
3:    80800 ->   480800 0.250000 (2^-2.000000)
4:   480008 ->  2084008 0.062500 (2^-4.000000)
5:  6080808 -> 164A0848 0.007812 (2^-7.000000)
6: F2400040 -> 40104200 0.000122 (2^-13.000000)
7:   820200 ->     1202 0.003906 (2^-8.000000)
8:     9000 ->       10 0.062500 (2^-4.000000)
9:       80 ->        0 0.250000 (2^-2.000000)
10: 80000000 -> 80000000 1.000000 (2^0.000000)
11: 80800000 -> 80800004 0.500000 (2^-1.000000)
12: 80008004 -> 84008020 0.125000 (2^-3.000000)
13: 808080A0 -> A08481A4 0.031250 (2^-5.000000)
14:    40024 ->  4200D01 0.003906 (2^-8.000000)
p_tot = 0.000000000000000 = 2^-60.000000

[./tests/speck-xor-threshold-search-tests.cc:1259] WORD_SIZE 32 NROUNDS 14 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 268435456 2^28.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

real    207m39.110s
user    206m52.820s
sys     0m20.357s

 */

/* 
Best 13R, -52, 2040204

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-13.000000
B[ 5] = 2^-21.000000
B[ 6] = 2^-27.000000
B[ 7] = 2^-32.000000
B[ 8] = 2^-36.000000
B[ 9] = 2^-40.000000
B[10] = 2^-44.000000
B[11] = 2^-47.000000
B[12] = 2^-52.000000
[./tests/speck-xor-threshold-search-tests.cc:773] Final trail:
0:        9 ->  1000000 1.000000
1:  8000000 ->        0 0.250000 (2^-2.000000)
2:    80000 ->    80000 0.500000 (2^-1.000000)
3:    80800 ->   480800 0.250000 (2^-2.000000)
4:   480008 ->  2084008 0.062500 (2^-4.000000)
5:  6080808 -> 164A0848 0.007812 (2^-7.000000)
6: F2400040 -> 40104200 0.000122 (2^-13.000000)
7:   820200 ->     1202 0.003906 (2^-8.000000)
8:     9000 ->       10 0.062500 (2^-4.000000)
9:       80 ->        0 0.250000 (2^-2.000000)
10: 80000000 -> 80000000 1.000000 (2^0.000000)
11: 80800000 -> 80800004 0.500000 (2^-1.000000)
12: 80008004 -> 84008020 0.125000 (2^-3.000000)
13: 808080A0 -> A08481A4 0.031250 (2^-5.000000)
p_tot = 0.000000000000000 = 2^-52.000000

[./tests/speck-xor-threshold-search-tests.cc:1259] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 268435456 2^28.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

real    200m4.809s
user    199m19.167s
sys     0m18.209s

 */

/* 
		 [./tests/speck-xor-threshold-search-tests.cc:1247] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 1073741824 2^30.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00
Killed

real    207m41.686s
user    205m35.263s
sys     1m29.222s

 */

	 /* --- */


	 /* 
		 [./tests/speck-xor-threshold-search-tests.cc:764] End search
		 [./tests/speck-xor-threshold-search-tests.cc:766] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-15.000000
B[ 5] = 2^-24.000000
B[ 6] = 2^-31.000000
B[ 7] = 2^-37.000000
B[ 8] = 2^-39.000000
B[ 9] = 2^-42.000000
B[10] = 2^-46.000000
B[11] = 2^-53.000000
B[12] = 2^-61.000000
[./tests/speck-xor-threshold-search-tests.cc:773] Final trail:
0:       90 -> 10000000 1.000000
1: 80000000 ->        0 0.500000 (2^-1.000000)
2:   800000 ->   800000 0.500000 (2^-1.000000)
3:  3808000 ->  7808000 0.062500 (2^-4.000000)
4: 3C800080 ->   840080 0.000488 (2^-11.000000)
5: 80A08080 -> 84808480 0.007812 (2^-7.000000)
6:  4002400 -> 20040004 0.015625 (2^-6.000000)
7: 20000020 -> 20200001 0.062500 (2^-4.000000)
8:        1 ->  1000008 0.125000 (2^-3.000000)
9:        8 ->  8000048 0.250000 (2^-2.000000)
10:       48 -> 40000208 0.125000 (2^-3.000000)
11:  8000208 ->  800124A 0.062500 (2^-4.000000)
12:    81248 -> 40088018 0.007812 (2^-7.000000)
13:  808880A ->  84C88C8 0.003906 (2^-8.000000)
p_tot = 0.000000000000000 = 2^-61.000000

[./tests/speck-xor-threshold-search-tests.cc:1259] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 1073741824 2^30.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 7  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

real    212m30.777s
user    209m58.315s
sys     1m30.678s
v
 */

/* ---- */

/* 
[./tests/speck-xor-threshold-search-tests.cc:1259] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 268435456 2^28.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 7  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00
	[./tests/speck-xor-threshold-search-tests.cc:764] End search
	[./tests/speck-xor-threshold-search-tests.cc:766] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-15.000000
B[ 5] = 2^-24.000000
B[ 6] = 2^-31.000000
B[ 7] = 2^-37.000000
B[ 8] = 2^-39.000000
B[ 9] = 2^-42.000000
B[10] = 2^-46.000000
B[11] = 2^-53.000000
B[12] = 2^-61.000000
[./tests/speck-xor-threshold-search-tests.cc:773] Final trail:
0:       90 -> 10000000 1.000000
1: 80000000 ->        0 0.500000 (2^-1.000000)
2:   800000 ->   800000 0.500000 (2^-1.000000)
3:  3808000 ->  7808000 0.062500 (2^-4.000000)
4: 3C800080 ->   840080 0.000488 (2^-11.000000)
5: 80A08080 -> 84808480 0.007812 (2^-7.000000)
6:  4002400 -> 20040004 0.015625 (2^-6.000000)
7: 20000020 -> 20200001 0.062500 (2^-4.000000)
8:        1 ->  1000008 0.125000 (2^-3.000000)
9:        8 ->  8000048 0.250000 (2^-2.000000)
10:       48 -> 40000208 0.125000 (2^-3.000000)
11:  8000208 ->  800124A 0.062500 (2^-4.000000)
12:    81248 -> 40088018 0.007812 (2^-7.000000)
13:  808880A ->  84C88C8 0.003906 (2^-8.000000)
p_tot = 0.000000000000000 = 2^-61.000000

[./tests/speck-xor-threshold-search-tests.cc:1259] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 268435456 2^28.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 7  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

real    54m41.339s
user    54m11.639s
sys     0m22.649s
vvelichkov@r-cluster1-1:~/skcrypto/trunk/work/src/yaarx$

 */
/* {---------- SPECK-EXPER.txt ----------------- */

/*
-------------------

SPECK48, 11 rounds

[./tests/speck-xor-threshold-search-tests.cc:620] FULL | Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-8.000000
B[ 4] = 2^-15.000000
B[ 5] = 2^-17.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-26.000000
B[ 8] = 2^-33.000000
B[ 9] = 2^-40.000000
B[10] = 2^-47.000000
[./tests/speck-xor-threshold-search-tests.cc:626] FULL | Final trail:
 0:   202040 ->    82921 1.000000
 1:   480901 ->    94009 0.007812 (2^-7.000000)
 2:    80802 ->   42084A 0.007812 (2^-7.000000)
 3:   400052 ->   504200 0.007812 (2^-7.000000)
 4:   820200 ->     1202 0.031250 (2^-5.000000)
 5:     9000 ->       10 0.062500 (2^-4.000000)
 6:       80 ->        0 0.250000 (2^-2.000000)
 7:   800000 ->   800000 1.000000 (2^0.000000)
 8:   808000 ->   808004 0.500000 (2^-1.000000)
 9:   800084 ->   8400A0 0.125000 (2^-3.000000)
10:     80A0 ->   2085A4 0.062500 (2^-4.000000)
11:   808424 ->   84A905 0.007812 (2^-7.000000)
p_tot = 0.000000000000007 = 2^-47.000000
[./src/speck-xor-threshold-search.cc:2425] trail_len 11
[./src/speck-xor-threshold-search.cc:2481] Add initial trail: 2^-47.000000 | 0
[./src/speck-xor-threshold-search.cc:2486] Initial trail: 11 R (  202040    82921) -> (  808424   84A905) : [         1 trails]  2^-47.000000
[./src/speck-xor-threshold-search.cc:2074] 11 R (  202040    82921) -> (  808424   84A905) : [         1 trails]  2^-47.0000000000
[./src/speck-xor-threshold-search.cc:2397] this: 2^-72.000000 (best: 2^-47.000000)^[[B905) : [         5 trails]  2^-46.9541497038
[./src/speck-xor-threshold-search.cc:2397] this: 2^-73.000000 (best: 2^-47.000000)
[./src/speck-xor-threshold-search.cc:2397] this: 2^-71.000000 (best: 2^-47.000000)^[[A905) : [        17 trails]  2^-46.9080488853

[./src/speck-xor-threshold-search.cc:2397] this: 2^-66.000000 (best: 2^-47.000000) 84A905) : [        42 trails]  2^-46.8535285494

[./src/speck-xor-threshold-search.cc:2397] this: 2^-72.000000 (best: 2^-47.000000) 84A905) : [       132 trails]  2^-46.7486240324

[./src/speck-xor-threshold-search.cc:2397] this: 2^-71.000000 (best: 2^-47.000000) 84A905) : [       137 trails]  2^-46.7482804957
[./src/speck-xor-threshold-search.cc:2397] this: 2^-71.000000 (best: 2^-47.000000) 84A905) : [       209 trails]  2^-46.5771433723

[./src/speck-xor-threshold-search.cc:2397] this: 2^-71.000000 (best: 2^-47.000000) 84A905) : [       384 trails]  2^-46.4818412262
[./tests/speck-xor-threshold-search-tests.cc:1169] WORD_SIZE 24 NROUNDS 11 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 4096 2^12.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 5  SPECK_CLUSTER_MAX_HW 7 SPECK_EPS 2^-15.00

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$

-----------

SPECK48, 10 rounds

[./tests/speck-xor-threshold-search-tests.cc:622] FULL | Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-8.000000
B[ 4] = 2^-15.000000
B[ 5] = 2^-17.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-26.000000
B[ 8] = 2^-33.000000
B[ 9] = 2^-40.000000
[./tests/speck-xor-threshold-search-tests.cc:628] FULL | Final trail:
 0:   480901 ->    94009 1.000000
 1:    80802 ->   42084A 0.007812 (2^-7.000000)
 2:   400052 ->   504200 0.007812 (2^-7.000000)
 3:   820200 ->     1202 0.031250 (2^-5.000000)
 4:     9000 ->       10 0.062500 (2^-4.000000)
 5:       80 ->        0 0.250000 (2^-2.000000)
 6:   800000 ->   800000 1.000000 (2^0.000000)
 7:   808000 ->   808004 0.500000 (2^-1.000000)
 8:   800084 ->   8400A0 0.125000 (2^-3.000000)
 9:     80A0 ->   2085A4 0.062500 (2^-4.000000)
10:   808424 ->   84A905 0.007812 (2^-7.000000)
p_tot = 0.000000000000909 = 2^-40.000000
[./src/speck-xor-threshold-search.cc:2425] trail_len 10
[./src/speck-xor-threshold-search.cc:2481] Add initial trail: 2^-40.000000 | 0
[./src/speck-xor-threshold-search.cc:2486] Initial trail: 10 R (  480901    94009) -> (  808424   84A905) : [         1 trails]  2^-40.000000
[./src/speck-xor-threshold-search.cc:2397] this: 2^-68.000000 (best: 2^-40.000000) 84A905) : [         5 trails]  2^-39.9541497038

[./src/speck-xor-threshold-search.cc:2397] this: 2^-64.000000 (best: 2^-40.000000) 84A905) : [        17 trails]  2^-39.9080488853

[./src/speck-xor-threshold-search.cc:2397] this: 2^-64.000000 (best: 2^-40.000000) 84A905) : [        42 trails]  2^-39.8535285494

[./src/speck-xor-threshold-search.cc:2397] this: 2^-66.000000 (best: 2^-40.000000)
[./src/speck-xor-threshold-search.cc:2397] this: 2^-59.000000 (best: 2^-40.000000)
[./src/speck-xor-threshold-search.cc:2397] this: 2^-64.000000 (best: 2^-40.000000) 84A905) : [       137 trails]  2^-39.7482804957


[./tests/speck-xor-threshold-search-tests.cc:1171] WORD_SIZE 24 NROUNDS 10 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 1048576 2^20.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 5  SPECK_CLUSTER_MAX_HW 7 SPECK_EPS 2^-15.00


-------------
SPECK64, 13R

[./tests/speck-xor-threshold-search-tests.cc:932] FULL | Final bounds:
B[ 0] = 2^-2.000000
B[ 1] = 2^-5.000000
B[ 2] = 2^-9.000000
B[ 3] = 2^-12.000000
B[ 4] = 2^-16.000000
B[ 5] = 2^-22.000000
B[ 6] = 2^-27.000000
B[ 7] = 2^-31.000000
B[ 8] = 2^-34.000000
B[ 9] = 2^-39.000000
B[10] = 2^-44.000000
B[11] = 2^-51.000000
B[12] = 2^-58.000000
[./tests/speck-xor-threshold-search-tests.cc:938] FULL | Final trail:
 0: 24024080 ->   4400C2 1.000000
 1: 80200282 -> 82000492 0.007812 (2^-7.000000)
 2:   802490 -> 10800004 0.007812 (2^-7.000000)
 3: 80808020 ->  4808000 0.031250 (2^-5.000000)
 4: 24000080 ->    40080 0.031250 (2^-5.000000)
 5: 80200080 -> 80000480 0.125000 (2^-3.000000)
 6:   802480 ->   800084 0.062500 (2^-4.000000)
 7: 808080A0 -> 84808480 0.031250 (2^-5.000000)
 8: 24000400 ->    42004 0.015625 (2^-6.000000)
 9:   202000 ->    12020 0.062500 (2^-4.000000)
10:    10000 ->    80100 0.125000 (2^-3.000000)
11:    80000 ->   480800 0.250000 (2^-2.000000)
12:   480000 ->  2084000 0.125000 (2^-3.000000)
13:  2080800 -> 124A0800 0.062500 (2^-4.000000)
p_tot = 0.000000000000000 = 2^-58.000000
[./src/speck-xor-threshold-search.cc:2417] trail_len 13
[./src/speck-xor-threshold-search.cc:2473] Add initial trail: 2^-58.000000 | 0
[./src/speck-xor-threshold-search.cc:2478] Initial trail: 13 R (24024080   4400C2) -> ( 2080800 124A0800) : [         1 trails]  2^-58.000000
[./src/speck-xor-threshold-search.cc:2389] this: 2^-79.000000 (best: 2^-58.000000)24A0800) : [        11 trails]  2^-57.9083141101
[./src/speck-xor-threshold-search.cc:2389] this: 2^-78.000000 (best: 2^-58.000000)24A0800) : [        58 trails]  2^-57.8106797353

[./src/speck-xor-threshold-search.cc:2389] this: 2^-78.000000 (best: 2^-58.000000)24A0800) : [        64 trails]  2^-57.8098304993
[./src/speck-xor-threshold-search.cc:2389] this: 2^-79.000000 (best: 2^-58.000000)24A0800) : [       156 trails]  2^-57.7127858871

[./src/speck-xor-threshold-search.cc:2389] this: 2^-79.000000 (best: 2^-58.000000)24A0800) : [       191 trails]  2^-57.6686368991
[./src/speck-xor-threshold-search.cc:2389] this: 2^-79.000000 (best: 2^-58.000000)24A0800) : [       196 trails]  2^-57.6673777205
[./src/speck-xor-threshold-search.cc:2389] this: 2^-80.000000 (best: 2^-58.000000)24A0800) : [       198 trails]  2^-57.6670893135
[./tests/speck-xor-threshold-search-tests.cc:1219] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 65536 2^16.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00


---------
SPECK64, 14R

[./tests/speck-xor-threshold-search-tests.cc:932] FULL | Final bounds:
B[ 0] = 2^-2.000000
B[ 1] = 2^-5.000000
B[ 2] = 2^-9.000000
B[ 3] = 2^-16.000000
B[ 4] = 2^-19.000000
B[ 5] = 2^-23.000000
B[ 6] = 2^-29.000000
B[ 7] = 2^-34.000000
B[ 8] = 2^-38.000000
B[ 9] = 2^-41.000000
B[10] = 2^-46.000000
B[11] = 2^-51.000000
B[12] = 2^-58.000000
B[13] = 2^-65.000000
[./tests/speck-xor-threshold-search-tests.cc:938] FULL | Final trail:
 0: 24024080 ->   4400C2 1.000000
 1: 80200282 -> 82000492 0.007812 (2^-7.000000)
 2:   802490 -> 10800004 0.007812 (2^-7.000000)
 3: 80808020 ->  4808000 0.031250 (2^-5.000000)
 4: 24000080 ->    40080 0.031250 (2^-5.000000)
 5: 80200080 -> 80000480 0.125000 (2^-3.000000)
 6:   802480 ->   800084 0.062500 (2^-4.000000)
 7: 808080A0 -> 84808480 0.031250 (2^-5.000000)
 8: 24000400 ->    42004 0.015625 (2^-6.000000)
 9:   202000 ->    12020 0.062500 (2^-4.000000)
10:    10000 ->    80100 0.125000 (2^-3.000000)
11:    80000 ->   480800 0.250000 (2^-2.000000)
12:   480000 ->  2084000 0.125000 (2^-3.000000)
13:  2080800 -> 124A0800 0.062500 (2^-4.000000)
14: 12480008 -> 80184008 0.007812 (2^-7.000000)
p_tot = 0.000000000000000 = 2^-65.000000
[./src/speck-xor-threshold-search.cc:2417] trail_len 14
[./src/speck-xor-threshold-search.cc:2473] Add initial trail: 2^-65.000000 | 0
[./src/speck-xor-threshold-search.cc:2478] Initial trail: 14 R (24024080   4400C2) -> (12480008 80184008) : [         1 trails]  2^-65.000000
[./src/speck-xor-threshold-search.cc:2389] this: 2^-87.000000 (best: 2^-65.000000)0184008) : [         2 trails]  2^-64.9556058806
[./src/speck-xor-threshold-search.cc:2389] this: 2^-91.000000 (best: 2^-65.000000)0184008) : [         5 trails]  2^-64.9541497038
[./src/speck-xor-threshold-search.cc:2389] this: 2^-90.000000 (best: 2^-65.000000)
[./src/speck-xor-threshold-search.cc:2389] this: 2^-91.000000 (best: 2^-65.000000)0184008) : [         9 trails]  2^-64.9097557460
[./src/speck-xor-threshold-search.cc:2389] this: 2^-91.000000 (best: 2^-65.000000)0184008) : [        15 trails]  2^-64.9082999076
[./src/speck-xor-threshold-search.cc:2389] this: 2^-88.000000 (best: 2^-65.000000)0184008) : [        33 trails]  2^-64.8415431645
[./src/speck-xor-threshold-search.cc:2389] this: 2^-87.000000 (best: 2^-65.000000)0184008) : [       171 trails]  2^-64.7191929890
[./src/speck-xor-threshold-search.cc:2389] this: 2^-87.000000 (best: 2^-65.000000)0184008) : [       186 trails]  2^-64.7172712620
[./src/speck-xor-threshold-search.cc:2389] this: 2^-89.000000 (best: 2^-65.000000)0184008) : [       209 trails]  2^-64.7127035823
[./src/speck-xor-threshold-search.cc:2389] this: 2^-89.000000 (best: 2^-65.000000)0184008) : [       272 trails]  2^-64.6669811757
[./tests/speck-xor-threshold-search-tests.cc:1219] WORD_SIZE 32 NROUNDS 14 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 65536 2^16.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$



*/
/* ---------- SPECK-EXPER.txt -----------------} */


/* --- */

uint32_t speck_xdp_add_dx_dy_pddt_gen_dset( const uint32_t da, const uint32_t db, const double p_thres,
														  std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>* diff_set_dx_dy_dz,
														  std::multiset<differential_3d_t, struct_comp_diff_3d_p>* diff_mset_p)
{
  uint64_t cnt = 0;

  diff_set_t dc_set;
  //xdp_add_input_diff_to_output_dset(da, db, &dc_set);
  {
	 uint32_t n = WORD_SIZE;
	 uint32_t dc = 0;

	 // if fixed[i] = 1, dc[i] can be anything, if fixed[i] = 0, dc[i] is fixed
	 uint32_t fixed = 0;
	 dc |= (da & 1) ^ (db & 1);

	 uint32_t C = cap(da, db);
	 for(uint32_t i = 1; i < n; i++) {
		uint32_t C_this = (C >> i) & 1;
		uint32_t da_prev = (da >> (i - 1)) & 1;
		uint32_t db_prev = (db >> (i - 1)) & 1;
		uint32_t dc_prev = (dc >> (i - 1)) & 1;
		uint32_t da_this = (da >> i) & 1;
		uint32_t db_this = (db >> i) & 1;
		uint32_t dc_this = 0;		  // to be determined
		uint32_t fixed_this = 0;		  // is this bit fixed or no
		if(is_eq(da_prev, db_prev, dc_prev)) {
		  dc_this = (da_this ^ db_this ^ da_prev);
		  fixed_this = FIXED;				  // fixed
		} else {
		  if((i == (n-1)) || (da_this != db_this) || (C_this == 1)) {
			 dc_this = 0;				  // can be 0/1
			 fixed_this = STAR;			  // can be 0/1 
		  } else {
			 dc_this = da_this;		  // so that it is possible to have da_this = db_this = dc_this
			 fixed_this = FIXED;				  // fixed
		  }
		}
		dc |= (dc_this << i);
		fixed |= (fixed_this << i);
	 }
	 dc_set.diff = dc;
	 dc_set.fixed = fixed;
  }

  //  uint64_t dc_set_size = xdp_add_dset_size(dc_set);

  //  std::vector<uint32_t> dc_set_all;
  //  xdp_add_dset_gen_diff_all(dc_set, &dc_set_all);
  uint32_t nfree = hw32(dc_set.fixed & MASK);	  // number of free (non-fixed) positions
  uint32_t N = (1U << (nfree));
  double logN = log2(N);

  uint32_t max_vals = N;//32;//16;

  uint32_t nrand_vals = std::min((const uint64_t)N, (const uint64_t)max_vals);

  //  for(uint32_t i = 0; i < N; i++) { // all values of the free positions
  for(uint32_t val = 0; val < nrand_vals; val++) { // nvals random values

	 uint32_t i = random32() % N;
	 if(nrand_vals == N) {
		i = val;
	 }

	 uint32_t dc_new = dc_set.diff;
	 uint32_t i_pos = 0;				  // counting the bit position within the log2(N)-bit value i

	 for(uint32_t j = 0; j < WORD_SIZE; j++) {
		uint32_t is_fixed = (dc_set.fixed >> j) & 1;

		if(is_fixed == STAR) {		  // the position is free
		  uint32_t val = (i >> i_pos) & 1;
		  dc_new ^= (val << j);	  // flip the bit at the free position
		  assert((double)i_pos < logN);
		  i_pos++;
		}
	 }

	 uint32_t db_next = LROT(db, SPECK_LEFT_ROT_CONST) ^ dc_new;
	 bool b_is_low_hw_next = (hw32(dc_new) <= SPECK_MAX_HW) && (hw32(db_next) <= SPECK_MAX_HW);

	 if((b_is_low_hw_next) && (da != 0) && (db != 0)) {

		double p = xdp_add_lm(da, db, dc_new);

		if(p >= p_thres) {

		  differential_3d_t new_diff;
		  new_diff.dx = da;
		  new_diff.dy = db;
		  new_diff.dz = dc_new;
		  new_diff.p = p;
		  uint32_t old_size = diff_set_dx_dy_dz->size();
		  diff_set_dx_dy_dz->insert(new_diff);
		  if(old_size != diff_set_dx_dy_dz->size()) {
			 diff_mset_p->insert(new_diff);
#if 1									  // DEBUG
			 uint32_t hway_size = diff_set_dx_dy_dz->size();
			 printf("\r[%s:%d] Add %8X %8X -> %8X : %f 2^%4.2f | 2^%4.2f  HW size %d 2^%f", __FILE__, __LINE__, da, db, dc_new, p, log2(p), log2(p_thres), hway_size, log2(hway_size));
			 fflush(stdout);
#endif
			 cnt++;
		  }
		}
	 }
	 assert(i_pos == log2(N));
  }
  assert(diff_set_dx_dy_dz->size() == diff_mset_p->size());
  return cnt;
}


/* --- */
/**
 * Sample random output differences for the fixed input pair (da, db) until
 * max_size new differentials have been stored.
 *
 * Every candidate is produced by gen_sparse(hw_thres, WORD_SIZE). It is kept
 * when xdp_add_lm(da, db, candidate) >= p_thres and the resulting triple is
 * not yet in diff_set_dx_dy_dz; each kept triple is also put in diff_mset_p.
 *
 * NOTE: the loop ends only after max_size successful insertions, so it does
 * not terminate when fewer than max_size distinct candidates can pass.
 *
 * \param A not referenced by the body.
 * \param da first input difference.
 * \param db second input difference.
 * \param hw_thres first argument of gen_sparse().
 * \param p_thres minimum probability of a stored differential.
 * \param max_size number of differentials to add.
 * \param diff_set_dx_dy_dz set of differentials; detects duplicates.
 * \param diff_mset_p multiset mirroring the newly added differentials.
 * \return number of added differentials.
 */
uint32_t speck_xdp_add_dx_dy_pddt_gen_random ( gsl_matrix* A[2][2][2], const uint32_t da, const uint32_t db, 
															  uint32_t hw_thres, double p_thres, const uint64_t max_size,
															  std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>* diff_set_dx_dy_dz,
															  std::multiset<differential_3d_t, struct_comp_diff_3d_p>* diff_mset_p)
{
  uint64_t n_added = 0;
  for(; n_added < max_size; ) {

	 const uint32_t dz_cand = gen_sparse(hw_thres, WORD_SIZE);
	 const double prob = xdp_add_lm(da, db, dz_cand);

	 if(!(prob >= p_thres)) {
		continue;					  // below the threshold: draw again
	 }

	 differential_3d_t cand;
	 cand.dx = da;
	 cand.dy = db;
	 cand.dz = dz_cand;
	 cand.p = prob;

	 const bool b_is_new = diff_set_dx_dy_dz->insert(cand).second;
	 if(!b_is_new) {
		continue;					  // already stored: draw again
	 }

	 diff_mset_p->insert(cand);
#if 1									  // DEBUG
	 uint32_t hway_size = diff_set_dx_dy_dz->size();
	 printf("\r[%s:%d] Add %8X %8X -> %8X : %f 2^%4.2f | 2^%4.2f  HW size %d 2^%f", __FILE__, __LINE__, da, db, dz_cand, prob, log2(prob), log2(p_thres), hway_size, log2(hway_size));
	 fflush(stdout);
#endif
	 ++n_added;
  }
  assert(diff_set_dx_dy_dz->size() == diff_mset_p->size());
  return n_added;
}

/* --- */
#if 1									  // DEBUG
		std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>::iterator set_iter = diff_set_dx_dy_dz->begin();
		for(set_iter = diff_set_dx_dy_dz->begin(); set_iter != diff_set_dx_dy_dz->end(); set_iter++) {
		  if((set_iter->dx == dx) && (set_iter->dy == dy) && (set_iter->dx != 0) && (set_iter->dy != 0)) {
			 printf("[%s:%d] (%8X %8X) | %8X %8X %8X 2^%f ", __FILE__, __LINE__, dx, dy, set_iter->dx, set_iter->dy, set_iter->dz, log2(set_iter->p));
			 printf(" <-- ");
			 printf("\n");
		  }
		}
#endif

/* --- */

void speck_xdp_add_dx_dy_pddt_gen_random(const uint32_t da, const uint32_t db, 
													  uint32_t hw_thres, double p_thres, const uint64_t max_size,
													  std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>* diff_set_dx_dy_dz,
													  std::multiset<differential_3d_t, struct_comp_diff_3d_p>* diff_mset_p)
{
  //  bool b_is_hway_this = (hw32(da) <= SPECK_MAX_HW) && ((hw32(db) <= SPECK_MAX_HW));
  //  if(!b_is_hway_this)
  //	 return;

  //  uint32_t n = WORD_SIZE;
  //  double p_thres = P_THRES;
  //  double p = 0.0;

  // init A
  gsl_matrix* A[2][2][2];
  xdp_add_alloc_matrices(A);
  xdp_add_sf(A);
  xdp_add_normalize_matrices(A);

  // init C
  gsl_vector* C = gsl_vector_calloc(XDP_ADD_MSIZE);
  gsl_vector_set(C, XDP_ADD_ISTATE, 1.0);

  uint64_t cnt = 0;

  diff_set_t dc_set;
  xdp_add_input_diff_to_output_dset(da, db, &dc_set);
  uint64_t dc_set_size = xdp_add_dset_size(dc_set);

  // {---
  std::vector<uint32_t> dc_set_all;
#if 0
  xdp_add_dset_gen_diff_all(dc_set, &dc_set_all);
#else
  uint32_t nfree = hw32(dc_set.fixed & MASK);	  // number of free (non-fixed) positions
  uint32_t N = (1U << (nfree));
  double logN = log2(N);

  uint32_t nset_cnt = 0;

  uint32_t nrand_vals = 32;

  nrand_vals = std::min((const uint64_t)N, (const uint64_t)nrand_vals);

  //  for(uint32_t i = 0; i < N; i++) { // all values of the free positions
  for(uint32_t val = 0; val < nrand_vals; val++) { // nvals random values

	 uint32_t i = random32() % N;

	 uint32_t dc_new = dc_set.diff;
	 uint32_t i_pos = 0;				  // counting the bit position within the log2(N)-bit value i

	 for(uint32_t j = 0; j < WORD_SIZE; j++) {
		uint32_t is_fixed = (dc_set.fixed >> j) & 1;

		if(is_fixed == STAR) {		  // the position is free
		  uint32_t val = (i >> i_pos) & 1;
		  //		  dc_new |= (val << j);
		  dc_new ^= (val << j);	  // flip the bit at the free position
		  assert((double)i_pos < logN);
		  i_pos++;
		}
	 }
	 //	 double p = xdp_add(A, da, db, dc_new);
	 //	 if(p >= p_thres) {
		dc_set_all.push_back(dc_new);
		//	 }
	 assert(i_pos == log2(N));
  }
  //  assert(dc_set_all->size() == N);
#endif
  // ---}

  //  assert(dc_set_size == dc_set_all.size());

#if 1
  std::vector<uint32_t>::iterator vec_iter = dc_set_all.begin();

  //  while(cnt < std::min((const uint64_t)max_size, (const uint64_t)dc_set_size)) {
  while((vec_iter != dc_set_all.end()) && (cnt < max_size)) {

	 uint32_t dc = *vec_iter;
	 double p = xdp_add(A, da, db, dc);

	 //	 uint32_t da_next = RROT(dc, SPECK_RIGHT_ROT_CONST); // ! the left input to the next round will be rotated before entering the ADD op
	 //	 uint32_t db_next = LROT(dc, SPECK_LEFT_ROT_CONST) ^ dc;

	 //	 uint32_t dc_next_max = 0;
	 //	 double p_next_max = max_xdp_add_lm(da_next, db_next, &dc_next_max);
	 //	 bool b_is_hway_next = (p_next_max >= SPECK_P_THRES) && (hw32(dc_next_max) <= SPECK_MAX_HW);

	 //	 bool b_is_hway_this = true;//(hw32(da) <= SPECK_MAX_HW) && ((hw32(db) <= SPECK_MAX_HW));
	 //	 bool b_is_hway_next = true;//(hw32(da_next) <= SPECK_MAX_HW) && ((hw32(db_next) <= SPECK_MAX_HW));

	 //	 if((p >= p_thres) && (hw32(dc) <= hw_thres)) {
	 //	 if((p >= p_thres) && (p_next_max >= SPECK_P_THRES)) {
	 //	 if((p >= p_thres) && (b_is_hway_next) && (b_is_hway_this)) {
	 if(p >= p_thres) {

		differential_3d_t new_diff;
		new_diff.dx = da;
		new_diff.dy = db;
		new_diff.dz = dc;
		new_diff.p = p;
		uint32_t old_size = diff_set_dx_dy_dz->size();
		diff_set_dx_dy_dz->insert(new_diff);
		if(old_size != diff_set_dx_dy_dz->size()) {
		  diff_mset_p->insert(new_diff);
#if 1									  // DEBUG
		  uint32_t hway_size = diff_set_dx_dy_dz->size();
		  //		  printf("\r[%s:%d] [%10lld / %10lld] | Add %8X %8X -> %8X : %f 2^%4.2f | 2^%4.2f  HW size %d 2^%f", __FILE__, __LINE__, cnt, max_size, da, db, dc, p, log2(p), log2(p_thres), hway_size, log2(hway_size));
		  printf("\r[%s:%d] Add %8X %8X -> %8X : %f 2^%4.2f | 2^%4.2f  HW size %d 2^%f", __FILE__, __LINE__, da, db, dc, p, log2(p), log2(p_thres), hway_size, log2(hway_size));
		  fflush(stdout);
#endif
		  cnt++;
		}
	 }
	 vec_iter++;
  }
#endif
  uint32_t croad_new_size = diff_set_dx_dy_dz->size();

  //  printf("[%s:%d] CR size %d 2^%f\n", __FILE__, __LINE__, croad_new_size, log2(croad_new_size));

  //  speck_xdp_add_pddt_i(k, n, p_thres, A, C, &da, &db, &dc, &p, diff_set_dx_dy_dz, diff_mset_p, max_size);
#if 0									  // DEBUG
  printf("[%s:%d] p_thres = %f (2^%f), n = %d, #diffs = %d\n", __FILE__, __LINE__, 
			p_thres, log2(p_thres), WORD_SIZE, diff_mset_p->size());
#endif
  assert(diff_set_dx_dy_dz->size() == diff_mset_p->size());

  gsl_vector_free(C);
  xdp_add_free_matrices(A);
}


/* --- */
	 // If the maximum allowed size is smaller than the full vector size,
	 // then sample the vector randomly.
	 // (Disabled: the block below is compiled out by the #if 0.)
#if 0
	 if(max_size < dc_set_all.size()) {
		uint32_t i_rand = random32() % dc_set_all.size();
		dc = dc_set_all.at(i_rand);
	 }
#endif

/* --- */

  // {---
  std::vector<uint32_t> dc_set_all;
#if 0
  xdp_add_dset_gen_diff_all(dc_set, &dc_set_all);
#else
  uint32_t nfree = hw32(dc_set.fixed & MASK);	  // number of free (non-fixed) positions
  uint32_t N = (1U << (nfree));
  double logN = log2(N);

  uint32_t nset_cnt = 0;

  for(uint32_t i = 0; i < N; i++) { // all values of the free positions
  //  while(nset_cnt < std::min((const uint64_t)N, (const uint64_t)max_size)) {

	 uint32_t dc_new = dc_set.diff;
	 uint32_t i_pos = 0;				  // counting the bit position within the log2(N)-bit value i

	 for(uint32_t j = 0; j < WORD_SIZE; j++) {
		uint32_t is_fixed = (dc_set.fixed >> j) & 1;

		if(is_fixed == STAR) {		  // the position is free
		  uint32_t val = (i >> i_pos) & 1;
		  //		  dc_new |= (val << j);
		  dc_new ^= (val << j);	  // flip the bit at the free position
		  assert((double)i_pos < logN);
		  i_pos++;
		}
	 }
	 double p = xdp_add(A, da, db, dc_new);
	 if(p >= p_thres) {
		dc_set_all.push_back(dc_new);
	 }
	 assert(i_pos == log2(N));
  }
  //  assert(dc_set_all->size() == N);
#endif
  // ---}

/* --- */

//#define SPECK_LOG_FILE "speck24-r10-ddt25-hw6-pthres0-0079-greedy.log"
//#define SPECK_LOG_FILE "speck24-r10-ddt25-hw6-pthres0-015-greedy.log"
//#define SPECK_LOG_FILE "speck32-r13-ddt25-hw6-pthres0-015-greedy.log"
//#define SPECK_LOG_FILE "speck32-r13-ddt25-hw6-pthres0-031-greedy.log"
//#define SPECK_LOG_FILE "speck32-r13-ddt30-hw8-pthres0-031-greedy.log"
//#define SPECK_LOG_FILE "speck24-r10-ddt30-hw7-pthres0-015-greedy.log"
//#define SPECK_LOG_FILE "speck24-r10-ddt30-hw7-pthres0-015-greedy.log"

//#define SPECK_LOG_FILE "speck32-r13-ddthw9-hw7-ddt25-eps15-cluster.log"
//#define SPECK_LOG_FILE "speck24-r10-ddthw7-hw7-ddt25-eps15-cluster.log"
//#define SPECK_LOG_FILE "speck32-r14-ddt31-hw8-pthres0-031-greedy.log"
//#define SPECK_LOG_FILE "speck24.log"
//#define SPECK_LOG_FILE "speck16-r9-hw9-ddt22-eps10-cluster.log"
//#define SPECK_LOG_FILE "speck16-r9-ddt30-hw9-pthres0-031-greedy.log"
//#define SPECK_LOG_FILE "speck24-r10-hw7-ddt22-eps0-cluster.log"
//#define SPECK_LOG_FILE "speck24-r10-hw7-ddt25-eps15-cluster.log"
//#define SPECK_LOG_FILE "speck24-lm-r10-hw8-ddt27-eps10.log"
//#define SPECK_LOG_FILE "speck24-r10-hw7-ddt26-eps10.log"
//#define SPECK_LOG_FILE "speck24-r10-hw7-ddt28-eps10.log"
//#define SPECK_LOG_FILE "speck24-r10-hw7-ddt25-eps10.log"
//#define SPECK_LOG_FILE "speck24-r11-ddt25-hw7-pthres0-031-greedy.log"
//#define SPECK_LOG_FILE "speck24-r11-ddt30-hw9-pthres0-031-greedy.log"
//#define SPECK_LOG_FILE "speck24-r10-hw10-ddt30-eps15-cluster.log"
//#define SPECK_LOG_FILE "speck32-r14-ddt32-hw8-pthres0-031-greedy.log"

/* --- */

  if((n == 0) && (n != (nrounds - 1))) { // First round of a trail with more than one round
	 // The two differences handed to max_xdp_add_lm(), derived from the initial input differences
	 const uint32_t da_add = dx_init_in;
	 const uint32_t db_add = RROT((dx_init_in ^ dy_init_in), left_rot_const);

	 // Probability returned by max_xdp_add_lm(); the associated output difference is written to dc_best
	 uint32_t dc_best = 0;
	 pn = max_xdp_add_lm(da_add, db_add, &dc_best);

	 // Record the round in the partial trail
	 diff[n].dx = LROT(dc_best, right_rot_const); // dx_{i-1}
	 diff[n].dy = db_add;								 // dy_{i-1}
	 diff[n].p = pn;

	 // Continue with the next round
	 speck_xor_threshold_search_decrypt(n+1, nrounds, A, B, Bn, diff, dx_init_in, dy_init_in, trail, dx_init, dy_init, right_rot_const, left_rot_const, diff_mset_p, diff_set_dx_dy_dz, croads_diff_mset_p, croads_diff_set_dx_dy_dz, p_thres, b_speck_cluster_trails);
  }


/* --- */

  tv_pt[0] = random32() & MASK;
  tv_pt[1] = random32() & MASK;
  tv_ct[0] = random32() & MASK;
  tv_ct[1] = random32() & MASK;


/* --- */

		// For a full-length trail: print it to stdout and append it to the trails log file
		if(nrounds == NROUNDS) {
		  FILE* fp = fopen(TEA_ADD_TRAILS_LOGFILE, "a");
		  if(fp == NULL) {			  // fopen() returns NULL on failure: keep printing, skip the file
			 fprintf(stderr, "[%s:%d] WARNING: cannot open the trails log file for appending\n", __FILE__, __LINE__);
		  }
		  printf("[%s:%d] Add new trail:\n", __FILE__, __LINE__);
		  double p_tmp = 1.0;		  // product of the round probabilities
		  for(uint32_t i = 0; i < (uint32_t)nrounds; i++) {
			 p_tmp *= trail[i].p;
			 if(fp != NULL) {
				fprintf(fp, "%8X %8X %10.9f ", trail[i].dy, trail[i].dx, trail[i].p);
			 }
			 printf("%8X %8X %10.9f ", trail[i].dy, trail[i].dx, trail[i].p);
		  }
		  printf(" | p = %f (2^%f)\n", p_tmp, log2(p_tmp));
		  if(fp != NULL) {
			 fprintf(fp, "\n");
			 fclose(fp);
		  }
		}


/* ---- */


/**
 * Recursive threshold search for XOR differential trails of XTEA.
 *
 * The function handles round \p n of an \p nrounds trail and calls itself
 * for round n+1. Four cases are distinguished:
 *  - n == 0 and nrounds == 1: scan diff_mset_p and keep the best single round;
 *  - n == 0 and nrounds > 1: scan diff_mset_p and recurse on every entry whose
 *    estimate p_round * B[nrounds - 2 - n] is at least *Bn;
 *  - 0 < n < nrounds - 1: look up (or compute with nz_xdp_xtea_f() and store)
 *    the differentials with input diff[n-1].dy and recurse on the promising ones;
 *  - n == nrounds - 1 (nrounds > 1): pick the differential for the last round
 *    and, if the complete trail is at least as good as *Bn, store it.
 *
 * \param n index of the current round.
 * \param nrounds total number of rounds of the trail.
 * \param npairs passed to xdp_xtea_f2_fk_approx().
 * \param round_key round keys; entry n is used for round n.
 * \param round_delta round constants; entry n is used for round n.
 * \param A matrices passed to nz_xdp_xtea_f().
 * \param B array of bounds; B[nrounds - 1 - (n + 1)] multiplies the estimate of
 *        a partial trail, and B[n] is updated when a better trail is found.
 * \param Bn probability of the best trail found so far (updated).
 * \param diff_in trail for rounds 0..n-1 (copied into a local array).
 * \param trail receives the best trail found.
 * \param lsh_const passed to nz_xdp_xtea_f() and xdp_xtea_f2_fk_approx().
 * \param rsh_const passed to nz_xdp_xtea_f() and xdp_xtea_f2_fk_approx().
 * \param diff_mset_p differentials of the F-function ordered by
 *        struct_comp_diff_p.
 * \param diff_set_dx_dy the same differentials ordered by
 *        struct_comp_diff_dx_dy (the lookups below assume dx is the primary key).
 * \param dxx_init initial input difference chosen in round 0 (passed by value
 *        down the recursion).
 * \param dxx_init_in receives dxx_init when a new best trail is stored.
 */
void xtea_xor_threshold_search(const int n, const int nrounds, const uint32_t npairs, 
										 const uint32_t round_key[64], const uint32_t round_delta[64],
										 gsl_matrix* A[2][2][2], double B[NROUNDS], double* Bn,
										 const differential_t diff_in[NROUNDS], differential_t trail[NROUNDS], 
										 uint32_t lsh_const, uint32_t rsh_const,
										 std::multiset<differential_t, struct_comp_diff_p>* diff_mset_p,
										 std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy,
										 uint32_t dxx_init, uint32_t* dxx_init_in)
{
  double pn = 0.0;

  // make a local copy of the input diff trail
  differential_t diff[NROUNDS] = {{0, 0, 0, 0.0}};
  for(int i = 0; i < n; i++) {
	 diff[i].dx = diff_in[i].dx;
	 diff[i].dy = diff_in[i].dy;
	 diff[i].npairs = diff_in[i].npairs;
	 diff[i].p = diff_in[i].p;
  }

  if((n == 0) && (nrounds == 1)) {						  // Only one round
	 assert(*Bn == 0.0);
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx;
		uint32_t dy = mset_iter->dy;
		double p_f = mset_iter->p;
		uint32_t dxx = dy;		  // the second input difference to the first round is set to dy
		uint32_t dyy = 0;			  // to be computed
		//		double p_add2 = max_xdp_add(A, dxx, dy, &dyy);
		double p_add2 = max_xdp_add_lm(dxx, dy, &dyy); // called to obtain dyy
		// the final prob. is the product of the probabilities of the F-function and the second add operation
		pn = p_add2 * p_f;		  // NOTE: overwritten by the next statement
		pn = xdp_xtea_f2_fk_approx(npairs, dxx, dx, dyy, round_key[n], round_delta[n], lsh_const, rsh_const); // adjust the probability to the round key

		if((pn >= *Bn) && (pn != 0.0)) { // discard zero probability
		  dxx_init = dxx;
		  trail[n].dx = dx;
		  trail[n].dy = dyy;		  // !
		  trail[n].p = pn;
		  *Bn = pn;
		  B[n] = pn;
		} else {
		  b_end = true;
		}
		mset_iter++;
	 }	// while()
  }

  if((n == 0) && (nrounds > 1)) {						  // Round-0 and not last round
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx;
		uint32_t dy = mset_iter->dy;
		double p_f = mset_iter->p;
		uint32_t dxx = dy;		  // the second input difference to the first round is set to dy
		uint32_t dyy = 0;			  // to be computed
		//		double p_add2 = max_xdp_add(A, dxx, dy, &dyy);		
		double p_add2 = max_xdp_add_lm(dxx, dy, &dyy); // called to obtain dyy
		pn = p_add2 * p_f;		  // product of the probabilities of the F-function and the second add operation; NOTE: overwritten below
		pn = xdp_xtea_f2_fk_approx(npairs, dxx, dx, dyy, round_key[n], round_delta[n], lsh_const, rsh_const); // adjust the probability to the round key

		// estimate for the full trail: this round times the bound for the remaining rounds
		double p = pn * B[nrounds - 1 - (n + 1)];
		assert(B[nrounds - 1 - (n + 1)] != 0.0);

		// remember the first element to detect if the recursion inserts a new one in front of it
		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();
		if((p >= *Bn) && (p != 0.0)) { // discard zero probability
		  dxx_init = dxx;
		  diff[n].dx = dx;
		  diff[n].dy = dyy;		  // !
		  diff[n].p = pn;
		  xtea_xor_threshold_search(n+1, nrounds, npairs, round_key, round_delta, A, B, Bn, diff, trail, lsh_const, rsh_const, diff_mset_p, diff_set_dx_dy, dxx_init, dxx_init_in);
		} else {
		  b_end = true;
		}
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		} else {
		  mset_iter++;
		}
	 }		 // while()
  }

  if((n >= 1) && (n != (nrounds - 1))) { // Round-i and not last round
	 uint32_t dx = diff[n - 1].dy; // !
	 uint32_t dy = 0;

	 differential_t diff_dy;
	 diff_dy.dx = dx;  
	 diff_dy.dy = 0;
	 diff_dy.p = 0.0;

	 // check if the differential is not already in the set
	 std::set<differential_t, struct_comp_diff_dx_dy>::iterator find_iter = diff_set_dx_dy->lower_bound(diff_dy);
 	 bool b_found = (find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx);
	 if(!b_found) {				  // if not found, add new
		const double p_f_new = nz_xdp_xtea_f(A, dx, &dy, lsh_const, rsh_const); // does not touch the outer pn
		diff_dy.dx = dx;  
		diff_dy.dy = dy;
		diff_dy.p = p_f_new;

		// Add the new diff to Dp only if it has better prob. than the min.
		//		double p_min = diff_mset_p->rbegin()->p;
		//		if(diff_dy.p >= p_min) {
		//		  diff_mset_p->insert(diff_dy);
		//		}
		if(diff_dy.p >= XTEA_XOR_P_THRES) {
		  uint32_t old_size = diff_set_dx_dy->size();
		  diff_set_dx_dy->insert(diff_dy);
		  uint32_t new_size = diff_set_dx_dy->size();
		  if(old_size != new_size) {
			 diff_mset_p->insert(diff_dy);
		  }
		}
		//		diff_set_dx_dy->insert(diff_dy);
		find_iter = diff_set_dx_dy->lower_bound(diff_dy);
	 } 
	 // the iterator may be end() if nothing was inserted above: test it before dereferencing
	 assert((find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx));

	 // Visit all stored differentials with input difference dx. The end() test comes first so
	 // that end() is never dereferenced; "<= dx" replaces "< dx + 1", which wraps for dx = 0xFFFFFFFF.
	 while((find_iter != diff_set_dx_dy->end()) && (find_iter->dx <= dx)) {
		assert((find_iter->dx == dx));
		diff_dy = *find_iter;

		dx = diff_dy.dx;
		dy = diff_dy.dy;
		double p_f = diff_dy.p;
		uint32_t dxx = diff[n - 1].dx;
		uint32_t dyy = 0;			  // to be computed
		//		double p_add2 = max_xdp_add(A, dxx, dy, &dyy);		
		double p_add2 = max_xdp_add_lm(dxx, dy, &dyy); // called to obtain dyy
		pn = p_add2 * p_f;		  // product of the probabilities of the F-function and the second add operation; NOTE: overwritten below
		pn = xdp_xtea_f2_fk_approx(npairs, dxx, dx, dyy, round_key[n], round_delta[n], lsh_const, rsh_const); // adjust the probability to the round key

		double p = 1.0;
		for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		  p *= diff[i].p;
		}
		p = p * pn * B[nrounds - 1 - (n + 1)]; 

		// store the beginning
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator begin_iter = diff_set_dx_dy->begin();
		if((p >= *Bn) && (p != 0.0)) {
		  diff[n].dx = dx;
		  diff[n].dy = dyy;	  // !
		  diff[n].p = pn;
		  xtea_xor_threshold_search(n+1, nrounds, npairs, round_key, round_delta, A, B, Bn, diff, trail, lsh_const, rsh_const, diff_mset_p, diff_set_dx_dy, dxx_init, dxx_init_in);
		}
		if(begin_iter != diff_set_dx_dy->begin()) { // if the root was updated, start from beginning
		  diff_dy.dx = dx;  
		  diff_dy.dy = 0;
		  diff_dy.p = 0.0;
		  find_iter = diff_set_dx_dy->lower_bound(diff_dy);
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		  assert((find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx));
		} else {
		  find_iter++;
		}
	 }
  }

  if((n == (nrounds - 1)) && (nrounds > 1)) {		  // Last round

	 uint32_t dx = 0;
	 uint32_t dy = 0;	
	 uint32_t dxx = 0;
	 uint32_t dyy = 0;	
	 double p_f = 0.0;
	 double p_add2 = 0.0;

	 if(nrounds == 2) { // Last round (n = 1) AND only two rounds - freely choose dx
		dx = diff_mset_p->begin()->dx;
		dy = diff_mset_p->begin()->dy;
		p_f = diff_mset_p->begin()->p;
		dxx = diff[n - 1].dx;
		dyy = 0;			  // to be computed
		//		p_add2 = max_xdp_add(A, dxx, dy, &dyy);
		p_add2 = max_xdp_add_lm(dxx, dy, &dyy);
		pn = p_add2 * p_f;	  // NOTE: overwritten by the next statement
		pn = xdp_xtea_f2_fk_approx(npairs, dxx, dx, dyy, round_key[n], round_delta[n], lsh_const, rsh_const); // adjust the probability to the round key
	 } else {

		dx = diff[n - 1].dy;
		dy = 0;

		differential_t diff_max_dy;
		diff_max_dy.dx = dx;  
		diff_max_dy.dy = 0;
		diff_max_dy.p = 0.0;

		// check if a diff with the same dx is already in the set
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator find_iter = diff_set_dx_dy->lower_bound(diff_max_dy);
		bool b_found = (find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx);
		if(!b_found) {				  // if not found, add new

		  pn = nz_xdp_xtea_f(A, dx, &dy, lsh_const, rsh_const);

		  diff_max_dy.dx = dx; 
		  diff_max_dy.dy = dy;
		  diff_max_dy.p = pn;

		  // Add the new diff to Dp only if it has better prob. than the min.
		  if(pn >= XTEA_XOR_P_THRES) {
			 const bool b_in_set = (diff_set_dx_dy->find(diff_max_dy) != diff_set_dx_dy->end());
			 if(!b_in_set) {
				uint32_t old_size = diff_set_dx_dy->size();
				diff_set_dx_dy->insert(diff_max_dy);
				uint32_t new_size = diff_set_dx_dy->size();
				if(old_size != new_size) {
				  diff_mset_p->insert(diff_max_dy);
				}
			 }
		  }

		} else {
		  assert((find_iter->dx == dx));

		  diff_max_dy = *find_iter;
		  // get the max among the available; end() is tested before the iterator is dereferenced
		  while((find_iter != diff_set_dx_dy->end()) && (find_iter->dx <= dx)) {

			 if(find_iter->p > diff_max_dy.p) {
				diff_max_dy = *find_iter;
			 }
			 find_iter++;
		  }
		}

		dx = diff_max_dy.dx;
		dy = diff_max_dy.dy;
		p_f = diff_max_dy.p;
		dxx = diff[n - 1].dx;
		dyy = 0;			  // to be computed
		//	 p_add2 = max_xdp_add(A, dxx, dy, &dyy);
		p_add2 = max_xdp_add_lm(dxx, dy, &dyy);
		pn = p_add2 * p_f;	  // NOTE: overwritten by the next statement
		pn = xdp_xtea_f2_fk_approx(npairs, dxx, dx, dyy, round_key[n], round_delta[n], lsh_const, rsh_const); // adjust the probability to the round key

	 }

	 // probability of the complete trail
	 double p = 1.0;
	 for(int i = 0; i < n; i++) {
		p *= diff[i].p;
	 }
	 p *= pn;

	 if((p >= *Bn) && (p != 1.0) && (p != 0.0)) { // skip the 0-diff trail (p = 1.0)
#if 1									  // DEBUG
		if (p > *Bn) {
		  printf("[%s:%d] %d | Update best found Bn: 2^%f -> 2^%f\n", __FILE__, __LINE__, n, log2(*Bn), log2(p));
		}
#endif
		diff[n].dx = dx;
		diff[n].dy = dyy;			  // !
		diff[n].p = pn;
		*Bn = p;
		B[n] = p;
		*dxx_init_in = dxx_init;
		for(int i = 0; i < nrounds; i++) {
		  trail[i].dx = diff[i].dx;
		  trail[i].dy = diff[i].dy;
		  trail[i].p = diff[i].p;
		  //		  printf("[%s:%d] %d | %8X %8X 2^%f\n", __FILE__, __LINE__, i, trail[i].dx, trail[i].dy, log2(trail[i].p));
		}
	 }
  }
}

/* ---- */

  // Last round of a trail with more than one round: choose the differential of the
  // round and, if the complete trail is at least as good as *Bn, store it as the best.
  if((n == (nrounds - 1)) && (nrounds > 1)) {		  // Last round

	 uint32_t dx = 0;
	 uint32_t dy = 0;	
	 uint32_t dxx = 0;
	 uint32_t dyy = 0;	
	 double p_f = 0.0;
	 double p_add2 = 0.0;

	 if(nrounds == 2) { // Last round (n = 1) AND only two rounds - freely choose dx
		dx = diff_mset_p->begin()->dx;
		dy = diff_mset_p->begin()->dy;
		p_f = diff_mset_p->begin()->p;
		dxx = diff[n - 1].dx;
		dyy = 0;			  // to be computed
		//		p_add2 = max_xdp_add(A, dxx, dy, &dyy);
		p_add2 = max_xdp_add_lm(dxx, dy, &dyy);
		pn = p_add2 * p_f;	  // NOTE: overwritten by the next statement
		pn = xdp_xtea_f2_fk_approx(npairs, dxx, dx, dyy, round_key[n], round_delta[n], lsh_const, rsh_const); // adjust the probability to the round key
	 } else {

		dx = diff[n - 1].dy;
		dy = 0;

		differential_t diff_max_dy;
		diff_max_dy.dx = dx;  
		diff_max_dy.dy = 0;
		diff_max_dy.p = 0.0;

		// check if a diff with the same dx is already in the set
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator find_iter = diff_set_dx_dy->lower_bound(diff_max_dy);
		bool b_found = (find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx);
		if(!b_found) {				  // if not found, add new

		  pn = nz_xdp_xtea_f(A, dx, &dy, lsh_const, rsh_const);

		  diff_max_dy.dx = dx; 
		  diff_max_dy.dy = dy;
		  diff_max_dy.p = pn;

		  // Add the new diff to Dp only if it has better prob. than the min.
		  // NOTE(review): rbegin() is dereferenced without an emptiness check - confirm that
		  // diff_mset_p cannot be empty here.
		  double p_min = diff_mset_p->rbegin()->p;
		  if(diff_max_dy.p >= p_min) {
			 diff_mset_p->insert(diff_max_dy);
		  }

		  diff_set_dx_dy->insert(diff_max_dy);
		  find_iter = diff_set_dx_dy->lower_bound(diff_max_dy);
		} 
		assert((find_iter->dx == dx));

		diff_max_dy = *find_iter;
		// get the max among the available. The end() test comes first so that end() is never
		// dereferenced; "<= dx" replaces "< dx + 1", which wraps for dx = 0xFFFFFFFF.
		while((find_iter != diff_set_dx_dy->end()) && (find_iter->dx <= dx)) {

		  if(find_iter->p > diff_max_dy.p) {
			 diff_max_dy = *find_iter;
		  }
		  find_iter++;
		}

		dx = diff_max_dy.dx;
		dy = diff_max_dy.dy;
		p_f = diff_max_dy.p;
		dxx = diff[n - 1].dx;
		dyy = 0;			  // to be computed
		//		p_add2 = max_xdp_add(A, dxx, dy, &dyy);
		p_add2 = max_xdp_add_lm(dxx, dy, &dyy);
		pn = p_add2 * p_f;	  // NOTE: overwritten by the next statement
		pn = xdp_xtea_f2_fk_approx(npairs, dxx, dx, dyy, round_key[n], round_delta[n], lsh_const, rsh_const); // adjust the probability to the round key

	 }

	 // probability of the complete trail
	 double p = 1.0;
	 for(int i = 0; i < n; i++) {
		p *= diff[i].p;
	 }
	 p *= pn;

	 if((p >= *Bn) && (p != 1.0) && (p != 0.0)) { // skip the 0-diff trail (p = 1.0)
#if 1									  // DEBUG
		if (p > *Bn) {
		  printf("[%s:%d] %d | Update best found Bn: 2^%f -> 2^%f\n", __FILE__, __LINE__, n, log2(*Bn), log2(p));
		}
#endif
		diff[n].dx = dx;
		diff[n].dy = dyy;			  // !
		diff[n].p = pn;
		*Bn = p;
		B[n] = p;
		*dxx_init_in = dxx_init;
		for(int i = 0; i < nrounds; i++) {
		  trail[i].dx = diff[i].dx;
		  trail[i].dy = diff[i].dy;
		  trail[i].p = diff[i].p;
		  //		  printf("[%s:%d] %d | %8X %8X 2^%f\n", __FILE__, __LINE__, i, trail[i].dx, trail[i].dy, log2(trail[i].p));
		}
	 }
  }

/* --- */

  // Copy the vertices of V into a vector and sort it with simon_diff_vec_comp
  // (per the original note: sorted by probability)
  std::vector<std::pair<simon_diff_graph_node_t, simon_diff_graph_node_t> > V_vec(V.begin(), V.end());
  std::sort(V_vec.begin(), V_vec.end(), &simon_diff_vec_comp);

#if 1									  // DEBUG
  // The container size is converted explicitly so that the argument matches the %d conversion
  printf("[%s:%d] #vertices V_vec.size %d\n", __FILE__, __LINE__, static_cast<int>(V.size()));
#endif  // #if 1									  // DEBUG
#if 1									  // DEBUG
  for(uint32_t i = 0; i < V_vec.size(); i++) {
	 const simon_diff_graph_node_t& node = V_vec[i].second; // no copy needed for printing
	 printf("[%s:%d] node #%5d: %2d (%8X, %8X)\n", __FILE__, __LINE__, i, node.level, node.node[0], node.node[1]);
  }
#endif  // #if 1									  // DEBUG

  assert(V_vec.size() == V.size());



/* --- */
/**
 * Ordering predicate for \p simon_diff_graph_node_t, usable as the key
 * comparator of an ordered container (e.g. std::map).
 *
 * Nodes are compared lexicographically on (level, node[1], node[0]).
 *
 * \param a left-hand node.
 * \param b right-hand node.
 * \return true iff \p a is ordered strictly before \p b.
 *
 * \note Two nodes that agree on all three fields compare as "not less" in
 *       both directions. This is required of a strict weak ordering:
 *       comp(a, a) must be false, otherwise equal keys are never recognised
 *       as equivalent and ordered containers misbehave.
 */
struct simon_diff_graph_node_comp
  : std::binary_function<simon_diff_graph_node_t, simon_diff_graph_node_t, bool>
{
  bool operator()(simon_diff_graph_node_t const& a,
						simon_diff_graph_node_t const& b) const
  {
	 // most significant key: the level
	 if(a.level != b.level) {
		return (a.level < b.level);
	 }
	 // same level: decide on node[1]
	 if(a.node[1] != b.node[1]) {
		return (a.node[1] < b.node[1]);
	 }
	 // same level and node[1]: decide on node[0]; yields false when equal
	 return (a.node[0] < b.node[0]);
  }
};

/* --- */

/**
 * Ordering predicate for \p simon_diff_graph_node_t, usable as the key
 * comparator of an ordered container (e.g. std::map).
 *
 * The comparison is lexicographic on (level, node[1], node[0]): a
 * lower-priority field is consulted only when every higher-priority field
 * is equal.
 *
 * \param a left-hand node.
 * \param b right-hand node.
 * \return true iff \p a is ordered strictly before \p b.
 *
 * \note A lower-priority field must not be inspected when a higher-priority
 *       field of \p a is already greater than that of \p b; doing so can make
 *       both comp(a, b) and comp(b, a) true, which violates the asymmetry
 *       demanded of a strict weak ordering.
 */
struct simon_diff_graph_node_comp
  : std::binary_function<simon_diff_graph_node_t, simon_diff_graph_node_t, bool>
{
  bool operator()(simon_diff_graph_node_t const& a,
						simon_diff_graph_node_t const& b) const
  {
	 bool b_less = false;
	 if(a.level < b.level) {
		b_less = true;
	 } else if(!(b.level < a.level)) { // levels are equal
		if(a.node[1] < b.node[1]) {
		  b_less = true;
		} else if(!(b.node[1] < a.node[1])) { // node[1] are equal
		  b_less = (a.node[0] < b.node[0]);
		}
	 }
	 return b_less;
  }
};


/* ---- */

/*

A binary predicate that takes two element keys as arguments and returns a bool. 

comp() shall return true if a is considered to go before b in the strict weak ordering the function defines.

The map object uses this expression to determine both the order the elements follow in the container and whether two element keys are equivalent (by comparing them reflexively: they are equivalent if !comp(a,b) && !comp(b,a)). No two elements in a map container can have equivalent keys.

This can be a function pointer or a function object (see constructor for an example). This defaults to less<T>, which returns the same as applying the less-than operator (a<b).

Aliased as member type map::key_compare.

 */

/**
 * Equality predicate for \p simon_diff_graph_node_t.
 *
 * Two nodes are considered equal when their \p level, \p node[0] and
 * \p node[1] fields all match; no other field takes part in the comparison.
 *
 * \param a first node.
 * \param b second node.
 * \return true iff the three fields above are pairwise equal.
 */
struct simon_diff_graph_node_equal_to
  : std::binary_function<simon_diff_graph_node_t, simon_diff_graph_node_t, bool>
{
  bool operator()(simon_diff_graph_node_t const& a,
						simon_diff_graph_node_t const& b) const
  {
	 // bail out on the first field that differs
	 if(!(a.level == b.level)) {
		return false;
	 }
	 if(!(a.node[0] == b.node[0])) {
		return false;
	 }
	 return (a.node[1] == b.node[1]);
  }
};

/**
 * Hash functor for \p simon_diff_graph_node_t.
 *
 * Folds \p level, \p node[0] and \p node[1] (in that order) into a single
 * value with boost::hash_combine, starting from a zero seed.
 *
 * \param a node to hash.
 * \return the combined hash value.
 */
struct simon_diff_graph_node_hash
  : std::unary_function<simon_diff_graph_node_t, std::size_t>
{
  std::size_t operator()(simon_diff_graph_node_t const& a) const
  {
	 std::size_t digest = 0;

	 boost::hash_combine(digest, a.level);
	 // node[0] (dx) is mixed in first, then node[1] (dy)
	 for(int k = 0; k < 2; k++) {
		boost::hash_combine(digest, a.node[k]);
	 }

	 return digest;
  }
};


#if 0
/**
 * Print every node stored as a key of the node map \p V, one per line,
 * together with a running 1-based counter.
 *
 * The parameter is a boost::unordered_map so that it agrees with the
 * iterator type used in the body; the hash / equal_to template arguments
 * only make sense for the unordered container.
 *
 * \param V node map (keys are the nodes that get printed).
 */
void simon_diff_graph_print_nodes(boost::unordered_map<simon_diff_graph_node_t, uint32_t, simon_diff_graph_node_hash, simon_diff_graph_node_equal_to> V)
{
  boost::unordered_map<simon_diff_graph_node_t, uint32_t, simon_diff_graph_node_hash, simon_diff_graph_node_equal_to>::iterator node_iter;
  uint32_t node_cnt =0;
  for(node_iter = V.begin(); node_iter != V.end(); node_iter++) {
	 node_cnt++;
	 simon_diff_graph_node_t node = node_iter->first;
#if 1									  // DEBUG
	 printf("[%s:%d] node #%5d: %2d(%8X, %8X)\n", __FILE__, __LINE__, node_cnt, node.level, node.node[0], node.node[1]);
#endif  // #if 1									  // DEBUG
  }
}
#endif

/*
 * Compute the in- and out-degree of the nodes of 
 * a differential trail graph for Simon (using \p simon_diff_graph_edge_t)
 */
void simon_diff_graph_extract_nodes(std::vector<simon_diff_graph_edge_t> E,
												boost::unordered_map<simon_diff_graph_node_t, uint32_t, simon_diff_graph_node_hash, simon_diff_graph_node_equal_to>* V)
//												std::map<simon_diff_graph_node_t, uint32_t, simon_diff_graph_node_hash, simon_diff_graph_node_equal_to>* V)
{
  for(uint32_t i = 0; i < E.size(); i++) {

	 simon_diff_graph_node_t new_node;
	 new_node.level = E.at(i).level;
	 new_node.node[0] = E.at(i).node_from[0];
	 new_node.node[1] = E.at(i).node_from[1];
	 new_node.p_sum = E.at(i).p;
	 new_node.deg_in = 0;
	 new_node.deg_out = 1;

	 bool b_is_new = (V->find(new_node) == V->end());
	 if(b_is_new) {
		simon_diff_graph_node_hash node_hash;
		uint32_t node_hash_val = node_hash(new_node);
		std::pair<simon_diff_graph_node_t, uint32_t> new_pair (new_node, node_hash_val);
		V->insert(new_pair);
#if 0									  // DEBUG
		printf("[%s:%d] Add new node: %2d(%8X, %8X)\n", __FILE__, __LINE__, new_node.level, new_node.node[0], new_node.node[1]);
#endif  // #if 1									  // DEBUG
	 }

  }
}

/* --- */

/* 

  4000000 11000000 1.000000000  
  1000000  4000000 0.250000000
        0  1000000 0.250000000
  1000000        0 1.000000000
  4000000  1000000 0.250000000
 11000000  4000000 0.250000000
 40000000 11000000 0.062500000
 11000001 40000000 0.250000000
  4000004 11000001 0.015625000
  1000011  4000004 0.062500000
       41  1000011 0.015625000
  1000015       41 0.062500000
  4000015  1000015 0.003906250
 11000041  4000015 0.003906250
 40000001 11000041 0.007812500 11000006 40000001 0.062500000  6000001 11000006 0.003906250  1000000  6000001 0.031250000        0  1000000 0.250000000  1000000        0 1.000000000  4000000  1000000 0.250000000 11000000  4000000 0.250000000 

 */


/* --- */

		// Build the graph edge that ends at the current level; there is
		// nothing to connect to at level 0, so that case is skipped.
		if(level > 0) {

		  simon_diff_graph_edge_t new_edge;
		  // the edge is filed under the level it starts from
		  new_edge.level = level - 1;

		  // default case: edge from the previous difference pair to the current one
		  new_edge.node_from[0] = diff_prev.dx;
		  new_edge.node_from[1] = diff_prev.dy;
		  new_edge.node_to[0] = diff.dx;
		  new_edge.node_to[1] = diff.dy;
		  new_edge.p = diff.p;
		  new_edge.cnt = 1;

		  // Special case for level 1: only node_from[1] ends up different
		  // (diff.dx ^ diff_prev.dy instead of diff_prev.dy); the other three
		  // assignments repeat the values already set above.
		  // NOTE(review): the reason for the XOR adjustment is not visible in
		  // this fragment -- confirm against the caller.
		  if(level == 1) {

		  new_edge.node_from[0] = diff_prev.dx;
		  new_edge.node_from[1] = diff.dx ^ diff_prev.dy;

		  new_edge.node_to[0] = diff.dx;
		  new_edge.node_to[1] = diff.dy;

		  }


/* --- */

  //  std::vector<differential_t> D;
		//	 D.push_back(diff);
#if 0									  // DEBUG
		//	 printf("%X %X %f\n", dx, dy, p);
		//		printf("%X %X %f\r", dx, dy, p);
		//		fflush(stdout);
#endif

/* --- */

#if 0
  // Read "dx dy p" records (two hex words followed by a double) from fp.
  // fscanf() returns the number of fields it converted, so requiring all 3
  // ends the loop both at end-of-file and on malformed input. Testing against
  // EOF alone would loop forever on a token that fails to match, because no
  // input is consumed in that case.
  while(fscanf(fp, "%X %X %lf", &dx, &dy, &p) == 3) {
	 differential_t diff = {dx, dy, 0, p};
	 //	 D.push_back(diff);
#if 1									  // DEBUG
	 //	 printf("%X %X %f\n", dx, dy, p);
	 printf("%X %X %f\r", dx, dy, p);
	 fflush(stdout);
#endif
  }
#endif


/* --- */

  time_t rawtime;
  time(&rawtime);
  FILE* fp = fopen(SIMON_CLUSTER_TRAILS_DATFILE, "w");
  fprintf(fp, "\nTime: %s", ctime (&rawtime));
  fclose(fp);



/* --- */

/*
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():740]:
Verified 8 R differential (       0        1) -> (    4000     1101) | 2^27.00 CP pairs
 Final probability p = 2^-21.117357
  [./tests/simon-xor-threshold-search-tests.cc:746] temp_edp 0.000000 (2^-21.117357) nkeys 0
*/

/* --- */
/* 
The reason to use a partial rather than full DDT is the fact that ARX algorithms typically achieve non-linearity through operations for which it is infeasible to compute the full DDT. For example, \textsc{Simon} uses the \texttt{AND} operation in combination with bit rotation while \textsc{Speck} uses modular addition as their non-linear components. Computing a full DDT for $n$-bit words in the first case would require $4 \times 2^{2n}$ Bytes and in the second case $4 \times 2^{3n}$ Bytes of memory, which is clearly infeasible for $n > 16$.

a count of the best differentials encountered so far



We apply the threshold search method outlined above to find high probability differential trails for reduced round versions of \textsc{Simon} and \textsc{Speck}. In addition, we extend the technique to search for differentials.


This method is based on Matsui's branch-and-bound 

We propose an extension of the threshold search technique to the case of differentials. Namely, 

For practical purposes we assume 

 */

/* --- */

/*
Inconsistency in the trail of Lucks on Simon48.

#--- [./tests/speck-xor-threshold-search-tests.cc:266] Tests, WORD_SIZE  = 24, MASK =   FFFFFF
[./tests/speck-xor-threshold-search-tests.cc:276] WORD_SIZE 24 NROUNDS 10 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 6  SPECK_CLUSTER_MAX_HW 6 SPECK_EPS 2^-15.00
[./tests/speck-xor-threshold-search-tests.cc:204] Lucks trail BEFORE:
0:   480B01    94009 1.000000
0:   480B01    94009 1.000000 (2^0.000000)
1:    81802   42084A 1.000000 (2^0.000000)
2:   400052   504200 1.000000 (2^0.000000)
3:   820200     1202 1.000000 (2^0.000000)
4:     9000       10 1.000000 (2^0.000000)
5:       80        0 1.000000 (2^0.000000)
6:   800000   800000 1.000000 (2^0.000000)
7:   808000   808004 1.000000 (2^0.000000)
8:   800084   8400A0 1.000000 (2^0.000000)
9:     80A0   2085A4 1.000000 (2^0.000000)
10:     8524   84A805 1.000000 (2^0.000000)

(   1480B    94009 ->    81802) 0.003906 2^-8.00  <-   42084A !=   42184A
(   20818   42084A ->   400052) 0.007812 2^-7.00
(  524000   504200 ->   820200) 0.031250 2^-5.00
(    8202     1202 ->     9000) 0.062500 2^-4.00
(      90       10 ->       80) 0.250000 2^-2.00
(  800000        0 ->   800000) 1.000000 2^0.00
(    8000   800000 ->   808000) 0.500000 2^-1.00
(    8080   808004 ->   800084) 0.125000 2^-3.00
(  848000   8400A0 ->     80A0) 0.062500 2^-4.00
(  A00080   2085A4 ->     8524) 0.000000 2^-inf  <-   84A805 !=    4A805  |  ->   808424 0.007812
p_tot = 0.000000000000000 = 2^-inf

(   1480B 94009 42084A !=   42184A

p_tot = 0.000000000000000 = 2^-inf
[./src/speck-xor-threshold-search.cc:538] Verify P for one round (2^27.000000 CPs)...
R# 0  Input differences:   480B01    94009
R# 0 Output differences:    81802   42084A
THE  0: 0.003906 (2^-8.000000)
EXP  0: 0.000000 (2^-inf)

R# 1  Input differences:    81802   42084A
R# 1 Output differences:   400052   504200
THE  1: 0.007812 (2^-7.000000)
EXP  1: 0.007813 (2^-6.999911)

R# 2  Input differences:   400052   504200
R# 2 Output differences:   820200     1202
THE  2: 0.031250 (2^-5.000000)
EXP  2: 0.031260 (2^-4.999533)

R# 3  Input differences:   820200     1202
R# 3 Output differences:     9000       10
THE  3: 0.062500 (2^-4.000000)
EXP  3: 0.062490 (2^-4.000231)

R# 4  Input differences:     9000       10
R# 4 Output differences:       80        0
THE  4: 0.250000 (2^-2.000000)
EXP  4: 0.250072 (2^-1.999586)

R# 5  Input differences:       80        0
R# 5 Output differences:   800000   800000
THE  5: 1.000000 (2^0.000000)
EXP  5: 1.000000 (2^0.000000)

R# 6  Input differences:   800000   800000
R# 6 Output differences:   808000   808004
THE  6: 0.500000 (2^-1.000000)
EXP  6: 0.500001 (2^-0.999996)

R# 7  Input differences:   808000   808004
R# 7 Output differences:   800084   8400A0
THE  7: 0.125000 (2^-3.000000)
EXP  7: 0.125001 (2^-2.999988)

R# 8  Input differences:   800084   8400A0
R# 8 Output differences:     80A0   2085A4
THE  8: 0.062500 (2^-4.000000)
EXP  8: 0.062485 (2^-4.000345)

R# 9  Input differences:     80A0   2085A4
R# 9 Output differences:     8524   84A805
THE  9: 0.007812 (2^-7.000000)
EXP  9: 0.000000 (2^-inf)

OK
[./src/speck-xor-threshold-search.cc:455] Verify P of differentials (2^27.000000 CPs)...
Input differences:   480B01    94009

R# 0 Output differences:    81802   42084A
THE  1: 0.003906 (2^-8.000000)    81802 ->   42084A
EXP  1: 0.000000 (2^-inf)    81802 ->   42084A

R# 1 Output differences:   400052   504200
THE  2: 0.000031 (2^-15.000000)   400052 ->   504200
EXP  2: 0.000085 (2^-13.529850)   400052 ->   504200

R# 2 Output differences:   820200     1202
THE  3: 0.000001 (2^-20.000000)   820200 ->     1202
EXP  3: 0.000002 (2^-18.700792)   820200 ->     1202

R# 3 Output differences:     9000       10
THE  4: 0.000000 (2^-24.000000)     9000 ->       10
EXP  4: 0.000000 (2^-22.678072)     9000 ->       10

R# 4 Output differences:       80        0
THE  5: 0.000000 (2^-26.000000)       80 ->        0
EXP  5: 0.000000 (2^-24.415037)       80 ->        0

R# 5 Output differences:   800000   800000
THE  6: 0.000000 (2^-26.000000)   800000 ->   800000
EXP  6: 0.000000 (2^-25.000000)   800000 ->   800000

R# 6 Output differences:   808000   808004
THE  7: 0.000000 (2^-27.000000)   808000 ->   808004
EXP  7: 0.000000 (2^-25.415037)   808000 ->   808004

R# 7 Output differences:   800084   8400A0
THE  8: 0.000000 (2^-30.000000)   800084 ->   8400A0
EXP  8: 0.000000 (2^-inf)   800084 ->   8400A0

R# 8 Output differences:     80A0   2085A4
THE  9: 0.000000 (2^-34.000000)     80A0 ->   2085A4
EXP  9: 0.000000 (2^-inf)     80A0 ->   2085A4

R# 9 Output differences:     8524   84A805
THE 10: 0.000000 (2^-41.000000)     8524 ->   84A805
EXP 10: 0.000000 (2^-inf)     8524 ->   84A805

OK

[./tests/speck-xor-threshold-search-tests.cc:286] WORD_SIZE 24 NROUNDS 10 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 6  SPECK_CLUSTER_MAX_HW 6 SPECK_EPS 2^-15.00

real    3m30.258s
user    3m29.653s
sys     0m0.040s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$



*/

/* 

Inconsistency in the trail of Lucks on Simon64.

	[./tests/speck-xor-threshold-search-tests.cc:183] Lucks trail BEFORE:
 0: 10420040 40024000 1.000000
 0: 10420040 40024000 1.000000 (2^0.000000)
 1:   120200      202 1.000000 (2^0.000000)
 2:     1000       10 1.000000 (2^0.000000)
 3:        0       80 1.000000 (2^0.000000)
 4: 40000000 40000000 1.000000 (2^0.000000)
 5: 40400000 40400002 1.000000 (2^0.000000)
 6: 40004002 42004010 1.000000 (2^0.000000)
 7: 404040D0 50424052 1.000000 (2^0.000000)
 8: 80020092  2100200 1.000000 (2^0.000000)
 9: 90900000 80101000 1.000000 (2^0.000000)
10: 80808000 80000004 1.000000 (2^0.000000)
11: 80808084 808080A0 1.000000 (2^0.000000)
12:  4000020    40524 1.000000 (2^0.000000)
13: 20000524 20202C04 1.000000 (2^0.000000)

(40104200 40024000 ->   120200) 0.031250 2^-5.00
(    1202      202 ->     1000) 0.125000 2^-3.00
(      10       10 ->        0) 0.500000 2^-1.00
(       0       80 -> 40000000) 0.000000 2^-inf  <- 40000000 != 40000400  |  ->       80 0.500000
(  400000 40000000 -> 40400000) 0.250000 2^-2.00
(  404000 40400002 -> 40004002) 0.062500 2^-4.00
( 2400040 42004010 -> 404040D0) 0.007812 2^-7.00
(D0404040 50424052 -> 80020092) 0.001953 2^-9.00
(92800200  2100200 -> 90900000) 0.031250 2^-5.00
(  909000 80101000 -> 80808000) 0.062500 2^-4.00
(  808080 80000004 -> 80808084) 0.062500 2^-4.00
(84808080 808080A0 ->  4000020) 0.031250 2^-5.00
(20040000    40524 -> 20000524) 0.015625 2^-6.00
p_tot = 0.000000000000000 = 2^-inf
[./src/speck-xor-threshold-search.cc:402] Verify P of differentials (2^27.000000 CPs)...
Input differences: 10420040 40024000

R# 0 Output differences:   120200      202
THE  1: 0.031250 (2^-5.000000)   120200 ->      202
EXP  1: 0.031235 (2^-5.000689)   120200 ->      202

R# 1 Output differences:     1000       10
THE  2: 0.003906 (2^-8.000000)     1000 ->       10
EXP  2: 0.003908 (2^-7.999472)     1000 ->       10

R# 2 Output differences:        0       80
THE  3: 0.001953 (2^-9.000000)        0 ->       80
EXP  3: 0.001958 (2^-8.996482)        0 ->       80

R# 3 Output differences: 40000000 40000000
THE  4: 0.000977 (2^-10.000000) 40000000 -> 40000000
EXP  4: 0.000000 (2^-inf) 40000000 -> 40000000

R# 4 Output differences: 40400000 40400002
THE  5: 0.000244 (2^-12.000000) 40400000 -> 40400002
EXP  5: 0.000000 (2^-inf) 40400000 -> 40400002

R# 5 Output differences: 40004002 42004010
THE  6: 0.000015 (2^-16.000000) 40004002 -> 42004010
EXP  6: 0.000000 (2^-inf) 40004002 -> 42004010

R# 6 Output differences: 404040D0 50424052
THE  7: 0.000000 (2^-23.000000) 404040D0 -> 50424052
EXP  7: 0.000000 (2^-inf) 404040D0 -> 50424052

R# 7 Output differences: 80020092  2100200
THE  8: 0.000000 (2^-32.000000) 80020092 ->  2100200
EXP  8: 0.000000 (2^-inf) 80020092 ->  2100200

R# 8 Output differences: 90900000 80101000
THE  9: 0.000000 (2^-37.000000) 90900000 -> 80101000
EXP  9: 0.000000 (2^-inf) 90900000 -> 80101000

R# 9 Output differences: 80808000 80000004
THE 10: 0.000000 (2^-41.000000) 80808000 -> 80000004
EXP 10: 0.000000 (2^-inf) 80808000 -> 80000004

R#10 Output differences: 80808084 808080A0
THE 11: 0.000000 (2^-45.000000) 80808084 -> 808080A0
EXP 11: 0.000000 (2^-inf) 80808084 -> 808080A0

R#11 Output differences:  4000020    40524
THE 12: 0.000000 (2^-50.000000)  4000020 ->    40524
EXP 12: 0.000000 (2^-inf)  4000020 ->    40524

R#12 Output differences: 20000524 20202C04
THE 13: 0.000000 (2^-56.000000) 20000524 -> 20202C04
EXP 13: 0.000000 (2^-inf) 20000524 -> 20202C04

OK

[./tests/speck-xor-threshold-search-tests.cc:265] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 6  SPECK_CLUSTER_MAX_HW 6 SPECK_EPS 2^-15.00

Possible corrected trail with p = 2^-56 which is also inconsistent:

[./tests/speck-xor-threshold-search-tests.cc:183] Lucks trail BEFORE:
0: 10420040 40024000 1.000000
0: 10420040 40024000 1.000000 (2^0.000000)
1:   120200      202 1.000000 (2^0.000000)
2:     1000       10 1.000000 (2^0.000000)
3:        0 40000000 1.000000 (2^0.000000)   <-- correction
4: 40000000 40000000 1.000000 (2^0.000000)
5: 40400000 40400002 1.000000 (2^0.000000)
6: 40004002 42004010 1.000000 (2^0.000000)
7: 404040D0 50424052 1.000000 (2^0.000000)
8: 80020092  2100200 1.000000 (2^0.000000)
9: 90900000 80101000 1.000000 (2^0.000000)
10: 80808000 80000004 1.000000 (2^0.000000)
11: 80808084 808080A0 1.000000 (2^0.000000)
12:  4000020    40524 1.000000 (2^0.000000)
13: 20000524 20202C04 1.000000 (2^0.000000)

(40104200 40024000 ->   120200) 0.031250 2^-5.00
(    1202      202 ->     1000) 0.125000 2^-3.00
(      10       10 ->        0) 0.500000 2^-1.00  <- 40000000 !=       80
(       0 40000000 -> 40000000) 0.500000 2^-1.00  <- 40000000 != 40000002
(  400000 40000000 -> 40400000) 0.250000 2^-2.00
(  404000 40400002 -> 40004002) 0.062500 2^-4.00
( 2400040 42004010 -> 404040D0) 0.007812 2^-7.00
(D0404040 50424052 -> 80020092) 0.001953 2^-9.00
(92800200  2100200 -> 90900000) 0.031250 2^-5.00
(  909000 80101000 -> 80808000) 0.062500 2^-4.00
(  808080 80000004 -> 80808084) 0.062500 2^-4.00
(84808080 808080A0 ->  4000020) 0.031250 2^-5.00
(20040000    40524 -> 20000524) 0.015625 2^-6.00
p_tot = 0.000000000000000 = 2^-56.000000
[./src/speck-xor-threshold-search.cc:402] Verify P of differentials (2^28.000000 CPs)...
Input differences: 10420040 40024000

R# 0 Output differences:   120200      202
THE  1: 0.031250 (2^-5.000000)   120200 ->      202
EXP  1: 0.031243 (2^-5.000340)   120200 ->      202

R# 1 Output differences:     1000       10
THE  2: 0.003906 (2^-8.000000)     1000 ->       10
EXP  2: 0.003907 (2^-7.999604)     1000 ->       10

R# 2 Output differences:        0 40000000
THE  3: 0.001953 (2^-9.000000)        0 -> 40000000
EXP  3: 0.000000 (2^-inf)        0 -> 40000000

R# 3 Output differences: 40000000 40000000
THE  4: 0.000977 (2^-10.000000) 40000000 -> 40000000
EXP  4: 0.000000 (2^-inf) 40000000 -> 40000000

R# 4 Output differences: 40400000 40400002
THE  5: 0.000244 (2^-12.000000) 40400000 -> 40400002
EXP  5: 0.000000 (2^-inf) 40400000 -> 40400002

R# 5 Output differences: 40004002 42004010
THE  6: 0.000015 (2^-16.000000) 40004002 -> 42004010
EXP  6: 0.000000 (2^-inf) 40004002 -> 42004010

R# 6 Output differences: 404040D0 50424052
THE  7: 0.000000 (2^-23.000000) 404040D0 -> 50424052
EXP  7: 0.000000 (2^-inf) 404040D0 -> 50424052

R# 7 Output differences: 80020092  2100200
THE  8: 0.000000 (2^-32.000000) 80020092 ->  2100200
EXP  8: 0.000000 (2^-inf) 80020092 ->  2100200

R# 8 Output differences: 90900000 80101000
THE  9: 0.000000 (2^-37.000000) 90900000 -> 80101000
EXP  9: 0.000000 (2^-inf) 90900000 -> 80101000

R# 9 Output differences: 80808000 80000004
THE 10: 0.000000 (2^-41.000000) 80808000 -> 80000004
EXP 10: 0.000000 (2^-inf) 80808000 -> 80000004

R#10 Output differences: 80808084 808080A0
THE 11: 0.000000 (2^-45.000000) 80808084 -> 808080A0
EXP 11: 0.000000 (2^-inf) 80808084 -> 808080A0

R#11 Output differences:  4000020    40524
THE 12: 0.000000 (2^-50.000000)  4000020 ->    40524
EXP 12: 0.000000 (2^-inf)  4000020 ->    40524

R#12 Output differences: 20000524 20202C04
THE 13: 0.000000 (2^-56.000000) 20000524 -> 20202C04
EXP 13: 0.000000 (2^-inf) 20000524 -> 20202C04

OK

[./tests/speck-xor-threshold-search-tests.cc:265] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 4194304 2^22.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 6  SPECK_CLUSTER_MAX_HW 6 SPECK_EPS 2^-15.00

real    4m34.807s
user    4m34.073s
sys     0m0.000s


 */
/* --- */

/* 
  for(uint32_t i = 0; i < NROUNDS; i++) {
	 trail[i] = g_lucks_trail[i+1];
	 uint32_t dx = RROT(g_lucks_trail[i].dx, right_rot_const);
	 uint32_t dy = g_lucks_trail[i].dy;
	 uint32_t dz = g_lucks_trail[i+1].dx;
	 double p = xdp_add_lm(dx, dy, dz);
	 trail[i].p = p;
#if 1
	 printf("(%8X %8X -> %8X) %f 2^%4.2f ", dx, dy, dz, p, log2(p));
	 uint32_t dyy = LROT(dy, left_rot_const) ^ dz;
	 //	 assert(dyy == g_lucks_trail[i+1].dy);
	 if(!(dyy == g_lucks_trail[i+1].dy)) {
		printf(" <- %8X != %8X ", g_lucks_trail[i+1].dy, dyy);
	 }
	 if(p == 0.0) {
		uint32_t dz_max = 0;
		double p_max = max_xdp_add_lm(dx, dy, &dz_max);
		printf(" |  -> %8X %f", dz_max, p_max);
		trail[i+1].dx = dz_max;
		trail[i].p = p_max;
	 }
	 printf("\n");
#endif
  }

#if 1									  // DEBUG
  printf("[%s:%d] Lucks trail:\n", __FILE__, __LINE__);
  double p_tot = 1.0;
  printf("%2d: %8X -> %8X %f\n", 0, dx_init, dy_init, 1.0);
  for(uint32_t i = 0; i < NROUNDS; i++) {
	 printf("%2d: %8X -> %8X %f (2^%f)\n", i+1, trail[i].dx, trail[i].dy, trail[i].p, log2(trail[i].p));
	 p_tot *= trail[i].p;
  }
  printf("p_tot = %16.15f = 2^%f\n", p_tot, log2(p_tot));
#endif  // #if 0									  // DEBUG


 */

/* 
	[./src/speck-xor-threshold-search.cc:1677] this: 2^-58.000000 (best: 2^-63.000185)

                                        {0x50400092, 0x10404000, 0, 1.000000},
   [./src/speck-xor-threshold-search.cc:1682] 82100000   120000 2^-5.000000
	[./src/speck-xor-threshold-search.cc:1682]   901000     1000 2^-4.000000
	[./src/speck-xor-threshold-search.cc:1682]     8010       10 2^-3.000000
	[./src/speck-xor-threshold-search.cc:1682] 10000090 10000010 2^-3.000000
	[./src/speck-xor-threshold-search.cc:1682] 80100010   100090 2^-3.000000
	[./src/speck-xor-threshold-search.cc:1682] 10901090 10101410 2^-6.000000
	[./src/speck-xor-threshold-search.cc:1682] 8000BC00   801C80 2^-7.000000
	[./src/speck-xor-threshold-search.cc:1682]     E404  4000004 2^-12.000000
	[./src/speck-xor-threshold-search.cc:1682]       20 20000000 2^-5.000000
	[./src/speck-xor-threshold-search.cc:1682]        0        1 2^-1.000000
	[./src/speck-xor-threshold-search.cc:1682]        1        9 2^-1.000000
	[./src/speck-xor-threshold-search.cc:1682]  1000009  1000041 2^-3.000000
	[./src/speck-xor-threshold-search.cc:1682]  8010041    10249 2^-5.000000
 | 0.000000 2^-58.000000 (best: 0.000000 2^-63.000185)


  {0x50400092, 0x10404000, 0, 1.000000},
  {0x82100000, 0x120000, 0, (1.0 / (double)(1ULL << 5))},
  {0x901000, 0x1000, 0, (1.0 / (double)(1ULL << 4))},
  {0x8010, 0x10, 0, (1.0 / (double)(1ULL << 3))},
  {0x10000090, 0x10000010, 0, (1.0 / (double)(1ULL << 3))},
  {0x80100010, 0x100090, 0, (1.0 / (double)(1ULL << 3))},
  {0x10901090, 0x10101410, 0, (1.0 / (double)(1ULL << 6))},
  {0x8000BC00, 0x801C80, 0, (1.0 / (double)(1ULL << 7))},
  {0xE404, 0x4000004, 0, (1.0 / (double)(1ULL << 12))},
  {0x20, 0x20000000, 0, (1.0 / (double)(1ULL << 5))},
  {0x0, 0x1, 0, (1.0 / (double)(1ULL << 1))},
  {0x1, 0x9, 0, (1.0 / (double)(1ULL << 1))},
  {0x1000009, 0x1000041, 0, (1.0 / (double)(1ULL << 3))},
  {0x8010041, 0x10249, 0, (1.0 / (double)(1ULL << 5))},
 

 */

/* 
	[./tests/speck-xor-threshold-search-tests.cc:67] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-24.000000
B[ 7] = 2^-31.000000
B[ 8] = 2^-36.000000
B[ 9] = 2^-41.000000
B[10] = 2^-48.000000
B[11] = 2^-55.000000
B[12] = 2^-63.000000
[./tests/speck-xor-threshold-search-tests.cc:74] Final trail:
0: 50400092 -> 10404000 1.000000
1: 82100000 ->   120000 0.031250 (2^-5.000000)
2:   901000 ->     1000 0.062500 (2^-4.000000)
3:     8010 ->       10 0.125000 (2^-3.000000)
4: 10000090 -> 10000010 0.125000 (2^-3.000000)
5: 80100010 ->   100090 0.125000 (2^-3.000000)
6: 10901090 -> 10101410 0.015625 (2^-6.000000)
7: 80008400 ->   802480 0.015625 (2^-6.000000)
8:     2404 ->  4010004 0.031250 (2^-5.000000)
9:    10020 -> 20090000 0.062500 (2^-4.000000)
10:    90100 ->   410101 0.062500 (2^-4.000000)
11:   410800 ->  2490008 0.031250 (2^-5.000000)
12:  2494100 -> 10014140 0.007812 (2^-7.000000)
13: 10010801 -> 900B0201 0.003906 (2^-8.000000)
p_tot = 0.000000000000000 = 2^-63.000000


 */

/* 
#--- [./tests/speck-xor-threshold-search-tests.cc:146] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./tests/speck-xor-threshold-search-tests.cc:156] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 33554432 2^25.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 7 SPECK_EPS 2^-35.00
Input diffs:   802490 10800004
B[ 0] 2^0.000000 | 80808020  4808000 2^-5.000000
B[ 1] 2^-1.000000 | 24000080    40080 2^-5.000000
B[ 2] 2^-3.000000 | 80200080 80000480 2^-3.000000
B[ 3] 2^-6.000000 |   802480   800084 2^-4.000000
B[ 4] 2^-11.000000 | 808080A0 84808480 2^-5.000000
B[ 5] 2^-16.000000 | 24000400    42004 2^-6.000000
B[ 6] 2^-21.000000 |   202000    12020 2^-4.000000
B[ 7] 2^-29.000000 |    10000    80100 2^-3.000000
B[ 8] 2^-34.000000 |    80000   480800 2^-2.000000
B[ 9] 2^-38.000000 |   480000  2084000 2^-3.000000
B[10] 2^-44.000000 |  2080800 124A0800 2^-4.000000
B[11] 2^-51.000000 | 12480008 80184008 2^-7.000092
B[12] 2^-58.000000 | 880A0808 88C8084C 2^-7.000092
[./src/speck-xor-threshold-search.cc:1642] trail_len 13
[./src/speck-xor-threshold-search.cc:1698] Add initial trail: 2^-58.000185 | 0
[./src/speck-xor-threshold-search.cc:1703] Initial trail: 13 R (  802490 10800004) -> (880A0808 88C8084C) : [         1 trails]  2^-58.000185
[./src/speck-xor-threshold-search.cc:1316] 13 R (  802490 10800004) -> (880A0808 88C8084C) : [       728 trails]  2^-57.8633224925


 */

/* --- */

#if (WORD_SIZE == 24)
/*
Found with parameters: 

#--- [./tests/speck-xor-threshold-search-tests.cc:146] Tests, WORD_SIZE  = 24, MASK =   FFFFFF
[./tests/speck-xor-threshold-search-tests.cc:156] WORD_SIZE 24 NROUNDS 10 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 33554432 2^25.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 7  SPECK_CLUSTER_MAX_HW 7 SPECK_EPS 2^-10.00
[./src/speck-xor-threshold-search.cc:1629] trail_len 10
[./src/speck-xor-threshold-search.cc:1685] Add initial trail: 2^-45.000000 | 0
[./src/speck-xor-threshold-search.cc:1690] Initial trail: 10 R (  4000D2   504200) -> (   40085   2F05E9) : [         1 trails]  2^-45.000000
[./src/speck-xor-threshold-search.cc:1269] Found 1 trails:
[    1] 4000D2 504200 820200 1202 9000 10 80 0 800000 800000 808000 808004 800084 8400A0 80A0 2085A4 808424 84A905 A02881 8560AD 40085 2F05E9  | 2^-45.000000
Probability of differential: 2^-45.000000
[./src/speck-xor-threshold-search.cc:1305] 10 R (  4000D2   504200) -> (   40085   2F05E9) : [         1 trails]  2^-45.000000
[./src/speck-xor-threshold-search.cc:1305] 10 R (     88A   484008) -> (  800082   8324B2) : [        24 trails]  2^-43.874006


*/
/*
 * Table of SPECK_TRAIL_LEN probabilities, each an exact power of two written
 * as 1.0 / 2^k so that no decimal rounding is involved.
 *
 * Entry 0 stands for the input difference (p = 1). Entry i+1 has the same
 * value as the bound B[i] printed in the "Best on Speck24" log kept further
 * down in this file (B[0] = 2^0 ... B[9] = 2^-45); presumably these are the
 * best-known probability bounds used to seed the threshold search -- TODO
 * confirm against the code that reads g_B.
 */
double g_B[SPECK_TRAIL_LEN] = {
  1.0,								  // 0: input diff
  (1.0 / (double)(1ULL <<  0)), // 1
  (1.0 / (double)(1ULL <<  1)), // 2
  (1.0 / (double)(1ULL <<  3)), // 3
  (1.0 / (double)(1ULL <<  7)), // 4
  (1.0 / (double)(1ULL << 10)), // 5
  (1.0 / (double)(1ULL << 14)), // 6
  (1.0 / (double)(1ULL << 19)), // 7
  (1.0 / (double)(1ULL << 26)), // 8
  (1.0 / (double)(1ULL << 35)), // 9
  (1.0 / (double)(1ULL << 45))  // 10
};

/*
 * Differential trail found by the threshold search for WORD_SIZE 24
 * (see the log excerpt above): entry 0 is the input difference, entries
 * 1..10 are the per-round differences.
 *
 * Each initializer is {dx, dy, 0, p}. The round probabilities are written as
 * exact powers of two (1.0 / 2^k) rather than as truncated decimal literals
 * (e.g. 0.007812 is not 2^-7), so that their product is exactly 2^-45.
 */
differential_t g_trail[SPECK_TRAIL_LEN] = {
 {0x4000D2, 0x504200, 0, 1.0},                          // 0 : input difference, p = 1
 {0x820200,   0x1202, 0, (1.0 / (double)(1ULL <<  5))}, // 2^-5
 {  0x9000,     0x10, 0, (1.0 / (double)(1ULL <<  4))}, // 2^-4
 {    0x80,      0x0, 0, (1.0 / (double)(1ULL <<  2))}, // 2^-2
 {0x800000, 0x800000, 0, (1.0 / (double)(1ULL <<  0))}, // 2^0
 {0x808000, 0x808004, 0, (1.0 / (double)(1ULL <<  1))}, // 2^-1
 {0x800084, 0x8400A0, 0, (1.0 / (double)(1ULL <<  3))}, // 2^-3
 {  0x80A0, 0x2085A4, 0, (1.0 / (double)(1ULL <<  4))}, // 2^-4
 {0x808424, 0x84A905, 0, (1.0 / (double)(1ULL <<  7))}, // 2^-7
 {0xA02881, 0x8560AD, 0, (1.0 / (double)(1ULL <<  9))}, // 2^-9
 { 0x40085, 0x2F05E9, 0, (1.0 / (double)(1ULL << 10))}  // 2^-10
};												// total p = 2^-45
#endif  // #if (WORD_SIZE == 24)


/* --- */

/* 
#define SPECK_LOG_FILE "speck16-r9-ddt30-hw9-pthres0-031-greedy.log"
#define SPECK_LOG_FILE "speck24-r11-ddt30-hw9-pthres0-031-greedy.log"
#define SPECK_LOG_FILE "speck32-r14-ddt30-hw9-pthres0-031-greedy.log"

Experiments to try:

[./tests/speck-xor-threshold-search-tests.cc:158] WORD_SIZE 32 NROUNDS 14 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 1073741824 2^30.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

[./tests/speck-xor-threshold-search-tests.cc:158] WORD_SIZE 24 NROUNDS 11 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 1073741824 2^30.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00

[./tests/speck-xor-threshold-search-tests.cc:158] WORD_SIZE 16 NROUNDS 9 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 1073741824 2^30.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00


 */


/* ---- */

/* 
Best on Speck24, 11 rounds

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-10.000000
B[ 5] = 2^-14.000000
B[ 6] = 2^-19.000000
B[ 7] = 2^-26.000000
B[ 8] = 2^-35.000000
B[ 9] = 2^-45.000000
B[10] = 2^-56.000000
 0:   820200 ->     1202 0.031250 (2^-5.000000)
 1:     9000 ->       10 0.062500 (2^-4.000000)
 2:       80 ->        0 0.250000 (2^-2.000000)
 3:   800000 ->   800000 1.000000 (2^0.000000)
 4:   808000 ->   808004 0.500000 (2^-1.000000)
 5:   800084 ->   8400A0 0.125000 (2^-3.000000)
 6:     80A0 ->   2085A4 0.062500 (2^-4.000000)
 7:   808424 ->   84A905 0.007812 (2^-7.000000)
 8:   A02881 ->   8560AD 0.001953 (2^-9.000000)
 9:    40085 ->   2F05E9 0.000977 (2^-10.000000)
10:   A40029 ->   DC2F60 0.000488 (2^-11.000000)
p_tot = 0.000000000000000 = 2^-56.000000, Bn = 0.000000 = 2^-56.000000
[./src/speck-xor-threshold-search.cc:1941] nrounds = 11
[./src/speck-xor-threshold-search.cc:364] Verify P for one round (2^20.000000 CPs)...
R# 0  Input differences:   4000D2   504200
R# 0 Output differences:   820200     1202
THE  0: 0.031250 (2^-5.000000)
EXP  0: 0.030875 (2^-5.017407)

R# 1  Input differences:   820200     1202
R# 1 Output differences:     9000       10
THE  1: 0.062500 (2^-4.000000)
EXP  1: 0.062526 (2^-3.999406)

R# 2  Input differences:     9000       10
R# 2 Output differences:       80        0
THE  2: 0.250000 (2^-2.000000)
EXP  2: 0.250025 (2^-1.999857)

R# 3  Input differences:       80        0
R# 3 Output differences:   800000   800000
THE  3: 1.000000 (2^0.000000)
EXP  3: 1.000000 (2^0.000000)

R# 4  Input differences:   800000   800000
R# 4 Output differences:   808000   808004
THE  4: 0.500000 (2^-1.000000)
EXP  4: 0.499816 (2^-1.000531)

R# 5  Input differences:   808000   808004
R# 5 Output differences:   800084   8400A0
THE  5: 0.125000 (2^-3.000000)
EXP  5: 0.125319 (2^-2.996328)

R# 6  Input differences:   800084   8400A0
R# 6 Output differences:     80A0   2085A4
THE  6: 0.062500 (2^-4.000000)
EXP  6: 0.062630 (2^-3.997009)

R# 7  Input differences:     80A0   2085A4
R# 7 Output differences:   808424   84A905
THE  7: 0.007812 (2^-7.000000)
EXP  7: 0.007801 (2^-7.002115)

R# 8  Input differences:   808424   84A905
R# 8 Output differences:   A02881   8560AD
THE  8: 0.001953 (2^-9.000000)
EXP  8: 0.001938 (2^-9.011315)

R# 9  Input differences:   A02881   8560AD
R# 9 Output differences:    40085   2F05E9
THE  9: 0.000977 (2^-10.000000)
EXP  9: 0.001023 (2^-9.932566)

R#10  Input differences:    40085   2F05E9
R#10 Output differences:   A40029   DC2F60
THE 10: 0.000488 (2^-11.000000)
EXP 10: 0.000481 (2^-11.022720)

OK
[./src/speck-xor-threshold-search.cc:281] Verify P of differentials (2^20.000000 CPs)...
Input differences:   4000D2   504200

R# 0 Output differences:   820200     1202
THE  1: 0.031250 (2^-5.000000)   820200 ->     1202
EXP  1: 0.031127 (2^-5.005691)   820200 ->     1202

R# 1 Output differences:     9000       10
THE  2: 0.001953 (2^-9.000000)     9000 ->       10
EXP  2: 0.002293 (2^-8.768779)     9000 ->       10

R# 2 Output differences:       80        0
THE  3: 0.000488 (2^-11.000000)       80 ->        0
EXP  3: 0.000548 (2^-10.832582)       80 ->        0

R# 3 Output differences:   800000   800000
THE  4: 0.000488 (2^-11.000000)   800000 ->   800000
EXP  4: 0.000606 (2^-10.689387)   800000 ->   800000

R# 4 Output differences:   808000   808004
THE  5: 0.000244 (2^-12.000000)   808000 ->   808004
EXP  5: 0.000306 (2^-11.673571)   808000 ->   808004

R# 5 Output differences:   800084   8400A0
THE  6: 0.000031 (2^-15.000000)   800084 ->   8400A0
EXP  6: 0.000031 (2^-15.000000)   800084 ->   8400A0

R# 6 Output differences:     80A0   2085A4
THE  7: 0.000002 (2^-19.000000)     80A0 ->   2085A4
EXP  7: 0.000000 (2^-inf)     80A0 ->   2085A4

R# 7 Output differences:   808424   84A905
THE  8: 0.000000 (2^-26.000000)   808424 ->   84A905
EXP  8: 0.000000 (2^-inf)   808424 ->   84A905

R# 8 Output differences:   A02881   8560AD
THE  9: 0.000000 (2^-35.000000)   A02881 ->   8560AD
EXP  9: 0.000000 (2^-inf)   A02881 ->   8560AD

R# 9 Output differences:    40085   2F05E9
THE 10: 0.000000 (2^-45.000000)    40085 ->   2F05E9
EXP 10: 0.000000 (2^-inf)    40085 ->   2F05E9

R#10 Output differences:   A40029   DC2F60
THE 11: 0.000000 (2^-56.000000)   A40029 ->   DC2F60
EXP 11: 0.000000 (2^-inf)   A40029 ->   DC2F60

OK
[./src/speck-xor-threshold-search.cc:1629] trail_len 11
[./src/speck-xor-threshold-search.cc:1685] Add initial trail: 2^-56.000000 | 0
[./src/speck-xor-threshold-search.cc:1690] Initial trail: 11 R (  4000D2   504200) -> (  A40029   DC2F60) : [         1 trails]  2^-56.000000
[./src/speck-xor-threshold-search.cc:1269] Found 1 trails:
[    1] 4000D2 504200 820200 1202 9000 10 80 0 800000 800000 808000 808004 800084 8400A0 80A0 2085A4 808424 84A905 A02881 8560AD 40085 2F05E9 A40029 DC2F60  | 2^-56.000000
Probability of differential: 2^-56.000000
[./src/speck-xor-threshold-search.cc:1305] 11 R (  4000D2   504200) -> (  A40029   DC2F60) : [         1 trails]  2^-56.000000


[./src/speck-xor-threshold-search.cc:1269] Found 24 trails:
[    1] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 2A480 B8184 998020 C58C00 2C0480 6486 800082 8324B2  | 2^-54.000000
[    2] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 A480 98184 898020 C58C00 2C0480 6486 800082 8324B2  | 2^-49.000000
[    3] 88A 484008 424000 4042 202 20012 10 100080 80 800480 C80 2884 806880 812CA0 1E488 8818C 818060 C58C00 2C0480 6486 800082 8324B2  | 2^-59.000000
[    4] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 806080 8164A0 1A480 A8184 978020 C38C00 1C0480 6486 800082 8324B2  | 2^-59.000000
[    5] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 806080 8164A0 3A480 88184 888020 CC8C00 640480 6486 800082 8324B2  | 2^-50.000000
[    6] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 1A480 88184 888020 CC8C00 640480 6486 800082 8324B2  | 2^-45.000000
[    7] 88A 484008 424000 4042 202 20012 10 100080 80 800480 C80 2884 805880 811CA0 16488 9818C 808060 CC8C00 640480 6486 800082 8324B2  | 2^-58.000000
[    8] 88A 484008 424000 4042 202 20012 10 100080 80 800480 C80 2884 805880 811CA0 6488 8818C 808060 C48C00 240480 6486 800082 8324B2  | 2^-57.000000
[    9] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 806080 8164A0 A480 B8184 988020 C48C00 240480 6486 800082 8324B2  | 2^-51.000000
[   10] 88A 484008 424000 4042 202 20012 10 100080 80 800480 C80 2884 806880 812CA0 1E488 8818C 808060 C48C00 240480 6486 800082 8324B2  | 2^-54.000000
[   11] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 EA480 78184 F88020 C48C00 240480 6486 800082 8324B2  | 2^-59.000000
[   12] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 7A480 E8184 B18020 C58C00 2C0480 6486 800082 8324B2  | 2^-59.000000
[   13] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 806080 8164A0 3A480 88184 878020 C38C00 1C0480 6486 800082 8324B2  | 2^-59.000000
[   14] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 806080 8164A0 2A480 98184 898020 C58C00 2C0480 6486 800082 8324B2  | 2^-54.000000
[   15] 88A 484008 424000 4042 202 20012 10 100080 80 800480 C80 2884 806880 812CA0 E488 9818C 808060 CC8C00 640480 6486 800082 8324B2  | 2^-55.000000
[   16] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 2A480 B8184 988020 C48C00 240480 6486 800082 8324B2  | 2^-50.000000
[   17] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 806080 8164A0 2A480 98184 888020 C48C00 240480 6486 800082 8324B2  | 2^-50.000000
[   18] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 A480 98184 888020 C48C00 240480 6486 800082 8324B2  | 2^-45.000000
[   19] 88A 484008 424000 4042 202 20012 10 100080 80 800480 3C80 1884 81E880 812CA0 1E488 8818C 808060 C48C00 240480 6486 800082 8324B2  | 2^-61.000000
[   20] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 1A480 88184 878020 C38C00 1C0480 6486 800082 8324B2  | 2^-54.000000
[   21] 88A 484008 424000 4042 202 20012 10 100080 80 800480 C80 2884 805880 811CA0 6488 8818C 818060 C58C00 2C0480 6486 800082 8324B2  | 2^-62.000000
[   22] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 802080 8124A0 3A480 A8184 978020 C38C00 1C0480 6486 800082 8324B2  | 2^-58.000000
[   23] 88A 484008 424000 4042 202 20012 10 100080 80 800480 480 2084 806080 8164A0 A480 B8184 998020 C58C00 2C0480 6486 800082 8324B2  | 2^-55.000000
[   24] 88A 484008 424000 4042 202 20012 10 100080 80 800480 C80 2884 805880 811CA0 6498 8819C 8080E0 C48C00 240480 6486 800082 8324B2  | 2^-59.000000
Probability of differential: 2^-43.874006
[./src/speck-xor-threshold-search.cc:1305] 10 R (     88A   484008) -> (  800082   8324B2) : [        24 trails]  2^-43.874006


 */


/* 

Best on Speck24, 10 rounds

#--- [./tests/speck-xor-threshold-search-tests.cc:146] Tests, WORD_SIZE  = 24, MASK =   FFFFFF
[./tests/speck-xor-threshold-search-tests.cc:156] WORD_SIZE 24 NROUNDS 10 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 33554432 2^25.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 7  SPECK_CLUSTER_MAX_HW 7 SPECK_EPS 2^-10.00



B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-10.000000
B[ 5] = 2^-14.000000
B[ 6] = 2^-19.000000
B[ 7] = 2^-26.000000
B[ 8] = 2^-35.000000
B[ 9] = 2^-45.000000
 0:   820200 ->     1202 0.031250 (2^-5.000000)
 1:     9000 ->       10 0.062500 (2^-4.000000)
 2:       80 ->        0 0.250000 (2^-2.000000)
 3:   800000 ->   800000 1.000000 (2^0.000000)
 4:   808000 ->   808004 0.500000 (2^-1.000000)
 5:   800084 ->   8400A0 0.125000 (2^-3.000000)
 6:     80A0 ->   2085A4 0.062500 (2^-4.000000)
 7:   808424 ->   84A905 0.007812 (2^-7.000000)
 8:   A02881 ->   8560AD 0.001953 (2^-9.000000)
 9:    40085 ->   2F05E9 0.000977 (2^-10.000000)
p_tot = 0.000000000000028 = 2^-45.000000, Bn = 0.000000 = 2^-45.000000
[./src/speck-xor-threshold-search.cc:1941] nrounds = 10
[./src/speck-xor-threshold-search.cc:364] Verify P for one round (2^20.000000 CPs)...
R# 0  Input differences:   4000D2   504200
R# 0 Output differences:   820200     1202
THE  0: 0.031250 (2^-5.000000)
EXP  0: 0.031140 (2^-5.005072)

R# 1  Input differences:   820200     1202
R# 1 Output differences:     9000       10
THE  1: 0.062500 (2^-4.000000)
EXP  1: 0.062627 (2^-3.997075)

R# 2  Input differences:     9000       10
R# 2 Output differences:       80        0
THE  2: 0.250000 (2^-2.000000)
EXP  2: 0.250092 (2^-1.999472)

R# 3  Input differences:       80        0
R# 3 Output differences:   800000   800000
THE  3: 1.000000 (2^0.000000)
EXP  3: 1.000000 (2^0.000000)

R# 4  Input differences:   800000   800000
R# 4 Output differences:   808000   808004
THE  4: 0.500000 (2^-1.000000)
EXP  4: 0.500225 (2^-0.999351)

R# 5  Input differences:   808000   808004
R# 5 Output differences:   800084   8400A0
THE  5: 0.125000 (2^-3.000000)
EXP  5: 0.125352 (2^-2.995944)

R# 6  Input differences:   800084   8400A0
R# 6 Output differences:     80A0   2085A4
THE  6: 0.062500 (2^-4.000000)
EXP  6: 0.062020 (2^-4.011116)

R# 7  Input differences:     80A0   2085A4
R# 7 Output differences:   808424   84A905
THE  7: 0.007812 (2^-7.000000)
EXP  7: 0.007748 (2^-7.012025)

R# 8  Input differences:   808424   84A905
R# 8 Output differences:   A02881   8560AD
THE  8: 0.001953 (2^-9.000000)
EXP  8: 0.001940 (2^-9.009896)

R# 9  Input differences:   A02881   8560AD
R# 9 Output differences:    40085   2F05E9
THE  9: 0.000977 (2^-10.000000)
EXP  9: 0.000967 (2^-10.014158)

OK
[./src/speck-xor-threshold-search.cc:281] Verify P of differentials (2^20.000000 CPs)...
Input differences:   4000D2   504200

R# 0 Output differences:   820200     1202
THE  1: 0.031250 (2^-5.000000)   820200 ->     1202
EXP  1: 0.031177 (2^-5.003394)   820200 ->     1202

R# 1 Output differences:     9000       10
THE  2: 0.001953 (2^-9.000000)     9000 ->       10
EXP  2: 0.001615 (2^-9.274634)     9000 ->       10

R# 2 Output differences:       80        0
THE  3: 0.000488 (2^-11.000000)       80 ->        0
EXP  3: 0.000403 (2^-11.275486)       80 ->        0

R# 3 Output differences:   800000   800000
THE  4: 0.000488 (2^-11.000000)   800000 ->   800000
EXP  4: 0.000405 (2^-11.268681)   800000 ->   800000

R# 4 Output differences:   808000   808004
THE  5: 0.000244 (2^-12.000000)   808000 ->   808004
EXP  5: 0.000195 (2^-12.327575)   808000 ->   808004

R# 5 Output differences:   800084   8400A0
THE  6: 0.000031 (2^-15.000000)   800084 ->   8400A0
EXP  6: 0.000022 (2^-15.476438)   800084 ->   8400A0

R# 6 Output differences:     80A0   2085A4
THE  7: 0.000002 (2^-19.000000)     80A0 ->   2085A4
EXP  7: 0.000000 (2^-inf)     80A0 ->   2085A4

R# 7 Output differences:   808424   84A905
THE  8: 0.000000 (2^-26.000000)   808424 ->   84A905
EXP  8: 0.000000 (2^-inf)   808424 ->   84A905

R# 8 Output differences:   A02881   8560AD
THE  9: 0.000000 (2^-35.000000)   A02881 ->   8560AD
EXP  9: 0.000000 (2^-inf)   A02881 ->   8560AD

R# 9 Output differences:    40085   2F05E9
THE 10: 0.000000 (2^-45.000000)    40085 ->   2F05E9
EXP 10: 0.000000 (2^-inf)    40085 ->   2F05E9

OK
[./src/speck-xor-threshold-search.cc:1629] trail_len 10
[./src/speck-xor-threshold-search.cc:1685] Add initial trail: 2^-45.000000 | 0
[./src/speck-xor-threshold-search.cc:1690] Initial trail: 10 R (  4000D2   504200) -> (   40085   2F05E9) : [         1 trails]  2^-45.000000
[./src/speck-xor-threshold-search.cc:1269] Found 1 trails:
[    1] 4000D2 504200 820200 1202 9000 10 80 0 800000 800000 808000 808004 800084 8400A0 80A0 2085A4 808424 84A905 A02881 8560AD 40085 2F05E9  | 2^-45.000000
Probability of differential: 2^-45.000000
[./src/speck-xor-threshold-search.cc:1305] 10 R (  4000D2   504200) -> (   40085   2F05E9) : [         1 trails]  2^-45.000000

 */

/* 

Best on Speck16, 9R

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-5.000000
B[ 4] = 2^-9.000000
B[ 5] = 2^-13.000000
B[ 6] = 2^-18.000000
B[ 7] = 2^-24.000000
B[ 8] = 2^-31.000000
 0:      211 ->      A04 0.031250 (2^-5.000000)
 1:     2800 ->       10 0.062500 (2^-4.000000)
 2:       40 ->        0 0.250000 (2^-2.000000)
 3:     8000 ->     8000 1.000000 (2^0.000000)
 4:     8100 ->     8102 0.500000 (2^-1.000000)
 5:     8000 ->     840A 0.250000 (2^-2.000000)
 6:     850A ->     9520 0.062500 (2^-4.000000)
 7:     802A ->     D4A8 0.015625 (2^-6.000000)
 8:       A8 ->     520B 0.007812 (2^-7.000000)
p_tot = 0.000000000465661 = 2^-31.000000, Bn = 0.000000 = 2^-31.000000
[./src/speck-xor-threshold-search.cc:1148] Found 1 trails:
[    1] A60 4205 211 A04 2800 10 40 0 8000 8000 8100 8102 8000 840A 850A 9520 802A D4A8 A8 520B  | 2^-31.000000 
Probability of differential: 2^-31.000000
[./src/speck-xor-threshold-search.cc:1186]  9 R (     A60     4205) -> (      A8     520B) : [         1 trails]  2^-31.000000

R# 0  Input differences:      A60     4205
R# 0 Output differences:      211      A04
THE  0: 0.031250 (2^-5.000000)
EXP  0: 0.030704 (2^-5.025451)

R# 1  Input differences:      211      A04
R# 1 Output differences:     2800       10
THE  1: 0.062500 (2^-4.000000)
EXP  1: 0.062408 (2^-4.002115)

R# 2  Input differences:     2800       10
R# 2 Output differences:       40        0
THE  2: 0.250000 (2^-2.000000)
EXP  2: 0.250568 (2^-1.996724)

R# 3  Input differences:       40        0
R# 3 Output differences:     8000     8000
THE  3: 1.000000 (2^0.000000)
EXP  3: 1.000000 (2^0.000000)

R# 4  Input differences:     8000     8000
R# 4 Output differences:     8100     8102
THE  4: 0.500000 (2^-1.000000)
EXP  4: 0.500222 (2^-0.999359)

R# 5  Input differences:     8100     8102
R# 5 Output differences:     8000     840A
THE  5: 0.250000 (2^-2.000000)
EXP  5: 0.250717 (2^-1.995867)

R# 6  Input differences:     8000     840A
R# 6 Output differences:     850A     9520
THE  6: 0.062500 (2^-4.000000)
EXP  6: 0.062656 (2^-3.996394)

R# 7  Input differences:     850A     9520
R# 7 Output differences:     802A     D4A8
THE  7: 0.015625 (2^-6.000000)
EXP  7: 0.015758 (2^-5.987812)

R# 8  Input differences:     802A     D4A8
R# 8 Output differences:       A8     520B
THE  8: 0.007812 (2^-7.000000)
EXP  8: 0.007806 (2^-7.001233)

OK
[./src/speck-xor-threshold-search.cc:166] Verify P of differentials (2^20.000000 CPs)...
Input differences:      A60     4205

R# 0 Output differences:      211      A04
THE  1: 0.031250 (2^-5.000000)      211 ->      A04
EXP  1: 0.030994 (2^-5.011848)      211 ->      A04

R# 1 Output differences:     2800       10
THE  2: 0.001953 (2^-9.000000)     2800 ->       10
EXP  2: 0.001461 (2^-9.418799)     2800 ->       10

R# 2 Output differences:       40        0
THE  3: 0.000488 (2^-11.000000)       40 ->        0
EXP  3: 0.000373 (2^-11.388975)       40 ->        0

R# 3 Output differences:     8000     8000
THE  4: 0.000488 (2^-11.000000)     8000 ->     8000
EXP  4: 0.000387 (2^-11.334664)     8000 ->     8000

R# 4 Output differences:     8100     8102
THE  5: 0.000244 (2^-12.000000)     8100 ->     8102
EXP  5: 0.000175 (2^-12.476438)     8100 ->     8102

R# 5 Output differences:     8000     840A
THE  6: 0.000061 (2^-14.000000)     8000 ->     840A
EXP  6: 0.000047 (2^-14.385290)     8000 ->     840A

R# 6 Output differences:     850A     9520
THE  7: 0.000004 (2^-18.000000)     850A ->     9520
EXP  7: 0.000005 (2^-17.678072)     850A ->     9520

R# 7 Output differences:     802A     D4A8
THE  8: 0.000000 (2^-24.000000)     802A ->     D4A8
EXP  8: 0.000000 (2^-inf)     802A ->     D4A8

R# 8 Output differences:       A8     520B
THE  9: 0.000000 (2^-31.000000)       A8 ->     520B
EXP  9: 0.000000 (2^-inf)       A8 ->     520B

OK
[./src/speck-xor-threshold-search.cc:1505] trail_len 9
[./src/speck-xor-threshold-search.cc:1561] Add initial trail: 2^-31.000000 | 0
[./src/speck-xor-threshold-search.cc:1566] Initial trail:  9 R (     A60     4205) -> (      A8     520B) : [         1 trails]  2^-31.000000
[./src/speck-xor-threshold-search.cc:1147] Found 1 trails:
[    1] A60 4205 211 A04 2800 10 40 0 8000 8000 8100 8102 8000 840A 850A 9520 802A D4A8 A8 520B  | 2^-31.000000
Probability of differential: 2^-31.000000
[./src/speck-xor-threshold-search.cc:1183]  9 R (     A60     4205) -> (      A8     520B) : [         1 trails]  2^-31.000000




 */

/* 

Best on Speck32, 13R


Time: Fri Nov  8 19:51:56 2013
[./tests/speck-xor-threshold-search-tests.cc:158] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 1073741824 2^30.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 1 SPECK_MAX_HW 9  SPECK_CLUSTER_MAX_HW 9 SPECK_EPS 2^-15.00
[./src/speck-xor-threshold-search.cc:1680] nrounds = 1, Bn_init = 2^-inf : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
 0:        0 ->        4 1.000000 (2^0.000000)
p_tot = 1.000000000000000 = 2^0.000000, Bn = 1.000000 = 2^0.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 2, Bn_init = 2^-1.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
 0:        0 -> 80000000 0.500000 (2^-1.000000)
 1: 80000000 -> 80000004 1.000000 (2^0.000000)
p_tot = 0.500000000000000 = 2^-1.000000, Bn = 0.500000 = 2^-1.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 3, Bn_init = 2^-3.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
 0:       80 ->        0 0.250000 (2^-2.000000)
 1: 80000000 -> 80000000 1.000000 (2^0.000000)
 2: 80800000 -> 80800004 0.500000 (2^-1.000000)
p_tot = 0.125000000000000 = 2^-3.000000, Bn = 0.125000 = 2^-3.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 4, Bn_init = 2^-6.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
 0:       80 ->        0 0.250000 (2^-2.000000)
 1: 80000000 -> 80000000 1.000000 (2^0.000000)
 2: 80800000 -> 80800004 0.500000 (2^-1.000000)
 3: 80008004 -> 84008020 0.125000 (2^-3.000000)
p_tot = 0.015625000000000 = 2^-6.000000, Bn = 0.015625 = 2^-6.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 5, Bn_init = 2^-11.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
 0: 80000480 -> 80800000 0.062500 (2^-4.000000)
 1:        4 ->  4000000 0.250000 (2^-2.000000)
 2:        0 -> 20000000 0.500000 (2^-1.000000)
 3: 20000000 -> 20000001 0.500000 (2^-1.000000)
 4: 20200001 -> 20200008 0.125000 (2^-3.000000)
p_tot = 0.000488281250000 = 2^-11.000000, Bn = 0.000488 = 2^-11.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 6, Bn_init = 2^-16.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
 0: 82020000 ->   120200 0.031250 (2^-5.000000)
 1:   900000 ->     1000 0.062500 (2^-4.000000)
 2:     8000 ->        0 0.250000 (2^-2.000000)
 3:       80 ->       80 0.500000 (2^-1.000000)
 4: 80000080 -> 80000480 0.500000 (2^-1.000000)
 5:   800480 ->   802084 0.125000 (2^-3.000000)
p_tot = 0.000015258789062 = 2^-16.000000, Bn = 0.000015 = 2^-16.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 7, Bn_init = 2^-21.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-21.000000
 0: 82020000 ->   120200 0.031250 (2^-5.000000)
 1:   900000 ->     1000 0.062500 (2^-4.000000)
 2:     8000 ->        0 0.250000 (2^-2.000000)
 3:       80 ->       80 0.500000 (2^-1.000000)
 4: 80000080 -> 80000480 0.500000 (2^-1.000000)
 5:   800480 ->   802084 0.125000 (2^-3.000000)
 6: 8080A080 -> 8481A4A0 0.031250 (2^-5.000000)
p_tot = 0.000000476837158 = 2^-21.000000, Bn = 0.000000 = 2^-21.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 8, Bn_init = 2^-29.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-29.000000
 0: 82020000 ->   120200 0.031250 (2^-5.000000)
 1:   900000 ->     1000 0.062500 (2^-4.000000)
 2:     8000 ->        0 0.250000 (2^-2.000000)
 3:       80 ->       80 0.500000 (2^-1.000000)
 4: 80000080 -> 80000480 0.500000 (2^-1.000000)
 5:   800480 ->   802084 0.125000 (2^-3.000000)
 6: 8080A080 -> 8481A4A0 0.031250 (2^-5.000000)
 7:  4002400 -> 200D0104 0.003906 (2^-8.000000)
p_tot = 0.000000001862645 = 2^-29.000000, Bn = 0.000000 = 2^-29.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 9, Bn_init = 2^-36.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-29.000000
B[ 8] = 2^-34.000000
 0: 80240000 -> 80000400 0.062500 (2^-4.000000)
 1: 80802000 -> 80800004 0.125000 (2^-3.000000)
 2: 80008024 -> 84008000 0.062500 (2^-4.000000)
 3: A0808080 -> 80848084 0.031250 (2^-5.000000)
 4:   240004 ->  4000420 0.015625 (2^-6.000000)
 5:     2020 -> 20000120 0.062500 (2^-4.000000)
 6:      100 ->      801 0.125000 (2^-3.000000)
 7:      800 ->     4808 0.250000 (2^-2.000000)
 8:     4800 ->    20840 0.125000 (2^-3.000000)
p_tot = 0.000000000058208 = 2^-34.000000, Bn = 0.000000 = 2^-34.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 10, Bn_init = 2^-38.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-29.000000
B[ 8] = 2^-34.000000
B[ 9] = 2^-38.000000
 0: 80240000 -> 80000400 0.062500 (2^-4.000000)
 1: 80802000 -> 80800004 0.125000 (2^-3.000000)
 2: 80008024 -> 84008000 0.062500 (2^-4.000000)
 3: A0808080 -> 80848084 0.031250 (2^-5.000000)
 4:   240004 ->  4000420 0.015625 (2^-6.000000)
 5:     2020 -> 20000120 0.062500 (2^-4.000000)
 6:      100 ->      801 0.125000 (2^-3.000000)
 7:      800 ->     4808 0.250000 (2^-2.000000)
 8:     4800 ->    20840 0.125000 (2^-3.000000)
 9:    20808 ->   124A08 0.062500 (2^-4.000000)
p_tot = 0.000000000003638 = 2^-38.000000, Bn = 0.000000 = 2^-38.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 11, Bn_init = 2^-45.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-29.000000
B[ 8] = 2^-34.000000
B[ 9] = 2^-38.000000
B[10] = 2^-44.000000
 0: 80808020 ->  4808000 0.031250 (2^-5.000000)
 1: 24000080 ->    40080 0.031250 (2^-5.000000)
 2: 80200080 -> 80000480 0.125000 (2^-3.000000)
 3:   802480 ->   800084 0.062500 (2^-4.000000)
 4: 808080A0 -> 84808480 0.031250 (2^-5.000000)
 5: 24000400 ->    42004 0.015625 (2^-6.000000)
 6:   202000 ->    12020 0.062500 (2^-4.000000)
 7:    10000 ->    80100 0.125000 (2^-3.000000)
 8:    80000 ->   480800 0.250000 (2^-2.000000)
 9:   480000 ->  2084000 0.125000 (2^-3.000000)
10:  2080800 -> 124A0800 0.062500 (2^-4.000000)
p_tot = 0.000000000000057 = 2^-44.000000, Bn = 0.000000 = 2^-44.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 12, Bn_init = 2^-51.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-29.000000
B[ 8] = 2^-34.000000
B[ 9] = 2^-38.000000
B[10] = 2^-44.000000
B[11] = 2^-51.000000
 0: 80808020 ->  4808000 0.031250 (2^-5.000000)
 1: 24000080 ->    40080 0.031250 (2^-5.000000)
 2: 80200080 -> 80000480 0.125000 (2^-3.000000)
 3:   802480 ->   800084 0.062500 (2^-4.000000)
 4: 808080A0 -> 84808480 0.031250 (2^-5.000000)
 5: 24000400 ->    42004 0.015625 (2^-6.000000)
 6:   202000 ->    12020 0.062500 (2^-4.000000)
 7:    10000 ->    80100 0.125000 (2^-3.000000)
 8:    80000 ->   480800 0.250000 (2^-2.000000)
 9:   480000 ->  2084000 0.125000 (2^-3.000000)
10:  2080800 -> 124A0800 0.062500 (2^-4.000000)
11: 12480008 -> 80184008 0.007812 (2^-7.000000)
p_tot = 0.000000000000000 = 2^-51.000000, Bn = 0.000000 = 2^-51.000000
[./src/speck-xor-threshold-search.cc:1680] nrounds = 13, Bn_init = 2^-58.000000 : key DDA937EE DA74F756 622FFC7C 8A3A534E

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-29.000000
B[ 8] = 2^-34.000000
B[ 9] = 2^-38.000000
B[10] = 2^-44.000000
B[11] = 2^-51.000000
B[12] = 2^-58.000000
 0: 80808020 ->  4808000 0.031250 (2^-5.000000)
 1: 24000080 ->    40080 0.031250 (2^-5.000000)
 2: 80200080 -> 80000480 0.125000 (2^-3.000000)
 3:   802480 ->   800084 0.062500 (2^-4.000000)
 4: 808080A0 -> 84808480 0.031250 (2^-5.000000)
 5: 24000400 ->    42004 0.015625 (2^-6.000000)
 6:   202000 ->    12020 0.062500 (2^-4.000000)
 7:    10000 ->    80100 0.125000 (2^-3.000000)
 8:    80000 ->   480800 0.250000 (2^-2.000000)
 9:   480000 ->  2084000 0.125000 (2^-3.000000)
10:  2080800 -> 124A0800 0.062500 (2^-4.000000)
11: 12480008 -> 80184008 0.007812 (2^-7.000000)
12: 880A0808 -> 88C8084C 0.007812 (2^-7.000000)
p_tot = 0.000000000000000 = 2^-58.000000, Bn = 0.000000 = 2^-58.000000
[./src/speck-xor-threshold-search.cc:1148] Found 1 trails:
[./src/speck-xor-threshold-search.cc:1148] Found 34 trails:
Probability of differential: 2^-57.908817
[./src/speck-xor-threshold-search.cc:1186] 13 R (  802490 10800004) -> (880A0808 88C8084C) : [        34 trails]  2^-57.908817
[./src/speck-xor-threshold-search.cc:1148] Found 34 trails:
[    1] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 780000 380800 3C80000 2084000 2080800 124A0800 12480008 80184008 880A0808 88C8084C  | 2^-72.000000 
[    2] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 780000 380800 380000 1F84000 1E080800 11CA0800 E480008 80184008 880A0808 88C8084C  | 2^-80.000000 
[    3] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 380000 2F84000 6180800 11DA0800 EC80008 80184008 880A0808 88C8084C  | 2^-77.000000 
[    4] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 B80000 2784000 2080800 11CA0800 E480008 80184008 880A0808 88C8084C  | 2^-73.000000 
[    5] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 F80000 B80800 7C80000 2084000 2180800 125A0800 12C80008 80184008 880A0808 88C8084C  | 2^-81.000000 
[    6] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 B80000 2F84000 2180800 15DA0800 2EC80008 80184008 880A0808 88C8084C  | 2^-77.000000 
[    7] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 380000 2F84000 2080800 15CA0800 2E480008 80184008 880A0808 88C8084C  | 2^-74.000000 
[    8] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 380000 780800 380000 3F84000 2080800 1DCA0800 6E480008 80184008 880A0808 88C8084C  | 2^-79.000000 
[    9] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 380000 780800 1B80000 2784000 2180800 11DA0800 EC80008 80184008 880A0808 88C8084C  | 2^-80.000000 
[   10] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 380000 780800 380000 3F84000 E180800 11DA0800 EC80008 80184008 880A0808 88C8084C  | 2^-80.000000 
[   11] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 480000 2084000 2080800 124A0800 12480008 80184008 880A0808 88C8084C  | 2^-58.000000 
[   12] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 F80000 B80800 7C80000 2084000 2080800 124A0800 12480008 80184008 880A0808 88C8084C  | 2^-76.000000 
[   13] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 380000 2784000 6080800 15CA0800 2E480008 80184008 880A0808 88C8084C  | 2^-73.000000 
[   14] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 B80000 2F84000 6080800 11CA0800 E480008 80184008 880A0808 88C8084C  | 2^-74.000000 
[   15] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 B80000 2F84000 6180800 11DA0800 EC80008 80184008 880A0808 88C8084C  | 2^-77.000000 
[   16] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 380000 780800 1C80000 2084000 2080800 124A0800 12480008 80184008 880A0808 88C8084C  | 2^-68.000000 
[   17] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 780000 380800 3C80000 2084000 2180800 125A0800 12C80008 80184008 880A0808 88C8084C  | 2^-77.000000 
[   18] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 380000 2F84000 6080800 11CA0800 E480008 80184008 880A0808 88C8084C  | 2^-74.000000 
[   19] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 C80000 2084000 2080800 124A0800 12480008 80184008 880A0808 88C8084C  | 2^-63.000000 
[   20] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 380000 2784000 2180800 11DA0800 EC80008 80184008 880A0808 88C8084C  | 2^-72.000000 
[   21] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 B80000 2F84000 2080800 15CA0800 2E480008 80184008 880A0808 88C8084C  | 2^-74.000000 
[   22] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 1B80000 3F84000 2080800 1DCA0800 6E480008 80184008 880A0808 88C8084C  | 2^-79.000000 
[   23] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 380000 2784000 6180800 15DA0800 2EC80008 80184008 880A0808 88C8084C  | 2^-76.000000 
[   24] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 1B80000 3F84000 E180800 11DA0800 EC80008 80184008 880A0808 88C8084C  | 2^-80.000000 
[   25] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 380000 2784000 2080800 11CA0800 E480008 80184008 880A0808 88C8084C  | 2^-69.000000 
[   26] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 380000 780800 1B80000 2784000 6080800 15CA0800 2E480008 80184008 880A0808 88C8084C  | 2^-81.000000 
[   27] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 B80000 2784000 2180800 11DA0800 EC80008 80184008 880A0808 88C8084C  | 2^-76.000000 
[   28] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 380000 780800 1B80000 2784000 2080800 11CA0800 E480008 80184008 880A0808 88C8084C  | 2^-77.000000 
[   29] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 480000 2084000 2180800 125A0800 12C80008 80184008 880A0808 88C8084C  | 2^-63.000000 
[   30] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 C80000 2084000 2180800 125A0800 12C80008 80184008 880A0808 88C8084C  | 2^-68.000000 
[   31] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 B80000 2784000 6180800 15DA0800 2EC80008 80184008 880A0808 88C8084C  | 2^-80.000000 
[   32] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 380000 2F84000 2180800 15DA0800 2EC80008 80184008 880A0808 88C8084C  | 2^-77.000000 
[   33] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 380000 780800 1C80000 2084000 2180800 125A0800 12C80008 80184008 880A0808 88C8084C  | 2^-73.000000 
[   34] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 180000 580800 B80000 2784000 6080800 15CA0800 2E480008 80184008 880A0808 88C8084C  | 2^-77.000000 
Probability of differential: 2^-57.908817
[./src/speck-xor-threshold-search.cc:1186] 13 R (  802490 10800004) -> (880A0808 88C8084C) : [        34 trails]  2^-57.908817


 */

/* 
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/speck-xor-threshold-search-tests
#--- [./tests/speck-xor-threshold-search-tests.cc:109] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./tests/speck-xor-threshold-search-tests.cc:113] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 65536 2^16.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 8  SPECK_CLUSTER_MAX_HW 10 SPECK_EPS 2^-25.00
[./src/speck-xor-threshold-search.cc:372]      65535 /      65536 | Add 3C000000 DC000000 -> A0000000 : 0.031250 2^-5.00 | 2^-5.00[./src/speck-xor-threshold-search.cc:1645] p_thres = 0.031250 (2^-5.000000), n = 32, #diffs = 65536 65536
[./src/speck-xor-threshold-search.cc:1683] nrounds = 1, Bn_init = 2^-inf : key 57650344 43CC432D DEB22F5B 3018C27D

Also this finds 10 trails:

#--- [./tests/speck-xor-threshold-search-tests.cc:109] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./tests/speck-xor-threshold-search-tests.cc:113] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 65536 2^16.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 8  SPECK_CLUSTER_MAX_HW 10 SPECK_EPS 2^-40.00
[./src/speck-xor-threshold-search.cc:372]      65535 /      65536 | Add 3C000000 DC000000 -> A0000000 : 0.031250 2^-5.00 | 2^-5.00[./src/speck-xor-threshold-search.cc:1645] p_thres = 0.031250 (2^-5.000000), n = 32, #diffs = 65536 65536
[./src/speck-xor-threshold-search.cc:1683] nrounds = 1, Bn_init = 2^-inf : key 2156BC7F 6D625CF1 63BA8206 79889651


[./src/speck-xor-threshold-search.cc:1178] 13 R (      90 10000000) -> ( 8100288  98026CE) : [         9 trails]  2^-70.983192
[./src/speck-xor-threshold-search.cc:1475] Does not match output diffs: ( 8100208, 980224E) vs. ( 8100288, 98026CE)[./src/speck-xor-threshold-search.cc:1458] Add new trail: 2^-78.000000 | 9
[./src/speck-xor-threshold-search.cc:1148] Found 10 trails:
[    1] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080802 4A08C812 4C00400A 1C460098 22020048 C0320488 8100288 98026CE  | 2^-76.000000
[    2] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080802 4A08C812 4C00401A 1C460088 220200C8 C0320488 8100288 98026CE  | 2^-75.000000
[    3] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080806 4A08C816 4800402A 18460098 2020048 C0320488 8100288 98026CE  | 2^-78.000000
[    4] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080806 4A08C816 4C00403A 1C460088 220200C8 C0320488 8100288 98026CE  | 2^-78.000000
[    5] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080802 4A08C812 4800400A 18460098 2020048 C0320488 8100288 98026CE  | 2^-73.000000
[    6] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8081802 4A08D812 4800C00A 18460098 2020048 C0320488 8100288 98026CE  | 2^-74.000000
[    7] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080806 4A08C816 4800403A 18460088 20200C8 C0320488 8100288 98026CE  | 2^-77.000000
[    8] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8081802 4A08D812 4C00C00A 1C460098 22020048 C0320488 8100288 98026CE  | 2^-77.000000
[    9] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080806 4A08C816 4C00402A 1C460098 22020048 C0320488 8100288 98026CE  | 2^-79.000000
[   10] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080802 4A08C812 4800401A 18460088 20200C8 C0320488 8100288 98026CE  | 2^-72.000000
Probability of differential: 2^-70.972094
[./src/speck-xor-threshold-search.cc:1178] 13 R (      90 10000000) -> ( 8100288  98026CE) : [        10 trails]  2^-70.972094




																															/**

/* 

Speck32, 14R, 2^-67

B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-11.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-21.000000
B[ 7] = 2^-29.000000
B[ 8] = 2^-34.000000
B[ 9] = 2^-38.000000
B[10] = 2^-44.000000
B[11] = 2^-51.000000
B[12] = 2^-58.000000
B[13] = 2^-67.000000
 0: 80808020 ->  4808000 0.031250 (2^-5.000000)
 1: 24000080 ->    40080 0.031250 (2^-5.000000)
 2: 80200080 -> 80000480 0.125000 (2^-3.000000)
 3:   802480 ->   800084 0.062500 (2^-4.000000)
 4: 808080A0 -> 84808480 0.031250 (2^-5.000000)
 5: 24000400 ->    42004 0.015625 (2^-6.000000)
 6:   202000 ->    12020 0.062500 (2^-4.000000)
 7:    10000 ->    80100 0.125000 (2^-3.000000)
 8:    80000 ->   480800 0.250000 (2^-2.000000)
 9:   480000 ->  2084000 0.125000 (2^-3.000000)
10:  2080800 -> 124A0800 0.062500 (2^-4.000000)
11: 12480008 -> 80184008 0.007812 (2^-7.000000)
12: 880A0808 -> 88C8084C 0.007812 (2^-7.000000)
13: 80400244 -> C6004020 0.001953 (2^-9.000000)
p_tot = 0.000000000000000 = 2^-67.000000, Bn = 0.000000 = 2^-67.000000
[./src/speck-xor-threshold-search.cc:1148] Found 1 trails:
[    1] 802490 10800004 80808020 4808000 24000080 40080 80200080 80000480 802480 800084 808080A0 84808480 24000400 42004 202000 12020 10000 80100 80000 480800 480000 2084000 2080800 124A0800 12480008 80184008 880A0808 88C8084C 80400244 C6004020  | 2^-67.000000 
Probability of differential: 2^-67.000000

 */
/* 
	[./tests/speck-xor-threshold-search-tests.cc:113] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 65536 2^16.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 8  SPECK_CLUSTER_MAX_HW 10
	[./src/speck-xor-threshold-search.cc:317]      65535 /      65536 | Add 3C000000 DC000000 -> A0000000 : 0.031250 2^-5.00 | 2^-5.00[./src/speck-xor-threshold-search.cc:1590] p_thres = 0.031250 (2^-5.000000), n = 32, #diffs = 65536 65536
	[./src/speck-xor-threshold-search.cc:1628] nrounds = 1, Bn_init = 2^-inf : key 6D0D501C A083B6C4 7654C11C 18F6E610

	[./src/speck-xor-threshold-search.cc:1437] trail_len 13
	[./src/speck-xor-threshold-search.cc:1493] Add initial trail: 2^-73.000000 | 0
	[./src/speck-xor-threshold-search.cc:1498] Initial trail: 13 R (      90 10000000) -> ( 8100288  98026CE) : [         1 trails]  2^-73.000000
	[./src/speck-xor-threshold-search.cc:1093] Found 1 trails:
[    1] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080802 4A08C812 4800400A 18460098 2020048 C0320488 8100288 98026CE  | 2^-73.000000
Probability of differential: 2^-73.000000
[./src/speck-xor-threshold-search.cc:1123] 13 R (      90 10000000) -> ( 8100288  98026CE) : [         1 trails]  2^-73.000000
[./src/speck-xor-threshold-search.cc:549]       2558 /   33554432 | Add  A480040 18460098 ->  20207F8 : 0.000031 2^-15.00 | 2^-45.00[./src/speck-xor-threshold-search.cc:1093] Found 1 trails:
[    1] 90 10000000 80000000 0 800000 800000 808000 4808000 4800080 20840080 A0808080 A4A08481 24000401 104200C 202000 8012060 8010000 48080300 48000000 8401802 8080802 4A08C812 4800400A 18460098 2020048 C0320488 8100288 98026CE  | 2^-73.000000
Probability of differential: 2^-73.000000
[./src/speck-xor-threshold-search.cc:1123] 13 R (      90 10000000) -> ( 8100288  98026CE) : [         1 trails]  2^-73.000000
[./src/speck-xor-threshold-search.cc:1420] Does not match output diffs: ( 8100208, 980224E) vs. ( 8100288, 98026CE)[./src/speck-xor-threshold-search.cc:1403] Add new trail: 2^-72.000000 | 1
speck-xor-threshold-search-tests: ./src/speck-xor-threshold-search.cc:1404: void speck_xor_cluster_trails_boost(int, int, gsl_matrix* (*)[2][2], double*, const differential_t*, differential_t*, boost::unordered_map<std::array<differential_t, 13u>, unsigned int, speck_trail_hash, speck_trail_equal_to, std::allocator<std::pair<const std::array<differential_t, 13u>, unsigned int> > >*, differential_t, differential_t, uint32_t, uint32_t, std::multiset<differential_3d_t, struct_comp_diff_3d_p, std::allocator<differential_3d_t> >*, std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz, std::allocator<differential_3d_t> >*, std::multiset<differential_3d_t, struct_comp_diff_3d_p, std::allocator<differential_3d_t> >*, std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz, std::allocator<differential_3d_t> >*, double): Assertion 
 */
												  /**/

																																																																																																																																																																																																																																																																																			  

/* --- */

  // Fragment: last-round handling of the trail clustering search.
  // If the differences of round n equal the target output difference, the
  // trail held in diff[] is copied into a fixed-size array and looked up in
  // the trail hash table; otherwise the mismatch is reported on stdout.
  if((n == (nrounds - 1)) && (nrounds > 1)) {		  // Last round

	 assert(n == (nrounds - 1));

	 if((diff[n].dx == output_diff.dx) && (diff[n].dy == output_diff.dy)) {

		printf("[%s:%d] CHECKPOINT! Match!\n", __FILE__, __LINE__);

		uint32_t trail_len = nrounds;
		// Zero-initialized; only the first nrounds entries are filled below
		// (npairs is never copied from diff[]).
		differential_t trail[NROUNDS] = {{0,0,0,0.0}};

		for(int i = 0; i < nrounds; i++) {
		  trail[i].dx = diff[i].dx;
		  trail[i].dy = diff[i].dy;
		  trail[i].p = diff[i].p;
		}

		speck_trail_hash trail_hash;  // trails hash function

		// NOTE(review): the product runs over all NROUNDS entries, so when
		// nrounds < NROUNDS the zero-initialized tail makes p equal to 0.
		double p = 1.0;
		std::array<differential_t, NROUNDS> trail_array;
		for(uint32_t i = 0; i < NROUNDS; i++) {
		  trail_array[i].dx = trail[i].dx;
		  trail_array[i].dy = trail[i].dy;
		  trail_array[i].npairs = trail[i].npairs;
		  trail_array[i].p = trail[i].p;
		  p *= trail_array[i].p;
		}

		boost::unordered_map<std::array<differential_t, NROUNDS>, uint32_t, speck_trail_hash, speck_trail_equal_to>::iterator trail_iter 
		  = trails_hash_map->find(trail_array);

		if(trail_iter == trails_hash_map->end()) { // trail is not in the trail table
#if 1									  // DEBUG
		  printf("[%s:%d] Add new trail: 2^%f | %d\n", __FILE__, __LINE__, log2(p), (uint32_t)trails_hash_map->size());
		  // NOTE(review): with assertions enabled this aborts before the new
		  // trail is inserted below.
		  assert(0 == 1);
#endif
		  // The map stores the trail's own hash value as the mapped value.
		  uint32_t trail_hash_val = trail_hash(trail_array);
		  std::pair<std::array<differential_t, NROUNDS>, uint32_t> new_pair (trail_array, trail_hash_val);
		  trails_hash_map->insert(new_pair);
		}
#if 0									  // DEBUG
		for(int i = 0; i < nrounds; i++) {
		  printf("[%s:%d] %8X %8X 2^%f\n", __FILE__, __LINE__, trail[i].dx, trail[i].dy, log2(trail[i].p));
		}
		printf("\n");
#endif
		speck_boost_print_hash_table(*trails_hash_map, trail_len, input_diff.dx, input_diff.dy);
	 } else {
#if 1
		// '\r' rewrites the same console line on every mismatch.
		printf("\r[%s:%d] Does not match output diffs: (%8X,%8X) vs. (%8X,%8X)", __FILE__, __LINE__, 
				 diff[n].dx, diff[n].dy, output_diff.dx, output_diff.dy);
		fflush(stdout);
		//				assert(0 == 1);
#endif
	 }
  }


/* ---- */

		  if(n < (nrounds - 1) || ((nrounds == 1))) {

			 speck_xor_cluster_trails_boost(n+1, nrounds, A, B, diff, trail, trails_hash_map, input_diff, output_diff, right_rot_const, left_rot_const, diff_mset_p, diff_set_dx_dy_dz, croads_diff_mset_p, croads_diff_set_dx_dy_dz, eps);

		  } else {					  // n < (nrounds - 1)

#if 0									  // DEBUG
			 printf("[%s:%d] CHECKPOINT! Last round n = %d\n", __FILE__, __LINE__, n);
#endif
			 //			 assert(0 == 1);

			 assert(n == (nrounds - 1));

			 if((diff[n].dx == output_diff.dx) && (diff[n].dy == output_diff.dy)) {

				printf("[%s:%d] CHECKPOINT! Match!\n", __FILE__, __LINE__);

				uint32_t trail_len = nrounds;
				differential_t trail[NROUNDS] = {{0,0,0,0.0}};

				for(int i = 0; i < nrounds; i++) {
				  trail[i].dx = diff[i].dx;
				  trail[i].dy = diff[i].dy;
				  trail[i].p = diff[i].p;
				}

				speck_trail_hash trail_hash;  // trails hash function

				double p = 1.0;
				std::array<differential_t, NROUNDS> trail_array;
				for(uint32_t i = 0; i < NROUNDS; i++) {
				  trail_array[i].dx = trail[i].dx;
				  trail_array[i].dy = trail[i].dy;
				  trail_array[i].npairs = trail[i].npairs;
				  trail_array[i].p = trail[i].p;
				  p *= trail_array[i].p;
				}

				boost::unordered_map<std::array<differential_t, NROUNDS>, uint32_t, speck_trail_hash, speck_trail_equal_to>::iterator trail_iter 
				  = trails_hash_map->find(trail_array);

				if(trail_iter == trails_hash_map->end()) { // trail is not in the trail table
#if 1									  // DEBUG
				  printf("[%s:%d] Add new trail: 2^%f | %d\n", __FILE__, __LINE__, log2(p), (uint32_t)trails_hash_map->size());
				  assert(0 == 1);
#endif
				  uint32_t trail_hash_val = trail_hash(trail_array);
				  std::pair<std::array<differential_t, NROUNDS>, uint32_t> new_pair (trail_array, trail_hash_val);
				  trails_hash_map->insert(new_pair);
				}
#if 0									  // DEBUG
				for(int i = 0; i < nrounds; i++) {
				  printf("[%s:%d] %8X %8X 2^%f\n", __FILE__, __LINE__, trail[i].dx, trail[i].dy, log2(trail[i].p));
				}
				printf("\n");
#endif
				speck_boost_print_hash_table(*trails_hash_map, trail_len, input_diff.dx, input_diff.dy);
			 } else {
#if 1
				printf("\r[%s:%d] Does not match output diffs: (%8X,%8X) vs. (%8X,%8X)", __FILE__, __LINE__, 
						 diff[n].dx, diff[n].dy, output_diff.dx, output_diff.dy);
				fflush(stdout);
				//				assert(0 == 1);
#endif
			 }
		  }
		  find_iter++;
		}	// while
	 }	else {
		assert(0 == 1);
	 }	// if
  }


/* --- */
	 // Fragment: lower bound p_min on the probability required in round n.
	 // p_i >= p_min = Bn / p1 * p2 ... * p{i-1} * B{n-i} 
	 double p_min = 1.0;
	 for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		p_min *= diff[i].p;
	 }
	 if(n != (nrounds - 1)) {	  // not last round
		p_min = p_min * eps * B[nrounds - 1 - (n + 1)]; 
	 } else {
		// NOTE(review): this discards the product computed above; after the
		// division below the value is B[nrounds-1] / (B[nrounds-1] * eps) = 1/eps.
		p_min = (B[nrounds - 1]) * eps;
	 }
	 //	 p_min = (B[nrounds - 1] * eps) / p_min;
	 // NOTE(review): eps ends up in the denominator here, while the debug
	 // print below uses (B[nrounds - 1] * eps) / p_tmp -- confirm which is meant.
	 p_min = (B[nrounds - 1]) / p_min;

	 // Diagnostic dump of the partial products when the bound exceeds 1.
	 if(p_min > 1.0) {
		double p_tmp = 1.0;
		for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		  p_tmp *= diff[i].p;
		  printf("[%d] 2^%f 2^%f\n", i, log2(diff[i].p), log2(p_tmp));
		}
		// NOTE(review): for n == nrounds - 1 the index nrounds - 1 - (n + 1) is -1.
		p_tmp = p_tmp * 1.0 * B[nrounds - 1 - (n + 1)];
		printf("[%d] B[%d] 2^%f 2^%f\n", n, nrounds - 1 - (n + 1), log2(B[nrounds - 1 - (n + 1)]), log2(p_tmp));
		p_tmp = (B[nrounds - 1] * eps) / p_tmp;
		printf("[%d] B[%d] 2^%f 2^%f\n", n+1, nrounds - 1, log2(B[nrounds - 1]), log2(p_tmp));
		//		p_min = 0.0;				  // <------ !!!
	 }
	 assert(p_min <= 1.0);

	 //	 p_min = *Bn / p_min;
	 //	 if(!(p_min <= 1.0)) {
	 //		printf("B[%d] = %f\n", nrounds - 1 - (n + 1), B[nrounds - 1 - (n + 1)]);
	 //		assert(n != (nrounds - 1));
	 //	 }
#if 1									  // DEBUG
	 // Unreachable with assertions enabled: the assert above already
	 // requires p_min <= 1.0.
	 if(p_min > 1.0) {
		//		double p_min_old = (B[nrounds - 1]) / p_min;
		printf("[%s:%d] CHECKPOINT! n= %d p_min 2^%f %f\n", __FILE__, __LINE__, n, log2(p_min), p_min);
	 }
#endif
	 assert(p_min <= 1.0);

/* --- */

	 // Fragment: alternative form of the p_min bound. It expects p_min to
	 // already hold a value computed by code outside this fragment
	 // (presumably the product of diff[0..n-1].p -- TODO confirm).
	 if(n != (nrounds - 1)) {	  // not last round
		// p_min = (B[nrounds-1] * eps) / (p_min * B[rounds remaining after n])
		p_min = p_min * 1.0 * B[nrounds - 1 - (n + 1)];
		p_min = (B[nrounds - 1] * eps) / p_min;
		// Diagnostic dump of the partial products when the bound exceeds 1.
		if(p_min > 1.0) {
		  double p_tmp = 1.0;
		  for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
			 p_tmp *= diff[i].p;
			 printf("[%d] 2^%f 2^%f\n", i, log2(diff[i].p), log2(p_tmp));
		  }
		  p_tmp = p_tmp * 1.0 * B[nrounds - 1 - (n + 1)];
		  printf("[%d] 2^%f 2^%f\n", n, log2(B[nrounds - 1 - (n + 1)]), log2(p_tmp));
		  p_tmp = (B[nrounds - 1] * eps) / p_tmp;
		  printf("[%d] 2^%f 2^%f\n", n, log2(B[nrounds - 1]), log2(p_tmp));
		}
	 } else {
		// Last round: only scale the incoming value by eps.
		p_min *= eps;
	 }

/* --- */

  //  double B[NROUNDS] = {0.0}; 
  //  differential_t trail[NROUNDS] = {{0, 0, 0, 0.0}};
  // Fragment: build full_trail[] = input difference followed by the NROUNDS
  // round differences of trail[], i.e. full_trail[i+1] = trail[i].
  // Fixes relative to the original snippet: the two assignments of the input
  // difference were missing their terminating ';', and the probability of
  // entry 0 was read from trail[i].p with 'i' not yet declared.
  assert(SPECK_TRAIL_LEN > NROUNDS); // room for the extra entry 0
  differential_t full_trail[SPECK_TRAIL_LEN] = {{0, 0, 0, 0.0}};
  full_trail[0].dx = *dx_input;
  full_trail[0].dy = *dy_input;
  // Entry 0 is the input difference; the trail logs elsewhere in this file
  // print it with probability 1.000000.
  // NOTE(review): confirm 1.0 is the value intended by the original author.
  full_trail[0].p = 1.0;
  for(uint32_t i = 0; i < NROUNDS; i++) {
	 full_trail[i+1].dx = trail[i].dx;
	 full_trail[i+1].dy = trail[i].dy;
	 full_trail[i+1].p = trail[i].p;
  }

/* ---- */

void speck_xdp_add_pddt_gen_random(uint32_t hw_thres, double p_thres, const uint64_t max_size,
											  std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>* diff_set_dx_dy_dz,
											  std::multiset<differential_3d_t, struct_comp_diff_3d_p>* diff_mset_p)
{
  //  uint32_t n = WORD_SIZE;
  //  double p_thres = P_THRES;
  double p = 0.0;

  // init A
  gsl_matrix* A[2][2][2];
  xdp_add_alloc_matrices(A);
  xdp_add_sf(A);
  xdp_add_normalize_matrices(A);

  // init C
  gsl_vector* C = gsl_vector_calloc(XDP_ADD_MSIZE);
  gsl_vector_set(C, XDP_ADD_ISTATE, 1.0);


  uint64_t cnt = 0;

  while(cnt < max_size) {

	 uint32_t da = gen_sparse(hw_thres, WORD_SIZE);
	 uint32_t db = gen_sparse(hw_thres, WORD_SIZE);
	 uint32_t dc = gen_sparse(hw_thres, WORD_SIZE);
	 double p_the = xdp_add(A, da, db, dc);

#if((WORD_SIZE == 16) || (WORD_SIZE == 32))
	 p_the = max_xdp_add_lm(da, db, &dc);
#else
	 p_the = max_xdp_add(A, da, db, &dc);
#endif

  if((p_the >= p_thres) && (hw32(dc) <= hw_thres)) {

		differential_3d_t new_diff;
		new_diff.dx = da;
		new_diff.dy = db;
		new_diff.dz = dc;
		new_diff.p = p;
		diff_set_dx_dy_dz->insert(new_diff);
		diff_mset_p->insert(new_diff);
#if 1									  // DEBUG
		printf("\r[%s:%d] [%10lld / %10lld] | Add %8X %8X -> %8X : %f 2^%4.2f | 2^%4.2f", __FILE__, __LINE__, cnt, max_size, da, db, dc, p, log2(p), log2(p_thres));
		fflush(stdout);
#endif

		cnt++;
	 }
  }

  //  speck_xdp_add_pddt_i(k, n, p_thres, A, C, &da, &db, &dc, &p, diff_set_dx_dy_dz, diff_mset_p, max_size);
#if 0									  // DEBUG
  printf("[%s:%d] p_thres = %f (2^%f), n = %d, #diffs = %d\n", __FILE__, __LINE__, 
			p_thres, log2(p_thres), WORD_SIZE, diff_mset_p->size());
#endif
  assert(diff_set_dx_dy_dz->size() == diff_mset_p->size());

  gsl_vector_free(C);
  xdp_add_free_matrices(A);
}


/* --- */
	 // Fragment: consistency check for an entry found in the highways table.
	 // If the entry's probability is below SPECK_P_THRES, every element of
	 // diff_set_dx_dy_dz that is also below the threshold is printed before
	 // the assertion fires.
	 if(b_found_in_hways) {
		if(!(hway_iter->p >= SPECK_P_THRES)) {
		  std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>::iterator set_iter = diff_set_dx_dy_dz->begin();
		  for(set_iter = diff_set_dx_dy_dz->begin(); set_iter != diff_set_dx_dy_dz->end(); set_iter++) {
			 if(!(set_iter->p >= SPECK_P_THRES)) {
				printf("[%s:%d] %8X %8X %8X 2^%f \n", __FILE__, __LINE__, set_iter->dx, set_iter->dy, set_iter->dz, log2(set_iter->p));
			 }
			 //			 assert(set_iter->p >= SPECK_P_THRES);
		  }
		}
		assert(hway_iter->p >= SPECK_P_THRES);
	 }


/* --- */

/* 

[./tests/speck-xor-threshold-search-tests.cc:67] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-7.000000
B[ 4] = 2^-13.000000
B[ 5] = 2^-21.000000
B[ 6] = 2^-27.000000
B[ 7] = 2^-32.000000
B[ 8] = 2^-37.000000
B[ 9] = 2^-43.000000
B[10] = 2^-53.000000
B[11] = 2^-64.000000
B[12] = 2^-73.000000
  [./tests/speck-xor-threshold-search-tests.cc:74] Final trail:
  0:       90 -> 10000000 1.000000
  1: 80000000 ->        0 0.500000 (2^-1.000000)
  2:   800000 ->   800000 0.500000 (2^-1.000000)
  3:   808000 ->  4808000 0.250000 (2^-2.000000)
  4:  4800080 -> 20840080 0.062500 (2^-4.000000)
  5: A0808080 -> A4A08481 0.031250 (2^-5.000000)
  6: 24000401 ->  104200C 0.003906 (2^-8.000000)
  7:   202000 ->  8012060 0.015625 (2^-6.000000)
  8:  8010000 -> 48080300 0.031250 (2^-5.000000)
  9: 48000000 ->  8401802 0.031250 (2^-5.000000)
  10:  8080802 -> 4A08C812 0.015625 (2^-6.000000)
  11: 4800400A -> 18460098 0.000977 (2^-10.000000)
  12:  2020048 -> C0320488 0.000488 (2^-11.000000)
  13:  8100288 ->  98026CE 0.001953 (2^-9.000000)
p_tot = 0.000000000000000 = 2^-73.000000
  [./tests/speck-xor-threshold-search-tests.cc:118] WORD_SIZE 32 NROUNDS 13 SPECK_P_THRES 0.031250 2^-5.000000 SPECK_MAX_DIFF_CNT 16384 2^14.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 8

 */

/* --- */

/*
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-10.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-23.000000
B[ 7] = 2^-29.000000
  [./tests/speck-xor-threshold-search-tests.cc:74] Final trail:
  0:       14 ->      800 1.000000
  1:     2000 ->        0 0.250000 (2^-2.000000)
  2:       40 ->       40 0.500000 (2^-1.000000)
  3:     8040 ->     8140 0.500000 (2^-1.000000)
  4:       40 ->      542 0.250000 (2^-2.000000)
  5:     8542 ->     904A 0.062500 (2^-4.000000)
  6:     1540 ->     546A 0.015625 (2^-6.000000)
  7:     5400 ->      5A9 0.007812 (2^-7.000000)
  8:      401 ->     12A5 0.015625 (2^-6.000000)
p_tot = 0.000000001862645 = 2^-29.000000
  [./tests/speck-xor-threshold-search-tests.cc:118] WORD_SIZE 16 NROUNDS 8 SPECK_P_THRES 0.030000 2^-5.058894 SPECK_MAX_DIFF_CNT 65536 2^16.00 SPECK_BACK_TO_HWAY 0 SPECK_GREEDY_SEARCH 0 SPECK_MAX_HW 7

real    17m3.505s
user    16m43.519s
sys     0m1.492s
*/

/* --- */

// Fragment: debugging checkpoints that print a message and pause with
// sleep() when specific difference values are encountered. Only the last of
// the three preprocessor blocks is enabled.
#if 0									  // DEBUG
		  if((dx == 0x8000) && (dy == 0)) {
			 printf("\n[%s:%d] CHECKPOINT! (%X %X -> %X %f) (%X %X)\n", __FILE__, __LINE__, dx, dy, dz, pn, dx_init_in, dy_init_in);
			 sleep(3);
		  }
#endif
#if 0									  // DEBUG
	 //	 if((dx == 0x8000) && (dy == 0)) {
	 if((dx == 0x8000)) {
		printf("\n[%s:%d] CHECKPOINT! %X (%X %X)\n", __FILE__, __LINE__, diff[n - 1].dx, dx, dy);
		sleep(10);
	 }
	 if((dx == 0x40) && (dy == 0)) {
		printf("\n[%s:%d] CHECKPOINT! %X (%X %X)\n", __FILE__, __LINE__, diff[n - 1].dx, dx, dy);
		sleep(10);
	 }
#endif
#if 1									  // DEBUG
	 // Enabled: stops for 10 seconds whenever dx == 0x8000 and prints the
	 // dx of the previous round together with the current (dx, dy).
	 if(dx == 0x8000) {
	 //	 if((dx == 0x8000) && (dy == 0)) {
		printf("\n[%s:%d] CHECKPOINT! %X (%X %X)\n", __FILE__, __LINE__, diff[n - 1].dx, dx, dy);
		sleep(10);
	 }
#endif


/* --- */
// Fragment: debugging checkpoint. For the input differences
// (da, db) = (0x8000, 0) it prints the current differential with p_the next to
// the (dx_next, dy_next -> dz_next_max) differential with p_max, then pauses
// for 10 seconds.
#if 1									  // DEBUG
		  if((da == 0x8000) && (db == 0)) {
			 printf("\n[%s:%d] (%X %X -> %X) %f | (%X %X -> %X) %f\n", __FILE__, __LINE__, da, db, *dc, p_the, dx_next, dy_next, dz_next_max, p_max);
			 sleep(10);
		  }
#endif

/* --- */

// Fragment: p_rand = 2^-(2 * WORD_SIZE), presumably the probability of a
// differential over a random 2*WORD_SIZE-bit block (TODO confirm against the
// caller). The original built the power of two with an integer shift, which
// cannot represent 2^64: for WORD_SIZE == 32 it fell back to 2^-63 and for
// larger word sizes the shift count exceeded the width of the type.
// ldexp() yields the exact power of two for every word size.
  double p_rand = ldexp(1.0, -(int)(2 * WORD_SIZE));
  printf("[%s:%d] p_rand 2^%f\n", __FILE__, __LINE__, log2(p_rand));


/* --- */



	 //	 if(B[nrounds - 1] > p_rand) {
	 //		for(uint32_t i = 0; i < nrounds; i++) {
	 //		  best_trail[i].dx = trail[i].dx;
	 //		  best_trail[i].dy = trail[i].dy;
	 //		  best_trail[i].p = trail[i].p;
	 //		}
	 //	 }
	 //  } while((nrounds < NROUNDS) && ((B[nrounds - 1] > p_rand) || (nrounds == 0)));
//  num_rounds = nrounds - 1;
//  if(nrounds == NROUNDS) {
//	 num_rounds = nrounds;
//  }


/* --- */

		// !!! CHECK
		// Fragment: debugging checkpoint. For (*da, *db) = (0x8000, 0) it
		// prints the differential, p_the and *da rotated with
		// LROT(*da, SPECK_RIGHT_ROT_CONST_16BITS), then pauses for 10 seconds.
		//		if((*da == 0x40) && (*db == 0)) {
		if((*da == 0x8000) && (*db == 0)) {
		  printf("\n[%s:%d] Found in pDDT (%X %X -> %X) %f | %X\n", __FILE__, __LINE__, *da, *db, *dc, p_the, LROT(*da, SPECK_RIGHT_ROT_CONST_16BITS));
		  sleep(10);
		}



/* --- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-1.000000
B[ 2] = 2^-3.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-10.000000
B[ 5] = 2^-16.000000
B[ 6] = 2^-23.000000
B[ 7] = 2^-29.000000
[./tests/speck-xor-threshold-search-tests.cc:74] Final trail:
0:       14 ->      800 1.000000
1:     2000 ->        0 0.250000 (2^-2.000000)
2:       40 ->       40 0.500000 (2^-1.000000)
3:     8040 ->     8140 0.500000 (2^-1.000000)
4:       40 ->      542 0.250000 (2^-2.000000)
5:     8542 ->     904A 0.062500 (2^-4.000000)
6:     1540 ->     546A 0.015625 (2^-6.000000)
7:     5400 ->      5A9 0.007812 (2^-7.000000)
8:      401 ->     12A5 0.015625 (2^-6.000000)
p_tot = 0.000000001862645 = 2^-29.000000
[./tests/speck-xor-threshold-search-tests.cc:118] WORD_SIZE 16 NROUNDS 8 SPECK_P_THRES 0.015000 2^-6.058894 SPECK_MAX_DIFF_CNT 16384 2^14.00 SPECK_BACK_TO_HWAY 0
 SPECK_MAX_HW 7

 */

/* --- */

// Fragment: debugging check executed when b_found_in_croads is set. It
// rotates dz right by right_rot_const and asserts that
// speck_xdp_add_is_dz_in_set_dx_dy_dz() reports the pair (dxx_rrot, dyy) as
// present in diff_set_dx_dy_dz.
#if 1																	// DEBUG
		  if(b_found_in_croads) { 
			 uint32_t dxx_rrot = RROT(dz, right_rot_const); 		                     // x_{i+1}
			 bool b_is_hway = speck_xdp_add_is_dz_in_set_dx_dy_dz(dxx_rrot, dyy, *diff_set_dx_dy_dz);
			 printf("[%s:%d] CHECK is HW: dxx_rrot dyy %8X %8X\n\n", __FILE__, __LINE__, dxx_rrot, dyy);
			 assert(b_is_hway);
		  }
#endif


/* ---- */

/**
 * Compute a pDDT for SPECK.
 *
 * Recursive worker: extends the partial differential (*da, *db -> *dc) by one
 * bit at position \p k for each of the 8 combinations of difference bits,
 * computes the probability of the extended prefix as L * A[x][y][z] * C and
 * descends only into prefixes whose probability is at least \p p_thres.
 * When \p k reaches \p n the complete differential is stored in both
 * containers, provided fewer than \p max_size differentials are stored.
 *
 * \param k index of the bit assigned by this call (k == n: all bits assigned).
 * \param n number of bits of the differences.
 * \param p_thres probability threshold.
 * \param A transition matrices, indexed by the difference bits [x][y][z].
 * \param C state vector accumulated over the bits assigned so far.
 * \param da first input difference (prefix).
 * \param db second input difference (prefix).
 * \param dc output difference (prefix).
 * \param p probability of the current prefix.
 * \param diff_set_dx_dy_dz set of stored differentials (unique elements).
 * \param diff_mset_p multiset receiving the same differentials.
 * \param max_size maximum number of differentials to store.
 * \sa xdp_add_pddt_i
 */
void speck_xdp_add_pddt_i(const uint32_t k, const uint32_t n, const double p_thres, 
								  gsl_matrix* A[2][2][2], gsl_vector* C, 
								  uint32_t* da, uint32_t* db, uint32_t* dc, double* p, 
								  std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>* diff_set_dx_dy_dz,
								  std::multiset<differential_3d_t, struct_comp_diff_3d_p>* diff_mset_p,
								  uint64_t max_size)
{
  if(k == n) {
	 // Cross-check the recursively accumulated probability against xdp_add().
	 double p_the = xdp_add(A, *da, *db, *dc);
	 (void)p_the;					  // only read by the assertion / disabled debug print
#if 0									  // DEBUG
	 printf("[%s:%d] XDP_ADD_THE[(%8X,%8X)->%8X] = %6.5f\n", 
			  __FILE__, __LINE__, *da, *db, *dc, p_the);
	 printf("[%s:%d] XDP_ADD_REC[(%8X,%8X)->%8X] = %6.5f\n", 
			  __FILE__, __LINE__, *da, *db, *dc, *p);
#endif
	 if(p_thres > 0.0) {
		assert(*p > 0.0);
	 }
	 assert(*p == p_the);
	 assert(*p >= p_thres);
	 uint64_t len = diff_set_dx_dy_dz->size();
	 // Store on >= so that the condition agrees with the pruning test below
	 // and with the assertion above; the original used a strict '>' here and
	 // silently dropped differentials whose probability equals p_thres.
	 if((*p >= p_thres) && (len < max_size)) {
		differential_3d_t i_diff;
		i_diff.dx = *da;
		i_diff.dy = *db;
		i_diff.dz = *dc;
		i_diff.p = *p;
		// Keep both containers the same size: add to the multiset only when
		// the set did not already hold this differential.
		if(diff_set_dx_dy_dz->insert(i_diff).second) {
		  diff_mset_p->insert(i_diff);
		}
#if 1									  // DEBUG
		printf("\r[%s:%d] %10llu / %10llu | Add %8X %8X -> %8X : %f 2^%4.2f | 2^%4.2f", __FILE__, __LINE__,
				 (unsigned long long)len, (unsigned long long)max_size, *da, *db, *dc, *p, log2(*p), log2(p_thres));
		fflush(stdout);
#endif
	 }
	 return;
  }

  // Table already full: nothing more can be stored below this prefix.
  if(diff_set_dx_dy_dz->size() == max_size)
	 return;

  // init L
  gsl_vector* L = gsl_vector_calloc(XDP_ADD_MSIZE);
  gsl_vector_set_all(L, 1.0);

  // Scratch vector R = A[x][y][z] * C. It is allocated once per call instead
  // of once per (x, y, z): gsl_blas_dgemv() is called with beta = 0.0 and thus
  // overwrites R, and the recursive call only reads it while it is running.
  gsl_vector* R = gsl_vector_calloc(XDP_ADD_MSIZE);

  for(uint32_t x = 0; x < 2; x++) {
	 for(uint32_t y = 0; y < 2; y++) {
		for(uint32_t z = 0; z < 2; z++) {

		  double new_p = 0.0;

		  // L A C
		  gsl_blas_dgemv(CblasNoTrans, 1.0, A[x][y][z], C, 0.0, R);
		  gsl_blas_ddot(L, R, &new_p);

		  // Descend only into prefixes that still meet the threshold.
		  if(new_p >= p_thres) {
			 uint32_t new_da = *da | (x << k);
			 uint32_t new_db = *db | (y << k);
			 uint32_t new_dc = *dc | (z << k);
			 speck_xdp_add_pddt_i(k+1, n, p_thres, A, R, &new_da, &new_db, &new_dc, &new_p, diff_set_dx_dy_dz, diff_mset_p, max_size);
		  }
		}
	 }
  }
  gsl_vector_free(R);
  gsl_vector_free(L);
}

/**
 * For Speck: build a partial DDT for \f$\mathrm{xdp}^{+}\f$.
 *
 * Sets up the transition matrices and the initial state vector, starts the
 * recursion of \ref speck_xdp_add_pddt_i at bit position 0 with all
 * differences equal to zero, and releases the GSL objects afterwards.
 *
 * \param n number of bits of the differences.
 * \param p_thres probability threshold.
 * \param max_size maximum number of differentials to store.
 * \param diff_set_dx_dy_dz set of stored differentials.
 * \param diff_mset_p multiset of the same differentials.
 *
 * \see xdp_add_pddt speck_xdp_add_pddt_i
 */
void speck_xdp_add_pddt(uint32_t n, double p_thres, const uint64_t max_size,
								std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz>* diff_set_dx_dy_dz,
								std::multiset<differential_3d_t, struct_comp_diff_3d_p>* diff_mset_p)
{
  // Transition matrices of xdp-add
  gsl_matrix* A[2][2][2];
  xdp_add_alloc_matrices(A);
  xdp_add_sf(A);
  xdp_add_normalize_matrices(A);

  // Initial state vector: all zero except for the initial state
  gsl_vector* C = gsl_vector_calloc(XDP_ADD_MSIZE);
  gsl_vector_set(C, XDP_ADD_ISTATE, 1.0);

  // Start of the recursion: bit position 0, empty differences
  const uint32_t bit_pos = 0;
  uint32_t da = 0, db = 0, dc = 0;
  double p_start = 0.0;

  speck_xdp_add_pddt_i(bit_pos, n, p_thres, A, C, &da, &db, &dc, &p_start,
							  diff_set_dx_dy_dz, diff_mset_p, max_size);
#if 0									  // DEBUG
  printf("[%s:%d] p_thres = %f (2^%f), n = %d, #diffs = %d\n", __FILE__, __LINE__, 
			p_thres, log2(p_thres), WORD_SIZE, diff_mset_p->size());
#endif
  // Both containers must hold the same number of differentials
  assert(diff_mset_p->size() == diff_set_dx_dy_dz->size());

  xdp_add_free_matrices(A);
  gsl_vector_free(C);
}


/* ---- */

/* 
#if SPECK_BACK_TO_HWAY
	 assert(0==1);
	 //bool xdp_add_is_dz_in_set_dx_dy_dz(uint32_t dx, uint32_t dy,
	 //											  std::set<differential_3d_t, struct_comp_diff_3d_dx_dy_dz> diff_set_dx_dy_dz)

#endif

 */

/* 
Simon48 differential search:

Last Update: Wed Nov  6 04:45:08 2013
[./src/simon-xor-threshold-search.cc:1024] Parameters:
 WORD_SIZE 24
 NROUNDS 15
 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894
 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00
 SIMON_EPS 0.000031 2^-15.000000
 XDP_ROT_AND_MAX_HW 4
 TRAIL_MAX_HW 32
 SIMON_BACK_TO_HWAY 1
 XDP_ROT_PDDT_GEN_RANDOM 0
 XDP_ROT_AND_P_LOW_THRES 0.000000
[./src/simon-xor-threshold-search.cc:1033] Found 38 differentials:
[    1] H[9174BC71] 15R : (  800000   200002) -> (  8A0088   222222) 2^-63.000000
[    2] H[70AE3BC5] 15R : (  800000   200002) -> (   82000   222020) 2^-59.678072
[    3] H[995E3C71] 15R : (  800000   200002) -> (  A80088   A22222) 2^-62.000000
[    4] H[70BE19F1] 15R : (  800000   200002) -> (   80008    A2002) 2^-62.000000
[    5] H[705D6BC5] 15R : (  800000   200002) -> (   A0000   2AA820) 2^-63.000000
[    6] H[7547BBC5] 15R : (  800000   200002) -> (   E2000   3A2020) 2^-63.000000
[    7] H[70361E7B] 15R : (  800000   200002) -> (   8000A   82200A) 2^-63.000000
[    8] H[90F63C71] 15R : (  800000   200002) -> (  880088   222222) 2^-60.678072
[    9] H[7FFCBBC5] 15R : (  800000   200002) -> (  2A2000   AA2020) 2^-63.000000
[   10] H[90AEBBC7] 15R : (  800000   200002) -> (  882000    2A022) 2^-60.192645
[   11] H[754DB83F] 15R : (  800000   200002) -> (   C2002   B2A028) 2^-61.415037
[   12] H[78F6B83F] 15R : (  800000   200002) -> (  282002   22A028) 2^-63.000000
[   13] H[70AEBBC5] 15R : (  800000   200002) -> (   82000   22A020) 2^-57.093109
[   14] H[7229E945] 15R : (  800000   200002) -> (  200200   802820) 2^-63.000000
[   15] H[403EBBC6] 15R : (  800000   200002) -> (  C82000    2A023) 2^-63.000000
[   16] H[702E383F] 15R : (  800000   200002) -> (   82002   A22028) 2^-63.000000
[   17] H[78763BC5] 15R : (  800000   200002) -> (  282000   A22020) 2^-62.000000
[   18] H[ AA9E945] 15R : (  800000   200002) -> (     200    82820) 2^-63.000000
[   19] H[70D6BBC5] 15R : (  800000   200002) -> (   82000   2AA020) 2^-62.000000
[   20] H[781E19F1] 15R : (  800000   200002) -> (  280008   822002) 2^-63.000000
[   21] H[ AD1E945] 15R : (  800000   200002) -> (     200     2820) 2^-60.415037
[   22] H[702CBBC5] 15R : (  800000   200002) -> (   A2000   222020) 2^-63.000000
[   23] H[787E3BC5] 15R : (  800000   200002) -> (  282000   AA2020) 2^-63.000000
[   24] H[75CD3BC5] 15R : (  800000   200002) -> (   C2000   322020) 2^-62.000000
[   25] H[702EB83F] 15R : (  800000   200002) -> (   82002   A2A028) 2^-60.192645
[   26] H[7876BBC5] 15R : (  800000   200002) -> (  282000   A2A020) 2^-59.093109
[   27] H[90F73C71] 15R : (  800000   200002) -> (  880088   232222) 2^-63.000000
[   28] H[7054BBC5] 15R : (  800000   200002) -> (   A2000   2A2020) 2^-60.192645
[   29] H[ A2F6BC5] 15R : (  800000   200002) -> (   20000    AA820) 2^-63.000000
[   30] H[9976BBC7] 15R : (  800000   200002) -> (  A82000   82A022) 2^-63.000000
[   31] H[9D047F61] 15R : (  200020    80088) -> (     200    80888) 2^-48.000000
[   32] H[70B619F1] 15R : (  800000   200002) -> (   80008    22002) 2^-60.000000
[   33] H[ AA2B945] 15R : (  800000   200002) -> (    4200    12820) 2^-63.000000
[   34] H[702D3BC5] 15R : (  800000   200002) -> (   A2000   22A020) 2^-62.000000
[   35] H[75CDBBC5] 15R : (  800000   200002) -> (   C2000   32A020) 2^-59.093109
[   36] H[97E53C71] 15R : (  800000   200002) -> (  8C0088   322222) 2^-63.000000
[   37] H[90AE3BC7] 15R : (  800000   200002) -> (  882000    22022) 2^-63.000000
[   38] H[7D65BBC5] 15R : (  800000   200002) -> (  2C2000   B2A020) 2^-61.415037


 */

/* --- */

/*
[./src/simon-xor-threshold-search.cc:1835] Incoming trail
200020 800080 880008 2 2 880000 880000 200000 200000 80000 80000 0 0 80000 80000 200000 200000 880000 880000 2 2 880008 880008 200020 200020 80088 80088 200 200 80888  | 2^-62.000000
200020 800080 880008 2 2 880000 880000 200000 200000 80000 80000 0 0 80000 80000 200000 200000 880000 880000 2 2 880008 880008 200020 200020 80088 80088 200 200 80888  | 2^-48.000000


[./src/simon-xor-threshold-search.cc:1223] CHECKPOINT! Current trails table
[./src/simon-xor-threshold-search.cc:928] Found 1 trails:
200020 800080 880008 2 2 880000 880000 200000 200000 80000 80000 0 0 80000 80000 200000 200000 880000 880000 2 2 880008 880008 200020 200020 80088 80088 200 200 80888  | 2^-48.000000
Probability of differential: 2^-48.000000
[./src/simon-xor-threshold-search.cc:965] 15 R (  200020    80088) -> (     200    80888) : [         1 trails]  2^-48.000000
*/

/* --- */

// Disabled debug snippet (the enclosing function is not part of this
// dump): print the contents of the trails hash table.
#if 0									  // DEBUG
  printf("\n[%s:%d] Existing trails\n", __FILE__, __LINE__);
  simon_boost_print_hash_table(*trails_hash_map, trail_len);
  printf("\n");
#endif
// Disabled debug snippet: print the contents of the differentials hash
// table, passing diff_len = 2 as the length argument.
#if 0									  // DEBUG
  uint32_t diff_len = 2;
  printf("\n[%s:%d] Existing differentials\n", __FILE__, __LINE__);
  simon_boost_print_hash_table(*diffs_hash_map, diff_len);
  printf("\n");
#endif

/* --- */

		//		std::cout << std::string(50, '\n'); // clear screen

/* --- */

/* 
		printf("\n[%s:%d] Incoming diff\n", __FILE__, __LINE__);
		for(int i = 0; i <= n; i++) {
		  p_tmp *= diff[i].p;
		  printf("%2d(%X %X) ", i, diff[i].dx, diff[i].dy);
		}
		printf(" | 2^%f\n", log2(p_tmp));

		p_tmp = 1.0;

 */
/* 

Simon32/48/64 : current results on differential search: 6 Nov 2013, 12:50 am

Simon32, 13 R

#--- [./tests/simon-xor-threshold-search-tests.cc:2172] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:2224] WORD_SIZE 16 NROUNDS 13 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 4 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:2705] trail_len 13
[./src/simon-xor-threshold-search.cc:2761] Add initial trail: 2^-36.000000 | 0
[./src/simon-xor-threshold-search.cc:2765] Initial trail: 13 R (       0       40) -> (       0     4000) : [         1 trails]  2^-36.000000
[./src/simon-xor-threshold-search.cc:919] 13 R (       0       40) -> (       0     4000) : [     14243 trails]  2^-30.637183
[./src/simon-xor-threshold-search.cc:919] 13 R (       0       40) -> (       0     4000) : [     18190 trails]  2^-30.452932

Simon48, 15 R

max HW 4

#--- [./tests/simon-xor-threshold-search-tests.cc:2172] Tests, WORD_SIZE  = 24, MASK =   FFFFFF
[./tests/simon-xor-threshold-search-tests.cc:2224] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 4 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:2705] trail_len 15
[./src/simon-xor-threshold-search.cc:2761] Add initial trail: 2^-48.000000 | 0
[./src/simon-xor-threshold-search.cc:2765] Initial trail: 15 R (  200020    80088) -> (     200    80888) : [         1 trails]  2^-48.000000
[./src/simon-xor-threshold-search.cc:919] 15 R (  200020    80088) -> (     200    80888) : [      6570 trails]  2^-43.654803
[./src/simon-xor-threshold-search.cc:919] 15 R (  200020    80088) -> (     200    80888) : [     18504 trails]  2^-43.106484

max HW 5

#--- [./tests/simon-xor-threshold-search-tests.cc:2284] Tests, WORD_SIZE  = 24, MASK =   FFFFFF
[./tests/simon-xor-threshold-search-tests.cc:2336] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 4 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:2705] trail_len 15
[./src/simon-xor-threshold-search.cc:2761] Add initial trail: 2^-48.000000 | 0
[./src/simon-xor-threshold-search.cc:2765] Initial trail: 15 R (  200000   888080) -> (   20200    88080) : [         1 trails]  2^-48.000000
[./src/simon-xor-threshold-search.cc:919] 15 R (  200000   888080) -> (   20200    88080) : [      2018 trails]  2^-45.127483
[./src/simon-xor-threshold-search.cc:919] 15 R (  200000   888080) -> (   20200    88080) : [      4768 trails]  2^-44.703618


Simon64, 20 R

#--- [./tests/simon-xor-threshold-search-tests.cc:2172] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./tests/simon-xor-threshold-search-tests.cc:2224] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 4 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:2705] trail_len 20
[./src/simon-xor-threshold-search.cc:2761] Add initial trail: 2^-70.000369 | 0
[./src/simon-xor-threshold-search.cc:2765] Initial trail: 20 R ( 4000000 11000000) -> ( 1000000  4000000) : [         1 trails]  2^-70.000369
[./src/simon-xor-threshold-search.cc:919] 20 R ( 4000000 11000000) -> ( 1000000  4000000) : [      8101 trails]  2^-61.619953
[./src/simon-xor-threshold-search.cc:919] 20 R ( 4000000 11000000) -> ( 1000000  4000000) : [     26649 trails]  2^-60.401366

Simon64, 21 R

#--- [./tests/simon-xor-threshold-search-tests.cc:2286] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./tests/simon-xor-threshold-search-tests.cc:2338] WORD_SIZE 32 NROUNDS 21 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 4 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:2762] trail_len 21
[./src/simon-xor-threshold-search.cc:2818] Add initial trail: 2^-72.000369 | 0
[./src/simon-xor-threshold-search.cc:2822] Initial trail: 21 R ( 4000000 11000000) -> ( 4000000 11000000) : [         1 trails]  2^-72.000369
[./src/simon-xor-threshold-search.cc:964] 21 R ( 4000000 11000000) -> ( 4000000 11000000) : [        66 trails]  2^-67.834800
[./src/simon-xor-threshold-search.cc:964] 21 R ( 4000000 11000000) -> ( 4000000 11000000) : [      5557 trails]  2^-63.858288
[./src/simon-xor-threshold-search.cc:964] 21 R ( 4000000 11000000) -> ( 4000000 11000000) : [      5561 trails]  2^-63.856215

 */

/* --- */
  // Orphaned table rows: the array declaration they belong to is not
  // part of this snippet. Each row reads {left value, right value, 0,
  // probability}; the trailing comment gives log2 of the probability.
  // The twelve non-zero rows coincide with the Simon32 12-round trail
  // "400 -> 1800 ... 500 -> 1500" (p_tot = 2^-34) printed in the log
  // comment further below. The final all-zero row is presumably a
  // terminator/padding entry -- TODO confirm.
  // NOTE(review): 0.003906 is 2^-8 = 0.00390625 truncated to six decimals.
  { 0x400, 0x1800, 0, 0.250000}, //(2^-2.000000)
  { 0x100,    0x0, 0, 0.250000}, //(2^-2.000000)
  {   0x0,  0x100, 0, 1.000000}, //(2^0.000000)
  { 0x100,  0x400, 0, 0.250000}, //(2^-2.000000)
  { 0x400, 0x1100, 0, 0.250000}, //(2^-2.000000)
  {0x1100, 0x4200, 0, 0.062500}, //(2^-4.000000)
  {0x4200, 0x1D01, 0, 0.062500}, //(2^-4.000000)
  {0x1D01,  0x500, 0, 0.003906}, //(2^-8.000000)
  { 0x500,  0x100, 0, 0.125000}, //(2^-3.000000)
  { 0x100,  0x100, 0, 0.250000}, //(2^-2.000000)
  { 0x100,  0x500, 0, 0.250000}, //(2^-2.000000)
  { 0x500, 0x1500, 0, 0.125000}, //(2^-3.000000)
  {0, 0, 0, 0.0},


/* ---- */

// Snippet from a function body that is not part of this dump (s, t,
// cnt_diff, max_cnt, p_thres and the two hways_* containers are declared
// elsewhere). It fills hways_diff_set_dx_dy / hways_diff_mset_p in one
// of two ways, chosen at compile time by XDP_ROT_PDDT_GEN_RANDOM.
#if !XDP_ROT_PDDT_GEN_RANDOM 
  // Recursive generation: start xdp_rot_and_pddt_i at k = 0 with both
  // differences (delta, dc) equal to zero.
  uint32_t k = 0;
  uint32_t n = WORD_SIZE;
  uint32_t delta = 0;
  uint32_t dc = 0;
  xdp_rot_and_pddt_i(k, n, s, t, delta, dc, hways_diff_set_dx_dy, hways_diff_mset_p, &cnt_diff, max_cnt, p_thres);
#else	 // XDP_ROT_PDDT_GEN_RANDOM 

  // Sampling-based generation: keep drawing (da, dc) pairs until N = 16
  // of them have been accepted.

  //	 bool b_low_hw = (hw32(da) <= XDP_ROT_AND_MAX_HW);
  //	 assert(b_low_hw);
  uint32_t N = (1ULL << 4);
  uint32_t max_hw = 4;
  uint32_t i = 0;
  //  for(uint32_t i = 0; i < N; i++) {
  // i advances only when a pair is accepted, hence while instead of for.
  // NOTE(review): once cnt_diff >= max_cnt no pair can be accepted any
  // more, so i would never reach N and the loop would not terminate --
  // confirm that max_cnt - cnt_diff >= N always holds at this point.
  while(i != N) {

	 // presumably gen_sparse returns a word of WORD_SIZE bits with at
	 // most max_hw bits set -- TODO confirm
	 uint32_t da = gen_sparse(max_hw, WORD_SIZE);	 
	 uint32_t dc = gen_sparse(max_hw, WORD_SIZE);

	 double p = xdp_rot_and(da, dc, s, t);

	 // NOTE(review): the threshold here is the macro XDP_ROT_AND_P_THRES,
	 // whereas the other branch passes the variable p_thres -- confirm
	 // that this difference is intended.
	 if((p > XDP_ROT_AND_P_THRES) && (p != 0.0) && (cnt_diff < max_cnt)) {

		differential_t diff;
		diff.dx = da;
		diff.dy = dc;
		diff.p = p;

		hways_diff_mset_p->insert(diff);
		hways_diff_set_dx_dy->insert(diff);
		(cnt_diff)++;
#if 1									  // DEBUG
		printf("%10lld / %10lld\r", cnt_diff, max_cnt);
		fflush(stdout);
#endif
		// NOTE(review): if hways_diff_set_dx_dy rejects duplicates, a
		// repeated (da, dc) pair leaves its size behind cnt_diff and this
		// assert fires -- verify against the container type.
		assert(cnt_diff == hways_diff_set_dx_dy->size());
		i++;
	 }
  }
#endif

/* --- */

/* 
Simon32 12R 2^-34 with threshold search!!!! <-------------

Parameters:
------------------------------------------------ !!!!!!!!!! ----------------------------------
[./src/simon-xor-threshold-search.cc:2021] WORD_SIZE 16 NROUNDS 12 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[------------------------------------------------ !!!!!!!!!! ----------------------------------

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
pDDT sizes: HW Dp 128, Dxy 128, CR Dp 0, Dxy 0, p_thres 0.060000 2^-4.058894
0:      400 ->     1800 0.250000 (2^-2.000000)
1:      100 ->        0 0.250000 (2^-2.000000)
2:        0 ->      100 1.000000 (2^0.000000)
3:      100 ->      400 0.250000 (2^-2.000000)
4:      400 ->     1100 0.250000 (2^-2.000000)
5:     1100 ->     4200 0.062500 (2^-4.000000)
6:     4200 ->     1D01 0.062500 (2^-4.000000)
7:     1D01 ->      500 0.003906 (2^-8.000000)
8:      500 ->      100 0.125000 (2^-3.000000)
9:      100 ->      100 0.250000 (2^-2.000000)
10:      100 ->      500 0.250000 (2^-2.000000)
11:      500 ->     1500 0.125000 (2^-3.000000)
p_tot = 0.000000000058208 = 2^-34.000000, Bn = 0.000000 = 2^-34.000000
[./src/simon-xor-threshold-search.cc:2021] WORD_SIZE 16 NROUNDS 12 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:2025] nrounds = 12
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():367] dy_init        0
[./src/simon-xor-threshold-search.cc:387] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)      400 ->     1800
EXP  0: 0.249572 (2^-2.002473)      400 ->     1800

THE  1: 0.250000 (2^-2.000000)      100 ->        0
EXP  1: 0.249547 (2^-2.002617)      100 ->        0

THE  2: 1.000000 (2^0.000000)        0 ->      100
EXP  2: 1.000000 (2^0.000000)        0 ->      100

THE  3: 0.250000 (2^-2.000000)      100 ->      400
EXP  3: 0.249929 (2^-2.000407)      100 ->      400

THE  4: 0.250000 (2^-2.000000)      400 ->     1100
EXP  4: 0.250717 (2^-1.995867)      400 ->     1100

THE  5: 0.062500 (2^-4.000000)     1100 ->     4200
EXP  5: 0.062199 (2^-4.006973)     1100 ->     4200

THE  6: 0.062500 (2^-4.000000)     4200 ->     1D01
EXP  6: 0.062284 (2^-4.004984)     4200 ->     1D01

THE  7: 0.003906 (2^-8.000000)     1D01 ->      500
EXP  7: 0.003993 (2^-7.968299)     1D01 ->      500

THE  8: 0.125000 (2^-3.000000)      500 ->      100
EXP  8: 0.124821 (2^-3.002071)      500 ->      100

THE  9: 0.250000 (2^-2.000000)      100 ->      100
EXP  9: 0.249529 (2^-2.002721)      100 ->      100

THE 10: 0.250000 (2^-2.000000)      100 ->      500
EXP 10: 0.249928 (2^-2.000418)      100 ->      500

THE 11: 0.125000 (2^-3.000000)      500 ->     1500
EXP 11: 0.125629 (2^-2.992754)      500 ->     1500

OK
[./src/simon-xor-threshold-search.cc:516] Verify P of differentials (2^20.000000 CPs)...
Input differences:      400     1900

R# 0 Output differences:      100      400
THE  1: 0.250000 (2^-2.000000)      400 ->      100
EXP  1: 0.249800 (2^-2.001156)      400 ->      100

R# 1 Output differences:        0      100
THE  2: 0.062500 (2^-4.000000)      100 ->        0
EXP  2: 0.062008 (2^-4.011404)      100 ->        0

R# 2 Output differences:      100        0
THE  3: 0.062500 (2^-4.000000)        0 ->      100
EXP  3: 0.062473 (2^-4.000617)        0 ->      100

R# 3 Output differences:      400      100
THE  4: 0.015625 (2^-6.000000)      100 ->      400
EXP  4: 0.015695 (2^-5.993586)      100 ->      400

R# 4 Output differences:     1100      400
THE  5: 0.003906 (2^-8.000000)      400 ->     1100
EXP  5: 0.002177 (2^-8.843285)      400 ->     1100

R# 5 Output differences:     4200     1100
THE  6: 0.000244 (2^-12.000000)     1100 ->     4200
EXP  6: 0.000202 (2^-12.272080)     1100 ->     4200

R# 6 Output differences:     1D01     4200
THE  7: 0.000015 (2^-16.000000)     4200 ->     1D01
EXP  7: 0.000010 (2^-16.678072)     4200 ->     1D01

R# 7 Output differences:      500     1D01
THE  8: 0.000000 (2^-24.000000)     1D01 ->      500
EXP  8: 0.000000 (2^-inf)     1D01 ->      500

R# 8 Output differences:      100      500
THE  9: 0.000000 (2^-27.000000)      500 ->      100
EXP  9: 0.000000 (2^-inf)      500 ->      100

R# 9 Output differences:      100      100
THE 10: 0.000000 (2^-29.000000)      100 ->      100
EXP 10: 0.000000 (2^-inf)      100 ->      100

R#10 Output differences:      500      100
THE 11: 0.000000 (2^-31.000000)      100 ->      500
EXP 11: 0.000000 (2^-inf)      100 ->      500

R#11 Output differences:     1500      500
THE 12: 0.000000 (2^-34.000000)      500 ->     1500
EXP 12: 0.000000 (2^-inf)      500 ->     1500

OK

[./src/simon-xor-threshold-search.cc:2128] Best differential for 12 R: (   0    0) -> (   0    0) 2^-inf
[./src/simon-xor-threshold-search.cc:2156] nrounds 12
[./tests/simon-xor-threshold-search-tests.cc:258] Final best trail:
0:      400 ->     1800 0.250000 (2^-2.000000)
1:      100 ->        0 0.250000 (2^-2.000000)
2:        0 ->      100 1.000000 (2^0.000000)
3:      100 ->      400 0.250000 (2^-2.000000)
4:      400 ->     1100 0.250000 (2^-2.000000)
5:     1100 ->     4200 0.062500 (2^-4.000000)
6:     4200 ->     1D01 0.062500 (2^-4.000000)
7:     1D01 ->      500 0.003906 (2^-8.000000)
8:      500 ->      100 0.125000 (2^-3.000000)
9:      100 ->      100 0.250000 (2^-2.000000)
10:      100 ->      500 0.250000 (2^-2.000000)
11:      500 ->     1500 0.125000 (2^-3.000000)
p_tot = 0.000000000058208 = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:269] Final bounds 12R:
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:2358] WORD_SIZE 16 NROUNDS 12 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1

real    36m17.570s
user    36m3.007s
sys     0m0.232s

Simon32, 13R:

[./src/simon-xor-threshold-search.cc:2042] WORD_SIZE 16 NROUNDS 13 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1873] nrounds = 12, Bn_init = 2^-36.000000 : key      16E     F9EC      EEC     150D
[./src/simon-xor-threshold-search.cc:1517]  1: [75 / 128]     2200 ->        0, 2^-4.000000, 2^-36.000000 hw     2     01
[./src/simon-xor-threshold-search.cc:1726] 11 | Update best found Bn: 2^-36.000000 -> 2^-35.000000
[./src/simon-xor-threshold-search.cc:1517]  1: [12 / 128]      200 ->        0, 2^-2.000000, 2^-35.000000 hw     1     01
[./src/simon-xor-threshold-search.cc:1726] 11 | Update best found Bn: 2^-35.000000 -> 2^-34.000000
[./src/simon-xor-threshold-search.cc:1517]  1: [127 / 128]     5080 ->     8000, 2^-4.000000, 2^-34.000000 hw     3     1
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
pDDT sizes: HW Dp 128, Dxy 128, CR Dp 0, Dxy 0, p_thres 0.060000 2^-4.058894
0:      400 ->     1800 0.250000 (2^-2.000000)
1:      100 ->        0 0.250000 (2^-2.000000)
2:        0 ->      100 1.000000 (2^0.000000)
3:      100 ->      400 0.250000 (2^-2.000000)
4:      400 ->     1100 0.250000 (2^-2.000000)
5:     1100 ->     4200 0.062500 (2^-4.000000)
6:     4200 ->     1D01 0.062500 (2^-4.000000)
7:     1D01 ->      500 0.003906 (2^-8.000000)
8:      500 ->      100 0.125000 (2^-3.000000)
9:      100 ->      100 0.250000 (2^-2.000000)
10:      100 ->      500 0.250000 (2^-2.000000)
11:      500 ->     1500 0.125000 (2^-3.000000)
p_tot = 0.000000000058208 = 2^-34.000000, Bn = 0.000000 = 2^-34.000000
[./src/simon-xor-threshold-search.cc:1959] Init bound: 1500 -> 0 = 0.062500 2^-4.000000
[./src/simon-xor-threshold-search.cc:2042] WORD_SIZE 16 NROUNDS 13 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1873] nrounds = 13, Bn_init = 2^-38.000000 : key      16E     F9EC      EEC     150D
[./src/simon-xor-threshold-search.cc:1517]  1: [ 1 / 128]     8000 ->        0, 2^-2.000000, 2^-38.000000 hw     1     0
[./src/simon-xor-threshold-search.cc:1726] 12 | Update best found Bn: 2^-38.000000 -> 2^-36.000000
[./src/simon-xor-threshold-search.cc:1470]  0: [18 / 128]     A000 ->        0, 2^-3.000000, 2^-36.0000000 hw     3     1
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
B[12] = 2^-36.000000
pDDT sizes: HW Dp 128, Dxy 128, CR Dp 0, Dxy 0, p_thres 0.060000 2^-4.058894
0:        0 ->        0 1.000000 (2^0.000000)
1:     8000 ->        2 0.250000 (2^-2.000000)
2:        2 ->     8008 0.250000 (2^-2.000000)
3:     8008 ->       20 0.062500 (2^-4.000000)
4:       20 ->     8088 0.250000 (2^-2.000000)
5:     8088 ->      202 0.015625 (2^-6.000000)
6:      202 ->     8880 0.062500 (2^-4.000000)
7:     8880 ->     2000 0.015625 (2^-6.000000)
8:     2000 ->      880 0.250000 (2^-2.000000)
9:      880 ->      200 0.062500 (2^-4.000000)
10:      200 ->       80 0.250000 (2^-2.000000)
11:       80 ->        0 0.250000 (2^-2.000000)
12:        0 ->       80 1.000000 (2^0.000000)
p_tot = 0.000000000014552 = 2^-36.000000, Bn = 0.000000 = 2^-36.000000
[./src/simon-xor-threshold-search.cc:2042] WORD_SIZE 16 NROUNDS 13 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:2046] nrounds = 13
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():388] dy_init        0
[./src/simon-xor-threshold-search.cc:408] Verify P for one round (2^20.000000 CPs)...
THE  0: 1.000000 (2^0.000000)        0 ->        0
EXP  0: 1.000000 (2^0.000000)        0 ->        0

THE  1: 0.250000 (2^-2.000000)     8000 ->        2
EXP  1: 0.250390 (2^-1.997751)     8000 ->        2

THE  2: 0.250000 (2^-2.000000)        2 ->     8008
EXP  2: 0.250201 (2^-1.998839)        2 ->     8008

THE  3: 0.062500 (2^-4.000000)     8008 ->       20
EXP  3: 0.062503 (2^-3.999934)     8008 ->       20

THE  4: 0.250000 (2^-2.000000)       20 ->     8088
EXP  4: 0.250285 (2^-1.998355)       20 ->     8088

THE  5: 0.015625 (2^-6.000000)     8088 ->      202
EXP  5: 0.015527 (2^-6.009098)     8088 ->      202

THE  6: 0.062500 (2^-4.000000)      202 ->     8880
EXP  6: 0.062750 (2^-3.994244)      202 ->     8880

THE  7: 0.015625 (2^-6.000000)     8880 ->     2000
EXP  7: 0.015526 (2^-6.009187)     8880 ->     2000

THE  8: 0.250000 (2^-2.000000)     2000 ->      880
EXP  8: 0.249656 (2^-2.001988)     2000 ->      880

THE  9: 0.062500 (2^-4.000000)      880 ->      200
EXP  9: 0.062540 (2^-3.999076)      880 ->      200

THE 10: 0.250000 (2^-2.000000)      200 ->       80
EXP 10: 0.249817 (2^-2.001057)      200 ->       80

THE 11: 0.250000 (2^-2.000000)       80 ->        0
EXP 11: 0.249196 (2^-2.004647)       80 ->        0

THE 12: 1.000000 (2^0.000000)        0 ->       80
EXP 12: 1.000000 (2^0.000000)        0 ->       80

OK
[./src/simon-xor-threshold-search.cc:537] Verify P of differentials (2^20.000000 CPs)...
Input differences:        0     8000

R# 0 Output differences:     8000        0
THE  1: 1.000000 (2^0.000000)        0 ->     8000
EXP  1: 1.000000 (2^0.000000)        0 ->     8000

R# 1 Output differences:        2     8000
THE  2: 0.250000 (2^-2.000000)     8000 ->        2
EXP  2: 0.250278 (2^-1.998399)     8000 ->        2

R# 2 Output differences:     8008        2
THE  3: 0.062500 (2^-4.000000)        2 ->     8008
EXP  3: 0.062513 (2^-3.999692)        2 ->     8008

R# 3 Output differences:       20     8008
THE  4: 0.003906 (2^-8.000000)     8008 ->       20
EXP  4: 0.006237 (2^-7.324925)     8008 ->       20

R# 4 Output differences:     8088       20
THE  5: 0.000977 (2^-10.000000)       20 ->     8088
EXP  5: 0.001558 (2^-9.325808)       20 ->     8088

R# 5 Output differences:      202     8088
THE  6: 0.000015 (2^-16.000000)     8088 ->      202
EXP  6: 0.000015 (2^-16.000000)     8088 ->      202

R# 6 Output differences:     8880      202
THE  7: 0.000001 (2^-20.000000)      202 ->     8880
EXP  7: 0.000002 (2^-19.000000)      202 ->     8880

R# 7 Output differences:     2000     8880
THE  8: 0.000000 (2^-26.000000)     8880 ->     2000
EXP  8: 0.000000 (2^-inf)     8880 ->     2000

R# 8 Output differences:      880     2000
THE  9: 0.000000 (2^-28.000000)     2000 ->      880
EXP  9: 0.000000 (2^-inf)     2000 ->      880

R# 9 Output differences:      200      880
THE 10: 0.000000 (2^-32.000000)      880 ->      200
EXP 10: 0.000000 (2^-inf)      880 ->      200

R#10 Output differences:       80      200
THE 11: 0.000000 (2^-34.000000)      200 ->       80
EXP 11: 0.000000 (2^-inf)      200 ->       80

R#11 Output differences:        0       80
THE 12: 0.000000 (2^-36.000000)       80 ->        0
EXP 12: 0.000000 (2^-inf)       80 ->        0

R#12 Output differences:       80        0
THE 13: 0.000000 (2^-36.000000)        0 ->       80
EXP 13: 0.000000 (2^-inf)        0 ->       80

OK

[./src/simon-xor-threshold-search.cc:2149] Best differential for 13 R: (   0    0) -> (   0    0) 2^-inf
[./src/simon-xor-threshold-search.cc:2177] nrounds 13
[./tests/simon-xor-threshold-search-tests.cc:258] Final best trail:
0:        0 ->        0 1.000000 (2^0.000000)
1:     8000 ->        2 0.250000 (2^-2.000000)
2:        2 ->     8008 0.250000 (2^-2.000000)
3:     8008 ->       20 0.062500 (2^-4.000000)
4:       20 ->     8088 0.250000 (2^-2.000000)
5:     8088 ->      202 0.015625 (2^-6.000000)
6:      202 ->     8880 0.062500 (2^-4.000000)
7:     8880 ->     2000 0.015625 (2^-6.000000)
8:     2000 ->      880 0.250000 (2^-2.000000)
9:      880 ->      200 0.062500 (2^-4.000000)
10:      200 ->       80 0.250000 (2^-2.000000)
11:       80 ->        0 0.250000 (2^-2.000000)
12:        0 ->       80 1.000000 (2^0.000000)
p_tot = 0.000000000014552 = 2^-36.000000
[./tests/simon-xor-threshold-search-tests.cc:269] Final bounds 13R:
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
B[12] = 2^-36.000000
[./tests/simon-xor-threshold-search-tests.cc:2362] WORD_SIZE 16 NROUNDS 13 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1

real    47m13.745s
user    47m8.361s
sys     0m0.220s



 */


/* 
[ 2495]    0   40   40  100  100  440  440 1000 1000 4440 4440  181  181 4046 4046  21C  21C 4006 4006   41   41 4000 4000    0    0 4000  | 2^-47.000000
Probability of differential: 2^-31.743091
[./src/simon-xor-threshold-search.cc:2578] Penultimate round does not match output diff: B444 vs. 0^C^-6.000000). New sizes: Dxy 32, Dp 32.28.

[ 2369] 200020 880008 880008    2    2 880000 880000 200000 200000 80000 80000    0    0 80000 80000 300008 300008 880020 880020 10000B 10000B C80008 C80008 300028 300028 80088 80088  200  200 80888  | 2^-62.000000
Probability of differential: 2^-44.239522
[./src/simon-xor-threshold-search.cc:897] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 4 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/xdp-rot-and.cc:1365] 76 / 4194304 : NEW Croad:   200103   410102 0.00781 2^-7.00 | CR size: Dp         76, Dxy         76^C 256, Dp 256.

[  685] 4000000 1000000 1000000    0    0 1000000 1000000 4000000 4000000 11000000 11000000 40000000 40000000 11000001 11000001 4000004 4000004 1000011 1000011   43   43 1000019 1000019 4000007 4000007 19000001 19000001 42000001 42000001 11000006 11000006 6000000 6000000 1000000 1000000    0    0 1000000 1000000 4000000  | 2^-80.000000
Probability of differential: 2^-63.806684
[./src/simon-xor-threshold-search.cc:897] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 4 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:2584] Penultimate round does not match output diff: 49004102 vs. 1000000^C). New sizes: Dxy 16, Dp 16.28..

 */


/* --- */

/* 
	[./src/simon-xor-threshold-search.cc:863] Found 18 trails:
[    1]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   10   10 4004 4004    1    1 4000 4000    0    0 4000  | 2^-36.000000
[    2]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   50   50 4184 4184 8001 8001 4000 4000    0    0 4000  | 2^-42.000000
[    3]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   18   18 4004 4004    1    1 4000 4000    0    0 4000  | 2^-38.000000
[    4]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   50   50 4104 4104 8041 8041 4000 4000    0    0 4000  | 2^-42.000000
[    5]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   50   50 4184 4184   41   41 4000 4000    0    0 4000  | 2^-42.000000
[    6]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   18   18 4004 4004 8041 8041 4000 4000    0    0 4000  | 2^-41.000000
[    7]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   50   50 4104 4104    1    1 4000 4000    0    0 4000  | 2^-39.000000
[    8]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   18   18 4004 4004 8001 8001 4000 4000    0    0 4000  | 2^-40.000000
[    9]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   58   58  184  184   41   41 4000 4000    0    0 4000  | 2^-43.000000
[   10]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   10   10 4004 4004 8041 8041 4000 4000    0    0 4000  | 2^-39.000000
[   11]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   10   10 4004 4004 8001 8001 4000 4000    0    0 4000  | 2^-38.000000
[   12]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   10   10 4004 4004   41   41 4000 4000    0    0 4000  | 2^-38.000000
[   13]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   50   50 4184 4184 8041 8041 4000 4000    0    0 4000  | 2^-43.000000
[   14]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   50   50 4104 4104 8001 8001 4000 4000    0    0 4000  | 2^-41.000000
[   15]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   58   58  184  184 8041 8041 4000 4000    0    0 4000  | 2^-44.000000
[   16]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   18   18 4004 4004   41   41 4000 4000    0    0 4000  | 2^-40.000000
[   17]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   50   50 4104 4104   41   41 4000 4000    0    0 4000  | 2^-41.000000
[   18]    0   40   40  100  100  440  440 1000 1000 4440 4440  101  101 4044 4044   58   58  104  104   41   41 4000 4000    0    0 4000  | 2^-41.000000
Probability of differential: 2^-34.788112

 */

/* --- */

/*
Found with parameters:

Time: Tue Oct 29 12:05:49 2013
[./tests/simon-xor-threshold-search-tests.cc:2205] 
 WORD_SIZE 16
 NROUNDS 13
 XDP_ROT_AND_P_THRES 0.030000 2^-5.058894
 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00
 SIMON_EPS 0.000031 2^-15.000000


B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
B[12] = 2^-37.000000
 0:     8000 ->        2 0.250000 (2^-2.000000)
 1:     2200 ->      800 0.062500 (2^-4.000000)
 2:      800 ->      200 0.250000 (2^-2.000000)
 3:      200 ->        0 0.250000 (2^-2.000000)
 4:        0 ->      200 1.000000 (2^0.000000)
 5:      200 ->      800 0.250000 (2^-2.000000)
 6:      800 ->     2200 0.250000 (2^-2.000000)
 7:     2200 ->     8400 0.062500 (2^-4.000000)
 8:     8400 ->     3A02 0.062500 (2^-4.000000)
 9:     3A02 ->      A00 0.003906 (2^-8.000000)
10:      A00 ->      200 0.125000 (2^-3.000000)
11:      200 ->      200 0.250000 (2^-2.000000)
12:      200 ->      A00 0.250000 (2^-2.000000)
p_tot = 0.000000000007276 = 2^-37.000000, Bn = 0.000000 = 2^-37.000000
[./src/simon-xor-threshold-search.cc:1663] nrounds = 13

*/

// For Simon32 (16-bit words, see the WORD_SIZE assert below): starting from a
// fixed 13-round trail found with threshold search, search for clusters of
// other trails connecting the same differential.
//
// The bounds B[] and the trail are transcribed from the search log in the
// comment that precedes this function. The body is compiled only when
// NROUNDS == 13; for any other NROUNDS the function is an empty stub.
void simon32_diff_search_fixed()
{
#if(NROUNDS == 13)
  assert(WORD_SIZE == 16);
  assert(NROUNDS == 13);

  // Bounds B[0..12] from the log, each an exact power of two.
  double B[NROUNDS] = {
	 (1.0 / (double)(1ULL <<  0)),
	 (1.0 / (double)(1ULL <<  4)),
	 (1.0 / (double)(1ULL <<  4)),
	 (1.0 / (double)(1ULL <<  6)),
	 (1.0 / (double)(1ULL <<  8)),
	 (1.0 / (double)(1ULL << 12)),
	 (1.0 / (double)(1ULL << 14)),
	 (1.0 / (double)(1ULL << 18)),
	 (1.0 / (double)(1ULL << 20)),
	 (1.0 / (double)(1ULL << 26)),
	 (1.0 / (double)(1ULL << 30)),
	 (1.0 / (double)(1ULL << 34)),
	 (1.0 / (double)(1ULL << 37)),
  };

  // Per-round transitions {dx, dy, 0, p} from the log. The probabilities are
  // written as exact powers of two rather than as the 6-decimal values the log
  // prints: the printed "0.003906" of round 9 is only an approximation of
  // 2^-8 (= 0.00390625, log2(0.003906) is about -8.000092), and using it
  // verbatim makes the product of the trail fall slightly below B[12] = 2^-37.
  differential_t trail[NROUNDS] = {
	 {0x8000,    0x2, 0, (1.0 / (double)(1ULL << 2))}, // 0.250000 (2^-2)
	 {0x2200,  0x800, 0, (1.0 / (double)(1ULL << 4))}, // 0.062500 (2^-4)
	 { 0x800,  0x200, 0, (1.0 / (double)(1ULL << 2))}, // 0.250000 (2^-2)
	 { 0x200,    0x0, 0, (1.0 / (double)(1ULL << 2))}, // 0.250000 (2^-2)
	 {   0x0,  0x200, 0, (1.0 / (double)(1ULL << 0))}, // 1.000000 (2^0)
	 { 0x200,  0x800, 0, (1.0 / (double)(1ULL << 2))}, // 0.250000 (2^-2)
	 { 0x800, 0x2200, 0, (1.0 / (double)(1ULL << 2))}, // 0.250000 (2^-2)
	 {0x2200, 0x8400, 0, (1.0 / (double)(1ULL << 4))}, // 0.062500 (2^-4)
	 {0x8400, 0x3A02, 0, (1.0 / (double)(1ULL << 4))}, // 0.062500 (2^-4)
	 {0x3A02,  0xA00, 0, (1.0 / (double)(1ULL << 8))}, // 0.00390625 (2^-8), log shows 0.003906
	 { 0xA00,  0x200, 0, (1.0 / (double)(1ULL << 3))}, // 0.125000 (2^-3)
	 { 0x200,  0x200, 0, (1.0 / (double)(1ULL << 2))}, // 0.250000 (2^-2)
	 { 0x200,  0xA00, 0, (1.0 / (double)(1ULL << 2))}  // 0.250000 (2^-2)
  };

  uint32_t dyy_init = 0;		  // dummy
  // Hash table handed to the cluster search (trail -> counter); it starts empty.
  boost::unordered_map<std::array<differential_t, NROUNDS>, uint32_t, simon_trail_hash, simon_trail_equal_to> trails_hash_map;

  simon_trail_cluster_search_boost(&trails_hash_map, B, trail, NROUNDS, &dyy_init);
#endif
}


/* --- */

// Disabled fragment (its enclosing function is not part of this dump).
// When the transition (dx -> dy) has probability p above XDP_ROT_AND_P_THRES
// and hw32(dx) does not exceed XDP_ROT_AND_MAX_HW, it is inserted into the
// highway tables hways_diff_set_dx_dy / hways_diff_mset_p.
#if 0				// !!!
		bool b_low_hw = (hw32(dx) <= XDP_ROT_AND_MAX_HW);
		if((p > XDP_ROT_AND_P_THRES) && (b_low_hw)) 
      {
		  differential_t diff;
		  diff.dx = dx;
		  diff.dy = dy;
		  diff.p = p;

		  // check if it is already in highway table
		  //		  uint32_t old_size = diff_set_dx_dy.size();
		  //		  diff_set_dx_dy.insert(diff);
		  //		  uint32_t new_size = diff_set_dx_dy.size();

		  // Membership is detected through the size of the (dx,dy) set: if the
		  // size did not change after insert(), the entry was already present.
		  uint32_t old_size = hways_diff_set_dx_dy.size();
		  hways_diff_set_dx_dy.insert(diff);
		  uint32_t new_size = hways_diff_set_dx_dy.size();

		  // Only a genuinely new (dx,dy) entry is also added to the table
		  // hways_diff_mset_p.
		  if(old_size != new_size) {
			 //			 diff_mset_p.insert(diff);
			 hways_diff_mset_p.insert(diff);
			 printf("[%s:%d] Add new Hway: %X %X %f 2^%f\n", __FILE__, __LINE__, dx, dy, p, log2(p));
			 //				xdp_rot_and_print_set_dx_dy(diff_set_dx_dy);
			 //			 printf("[%s:%d] NEW HW sizes: Dp %d, Dxy %d\n", __FILE__, __LINE__, diff_mset_p.size(), diff_set_dx_dy.size());
			 // NOTE(review): size() is printed with %d; if these are standard
			 // containers the argument is a size_t and the format does not
			 // match - confirm the container types before re-enabling.
			 printf("[%s:%d] NEW HW sizes: Dp %d, Dxy %d\n", __FILE__, __LINE__, hways_diff_mset_p.size(), hways_diff_set_dx_dy.size());
		  }
		}
#endif


/* --- */
// Disabled variant: flag set when the last FOUR rounds of the trail all have
// probability below XDP_ROT_AND_P_THRES, or when at least one of them has
// probability <= p_low_thres.
#if 0
  		bool b_prev_four_croads = false;
		if(next_round >= 4) {	  // needs four previous rounds in the trail
		  b_prev_four_croads = ((trail[next_round - 1].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 2].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 3].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 4].p < XDP_ROT_AND_P_THRES)) || ((trail[next_round - 1].p <= p_low_thres) || (trail[next_round - 2].p <= p_low_thres) || (trail[next_round - 3].p <= p_low_thres) || (trail[next_round - 4].p <= p_low_thres));
		}
#endif
// Disabled variant: same test over the last THREE rounds of the trail.
#if 0
  		bool b_prev_three_croads = false;
		if(next_round >= 3) {	  // needs three previous rounds in the trail
		  b_prev_three_croads = ((trail[next_round - 1].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 2].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 3].p < XDP_ROT_AND_P_THRES)) || ((trail[next_round - 1].p <= p_low_thres) || (trail[next_round - 2].p <= p_low_thres) || (trail[next_round - 3].p <= p_low_thres));
		}
#endif

/* --- */

		// Fragment (enclosing function not part of this dump): reaction to
		// b_prev_two_croads being set. The highway tables are rebuilt with a
		// larger entry limit (max_cnt + inc_fact) and the search is restarted
		// from the first round; if the larger limit yields a table of the same
		// size, the program gives up.
		if(b_prev_two_croads) {
		//		if(b_prev_four_croads) {
		//		if(!b_is_hway && !b_prev_hway) { //  two country roads in sequence
		//		if(!b_is_hway && b_prev_two_croads) { //  two country roads in sequence
		  //		  printf("\n\n -------- [%s:%d]  WARNING!! Two consecutive countryroads! --------\n\n", __FILE__, __LINE__);
		  //		  printf("\n\n -------- [%s:%d]  WARNING!! Four consecutive countryroads or at least one <= 2^%4.2f : 2^%4.2f 2^%4.2f 2^%4.2f 2^%4.2f --------\n\n", __FILE__, __LINE__, log2(p_low_thres), log2(trail[next_round - 4].p), log2(trail[next_round - 3].p), log2(trail[next_round - 2].p), log2(trail[next_round - 1].p));
		  //		  printf("\n\n -------- [%s:%d]  WARNING!! Three consecutive countryroads! 2^%4.2f 2^%4.2f 2^%4.2f --------\n\n", __FILE__, __LINE__, log2(trail[next_round - 3].p), log2(trail[next_round - 2].p), log2(trail[next_round - 1].p));
		  //		  printf("\n\n -------- [%s:%d]  WARNING!! Two consecutive countryroads! 2^%4.2f 2^%4.2f --------\n\n", __FILE__, __LINE__, log2(trail[next_round - 2].p), log2(trail[next_round - 1].p));
		  printf("\n\n -------- [%s:%d]  WARNING!! Two consecutive countryroads or at least one <= 2^%4.2f : 2^%4.2f 2^%4.2f --------\n\n", __FILE__, __LINE__, log2(p_low_thres), log2(trail[next_round - 2].p), log2(trail[next_round - 1].p));
#if 1									  // !!!
		  // Remember the table size so that a rebuild that adds nothing can be
		  // detected below.
		  uint32_t hways_size_before = diff_set_dx_dy.size();

		  // Despite its name, inc_fact is ADDED to max_cnt (see below); the
		  // multiplicative variant survives only in the commented-out printf.
		  uint32_t inc_fact = 60;
		  //		  printf("[%s:%d] Increase Hway count: %lld -> %lld. Re-initializing hways... CR Dp %d Dxy %d\n", __FILE__, __LINE__, max_cnt, (inc_fact * max_cnt), croads_diff_mset_p.size(), croads_diff_set_dx_dy.size());
		  printf("[%s:%d] Increase Hway count: %lld -> %lld. Re-initializing hways...\n", __FILE__, __LINE__, max_cnt, (inc_fact + max_cnt));
		  diff_set_dx_dy.clear();
		  diff_mset_p.clear();
		  hways_diff_set_dx_dy.clear();
		  hways_diff_mset_p.clear();

		  // Regenerate the tables with the enlarged limit and mirror them into
		  // the highway copies.
		  max_cnt += inc_fact; 
		  xdp_rot_and_pddt(&diff_set_dx_dy, &diff_mset_p, lrot_const_s, lrot_const_t, max_cnt, p_thres);

		  hways_diff_mset_p = diff_mset_p;
		  hways_diff_set_dx_dy = diff_set_dx_dy;


		  uint32_t hways_size_after = diff_set_dx_dy.size();

		  // Same size before and after: raising max_cnt produced no new
		  // entries, so report the parameters and stop.
		  if(hways_size_before == hways_size_after) {
			 printf("[%s:%d] Cannot continue without two consecutive Croads:\n", __FILE__, __LINE__);
			 // Fixed format: the original "2^4.2%f" printed the literal text
			 // "2^4.2" followed by the raw threshold; print the log2 exponent
			 // like every other "2^%4.2f" message in this fragment.
			 printf("Current parameters: max Hways %lld , p_thres 2^%4.2f\n", max_cnt, log2(XDP_ROT_AND_P_THRES));
			 printf("Please, lower the thershold! Exiting...\n");
			 exit(1);
		  }

		  //		  nrounds -= 1;				  // !!!
		  // Restart the whole search: reset the round counter and ask the
		  // caller's loop to repeat.
		  nrounds = 0;
		  b_repeat_round = true;

		  printf("[%s:%d] Start again from round %d\n", __FILE__, __LINE__, nrounds + 1);
		  sleep(5);
#endif
		}

/* --- */
// Debug aid: dump the values involved right before the sanity assert below
// would fail. The condition is the exact negation of the asserted expression,
// so (unlike "p_min > 1.001") it is also true when p_min is NaN.
#if 1								  // DEBUG
	 if(!(p_min <= 1.001)) {
		printf("[%s:%d] n %2d: %41.40f p_min 2^%4.2f Bn 2^%4.2f B[%d] 2^%4.2f\n", __FILE__, __LINE__, n, p_min, log2(p_min), log2(*Bn), nrounds - 1 - (n + 1), log2(B[nrounds - 1 - (n + 1)]));
	 }
#endif
	 // p_min must not exceed 1 (the 1.001 is presumably slack for rounding - TODO confirm).
	 assert(p_min <= 1.001);	  /* !!! */



/* ---- */

		// Fragment (enclosing function not part of this dump): reaction to
		// b_prev_two_croads being set. The highway tables are rebuilt with a
		// larger entry limit (max_cnt + inc_fact) and the search is restarted
		// from the first round; if the larger limit yields a table of the same
		// size, the program gives up.
		//		if(b_prev_three_croads) {
		if(b_prev_two_croads) {
		//		if(b_prev_four_croads) {
		//		if(!b_is_hway && !b_prev_hway) { //  two country roads in sequence
		//		if(!b_is_hway && b_prev_two_croads) { //  two country roads in sequence
		  //		  printf("\n\n -------- [%s:%d]  WARNING!! Two consecutive countryroads! --------\n\n", __FILE__, __LINE__);
		  //		  printf("\n\n -------- [%s:%d]  WARNING!! Four consecutive countryroads or at least one <= 2^%4.2f : 2^%4.2f 2^%4.2f 2^%4.2f 2^%4.2f --------\n\n", __FILE__, __LINE__, log2(p_low_thres), log2(trail[next_round - 4].p), log2(trail[next_round - 3].p), log2(trail[next_round - 2].p), log2(trail[next_round - 1].p));
		  //		  printf("\n\n -------- [%s:%d]  WARNING!! Three consecutive countryroads! 2^%4.2f 2^%4.2f 2^%4.2f --------\n\n", __FILE__, __LINE__, log2(trail[next_round - 3].p), log2(trail[next_round - 2].p), log2(trail[next_round - 1].p));
		  //		  printf("\n\n -------- [%s:%d]  WARNING!! Two consecutive countryroads! 2^%4.2f 2^%4.2f --------\n\n", __FILE__, __LINE__, log2(trail[next_round - 2].p), log2(trail[next_round - 1].p));
		  printf("\n\n -------- [%s:%d]  WARNING!! Two consecutive countryroads or at least one <= 2^%4.2f : 2^%4.2f 2^%4.2f --------\n\n", __FILE__, __LINE__, log2(p_low_thres), log2(trail[next_round - 2].p), log2(trail[next_round - 1].p));
#if 1									  // !!!
		  // Remember the table size so that a rebuild that adds nothing can be
		  // detected below.
		  uint32_t hways_size_before = diff_set_dx_dy.size();

		  // Despite its name, inc_fact is ADDED to max_cnt (see below); the
		  // multiplicative variant survives only in the commented-out printf.
		  uint32_t inc_fact = 60;
		  //		  printf("[%s:%d] Increase Hway count: %lld -> %lld. Re-initializing hways... CR Dp %d Dxy %d\n", __FILE__, __LINE__, max_cnt, (inc_fact * max_cnt), croads_diff_mset_p.size(), croads_diff_set_dx_dy.size());
		  printf("[%s:%d] Increase Hway count: %lld -> %lld. Re-initializing hways...\n", __FILE__, __LINE__, max_cnt, (inc_fact + max_cnt));
		  diff_set_dx_dy.clear();
		  diff_mset_p.clear();
		  hways_diff_set_dx_dy.clear();
		  hways_diff_mset_p.clear();

		  // Regenerate the tables with the enlarged limit and mirror them into
		  // the highway copies.
		  max_cnt += inc_fact; 
		  xdp_rot_and_pddt(&diff_set_dx_dy, &diff_mset_p, lrot_const_s, lrot_const_t, max_cnt, p_thres);

		  hways_diff_mset_p = diff_mset_p;
		  hways_diff_set_dx_dy = diff_set_dx_dy;


		  uint32_t hways_size_after = diff_set_dx_dy.size();

		  // Same size before and after: raising max_cnt produced no new
		  // entries, so report the parameters and stop.
		  if(hways_size_before == hways_size_after) {
			 printf("[%s:%d] Cannot continue without two consecutive Croads:\n", __FILE__, __LINE__);
			 // Fixed format: the original "2^4.2%f" printed the literal text
			 // "2^4.2" followed by the raw threshold; print the log2 exponent
			 // like every other "2^%4.2f" message in this fragment.
			 printf("Current parameters: max Hways %lld , p_thres 2^%4.2f\n", max_cnt, log2(XDP_ROT_AND_P_THRES));
			 printf("Please, lower the thershold! Exiting...\n");
			 exit(1);
		  }

		  //		  nrounds -= 1;				  // !!!
		  // Restart the whole search: reset the round counter and ask the
		  // caller's loop to repeat.
		  nrounds = 0;
		  b_repeat_round = true;

		  printf("[%s:%d] Start again from round %d\n", __FILE__, __LINE__, nrounds + 1);
		  sleep(5);
#endif
		}

{
		  // Fragment: body of a branch whose condition is not part of this dump;
		  // the matching "else" opened at the end is not closed here either.
		  //		printf("[%s:%d] Set B[%d] = 2^%f\n", __FILE__, __LINE__, next_round, log2(Bn_init));
		  // New bound for this round: previous bound multiplied by the
		  // probability p of the round being appended.
		  Bn_init = B[next_round - 1] * p;
		  B[next_round] = Bn_init;
		  // Record the appended round (dx -> dxx with probability p) in the trail.
		  trail[next_round].dx = dx;
		  trail[next_round].dy = dxx;
		  trail[next_round].p = p;

		  // Consecutive rounds must chain: the input difference stored for this
		  // round equals the output difference stored for the previous one.
		  assert(trail[next_round].dx == trail[next_round-1].dy);

		} else {
/* --- */

		// Earlier, now commented-out flags: a round counts as a highway when
		// its probability is >= XDP_ROT_AND_P_THRES.
		//		bool b_is_hway = (p >= XDP_ROT_AND_P_THRES);//xdp_rot_and_is_dx_in_set_dx_dy(dxx, dx, dx_prev, lrot_const_u, diff_set_dx_dy);
		//		bool b_prev_hway = (trail[next_round - 1].p >= XDP_ROT_AND_P_THRES);
// Disabled variant: flag set when the last FOUR rounds of the trail all have
// probability below XDP_ROT_AND_P_THRES, or when at least one of them has
// probability <= p_low_thres.
#if 0
  		bool b_prev_four_croads = false;
		if(next_round >= 4) {	  // needs four previous rounds in the trail
		  b_prev_four_croads = ((trail[next_round - 1].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 2].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 3].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 4].p < XDP_ROT_AND_P_THRES)) || ((trail[next_round - 1].p <= p_low_thres) || (trail[next_round - 2].p <= p_low_thres) || (trail[next_round - 3].p <= p_low_thres) || (trail[next_round - 4].p <= p_low_thres));
		}
#endif
// Disabled variant: same test over the last THREE rounds of the trail.
#if 0
  		bool b_prev_three_croads = false;
		if(next_round >= 3) {	  // needs three previous rounds in the trail
		  b_prev_three_croads = ((trail[next_round - 1].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 2].p < XDP_ROT_AND_P_THRES) && (trail[next_round - 3].p < XDP_ROT_AND_P_THRES)) || ((trail[next_round - 1].p <= p_low_thres) || (trail[next_round - 2].p <= p_low_thres) || (trail[next_round - 3].p <= p_low_thres));
		}
#endif
#if 1
		// b_prev_two_croads: set when both of the last two rounds of the trail
		// have probability below XDP_ROT_AND_P_THRES, or when either of them
		// has probability <= p_low_thres. Stays false while fewer than two
		// rounds are available.
		bool b_prev_two_croads = false;
		if(next_round >= 2) {
		  const bool b_both_below_thres =
			 (trail[next_round - 1].p < XDP_ROT_AND_P_THRES) &&
			 (trail[next_round - 2].p < XDP_ROT_AND_P_THRES);
		  const bool b_any_very_low =
			 (trail[next_round - 1].p <= p_low_thres) ||
			 (trail[next_round - 2].p <= p_low_thres);
		  b_prev_two_croads = (b_both_below_thres || b_any_very_low);
		}
#endif

/* --- */

#if 1
	 bool b_prev_croad = false;
	 if(n >= 1) {	  // is the previous a Croad
		b_prev_croad = (trail[n - 1].p < XDP_ROT_AND_P_THRES);
	 }

#endif
#if 0
	 bool b_prev_two_croads = false;
	 if(n >= 2) {	  // is the previous a Croad
		b_prev_two_croads = ((trail[n - 1].p < XDP_ROT_AND_P_THRES) && (trail[n - 2].p < XDP_ROT_AND_P_THRES));
	 }
#endif
	 // If the max is a Croad (!b_found_in_hways)
    // that does not lead back onto an Hway (!b_croad_to_hway) and
    // the previous one is also a Croad (b_prev_croad),
    // then generate a list of new Croads that will bring us back onto a highway.
	 //	 if((!b_found_in_hways) && (!b_croad_to_hway) && (b_prev_croad) ) {
	 //	 if((!b_found_in_hways) && (!b_croad_to_hway) && (b_prev_two_croads) ) {
	 //	 if(!b_found_in_hways) 


/* ---- */

/* 
Simon48, 15R, unsuccessful cases:

Currently running 


[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 13, Bn_init = 2^-44.000000 : key   77172F   609269   DA30CB   A3A016
[./src/xdp-rot-and.cc:1365] 7 / 16777216 : NEW Croad:    20022        2 0.01562 2^-6.00 | CR size: Dp          7, Dxy          702
[./src/simon-xor-threshold-search.cc:1511] 12 | Update best found Bn: 2^-44.000000 -> 2^-38.000000
[./src/simon-xor-threshold-search.cc:1249]  0: [33 / 483]   810000 ->        0, 2^-3.000000, 2^-38.0000000 hw     3xy          114
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
pDDT sizes: Dp 483, Dxy 483, p_thres 0.050000 2^-4.321928
0:     4000 ->    10000 0.250000 (2^-2.000000)
1:     1100 ->      400 0.062500 (2^-4.000000)
2:      400 ->      100 0.250000 (2^-2.000000)
3:      100 ->        0 0.250000 (2^-2.000000)
4:        0 ->      100 1.000000 (2^0.000000)
5:      100 ->      400 0.250000 (2^-2.000000)
6:      400 ->     1100 0.250000 (2^-2.000000)
7:     1100 ->     4000 0.062500 (2^-4.000000)
8:     4000 ->    11100 0.250000 (2^-2.000000)
9:    11100 ->    40400 0.015625 (2^-6.000000)
10:    40400 ->   110100 0.062500 (2^-4.000000)
11:   110100 ->   400000 0.015625 (2^-6.000000)
12:   400000 ->   110101 0.250000 (2^-2.000000)
p_tot = 0.000000000003638 = 2^-38.000000, Bn = 0.000000 = 2^-38.000000
[./src/simon-xor-threshold-search.cc:1785] Init bound: 110101 -> 0 = 0.003906 2^-8.000000

[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 14, Bn_init = 2^-46.000000 : key   77172F   609269   DA30CB   A3A016
[./src/simon-xor-threshold-search.cc:1297]  1: [482 / 483]     4810 ->        0, 2^-4.000000, 2^-46.000000 hw     3xy          1
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-46.000000
pDDT sizes: Dp 483, Dxy 483, p_thres 0.050000 2^-4.321928
0:     4000 ->    10000 0.250000 (2^-2.000000)
1:     1100 ->      400 0.062500 (2^-4.000000)
2:      400 ->      100 0.250000 (2^-2.000000)
3:      100 ->        0 0.250000 (2^-2.000000)
4:        0 ->      100 1.000000 (2^0.000000)
5:      100 ->      400 0.250000 (2^-2.000000)
6:      400 ->     1100 0.250000 (2^-2.000000)
7:     1100 ->     4000 0.062500 (2^-4.000000)
8:     4000 ->    11100 0.250000 (2^-2.000000)
9:    11100 ->    40400 0.015625 (2^-6.000000)
10:    40400 ->   110100 0.062500 (2^-4.000000)
11:   110100 ->   400000 0.015625 (2^-6.000000)
12:   400000 ->   110101 0.250000 (2^-2.000000)
13:   110101 ->    40404 0.003906 (2^-8.000000)
p_tot = 0.000000000000014 = 2^-46.000000, Bn = 0.000000 = 2^-46.000000
[./src/simon-xor-threshold-search.cc:1785] Init bound: 40404 -> 0 = 0.015625 2^-6.000000


[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 2 2^1.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1

real    132m41.246s
user    132m16.284s
sys     0m1.784s


[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1

[./src/simon-xor-threshold-search.cc:2005] Best differential for 15 R: (   0    0) -> (   0    0) 2^-inf
[./src/simon-xor-threshold-search.cc:2033] nrounds 15
[./tests/simon-xor-threshold-search-tests.cc:258] Final best trail:
0:    10000 ->    60000 0.250000 (2^-2.000000)
1:     4400 ->     1000 0.062500 (2^-4.000000)
2:     1000 ->      400 0.250000 (2^-2.000000)
3:      400 ->        0 0.250000 (2^-2.000000)
4:        0 ->      400 1.000000 (2^0.000000)
5:      400 ->     1000 0.250000 (2^-2.000000)
6:     1000 ->     4400 0.250000 (2^-2.000000)
7:     4400 ->    50000 0.062500 (2^-4.000000)
8:    50000 ->   1C4400 0.062500 (2^-4.000000)
9:   1C4400 ->    41000 0.001953 (2^-9.000000)
10:    41000 ->    40400 0.062500 (2^-4.000000)
11:    40400 ->   100004 0.062500 (2^-4.000000)
12:   100004 ->   440000 0.062500 (2^-4.000000)
13:   440000 ->        1 0.062500 (2^-4.000000)
14:        1 ->   440004 0.250000 (2^-2.000000)
p_tot = 0.000000000000002 = 2^-49.000000
[./tests/simon-xor-threshold-search-tests.cc:269] Final bounds 15R:
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-46.000000
B[14] = 2^-49.000000
[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1

real    669m21.174s
user    667m13.234s
sys     0m14.749s
vpv@igor:~/skcrypto/trunk/work/src/yaarx$ p
p: command not found



 */

/* --- */


		// Two alternative drafts of the same fragment (enclosing function not
		// part of this dump). When the current bound Bn is below the stored
		// bound g_B32[next_round], the stored bound becomes Bn_init and the
		// first next_round entries of B and trail are overwritten with the
		// stored g_B32 / g_trail32 values.
		if(Bn < g_B32[next_round]) {
		  Bn_init = g_B32[next_round];

		  for(int i = 0; i < next_round; i++) {
			 B[i] = g_B32[i];
			 trail[i] = g_trail32[i];
		  }
		}

		// Second draft: identical, except that it also stores the adopted
		// bound in B[next_round].
		if(Bn < g_B32[next_round]) {
		  Bn_init = g_B32[next_round];
		  B[next_round] = Bn_init;

		  for(int i = 0; i < next_round; i++) {
			 B[i] = g_B32[i];
			 trail[i] = g_trail32[i];
		  }
		}



/* --- */

	 // Debug output: p_min and the bound *Bn, each printed with 40 decimals and
	 // as a log2 exponent.
	 printf("p_min %41.40f 2^%4.2f\n", p_min, log2(p_min));
	 printf("   Bn %41.40f 2^%4.2f\n", *Bn, log2(*Bn));


/* 

Simon64, 20R, 2^-70, 0.06, max_hw 4: RUN IN TWO PASSES!

[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 20, Bn_init = 2^-72.000000 : key  3C473F8  A52FC33 9362C4CD 8AA268AB
[./src/xdp-rot-and.cc:1365] 1 / 4294967296 : NEW Croad:        0        0 1.00000 2^0.00 | CR size: Dp          1, Dxy          137
[./src/simon-xor-threshold-search.cc:1511] 19 | Update best found Bn: 2^-72.000000 -> 2^-70.000000
[./src/simon-xor-threshold-search.cc:1249]  0: [36 / 674] 40000020 ->        0, 2^-3.000000, 2^-70.0000000 hw     3 Dxy          10
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-56.000000
B[17] = 2^-64.000000
B[18] = 2^-68.000000
B[19] = 2^-70.000000
pDDT sizes: Dp 674, Dxy 674, p_thres 0.060000 2^-4.058894
0:  4000000 -> 10000000 0.250000 (2^-2.000000)
1:  1000000 ->        0 0.250000 (2^-2.000000)
2:        0 ->  1000000 1.000000 (2^0.000000)
3:  1000000 ->  4000000 0.250000 (2^-2.000000)
4:  4000000 -> 11000000 0.250000 (2^-2.000000)
5: 11000000 -> 60000000 0.062500 (2^-4.000000)
6: 60000000 -> 51000001 0.062500 (2^-4.000000)
7: 51000001 ->  4000004 0.003906 (2^-8.000000)
8:  4000004 -> 41000011 0.062500 (2^-4.000000)
9: 41000011 ->        0 0.003906 (2^-8.000000)
10:        0 -> 41000011 1.000000 (2^0.000000)
11: 41000011 ->  4000004 0.003906 (2^-8.000000)
12:  4000004 -> 51000001 0.062500 (2^-4.000000)
13: 51000001 -> 60000000 0.003906 (2^-8.000000)
14: 60000000 -> 11000000 0.062500 (2^-4.000000)
15: 11000000 ->  4000000 0.062500 (2^-4.000000)
16:  4000000 ->  1000000 0.250000 (2^-2.000000)
17:  1000000 ->        0 0.250000 (2^-2.000000)
18:        0 ->  1000000 1.000000 (2^0.000000)
19:  1000000 ->  4000000 0.250000 (2^-2.000000)
p_tot = 0.000000000000000 = 2^-70.000000, Bn = 0.000000 = 2^-70.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)  4000000 -> 10000000
EXP  0: 0.249630 (2^-2.002137)  4000000 -> 10000000

THE  1: 0.250000 (2^-2.000000)  1000000 ->        0
EXP  1: 0.249871 (2^-2.000743)  1000000 ->        0

THE  2: 1.000000 (2^0.000000)        0 ->  1000000
EXP  2: 1.000000 (2^0.000000)        0 ->  1000000

THE  3: 0.250000 (2^-2.000000)  1000000 ->  4000000
EXP  3: 0.250669 (2^-1.996147)  1000000 ->  4000000

THE  4: 0.250000 (2^-2.000000)  4000000 -> 11000000
EXP  4: 0.250420 (2^-1.997581)  4000000 -> 11000000

THE  5: 0.062500 (2^-4.000000) 11000000 -> 60000000
EXP  5: 0.062611 (2^-3.997449) 11000000 -> 60000000

THE  6: 0.062500 (2^-4.000000) 60000000 -> 51000001
EXP  6: 0.062819 (2^-3.992666) 60000000 -> 51000001

THE  7: 0.003906 (2^-8.000000) 51000001 ->  4000004
EXP  7: 0.003870 (2^-8.013447) 51000001 ->  4000004

THE  8: 0.062500 (2^-4.000000)  4000004 -> 41000011
EXP  8: 0.062436 (2^-4.001476)  4000004 -> 41000011

THE  9: 0.003906 (2^-8.000000) 41000011 ->        0
EXP  9: 0.003900 (2^-8.002468) 41000011 ->        0

THE 10: 1.000000 (2^0.000000)        0 -> 41000011
EXP 10: 1.000000 (2^0.000000)        0 -> 41000011

THE 11: 0.003906 (2^-8.000000) 41000011 ->  4000004
EXP 11: 0.004000 (2^-7.965889) 41000011 ->  4000004

THE 12: 0.062500 (2^-4.000000)  4000004 -> 51000001
EXP 12: 0.062551 (2^-3.998812)  4000004 -> 51000001

THE 13: 0.003906 (2^-8.000000) 51000001 -> 60000000
EXP 13: 0.003811 (2^-8.035659) 51000001 -> 60000000

THE 14: 0.062500 (2^-4.000000) 60000000 -> 11000000
EXP 14: 0.061938 (2^-4.013025) 60000000 -> 11000000

THE 15: 0.062500 (2^-4.000000) 11000000 ->  4000000
EXP 15: 0.062097 (2^-4.009342) 11000000 ->  4000000

THE 16: 0.250000 (2^-2.000000)  4000000 ->  1000000
EXP 16: 0.250124 (2^-1.999285)  4000000 ->  1000000

THE 17: 0.250000 (2^-2.000000)  1000000 ->        0
EXP 17: 0.250806 (2^-1.995357)  1000000 ->        0

THE 18: 1.000000 (2^0.000000)        0 ->  1000000
EXP 18: 1.000000 (2^0.000000)        0 ->  1000000

THE 19: 0.250000 (2^-2.000000)  1000000 ->  4000000
EXP 19: 0.248992 (2^-2.005829)  1000000 ->  4000000

OK
[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1902] nrounds = 20
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)  4000000 -> 10000000
EXP  0: 0.249990 (2^-2.000061)  4000000 -> 10000000

THE  1: 0.250000 (2^-2.000000)  1000000 ->        0
EXP  1: 0.250090 (2^-1.999483)  1000000 ->        0

THE  2: 1.000000 (2^0.000000)        0 ->  1000000
EXP  2: 1.000000 (2^0.000000)        0 ->  1000000

THE  3: 0.250000 (2^-2.000000)  1000000 ->  4000000
EXP  3: 0.249438 (2^-2.003245)  1000000 ->  4000000

THE  4: 0.250000 (2^-2.000000)  4000000 -> 11000000
EXP  4: 0.249774 (2^-2.001305)  4000000 -> 11000000

THE  5: 0.062500 (2^-4.000000) 11000000 -> 60000000
EXP  5: 0.062471 (2^-4.000661) 11000000 -> 60000000

THE  6: 0.062500 (2^-4.000000) 60000000 -> 51000001
EXP  6: 0.063091 (2^-3.986416) 60000000 -> 51000001

THE  7: 0.003906 (2^-8.000000) 51000001 ->  4000004
EXP  7: 0.003856 (2^-8.018790) 51000001 ->  4000004

THE  8: 0.062500 (2^-4.000000)  4000004 -> 41000011
EXP  8: 0.062471 (2^-4.000661)  4000004 -> 41000011

THE  9: 0.003906 (2^-8.000000) 41000011 ->        0
EXP  9: 0.003918 (2^-7.995780) 41000011 ->        0

THE 10: 1.000000 (2^0.000000)        0 -> 41000011
EXP 10: 1.000000 (2^0.000000)        0 -> 41000011

THE 11: 0.003906 (2^-8.000000) 41000011 ->  4000004
EXP 11: 0.003945 (2^-7.985631) 41000011 ->  4000004

THE 12: 0.062500 (2^-4.000000)  4000004 -> 51000001
EXP 12: 0.062632 (2^-3.996965)  4000004 -> 51000001

THE 13: 0.003906 (2^-8.000000) 51000001 -> 60000000
EXP 13: 0.003982 (2^-7.972440) 51000001 -> 60000000

THE 14: 0.062500 (2^-4.000000) 60000000 -> 11000000
EXP 14: 0.062617 (2^-3.997295) 60000000 -> 11000000

THE 15: 0.062500 (2^-4.000000) 11000000 ->  4000000
EXP 15: 0.062779 (2^-3.993564) 11000000 ->  4000000

THE 16: 0.250000 (2^-2.000000)  4000000 ->  1000000
EXP 16: 0.249818 (2^-2.001052)  4000000 ->  1000000

THE 17: 0.250000 (2^-2.000000)  1000000 ->        0
EXP 17: 0.249333 (2^-2.003852)  1000000 ->        0

THE 18: 1.000000 (2^0.000000)        0 ->  1000000
EXP 18: 1.000000 (2^0.000000)        0 ->  1000000

THE 19: 0.250000 (2^-2.000000)  1000000 ->  4000000
EXP 19: 0.249644 (2^-2.002054)  1000000 ->  4000000

OK


[./src/simon-xor-threshold-search.cc:347] Verify P of differentials (2^20.000000 CPs)...
Input differences:  4000000 11000000

R# 0 Output differences:  1000000  4000000
THE  1: 0.250000 (2^-2.000000)  4000000 ->  1000000
EXP  1: 0.249818 (2^-2.001052)  4000000 ->  1000000

R# 1 Output differences:        0  1000000
THE  2: 0.062500 (2^-4.000000)  1000000 ->        0
EXP  2: 0.062843 (2^-3.992097)  1000000 ->        0

R# 2 Output differences:  1000000        0
THE  3: 0.062500 (2^-4.000000)        0 ->  1000000
EXP  3: 0.062691 (2^-3.995604)        0 ->  1000000

R# 3 Output differences:  4000000  1000000
THE  4: 0.015625 (2^-6.000000)  1000000 ->  4000000
EXP  4: 0.015624 (2^-6.000088)  1000000 ->  4000000

R# 4 Output differences: 11000000  4000000
THE  5: 0.003906 (2^-8.000000)  4000000 -> 11000000
EXP  5: 0.005992 (2^-7.382762)  4000000 -> 11000000

R# 5 Output differences: 60000000 11000000
THE  6: 0.000244 (2^-12.000000) 11000000 -> 60000000
EXP  6: 0.000641 (2^-10.607683) 11000000 -> 60000000

R# 6 Output differences: 51000001 60000000
THE  7: 0.000015 (2^-16.000000) 60000000 -> 51000001
EXP  7: 0.000040 (2^-14.607683) 60000000 -> 51000001

R# 7 Output differences:  4000004 51000001
THE  8: 0.000000 (2^-24.000000) 51000001 ->  4000004
EXP  8: 0.000000 (2^-inf) 51000001 ->  4000004

R# 8 Output differences: 41000011  4000004
THE  9: 0.000000 (2^-28.000000)  4000004 -> 41000011
EXP  9: 0.000000 (2^-inf)  4000004 -> 41000011

R# 9 Output differences:        0 41000011
THE 10: 0.000000 (2^-36.000000) 41000011 ->        0
EXP 10: 0.000000 (2^-inf) 41000011 ->        0

R#10 Output differences: 41000011        0
THE 11: 0.000000 (2^-36.000000)        0 -> 41000011
EXP 11: 0.000000 (2^-inf)        0 -> 41000011

R#11 Output differences:  4000004 41000011
THE 12: 0.000000 (2^-44.000000) 41000011 ->  4000004
EXP 12: 0.000000 (2^-inf) 41000011 ->  4000004

R#12 Output differences: 51000001  4000004
THE 13: 0.000000 (2^-48.000000)  4000004 -> 51000001
EXP 13: 0.000000 (2^-inf)  4000004 -> 51000001

R#13 Output differences: 60000000 51000001
THE 14: 0.000000 (2^-56.000000) 51000001 -> 60000000
EXP 14: 0.000000 (2^-inf) 51000001 -> 60000000

R#14 Output differences: 11000000 60000000
THE 15: 0.000000 (2^-60.000000) 60000000 -> 11000000
EXP 15: 0.000000 (2^-inf) 60000000 -> 11000000

R#15 Output differences:  4000000 11000000
THE 16: 0.000000 (2^-64.000000) 11000000 ->  4000000
EXP 16: 0.000000 (2^-inf) 11000000 ->  4000000

R#16 Output differences:  1000000  4000000
THE 17: 0.000000 (2^-66.000000)  4000000 ->  1000000
EXP 17: 0.000000 (2^-inf)  4000000 ->  1000000

R#17 Output differences:        0  1000000
THE 18: 0.000000 (2^-68.000000)  1000000 ->        0
EXP 18: 0.000000 (2^-inf)  1000000 ->        0

R#18 Output differences:  1000000        0
THE 19: 0.000000 (2^-68.000000)        0 ->  1000000
EXP 19: 0.000000 (2^-inf)        0 ->  1000000

R#19 Output differences:  4000000  1000000
THE 20: 0.000000 (2^-70.000000)  1000000 ->  4000000
EXP 20: 0.000000 (2^-inf)  1000000 ->  4000000

OK

[./src/simon-xor-threshold-search.cc:2005] Best differential for 20 R: (   0    0) -> (   0    0) 2^-inf

[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1

real    778m27.951s
user    769m38.010s
sys     0m35.802s

 */




/* 

simon64, 19R: -68, 0.06, max hw = 4 bits, starting from  round 17 with 32 Hways and reaching 674 Hways

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-56.000000
B[17] = 2^-64.000000
[./src/simon-xor-threshold-search.cc:1511] 18 | Update best found Bn: 2^-72.000000 -> 2^-68.000000

pDDT sizes: Dp 674, Dxy 674, p_thres 0.060000 2^-4.058894
 0: 11010001 -> 44040004 0.003906 (2^-8.000092)
 1: 40000000 -> 11010000 0.250000 (2^-2.000000)
 2: 11010000 ->  4040000 0.015625 (2^-6.000000)
 3:  4040000 ->  1110000 0.062500 (2^-4.000000)
 4:  1110000 ->   400000 0.015625 (2^-6.000000)
 5:   400000 ->   110000 0.250000 (2^-2.000000)
 6:   110000 ->    40000 0.062500 (2^-4.000000)
 7:    40000 ->    10000 0.250000 (2^-2.000000)
 8:    10000 ->        0 0.250000 (2^-2.000000)
 9:        0 ->    10000 1.000000 (2^0.000000)
10:    10000 ->    40000 0.250000 (2^-2.000000)
11:    40000 ->   110000 0.250000 (2^-2.000000)
12:   110000 ->   400000 0.062500 (2^-4.000000)
13:   400000 ->  1110000 0.250000 (2^-2.000000)
14:  1110000 ->  4040000 0.015625 (2^-6.000000)
15:  4040000 -> 11010000 0.062500 (2^-4.000000)
16: 11010000 -> 40000000 0.015625 (2^-6.000000)
17: 40000000 -> 11010001 0.250000 (2^-2.000000)
p_tot = 0.000000000000000 = 2^-64.000092, Bn = 0.000000 = 2^-64.000000
[./src/simon-xor-threshold-search.cc:1785] Init bound: 11010001 -> 0 = 0.003906 2^-8.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.003906 (2^-8.000092) 11010001 -> 44040004
EXP  0: 0.003917 (2^-7.996131) 11010001 -> 44040004

THE  1: 0.250000 (2^-2.000000) 40000000 -> 11010000
EXP  1: 0.250659 (2^-1.996202) 40000000 -> 11010000

THE  2: 0.015625 (2^-6.000000) 11010000 ->  4040000
EXP  2: 0.015454 (2^-6.015849) 11010000 ->  4040000

THE  3: 0.062500 (2^-4.000000)  4040000 ->  1110000
EXP  3: 0.062413 (2^-4.002005)  4040000 ->  1110000

THE  4: 0.015625 (2^-6.000000)  1110000 ->   400000
EXP  4: 0.015594 (2^-6.002909)  1110000 ->   400000

THE  5: 0.250000 (2^-2.000000)   400000 ->   110000
EXP  5: 0.250135 (2^-1.999219)   400000 ->   110000

THE  6: 0.062500 (2^-4.000000)   110000 ->    40000
EXP  6: 0.062273 (2^-4.005249)   110000 ->    40000

THE  7: 0.250000 (2^-2.000000)    40000 ->    10000
EXP  7: 0.250325 (2^-1.998125)    40000 ->    10000

THE  8: 0.250000 (2^-2.000000)    10000 ->        0
EXP  8: 0.249230 (2^-2.004448)    10000 ->        0

THE  9: 1.000000 (2^0.000000)        0 ->    10000
EXP  9: 1.000000 (2^0.000000)        0 ->    10000

THE 10: 0.250000 (2^-2.000000)    10000 ->    40000
EXP 10: 0.250393 (2^-1.997734)    10000 ->    40000

THE 11: 0.250000 (2^-2.000000)    40000 ->   110000
EXP 11: 0.250217 (2^-1.998746)    40000 ->   110000

THE 12: 0.062500 (2^-4.000000)   110000 ->   400000
EXP 12: 0.062838 (2^-3.992228)   110000 ->   400000

THE 13: 0.250000 (2^-2.000000)   400000 ->  1110000
EXP 13: 0.250183 (2^-1.998944)   400000 ->  1110000

THE 14: 0.015625 (2^-6.000000)  1110000 ->  4040000
EXP 14: 0.015965 (2^-5.968988)  1110000 ->  4040000

THE 15: 0.062500 (2^-4.000000)  4040000 -> 11010000
EXP 15: 0.062615 (2^-3.997339)  4040000 -> 11010000

THE 16: 0.015625 (2^-6.000000) 11010000 -> 40000000
EXP 16: 0.015738 (2^-5.989559) 11010000 -> 40000000

THE 17: 0.250000 (2^-2.000000) 40000000 -> 11010001
EXP 17: 0.250251 (2^-1.998553) 40000000 -> 11010001

THE 18: 0.003906 (2^-8.000000) 11010001 ->  4040004
EXP 18: 0.004069 (2^-7.940994) 11010001 ->  4040004

OK
[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 19, Bn_init = 2^-72.000000 : key  3C473F8  A52FC33 9362C4CD 8AA268AB
[./src/xdp-rot-and.cc:1365] 3 / 4294967296 : NEW Croad:  1000000        1 0.25000 2^-2.00 | CR size: Dp          3, Dxy          31
[./src/simon-xor-threshold-search.cc:1297]  1: [591 / 674]  9000000 ->        0, 2^-4.000000, 2^-68.000000 hw     2 Dxy          11
[./src/xdp-rot-and.cc:1365] 1 / 4294967296 : NEW Croad:     1100   100000 0.06250 2^-4.00 | CR size: Dp          1, Dxy          1
[./src/simon-xor-threshold-search.cc:1511] 18 | Update best found Bn: 2^-72.000000 -> 2^-68.000000


 */


/* 

Currently running: Simon48, 15 R, -49, 32 bit limit


[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 13, Bn_init = 2^-44.000000 : key   77172F   609269   DA30CB   A3A016
[./src/xdp-rot-and.cc:1365] 7 / 16777216 : NEW Croad:    20022        2 0.01562 2^-6.00 | CR size: Dp          7, Dxy          702
[./src/simon-xor-threshold-search.cc:1511] 12 | Update best found Bn: 2^-44.000000 -> 2^-38.000000
[./src/simon-xor-threshold-search.cc:1249]  0: [33 / 483]   810000 ->        0, 2^-3.000000, 2^-38.0000000 hw     3xy          114
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
pDDT sizes: Dp 483, Dxy 483, p_thres 0.050000 2^-4.321928
0:     4000 ->    10000 0.250000 (2^-2.000000)
1:     1100 ->      400 0.062500 (2^-4.000000)
2:      400 ->      100 0.250000 (2^-2.000000)
3:      100 ->        0 0.250000 (2^-2.000000)
4:        0 ->      100 1.000000 (2^0.000000)
5:      100 ->      400 0.250000 (2^-2.000000)
6:      400 ->     1100 0.250000 (2^-2.000000)
7:     1100 ->     4000 0.062500 (2^-4.000000)
8:     4000 ->    11100 0.250000 (2^-2.000000)
9:    11100 ->    40400 0.015625 (2^-6.000000)
10:    40400 ->   110100 0.062500 (2^-4.000000)
11:   110100 ->   400000 0.015625 (2^-6.000000)
12:   400000 ->   110101 0.250000 (2^-2.000000)
p_tot = 0.000000000003638 = 2^-38.000000, Bn = 0.000000 = 2^-38.000000
[./src/simon-xor-threshold-search.cc:1785] Init bound: 110101 -> 0 = 0.003906 2^-8.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)     4000 ->    10000
EXP  0: 0.249905 (2^-2.000550)     4000 ->    10000

THE  1: 0.062500 (2^-4.000000)     1100 ->      400
EXP  1: 0.063286 (2^-3.981974)     1100 ->      400

THE  2: 0.250000 (2^-2.000000)      400 ->      100
EXP  2: 0.250578 (2^-1.996669)      400 ->      100

THE  3: 0.250000 (2^-2.000000)      100 ->        0
EXP  3: 0.250715 (2^-1.995878)      100 ->        0

THE  4: 1.000000 (2^0.000000)        0 ->      100
EXP  4: 1.000000 (2^0.000000)        0 ->      100

THE  5: 0.250000 (2^-2.000000)      100 ->      400
EXP  5: 0.249913 (2^-2.000501)      100 ->      400

THE  6: 0.250000 (2^-2.000000)      400 ->     1100
EXP  6: 0.250280 (2^-1.998383)      400 ->     1100

THE  7: 0.062500 (2^-4.000000)     1100 ->     4000
EXP  7: 0.062259 (2^-4.005580)     1100 ->     4000

THE  8: 0.250000 (2^-2.000000)     4000 ->    11100
EXP  8: 0.250798 (2^-1.995401)     4000 ->    11100

THE  9: 0.015625 (2^-6.000000)    11100 ->    40400
EXP  9: 0.015587 (2^-6.003527)    11100 ->    40400

THE 10: 0.062500 (2^-4.000000)    40400 ->   110100
EXP 10: 0.062371 (2^-4.002975)    40400 ->   110100

THE 11: 0.015625 (2^-6.000000)   110100 ->   400000
EXP 11: 0.015653 (2^-5.997449)   110100 ->   400000

THE 12: 0.250000 (2^-2.000000)   400000 ->   110101
EXP 12: 0.250183 (2^-1.998944)   400000 ->   110101

THE 13: 0.003906 (2^-8.000000)   110101 ->    40404
EXP 13: 0.003885 (2^-8.007770)   110101 ->    40404

OK
[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 14, Bn_init = 2^-46.000000 : key   77172F   609269   DA30CB   A3A016
[./src/xdp-rot-and.cc:1365] 36 / 16777216 : NEW Croad:    D0001   100105 0.00391 2^-8.00 | CR size: Dp         36, Dxy         366


 */

/* 

Simon48, 15 R, -49, 5 bit limit

[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 15, Bn_init = 2^-52.000000 : key    94628   2F41F3   6B5878   F2D154
[./src/simon-xor-threshold-search.cc:1511] 14 | Update best found Bn: 2^-52.000000 -> 2^-50.000000
[./src/simon-xor-threshold-search.cc:1511] 14 | Update best found Bn: 2^-50.000000 -> 2^-49.000000
[./src/simon-xor-threshold-search.cc:1249]  0: [83 / 483]   C00000 ->        0, 2^-4.000000, 2^-49.000000      1, Dxy          135
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-46.000000
B[14] = 2^-49.000000
pDDT sizes: Dp 483, Dxy 483, p_thres 0.050000 2^-4.321928
0:    10000 ->    60000 0.250000 (2^-2.000000)
1:     4400 ->     1000 0.062500 (2^-4.000000)
2:     1000 ->      400 0.250000 (2^-2.000000)
3:      400 ->        0 0.250000 (2^-2.000000)
4:        0 ->      400 1.000000 (2^0.000000)
5:      400 ->     1000 0.250000 (2^-2.000000)
6:     1000 ->     4400 0.250000 (2^-2.000000)
7:     4400 ->    50000 0.062500 (2^-4.000000)
8:    50000 ->   1C4400 0.062500 (2^-4.000000)
9:   1C4400 ->    41000 0.001953 (2^-9.000000)
10:    41000 ->    40400 0.062500 (2^-4.000000)
11:    40400 ->   100004 0.062500 (2^-4.000000)
12:   100004 ->   440000 0.062500 (2^-4.000000)
13:   440000 ->        1 0.062500 (2^-4.000000)
14:        1 ->   440004 0.250000 (2^-2.000000)
p_tot = 0.000000000000002 = 2^-49.000000, Bn = 0.000000 = 2^-49.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)    10000 ->    60000
EXP  0: 0.249481 (2^-2.002997)    10000 ->    60000

THE  1: 0.062500 (2^-4.000000)     4400 ->     1000
EXP  1: 0.062620 (2^-3.997229)     4400 ->     1000

THE  2: 0.250000 (2^-2.000000)     1000 ->      400
EXP  2: 0.250718 (2^-1.995862)     1000 ->      400

THE  3: 0.250000 (2^-2.000000)      400 ->        0
EXP  3: 0.250245 (2^-1.998586)      400 ->        0

THE  4: 1.000000 (2^0.000000)        0 ->      400
EXP  4: 1.000000 (2^0.000000)        0 ->      400

THE  5: 0.250000 (2^-2.000000)      400 ->     1000
EXP  5: 0.250053 (2^-1.999692)      400 ->     1000

THE  6: 0.250000 (2^-2.000000)     1000 ->     4400
EXP  6: 0.249304 (2^-2.004023)     1000 ->     4400

THE  7: 0.062500 (2^-4.000000)     4400 ->    50000
EXP  7: 0.062905 (2^-3.990674)     4400 ->    50000

THE  8: 0.062500 (2^-4.000000)    50000 ->   1C4400
EXP  8: 0.062796 (2^-3.993192)    50000 ->   1C4400

THE  9: 0.001953 (2^-9.000000)   1C4400 ->    41000
EXP  9: 0.001967 (2^-8.989472)   1C4400 ->    41000

THE 10: 0.062500 (2^-4.000000)    41000 ->    40400
EXP 10: 0.062236 (2^-4.006111)    41000 ->    40400

THE 11: 0.062500 (2^-4.000000)    40400 ->   100004
EXP 11: 0.062451 (2^-4.001123)    40400 ->   100004

THE 12: 0.062500 (2^-4.000000)   100004 ->   440000
EXP 12: 0.062131 (2^-4.008545)   100004 ->   440000

THE 13: 0.062500 (2^-4.000000)   440000 ->        1
EXP 13: 0.062903 (2^-3.990718)   440000 ->        1

THE 14: 0.250000 (2^-2.000000)        1 ->   440004
EXP 14: 0.249701 (2^-2.001729)        1 ->   440004

THE 15: 0.007812 (2^-7.000000)        0 ->        B
EXP 15: 0.000000 (2^-inf)        0 ->        B

OK


[./src/simon-xor-threshold-search.cc:347] Verify P of differentials (2^20.000000 CPs)...
Input differences:    10000    64400

R# 0 Output differences:     4400    10000
THE  1: 0.250000 (2^-2.000000)    10000 ->     4400
EXP  1: 0.250187 (2^-1.998922)    10000 ->     4400

R# 1 Output differences:     1000     4400
THE  2: 0.015625 (2^-6.000000)     4400 ->     1000
EXP  2: 0.015673 (2^-5.995604)     4400 ->     1000

R# 2 Output differences:      400     1000
THE  3: 0.003906 (2^-8.000000)     1000 ->      400
EXP  3: 0.003870 (2^-8.013447)     1000 ->      400

R# 3 Output differences:        0      400
THE  4: 0.000977 (2^-10.000000)      400 ->        0
EXP  4: 0.001568 (2^-9.317005)      400 ->        0

R# 4 Output differences:      400        0
THE  5: 0.000977 (2^-10.000000)        0 ->      400
EXP  5: 0.001555 (2^-9.328459)        0 ->      400

R# 5 Output differences:     1000      400
THE  6: 0.000244 (2^-12.000000)      400 ->     1000
EXP  6: 0.000398 (2^-11.296096)      400 ->     1000

R# 6 Output differences:     4400     1000
THE  7: 0.000061 (2^-14.000000)     1000 ->     4400
EXP  7: 0.000172 (2^-12.508147)     1000 ->     4400

R# 7 Output differences:    50000     4400
THE  8: 0.000004 (2^-18.000000)     4400 ->    50000
EXP  8: 0.000012 (2^-16.299560)     4400 ->    50000

R# 8 Output differences:   1C4400    50000
THE  9: 0.000000 (2^-22.000000)    50000 ->   1C4400
EXP  9: 0.000007 (2^-17.192645)    50000 ->   1C4400

R# 9 Output differences:    41000   1C4400
THE 10: 0.000000 (2^-31.000000)   1C4400 ->    41000
EXP 10: 0.000000 (2^-inf)   1C4400 ->    41000

R#10 Output differences:    40400    41000
THE 11: 0.000000 (2^-35.000000)    41000 ->    40400
EXP 11: 0.000000 (2^-inf)    41000 ->    40400

R#11 Output differences:   100004    40400
THE 12: 0.000000 (2^-39.000000)    40400 ->   100004
EXP 12: 0.000000 (2^-inf)    40400 ->   100004

R#12 Output differences:   440000   100004
THE 13: 0.000000 (2^-43.000000)   100004 ->   440000
EXP 13: 0.000000 (2^-inf)   100004 ->   440000

R#13 Output differences:        1   440000
THE 14: 0.000000 (2^-47.000000)   440000 ->        1
EXP 14: 0.000000 (2^-inf)   440000 ->        1

R#14 Output differences:   440004        1
THE 15: 0.000000 (2^-49.000000)        1 ->   440004
EXP 15: 0.000000 (2^-inf)        1 ->   440004

OK

[./src/simon-xor-threshold-search.cc:2005] Best differential for 15 R: (   0    0) -> (   0    0) 2^-inf
[./src/simon-xor-threshold-search.cc:2033] nrounds 15
[./tests/simon-xor-threshold-search-tests.cc:258] Final best trail:
0:    10000 ->    60000 0.250000 (2^-2.000000)
1:     4400 ->     1000 0.062500 (2^-4.000000)
2:     1000 ->      400 0.250000 (2^-2.000000)
3:      400 ->        0 0.250000 (2^-2.000000)
4:        0 ->      400 1.000000 (2^0.000000)
5:      400 ->     1000 0.250000 (2^-2.000000)
6:     1000 ->     4400 0.250000 (2^-2.000000)
7:     4400 ->    50000 0.062500 (2^-4.000000)
8:    50000 ->   1C4400 0.062500 (2^-4.000000)
9:   1C4400 ->    41000 0.001953 (2^-9.000000)
10:    41000 ->    40400 0.062500 (2^-4.000000)
11:    40400 ->   100004 0.062500 (2^-4.000000)
12:   100004 ->   440000 0.062500 (2^-4.000000)
13:   440000 ->        1 0.062500 (2^-4.000000)
14:        1 ->   440004 0.250000 (2^-2.000000)
p_tot = 0.000000000000002 = 2^-49.000000
[./tests/simon-xor-threshold-search-tests.cc:269] Final bounds 15R:
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-46.000000
B[14] = 2^-49.000000
[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 24 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1

real    669m21.174s
user    667m13.234s
sys     0m14.749s
vpv@igor:~/skcrypto/trunk/work/src/yaarx$ p


 */


/* 

Simon64: 20R, 0.12, 8 bits

[./src/simon-xor-threshold-search.cc:1871] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.120000 2^-3.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 8 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1680] nrounds = 20, Bn_init = 2^-80.000000 : key C9489392 500DB238 EA8D1394 94FFF96F
[./src/xdp-rot-and.cc:1365] 1 / 4294967296 : NEW Croad:        1        0 0.25000 2^-2.00 | CR size: Dp          1, Dxy          11
[./src/simon-xor-threshold-search.cc:1503] 19 | Update best found Bn: 2^-80.000000 -> 2^-78.000000
[./src/simon-xor-threshold-search.cc:1249]  0: [121 / 122]  4440044 ->        0, 2^-10.000000, 2^-78.000000hw     4 Dxy          1
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-56.000000
B[17] = 2^-64.000000
B[18] = 2^-70.000000
B[19] = 2^-78.000000
pDDT sizes: Dp 122, Dxy 122, p_thres 0.120000 2^-3.058894
0: 44040004 -> 10100011 0.003906 (2^-8.000000)
1:        1 -> 44040000 0.250000 (2^-2.000000)
2: 44040000 -> 10100000 0.015625 (2^-6.000000)
3: 10100000 ->  4440000 0.062500 (2^-4.000000)
4:  4440000 ->  1000000 0.015625 (2^-6.000000)
5:  1000000 ->   440000 0.250000 (2^-2.000000)
6:   440000 ->   100000 0.062500 (2^-4.000000)
7:   100000 ->    40000 0.250000 (2^-2.000000)
8:    40000 ->        0 0.250000 (2^-2.000000)
9:        0 ->    40000 1.000000 (2^0.000000)
10:    40000 ->   100000 0.250000 (2^-2.000000)
11:   100000 ->   440000 0.250000 (2^-2.000000)
12:   440000 ->  1000000 0.062500 (2^-4.000000)
13:  1000000 ->  4440000 0.250000 (2^-2.000000)
14:  4440000 -> 10100000 0.015625 (2^-6.000000)
15: 10100000 -> 44040000 0.062500 (2^-4.000000)
16: 44040000 ->        1 0.015625 (2^-6.000000)
17:        1 -> 44040004 0.250000 (2^-2.000000)
18: 44040004 -> 10100010 0.003906 (2^-8.000000)
19: 10100010 ->  4440044 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-78.000000, Bn = 0.000000 = 2^-78.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.003906 (2^-8.000000) 44040004 -> 10100011
EXP  0: 0.003847 (2^-8.022005) 44040004 -> 10100011

THE  1: 0.250000 (2^-2.000000)        1 -> 44040000
EXP  1: 0.250311 (2^-1.998207)        1 -> 44040000

THE  2: 0.015625 (2^-6.000000) 44040000 -> 10100000
EXP  2: 0.015870 (2^-5.977545) 44040000 -> 10100000

THE  3: 0.062500 (2^-4.000000) 10100000 ->  4440000
EXP  3: 0.062408 (2^-4.002115) 10100000 ->  4440000

THE  4: 0.015625 (2^-6.000000)  4440000 ->  1000000
EXP  4: 0.015611 (2^-6.001321)  4440000 ->  1000000

THE  5: 0.250000 (2^-2.000000)  1000000 ->   440000
EXP  5: 0.249736 (2^-2.001525)  1000000 ->   440000

THE  6: 0.062500 (2^-4.000000)   440000 ->   100000
EXP  6: 0.062072 (2^-4.009918)   440000 ->   100000

THE  7: 0.250000 (2^-2.000000)   100000 ->    40000
EXP  7: 0.249251 (2^-2.004327)   100000 ->    40000

THE  8: 0.250000 (2^-2.000000)    40000 ->        0
EXP  8: 0.250459 (2^-1.997355)    40000 ->        0

THE  9: 1.000000 (2^0.000000)        0 ->    40000
EXP  9: 1.000000 (2^0.000000)        0 ->    40000

THE 10: 0.250000 (2^-2.000000)    40000 ->   100000
EXP 10: 0.249622 (2^-2.002181)    40000 ->   100000

THE 11: 0.250000 (2^-2.000000)   100000 ->   440000
EXP 11: 0.249759 (2^-2.001393)   100000 ->   440000

THE 12: 0.062500 (2^-4.000000)   440000 ->  1000000
EXP 12: 0.062659 (2^-3.996328)   440000 ->  1000000

THE 13: 0.250000 (2^-2.000000)  1000000 ->  4440000
EXP 13: 0.249599 (2^-2.002313)  1000000 ->  4440000

THE 14: 0.015625 (2^-6.000000)  4440000 -> 10100000
EXP 14: 0.015719 (2^-5.991309)  4440000 -> 10100000

THE 15: 0.062500 (2^-4.000000) 10100000 -> 44040000
EXP 15: 0.062102 (2^-4.009209) 10100000 -> 44040000

THE 16: 0.015625 (2^-6.000000) 44040000 ->        1
EXP 16: 0.015818 (2^-5.982322) 44040000 ->        1

THE 17: 0.250000 (2^-2.000000)        1 -> 44040004
EXP 17: 0.249727 (2^-2.001575)        1 -> 44040004

THE 18: 0.003906 (2^-8.000000) 44040004 -> 10100010
EXP 18: 0.003939 (2^-7.988074) 44040004 -> 10100010

THE 19: 0.015625 (2^-6.000000) 10100010 ->  4440044
EXP 19: 0.015724 (2^-5.990871) 10100010 ->  4440044

OK
[./src/simon-xor-threshold-search.cc:1871] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.120000 2^-3.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 8 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1875] nrounds = 20
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.003906 (2^-8.000000) 44040004 -> 10100011
EXP  0: 0.003979 (2^-7.973477) 44040004 -> 10100011

THE  1: 0.250000 (2^-2.000000)        1 -> 44040000
EXP  1: 0.249614 (2^-2.002231)        1 -> 44040000

THE  2: 0.015625 (2^-6.000000) 44040000 -> 10100000
EXP  2: 0.015568 (2^-6.005293) 44040000 -> 10100000

THE  3: 0.062500 (2^-4.000000) 10100000 ->  4440000
EXP  3: 0.062245 (2^-4.005890) 10100000 ->  4440000

THE  4: 0.015625 (2^-6.000000)  4440000 ->  1000000
EXP  4: 0.015629 (2^-5.999648)  4440000 ->  1000000

THE  5: 0.250000 (2^-2.000000)  1000000 ->   440000
EXP  5: 0.249426 (2^-2.003317)  1000000 ->   440000

THE  6: 0.062500 (2^-4.000000)   440000 ->   100000
EXP  6: 0.061996 (2^-4.011693)   440000 ->   100000

THE  7: 0.250000 (2^-2.000000)   100000 ->    40000
EXP  7: 0.249831 (2^-2.000974)   100000 ->    40000

THE  8: 0.250000 (2^-2.000000)    40000 ->        0
EXP  8: 0.249893 (2^-2.000617)    40000 ->        0

THE  9: 1.000000 (2^0.000000)        0 ->    40000
EXP  9: 1.000000 (2^0.000000)        0 ->    40000

THE 10: 0.250000 (2^-2.000000)    40000 ->   100000
EXP 10: 0.250064 (2^-1.999631)    40000 ->   100000

THE 11: 0.250000 (2^-2.000000)   100000 ->   440000
EXP 11: 0.249279 (2^-2.004167)   100000 ->   440000

THE 12: 0.062500 (2^-4.000000)   440000 ->  1000000
EXP 12: 0.062579 (2^-3.998174)   440000 ->  1000000

THE 13: 0.250000 (2^-2.000000)  1000000 ->  4440000
EXP 13: 0.249019 (2^-2.005674)  1000000 ->  4440000

THE 14: 0.015625 (2^-6.000000)  4440000 -> 10100000
EXP 14: 0.015805 (2^-5.983453)  4440000 -> 10100000

THE 15: 0.062500 (2^-4.000000) 10100000 -> 44040000
EXP 15: 0.062305 (2^-4.004498) 10100000 -> 44040000

THE 16: 0.015625 (2^-6.000000) 44040000 ->        1
EXP 16: 0.015618 (2^-6.000617) 44040000 ->        1

THE 17: 0.250000 (2^-2.000000)        1 -> 44040004
EXP 17: 0.250497 (2^-1.997136)        1 -> 44040004

THE 18: 0.003906 (2^-8.000000) 44040004 -> 10100010
EXP 18: 0.003878 (2^-8.010606) 44040004 -> 10100010

THE 19: 0.015625 (2^-6.000000) 10100010 ->  4440044
EXP 19: 0.015481 (2^-6.013358) 10100010 ->  4440044

OK

[./src/simon-xor-threshold-search.cc:347] Verify P of differentials (2^20.000000 CPs)...
Input differences: 44040004 10100010

R# 0 Output differences:        1 44040004
THE  1: 0.003906 (2^-8.000000) 44040004 ->        1
EXP  1: 0.003888 (2^-8.006708) 44040004 ->        1

R# 1 Output differences: 44040000        1
THE  2: 0.000977 (2^-10.000000)        1 -> 44040000
EXP  2: 0.000950 (2^-10.039998)        1 -> 44040000

R# 2 Output differences: 10100000 44040000
THE  3: 0.000015 (2^-16.000000) 44040000 -> 10100000
EXP  3: 0.000034 (2^-14.830075) 44040000 -> 10100000

R# 3 Output differences:  4440000 10100000
THE  4: 0.000001 (2^-20.000000) 10100000 ->  4440000
EXP  4: 0.000001 (2^-20.000000) 10100000 ->  4440000

R# 4 Output differences:  1000000  4440000
THE  5: 0.000000 (2^-26.000000)  4440000 ->  1000000
EXP  5: 0.000000 (2^-inf)  4440000 ->  1000000

R# 5 Output differences:   440000  1000000
THE  6: 0.000000 (2^-28.000000)  1000000 ->   440000
EXP  6: 0.000000 (2^-inf)  1000000 ->   440000

R# 6 Output differences:   100000   440000
THE  7: 0.000000 (2^-32.000000)   440000 ->   100000
EXP  7: 0.000000 (2^-inf)   440000 ->   100000

R# 7 Output differences:    40000   100000
THE  8: 0.000000 (2^-34.000000)   100000 ->    40000
EXP  8: 0.000000 (2^-inf)   100000 ->    40000

R# 8 Output differences:        0    40000
THE  9: 0.000000 (2^-36.000000)    40000 ->        0
EXP  9: 0.000000 (2^-inf)    40000 ->        0

R# 9 Output differences:    40000        0
THE 10: 0.000000 (2^-36.000000)        0 ->    40000
EXP 10: 0.000000 (2^-inf)        0 ->    40000

R#10 Output differences:   100000    40000
THE 11: 0.000000 (2^-38.000000)    40000 ->   100000
EXP 11: 0.000000 (2^-inf)    40000 ->   100000

R#11 Output differences:   440000   100000
THE 12: 0.000000 (2^-40.000000)   100000 ->   440000
EXP 12: 0.000000 (2^-inf)   100000 ->   440000

R#12 Output differences:  1000000   440000
THE 13: 0.000000 (2^-44.000000)   440000 ->  1000000
EXP 13: 0.000000 (2^-inf)   440000 ->  1000000

R#13 Output differences:  4440000  1000000
THE 14: 0.000000 (2^-46.000000)  1000000 ->  4440000
EXP 14: 0.000000 (2^-inf)  1000000 ->  4440000

R#14 Output differences: 10100000  4440000
THE 15: 0.000000 (2^-52.000000)  4440000 -> 10100000
EXP 15: 0.000000 (2^-inf)  4440000 -> 10100000

R#15 Output differences: 44040000 10100000
THE 16: 0.000000 (2^-56.000000) 10100000 -> 44040000
EXP 16: 0.000000 (2^-inf) 10100000 -> 44040000

R#16 Output differences:        1 44040000
THE 17: 0.000000 (2^-62.000000) 44040000 ->        1
EXP 17: 0.000000 (2^-inf) 44040000 ->        1

R#17 Output differences: 44040004        1
THE 18: 0.000000 (2^-64.000000)        1 -> 44040004
EXP 18: 0.000000 (2^-inf)        1 -> 44040004

R#18 Output differences: 10100010 44040004
THE 19: 0.000000 (2^-72.000000) 44040004 -> 10100010
EXP 19: 0.000000 (2^-inf) 44040004 -> 10100010

R#19 Output differences:  4440044 10100010
THE 20: 0.000000 (2^-78.000000) 10100010 ->  4440044
EXP 20: 0.000000 (2^-inf) 10100010 ->  4440044

OK

[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.120000 2^-3.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 8 SIMON_BACK_TO_HWAY 1

real    427m53.075s
user    426m37.700s
sys     0m1.456s

 */

/* 

Simon32: 13R

Confirming the result of Abed et al. on Simon32 using threshold-search:

[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 16 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 15, Bn_init = 2^-40.000000 : key     EFAE     CEF0     3A73     969E
[./src/simon-xor-threshold-search.cc:1249]  0: [25 / 280]     A000 ->        0, 2^-3.000000, 2^-40.000000 hw     2          18
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
B[12] = 2^-36.000000
B[13] = 2^-38.000000
B[14] = 2^-40.000000
pDDT sizes: Dp 280, Dxy 280, p_thres 0.050000 2^-4.321928
 0:        0 ->        0 1.000000 (2^-0.000000)
 1:       40 ->      100 0.250000 (2^-2.000000)
 2:      100 ->      440 0.250000 (2^-2.000000)
 3:      440 ->     1000 0.062500 (2^-4.000000)
 4:     1000 ->     4440 0.250000 (2^-2.000000)
 5:     4440 ->      101 0.015625 (2^-6.000000)
 6:      101 ->     4044 0.062500 (2^-4.000000)
 7:     4044 ->       10 0.015625 (2^-6.000000)
 8:       10 ->     4004 0.250000 (2^-2.000000)
 9:     4004 ->        1 0.062500 (2^-4.000000)
10:        1 ->     4000 0.250000 (2^-2.000000)
11:     4000 ->        0 0.250000 (2^-2.000000)
12:        0 ->     4000 1.000000 (2^-0.000000)
13:     4000 ->        1 0.250000 (2^-2.000000)
14:        1 ->     4004 0.250000 (2^-2.000000)
p_tot = 0.000000000000909 = 2^-40.000000, Bn = 0.000000 = 2^-40.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 1.000000 (2^0.000000)        0 ->        0
EXP  0: 1.000000 (2^0.000000)        0 ->        0

THE  1: 0.250000 (2^-2.000000)       40 ->      100
EXP  1: 0.249885 (2^-2.000666)       40 ->      100

THE  2: 0.250000 (2^-2.000000)      100 ->      440
EXP  2: 0.249181 (2^-2.004735)      100 ->      440

THE  3: 0.062500 (2^-4.000000)      440 ->     1000
EXP  3: 0.062613 (2^-3.997383)      440 ->     1000

THE  4: 0.250000 (2^-2.000000)     1000 ->     4440
EXP  4: 0.250419 (2^-1.997586)     1000 ->     4440

THE  5: 0.015625 (2^-6.000000)     4440 ->      101
EXP  5: 0.015654 (2^-5.997361)     4440 ->      101

THE  6: 0.062500 (2^-4.000000)      101 ->     4044
EXP  6: 0.062174 (2^-4.007548)      101 ->     4044

THE  7: 0.015625 (2^-6.000000)     4044 ->       10
EXP  7: 0.015644 (2^-5.998240)     4044 ->       10

THE  8: 0.250000 (2^-2.000000)       10 ->     4004
EXP  8: 0.249695 (2^-2.001762)       10 ->     4004

THE  9: 0.062500 (2^-4.000000)     4004 ->        1
EXP  9: 0.062922 (2^-3.990281)     4004 ->        1

THE 10: 0.250000 (2^-2.000000)        1 ->     4000
EXP 10: 0.249339 (2^-2.003819)        1 ->     4000

THE 11: 0.250000 (2^-2.000000)     4000 ->        0
EXP 11: 0.249902 (2^-2.000567)     4000 ->        0

THE 12: 1.000000 (2^0.000000)        0 ->     4000
EXP 12: 1.000000 (2^0.000000)        0 ->     4000

THE 13: 0.250000 (2^-2.000000)     4000 ->        1
EXP 13: 0.249919 (2^-2.000468)     4000 ->        1

THE 14: 0.250000 (2^-2.000000)        1 ->     4004
EXP 14: 0.250378 (2^-1.997822)        1 ->     4004


OK

[./src/simon-xor-threshold-search.cc:347] Verify P of differentials (2^20.000000 CPs)...
Input differences:        0       40

R# 0 Output differences:       40        0
THE  1: 1.000000 (2^0.000000)        0 ->       40
EXP  1: 1.000000 (2^0.000000)        0 ->       40

R# 1 Output differences:      100       40
THE  2: 0.250000 (2^-2.000000)       40 ->      100
EXP  2: 0.249899 (2^-2.000583)       40 ->      100

R# 2 Output differences:      440      100
THE  3: 0.062500 (2^-4.000000)      100 ->      440
EXP  3: 0.062188 (2^-4.007217)      100 ->      440

R# 3 Output differences:     1000      440
THE  4: 0.003906 (2^-8.000000)      440 ->     1000
EXP  4: 0.006214 (2^-7.330229)      440 ->     1000

R# 4 Output differences:     4440     1000
THE  5: 0.000977 (2^-10.000000)     1000 ->     4440
EXP  5: 0.001555 (2^-9.328459)     1000 ->     4440

R# 5 Output differences:      101     4440
THE  6: 0.000015 (2^-16.000000)     4440 ->      101
EXP  6: 0.000035 (2^-14.790547)     4440 ->      101

R# 6 Output differences:     4044      101
THE  7: 0.000001 (2^-20.000000)      101 ->     4044
EXP  7: 0.000003 (2^-18.415037)      101 ->     4044

R# 7 Output differences:       10     4044
THE  8: 0.000000 (2^-26.000000)     4044 ->       10
EXP  8: 0.000000 (2^-inf)     4044 ->       10

R# 8 Output differences:     4004       10
THE  9: 0.000000 (2^-28.000000)       10 ->     4004
EXP  9: 0.000000 (2^-inf)       10 ->     4004

R# 9 Output differences:        1     4004
THE 10: 0.000000 (2^-32.000000)     4004 ->        1
EXP 10: 0.000000 (2^-inf)     4004 ->        1

R#10 Output differences:     4000        1
THE 11: 0.000000 (2^-34.000000)        1 ->     4000
EXP 11: 0.000000 (2^-inf)        1 ->     4000

R#11 Output differences:        0     4000
THE 12: 0.000000 (2^-36.000000)     4000 ->        0
EXP 12: 0.000000 (2^-inf)     4000 ->        0

R#12 Output differences:     4000        0
THE 13: 0.000000 (2^-36.000000)        0 ->     4000
EXP 13: 0.000000 (2^-inf)        0 ->     4000

R#13 Output differences:        1     4000
THE 14: 0.000000 (2^-38.000000)     4000 ->        1
EXP 14: 0.000000 (2^-inf)     4000 ->        1

R#14 Output differences:     4004        1
THE 15: 0.000000 (2^-40.000000)        1 ->     4004
EXP 15: 0.000000 (2^-inf)        1 ->     4004

OK

[./src/simon-xor-threshold-search.cc:2005] Best differential for 15 R: (   0    0) -> (   0    0) 2^-inf
[./src/simon-xor-threshold-search.cc:2033] nrounds 15
[./tests/simon-xor-threshold-search-tests.cc:258] Final best trail:
 0:        0 ->        0 1.000000 (2^0.000000)
 1:       40 ->      100 0.250000 (2^-2.000000)
 2:      100 ->      440 0.250000 (2^-2.000000)
 3:      440 ->     1000 0.062500 (2^-4.000000)
 4:     1000 ->     4440 0.250000 (2^-2.000000)
 5:     4440 ->      101 0.015625 (2^-6.000000)
 6:      101 ->     4044 0.062500 (2^-4.000000)
 7:     4044 ->       10 0.015625 (2^-6.000000)
 8:       10 ->     4004 0.250000 (2^-2.000000)
 9:     4004 ->        1 0.062500 (2^-4.000000)
10:        1 ->     4000 0.250000 (2^-2.000000)
11:     4000 ->        0 0.250000 (2^-2.000000)
12:        0 ->     4000 1.000000 (2^0.000000)
13:     4000 ->        1 0.250000 (2^-2.000000)
14:        1 ->     4004 0.250000 (2^-2.000000)
p_tot = 0.000000000000909 = 2^-40.000000
[./tests/simon-xor-threshold-search-tests.cc:269] Final bounds 15R:
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
B[12] = 2^-36.000000
B[13] = 2^-38.000000
B[14] = 2^-40.000000
[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 16 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1

real    206m32.393s
user    205m38.983s
sys     0m5.764s
vpv@igor:~/skcrypto/trunk/work/src/yaarx$ p


 */


/* ---- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
pDDT sizes: Dp 280, Dxy 280, p_thres 0.050000 2^-4.321928
 0:       40 ->      100 0.250000 (2^-2.000000)
 1:       11 ->        4 0.062500 (2^-4.000000)
 2:        4 ->        1 0.250000 (2^-2.000000)
 3:        1 ->        0 0.250000 (2^-2.000000)
 4:        0 ->        1 1.000000 (2^0.000000)
 5:        1 ->      104 0.250000 (2^-2.000000)
 6:      104 ->      410 0.062500 (2^-4.000000)
 7:      410 ->      140 0.062500 (2^-4.000000)
 8:      140 ->      110 0.125000 (2^-3.000000)
 9:      110 ->      500 0.062500 (2^-4.000000)
10:      500 ->     1510 0.125000 (2^-3.000000)
p_tot = 0.000000000931323 = 2^-30.000000, Bn = 0.000000 = 2^-30.000000
[./src/simon-xor-threshold-search.cc:1785] Init bound: 1510 -> 0 = 0.015625 2^-6.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)       40 ->      100
EXP  0: 0.250342 (2^-1.998026)       40 ->      100

THE  1: 0.062500 (2^-4.000000)       11 ->        4
EXP  1: 0.062180 (2^-4.007416)       11 ->        4

THE  2: 0.250000 (2^-2.000000)        4 ->        1
EXP  2: 0.249996 (2^-2.000022)        4 ->        1

THE  3: 0.250000 (2^-2.000000)        1 ->        0
EXP  3: 0.250388 (2^-1.997762)        1 ->        0

THE  4: 1.000000 (2^0.000000)        0 ->        1
EXP  4: 1.000000 (2^0.000000)        0 ->        1

THE  5: 0.250000 (2^-2.000000)        1 ->      104
EXP  5: 0.250554 (2^-1.996806)        1 ->      104

THE  6: 0.062500 (2^-4.000000)      104 ->      410
EXP  6: 0.062476 (2^-4.000550)      104 ->      410

THE  7: 0.062500 (2^-4.000000)      410 ->      140
EXP  7: 0.062611 (2^-3.997449)      410 ->      140

THE  8: 0.125000 (2^-3.000000)      140 ->      110
EXP  8: 0.124871 (2^-3.001487)      140 ->      110

THE  9: 0.062500 (2^-4.000000)      110 ->      500
EXP  9: 0.062140 (2^-4.008345)      110 ->      500

THE 10: 0.125000 (2^-3.000000)      500 ->     1510
EXP 10: 0.124498 (2^-3.005801)      500 ->     1510

THE 11: 0.015625 (2^-6.000000)     1510 ->     5140
EXP 11: 0.015666 (2^-5.996219)     1510 ->     5140

OK
[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 16 NROUNDS 15 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 12, Bn_init = 2^-36.000000 : key     EFAE     CEF0     3A73     969E
[./src/xdp-rot-and.cc:1365] 3 / 65536 : NEW Croad:      A00     1002 0.12500 2^-3.00 | CR size: Dp          3, Dxy          33
[./src/simon-xor-threshold-search.cc:1511] 11 | Update best found Bn: 2^-36.000000 -> 2^-35.000000
[./src/xdp-rot-and.cc:1365] 1 / 65536 : NEW Croad:      200        0 0.25000 2^-2.00 | CR size: Dp          1, Dxy          13
[./src/simon-xor-threshold-search.cc:1511] 11 | Update best found Bn: 2^-35.000000 -> 2^-34.000000

 */

/* 

Tests that were tried:

[./src/simon-xor-threshold-search.cc:1871] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.120000 2^-3.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1680] nrounds = 20, Bn_init = 2^-80.000000 : key 69F63A64 4ADAEEA0 3CCC7AE1 DA110416

[./src/simon-xor-threshold-search.cc:1871] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.120000 2^-3.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 8 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1680] nrounds = 16, Bn_init = 2^-54.000000 : key C9489392 500DB238 EA8D1394 94FFF96F

[./src/simon-xor-threshold-search.cc:1871] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.120000 2^-3.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1680] nrounds = 20, Bn_init = 2^-80.000000 : key D99936CB E9A5BC06 E03E36AE F577A4D0


[./src/simon-xor-threshold-search.cc:1871] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.120000 2^-3.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1680] nrounds = 18, Bn_init = 2^-64.000000 : key 2A4AADC3 C92ABAF2 2F36D67E 52E01DB8

[./src/simon-xor-threshold-search.cc:1898] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.060000 2^-4.058894 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1703] nrounds = 8, Bn_init = 2^-18.000000 : key AC344BC1 256F19AD E3117159 E62320A2

[./src/simon-xor-threshold-search.cc:1871] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.200000 2^-2.321928 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 6 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1680] nrounds = 20, Bn_init = 2^-80.000000 : key F332FAB0 272DA67C 508BED89 FD10C9A3


 */


/* --- */

/* 

20 rounds: best found trail probability 2^-78, with p_thres = 0.2 (all differences), max_hw = 5

[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.200000 2^-2.321928 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1


B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-56.000000
B[17] = 2^-64.000000
B[18] = 2^-70.000000
B[19] = 2^-78.000000
pDDT sizes: Dp 58, Dxy 58, p_thres 0.200000 2^-2.321928
 0: 11010001 -> 44040004 0.003906 (2^-8.000000)
 1: 40000000 -> 11010000 0.250000 (2^-2.000000)
 2: 11010000 ->  4040000 0.015625 (2^-6.000000)
 3:  4040000 ->  1110000 0.062500 (2^-4.000000)
 4:  1110000 ->   400000 0.015625 (2^-6.000000)
 5:   400000 ->   110000 0.250000 (2^-2.000000)
 6:   110000 ->    40000 0.062500 (2^-4.000000)
 7:    40000 ->    10000 0.250000 (2^-2.000000)
 8:    10000 ->        0 0.250000 (2^-2.000000)
 9:        0 ->    10000 1.000000 (2^0.000000)
10:    10000 ->    40000 0.250000 (2^-2.000000)
11:    40000 ->   110000 0.250000 (2^-2.000000)
12:   110000 ->   400000 0.062500 (2^-4.000000)
13:   400000 ->  1110000 0.250000 (2^-2.000000)
14:  1110000 ->  4040000 0.015625 (2^-6.000000)
15:  4040000 -> 11010000 0.062500 (2^-4.000000)
16: 11010000 -> 40000000 0.015625 (2^-6.000000)
17: 40000000 -> 11010001 0.250000 (2^-2.000000)
18: 11010001 ->  4040004 0.003906 (2^-8.000000)
19:  4040004 ->  1110011 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-78.000000, Bn = 0.000000 = 2^-78.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.003906 (2^-8.000000) 11010001 -> 44040004
EXP  0: 0.003908 (2^-7.999296) 11010001 -> 44040004

THE  1: 0.250000 (2^-2.000000) 40000000 -> 11010000
EXP  1: 0.250547 (2^-1.996844) 40000000 -> 11010000

THE  2: 0.015625 (2^-6.000000) 11010000 ->  4040000
EXP  2: 0.015671 (2^-5.995780) 11010000 ->  4040000

THE  3: 0.062500 (2^-4.000000)  4040000 ->  1110000
EXP  3: 0.062333 (2^-4.003858)  4040000 ->  1110000

THE  4: 0.015625 (2^-6.000000)  1110000 ->   400000
EXP  4: 0.015652 (2^-5.997537)  1110000 ->   400000

THE  5: 0.250000 (2^-2.000000)   400000 ->   110000
EXP  5: 0.249652 (2^-2.002010)   400000 ->   110000

THE  6: 0.062500 (2^-4.000000)   110000 ->    40000
EXP  6: 0.062841 (2^-3.992141)   110000 ->    40000

THE  7: 0.250000 (2^-2.000000)    40000 ->    10000
EXP  7: 0.250545 (2^-1.996861)    40000 ->    10000

THE  8: 0.250000 (2^-2.000000)    10000 ->        0
EXP  8: 0.249878 (2^-2.000705)    10000 ->        0

THE  9: 1.000000 (2^0.000000)        0 ->    10000
EXP  9: 1.000000 (2^0.000000)        0 ->    10000

THE 10: 0.250000 (2^-2.000000)    10000 ->    40000
EXP 10: 0.250249 (2^-1.998564)    10000 ->    40000

THE 11: 0.250000 (2^-2.000000)    40000 ->   110000
EXP 11: 0.250266 (2^-1.998465)    40000 ->   110000

THE 12: 0.062500 (2^-4.000000)   110000 ->   400000
EXP 12: 0.061955 (2^-4.012625)   110000 ->   400000

THE 13: 0.250000 (2^-2.000000)   400000 ->  1110000
EXP 13: 0.250515 (2^-1.997031)   400000 ->  1110000

THE 14: 0.015625 (2^-6.000000)  1110000 ->  4040000
EXP 14: 0.015680 (2^-5.994902)  1110000 ->  4040000

THE 15: 0.062500 (2^-4.000000)  4040000 -> 11010000
EXP 15: 0.062611 (2^-3.997449)  4040000 -> 11010000

THE 16: 0.015625 (2^-6.000000) 11010000 -> 40000000
EXP 16: 0.015718 (2^-5.991484) 11010000 -> 40000000

THE 17: 0.250000 (2^-2.000000) 40000000 -> 11010001
EXP 17: 0.249863 (2^-2.000793) 40000000 -> 11010001

THE 18: 0.003906 (2^-8.000000) 11010001 ->  4040004
EXP 18: 0.003881 (2^-8.009541) 11010001 ->  4040004

THE 19: 0.015625 (2^-6.000000)  4040004 ->  1110011
EXP 19: 0.015659 (2^-5.996833)  4040004 ->  1110011

THE 20: 0.007812 (2^-7.000000)        0 ->        B
EXP 20: 0.000000 (2^-inf)        0 ->        B

OK
[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.200000 2^-2.321928 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1873] nrounds = 20

B[19] = 2^-78.000000
[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.200000 2^-2.321928 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 6 SIMON_BACK_TO_HWAY 1

Approximate running time:

real    204m1.421s
user    200m19.639s
sys     0m2.500s



*/

/*

19 rounds: best found trail probability 2^-70, with p_thres = 0.2 (all differences), max_hw = 5

	[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.200000 2^-2.321928 XDP_ROT_AND_MAX_DIFF_CNT 32 (++ all !!!) 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1
	[./src/simon-xor-threshold-search.cc:1678] nrounds = 19, Bn_init = 2^-72.000000 : key 33179795 799AC8E8 95C8EFF1 F23ECCC0
	[./src/xdp-rot-and.cc:1365] 1 / 4294967296 : NEW Croad: 40000000        0 0.25000 2^-2.00 | CR size: Dp          1, Dxy          11
	[./src/simon-xor-threshold-search.cc:1501] 18 | Update best found Bn: 2^-72.000000 -> 2^-70.000000


B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-56.000000
B[17] = 2^-64.000000
B[18] = 2^-70.000000
pDDT sizes: Dp 57, Dxy 57, p_thres 0.200000 2^-2.321928
 0: 40000000 -> 80000001 0.250000 (2^-2.000000)
 1: 11010000 ->  4040000 0.015625 (2^-6.000000)
 2:  4040000 ->  1110000 0.062500 (2^-4.000000)
 3:  1110000 ->   400000 0.015625 (2^-6.000000)
 4:   400000 ->   110000 0.250000 (2^-2.000000)
 5:   110000 ->    40000 0.062500 (2^-4.000000)
 6:    40000 ->    10000 0.250000 (2^-2.000000)
 7:    10000 ->        0 0.250000 (2^-2.000000)
 8:        0 ->    10000 1.000000 (2^0.000000)
 9:    10000 ->    40000 0.250000 (2^-2.000000)
10:    40000 ->   110000 0.250000 (2^-2.000000)
11:   110000 ->   400000 0.062500 (2^-4.000000)
12:   400000 ->  1110000 0.250000 (2^-2.000000)
13:  1110000 ->  4040000 0.015625 (2^-6.000000)
14:  4040000 -> 11010000 0.062500 (2^-4.000000)
15: 11010000 -> 40000000 0.015625 (2^-6.000000)
16: 40000000 -> 11010001 0.250000 (2^-2.000000)
17: 11010001 ->  4040004 0.003906 (2^-8.000000)
18:  4040004 ->  1110011 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-70.000000, Bn = 0.000000 = 2^-70.000000
[./src/simon-xor-threshold-search.cc:1760] Init bound: 1110011 -> 0 = 0.000977 2^-10.000000


[./src/simon-xor-threshold-search.cc:1771]  WARNING!! Two consecutive countryroads!

[./src/simon-xor-threshold-search.cc:1802] Add new Hway: 1110011 0 0.000977 2^-10.000000
[./src/simon-xor-threshold-search.cc:1804] NEW HW sizes: Dp 58, Dxy 58
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000) 40000000 -> 80000001
EXP  0: 0.250249 (2^-1.998564) 40000000 -> 80000001

THE  1: 0.015625 (2^-6.000000) 11010000 ->  4040000
EXP  1: 0.015824 (2^-5.981713) 11010000 ->  4040000

THE  2: 0.062500 (2^-4.000000)  4040000 ->  1110000
EXP  2: 0.062208 (2^-4.006752)  4040000 ->  1110000

THE  3: 0.015625 (2^-6.000000)  1110000 ->   400000
EXP  3: 0.015326 (2^-6.027918)  1110000 ->   400000

THE  4: 0.250000 (2^-2.000000)   400000 ->   110000
EXP  4: 0.249501 (2^-2.002881)   400000 ->   110000

THE  5: 0.062500 (2^-4.000000)   110000 ->    40000
EXP  5: 0.062344 (2^-4.003615)   110000 ->    40000

THE  6: 0.250000 (2^-2.000000)    40000 ->    10000
EXP  6: 0.249793 (2^-2.001195)    40000 ->    10000

THE  7: 0.250000 (2^-2.000000)    10000 ->        0
EXP  7: 0.249550 (2^-2.002600)    10000 ->        0

THE  8: 1.000000 (2^0.000000)        0 ->    10000
EXP  8: 1.000000 (2^0.000000)        0 ->    10000

THE  9: 0.250000 (2^-2.000000)    10000 ->    40000
EXP  9: 0.248644 (2^-2.007847)    10000 ->    40000

THE 10: 0.250000 (2^-2.000000)    40000 ->   110000
EXP 10: 0.249976 (2^-2.000138)    40000 ->   110000

THE 11: 0.062500 (2^-4.000000)   110000 ->   400000
EXP 11: 0.062693 (2^-3.995560)   110000 ->   400000

THE 12: 0.250000 (2^-2.000000)   400000 ->  1110000
EXP 12: 0.250243 (2^-1.998597)   400000 ->  1110000

THE 13: 0.015625 (2^-6.000000)  1110000 ->  4040000
EXP 13: 0.015463 (2^-6.015048)  1110000 ->  4040000

THE 14: 0.062500 (2^-4.000000)  4040000 -> 11010000
EXP 14: 0.062854 (2^-3.991856)  4040000 -> 11010000

THE 15: 0.015625 (2^-6.000000) 11010000 -> 40000000
EXP 15: 0.015393 (2^-6.021558) 11010000 -> 40000000

THE 16: 0.250000 (2^-2.000000) 40000000 -> 11010001
EXP 16: 0.249959 (2^-2.000237) 40000000 -> 11010001

THE 17: 0.003906 (2^-8.000000) 11010001 ->  4040004
EXP 17: 0.003782 (2^-8.046531) 11010001 ->  4040004

THE 18: 0.015625 (2^-6.000000)  4040004 ->  1110011
EXP 18: 0.015670 (2^-5.995867)  4040004 ->  1110011

THE 19: 0.000977 (2^-10.000000)  1110011 ->   400040
EXP 19: 0.000988 (2^-9.983192)  1110011 ->   400040

OK
[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.200000 2^-2.321928 XDP_ROT_AND_MAX_DIFF_CNT 32 2^5.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 5 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1678] nrounds = 20, Bn_init = 2^-80.000000 : key 33179795 799AC8E8 95C8EFF1 F23ECCC0



 */


/* --- */

/* 

[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.100000 2^-3.321928 XDP_ROT_AND_MAX_DIFF_CNT 10 2^3.32 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1


	[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.100000 2^-3.321928 XDP_ROT_AND_MAX_DIFF_CNT 10 2^3.32 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
	[./src/simon-xor-threshold-search.cc:1678] nrounds = 20, Bn_init = 2^-78.000000 : key ECEB8545 6427EE58 4BEC4D9A 49F3E42C
	[./src/simon-xor-threshold-search.cc:1297]  1: [13 / 18] 20000020 ->        0, 2^-4.000000, 2^-78.000000 hw     21, Dxy          1
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-24.000000
B[ 9] = 2^-28.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-62.000000  -> -56
B[17] = 2^-68.000000  -> -64
B[18] = 2^-70.000000  -> -72
B[19] = 2^-78.000000  -> 
pDDT sizes: Dp 18, Dxy 18, p_thres 0.100000 2^-3.321928
 0: 20002020 -> 80008080 0.015625 (2^-6.000000)
 1:  8000888 ->      200 0.003906 (2^-8.000000)
 2:      200 ->  8000088 0.250000 (2^-2.000000)
 3:  8000088 -> 20000020 0.015625 (2^-6.000000)
 4: 20000020 -> 88000008 0.062500 (2^-4.000000)
 5: 88000008 ->        2 0.015625 (2^-6.000000)
 6:        2 -> 88000000 0.250000 (2^-2.000000)
 7: 88000000 -> 20000000 0.062500 (2^-4.000000)
 8: 20000000 ->  8000000 0.250000 (2^-2.000000)
 9:  8000000 ->        0 0.250000 (2^-2.000000)
10:        0 ->  8000000 1.000000 (2^0.000000)
11:  8000000 -> 20000000 0.250000 (2^-2.000000)
12: 20000000 -> 88000000 0.250000 (2^-2.000000)
13: 88000000 ->        2 0.062500 (2^-4.000000)
14:        2 -> 88000008 0.250000 (2^-2.000000)
15: 88000008 -> 20000020 0.015625 (2^-6.000000)
16: 20000020 ->  8000088 0.062500 (2^-4.000000)
17:  8000088 ->      200 0.015625 (2^-6.000000)
18:      200 ->  8000888 0.250000 (2^-2.000000)
19:  8000888 -> 20002020 0.003906 (2^-8.000000)
p_tot = 0.000000000000000 = 2^-78.000000, Bn = 0.000000 = 2^-78.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.015625 (2^-6.000000) 20002020 -> 80008080
EXP  0: 0.015633 (2^-5.999296) 20002020 -> 80008080

THE  1: 0.003906 (2^-8.000000)  8000888 ->      200
EXP  1: 0.003870 (2^-8.013447)  8000888 ->      200

THE  2: 0.250000 (2^-2.000000)      200 ->  8000088
EXP  2: 0.249791 (2^-2.001206)      200 ->  8000088

THE  3: 0.015625 (2^-6.000000)  8000088 -> 20000020
EXP  3: 0.015733 (2^-5.990084)  8000088 -> 20000020

THE  4: 0.062500 (2^-4.000000) 20000020 -> 88000008
EXP  4: 0.061954 (2^-4.012669) 20000020 -> 88000008

THE  5: 0.015625 (2^-6.000000) 88000008 ->        2
EXP  5: 0.015857 (2^-5.978760) 88000008 ->        2

THE  6: 0.250000 (2^-2.000000)        2 -> 88000000
EXP  6: 0.250442 (2^-1.997454)        2 -> 88000000

THE  7: 0.062500 (2^-4.000000) 88000000 -> 20000000
EXP  7: 0.062336 (2^-4.003791) 88000000 -> 20000000

THE  8: 0.250000 (2^-2.000000) 20000000 ->  8000000
EXP  8: 0.249656 (2^-2.001988) 20000000 ->  8000000

THE  9: 0.250000 (2^-2.000000)  8000000 ->        0
EXP  9: 0.250710 (2^-1.995911)  8000000 ->        0

THE 10: 1.000000 (2^0.000000)        0 ->  8000000
EXP 10: 1.000000 (2^0.000000)        0 ->  8000000

THE 11: 0.250000 (2^-2.000000)  8000000 -> 20000000
EXP 11: 0.249894 (2^-2.000611)  8000000 -> 20000000

THE 12: 0.250000 (2^-2.000000) 20000000 -> 88000000
EXP 12: 0.250039 (2^-1.999774) 20000000 -> 88000000

THE 13: 0.062500 (2^-4.000000) 88000000 ->        2
EXP 13: 0.062433 (2^-4.001542) 88000000 ->        2

THE 14: 0.250000 (2^-2.000000)        2 -> 88000008
EXP 14: 0.250403 (2^-1.997674)        2 -> 88000008

THE 15: 0.015625 (2^-6.000000) 88000008 -> 20000020
EXP 15: 0.015742 (2^-5.989210) 88000008 -> 20000020

THE 16: 0.062500 (2^-4.000000) 20000020 ->  8000088
EXP 16: 0.062171 (2^-4.007615) 20000020 ->  8000088

THE 17: 0.015625 (2^-6.000000)  8000088 ->      200
EXP 17: 0.015613 (2^-6.001145)  8000088 ->      200

THE 18: 0.250000 (2^-2.000000)      200 ->  8000888
EXP 18: 0.250630 (2^-1.996367)      200 ->  8000888

THE 19: 0.003906 (2^-8.000000)  8000888 -> 20002020
EXP 19: 0.003881 (2^-8.009541)  8000888 -> 20002020

THE 20: 0.007812 (2^-7.000000)        0 ->        B
EXP 20: 0.000000 (2^-inf)        0 ->        B

OK
[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.100000 2^-3.321928 XDP_ROT_AND_MAX_DIFF_CNT 10 2^3.32 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1

 */


/* --- */
/* 
	[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.100000 2^-3.321928 XDP_ROT_AND_MAX_DIFF_CNT 8 2^3.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
	[./src/simon-xor-threshold-search.cc:1678] nrounds = 16, Bn_init = 2^-56.000000 : key D5EBB57E 2532CBC1 CBC8DE7E  D6FF7AE
	[./src/xdp-rot-and.cc:1365] 1 / 4294967296 : NEW Croad:      400        0 0.25000 2^-2.00 | CR size: Dp          1, Dxy          1
	[./src/xdp-rot-and.cc:1365] 1 / 4294967296 : NEW Croad: 40000040        0 0.06250 2^-4.00 | CR size: Dp          1, Dxy          1
	[./src/simon-xor-threshold-search.cc:1501] 15 | Update best found Bn: 2^-56.000000 -> 2^-54.000000

 */

/* 

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-24.000000
B[ 9] = 2^-28.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
pDDT sizes: Dp 16, Dxy 16, p_thres 0.100000 2^-3.321928
 0:  8000088 -> 20000220 0.015625 (2^-6.000000)
 1: 20000020 -> 88000008 0.062500 (2^-4.000000)
 2: 88000008 ->        2 0.015625 (2^-6.000000)
 3:        2 -> 88000000 0.250000 (2^-2.000000)
 4: 88000000 -> 20000000 0.062500 (2^-4.000000)
 5: 20000000 ->  8000000 0.250000 (2^-2.000000)
 6:  8000000 ->        0 0.250000 (2^-2.000000)
 7:        0 ->  8000000 1.000000 (2^0.000000)
 8:  8000000 -> 20000000 0.250000 (2^-2.000000)
 9: 20000000 -> 88000000 0.250000 (2^-2.000000)
10: 88000000 ->        2 0.062500 (2^-4.000000)
11:        2 -> 88000008 0.250000 (2^-2.000000)
12: 88000008 -> 20000020 0.015625 (2^-6.000000)
13: 20000020 ->  8000088 0.062500 (2^-4.000000)
14:  8000088 ->      200 0.015625 (2^-6.000000)
15:      200 ->  8000888 0.250000 (2^-2.000000)
p_tot = 0.000000000000000 = 2^-54.000000, Bn = 0.000000 = 2^-54.000000
[./src/simon-xor-threshold-search.cc:1760] Init bound: 8000888 -> 0 = 0.003906 2^-8.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.015625 (2^-6.000000)  8000088 -> 20000220
EXP  0: 0.015587 (2^-6.003527)  8000088 -> 20000220

THE  1: 0.062500 (2^-4.000000) 20000020 -> 88000008
EXP  1: 0.062666 (2^-3.996175) 20000020 -> 88000008

THE  2: 0.015625 (2^-6.000000) 88000008 ->        2
EXP  2: 0.015590 (2^-6.003262) 88000008 ->        2

THE  3: 0.250000 (2^-2.000000)        2 -> 88000000
EXP  3: 0.250065 (2^-1.999626)        2 -> 88000000

THE  4: 0.062500 (2^-4.000000) 88000000 -> 20000000
EXP  4: 0.062355 (2^-4.003350) 88000000 -> 20000000

THE  5: 0.250000 (2^-2.000000) 20000000 ->  8000000
EXP  5: 0.249407 (2^-2.003427) 20000000 ->  8000000

THE  6: 0.250000 (2^-2.000000)  8000000 ->        0
EXP  6: 0.249600 (2^-2.002308)  8000000 ->        0

THE  7: 1.000000 (2^0.000000)        0 ->  8000000
EXP  7: 1.000000 (2^0.000000)        0 ->  8000000

THE  8: 0.250000 (2^-2.000000)  8000000 -> 20000000
EXP  8: 0.249196 (2^-2.004647)  8000000 -> 20000000

THE  9: 0.250000 (2^-2.000000) 20000000 -> 88000000
EXP  9: 0.250153 (2^-1.999120) 20000000 -> 88000000

THE 10: 0.062500 (2^-4.000000) 88000000 ->        2
EXP 10: 0.062452 (2^-4.001101) 88000000 ->        2

THE 11: 0.250000 (2^-2.000000)        2 -> 88000008
EXP 11: 0.250601 (2^-1.996537)        2 -> 88000008

THE 12: 0.015625 (2^-6.000000) 88000008 -> 20000020
EXP 12: 0.015797 (2^-5.984237) 88000008 -> 20000020

THE 13: 0.062500 (2^-4.000000) 20000020 ->  8000088
EXP 13: 0.062144 (2^-4.008235) 20000020 ->  8000088

THE 14: 0.015625 (2^-6.000000)  8000088 ->      200
EXP 14: 0.015607 (2^-6.001674)  8000088 ->      200

THE 15: 0.250000 (2^-2.000000)      200 ->  8000888
EXP 15: 0.249888 (2^-2.000644)      200 ->  8000888

THE 16: 0.003906 (2^-8.000000)  8000888 -> 20002020
EXP 16: 0.004018 (2^-7.959368)  8000888 -> 20002020

OK
[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.100000 2^-3.321928 XDP_ROT_AND_MAX_DIFF_CNT 10 2^3.32 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1678] nrounds = 17, Bn_init = 2^-62.000000 : key ECEB8545 6427EE58 4BEC4D9A 49F3E42C

 */

/* --- */

/* 
	[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
	[./src/simon-xor-threshold-search.cc:1678] nrounds = 10, Bn_init = 2^-26.000000 : key C749D89F 85ED8F01 338C7A65  BBE944E

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
pDDT sizes: Dp 130, Dxy 130, p_thres 0.050000 2^-4.321928
 0: 20000000 -> C0000000 0.250000 (2^-2.000000)
 1:  8800000 ->  2000000 0.062500 (2^-4.000000)
 2:  2000000 ->   800000 0.250000 (2^-2.000000)
 3:   800000 ->        0 0.250000 (2^-2.000000)
 4:        0 ->   800000 1.000000 (2^0.000000)
 5:   800000 ->  2000000 0.250000 (2^-2.000000)
 6:  2000000 ->  8800000 0.250000 (2^-2.000000)
 7:  8800000 -> 20000000 0.062500 (2^-4.000000)
 8: 20000000 -> 88800000 0.250000 (2^-2.000000)
p_tot = 0.000000953674316 = 2^-20.000000, Bn = 0.000001 = 2^-20.000000
[./src/simon-xor-threshold-search.cc:1760] Init bound: 88800000 -> 0 = 0.015625 2^-6.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000) 20000000 -> C0000000
EXP  0: 0.250277 (2^-1.998405) 20000000 -> C0000000

THE  1: 0.062500 (2^-4.000000)  8800000 ->  2000000
EXP  1: 0.062908 (2^-3.990609)  8800000 ->  2000000

THE  2: 0.250000 (2^-2.000000)  2000000 ->   800000
EXP  2: 0.249925 (2^-2.000435)  2000000 ->   800000

THE  3: 0.250000 (2^-2.000000)   800000 ->        0
EXP  3: 0.250416 (2^-1.997602)   800000 ->        0

THE  4: 1.000000 (2^0.000000)        0 ->   800000
EXP  4: 1.000000 (2^0.000000)        0 ->   800000

THE  5: 0.250000 (2^-2.000000)   800000 ->  2000000
EXP  5: 0.249503 (2^-2.002870)   800000 ->  2000000

THE  6: 0.250000 (2^-2.000000)  2000000 ->  8800000
EXP  6: 0.249707 (2^-2.001691)  2000000 ->  8800000

THE  7: 0.062500 (2^-4.000000)  8800000 -> 20000000
EXP  7: 0.062715 (2^-3.995055)  8800000 -> 20000000

THE  8: 0.250000 (2^-2.000000) 20000000 -> 88800000
EXP  8: 0.249632 (2^-2.002126) 20000000 -> 88800000

THE  9: 0.015625 (2^-6.000000) 88800000 ->  2000002
EXP  9: 0.015653 (2^-5.997449) 88800000 ->  2000002

	[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
	[./src/simon-xor-threshold-search.cc:1678] nrounds = 10, Bn_init = 2^-26.000000 : key C749D89F 85ED8F01 338C7A65  BBE944E

 */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-24.000000  -> -20
B[ 9] = 2^-28.000000  -> -26
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-62.000000
B[17] = 2^-68.000000
B[18] = 2^-70.000000
pDDT sizes: Dp 16, Dxy 16, p_thres 0.050000 2^-4.321928
 0:      800 ->     2000 0.250000 (2^-2.000000)
 1: 20000220 -> 80000080 0.015625 (2^-6.000000)
 2: 80000080 -> 20000022 0.062500 (2^-4.000000)
 3: 20000022 ->        8 0.015625 (2^-6.000000)
 4:        8 -> 20000002 0.250000 (2^-2.000000)
 5: 20000002 -> 80000000 0.062500 (2^-4.000000)
 6: 80000000 -> 20000000 0.250000 (2^-2.000000)
 7: 20000000 ->        0 0.250000 (2^-2.000000)
 8:        0 -> 20000000 1.000000 (2^0.000000)
 9: 20000000 -> 80000000 0.250000 (2^-2.000000)
10: 80000000 -> 20000002 0.250000 (2^-2.000000)
11: 20000002 ->        8 0.062500 (2^-4.000000)
12:        8 -> 20000022 0.250000 (2^-2.000000)
13: 20000022 -> 80000080 0.015625 (2^-6.000000)
14: 80000080 -> 20000220 0.062500 (2^-4.000000)
15: 20000220 ->      800 0.015625 (2^-6.000000)
16:      800 -> 20002220 0.250000 (2^-2.000000)
17: 20002220 -> 80008080 0.003906 (2^-8.000000)
18: 80008080 -> 20022022 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-70.000000, Bn = 0.000000 = 2^-70.000000
[./src/simon-xor-threshold-search.cc:1760] Init bound: 20022022 -> 0 = 0.000977 2^-10.000000


[./src/simon-xor-threshold-search.cc:1771]  WARNING!! Two consecutive countryroads!

[./src/simon-xor-threshold-search.cc:1802] Add new Hway: 20022022 0 0.000977 2^-10.000000
[./src/simon-xor-threshold-search.cc:1804] NEW HW sizes: Dp 17, Dxy 17
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)      800 ->     2000
EXP  0: 0.249844 (2^-2.000903)      800 ->     2000

THE  1: 0.015625 (2^-6.000000) 20000220 -> 80000080
EXP  1: 0.015721 (2^-5.991134) 20000220 -> 80000080

THE  2: 0.062500 (2^-4.000000) 80000080 -> 20000022
EXP  2: 0.062400 (2^-4.002313) 80000080 -> 20000022

THE  3: 0.015625 (2^-6.000000) 20000022 ->        8
EXP  3: 0.015609 (2^-6.001498) 20000022 ->        8

THE  4: 0.250000 (2^-2.000000)        8 -> 20000002
EXP  4: 0.249615 (2^-2.002225)        8 -> 20000002

THE  5: 0.062500 (2^-4.000000) 20000002 -> 80000000
EXP  5: 0.061957 (2^-4.012581) 20000002 -> 80000000

THE  6: 0.250000 (2^-2.000000) 80000000 -> 20000000
EXP  6: 0.250272 (2^-1.998432) 80000000 -> 20000000

THE  7: 0.250000 (2^-2.000000) 20000000 ->        0
EXP  7: 0.249514 (2^-2.002809) 20000000 ->        0

THE  8: 1.000000 (2^0.000000)        0 -> 20000000
EXP  8: 1.000000 (2^0.000000)        0 -> 20000000

THE  9: 0.250000 (2^-2.000000) 20000000 -> 80000000
EXP  9: 0.250070 (2^-1.999598) 20000000 -> 80000000

THE 10: 0.250000 (2^-2.000000) 80000000 -> 20000002
EXP 10: 0.249733 (2^-2.001542) 80000000 -> 20000002

THE 11: 0.062500 (2^-4.000000) 20000002 ->        8
EXP 11: 0.062314 (2^-4.004299) 20000002 ->        8

THE 12: 0.250000 (2^-2.000000)        8 -> 20000022
EXP 12: 0.249110 (2^-2.005144)        8 -> 20000022

THE 13: 0.015625 (2^-6.000000) 20000022 -> 80000080
EXP 13: 0.015625 (2^-6.000000) 20000022 -> 80000080

THE 14: 0.062500 (2^-4.000000) 80000080 -> 20000220
EXP 14: 0.062599 (2^-3.997712) 80000080 -> 20000220

THE 15: 0.015625 (2^-6.000000) 20000220 ->      800
EXP 15: 0.015611 (2^-6.001321) 20000220 ->      800

THE 16: 0.250000 (2^-2.000000)      800 -> 20002220
EXP 16: 0.250485 (2^-1.997201)      800 -> 20002220

THE 17: 0.003906 (2^-8.000000) 20002220 -> 80008080
EXP 17: 0.004029 (2^-7.955264) 20002220 -> 80008080

THE 18: 0.015625 (2^-6.000000) 80008080 -> 20022022
EXP 18: 0.015654 (2^-5.997361) 80008080 -> 20022022

THE 19: 0.000977 (2^-10.000000) 20022022 ->    80008
EXP 19: 0.000970 (2^-10.009896) 20022022 ->    80008

OK
[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 8 2^3.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1678] nrounds = 20, Bn_init = 2^-80.000000 : key A86E5267 214E4438 21E673E3 52A33D4A

 */

/* --- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-46.000000  -> -44
B[14] = 2^-52.000000  -> -48
B[15] = 2^-62.000000  -> -54
B[16] = 2^-66.000000  -> -62
B[17] = 2^-76.000000  -> -68
B[18] = 2^-82.000000
pDDT sizes: Dp 85, Dxy 85, p_thres 0.100000 2^-3.321928
 0: 40000000 -> 80000001 0.250000 (2^-2.000000)
 1: 11000000 ->  4000000 0.062500 (2^-4.000000)
 2:  4000000 ->  1000000 0.250000 (2^-2.000000)
 3:  1000000 ->        0 0.250000 (2^-2.000000)
 4:        0 ->  1000000 1.000000 (2^0.000000)
 5:  1000000 ->  4000000 0.250000 (2^-2.000000)
 6:  4000000 -> 11000000 0.250000 (2^-2.000000)
 7: 11000000 -> 40000000 0.062500 (2^-4.000000)
 8: 40000000 -> 11000001 0.250000 (2^-2.000000)
 9: 11000001 ->  4000004 0.015625 (2^-6.000000)
10:  4000004 ->  1000011 0.062500 (2^-4.000000)
11:  1000011 ->       40 0.015625 (2^-6.000000)
12:       40 ->  1000111 0.250000 (2^-2.000000)
13:  1000111 ->  4000404 0.003906 (2^-8.000092)
14:  4000404 -> 11001101 0.015625 (2^-6.000000)
15: 11001101 -> 40004000 0.000977 (2^-10.000000)
16: 40004000 -> 11011100 0.062500 (2^-4.000000)
17: 11011100 ->  4040400 0.000977 (2^-10.000000)
18:  4040400 ->  1110100 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-82.000092, Bn = 0.000000 = 2^-82.000000
[./src/simon-xor-threshold-search.cc:1760] Init bound: 1110100 -> 0 = 0.003906 2^-8.000000

[./src/simon-xor-threshold-search.cc:1869] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.100000 2^-3.321928 XDP_ROT_AND_MAX_DIFF_CNT 80 2^6.32 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 4 TRAIL_MAX_HW 4 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1678] nrounds = 20, Bn_init = 2^-90.000000 : key 74F45938 4A724EB3 7C6EA22E D7E728DC

 */

/* ---- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
pDDT sizes: Dp 132, Dxy 132, p_thres 0.100000 2^-3.321928
 0:   200000 ->   C00000 0.250000 (2^-2.000000)
 1:    88080 ->    20200 0.015625 (2^-6.000000)
 2:    20200 ->     8880 0.062500 (2^-4.000000)
 3:     8880 ->     2000 0.015625 (2^-6.000000)
 4:     2000 ->      880 0.250000 (2^-2.000000)
 5:      880 ->      200 0.062500 (2^-4.000000)
 6:      200 ->       80 0.250000 (2^-2.000000)
 7:       80 ->        0 0.250000 (2^-2.000000)
 8:        0 ->       80 1.000000 (2^0.000000)
 9:       80 ->      200 0.250000 (2^-2.000000)
10:      200 ->      880 0.250000 (2^-2.000000)
11:      880 ->     2000 0.062500 (2^-4.000000)
12:     2000 ->     8880 0.250000 (2^-2.000000)
13:     8880 ->    20200 0.015625 (2^-6.000000)
14:    20200 ->    88080 0.062500 (2^-4.000000)
p_tot = 0.000000000000004 = 2^-48.000000, Bn = 0.000000 = 2^-48.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)   200000 ->   C00000
EXP  0: 0.250356 (2^-1.997949)   200000 ->   C00000

THE  1: 0.015625 (2^-6.000000)    88080 ->    20200
EXP  1: 0.015477 (2^-6.013714)    88080 ->    20200

THE  2: 0.062500 (2^-4.000000)    20200 ->     8880
EXP  2: 0.062520 (2^-3.999538)    20200 ->     8880

THE  3: 0.015625 (2^-6.000000)     8880 ->     2000
EXP  3: 0.015545 (2^-6.007416)     8880 ->     2000

THE  4: 0.250000 (2^-2.000000)     2000 ->      880
EXP  4: 0.250005 (2^-1.999972)     2000 ->      880

THE  5: 0.062500 (2^-4.000000)      880 ->      200
EXP  5: 0.062324 (2^-4.004078)      880 ->      200

THE  6: 0.250000 (2^-2.000000)      200 ->       80
EXP  6: 0.250350 (2^-1.997982)      200 ->       80

THE  7: 0.250000 (2^-2.000000)       80 ->        0
EXP  7: 0.249798 (2^-2.001167)       80 ->        0

THE  8: 1.000000 (2^0.000000)        0 ->       80
EXP  8: 1.000000 (2^0.000000)        0 ->       80

THE  9: 0.250000 (2^-2.000000)       80 ->      200
EXP  9: 0.249347 (2^-2.003775)       80 ->      200

THE 10: 0.250000 (2^-2.000000)      200 ->      880
EXP 10: 0.249690 (2^-2.001790)      200 ->      880

THE 11: 0.062500 (2^-4.000000)      880 ->     2000
EXP 11: 0.062664 (2^-3.996219)      880 ->     2000

THE 12: 0.250000 (2^-2.000000)     2000 ->     8880
EXP 12: 0.250040 (2^-1.999769)     2000 ->     8880

THE 13: 0.015625 (2^-6.000000)     8880 ->    20200
EXP 13: 0.015507 (2^-6.010960)     8880 ->    20200

THE 14: 0.062500 (2^-4.000000)    20200 ->    88080
EXP 14: 0.062100 (2^-4.009253)    20200 ->    88080

 */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-46.000000
pDDT sizes: Dp 128, Dxy 134, p_thres 0.050000 2^-4.321928
 0: 40000000 -> 80000001 0.250000 (2^-2.000000)
 1: 11000000 ->  4000000 0.062500 (2^-4.000000)
 2:  4000000 ->  1000000 0.250000 (2^-2.000000)
 3:  1000000 ->        0 0.250000 (2^-2.000000)
 4:        0 ->  1000000 1.000000 (2^0.000000)
 5:  1000000 ->  4000000 0.250000 (2^-2.000000)
 6:  4000000 -> 11000000 0.250000 (2^-2.000000)
 7: 11000000 -> 40000000 0.062500 (2^-4.000000)
 8: 40000000 -> 11000001 0.250000 (2^-2.000000)
 9: 11000001 ->  4000004 0.015625 (2^-6.000000)
10:  4000004 ->  1000011 0.062500 (2^-4.000000)
11:  1000011 ->       40 0.015625 (2^-6.000000)
12:       40 ->  1000111 0.250000 (2^-2.000000)
13:  1000111 ->  4000404 0.003906 (2^-8.000000)
p_tot = 0.000000000000014 = 2^-46.000000, Bn = 0.000000 = 2^-46.000000
[./src/simon-xor-threshold-search.cc:1712] Init bound: 4000404 -> 0 = 0.015625 2^-6.000000


[./src/simon-xor-threshold-search.cc:1723]  WARNING!! Two consecutive countryroads!

[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000) 40000000 -> 80000001
EXP  0: 0.250410 (2^-1.997635) 40000000 -> 80000001

THE  1: 0.062500 (2^-4.000000) 11000000 ->  4000000
EXP  1: 0.062449 (2^-4.001167) 11000000 ->  4000000

THE  2: 0.250000 (2^-2.000000)  4000000 ->  1000000
EXP  2: 0.249701 (2^-2.001729)  4000000 ->  1000000

THE  3: 0.250000 (2^-2.000000)  1000000 ->        0
EXP  3: 0.249444 (2^-2.003212)  1000000 ->        0

THE  4: 1.000000 (2^0.000000)        0 ->  1000000
EXP  4: 1.000000 (2^0.000000)        0 ->  1000000

THE  5: 0.250000 (2^-2.000000)  1000000 ->  4000000
EXP  5: 0.251072 (2^-1.993827)  1000000 ->  4000000

THE  6: 0.250000 (2^-2.000000)  4000000 -> 11000000
EXP  6: 0.249551 (2^-2.002594)  4000000 -> 11000000

THE  7: 0.062500 (2^-4.000000) 11000000 -> 40000000
EXP  7: 0.062644 (2^-3.996680) 11000000 -> 40000000

THE  8: 0.250000 (2^-2.000000) 40000000 -> 11000001
EXP  8: 0.250139 (2^-1.999197) 40000000 -> 11000001

THE  9: 0.015625 (2^-6.000000) 11000001 ->  4000004
EXP  9: 0.015538 (2^-6.008035) 11000001 ->  4000004

THE 10: 0.062500 (2^-4.000000)  4000004 ->  1000011
EXP 10: 0.062392 (2^-4.002490)  4000004 ->  1000011

THE 11: 0.015625 (2^-6.000000)  1000011 ->       40
EXP 11: 0.015754 (2^-5.988161)  1000011 ->       40

THE 12: 0.250000 (2^-2.000000)       40 ->  1000111
EXP 12: 0.250550 (2^-1.996828)       40 ->  1000111

THE 13: 0.003906 (2^-8.000000)  1000111 ->  4000404
EXP 13: 0.003819 (2^-8.032413)  1000111 ->  4000404

THE 14: 0.015625 (2^-6.000000)  4000404 -> 11001101
EXP 14: 0.015655 (2^-5.997273)  4000404 -> 11001101

OK
[./src/simon-xor-threshold-search.cc:1821] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
[./src/simon-xor-threshold-search.cc:1630] nrounds = 15, Bn_init = 2^-52.000000 : key EFB15EA7 9D77D136 8F1D900C DC653EB6
[
 */

/* ---- */


/* 

	[./src/simon-xor-threshold-search.cc:1821] WORD_SIZE 16 NROUNDS 12 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1
	[./src/simon-xor-threshold-search.cc:1630] nrounds = 12, Bn_init = 2^-36.000000 : key     388A     5FB2     A060     3AE2
	[./src/xdp-rot-and.cc:1364] 2 / 128 : NEW Croad:      A00     1002 0.12500 2^-3.00 | CR size: Dp          2, Dxy          2
	[./src/simon-xor-threshold-search.cc:1501] 11 | Update best found Bn: 2^-36.000000 -> 2^-35.000000
	[./src/xdp-rot-and.cc:1364] 1 / 128 : NEW Croad:      200        0 0.25000 2^-2.00 | CR size: Dp          1, Dxy          1
	[./src/simon-xor-threshold-search.cc:1501] 11 | Update best found Bn: 2^-35.000000 -> 2^-34.000000

	[./src/simon-xor-threshold-search.cc:1956] nrounds 12
	[./tests/simon-xor-threshold-search-tests.cc:258] Final best trail:
 0:      400 ->     1800 0.250000 (2^-2.000000)
 1:      100 ->        0 0.250000 (2^-2.000000)
 2:        0 ->      100 1.000000 (2^0.000000)
 3:      100 ->      400 0.250000 (2^-2.000000)
 4:      400 ->     1100 0.250000 (2^-2.000000)
 5:     1100 ->     4200 0.062500 (2^-4.000000)
 6:     4200 ->     1D01 0.062500 (2^-4.000000)
 7:     1D01 ->      500 0.003906 (2^-8.000000)
 8:      500 ->      100 0.125000 (2^-3.000000)
 9:      100 ->      100 0.250000 (2^-2.000000)
10:      100 ->      500 0.250000 (2^-2.000000)
11:      500 ->     1500 0.125000 (2^-3.000000)
p_tot = 0.000000000058208 = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:269] Final bounds 12R:
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:2523] WORD_SIZE 16 NROUNDS 12 XDP_ROT_AND_P_THRES 0.050000 2^-4.321928 XDP_ROT_AND_MAX_DIFF_CNT 128 2^7.00 SIMON_EPS 0.000031 2^-15.000000 XDP_ROT_AND_MAX_HW 32 TRAIL_MAX_HW 32 SIMON_BACK_TO_HWAY 1

real    37m14.735s
user    36m43.510s
sys     0m0.656s
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
vpv@igor:~/skcrypto/trunk/work/src/yaarx$


 */


/* --- */

		// Gather every highway differential whose input difference equals dx
		// into found_mset_p. lower_bound(diff_dy) returns the first element that
		// does not order before diff_dy under struct_comp_diff_dx_dy (presumably
		// ordered by dx first -- TODO confirm against the comparator).
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator all_hways_iter = hways_diff_set_dx_dy->lower_bound(diff_dy);

		// all_hways_iter must itself be compared against end() before it is
		// dereferenced: lower_bound() returns end() when no element qualifies.
		// hway_iter is declared outside this excerpt; its check is kept as is.
		b_found_in_hways = (hway_iter != hways_diff_set_dx_dy->end())
		  && (all_hways_iter != hways_diff_set_dx_dy->end())
		  && (all_hways_iter->dx == dx);
		assert(b_found_in_hways == true);
		if(b_found_in_hways) {
		  // Stop at end() as well as at the first element with a different dx,
		  // so that the run of matching elements may extend to the last element.
		  while((all_hways_iter != hways_diff_set_dx_dy->end()) && (all_hways_iter->dx == dx)) {
			 found_mset_p.insert(*all_hways_iter);
			 ++all_hways_iter;
		  }
		}

/* --- */

  // Create a multiset ordered by struct_comp_diff_hw, hand it to
  // simon_diff_mset_p_to_mset_hw together with diff_mset_p (presumably the
  // helper copies every element across -- TODO confirm), print the result,
  // and check that both containers hold the same number of elements.
  std::multiset<differential_t, struct_comp_diff_hw> diff_mset_hw;
  simon_diff_mset_p_to_mset_hw(diff_mset_p, &diff_mset_hw);
  xdp_rot_and_print_mset_hw(diff_mset_hw);
  assert(diff_mset_p.size() == diff_mset_hw.size());


/* --- */
// Compiled only when CLEAR_CROADS is 0: replace the contents of the working
// sets (diff_set_dx_dy, diff_mset_p) with copies of the country-road sets.
// NOTE(review): if these are standard containers, the clear() calls are
// redundant because the copy-assignments below replace the contents anyway.
#if !CLEAR_CROADS								  // !!!
		diff_set_dx_dy->clear();
		diff_mset_p->clear();
		// copy the country-road contents into the working sets
		*diff_set_dx_dy = *croads_diff_set_dx_dy;
		*diff_mset_p = *croads_diff_mset_p;
#endif



/* --- */
			 // Two size snapshots of diff_mset_p taken back-to-back. Nothing runs
			 // between them in this excerpt, so they are always equal here
			 // (presumably a call that could grow the set used to sit in between).
			 uint32_t hway_size_before = diff_mset_p->size();
			 uint32_t hway_size_after = diff_mset_p->size();

			 //			 if(hway_size_before != hway_size_after) {
			 //				hways_diff_set_dx_dy->clear();
			 //				hways_diff_mset_p->clear();
			 //				*hways_diff_set_dx_dy = *diff_set_dx_dy;
			 //				*hways_diff_mset_p = *diff_mset_p;
			 //				simon_diffsets_remove_rot_equivalent(hways_diff_mset_p, hways_diff_set_dx_dy);
			 //			 } 

			 //			 if(hway_size_before != hway_size_after) {
			 //				hways_diff_set_dx_dy->clear();
			 //				hways_diff_mset_p->clear();
			 //				*hways_diff_set_dx_dy = *diff_set_dx_dy;
			 //				*hways_diff_mset_p = *diff_mset_p;
			 //				simon_diffsets_remove_rot_equivalent(hways_diff_mset_p, hways_diff_set_dx_dy);
			 //			 } 



/* --- */


		// Excerpt of one search step; dx, dxx, p, pn, n and the containers are
		// declared outside this excerpt, and the if/else opened below is not
		// closed within it.
		// Only transitions where both hw32(dx) and hw32(dxx) are at most
		// XDP_ROT_AND_MAX_HW are followed (hw32 is presumably the Hamming
		// weight of a 32-bit word -- TODO confirm).
		bool b_low_hw = ((hw32(dx) <= XDP_ROT_AND_MAX_HW) && (hw32(dxx) <= XDP_ROT_AND_MAX_HW));
		if(b_low_hw) {

		  // Recurse only if the estimate p is non-zero and not below the bound *Bn.
		  if((p >= *Bn) && (p != 0.0)) {
			 diff[n].dx = dx;		  // dx_{i}
			 diff[n].dy = dxx;		  // dx_{i+1}
			 diff[n].p = pn;

			 // Snapshot the size of diff_mset_p around the recursive call so that
			 // any change made by the recursion can be detected afterwards.
			 uint32_t hway_size_before = diff_mset_p->size();
			 simon_xor_threshold_search(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, hways_diff_mset_p, hways_diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, diffs_hash_map, trails_hash_map, diff_max, b_hash_map, p_eps, p_thres);
			 uint32_t hway_size_after = diff_mset_p->size();

			 // The size changed: rebuild the highway containers as copies of the
			 // full sets and pass the copies to simon_diffsets_remove_rot_equivalent
			 // (presumably removes rotation-equivalent entries -- TODO confirm).
			 if(hway_size_before != hway_size_after) {
				hways_diff_set_dx_dy->clear();
				hways_diff_mset_p->clear();

				// copy contents (dereferenced assignment), not the pointers
				*hways_diff_set_dx_dy = *diff_set_dx_dy;
				*hways_diff_mset_p = *diff_mset_p;

				simon_diffsets_remove_rot_equivalent(hways_diff_mset_p, hways_diff_set_dx_dy);
			 } 

		  } else {
			 // presumably ends the enclosing loop, which lies outside this excerpt
			 b_end = true;


/* --- */


/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-10.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-16.000000

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000


pDDT sizes: Dp 117933, Dxy 117933, p_thres 0.000000 2^-inf
 0:      208 ->    20430 0.062500 (2^-4.000000)
 1:        2 ->        0 0.250000 (2^-2.000000)
 2:        0 ->        2 1.000000 (2^0.000000)
 3:        2 ->        8 0.250000 (2^-2.000000)
 4:        8 ->       22 0.250000 (2^-2.000000)
 5:       22 ->       80 0.062500 (2^-4.000000)
 6:       80 ->      222 0.250000 (2^-2.000000)
p_tot = 0.000015258789062 = 2^-16.000000, Bn = 0.000015 = 2^-16.000000
[./src/simon-xor-threshold-search.cc:1653] Init bound: 222 -> 0 = 0.015625 2^-6.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.062500 (2^-4.000000)      208 ->    20430
EXP  0: 0.062321 (2^-4.004145)      208 ->    20430

THE  1: 0.250000 (2^-2.000000)        2 ->        0
EXP  1: 0.250003 (2^-1.999983)        2 ->        0

THE  2: 1.000000 (2^0.000000)        0 ->        2
EXP  2: 1.000000 (2^0.000000)        0 ->        2

THE  3: 0.250000 (2^-2.000000)        2 ->        8
EXP  3: 0.250876 (2^-1.994951)        2 ->        8

THE  4: 0.250000 (2^-2.000000)        8 ->       22
EXP  4: 0.250144 (2^-1.999169)        8 ->       22

THE  5: 0.062500 (2^-4.000000)       22 ->       80
EXP  5: 0.062610 (2^-3.997471)       22 ->       80

THE  6: 0.250000 (2^-2.000000)       80 ->      222
EXP  6: 0.250854 (2^-1.995083)       80 ->      222

THE  7: 0.015625 (2^-6.000000)      222 ->      808
EXP  7: 0.015542 (2^-6.007681)      222 ->      808

[./src/simon-xor-threshold-search.cc:1734] Params: WORD_SIZE 32 NROUNDS 20 P_THRES 0.000 2^-inf XDP_ROT_AND_MAX_DIFF_CNT 2 2^1.00 SIMON_EPS 0.000031 2^-15.00 XDP_ROT_AND_MAX_HW 4 SIMON_BACK_TO_HWAY 0
[
 */


/* --- */

		// If the size of the full set changed (hway_size_before and
		// hway_size_after are taken outside this excerpt), refresh the highway
		// containers and restart the iteration; otherwise advance to the next
		// element.
		if(hway_size_before != hway_size_after) {
		  // Copy the CONTENTS of the full sets into the highway containers.
		  // Assigning the pointers themselves would make the highway pointers
		  // alias the full sets, so the pruning call below would then modify
		  // the full sets instead of a separate copy. Copy-assignment replaces
		  // the previous contents, so no explicit clear() is needed (and
		  // omitting it keeps the statement safe if both pointers already
		  // refer to the same container).
		  *hways_diff_set_dx_dy = *diff_set_dx_dy;
		  *hways_diff_mset_p = *diff_mset_p;

		  // presumably removes rotation-equivalent entries -- TODO confirm
		  simon_diffsets_remove_rot_equivalent(hways_diff_mset_p, hways_diff_set_dx_dy);

		  // The highway multiset was rebuilt: restart from its first element.
		  mset_iter = hways_diff_mset_p->begin();
		  cnt = 0;
		  b_end = false;

		} else {
		  mset_iter++;
		  cnt++;
		}


/* ---- */
		// Excerpt combining several discarded pieces; names not declared here
		// come from the surrounding function, which is outside this excerpt.
		// Both counters start with the current size of diff_mset_p.
		uint32_t hway_size_before = diff_mset_p->size();
		uint32_t hway_size_after = diff_mset_p->size();


			 // Re-sample the size of diff_mset_p before and after the recursive call.
			 hway_size_before = diff_mset_p->size();
			 simon_xor_threshold_search(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, hways_diff_mset_p, hways_diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, diffs_hash_map, trails_hash_map, diff_max, b_hash_map, p_eps, p_thres);
			 hway_size_after = diff_mset_p->size();

		// The refresh branch is disabled with a constant-false condition, so
		// only the else branch (advance iterator and counter) ever runs.
		// NOTE(review): the disabled branch assigns the pointers themselves
		// (hways_... = diff_...) rather than copying the pointed-to contents.
		if (0) {
		  hways_diff_set_dx_dy->clear();
		  hways_diff_mset_p->clear();
		  hways_diff_set_dx_dy = diff_set_dx_dy;
		  hways_diff_mset_p = diff_mset_p;
		  simon_diffsets_remove_rot_equivalent(hways_diff_mset_p, hways_diff_set_dx_dy);
		  mset_iter = hways_diff_mset_p->begin();
		  cnt = 0;
		  b_end = false;
		} else {
		  mset_iter++;
		  cnt++;
		}

	 // Variant in which the sets are accessed as objects (member access with
	 // '.', addresses taken with '&'). When the highway set and the full set
	 // differ in size, the highway containers are replaced by copies of the
	 // full sets and then passed to simon_diffsets_remove_rot_equivalent
	 // (presumably removes rotation-equivalent entries -- TODO confirm).
	 if(hways_diff_set_dx_dy.size() != diff_set_dx_dy.size()) {
		hways_diff_set_dx_dy.clear();
		hways_diff_mset_p.clear();

		// value copies of the full sets
		hways_diff_set_dx_dy = diff_set_dx_dy;
		hways_diff_mset_p = diff_mset_p;

		simon_diffsets_remove_rot_equivalent(&hways_diff_mset_p, &hways_diff_set_dx_dy);
	 }

/* --- */

// Disabled (#if 0): would store an iterator to the first element of
// diff_set_dx_dy in begin_iter.
#if 0
		  std::set<differential_t, struct_comp_diff_dx_dy>::iterator begin_iter = diff_set_dx_dy->begin();
#endif


/* --- */


  //  printf("--- [%s:%d] Initial hways_diff_set_dx_dy : p_thres %f 2^%f ---\n", __FILE__, __LINE__, p_thres, log2(p_thres));
  //  xdp_rot_and_print_set_dx_dy(diff_set_dx_dy);
  //  printf("\n---[%s:%d] Initial hways_diff_mset_p : p_thres %f 2^%f --- \n", __FILE__, __LINE__, p_thres, log2(p_thres));
  //  xdp_rot_and_print_mset_p(diff_mset_p);

  // !!!!
// Disabled (#if 0): initialisation of the country-road sets. It would call
// xdp_rot_and_pddt on croads_diff_set_dx_dy / croads_diff_mset_p with
// max_cnt = 2^16 and a probability threshold of 2^-WORD_SIZE.
#if 0
  printf("[%s:%d] Initialize croads:\n", __FILE__, __LINE__);
  max_cnt = (1ULL << 16);
  double p_thres_croads = (double)(1.0 / (double)(1ULL << WORD_SIZE)); // 2^-WORD_SIZE
  xdp_rot_and_pddt(&croads_diff_set_dx_dy, &croads_diff_mset_p, lrot_const_s, lrot_const_t, max_cnt, p_thres_croads);
#endif

/* --- */

// Hamming-weight filter on a partially built difference; k, delta, delta_prev,
// new_dc, u and max_hw are declared outside this excerpt. The #else branch is
// empty.
// NOTE(review): 0xffffffff >> (WORD_SIZE - k - 1) leaves 32 - (WORD_SIZE - k - 1)
// low bits set before MASK is applied. For WORD_SIZE == 32 that is k+1 bits,
// not k; for smaller WORD_SIZE it is more than k+1 bits -- confirm the
// intended mask width (the "WARNING!" marker may refer to this).
#if 1									  // WARNING!
	 uint32_t mask_k = ((0xffffffff >> (WORD_SIZE - k - 1)) & MASK); // get k LSB (see NOTE above)
	 // XOR of the masked previous difference, the masked rotated difference and new_dc
	 uint32_t dyy = ((delta_prev & mask_k) ^ (LROT(delta, u) & mask_k) ^ new_dc);
	 // both values must pass the hw32(...) <= max_hw test
	 bool b_low_hw = (hw32(delta & MASK) <= max_hw) && (hw32(dyy) <= max_hw);
	 //	 bool b_low_hw = (hw32(delta & MASK) <= max_hw) && (hw32(new_dc) <= max_hw);
	 //	 printf("k %2d %8X | %8X\n", k, mask_k, dyy);
#else
#endif

/* --- */

		//		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();
		//		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();


/* --- */


/* 
20131030

Simon64: best result found for 18 rounds: 2^-64

Params: p_thres 0.010000 2^-6.643856, HW = 2

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-56.000000
B[17] = 2^-64.000000
pDDT sizes: Dp 6591, Dxy 6591, p_thres 0.010000 2^-6.643856
 0:        2 ->      20C 0.250000 (2^-2.000000)
 1: 88080000 -> 20200000 0.015625 (2^-6.000000)
 2: 20200000 ->  8880000 0.062500 (2^-4.000000)
 3:  8880000 ->  2000000 0.015625 (2^-6.000000)
 4:  2000000 ->   880000 0.250000 (2^-2.000000)
 5:   880000 ->   200000 0.062500 (2^-4.000000)
 6:   200000 ->    80000 0.250000 (2^-2.000000)
 7:    80000 ->        0 0.250000 (2^-2.000000)
 8:        0 ->    80000 1.000000 (2^0.000000)
 9:    80000 ->   200000 0.250000 (2^-2.000000)
10:   200000 ->   880000 0.250000 (2^-2.000000)
11:   880000 ->  2000000 0.062500 (2^-4.000000)
12:  2000000 ->  8880000 0.250000 (2^-2.000000)
13:  8880000 -> 20200000 0.015625 (2^-6.000000)
14: 20200000 -> 88080000 0.062500 (2^-4.000000)
15: 88080000 ->        2 0.015625 (2^-6.000000)
16:        2 -> 88080008 0.250000 (2^-2.000000)
17: 88080008 -> 20200020 0.003906 (2^-8.000000)
p_tot = 0.000000000000000 = 2^-64.000000, Bn = 0.000000 = 2^-64.000000
[./src/simon-xor-threshold-search.cc:1670] Init bound: 20200020 -> 0 = 0.015625 2^-6.000000
[./src/simon-xor-threshold-search.cc:1714] Add new huighway: 20200020 0 0.015625 2^-6.000000
NEW pDDT sizes: Dp 6592, Dxy 6592

[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)        2 ->      20C
EXP  0: 0.248405 (2^-2.009237)        2 ->      20C

THE  1: 0.015625 (2^-6.000000) 88080000 -> 20200000
EXP  1: 0.015710 (2^-5.992184) 88080000 -> 20200000

THE  2: 0.062500 (2^-4.000000) 20200000 ->  8880000
EXP  2: 0.062232 (2^-4.006199) 20200000 ->  8880000

THE  3: 0.015625 (2^-6.000000)  8880000 ->  2000000
EXP  3: 0.015587 (2^-6.003527)  8880000 ->  2000000

THE  4: 0.250000 (2^-2.000000)  2000000 ->   880000
EXP  4: 0.249207 (2^-2.004586)  2000000 ->   880000

THE  5: 0.062500 (2^-4.000000)   880000 ->   200000
EXP  5: 0.061838 (2^-4.015359)   880000 ->   200000

THE  6: 0.250000 (2^-2.000000)   200000 ->    80000
EXP  6: 0.249594 (2^-2.002346)   200000 ->    80000

THE  7: 0.250000 (2^-2.000000)    80000 ->        0
EXP  7: 0.250013 (2^-1.999923)    80000 ->        0

THE  8: 1.000000 (2^0.000000)        0 ->    80000
EXP  8: 1.000000 (2^0.000000)        0 ->    80000

THE  9: 0.250000 (2^-2.000000)    80000 ->   200000
EXP  9: 0.249858 (2^-2.000820)    80000 ->   200000

THE 10: 0.250000 (2^-2.000000)   200000 ->   880000
EXP 10: 0.249216 (2^-2.004531)   200000 ->   880000

THE 11: 0.062500 (2^-4.000000)   880000 ->  2000000
EXP 11: 0.061902 (2^-4.013869)   880000 ->  2000000

THE 12: 0.250000 (2^-2.000000)  2000000 ->  8880000
EXP 12: 0.249476 (2^-2.003025)  2000000 ->  8880000

THE 13: 0.015625 (2^-6.000000)  8880000 -> 20200000
EXP 13: 0.015651 (2^-5.997624)  8880000 -> 20200000

THE 14: 0.062500 (2^-4.000000) 20200000 -> 88080000
EXP 14: 0.062462 (2^-4.000881) 20200000 -> 88080000

THE 15: 0.015625 (2^-6.000000) 88080000 ->        2
EXP 15: 0.015684 (2^-5.994551) 88080000 ->        2

THE 16: 0.250000 (2^-2.000000)        2 -> 88080008
EXP 16: 0.249390 (2^-2.003527)        2 -> 88080008

THE 17: 0.003906 (2^-8.000000) 88080008 -> 20200020
EXP 17: 0.003917 (2^-7.996131) 88080008 -> 20200020

THE 18: 0.015625 (2^-6.000000) 20200020 ->  8880088
EXP 18: 0.015577 (2^-6.004409) 20200020 ->  8880088

OK

[./src/simon-xor-threshold-search.cc:1588] nrounds = 19, Bn_init = 2^-70.000000 : key A8065976 2198FBFC 952D6727 A90A04AC

 */

/* 

1/ p_thres 0.01, no limit on HW
2/ p_thres 0.01, max_hw 2
3/ p_thres 0.01, max_hw 3
4/ p_thres 0.01, max_hw 4

 */

/* --- */

void simon_xor_threshold_search_v2(const int n, const int nrounds, 
											  double B[NROUNDS], double* Bn,
											  const differential_t diff_in[NROUNDS], differential_t trail[NROUNDS], 
											  const uint32_t dyy_init,
											  uint32_t lrot_const_s, uint32_t lrot_const_t, uint32_t lrot_const_u,
											  std::multiset<differential_t, struct_comp_diff_p>* diff_mset_p, // highways
											  std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy,
											  std::multiset<differential_t, struct_comp_diff_p>* croads_diff_mset_p, // country roads
											  std::set<differential_t, struct_comp_diff_dx_dy>* croads_diff_set_dx_dy,
											  boost::unordered_map<std::array<differential_t, SIMON_NDIFFS>, uint32_t, simon_diff_hash, simon_diff_equal_to>* diffs_hash_map,
											  boost::unordered_map<std::array<differential_t, NROUNDS>, uint32_t, simon_trail_hash, simon_trail_equal_to>* trails_hash_map,
											  differential_t** diff_max,
											  bool b_hash_map,
											  double p_eps,
											  double p_thres)
{
  assert(dyy_init == 0);

  //  uint32_t max_hw = 4;//XDP_ROT_AND_MAX_HW;
  double pn = 0.0;

  // make a local copy of the input diff trail
  differential_t diff[NROUNDS] = {{0, 0, 0, 0.0}};
  for(int i = 0; i < n; i++) {
	 diff[i].dx = diff_in[i].dx;
	 diff[i].dy = diff_in[i].dy;
	 diff[i].p = diff_in[i].p;
  }

  if((n == 0) && (nrounds == 1)) {						  // Only one round
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 uint32_t cnt = 0;
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx; // alpha
		uint32_t dy = mset_iter->dy; // gamma
		pn = mset_iter->p;;
		uint32_t dxx = dy ^ dyy_init ^ LROT(dx, lrot_const_u); // gamma ^ dy_i ^ (alpha <<< 2)
#if 0									  // DEBUG
		printf("\r[%s:%d] %2d: [%2d / %2d] %8X -> %8X, 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		fflush(stdout);
#endif

		//		bool b_low_hw = ((hw32(dx & MASK) <= max_hw) && (hw32(dxx & MASK) <= max_hw));
		//		if((pn >= *Bn) && (pn != 0.0) && (b_low_hw)) {
		if((pn >= *Bn) && (pn != 0.0)) {
		  trail[n].dx = dx;		  // dx_{i}
		  trail[n].dy = dxx;		  // dx_{i+1} 
		  trail[n].p = pn;
		  if(!b_hash_map) {
			 *Bn = pn;
			 B[n] = pn;
		  }
		} else {
		  //			 if(b_low_hw)
		  b_end = true;
		}
		mset_iter++;
		cnt++;
	 }	// while()
  }

  if((n == 0) && (nrounds > 1)) {						  // Round-0 and not last round
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 uint32_t cnt = 0;
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx; // alpha
		uint32_t dy = mset_iter->dy; // gamma
		pn = mset_iter->p;
		uint32_t dxx = dy ^ dyy_init ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dy_i ^ (alpha <<< 2)
		double p = pn * B[nrounds - 1 - (n + 1)];
		assert(B[nrounds - 1 - (n + 1)] != 0.0);
#if 0									  // DEBUG
		printf("\r[%s:%d] %2d: [%2d / %2d] %8X -> %8X, 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		fflush(stdout);
#endif
#if 0								  // DEBUG
		if(b_hash_map) {
		  printf("[%s:%d] n %2d: p 2^%4.2f Bn 2^%4.2f\n", __FILE__, __LINE__, n, log2(p), log2(*Bn));
		}
#endif
		//		bool b_low_hw = ((hw32(dx & MASK) <= max_hw) && (hw32(dxx & MASK) <= max_hw));
		//		if((p >= *Bn) && (p != 0.0) && (b_low_hw)) {
		if((p >= *Bn) && (p != 0.0)) {
		  diff[n].dx = dx;		  // dx_{i}
		  diff[n].dy = dxx;		  // dx_{i+1}
		  diff[n].p = pn;
		  simon_xor_threshold_search_v2(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, diffs_hash_map, trails_hash_map, diff_max, b_hash_map, p_eps, p_thres);
		} else {
		  //			 if(b_low_hw)
		  b_end = true;
		}
#if 0
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		  cnt = 0;
		} else {
		  mset_iter++;
		  cnt++;
		}
#endif
		mset_iter++;
		cnt++;
	 }
  }

  if((n == 1) && (n != (nrounds - 1))) {						  // Round-1 and not last round
	 bool b_end = false;
	 uint32_t cnt = 0;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx; // alpha = dx_{i}
		uint32_t dy = mset_iter->dy; // gamma
		pn = mset_iter->p;
		uint32_t dyy = diff[n-1].dx; // dy_{i} = dx_{i-1}
		uint32_t dxx = dy ^ dyy ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dx_{i-1} ^ (alpha <<< 2)
		double p = diff[0].p * pn * B[nrounds - 1 - (n + 1)];
#if 0									  // DEBUG
		printf("\r[%s:%d] %2d: [%2d / %2d] %8X -> %8X, 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		fflush(stdout);
#endif
#if 0								  // DEBUG
		if(b_hash_map) {
		  printf("[%s:%d] n %2d: p 2^%4.2f Bn 2^%4.2f\n", __FILE__, __LINE__, n, log2(p), log2(*Bn));
		}
#endif
		//		bool b_low_hw = ((hw32((dx ^ diff[n - 1].dy) & MASK) <= max_hw) && (hw32(dxx & MASK) <= max_hw));
		//		if((p >= *Bn) && (p != 0.0) && (b_low_hw)) {
		if((p >= *Bn) && (p != 0.0)) {
		  diff[n].dx = dx;		  // dx_{i}
		  diff[n].dy = dxx;		  // dx_{i+1}
		  diff[n].p = pn;
		  simon_xor_threshold_search_v2(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, diffs_hash_map, trails_hash_map, diff_max, b_hash_map, p_eps, p_thres);
		} else {
		  //			 if(b_low_hw)
		  b_end = true;
		} 
#if 0
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		  cnt = 0;
		} else {
		  mset_iter++;
		  cnt++;
		}
#endif
		mset_iter++;
		cnt++;
	 }	// while()
  }

  //  if((n >= 2) && (n != (nrounds - 1)) && (cnt_lp <= max_lp)) {
  if((n >= 2) && (n != (nrounds - 1))) { // Round-i and not last round
	 uint32_t dx = diff[n - 1].dy; // dx_{i} = dy_{i - 1}
	 uint32_t dy = 0;					 // gamma

	 differential_t diff_dy;
	 diff_dy.dx = dx;  			  // alpha
	 diff_dy.dy = 0;
	 diff_dy.p = 0.0;

	 //	 std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy;
	 std::multiset<differential_t, struct_comp_diff_p> found_mset_p;

	 // p_i >= p_min = Bn / p1 * p2 ... * p{i-1} * B{n-i} 
	 double p_min = 0.0;
	 p_min = 1.0;
	 for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		p_min *= diff[i].p;
	 }
	 p_min = p_min * 1.0 * B[nrounds - 1 - (n + 1)]; 
	 p_min = *Bn / p_min;
	 assert(p_min <= 1.0);

#if 0								  // DEBUG
	 if(b_hash_map) {
		printf("[%s:%d] n %2d: p_min 2^%4.2f Bn 2^%4.2f\n", __FILE__, __LINE__, n, log2(p_min), log2(*Bn));
	 }
#endif

	 // check if the differential is not already in the set
	 std::set<differential_t, struct_comp_diff_dx_dy>::iterator hway_iter = diff_set_dx_dy->lower_bound(diff_dy);
 	 bool b_found_in_hways = (hway_iter != diff_set_dx_dy->end()) && (hway_iter->dx == dx);
	 bool b_found_in_croads = false;

	 if(b_found_in_hways) {
		while(hway_iter->dx == dx) {
		  found_mset_p.insert(*hway_iter);
		  hway_iter++;
		}
	 } //else {
	 
#define CLEAR_CROADS 0
#if CLEAR_CROADS								  // !!!
	 croads_diff_set_dx_dy->clear();
	 croads_diff_mset_p->clear();
#endif

	 std::set<differential_t, struct_comp_diff_dx_dy>::iterator croad_iter = croads_diff_set_dx_dy->lower_bound(diff_dy);
	 b_found_in_croads = (croad_iter != croads_diff_set_dx_dy->end()) && (croad_iter->dx == dx);

#if CLEAR_CROADS
	 assert(b_found_in_croads == false);
#endif

	 uint32_t dx_prev = diff[n - 1].dx; // dy_{i} = dx_{i - 1}
	 assert(diff_set_dx_dy->size() != 0);
	 const uint64_t max_cnt = (1ULL << 32);//XDP_ROT_AND_MAX_DIFF_CNT;  // !!!
	 bool b_backto_hway = SIMON_BACK_TO_HWAY;
	 if(b_hash_map == true) {
	    b_backto_hway = false;	  // !!!
	 }

	 uint32_t cnt_new = xdp_rot_and_dx_pddt(diff_dy.dx, dx_prev, diff_set_dx_dy, diff_mset_p, croads_diff_set_dx_dy, croads_diff_mset_p, lrot_const_s, lrot_const_t, lrot_const_u, max_cnt, p_min, b_backto_hway);

	 if(cnt_new != 0) {
#if 1									  // DEBUG
		printf("\r[%s:%d] [%2d / %2d]: Added %d new country roads: p_min = %f (2^%f). New sizes: Dxy %d, Dp %d", __FILE__, __LINE__, n, NROUNDS, cnt_new, p_min, log2(p_min), croads_diff_set_dx_dy->size(), croads_diff_mset_p->size());
		fflush(stdout);
#endif
		croad_iter = croads_diff_set_dx_dy->lower_bound(diff_dy);
		b_found_in_croads = (croad_iter != croads_diff_set_dx_dy->end()) && (croad_iter->dx == dx);
	 } else {
		//		printf("\r[%s:%d] [%2d / %2d]: No new country roads found: p_min = %f (2^%f)", __FILE__, __LINE__, n, NROUNDS, p_min, log2(p_min));
		//		fflush(stdout);
	 }

	 if(b_found_in_croads) {
#if CLEAR_CROADS
		assert(croad_iter->p >= p_min);
#endif
		while((croad_iter->dx == dx) && (croad_iter->p >= p_min)) {
		  //		while(croad_iter->dx == dx) {
#if CLEAR_CROADS

		  found_mset_p.insert(*croad_iter);

#endif
		  croad_iter++;
		}
	 }
	 //	 }

	 std::multiset<differential_t, struct_comp_diff_p>::iterator find_iter = found_mset_p.begin();

#if 0									  // DEBUG
	 printf("\r[%s:%d] %2d: Temp set size %d ", __FILE__, __LINE__, n, found_mset_p.size());
	 fflush(stdout);
#endif

	 if(find_iter->dx == dx) {
		bool b_end = false;
		while((find_iter->dx == dx) && (find_iter != found_mset_p.end())  && (!b_end)) {
		  assert((find_iter->dx == dx));
		  diff_dy = *find_iter;

		  dx = diff_dy.dx;
		  dy = diff_dy.dy;
		  pn = diff_dy.p;

		  double p = 1.0;
		  for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
			 p *= diff[i].p;
		  }
		  p = p * pn * B[nrounds - 1 - (n + 1)]; 

		  uint32_t dyy = diff[n-1].dx; // dy_{i} = dx_{i-1}
		  uint32_t dxx = dy ^ dyy ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dx_{i-1} ^ (alpha <<< 2)

		  // store the beginnig
#if 0
		  std::set<differential_t, struct_comp_diff_dx_dy>::iterator begin_iter = diff_set_dx_dy->begin();
#endif
		  //		  bool b_low_hw = ((hw32(dx) <= max_hw) && (hw32(dxx) <= max_hw));
		  //		  if((p >= *Bn) && (p != 0.0) && (b_low_hw)) {
		  if((p >= *Bn) && (p != 0.0)) {
			 diff[n].dx = dx;		  // dx_{i}
			 diff[n].dy = dxx;	  // dx_{i+1}
			 diff[n].p = pn;
			 simon_xor_threshold_search_v2(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, diffs_hash_map, trails_hash_map, diff_max, b_hash_map, p_eps, p_thres);
		  }// else {
		  //			 //			 if(b_low_hw)
		  //			 b_end = true;
		  //		  }
		  find_iter++;
		}	// while
	 }		// if
  }

  if((n == (nrounds - 1)) && (nrounds > 1)) {		  // Last round

	 uint32_t dx = diff[n - 1].dy; // dx_{i} = dy_{i - 1}
	 uint32_t dy = 0;					 // gamma

#if 1
	 pn = max_xdp_rot_and(dx, &dy, lrot_const_s, lrot_const_t);
#else	 // !!!
	 pn = xdp_rot_and(dx, dy, lrot_const_s, lrot_const_t);
#endif

	 uint32_t dyy = diff[n-1].dx; // dy_{i} = dx_{i-1}
	 uint32_t dxx = dy ^ dyy ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dx_{i-1} ^ (alpha <<< 2)

	 double p = 1.0;
	 for(int i = 0; i < n; i++) {
		p *= diff[i].p;
	 }
	 p *= pn;
	 diff[n].dx = dx;
	 diff[n].dy = dxx;
	 diff[n].p = pn;

#if 1									  // store trail in hash table
	 //	 double p_max = 1.0 / (double)(1UL << 12);
	 //	 if((p != 1.0) && (p == p_max) && (n == (NROUNDS - 1))) {
	 if((b_hash_map) && (p != 1.0) && (p != 0.0) && (n == (NROUNDS - 1))) {
		uint32_t trail_len = n + 1;
		assert(trail_len == (uint32_t)nrounds);
#if 0									  // DEBUG
		printf("\n[%s:%d] ", __FILE__, __LINE__);
		for(int i = 0; i <= n; i++) {
		  printf("%4X %4X ", diff[i].dx, diff[i].dy);
		}
		printf("\n");
		printf("[%s:%d] Trails:\n", __FILE__, __LINE__);
		simon_print_hash_table(*trails_hash_map, trail_len);
#endif

		//		simon_hash_map_update(diff, trail_len, diffs_hash_map, trails_hash_map, diff_max);
		simon_boost_hash_map_update(diff, trail_len, diffs_hash_map, trails_hash_map, diff_max);

	 }
#endif

	 //	 uint32_t max_hw = XDP_ROT_AND_MAX_HW;
	 //	 bool b_low_hw = ((hw32(dx & MASK) <= max_hw) && (hw32(dxx & MASK) <= max_hw));
	 //	 if((!b_hash_map) && (p >= *Bn) && (p != 1.0) && (p != 0.0) && (b_low_hw)) { // skip the 0-diff trail (p = 1.0)
	 if((!b_hash_map) && (p >= *Bn) && (p != 1.0) && (p != 0.0)) { // skip the 0-diff trail (p = 1.0)
#if 1									  // DEBUG
		if (p > *Bn) {
		  printf("[%s:%d] %d | Update best found Bn: 2^%f -> 2^%f\n", __FILE__, __LINE__, n, log2(*Bn), log2(p));
		}
#endif
		*Bn = p;
		B[n] = p;
		for(int i = 0; i < nrounds; i++) {
		  trail[i].dx = diff[i].dx;
		  trail[i].dy = diff[i].dy;
		  trail[i].p = diff[i].p;
		}
	 } 
  }
}


/* --- */

#if 0
  // Disabled snippet: calls simon_xor_threshold_count_lp() on diff with
  // trail_len = n and the threshold p_thres, keeping the result in cnt_lp.
  // NOTE(review): presumably cnt_lp counts the low-probability rounds of the
  // partial trail and max_lp is the allowed maximum -- confirm against the
  // callee; n, diff and p_thres come from an enclosing function not shown here.
  uint32_t max_lp = n;//4;
  uint32_t cnt_lp = 0;
  uint32_t trail_len = n;
  cnt_lp = simon_xor_threshold_count_lp(diff, trail_len, p_thres);
#endif
  //  printf("[%s:%d] cnt_lp %d / %d\n", __FILE__, __LINE__, cnt_lp, max_lp);



/* --- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-56.000000
B[17] = 2^-58.000000
B[18] = 2^-60.000000
B[19] = 2^-62.000000
 0:  8000000 -> 30000000 0.250000 (2^-2.000000)
 1:  2202000 ->   808000 0.015625 (2^-6.000000)
 2:   808000 ->   222000 0.062500 (2^-4.000000)
 3:   222000 ->    80000 0.015625 (2^-6.000000)
 4:    80000 ->    22000 0.250000 (2^-2.000000)
 5:    22000 ->     8000 0.062500 (2^-4.000000)
 6:     8000 ->     2000 0.250000 (2^-2.000000)
 7:     2000 ->        0 0.250000 (2^-2.000000)
 8:        0 ->     2000 1.000000 (2^0.000000)
 9:     2000 ->     8000 0.250000 (2^-2.000000)
10:     8000 ->    22000 0.250000 (2^-2.000000)
11:    22000 ->    80000 0.062500 (2^-4.000000)
12:    80000 ->   222000 0.250000 (2^-2.000000)
13:   222000 ->   808000 0.015625 (2^-6.000000)
14:   808000 ->  2202000 0.062500 (2^-4.000000)
15:  2202000 ->  8000000 0.015625 (2^-6.000000)
16:  8000000 -> 20000000 0.250000 (2^-2.000000)
17: 20000000 -> 80000000 0.250000 (2^-2.000000)
18: 80000000 ->        2 0.250000 (2^-2.000000)
19:        2 ->        8 0.250000 (2^-2.000000)

 */

/* --- */

/*

BUG

[./src/simon-xor-threshold-search.cc:1664] Init bound: 20000000 -> 0 = 0.250000 2^-2.000000
  [./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
  [./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
  THE  0: 0.250000 (2^-2.000000)  8000000 -> 30000000
EXP  0: 0.249063 (2^-2.005420)  8000000 -> 30000000

THE  1: 0.015625 (2^-6.000000)  2202000 ->   808000
EXP  1: 0.015466 (2^-6.014781)  2202000 ->   808000

THE  2: 0.062500 (2^-4.000000)   808000 ->   222000
EXP  2: 0.062268 (2^-4.005359)   808000 ->   222000

THE  3: 0.015625 (2^-6.000000)   222000 ->    80000
EXP  3: 0.015580 (2^-6.004145)   222000 ->    80000

THE  4: 0.250000 (2^-2.000000)    80000 ->    22000
EXP  4: 0.249867 (2^-2.000765)    80000 ->    22000

THE  5: 0.062500 (2^-4.000000)    22000 ->     8000
EXP  5: 0.062263 (2^-4.005470)    22000 ->     8000

THE  6: 0.250000 (2^-2.000000)     8000 ->     2000
EXP  6: 0.249935 (2^-2.000374)     8000 ->     2000

THE  7: 0.250000 (2^-2.000000)     2000 ->        0
EXP  7: 0.249430 (2^-2.003295)     2000 ->        0

THE  8: 1.000000 (2^0.000000)        0 ->     2000
EXP  8: 1.000000 (2^0.000000)        0 ->     2000

THE  9: 0.250000 (2^-2.000000)     2000 ->     8000
EXP  9: 0.250384 (2^-1.997784)     2000 ->     8000

THE 10: 0.250000 (2^-2.000000)     8000 ->    22000
EXP 10: 0.250020 (2^-1.999884)     8000 ->    22000

THE 11: 0.062500 (2^-4.000000)    22000 ->    80000
EXP 11: 0.062600 (2^-3.997690)    22000 ->    80000

THE 12: 0.250000 (2^-2.000000)    80000 ->   222000
EXP 12: 0.249900 (2^-2.000578)    80000 ->   222000

THE 13: 0.015625 (2^-6.000000)   222000 ->   808000
EXP 13: 0.015714 (2^-5.991834)   222000 ->   808000

THE 14: 0.062500 (2^-4.000000)   808000 ->  2202000
EXP 14: 0.062552 (2^-3.998790)   808000 ->  2202000

THE 15: 0.015625 (2^-6.000000)  2202000 ->  8000000
EXP 15: 0.015660 (2^-5.996746)  2202000 ->  8000000

THE 16: 0.250000 (2^-2.000000)  8000000 -> 20000000
EXP 16: 0.000000 (2^-inf)  8000000 -> 20000000

OK
./src/simon-xor-threshold-search.cc:1583] nrounds = 18, Bn_init = 2^-58.000000 : key B79DA42D A24C64C2 2174B5D2 53B55993

*/

/* --- */

/* 
Time: Tue Oct 29 17:20:50 2013
[./tests/simon-xor-threshold-search-tests.cc:2495]
 WORD_SIZE 32
 NROUNDS 32
 XDP_ROT_AND_P_THRES 0.000000 2^-inf
 XDP_ROT_AND_MAX_DIFF_CNT 4294967296 2^32.00
 SIMON_EPS 0.000031 2^-15.000000
 XDP_ROT_AND_MAX_HW 2
[./src/simon-xor-threshold-search.cc:1517] Initialize hways:
Initial set sizes: Dp 5265, Dxy 5265


 */

/* 
Good params without back_to_highway

[./tests/simon-xor-threshold-search-tests.cc:2499] WORD_SIZE 32 NROUNDS 20 XDP_ROT_AND_P_THRES 0.070000 2^-3.836501 XDP_ROT_AND_MAX_DIFF_CNT 50 2^5.64 SIMON_EPS 0.000031 2^-15.000000

 */

/* --- */

/* 
Params for simon_xor_cluster_trails()	 

const uint64_t max_cnt = (1ULL << 8);
#define SIMON_EPS (double)(1.0 / (double)(1ULL << 15)) (or 10?)

 */

/* --- */


/* --- */

/* 
Simon48, pDDT size 2^5, #rounds 15, SIMON_EPS 2^-10
 */

/* 

Simon48, Simon64 experiments, 20131029: intermediate results

1/ Simon64, pDDT size 2^8, #rounds 21 x killed -> Simon64, pDDT size 2^5, #rounds 21, SIMON_EPS 2^-10
2/ Simon64, pDDT size 2^7, #rounds 22 x killed -> Simon64, pDDT size 2^6, #rounds 21, SIMON_EPS 2^-10
3/ Simon64, pDDT size 2^7, #rounds 21
4/ Simon48, pDDT size 2^7, #rounds 16
5/ Simon48, pDDT size 2^8, #rounds 15 x killed
6/ Simon48, pDDT size 2^6, #rounds 15

Tasks: 227 total,   8 running, 219 sleeping,   0 stopped,   0 zombie
Cpu(s): 87.5%us,  0.0%sy,  0.0%ni, 12.4%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:   8153148k total,  5885876k used,  2267272k free,   234736k buffers
Swap:  8342516k total,     5588k used,  8336928k free,  5144660k cached

  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
16052 vpv       20   0  6460 1728 1420 R  100  0.0 714:39.35 simon-xor-thres
16121 vpv       20   0  6460 1740 1424 R  100  0.0 708:19.36 simon-xor-thres
16166 vpv       20   0  6480 1724 1420 R  100  0.0 707:18.28 simon-xor-thres
16763 vpv       20   0  6476 1736 1420 R  100  0.0 611:54.01 simon-xor-thres
16818 vpv       20   0  6460 1744 1424 R  100  0.0 604:34.10 simon-xor-thres

Simon64, pDDT size 2^5, #rounds 21, SIMON_EPS 2^-10, p_thres 0.05

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-24.000000
B[ 9] = 2^-28.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-62.000000
B[17] = 2^-68.000000
B[18] = 2^-70.000000
 0:      200 ->      800 0.250000 (2^-2.000000)
 1:  8000088 -> 20000020 0.015625 (2^-6.000000)
 2: 20000020 -> 88000008 0.062500 (2^-4.000000)
 3: 88000008 ->        2 0.015625 (2^-6.000000)
 4:        2 -> 88000000 0.250000 (2^-2.000000)
 5: 88000000 -> 20000000 0.062500 (2^-4.000000)
 6: 20000000 ->  8000000 0.250000 (2^-2.000000)
 7:  8000000 ->        0 0.250000 (2^-2.000000)
 8:        0 ->  8000000 1.000000 (2^0.000000)
 9:  8000000 -> 20000000 0.250000 (2^-2.000000)
10: 20000000 -> 88000000 0.250000 (2^-2.000000)
11: 88000000 ->        2 0.062500 (2^-4.000000)
12:        2 -> 88000008 0.250000 (2^-2.000000)
13: 88000008 -> 20000020 0.015625 (2^-6.000000)
14: 20000020 ->  8000088 0.062500 (2^-4.000000)
15:  8000088 ->      200 0.015625 (2^-6.000000)
16:      200 ->  8000888 0.250000 (2^-2.000000)
17:  8000888 -> 20002020 0.003906 (2^-8.000000)
18: 20002020 -> 88008808 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-70.000000, Bn = 0.000000 = 2^-70.000000
[./src/simon-xor-threshold-search.cc:1497] nrounds = 20, Bn_init = 2^-80.000000 : key 6D1614B3 68F0681A D46C47B6 2A6AF178


1/ Simon64, pDDT size 2^8, #rounds 21

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
 0: 80800000 -> 83000002 0.062500 (2^-4.000000)
 1: 22200000 ->  8000000 0.015625 (2^-6.000000)
 2:  8000000 ->  2200000 0.250000 (2^-2.000000)
 3:  2200000 ->   800000 0.062500 (2^-4.000000)
 4:   800000 ->   200000 0.250000 (2^-2.000000)
 5:   200000 ->        0 0.250000 (2^-2.000000)
 6:        0 ->   200000 1.000000 (2^0.000000)
 7:   200000 ->   800000 0.250000 (2^-2.000000)
 8:   800000 ->  2200000 0.250000 (2^-2.000000)
 9:  2200000 ->  8000000 0.062500 (2^-4.000000)
10:  8000000 -> 22200000 0.250000 (2^-2.000000)
p_tot = 0.000000000931323 = 2^-30.000000, Bn = 0.000000 = 2^-30.000000
[./src/simon-xor-threshold-search.cc:1497] nrounds = 12, Bn_init = 2^-36.000000 : key 26ECD315 D0069298 2B362867 A30741E3


2/ Simon64, pDDT size 2^7, #rounds 22

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
0: 88800000 -> 22000002 0.015625 (2^-6.000000)
1: 20000000 ->  8800000 0.250000 (2^-2.000000)
2:  8800000 ->  2000000 0.062500 (2^-4.000000)
3:  2000000 ->   800000 0.250000 (2^-2.000000)
4:   800000 ->        0 0.250000 (2^-2.000000)
5:        0 ->   800000 1.000000 (2^0.000000)
6:   800000 ->  2000000 0.250000 (2^-2.000000)
7:  2000000 ->  8800000 0.250000 (2^-2.000000)
8:  8800000 -> 20000000 0.062500 (2^-4.000000)
9: 20000000 -> 88800000 0.250000 (2^-2.000000)
10: 88800000 ->  2000002 0.015625 (2^-6.000000)
11:  2000002 -> 80800008 0.062500 (2^-4.000000)
12: 80800008 ->       20 0.015625 (2^-6.000000)
13:       20 -> 80800088 0.250000 (2^-2.000000)
p_tot = 0.000000000000057 = 2^-44.000000, Bn = 0.000000 = 2^-44.000000
[./src/simon-xor-threshold-search.cc:1497] nrounds = 15, Bn_init = 2^-52.000000 : key D7B47846 C9CE1683 B33563F0 4A9EE0FB


3/ Simon64, pDDT size 2^7, #rounds 21

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
0: 88800000 -> 22000002 0.015625 (2^-6.000000)
1: 20000000 ->  8800000 0.250000 (2^-2.000000)
2:  8800000 ->  2000000 0.062500 (2^-4.000000)
3:  2000000 ->   800000 0.250000 (2^-2.000000)
4:   800000 ->        0 0.250000 (2^-2.000000)
5:        0 ->   800000 1.000000 (2^0.000000)
6:   800000 ->  2000000 0.250000 (2^-2.000000)
7:  2000000 ->  8800000 0.250000 (2^-2.000000)
8:  8800000 -> 20000000 0.062500 (2^-4.000000)
9: 20000000 -> 88800000 0.250000 (2^-2.000000)
10: 88800000 ->  2000002 0.015625 (2^-6.000000)
11:  2000002 -> 80800008 0.062500 (2^-4.000000)
12: 80800008 ->       20 0.015625 (2^-6.000000)
13:       20 -> 80800088 0.250000 (2^-2.000000)
p_tot = 0.000000000000057 = 2^-44.000000, Bn = 0.000000 = 2^-44.000000
[./src/simon-xor-threshold-search.cc:1497] nrounds = 15, Bn_init = 2^-52.000000 : key 1611B765 83698638 E0419DF5 78F264FB


4/ Simon48, pDDT size 2^7, #rounds 16

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
0:   888000 ->   220002 0.015625 (2^-6.000000)
1:   200000 ->    88000 0.250000 (2^-2.000000)
2:    88000 ->    20000 0.062500 (2^-4.000000)
3:    20000 ->     8000 0.250000 (2^-2.000000)
4:     8000 ->        0 0.250000 (2^-2.000000)
5:        0 ->     8000 1.000000 (2^0.000000)
6:     8000 ->    20000 0.250000 (2^-2.000000)
7:    20000 ->    88000 0.250000 (2^-2.000000)
8:    88000 ->   200000 0.062500 (2^-4.000000)
9:   200000 ->   888000 0.250000 (2^-2.000000)
10:   888000 ->    20002 0.015625 (2^-6.000000)
11:    20002 ->   808008 0.062500 (2^-4.000000)
12:   808008 ->       20 0.015625 (2^-6.000000)
13:       20 ->   808088 0.250000 (2^-2.000000)
p_tot = 0.000000000000057 = 2^-44.000000, Bn = 0.000000 = 2^-44.000000
[./src/simon-xor-threshold-search.cc:1497] nrounds = 15, Bn_init = 2^-52.000000 : key   92F0DD   F74F03   8FE65C   56B3C7


5/ Simon48, pDDT size 2^8, #rounds 15

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
0:   222000 ->   888000 0.015625 (2^-6.000000)
1:    80000 ->    22000 0.250000 (2^-2.000000)
2:    22000 ->     8000 0.062500 (2^-4.000000)
3:     8000 ->     2000 0.250000 (2^-2.000000)
4:     2000 ->        0 0.250000 (2^-2.000000)
5:        0 ->     2000 1.000000 (2^0.000000)
6:     2000 ->     8000 0.250000 (2^-2.000000)
7:     8000 ->    22000 0.250000 (2^-2.000000)
8:    22000 ->    80000 0.062500 (2^-4.000000)
9:    80000 ->   222000 0.250000 (2^-2.000000)
10:   222000 ->   808000 0.015625 (2^-6.000000)
11:   808000 ->   202002 0.062500 (2^-4.000000)
p_tot = 0.000000000014552 = 2^-36.000000, Bn = 0.000000 = 2^-36.000000
[./src/simon-xor-threshold-search.cc:1497] nrounds = 13, Bn_init = 2^-42.000000 : key   8E4537   D6C346   BBE678   E7E034


6/ Simon48, pDDT size 2^5, #rounds 15

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-24.000000
B[ 9] = 2^-28.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
0:   200020 ->   800080 0.062500 (2^-4.000000)
1:   880008 ->        2 0.015625 (2^-6.000000)
2:        2 ->   880000 0.250000 (2^-2.000000)
3:   880000 ->   200000 0.062500 (2^-4.000000)
4:   200000 ->    80000 0.250000 (2^-2.000000)
5:    80000 ->        0 0.250000 (2^-2.000000)
6:        0 ->    80000 1.000000 (2^0.000000)
7:    80000 ->   200000 0.250000 (2^-2.000000)
8:   200000 ->   880000 0.250000 (2^-2.000000)
9:   880000 ->        2 0.062500 (2^-4.000000)
10:        2 ->   880008 0.250000 (2^-2.000000)
11:   880008 ->   200020 0.015625 (2^-6.000000)
12:   200020 ->    80088 0.062500 (2^-4.000000)
13:    80088 ->      200 0.015625 (2^-6.000000)
14:      200 ->    80888 0.250000 (2^-2.000000)
p_tot = 0.000000000000004 = 2^-48.000000, Bn = 0.000000 = 2^-48.000000
[./src/simon-xor-threshold-search.cc:1619] nrounds = 15
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():198] dy_init        0
[./src/simon-xor-threshold-search.cc:218] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.062500 (2^-4.000000)   200020 ->   800080
EXP  0: 0.062641 (2^-3.996746)   200020 ->   800080

THE  1: 0.015625 (2^-6.000000)   880008 ->        2
EXP  1: 0.015561 (2^-6.005912)   880008 ->        2

THE  2: 0.250000 (2^-2.000000)        2 ->   880000
EXP  2: 0.249454 (2^-2.003157)        2 ->   880000

THE  3: 0.062500 (2^-4.000000)   880000 ->   200000
EXP  3: 0.062924 (2^-3.990237)   880000 ->   200000

THE  4: 0.250000 (2^-2.000000)   200000 ->    80000
EXP  4: 0.250069 (2^-1.999604)   200000 ->    80000

THE  5: 0.250000 (2^-2.000000)    80000 ->        0
EXP  5: 0.249647 (2^-2.002038)    80000 ->        0

THE  6: 1.000000 (2^0.000000)        0 ->    80000
EXP  6: 1.000000 (2^0.000000)        0 ->    80000

THE  7: 0.250000 (2^-2.000000)    80000 ->   200000
EXP  7: 0.249482 (2^-2.002991)    80000 ->   200000

THE  8: 0.250000 (2^-2.000000)   200000 ->   880000
EXP  8: 0.249800 (2^-2.001156)   200000 ->   880000

THE  9: 0.062500 (2^-4.000000)   880000 ->        2
EXP  9: 0.062298 (2^-4.004674)   880000 ->        2

THE 10: 0.250000 (2^-2.000000)        2 ->   880008
EXP 10: 0.249971 (2^-2.000165)        2 ->   880008

THE 11: 0.015625 (2^-6.000000)   880008 ->   200020
EXP 11: 0.015608 (2^-6.001586)   880008 ->   200020

THE 12: 0.062500 (2^-4.000000)   200020 ->    80088
EXP 12: 0.062112 (2^-4.008988)   200020 ->    80088

THE 13: 0.015625 (2^-6.000000)    80088 ->      200
EXP 13: 0.015411 (2^-6.019860)    80088 ->      200

THE 14: 0.250000 (2^-2.000000)      200 ->    80888
EXP 14: 0.250301 (2^-1.998262)      200 ->    80888

Input differences:   200020    80088

R# 0 Output differences:   880008   200020
THE  1: 0.062500 (2^-4.000000)   200020 ->   880008
EXP  1: 0.062456 (2^-4.001013)   200020 ->   880008

R# 1 Output differences:        2   880008
THE  2: 0.000977 (2^-10.000000)   880008 ->        2
EXP  2: 0.000992 (2^-9.977632)   880008 ->        2

R# 2 Output differences:   880000        2
THE  3: 0.000244 (2^-12.000000)        2 ->   880000
EXP  3: 0.000236 (2^-12.051633)        2 ->   880000

R# 3 Output differences:   200000   880000
THE  4: 0.000015 (2^-16.000000)   880000 ->   200000
EXP  4: 0.000080 (2^-13.607683)   880000 ->   200000

R# 4 Output differences:    80000   200000
THE  5: 0.000004 (2^-18.000000)   200000 ->    80000
EXP  5: 0.000003 (2^-18.415037)   200000 ->    80000

R# 5 Output differences:        0    80000
THE  6: 0.000001 (2^-20.000000)    80000 ->        0
EXP  6: 0.000000 (2^-inf)    80000 ->        0

R# 6 Output differences:    80000        0
THE  7: 0.000001 (2^-20.000000)        0 ->    80000
EXP  7: 0.000002 (2^-19.000000)        0 ->    80000

R# 7 Output differences:   200000    80000
THE  8: 0.000000 (2^-22.000000)    80000 ->   200000
EXP  8: 0.000000 (2^-inf)    80000 ->   200000

R# 8 Output differences:   880000   200000
THE  9: 0.000000 (2^-24.000000)   200000 ->   880000
EXP  9: 0.000000 (2^-inf)   200000 ->   880000

R# 9 Output differences:        2   880000
THE 10: 0.000000 (2^-28.000000)   880000 ->        2
EXP 10: 0.000000 (2^-inf)   880000 ->        2

R#10 Output differences:   880008        2
THE 11: 0.000000 (2^-30.000000)        2 ->   880008
EXP 11: 0.000000 (2^-inf)        2 ->   880008

R#11 Output differences:   200020   880008
THE 12: 0.000000 (2^-36.000000)   880008 ->   200020
EXP 12: 0.000000 (2^-inf)   880008 ->   200020

R#12 Output differences:    80088   200020
THE 13: 0.000000 (2^-40.000000)   200020 ->    80088
EXP 13: 0.000000 (2^-inf)   200020 ->    80088

R#13 Output differences:      200    80088
THE 14: 0.000000 (2^-46.000000)    80088 ->      200
EXP 14: 0.000000 (2^-inf)    80088 ->      200

R#14 Output differences:    80888      200
THE 15: 0.000000 (2^-48.000000)      200 ->    80888
EXP 15: 0.000000 (2^-inf)      200 ->    80888

[./src/simon-xor-threshold-search.cc:1658] Initial trail
1: H[CA7E619D] | 200020 800080 880008    2    2 880000 880000 200000 200000 80000 80000    0    0 80000 80000 200000 200000 880000 880000
[./src/simon-xor-threshold-search.cc:1683] CHECKPOINT! p_max 2^-inf p 2^-48.000000
[./src/simon-xor-threshold-search.cc:1701] Update MAX differential: 200020 80088 ->  200 80888 2^-48.000000 | #trails 1
[./src/simon-xor-threshold-search.cc:1706] Initial MAX differential: 200020 80088 ->  200 80888 2^-48.000000 | #trails 1
[

 */


/* --- */

#if SIMON_DRAW_GRAPH
  // (Re)create the Graphviz input file and write the fixed header of the
  // digraph: rank separation and point-shaped nodes. Opening with "w"
  // truncates any previous contents.
  //size = "7.5,10"
  //ranksep = "1.1 equally"
  //graph [concentrate = true]
  FILE* fp = fopen(SIMON_GVIZ_DATFILE, "w");
  if(fp != NULL) {
	 fprintf(fp, "digraph G {\n");
	 //  fprintf(fp, "size = \"7.5,10\"\n");
	 fprintf(fp, "ranksep = \"1.1 equally\"\n");
	 fprintf(fp, "node [shape=point]\n");
	 fclose(fp);
  } else {
	 // fopen() returns NULL on failure; writing to or closing a NULL stream
	 // is undefined behaviour, so report the problem and skip the header.
	 printf("[%s:%d] ERROR: cannot open %s for writing\n", __FILE__, __LINE__, SIMON_GVIZ_DATFILE);
  }

  // Truncate the "concentrated" graph file so that it starts out empty
  FILE* fp_con = fopen(SIMON_GVIZ_DATFILE_CON, "w");
  if(fp_con != NULL) {
	 fclose(fp_con);
  } else {
	 printf("[%s:%d] ERROR: cannot open %s for writing\n", __FILE__, __LINE__, SIMON_GVIZ_DATFILE_CON);
  }
#endif


/* --- */
/* 
penwidth  [1 + log (number of edges)] 
 */

/* --- */

#if 1									  // DEBUG
	 printf("[%s:%d] Cur edge %d(%4X %4X) -> (%4X %4X) %d\n", __FILE__, __LINE__, 
			  E->at(edge_iter).level, 
			  E->at(edge_iter).node_from[0], E->at(edge_iter).node_from[1],
			  E->at(edge_iter).node_to[0], E->at(edge_iter).node_to[1],
			  E->at(edge_iter).cnt);
	 printf("[%s:%d] New edge %d(%4X %4X) -> (%4X %4X) %d\n", __FILE__, __LINE__, 
			  new_edge.level, 
			  new_edge.node_from[0], new_edge.node_from[1],
			  new_edge.node_to[0], new_edge.node_to[1],
			  new_edge.cnt);
	 printf("\n\n");
#endif


/* --- */

/* 
Using the differential search based on threshold search to find the best 13 R differential as full search: 13 R: (   0 8000) -> (   0   80)

New modifications:

- Remove "back to the highway" for the differential search phase
- P_THRES: from 2^-5 to 2^-7
- SIMON_EPS: from 2^-10 to 2^-15
- fixed "cout" to "boost::hash"

Parameters:

#define XDP_ROT_AND_MAX_DIFF_CNT (1ULL << 7)//(1ULL << 7)
#define XDP_ROT_AND_P_THRES 0.05
#define SIMON_EPS (double)(1.0 / (double)(1ULL << 15))
#define NROUNDS 13

[./src/simon-xor-threshold-search.cc:915] Improve differential prob:     0 8000 ->    0   80 2^-33.744971 -> 2^-33.743791 | #trails 464812
[./src/simon-xor-threshold-search.cc:931] Update max for 13 R: (   0 8000) -> (   0   80) 2^-33.743791

   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 3100 3100  C82  C82  200  200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 B004 B004  802  802 8200 8200   80   80    0    0   80  | 2^-45.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 A008 A008  883  883  300  300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A008 A008  880  880  200  200   80   80    0    0   80  | 2^-40.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 3008 3008  C80  C80  300  300   80   80    0    0   80  | 2^-45.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 B100 B100  C02  C02 8300 8300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 2108 2108  C80  C80 8200 8200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C82 8C82 3004 3004  882  882  200  200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A008 A008  880  880 8300 8300   80   80    0    0   80  | 2^-43.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 2100 2100  C81  C81  300  300   80   80    0    0   80  | 2^-43.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C82 8C82 3800 3800  C82  C82  200  200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A000 A000  880  880 8300 8300   80   80    0    0   80  | 2^-41.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 B800 B800  C02  C02 8200 8200   80   80    0    0   80  | 2^-45.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 B008 B008  803  803 8200 8200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 2000 2000  C80  C80  300  300   80   80    0    0   80  | 2^-42.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 A000 A000  C82  C82  200  200   80   80    0    0   80  | 2^-42.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 3000 3000  882  882  200  200   80   80    0    0   80  | 2^-41.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 A100 A100  C82  C82  300  300   80   80    0    0   80  | 2^-43.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 A100 A100  C83  C83  300  300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 2000 2000  C80  C80  200  200   80   80    0    0   80  | 2^-40.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C82 8C82 2008 2008  C82  C82 8200 8200   80   80    0    0   80  | 2^-45.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 2100 2100  880  880 8200 8200   80   80    0    0   80  | 2^-42.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A00C A00C  C80  C80  200  200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 2008 2008  880  880 8300 8300   80   80    0    0   80  | 2^-41.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 2000 2000  880  880 8200 8200   80   80    0    0   80  | 2^-38.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 3100 3100  880  880  300  300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 2100 2100  C82  C82  300  300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 3008 3008  882  882 8200 8200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 2008 2008  882  882 8300 8300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 3004 3004  882  882 8200 8200   80   80    0    0   80  | 2^-45.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A100 A100  E80  E80 8300 8300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 3100 3100  C80  C80 8200 8200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 A008 A008  882  882  300  300   80   80    0    0   80  | 2^-43.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 A008 A008  C82  C82  200  200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 B000 B000  803  803 8200 8200   80   80    0    0   80  | 2^-43.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 2100 2100  C81  C81 8300 8300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 A000 A000  882  882  300  300   80   80    0    0   80  | 2^-41.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A100 A100  C80  C80 8200 8200   80   80    0    0   80  | 2^-43.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 2008 2008  880  880 8200 8200   80   80    0    0   80  | 2^-40.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 3000 3000  882  882 8200 8200   80   80    0    0   80  | 2^-43.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 A000 A000  C82  C82 8200 8200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A100 A100  C80  C80  200  200   80   80    0    0   80  | 2^-41.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 2008 2008  882  882  200  200   80   80    0    0   80  | 2^-41.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A008 A008  880  880 8200 8200   80   80    0    0   80  | 2^-42.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8880 8880 A000 A000  882  882  200  200   80   80    0    0   80  | 2^-39.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 3000 3000  C80  C80 8200 8200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 2008 2008  C80  C80  300  300   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C82 8C82 200C 200C  882  882  200  200   80   80    0    0   80  | 2^-44.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8882 8882 A10C A10C  C80  C80  200  200   80   80    0    0   80  | 2^-45.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 2000 2000  C80  C80 8200 8200   80   80    0    0   80  | 2^-42.000000
   0    0 8000    2    2 8008 8008   20   20 8088 8088  202  202 8C80 8C80 3108 3108  880  880  200  200   80   80    0    0   80  | 2^-43.000000
 ...


 */


/* --- */

#if 0
  // -----------
  // Disabled snippet: seed the (still empty) trail hash map with the best
  // trail found so far, copy that trail into diff[], and register the
  // corresponding input/output differential in diffs_hash_map and diff_max.
  // NOTE(review): p, diff, best_trail, best_trail_len, diff_max and both hash
  // maps come from an enclosing function that is not part of this snippet.
  assert(trails_hash_map.size() == 0);
  std::string s_trail = trail_to_string(best_trail, *best_trail_len);

  // Heap copy of the best trail; the hash map stores a pointer to a pointer
  differential_t** new_trail;
  new_trail = (differential_t** )calloc(1, sizeof(differential_t*));
  assert(new_trail != NULL);
  *new_trail = (differential_t*)calloc(*best_trail_len, sizeof(differential_t));
  assert(*new_trail != NULL);
  for(uint32_t i = 0; i < *best_trail_len; i++) {
	 (*new_trail)[i].dx = best_trail[i].dx;
	 (*new_trail)[i].dy = best_trail[i].dy;
	 (*new_trail)[i].p = best_trail[i].p;
	 diff[i].dx = best_trail[i].dx;
	 diff[i].dy = best_trail[i].dy;
	 diff[i].p = best_trail[i].p;
  }
  std::pair<std::string, differential_t**> new_pair (s_trail,new_trail);
  trails_hash_map.insert(new_pair);

  printf("[%s:%d] Initial trails:\n", __FILE__, __LINE__);
  simon_print_hash_table(trails_hash_map, *best_trail_len);

  // Add initial differential
  std::string s_diff = diff_to_string(best_trail, *best_trail_len);
  std::unordered_map<std::string, differential_t**>::const_iterator diff_hash_map_iter = 
	 diffs_hash_map.find(s_diff);

  // The maximum is expected to be unset (probability 0) at this point
  double p_max = (*diff_max)[1].p;
  assert((*diff_max)[1].p == 0.0);
  if(diff_hash_map_iter == diffs_hash_map.end()) {
	 // size() is an unsigned size type: print it with %zu rather than %d
	 printf("[%s:%d] Add initial differential: %4X %4X -> %4X %4X 2^%f | #trails %zu\n", __FILE__, __LINE__, best_trail[0].dx, best_trail[0].dy ^ best_trail[1].dx, best_trail[*best_trail_len - 1].dx, best_trail[*best_trail_len -1].dy, log2(p), (size_t)trails_hash_map.size());
	 fflush(stdout);

	 // Two-element record: [0] holds the input difference, [1] holds the
	 // output difference together with the probability p
	 differential_t** new_diff;
	 new_diff = (differential_t** )calloc(1, sizeof(differential_t*));
	 assert(new_diff != NULL);
	 *new_diff = (differential_t*)calloc(2, sizeof(differential_t));
	 assert(*new_diff != NULL);
	 (*new_diff)[0].dx = best_trail[0].dx;
	 (*new_diff)[0].dy = best_trail[0].dy ^ best_trail[1].dx; // !!
	 (*new_diff)[0].p = 1.0;
	 (*new_diff)[1].dx = best_trail[*best_trail_len - 1].dx;
	 (*new_diff)[1].dy = best_trail[*best_trail_len - 1].dy;
	 (*new_diff)[1].p = p;

	 std::pair<std::string, differential_t**> new_pair (s_diff,new_diff);
	 diffs_hash_map.insert(new_pair);

	 if(p > p_max) {
		(*diff_max)[0].p = (*diff_max)[1].p = p;
		(*diff_max)[0].dx = best_trail[0].dx;
		(*diff_max)[0].dy = best_trail[0].dy ^ best_trail[1].dx; // !!
		(*diff_max)[1].dx = best_trail[*best_trail_len - 1].dx;
		(*diff_max)[1].dy = best_trail[*best_trail_len - 1].dy;
		printf("[%s:%d] Update MAX differential: %4X %4X -> %4X %4X 2^%f | #trails %zu\n", __FILE__, __LINE__, (*diff_max)[0].dx, (*diff_max)[0].dy, (*diff_max)[1].dx, (*diff_max)[1].dy, log2((*diff_max)[1].p), (size_t)trails_hash_map.size());
	 }

  }
  printf("[%s:%d] Initial MAX differential: %4X %4X -> %4X %4X 2^%f | #trails %zu\n", __FILE__, __LINE__, (*diff_max)[0].dx, (*diff_max)[0].dy, (*diff_max)[1].dx, (*diff_max)[1].dy, log2((*diff_max)[1].p), (size_t)trails_hash_map.size());
#endif


/* --- */

/* ======= (stray merge-conflict separator, commented out so the file still parses) */
/**
 * Print the four entries of a fixed 2x2 table of conditional
 * probabilities p(R | L). The table is indexed as table[L][R].
 */
void test_xdp_rot_and_lucks()
{
  // Row = value of L, column = value of R
  const double cond_prob[2][2] = {
    {1.0,  0.0},
    {0.25, 0.75}
  };
  const double p_r0_given_l1 = cond_prob[1][0];
  const double p_r1_given_l1 = cond_prob[1][1];
  const double p_r0_given_l0 = cond_prob[0][0];
  const double p_r1_given_l0 = cond_prob[0][1];

  printf("p(R=0|L=1) %f\n", p_r0_given_l1);
  printf("p(R=1|L=1) %f\n", p_r1_given_l1);
  printf("p(R=0|L=0) %f\n", p_r0_given_l0);
  printf("p(R=1|L=0) %f\n", p_r1_given_l0);
}


/* --- */
/**
 * Experimental bitwise probability estimate: the result is the product
 * of one factor per bit position i of the word.
 *
 *  - bit i of \p da is 0: the factor is 1.0 if bit i of \p db rotated
 *    left by (t - s) is 0, and 0.0 otherwise;
 *  - bit i of \p da is 1: the factor is 0.25 if bit i is set in both
 *    the left and the right rotation of \p da by (t - s), and 0.75
 *    otherwise (the bit of \p db is not consulted in this case).
 *
 * Every per-bit factor is also printed to stdout.
 *
 * \param da first difference.
 * \param db second difference.
 * \param s smaller rotation constant.
 * \param t larger rotation constant (asserted: t >= s).
 * \return product of the per-bit factors.
 */
double xdp_rot_and_lucks(const uint32_t da, const uint32_t db,
                         const uint32_t s, const uint32_t t)
{
  assert(t >= s);
  const uint32_t rot = t - s;
  const uint32_t da_left = LROT(da, rot);
  const uint32_t da_right = RROT(da, rot);
  const uint32_t db_left = LROT(db, rot);

  double p_tot = 1.0;
  uint32_t i = 0;
  while(i < WORD_SIZE) {
    const uint32_t da_i = (da >> i) & 1;
    const uint32_t db_i = (db_left >> i) & 1;
    const uint32_t both_rot_i = ((da_left >> i) & 1) & ((da_right >> i) & 1);

    double p_i;
    if(da_i == 1) {
      p_i = both_rot_i ? 0.25 : 0.75;
    } else {
      p_i = (db_i == 0) ? 1.0 : 0.0;
    }
    p_tot *= p_i;
#if 1 // DEBUG
    printf("%2d: p(db %d| da %d) %f\n", i, db_i, da_i, p_i);
#endif
    i++;
  }
  return p_tot;
}


/* --- */

		// Orphaned debug snippet (its enclosing function is not part of this dump):
		// report when the index i differs from the number of elements in a.
		// NOTE(review): a.size() is passed to a %d conversion -- confirm the
		// argument type matches or cast it before reusing this snippet.
		if(i != a.size()) {
		  printf("[%s:%d] i %d a %d\n", __FILE__, __LINE__, i, a.size());
		}


/* --- */
/**
 * Case-insensitive equality predicate for std::string, intended as the
 * key-equality functor of an unordered container (see the commented-out
 * boost::unordered_map<..., ihash, iequal_to> usage in this file).
 *
 * The std::binary_function base was dropped because it is deprecated
 * since C++11 and removed in C++17; the nested typedefs it used to
 * provide are declared explicitly so dependent code keeps compiling.
 */
struct iequal_to
{
  typedef std::string first_argument_type;
  typedef std::string second_argument_type;
  typedef bool result_type;

  /** \return true if x and y are equal when compared case-insensitively. */
  bool operator()(std::string const& x,
                  std::string const& y) const
  {
    return boost::algorithm::iequals(x, y, std::locale());
  }
};

/**
 * Case-insensitive hash functor for std::string: every character is
 * upper-cased with the default-constructed locale before it is folded
 * into the seed with boost::hash_combine.
 *
 * The std::unary_function base was dropped because it is deprecated
 * since C++11 and removed in C++17; the nested typedefs it used to
 * provide are declared explicitly so dependent code keeps compiling.
 */
struct ihash
{
  typedef std::string argument_type;
  typedef std::size_t result_type;

  /** \return hash of x that does not depend on the case of its characters. */
  std::size_t operator()(std::string const& x) const
  {
    std::size_t seed = 0;
    std::locale locale;

    for(const char ch : x) {
      boost::hash_combine(seed, std::toupper(ch, locale));
    }

    return seed;
  }
};

/**
 * Placeholder for a hash-table test: the body has no statements. The
 * commented-out declarations below list the container types that were
 * considered for the differential and trail tables.
 */
void test_simon_hash_custom()
{
  //  std::unordered_map<std::string, differential_t**>* diffs_hash_map;
  //  std::unordered_map<std::string, differential_t**>* trails_hash_map;
  //  std::unordered_map<std::array<differential_t, NROUNDS>, std::array<differential_t, NROUNDS>> hash_map;

  //  std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>> trails_hash_map;

  //  boost::unordered_map<std::string, int, ihash, iequal_to> idictionary;

}

/* --- */
void test_simon_hash_custom()
{
  //  std::unordered_map<std::string, differential_t**>* diffs_hash_map;
  //  std::unordered_map<std::string, differential_t**>* trails_hash_map;
  //  std::unordered_map<std::array<differential_t, NROUNDS>, std::array<differential_t, NROUNDS>> hash_map;

  std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>> trails_hash_map;

  for(uint32_t h = 0; h < (1UL << 22); h++) {

  differential_t trail[NROUNDS] = {{0, 0, 0, 0.0}};
  for(uint32_t i = 0; i < NROUNDS; i++) {
	 trail[i].dx = random32() & MASK;
	 trail[i].dy = random32() & MASK;
  }

  std::array<differential_t, NROUNDS> trail_array;

  for(uint32_t i = 0; i < NROUNDS; i++) {
    trail_array[i] = trail[i];
  }

  uint64_t trail_hash = random32();//simon_trail_hash(trail_array);

	std::pair<uint64_t, std::array<differential_t, NROUNDS>> new_pair (trail_hash, trail_array);
	trails_hash_map.insert(new_pair);

	std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>>::const_iterator hash_map_iter = 
	  trails_hash_map.find(trail_hash);

   assert(hash_map_iter != trails_hash_map.end());

 }


#if 0
  printf("[%s%d] Added %d trails:\n", __FILE__, __LINE__, (uint32_t)trails_hash_map.size());
  std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>>::const_iterator hash_map_iter = trails_hash_map.begin();
  uint32_t trail_cnt = 0; 
  while(hash_map_iter != trails_hash_map.end()) {
	 trail_cnt++;
	 printf("[%5d] ", trail_cnt);
    printf(" %llX | ", hash_map_iter->first);
	 for(uint32_t i = 0; i < NROUNDS; i++) {
		printf("%4X %4X ", hash_map_iter->second[i].dx, hash_map_iter->second[i].dy);
	 }
	 printf("\n");
	 hash_map_iter++;
  }
#endif

}

/* --- */
// Hash of a full trail of NROUNDS round differences.
uint64_t simon_trail_hash(std::array<differential_t, NROUNDS> trail_array);

// Hash of a differential.
// NOTE(review): this prototype takes an array of SIMON_NDIFFS elements while
// the definition that follows takes an array of NROUNDS elements, so the two
// only name the same function if SIMON_NDIFFS == NROUNDS -- confirm.
uint64_t simon_diff_hash(std::array<differential_t, SIMON_NDIFFS> trail_array);


uint64_t simon_trail_hash(std::array<differential_t, NROUNDS> trail_array)
{
  uint32_t* trail_data = (uint32_t *)calloc((2 * NROUNDS), sizeof(uint32_t));
  for(uint32_t i = 0; i < NROUNDS; i += 2) {
	 trail_data[i] = trail_array[i].dx;
	 trail_data[i+1] = trail_array[i].dy;
  }
  uint64_t trail_hash = MurmurHash64A((const void *)trail_data, (2 * NROUNDS), 0);
  free(trail_data);
  return trail_hash;
}

/**
 * Compute a 64-bit hash of the differential covered by a trail.
 *
 * The differential is the tuple (dx_in, dy_in, dx_out, dy_out) with
 * dx_in = dx of round 0, dy_in = dy of round 0 XOR dx of round 1, and
 * (dx_out, dy_out) taken from the last round. All four words are
 * passed to MurmurHash64A() with seed 0.
 *
 * \param trail_array trail of NROUNDS round differences.
 * \return hash of the input and output differences of the trail.
 */
uint64_t simon_diff_hash(std::array<differential_t, NROUNDS> trail_array)
{
  assert(SIMON_NDIFFS == 2);
  uint32_t trail_data[4] = {0};
  trail_data[0] = trail_array[0].dx;
  trail_data[1] = trail_array[0].dy ^ trail_array[1].dx;
  trail_data[2] = trail_array[NROUNDS - 1].dx;
  trail_data[3] = trail_array[NROUNDS - 1].dy;

  // MurmurHash64A() expects the key length in bytes: passing the word
  // count would hash trail_data[0] only and make every differential
  // with the same input dx collide.
  const uint64_t diff_hash = MurmurHash64A((const void *)trail_data, (int)sizeof(trail_data), 0);
  return diff_hash;
}

										  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>* diffs_hash_map,
										  std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>>* trails_hash_map,

uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed );

// 64-bit hash for 64-bit platforms
// copied from https://sites.google.com/site/murmurhash/

// 64-bit hash for 32-bit platforms

/**
 * 64-bit hash assembled from two 32-bit lanes.
 *
 * Input words are absorbed alternately into the two lanes (first word
 * into the upper lane), up to three trailing bytes are mixed into the
 * lower lane, and the lanes are cross-mixed before being concatenated.
 *
 * \param key pointer to the data to hash (read as unsigned int words).
 * \param len length of the data in bytes.
 * \param seed initial value XOR-ed with len to start the upper lane.
 * \return (upper lane << 32) | lower lane.
 */
uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
{
  const unsigned int mult = 0x5bd1e995;
  const int shift = 24;

  unsigned int lane_hi = seed ^ len;
  unsigned int lane_lo = 0;

  const unsigned int * words = (const unsigned int *)key;

  // Scramble one input word and fold it into a lane
  auto absorb = [mult, shift](unsigned int lane, unsigned int word) -> unsigned int {
    word *= mult;
    word ^= word >> shift;
    word *= mult;
    lane *= mult;
    lane ^= word;
    return lane;
  };

  // Two words per iteration: one for each lane
  for(; len >= 8; len -= 8) {
    lane_hi = absorb(lane_hi, words[0]);
    lane_lo = absorb(lane_lo, words[1]);
    words += 2;
  }

  // One more whole word, if any, goes to the upper lane
  if(len >= 4) {
    lane_hi = absorb(lane_hi, *words);
    words++;
    len -= 4;
  }

  // Trailing bytes go to the lower lane
  const unsigned char * tail = (const unsigned char *)words;
  if(len == 3) {
    lane_lo ^= tail[2] << 16;
  }
  if((len == 3) || (len == 2)) {
    lane_lo ^= tail[1] << 8;
  }
  if((len >= 1) && (len <= 3)) {
    lane_lo ^= tail[0];
    lane_lo *= mult;
  }

  // Final cross-mixing of the lanes
  lane_hi ^= lane_lo >> 18;
  lane_hi *= mult;
  lane_lo ^= lane_hi >> 22;
  lane_lo *= mult;
  lane_hi ^= lane_lo >> 17;
  lane_hi *= mult;
  lane_lo ^= lane_hi >> 19;
  lane_lo *= mult;

  return (((uint64_t)lane_hi) << 32) | lane_lo;
}

/**
 * Bob Jenkins' one-at-a-time hash.
 *
 * The state is kept in 32 bits so that the right shifts never pull in
 * bits above bit 31, and every key byte is added as an unsigned value
 * so that bytes >= 0x80 are not sign-extended.
 *
 * \param key bytes to hash (not modified).
 * \param len number of bytes in key.
 * \return 32-bit hash; 0 for an empty key.
 */
uint32_t jenkins_one_at_a_time_hash(char *key, size_t len)
{
  uint32_t hash = 0;
  for(size_t i = 0; i < len; ++i)
    {
      hash += (unsigned char)key[i];
      hash += (hash << 10);
      hash ^= (hash >> 6);
    }
  hash += (hash << 3);
  hash ^= (hash >> 11);
  hash += (hash << 15);
  return hash;
}

void simon_print_trail_hash_map(std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>> trails_hash_map)
{
  std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>>::const_iterator map_iter = trails_hash_map.begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != trails_hash_map.end()) {
	 trail_cnt++;
	 printf("[%5d] ", trail_cnt);
	 printf(" H[%llX] | ", map_iter->first);
	 for(uint32_t i = 0; i < NROUNDS; i++) {
		printf("%4X %4X ", map_iter->second[i].dx, map_iter->second[i].dy);
	 }
	 printf("\n");
	 map_iter++;
  }
}

void simon_print_trail_array(std::array<differential_t, NROUNDS> trail_array)
{
  for(uint32_t i = 0; i < NROUNDS; i++) {
	 printf("%4X %4X ", trail_array[i].dx, trail_array[i].dy);
  }
  printf("\n");
}

void simon_print_differential_hash_map(std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>> diffs_hash_map)
{
  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::const_iterator map_iter = diffs_hash_map.begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != diffs_hash_map.end()) {
	 trail_cnt++;
	 printf("[%5d] ", trail_cnt);
	 printf(" H[%llX] | ", map_iter->first);
	 printf("%4X %4X -> %4X %4X\n", map_iter->second[0].dx, map_iter->second[0].dy, map_iter->second[1].dx, map_iter->second[1].dy);
	 map_iter++;
  }
}

void simon_print_differential_array(std::array<differential_t, NROUNDS> trail_array)
{
  uint32_t diff[4] = {0};
  diff[0] = trail_array[0].dx;
  diff[1] = trail_array[0].dy ^ trail_array[1].dx;
  diff[2] = trail_array[NROUNDS - 1].dx;
  diff[3] = trail_array[NROUNDS - 1].dy;

  printf("%4X %4X -> %4X %4X\n", diff[0], diff[1], diff[2], diff[3]);
}

void simon_trail_to_diff_array(std::array<differential_t, NROUNDS> trail_array, 
										 std::array<differential_t, SIMON_NDIFFS>* diff_array)
{
  (*diff_array)[0].dx = trail_array[0].dx;
  (*diff_array)[0].dy = trail_array[0].dy ^ trail_array[1].dx;
  (*diff_array)[1].dx = trail_array[NROUNDS - 1].dx;
  (*diff_array)[1].dy = trail_array[NROUNDS - 1].dy;
}

/**
 * Debug variant of the hash-table update: register a trail and, if it
 * is new, the differential that it covers. Progress is logged to stdout.
 *
 * The trail is looked up by simon_trail_hash(); when absent it is
 * inserted and its differential is looked up by simon_diff_hash().
 * A new differential is stored with probability 1.0 on its input
 * element and the trail probability on its output element; for an
 * existing one the stored differences are asserted to match the trail.
 *
 * \param diff trail: one (dx, dy, p) entry per round.
 * \param trail_len number of rounds in diff (asserted: == NROUNDS).
 * \param diffs_hash_map table of differentials indexed by hash.
 * \param trails_hash_map table of trails indexed by hash.
 * \param diff_max not used by this variant.
 */
void simon_murmur_hash_map_update(const differential_t diff[NROUNDS], 
                                  const uint32_t trail_len,
                                  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>* diffs_hash_map,
                                  std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>>* trails_hash_map,
                                  differential_t** diff_max)
{
  (void)diff_max;                 // the running maximum is not maintained here

  // size() returns size_t: cast it to match the %d conversion
  printf("\n[%s:%d] ------- diff %4X %4X | #h %d ------\n", __FILE__, __LINE__, diff[0].dy, diff[1].dx, (uint32_t)trails_hash_map->size());
#if 1                             // DEBUG
  printf("[%s:%s():%d] Incoming trail:\n", __FILE__, __FUNCTION__, __LINE__);
  for(uint32_t i = 0; i < trail_len; i++) {
    printf("%4X %4X ", diff[i].dx, diff[i].dy);
  }
  printf("\n");
#endif
  assert(trail_len == NROUNDS);

#if 0                             // DEBUG
  printf("[%s:%d] Existing trails\n", __FILE__, __LINE__);
  simon_print_trail_hash_map(*trails_hash_map);
#endif
#if 1                             // DEBUG
  printf("[%s:%d] Existing differentials\n", __FILE__, __LINE__);
  simon_print_differential_hash_map(*diffs_hash_map);
#endif

  // Probability of the trail = product of the round probabilities
  double p = 1.0;
  for(uint32_t i = 0; i < trail_len; i++) {
    p *= diff[i].p;
  }

  std::array<differential_t, NROUNDS> trail_array;
  for(uint32_t i = 0; i < NROUNDS; i++) {
    trail_array[i].dx = diff[i].dx;
    trail_array[i].dy = diff[i].dy;
    trail_array[i].npairs = diff[i].npairs;
    trail_array[i].p = diff[i].p;
  }

  const uint64_t trail_hash = simon_trail_hash(trail_array);
  if(trails_hash_map->find(trail_hash) != trails_hash_map->end()) { // trail already added
    printf("[%s:%d] Trail found!\n\n", __FILE__, __LINE__);
#if 0                             // DEBUG
    simon_print_trail_array(trails_hash_map->find(trail_hash)->second);
#endif
    return;
  }

  // The trail is not in the trail table: add it
  printf("[%s:%d] Add new trail: 2^%f | %d\n", __FILE__, __LINE__, log2(p), (uint32_t)trails_hash_map->size());
  trails_hash_map->insert(std::pair<uint64_t, std::array<differential_t, NROUNDS>>(trail_hash, trail_array));

  const uint64_t diff_hash = simon_diff_hash(trail_array);
  const std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::iterator diff_hash_map_iter = 
    diffs_hash_map->find(diff_hash);
#if 1                             // DEBUG
  {
    std::array<differential_t, SIMON_NDIFFS> tmp;
    simon_trail_to_diff_array(trail_array, &tmp);
    printf("[%s:%d] Search for differential: %4X %4X %4X -> %4X %4X\n", 
           __FILE__, __LINE__, trail_array[1].dx, tmp[0].dx, tmp[0].dy, tmp[1].dx, tmp[1].dy);
  }
#endif

  if(diff_hash_map_iter == diffs_hash_map->end()) { // new differential
    printf("[%s:%d] Add new differential: %d\n", __FILE__, __LINE__, (uint32_t)diffs_hash_map->size());
    simon_print_differential_array(trail_array);

    // Value-initialize so that no indeterminate member is copied into
    // the table, then store the probabilities the same way as the
    // string-keyed tables do: 1.0 on the input, p on the output.
    std::array<differential_t, SIMON_NDIFFS> diff_array{};
    simon_trail_to_diff_array(trail_array, &diff_array);
    diff_array[0].p = 1.0;
    diff_array[1].p = p;
    assert(diff_array[0].dy == (diff[0].dy ^ diff[1].dx));
    diffs_hash_map->insert(std::pair<uint64_t, std::array<differential_t, SIMON_NDIFFS>>(diff_hash, diff_array));

  } else {                        // differential already stored
    printf("[%s:%d] Differential is found: %d\n", __FILE__, __LINE__, (uint32_t)diffs_hash_map->size());
    simon_print_differential_array(trail_array);

    printf("[%s:%d] Existing differentials\n", __FILE__, __LINE__);
    simon_print_differential_hash_map(*diffs_hash_map);

    // Check the iterator before it is dereferenced
    assert(diff_hash_map_iter != diffs_hash_map->end());
    const std::array<differential_t, SIMON_NDIFFS>& stored = diff_hash_map_iter->second;
    printf("[%s:%d] Found H[%llX] %4X %4X -> %4X %4X\n", __FILE__, __LINE__, (unsigned long long)diff_hash_map_iter->first, stored[0].dx, stored[0].dy, stored[1].dx, stored[1].dy);

    // The stored differential must be the one covered by this trail
    std::array<differential_t, SIMON_NDIFFS> expected;
    simon_trail_to_diff_array(trail_array, &expected);
    assert(expected[0].dx == stored[0].dx);
    assert(expected[0].dy == stored[0].dy);
    assert(expected[1].dx == stored[1].dx);
    assert(expected[1].dy == stored[1].dy);
  }

}



/* --- */
#if 0
/**
 * 64-bit hash computed with 64-bit arithmetic.
 *
 * Whole 8-byte blocks are scrambled and folded into the state one by
 * one, the remaining (len & 7) bytes are XOR-ed in as a little-endian
 * partial block, and the state is finalized with two shift/multiply
 * steps.
 *
 * \param key pointer to the data to hash (read as uint64_t blocks).
 * \param len length of the data in bytes.
 * \param seed initial value XOR-ed into the state.
 * \return 64-bit hash value.
 */
uint64_t MurmurHash64A(const void * key, int len, unsigned int seed)
{
  const uint64_t mult = 0xc6a4a7935bd1e995;
  const int shift = 47;

  uint64_t state = seed ^ (len * mult);

  const uint64_t * const first = (const uint64_t *)key;
  const uint64_t * const stop = first + (len/8);

  for(const uint64_t * blk = first; blk != stop; ++blk) {
    uint64_t word = *blk;

    word *= mult;
    word ^= word >> shift;
    word *= mult;

    state ^= word;
    state *= mult;
  }

  // Trailing bytes: byte j contributes at bit position 8*j
  const unsigned char * tail = (const unsigned char*)stop;
  const int ntail = (len & 7);
  for(int j = ntail - 1; j >= 0; --j) {
    state ^= uint64_t(tail[j]) << (8 * j);
  }
  if(ntail != 0) {
    state *= mult;
  }

  state ^= state >> shift;
  state *= mult;
  state ^= state >> shift;

  return state;
}
#else
// 64-bit hash for 32-bit platforms

/**
 * 64-bit hash assembled from two 32-bit lanes.
 *
 * Whole 4-byte words are absorbed alternately into lane 0 and lane 1,
 * up to three trailing bytes are mixed into lane 1, and the lanes are
 * cross-mixed before lane 0 becomes the upper and lane 1 the lower
 * half of the result.
 *
 * \param key pointer to the data to hash (read as unsigned int words).
 * \param len length of the data in bytes.
 * \param seed initial value XOR-ed with len to start lane 0.
 * \return (lane 0 << 32) | lane 1.
 */
uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
{
  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  unsigned int lane[2] = { seed ^ len, 0 };

  const unsigned int * data = (const unsigned int *)key;

  // Number of whole words; a non-positive length has none
  const int nwords = (len > 0) ? (len / 4) : 0;
  for(int w = 0; w < nwords; w++) {
    unsigned int k = data[w];
    k *= m;
    k ^= k >> r;
    k *= m;

    unsigned int& target = lane[w & 1]; // even words -> lane 0, odd words -> lane 1
    target *= m;
    target ^= k;
  }

  const unsigned char * tail = (const unsigned char *)(data + nwords);
  const int ntail = len - (4 * nwords);
  switch(ntail)
  {
  case 3:
    lane[1] ^= tail[2] << 16;
    // fall through
  case 2:
    lane[1] ^= tail[1] << 8;
    // fall through
  case 1:
    lane[1] ^= tail[0];
    lane[1] *= m;
    break;
  default:
    break;
  }

  // Final cross-mixing of the lanes
  lane[0] ^= lane[1] >> 18;
  lane[0] *= m;
  lane[1] ^= lane[0] >> 22;
  lane[1] *= m;
  lane[0] ^= lane[1] >> 17;
  lane[0] *= m;
  lane[1] ^= lane[0] >> 19;
  lane[1] *= m;

  uint64_t h = lane[0];
  h <<= 32;
  h |= lane[1];

  return h;
}
#endif



/* --- */

void simon_murmur_hash_map_update(const differential_t diff[NROUNDS], 
											 const uint32_t trail_len,
											 std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>* diffs_hash_map,
											 std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>>* trails_hash_map,
											 differential_t** diff_max)
{
  printf("\n[%s:%d] ------- diff %4X %4X | #h %d ------\n", __FILE__, __LINE__, diff[0].dy, diff[1].dx, trails_hash_map->size());

  assert(trail_len == NROUNDS);
  const uint32_t nrounds = trail_len;
  double p = 1.0;
  for(uint32_t i = 0; i < trail_len; i++) {
	 p *= diff[i].p;
  }

  std::array<differential_t, NROUNDS> trail_array;
  for(uint32_t i = 0; i < NROUNDS; i++) {
	 trail_array[i].dx = 0;
	 trail_array[i].dy = 0;
	 trail_array[i].npairs = 0;
	 trail_array[i].p = 0;
  }
  for(uint32_t i = 0; i < trail_len; i++) {
	 trail_array[i] = diff[i];
  }

  std::array<differential_t, SIMON_NDIFFS> diff_array;

  diff_array[0].dx = diff[0].dx;
  diff_array[0].dy = diff[0].dy ^ diff[1].dx;
  diff_array[0].npairs = diff[0].npairs;
  diff_array[0].p = diff[0].p;

  diff_array[1].dx = diff[trail_len - 1].dx;
  diff_array[1].dy = diff[trail_len - 1].dy;
  diff_array[1].npairs = diff[trail_len - 1].npairs;
  diff_array[1].p = diff[trail_len - 1].p;

  //  std::string s_trail = trail_to_string(diff, trail_len);
  //  std::unordered_map<std::string, differential_t**>::const_iterator hash_map_iter = 
  //	 trails_hash_map->find(s_trail);
  uint64_t trail_hash = simon_trail_hash(trail_array);
  std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>>::const_iterator hash_map_iter = 
	 trails_hash_map->find(trail_hash);

  if(hash_map_iter == trails_hash_map->end()) { // trail is not in the trail table
	 //	 printf("\r[%s:%d] Add new trail: 2^%f | %d", __FILE__, __LINE__, log2(p), (uint32_t)trails_hash_map->size());
	 //	 fflush(stdout);
	 printf("[%s:%d] Add new trail: 2^%f | %d\n", __FILE__, __LINE__, log2(p), (uint32_t)trails_hash_map->size());

    // Add new trail
	 //	 simon_trails_hash_map_add_new(diff, trail_len, trails_hash_map);
	 //	 simon_trails_murmur_hash_map_add_new(trail_array, trail_len, trails_hash_map);
	 std::pair<uint64_t, std::array<differential_t, NROUNDS>> new_pair (trail_hash, trail_array);
	 trails_hash_map->insert(new_pair);

	 const double p_max = (*diff_max)[1].p;
	 //	 std::string s_diff = diff_to_string(diff, trail_len);
	 //	 std::unordered_map<std::string, differential_t**>::const_iterator diff_hash_map_iter = 
	 //		diffs_hash_map->find(s_diff);

		uint64_t diff_hash = simon_diff_hash(trail_array);
    std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::iterator diff_hash_map_iter = 
	   diffs_hash_map->find(diff_hash);

#if 1
  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::const_iterator map_iter = diffs_hash_map->begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != diffs_hash_map->end()) {
	 trail_cnt++;
	 printf("[%s:%d] [%5d] ", __FILE__, __LINE__, trail_cnt);
    printf(" %llX | ", map_iter->first);
	 for(uint32_t i = 0; i < 2; i++) {
	 printf("%4X %4X ", map_iter->second[i].dx, map_iter->second[i].dy);
	 }
	 printf(" | %4X %4X ", diff[0].dy, diff[1].dx);
	 printf("\n");
	 map_iter++;
  }
  if(diffs_hash_map->size() > 0) {
    map_iter = diffs_hash_map->begin();
    assert(map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
    printf("[%s:%d] CHECKPOINT!\n", __FILE__, __LINE__);
  }
#endif

	 if(diff_hash_map_iter != diffs_hash_map->end()) {
		printf("[%s:%d] %4X %4X || %4X %4X | %4X %4X\n", __FILE__, __LINE__, diff[0].dy, diff[1].dx, diff_hash_map_iter->second[0].dx, diff_hash_map_iter->second[0].dy, diff_hash_map_iter->second[1].dx, diff_hash_map_iter->second[1].dy);
		assert(diff_hash_map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
	 }

	 if(diff_hash_map_iter == diffs_hash_map->end()) { // differential is not in the diff table
		//		printf("\r[%s:%d] Add new differential: %4X %4X -> %4X %4X 2^%f | #trails %d", __FILE__, __LINE__, diff[0].dx, diff[0].dy ^ diff[1].dx, diff[trail_len - 1].dx, diff[trail_len -1].dy, log2(p), (uint32_t)trails_hash_map->size());
		//		fflush(stdout);
		printf("[%s:%d] Add new differential: %4X %4X -> %4X %4X 2^%f | #trails %d\n", __FILE__, __LINE__, diff[0].dx, diff[0].dy ^ diff[1].dx, diff[trail_len - 1].dx, diff[trail_len -1].dy, log2(p), (uint32_t)trails_hash_map->size());

		// Add differential
		//		simon_diffs_hash_map_add_new(diff, trail_len, diffs_hash_map);
		//		diff_array[0].dy = (diff[0].dy ^ diff[1].dx);
		uint64_t diff_hash = simon_diff_hash(trail_array);
	   std::pair<uint64_t, std::array<differential_t, SIMON_NDIFFS>> new_pair (diff_hash, diff_array);
		assert(diff_array[0].dy == (diff[0].dy ^ diff[1].dx));
	   diffs_hash_map->insert(new_pair);

		 //      printf("[%s:%d] CHECKPOINT!\n", __FILE__, __LINE__);
	 //		printf("%4X %4X ", diffs_hash_map->begin()->second[0].dx, diffs_hash_map->begin()->second[0].dy);
	 //		printf("%4X %4X ", diffs_hash_map->begin()->second[1].dx, diffs_hash_map->begin()->second[1].dy);

#if 1
  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::const_iterator map_iter = diffs_hash_map->begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != diffs_hash_map->end()) {
	 trail_cnt++;
	 printf("[%s:%d] [%5d] ", __FILE__, __LINE__, trail_cnt);
    printf(" %llX | ", map_iter->first);
	 for(uint32_t i = 0; i < 2; i++) {
	 printf("%4X %4X ", map_iter->second[i].dx, map_iter->second[i].dy);
	 //		printf("%4X %4X ", diff_array[i].dx, diff_array[i].dy);
	 }
	 printf(" | %4X %4X ", diff[0].dy, diff[1].dx);
	 printf("\n");
	 map_iter++;
  }
  if(diffs_hash_map->size() > 0) {
    map_iter = diffs_hash_map->begin();
    assert(map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
    printf("[%s:%d] CHECKPOINT!\n", __FILE__, __LINE__);
  }
#endif

														//		diff_hash_map_iter =  diffs_hash_map->find(diff_hash);
							//		assert(diff_hash_map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));

		if(p > p_max) {
		  (*diff_max)[0].p = 1.0;
		  (*diff_max)[0].dx = diff[0].dx;
		  (*diff_max)[0].dy = diff[0].dy ^ diff[1].dx; // !!
		  (*diff_max)[1].p = p;
		  (*diff_max)[1].dx = diff[trail_len - 1].dx;
		  (*diff_max)[1].dy = diff[trail_len - 1].dy;
		  printf("\n[%s:%d] Update max for %d R: (%4X %4X) -> (%4X %4X) 2^%f\n", __FILE__, __LINE__,  nrounds, (*diff_max)[0].dx, (*diff_max)[0].dy, (*diff_max)[1].dx, (*diff_max)[1].dy, log2((*diff_max)[1].p));
		}
#if 1
																							 {
  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::const_iterator map_iter = diffs_hash_map->begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != diffs_hash_map->end()) {
	 trail_cnt++;
	 printf("[%s:%d] [%5d] ", __FILE__, __LINE__, trail_cnt);
    printf(" %llX | ", map_iter->first);
	 for(uint32_t i = 0; i < 2; i++) {
	 printf("%4X %4X ", map_iter->second[i].dx, map_iter->second[i].dy);
	 //		printf("%4X %4X ", diff_array[i].dx, diff_array[i].dy);
	 }
	 printf(" | %4X %4X ", diff[0].dy, diff[1].dx);
	 printf("\n");
	 map_iter++;
  }
  if(diffs_hash_map->size() > 0) {
    map_iter = diffs_hash_map->begin();
    assert(map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
    printf("[%s:%d] CHECKPOINT!\n", __FILE__, __LINE__);
  }
	 }
#endif

	 } else {						  // differential is already stored
		double old_p = diff_hash_map_iter->second[1].p;
		diff_hash_map_iter->second[1].p += p;
		double new_p = diff_hash_map_iter->second[1].p;
		//		printf("\r[%s:%d] Improve differential prob:  %4X %4X -> %4X %4X 2^%f -> 2^%f | #trails %d", __FILE__, __LINE__, diff_hash_map_iter->second[0].dx, diff_hash_map_iter->second[0].dy, diff_hash_map_iter->second[1].dx, diff_hash_map_iter->second[1].dy, log2(old_p), log2(new_p), (uint32_t)trails_hash_map->size());
		//		fflush(stdout);
		printf("[%s:%d] Improve differential prob:  %4X %4X -> %4X %4X 2^%f -> 2^%f | #trails %d\n", __FILE__, __LINE__, diff_hash_map_iter->second[0].dx, diff_hash_map_iter->second[0].dy, diff_hash_map_iter->second[1].dx, diff_hash_map_iter->second[1].dy, log2(old_p), log2(new_p), (uint32_t)trails_hash_map->size());
#if 1
																							 {
  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::const_iterator map_iter = diffs_hash_map->begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != diffs_hash_map->end()) {
	 trail_cnt++;
	 printf("[%s:%d] [%5d] ", __FILE__, __LINE__, trail_cnt);
    printf(" %llX | ", map_iter->first);
	 for(uint32_t i = 0; i < 2; i++) {
	 printf("%4X %4X ", map_iter->second[i].dx, map_iter->second[i].dy);
	 //		printf("%4X %4X ", diff_array[i].dx, diff_array[i].dy);
	 }
	 printf(" | %4X %4X ", diff[0].dy, diff[1].dx);
	 printf("\n");
	 map_iter++;
  }
  if(diffs_hash_map->size() > 0) {
    map_iter = diffs_hash_map->begin();
    assert(map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
    printf("[%s:%d] CHECKPOINT!\n", __FILE__, __LINE__);
  }
	 }
#endif

#if 1									  // DEBUG
		assert(diff_hash_map_iter->second[0].dx == diff[0].dx);
		assert(diff_hash_map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
		assert(diff_hash_map_iter->second[1].dx == diff[trail_len - 1].dx);
		assert(diff_hash_map_iter->second[1].dy == diff[trail_len - 1].dy);
#endif
		if(new_p > p_max) {
		  (*diff_max)[0].p = 1.0;
		  (*diff_max)[0].dx = diff_hash_map_iter->second[0].dx;
		  (*diff_max)[0].dy = diff_hash_map_iter->second[0].dy;
		  (*diff_max)[1].p = new_p;
		  (*diff_max)[1].dx = diff_hash_map_iter->second[1].dx;
		  (*diff_max)[1].dy = diff_hash_map_iter->second[1].dy;
		  printf("\n[%s:%d] Update max for %d R: (%4X %4X) -> (%4X %4X) 2^%f\n", __FILE__, __LINE__,  nrounds, (*diff_max)[0].dx, (*diff_max)[0].dy, (*diff_max)[1].dx, (*diff_max)[1].dy, log2((*diff_max)[1].p));
		}
#if 1
																							 {
  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::const_iterator map_iter = diffs_hash_map->begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != diffs_hash_map->end()) {
	 trail_cnt++;
	 printf("[%s:%d] [%5d] ", __FILE__, __LINE__, trail_cnt);
    printf(" %llX | ", map_iter->first);
	 for(uint32_t i = 0; i < 2; i++) {
	 printf("%4X %4X ", map_iter->second[i].dx, map_iter->second[i].dy);
	 //		printf("%4X %4X ", diff_array[i].dx, diff_array[i].dy);
	 }
	 printf(" | %4X %4X ", diff[0].dy, diff[1].dx);
	 printf("\n");
	 map_iter++;
  }
  if(diffs_hash_map->size() > 0) {
    map_iter = diffs_hash_map->begin();
    assert(map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
    printf("[%s:%d] CHECKPOINT!\n", __FILE__, __LINE__);
  }
	 }
#endif

	 }

	 if((*diff_max)[1].p > p_max) {
		//			 printf("\n[%s:%d] Update max for %d R: (%4X %4X) -> (%4X %4X) 2^%f\n", __FILE__, __LINE__,  nrounds, (*diff_max)[0].dx, (*diff_max)[0].dy, (*diff_max)[1].dx, (*diff_max)[1].dy, log2((*diff_max)[1].p));
		// Print all trails
		uint32_t dx_in = ((*diff_max))[0].dx;
		uint32_t dy_in = ((*diff_max))[0].dy;
		uint32_t dx_out = ((*diff_max))[1].dx;
		uint32_t dy_out = ((*diff_max))[1].dy;
		//		std::unordered_map<std::string, differential_t**>::const_iterator hash_map_iter = trails_hash_map->begin();
      std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>>::iterator hash_map_iter = trails_hash_map->begin();
		double p_tot = 0.0;
		while(hash_map_iter != trails_hash_map->end()) {
		  uint32_t dx_trail_in = hash_map_iter->second[0].dx;
		  uint32_t dy_trail_in = hash_map_iter->second[0].dy ^ hash_map_iter->second[1].dx;
		  uint32_t dx_trail_out = hash_map_iter->second[trail_len - 1].dx;
		  uint32_t dy_trail_out = hash_map_iter->second[trail_len - 1].dy;
		  //				printf("[%s:%d] (%4X %4X %4X %4X) (%4X %4X %4X %4X)\n", __FILE__, __LINE__, dx_in, dy_in, dx_out, dy_out, dx_trail_in, dy_trail_in, dx_trail_out, dy_trail_out);
		  if((dx_in == dx_trail_in) && (dy_in == dy_trail_in) && (dx_out == dx_trail_out) && (dy_out == dy_trail_out)) {
			 double p = 1.0;
			 for(uint32_t i = 0; i < trail_len; i++) {
				printf("%4X %4X ", hash_map_iter->second[i].dx, hash_map_iter->second[i].dy);
				p *= hash_map_iter->second[i].p;
			 }
			 printf(" | 2^%f\n", log2(p));
			 p_tot += p;
		  }
		  hash_map_iter++;
		}
		printf("[%s:%d] Sum 2^%f\n", __FILE__, __LINE__, log2(p_tot));
	 }
#if 1
																							 {
  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::const_iterator map_iter = diffs_hash_map->begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != diffs_hash_map->end()) {
	 trail_cnt++;
	 printf("[%s:%d] [%5d] ", __FILE__, __LINE__, trail_cnt);
    printf(" %llX | ", map_iter->first);
	 for(uint32_t i = 0; i < 2; i++) {
	 printf("%4X %4X ", map_iter->second[i].dx, map_iter->second[i].dy);
	 //		printf("%4X %4X ", diff_array[i].dx, diff_array[i].dy);
	 }
	 printf(" | %4X %4X ", diff[0].dy, diff[1].dx);
	 printf("\n");
	 map_iter++;
  }
  if(diffs_hash_map->size() > 0) {
    map_iter = diffs_hash_map->begin();
    assert(map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
    printf("[%s:%d] CHECKPOINT!\n", __FILE__, __LINE__);
  }
	 }
#endif


  } else { 							  // trail already added
#if 0								  // DEBUG
		  //	 std::string s_diff = diff_to_string(diff, trail_len);
		  //	 std::unordered_map<std::string, differential_t**>::const_iterator diff_hash_map_iter = 
		  //		diffs_hash_map->find(s_diff);
    uint64_t diff_hash = simon_diff_hash(trail_array);
    std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::iterator diff_hash_map_iter = 
	   diffs_hash_map->find(diff_hash);

	 assert(diff_hash_map_iter != diffs_hash_map->end());
	 assert((*diff_max)[1].p >= p);
#endif
  }
#if 1
																							 {
  std::unordered_map<uint64_t, std::array<differential_t, SIMON_NDIFFS>>::const_iterator map_iter = diffs_hash_map->begin();
  uint32_t trail_cnt = 0; 
  while(map_iter != diffs_hash_map->end()) {
	 trail_cnt++;
	 printf("[%s:%d] [%5d] ", __FILE__, __LINE__, trail_cnt);
    printf(" %llX | ", map_iter->first);
	 for(uint32_t i = 0; i < 2; i++) {
	 printf("%4X %4X ", map_iter->second[i].dx, map_iter->second[i].dy);
	 //		printf("%4X %4X ", diff_array[i].dx, diff_array[i].dy);
	 }
	 printf(" | %4X %4X ", diff[0].dy, diff[1].dx);
	 printf("\n");
	 map_iter++;
  }
	 printf("[%s:%d] %4X %4X\n", __FILE__, __LINE__, diff[0].dy, diff[1].dx);
  if(diffs_hash_map->size() > 0) {
    map_iter = diffs_hash_map->begin();
	 printf("[%s:%d] %4X %4X\n", __FILE__, __LINE__, diff[0].dy, diff[1].dx);
    assert(map_iter->second[0].dy == (diff[0].dy ^ diff[1].dx));
    printf("[%s:%d] CHECKPOINT!\n", __FILE__, __LINE__);
  }
	 }
#endif

}

/* --- */

void test_simon_hash_custom()
{
  //  std::unordered_map<std::string, differential_t**>* diffs_hash_map;
  //  std::unordered_map<std::string, differential_t**>* trails_hash_map;
  //  std::unordered_map<std::array<differential_t, NROUNDS>, std::array<differential_t, NROUNDS>> hash_map;
  std::unordered_map<uint64_t, std::array<differential_t, NROUNDS>> hash_map;

  differential_t trail[NROUNDS] = {{0, 0, 0, 0.0}};
  std::array<differential_t, NROUNDS> trail_array;

  for(uint32_t i = 0; i < NROUNDS; i++) {
    trail_array[i] = trail[i];
  }

  uint64_t trail_hash = random32();
  std::pair<uint64_t, std::array<differential_t, NROUNDS>> new_pair (trail_hash, trail_array);
  hash_map.insert(new_pair);

}

#if 0
// Equality of two trails (disabled by the enclosing #if 0).
// NOTE(review): the inner `a == b` has the same operand types as this
// operator, so it appears to resolve to this very overload and recurse
// without terminating -- confirm and compare the elements explicitly
// before enabling this code.
bool operator==(std::array<differential_t, NROUNDS> a, std::array<differential_t, NROUNDS> b)
{
  return (a.size() == b.size()) && (a == b);
}

// Hash of a trail (disabled by the enclosing #if 0). Placeholder: the
// argument is not read and the function always returns 0; the
// commented-out loop sketches an XOR of per-element hashes.
uint64_t trail_hash(std::array<differential_t, NROUNDS> diff) 
{
  size_t h = 0;
//  for (double* p = dr.p; p != dr.p + dr.size; ++p)
//	 h ^= hash(*p);
  return h;
}
#endif


/* --- */

#ifndef BOOST_HASH_H
#define BOOST_HASH_H	 /**< C++ STL Boost hash */
#include <boost/functional/hash.hpp>
#endif

/**
 * Compute a hash value of a Simon trail
 */
std::size_t simon_trail_hash(std::array<uint32_t, NROUNDS> c);

std::size_t simon_diff_hash(std::array<uint32_t, SIMON_NDIFFS> c);

/**
 * Hash a trail given as NROUNDS 32-bit words by combining all of its
 * elements with boost::hash_range.
 */
std::size_t simon_trail_hash(std::array<uint32_t, NROUNDS> c)
{
  const std::size_t combined = boost::hash_range(c.begin(), c.end());
  return combined;
}

/**
 * Hash a differential given as SIMON_NDIFFS 32-bit words by combining
 * all of its elements with boost::hash_range.
 */
std::size_t simon_diff_hash(std::array<uint32_t, SIMON_NDIFFS> c)
{
  const std::size_t combined = boost::hash_range(c.begin(), c.end());
  return combined;
}



/* --- */

namespace simon_hash
{
  /**
   * Hash of a single round difference: dy and dx are packed into one
   * 64-bit value (dy shifted left by WORD_SIZE, OR-ed with dx) that is
   * hashed with boost::hash<uint64_t>.
   *
   * \param diff round difference; only dx and dy are read.
   * \return hash of the packed (dy, dx) pair.
   */
  std::size_t hash_value(differential_t const& diff)
  {
    boost::hash<uint64_t> hasher;
    // Widen dy before shifting: shifting at the member's own width
    // would drop (or, for a shift by the full width, leave undefined)
    // the bits that are meant to land in the upper half of val.
    // NOTE(review): assumes dx and dy are narrower than 64 bits -- confirm.
    const uint64_t val = ((((uint64_t)diff.dy) << WORD_SIZE) | ((uint64_t)diff.dx));
    return hasher(val);
  }
}

//std::size_t simon_trail_hash_v2(std::array<differential_t, NROUNDS> c)
//{
//  return boost::hash_range(c.begin(), c.end());
//}


/* --- */
  // Add initial trail
#if 0
  // Disabled fragment (the enclosing function is not part of this dump).
  // Seeds the two hash tables with the initial best trail:
  //   1. computes the trail probability p as the product of the per-round probabilities,
  //   2. stores a heap copy of the trail in trails_hash_map, keyed by trail_to_string(),
  //   3. if the differential derived from the trail is not yet in diffs_hash_map,
  //      stores it there (keyed by diff_to_string()) and updates *diff_max when p is larger.
  //
  // Fix: trails_hash_map.size() is a size_t, so it is printed with %zu (was %d).
  double p = 1.0;
  for(uint32_t i = 0; i < nrounds; i++) {
	 p *= best_trail[i].p;
  }

  assert(trails_hash_map.size() == 0);
  std::string s_trail = trail_to_string(best_trail, *best_trail_len);
  // The table stores a pointer to a heap-allocated pointer to the trail array.
  differential_t** new_trail;
  new_trail = (differential_t** )calloc(1, sizeof(differential_t*));
  *new_trail = (differential_t*)calloc(*best_trail_len, sizeof(differential_t));
  for(uint32_t i = 0; i < *best_trail_len; i++) {
	 (*new_trail)[i].dx = best_trail[i].dx;
	 (*new_trail)[i].dy = best_trail[i].dy;
	 (*new_trail)[i].p = best_trail[i].p;
	 // Mirror the trail into diff[] as well.
	 diff[i].dx = best_trail[i].dx;
	 diff[i].dy = best_trail[i].dy;
	 diff[i].p = best_trail[i].p;
  }
  std::pair<std::string, differential_t**> new_pair (s_trail,new_trail);
  trails_hash_map.insert(new_pair);

  printf("[%s:%d] Initial trails:\n", __FILE__, __LINE__);
  simon_print_hash_table(trails_hash_map, *best_trail_len);

  // Add initial differential
  std::string s_diff = diff_to_string(best_trail, *best_trail_len);
  std::unordered_map<std::string, differential_t**>::const_iterator diff_hash_map_iter = 
	 diffs_hash_map.find(s_diff);

  // The current maximum is expected to be unset (probability 0) at this point.
  double p_max = (*diff_max)[1].p;
  assert((*diff_max)[1].p == 0.0);
  if(diff_hash_map_iter == diffs_hash_map.end()) {
	 printf("[%s:%d] Add initial differential: %4X %4X -> %4X %4X 2^%f | #trails %zu\n", __FILE__, __LINE__, best_trail[0].dx, best_trail[0].dy ^ best_trail[1].dx, best_trail[*best_trail_len - 1].dx, best_trail[*best_trail_len -1].dy, log2(p), trails_hash_map.size());
	 fflush(stdout);
	 // A stored differential is a 2-element array: [0] = input difference, [1] = output difference with probability p.
	 differential_t** new_diff;
	 new_diff = (differential_t** )calloc(1, sizeof(differential_t*));
	 *new_diff = (differential_t*)calloc(2, sizeof(differential_t));
	 (*new_diff)[0].dx = best_trail[0].dx;
	 (*new_diff)[0].dy = best_trail[0].dy ^ best_trail[1].dx; // !!
	 (*new_diff)[0].p = 1.0;
	 (*new_diff)[1].dx = best_trail[*best_trail_len - 1].dx;
	 (*new_diff)[1].dy = best_trail[*best_trail_len - 1].dy;
	 (*new_diff)[1].p = p;

	 std::pair<std::string, differential_t**> new_pair (s_diff,new_diff);
	 diffs_hash_map.insert(new_pair);

	 if(p > p_max) {
		(*diff_max)[0].p = (*diff_max)[1].p = p;
		(*diff_max)[0].dx = best_trail[0].dx;
		(*diff_max)[0].dy = best_trail[0].dy ^ best_trail[1].dx; // !!
		(*diff_max)[1].dx = best_trail[*best_trail_len - 1].dx;
		(*diff_max)[1].dy = best_trail[*best_trail_len - 1].dy;
		printf("[%s:%d] Update MAX differential: %4X %4X -> %4X %4X 2^%f | #trails %zu\n", __FILE__, __LINE__, (*diff_max)[0].dx, (*diff_max)[0].dy, (*diff_max)[1].dx, (*diff_max)[1].dy, log2((*diff_max)[1].p), trails_hash_map.size());
	 }

  }
  printf("[%s:%d] Initial MAX differential: %4X %4X -> %4X %4X 2^%f | #trails %zu\n", __FILE__, __LINE__, (*diff_max)[0].dx, (*diff_max)[0].dy, (*diff_max)[1].dx, (*diff_max)[1].dy, log2((*diff_max)[1].p), trails_hash_map.size());
#endif


/* 
20131017

Experiments on Simon with 16-bit words on the HPC cluster: Searching for differentials

Maximum HW = 5

Started: Tuesday, 15 Oct, ~ 16:00h
Ended:   Thursday, 17 Oct, ~ 8:00h 
Total: around 40 hours

Start: Tue Oct 15 16:25:57 CEST 2013
End:   Thu Oct 17 07:41:04 CEST 2013 

Last job: id=778043 (0x33792, 0x17) (33792, 17) END Job stopped normally.
 */


/* --- */

/* 
oarstat -u jmuszynski | cut -d " " -f 1 | xargs -I {} oardel {} | grep REGISTERED

If you give names to the jobs (oarsub with option "-n"), you can improve it a bit:

oarstat -u jmuszynski | grep jobname | cut -d " " -f 1 | xargs -I {} oardel {} | grep REGISTERED
 */

/* --- */

  // For every entry of A, erase from A each entry whose key equals
  // differential_to_num() of the entry's (dx, dy) rotated left by i bits,
  // for i = 0 .. n-1.
  //
  // Fix: the original scanned A with a second iterator, called A.erase(key) on
  // the element that iterator pointed at and then incremented it. Erasing
  // invalidates iterators to the erased element, so the inner iterator (and
  // the outer one, when the match was the current entry) was used after being
  // invalidated. Here other entries are erased by key, which leaves A_iter
  // valid, and the current entry is erased through the iterator-returning
  // erase() only after all rotations have been processed. The linear scan for
  // a key is also replaced by the map's own keyed erase.
  //
  // NOTE(review): i starts at 0, so the unrotated entry is a candidate too; if
  // keys are differential_to_num() of the stored value, every entry ends up
  // erased. Starting at i = 1 may have been intended -- confirm.
  std::unordered_map<uint32_t, differential_t>::iterator A_iter = A.begin();
  while(A_iter != A.end()) {
	 const uint32_t self_key = A_iter->first;
	 const uint32_t dx = A_iter->second.dx;
	 const uint32_t dy = A_iter->second.dy;
	 bool b_erase_self = false;
	 for(uint32_t i = 0; i < n; i++) {
		uint32_t dx_rot = LROT(dx, i);
		uint32_t dy_rot = LROT(dy, i);
		differential_t diff_rot = {dx_rot, dy_rot, 0, 0.0};
		const uint32_t diff_rot_key = differential_to_num(diff_rot);
		if(diff_rot_key == self_key) {
		  b_erase_self = true;	  // defer: A_iter must stay valid for the remaining rotations
		} else {
		  A.erase(diff_rot_key); // no-op if the key is absent
		}
	 }
	 if(b_erase_self) {
		A_iter = A.erase(A_iter); // returns the iterator following the erased entry
	 } else {
		++A_iter;
	 }
  }


/* --- */


/* 
find . -name "simon-0x*.log" -size +700c -exec ls -l {} \;|wc -l
 */


/* 


Time: Sat Oct 12 08:11:48 2013
 dx_in 0x44
 dy_in 0x2010
 logfile simon-0x44-0x2010.log

[./tests/simon-xor-threshold-search-tests.cc:1251] INPUT DIFF   44 2010
R[ 0] MAX: (  44 2010) -> (2100   44) 2^-4.000000
R[ 1] MAX: (  44 2010) -> (8444 2100) 2^-8.000000
R[ 2] MAX: (  44 2010) -> (7110  404) 2^-13.678072
R[ 3] MAX: (  44 2010) -> (4440 1400) 2^-18.415037
R[ 4] MAX: (  44 2010) -> ( 508    0) 2^-21.678072
R[ 5] MAX: (  44 2010) -> (  44    0) 2^-23.143574
R[ 6] MAX: (  44 2010) -> ( 410    0) 2^-26.296772
R[ 7] MAX: (  44 2010) -> (   4    0) 2^-26.770951
R[ 8] MAX: (  44 2010) -> (  10    4) 2^-28.700135
R[ 9] MAX: (  44 2010) -> ( 400    0) 2^-30.527882
R[10] MAX: (  44 2010) -> (1000  400) 2^-32.511458
R[11] MAX: (  44 2010) -> (4400 1000) 2^-34.506886
R[12] MAX: (  44 2010) -> (   1 4400) 2^-37.798565
R[13] MAX: (  44 2010) -> (4404    1) 2^-39.795571
[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1346] BEST 12R: (  44 2010) -> (4400 1000) 2^-34.506886

Time: Sat Oct 12 09:42:56 2013
 dx_in 0x88
 dy_in 0x4010
 logfile simon-0x88-0x4010.log

[./tests/simon-xor-threshold-search-tests.cc:1251] INPUT DIFF   88 4010
R[ 0] MAX: (  88 4010) -> (4220   88) 2^-4.000000
R[ 1] MAX: (  88 4010) -> ( 809 4220) 2^-9.000000
R[ 2] MAX: (  88 4010) -> (6204  809) 2^-13.790547
R[ 3] MAX: (  88 4010) -> (180B    0) 2^-17.415037
R[ 4] MAX: (  88 4010) -> (7204    0) 2^-20.530358
R[ 5] MAX: (  88 4010) -> ( 888  220) 2^-24.768779
R[ 6] MAX: (  88 4010) -> ( 820    0) 2^-25.867982
R[ 7] MAX: (  88 4010) -> (   8    0) 2^-26.688966
R[ 8] MAX: (  88 4010) -> (  20    8) 2^-28.650142
R[ 9] MAX: (  88 4010) -> (  88   20) 2^-30.642363
R[10] MAX: (  88 4010) -> ( 200   88) 2^-33.910576
R[11] MAX: (  88 4010) -> ( 888  200) 2^-35.907578
R[12] MAX: (  88 4010) -> (2002 8000) 2^-38.875992
R[13] MAX: (  88 4010) -> ( 888  200) 2^-41.645451
[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1346] BEST 12R: (  88 4010) -> ( 888  200) 2^-35.907578


Time: Sat Oct 12 01:06:43 2013
 dx_in 0x9
 dy_in 0x60
 logfile simon-0x9-0x60.log

[./tests/simon-xor-threshold-search-tests.cc:1251] INPUT DIFF    C  600
R[ 0] MAX: (   C  600) -> ( 220    C) 2^-4.000000
R[ 1] MAX: (   C  600) -> ( 88C  220) 2^-8.000000
R[ 2] MAX: (   C  600) -> (2000  88C) 2^-13.714598
R[ 3] MAX: (   C  600) -> (  8C    0) 2^-15.192645
R[ 4] MAX: (   C  600) -> ( 400    8) 2^-19.917851
R[ 5] MAX: (   C  600) -> (   C    0) 2^-20.830075
R[ 6] MAX: (   C  600) -> (4420 1008) 2^-24.274293
R[ 7] MAX: (   C  600) -> (  8C   20) 2^-26.710796
R[ 8] MAX: (   C  600) -> (   2 8000) 2^-30.826459
R[ 9] MAX: (   C  600) -> ( 800    0) 2^-32.261758
R[10] MAX: (   C  600) -> (2000  800) 2^-34.207637
R[11] MAX: (   C  600) -> (8800 2000) 2^-36.198308
R[12] MAX: (   C  600) -> (   2 8800) 2^-39.477236
R[13] MAX: (   C  600) -> (8808    2) 2^-41.471694
[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1346] BEST 12R: (   C  600) -> (8800 2000) 2^-36.198308

Time: Sat Oct 12 01:06:43 2013
 dx_in 0x9
 dy_in 0x60
 logfile simon-0x9-0x60.log

[./tests/simon-xor-threshold-search-tests.cc:1251] INPUT DIFF    9   60
R[ 0] MAX: (   9   60) -> (  44    9) 2^-4.000000
R[ 1] MAX: (   9   60) -> ( 111   44) 2^-8.000000
R[ 2] MAX: (   9   60) -> ( 400  111) 2^-13.000000
R[ 3] MAX: (   9   60) -> ( 155    0) 2^-15.000000
R[ 4] MAX: (   9   60) -> ( 454  155) 2^-21.000000
R[ 5] MAX: (   9   60) -> (1100 4000) 2^-24.660150
R[ 6] MAX: (   9   60) -> (  44 1000) 2^-26.807127
R[ 7] MAX: (   9   60) -> ( 100  400) 2^-28.962407
R[ 8] MAX: (   9   60) -> (   4    0) 2^-29.921873
R[ 9] MAX: (   9   60) -> ( 100    0) 2^-30.049435
R[10] MAX: (   9   60) -> ( 400  100) 2^-32.035761
R[11] MAX: (   9   60) -> (1100  400) 2^-34.032573
R[12] MAX: (   9   60) -> (4000 1100) 2^-37.325936
R[13] MAX: (   9   60) -> (1101 4000) 2^-39.323491
[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1346] BEST 12R: (   9   60) -> (1100  400) 2^-34.032573


 */


/* --- */

/*
___Running Parallel Tasks on the Cluster___ (Jakub)

I'm running my jobs in a similar manner. I have plenty of independent jobs, but with longer runtimes than yours. I'll describe my approach:

1) The best thing for that is the "besteffort" queue (https://hpc.uni.lu/users/docs/oar.html#besteffort-versus-default). You should submit there each execution.
2) With your walltime (30 minutes), I wouldn't care about checkpointing - you should submit your jobs as "idempotent" (in case they're killed, they will be resubmitted to the queue with the same parameters automatically).
3) As for the memory requirements, to not disturb other users, you should take around 4 cores for each job.

So for the first three points, oarsub command should look more or less like that:
oarsub -t besteffort -t idempotent -l nodes=1/cpu=1/core=4,walltime=0:30:0 "./yourProgram"

4) VERY IMPORTANT: Do not add all of the jobs at once! 18k jobs in the queue is not good. It's easy to detect if you're overloading the queue, I'm using a bash function for that:
function waitForSpaceInQueue {
    while [ `oarstat -u vvelichkov | grep " W " | wc -l` -ge 15 ]; do
        echo "Waiting 10 minutes to free the queue..."
        sleep 10m
    done
}
Basically, this function waits until fewer than 15 of your jobs are in the "WAITING" state in the queue. You should call it before you submit something to the queue (by oarsub).

And now the main approach differs, depending on how your program works. For example, if you supply execution parameters to it, you could do something like that:
for param1 in $valuesOfParam1; do
    for param2 in $valuesOfParam2; do
        #and so on...
        waitForSpaceInQueue
        oarsub -t besteffort -t idempotent -l nodes=1/cpu=1/core=4,walltime=0:30:0 "./yourProgram --param1="$param1" --param2="$param2
    done
done

5) And the main script should run for example under "screen" (see: http://www.phacks.net/how-to-use-gnu-screen/) - so when you log out from the platform, it won't be killed.
6) The last issue, is errors. This is very application dependent. You should check if you have all of the results (sometimes nodes crash, or some bad user will interfere with your jobs) and re-execute missing jobs.
7) And for the last: monitor your jobs from time to time, if something strange is not happening there (like yesterday - 15.000 processes... ;-) ).

 */


/* --- */

/* 

14 min. to compute the DDT.

Typical execution of the cluster search on 14 rounds takes around 30 min:

real    29m56.871s
user    29m48.460s
sys     0m4.712s

of which 12 min. is spent on the computation of the DDT and rounds 9-14 take around 3.5 min:

(6 rounds * 3 min) + 12 min = 30 min .


 */

/* [./tests/simon-xor-threshold-search-tests.cc:1827] num_inputs 18768 2^14.195987 */


/* --- */

/* r4398  */

/* 

./tests/simon-xor-threshold-search-tests.cc:1090:36: error: assignment of member ‘differential_t::p’ in read-only object

 */

/* --- */

/* 
==15505==
==15505== HEAP SUMMARY:
==15505==     in use at exit: 539,380 bytes in 933 blocks
==15505==   total heap usage: 962,032,800 allocs, 962,031,867 frees, 43,675,427,220 bytes allocated
==15505==
==15505== 180 bytes in 3 blocks are definitely lost in loss record 1 of 4
==15505==    at 0x48DF71C: operator new(unsigned int) (vg_replace_malloc.c:255)
==15505==    by 0x806A3F3: void std::vector<differential_t, std::allocator<differential_t> >::_M_insert_aux<differential_t const&>(__gnu_cxx::__normal_iterator<differential_t*, std::vector<differential_t, std::allocator<differential_t> > >, differential_t const&&&) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8064BBA: simon_ddt_add_row(std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, unsigned int, unsigned int) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8064C44: simon_compute_partial_ddt(std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, std::vector<unsigned int, std::allocator<unsigned int> >, unsigned int) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8067D52: test_simon_diff_search(unsigned int, unsigned int, char const*) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x80680A4: main (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==
==15505== 2,560 bytes in 1 blocks are possibly lost in loss record 2 of 4
==15505==    at 0x48DF71C: operator new(unsigned int) (vg_replace_malloc.c:255)
==15505==    by 0x806A3F3: void std::vector<differential_t, std::allocator<differential_t> >::_M_insert_aux<differential_t const&>(__gnu_cxx::__normal_iterator<differential_t*, std::vector<differential_t, std::allocator<differential_t> > >, differential_t const&&&) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8064BBA: simon_ddt_add_row(std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, unsigned int, unsigned int) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8066BC1: simon_diff_search_oneround(unsigned int, std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, std::unordered_map<unsigned int, differential_t*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, differential_t*> > >*, std::unordered_map<unsigned int, differential_t*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, differential_t*> > >*, differential_t, differential_t*, unsigned int) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x806729E: simon_diff_search(unsigned int, unsigned int, unsigned int, unsigned int, std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, char const*) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8067D8E: test_simon_diff_search(unsigned int, unsigned int, char const*) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x80680A4: main (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==
==15505== 14,000 bytes in 700 blocks are definitely lost in loss record 3 of 4
==15505==    at 0x48DE28F: calloc (vg_replace_malloc.c:467)
==15505==    by 0x8066795: simon_diff_search_oneround(unsigned int, std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, std::unordered_map<unsigned int, differential_t*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, differential_t*> > >*, std::unordered_map<unsigned int, differential_t*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, differential_t*> > >*, differential_t, differential_t*, unsigned int) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x806729E: simon_diff_search(unsigned int, unsigned int, unsigned int, unsigned int, std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, char const*) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8067D8E: test_simon_diff_search(unsigned int, unsigned int, char const*) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x80680A4: main (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==
==15505== 522,640 bytes in 229 blocks are definitely lost in loss record 4 of 4
==15505==    at 0x48DF71C: operator new(unsigned int) (vg_replace_malloc.c:255)
==15505==    by 0x806A3F3: void std::vector<differential_t, std::allocator<differential_t> >::_M_insert_aux<differential_t const&>(__gnu_cxx::__normal_iterator<differential_t*, std::vector<differential_t, std::allocator<differential_t> > >, differential_t const&&&) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8064BBA: simon_ddt_add_row(std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, unsigned int, unsigned int) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8066BC1: simon_diff_search_oneround(unsigned int, std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, std::unordered_map<unsigned int, differential_t*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, differential_t*> > >*, std::unordered_map<unsigned int, differential_t*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, differential_t*> > >*, differential_t, differential_t*, unsigned int) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x806729E: simon_diff_search(unsigned int, unsigned int, unsigned int, unsigned int, std::unordered_map<unsigned int, std::vector<differential_t, std::allocator<differential_t> >*, std::hash<unsigned int>, std::equal_to<unsigned int>, std::allocator<std::pair<unsigned int const, std::vector<differential_t, std::allocator<differential_t> >*> > >*, char const*) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x8067D8E: test_simon_diff_search(unsigned int, unsigned int, char const*) (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==    by 0x80680A4: main (in /home/vpv/skcrypto/trunk/work/src/yaarx/bin/simon-xor-threshold-search-tests)
==15505==
==15505== LEAK SUMMARY:
==15505==    definitely lost: 536,820 bytes in 932 blocks
==15505==    indirectly lost: 0 bytes in 0 blocks
==15505==      possibly lost: 2,560 bytes in 1 blocks
==15505==    still reachable: 0 bytes in 0 blocks
==15505==         suppressed: 0 bytes in 0 blocks
==15505==
==15505== For counts of detected and suppressed errors, rerun with: -v
==15505== ERROR SUMMARY: 4 errors from 4 contexts (suppressed: 27 from 8)

 */


/* --- */

/* 
--- [./tests/simon-xor-threshold-search-tests.cc:1324] Round [3 / 3] (   2    0) : T size 65536, H size 86 ---

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1346] BEST 12R: (   0    0) -> (   0    0) 2^-inf
==9958==
==9958== HEAP SUMMARY:
==9958==     in use at exit: 2,367,196,460 bytes in 65,614 blocks
==9958==   total heap usage: 852,829 allocs, 787,215 frees, 4,735,713,400 bytes allocated
==9958==
==9958== LEAK SUMMARY:
==9958==    definitely lost: 2,032,376,620 bytes in 61,374 blocks
==9958==    indirectly lost: 0 bytes in 0 blocks
==9958==      possibly lost: 334,819,840 bytes in 4,240 blocks
==9958==    still reachable: 0 bytes in 0 blocks
==9958==         suppressed: 0 bytes in 0 blocks
==9958== Rerun with --leak-check=full to see details of leaked memory
==9958==
==9958== For counts of detected and suppressed errors, rerun with: -v
==9958== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 27 from 8)
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ 

 */

/* --- */


/* 
--- [./tests/simon-xor-threshold-search-tests.cc:689] Key     8DD0      282     3E02     F958  ---
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():700]:
 Verify 13 R differential (       0        1) -> (     100        0) | 2^32.00 CP pairs
 [./src/simon-xor-threshold-search.cc:462] Round keys from key schedule, 13 R:
[ 0]    8DD0 [ 1]     282 [ 2]    3E02 [ 3]    F958 [ 4]    E150 [ 5]    CE43 [ 6]     127 [ 7]     76B [ 8]    E754 [ 9]    6294 [10]    B07F [11]    6161 [12]    714C
[./src/simon-xor-threshold-search.cc:526] p = 2^-31.000000
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():710]:
 Verified 13 R differential (       0        1) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-31.000000
 [./tests/simon-xor-threshold-search-tests.cc:716] temp_edp 0.000000 (2^-28.800328) nkeys 31
 [./tests/simon-xor-threshold-search-tests.cc:717] OK


real    211m10.655s
user    210m33.494s
sys     0m3.244s

 */

/* --- */

#if 1 // DEBUG
		  // Consistency check for an output difference that is already present in G:
		  // the stored entry must match the freshly computed diff_out / (dx_out, dy_out),
		  // and its dy must equal the input dx.
		  if(G_iter != G->end()) {  // diff already in G
			 // Hex string of the stored entry: dx then dy, each zero-padded to WORD_SIZE / 4 digits
			 std::stringstream oss("");
			 oss << std::hex << std::setfill('0') << std::setw(WORD_SIZE / 4) << G_iter->second->dx;
			 oss << std::hex << std::setfill('0') << std::setw(WORD_SIZE / 4) << G_iter->second->dy;
			 std::string s_tmp = oss.str();

			 // Same encoding for the freshly computed output difference
			 std::stringstream oss_out("");
			 oss_out << std::hex << std::setfill('0') << std::setw(WORD_SIZE / 4) << diff_out.dx;
			 oss_out << std::hex << std::setfill('0') << std::setw(WORD_SIZE / 4) << diff_out.dy;
			 // Fix: read back oss_out; the original read `oss` again, so s_out was a
			 // copy of s_tmp and the diagnostic below printed the wrong string.
			 std::string s_out = oss_out.str();

			 // Print both sides before the asserts fire, to show what mismatched
			 if((G_iter->second)->dx != diff_out.dx) {
				printf("--- [%s:%d] ---\n", __FILE__, __LINE__);
				std::cout << std::hex << G_iter->first << " = (" << G_iter->second->dx << " " << G_iter->second->dy << ")";
				printf("| %4X %4X \n", G_iter->second->dx, G_iter->second->dy);
				std::cout << std::hex << s_out << " = (" << diff_out.dx << " " << diff_out.dy << ")";
				printf("| %4X %4X \n", diff_out.dx, diff_out.dy);
			 }

			 assert(s_diff_out.compare(s_tmp) == 0);
			 assert((G_iter->second)->dx == diff_out.dx);
			 assert((G_iter->second)->dy == diff_out.dy);
			 assert((G_iter->second)->dx == dx_out);
			 assert((G_iter->second)->dy == dy_out);
			 assert((G_iter->second)->dy == dx_in);
			 assert((G_iter->second)->dy == diff_in.dx);
		  }
#endif

/* --- */


/* 
[./src/simon-xor-threshold-search.cc:526] p = 2^-27.093109
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():708]:
 Verified 12 R differential (       0        1) -> (       4        1) | 2^32.00 CP pairs
 Final probability p = 2^-27.093109
 [./tests/simon-xor-threshold-search-tests.cc:714] temp_edp 0.000000 (2^-26.870717) nkeys 3
 [./tests/simon-xor-threshold-search-tests.cc:715] OK


real    24m45.596s
user    24m41.357s
sys     0m0.232s


 Verified 13 R differential (       0        1) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-29.000000
 [./tests/simon-xor-threshold-search-tests.cc:716] temp_edp 0.000000 (2^-27.356144) nkeys 3
 [./tests/simon-xor-threshold-search-tests.cc:717] OK


real    26m40.696s
user    26m36.420s
sys     0m0.000s



 */

/* --- */

/* 

OLD:

round 8:

H_size = 2^23.52
cnt_iter = 2107365984 = 2^30.97
Time = 683.715562 sec = 11.40 min

round 9:

H_size = 2^24.87
cnt_iter = 5331882136 = 2^32.31
Time = 1887.889692 sec = 31.46 min

round 10:

H_size = 2^25.20
cnt_iter = 6696241605 = 2^32.64
Time = 2384.106686 sec = 39.73 min


---

NEW:

[ 0]    C406 [ 1]    ED34 [ 2]    3963 [ 3]    7435 [ 4]    5990 [ 5]    B9B0 [ 6]    141C [ 7]    BD52 [ 8]    3F7A [ 9]    BC46 [10]    B456 [11]    DFA6 [12]    94ED [13]    C614 

--- [./tests/simon-xor-threshold-search-tests.cc:1312] Round [8 / 14] (   1    0) : T size 65536, H size 12061193 2^23.52 ---
[./tests/simon-xor-threshold-search-tests.cc:1336] 1.257563 min 75.453799 s 75453.799000 ms 75453799.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1337] cnt_iter 2107365984 2^30.97 C 0.000000 2^-24.735272
[./src/simon-xor-threshold-search.cc:436] p = 2^-22.415037
[./tests/simon-xor-threshold-search-tests.cc:1351] MAX:     1100     4000 2^-23.402965 2^-22.415037 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1312] Round [9 / 14] (   1    0) : T size 65536, H size 30636606 2^24.87 ---
[./tests/simon-xor-threshold-search-tests.cc:1336] 3.100693 min 186.041604 s 186041.604000 ms 186041604.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1337] cnt_iter 5331882136 2^32.31 C 0.000000 2^-24.772516
[./src/simon-xor-threshold-search.cc:464] Round keys from key schedule, 9 R:
[./tests/simon-xor-threshold-search-tests.cc:1351] MAX:      104        0 2^-25.348416 2^-24.540568 (2^32 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1312] Round [10 / 14] (   1    0) : T size 65536, H size 38529685 2^25.20 ---
[./tests/simon-xor-threshold-search-tests.cc:1336] 3.646142 min 218.768545 s 218768.545000 ms 218768545.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1337] cnt_iter 6696241605 2^32.64 C 0.000000 2^-24.867443
[./src/simon-xor-threshold-search.cc:464] Round keys from key schedule, 10 R:
[./tests/simon-xor-threshold-search-tests.cc:1351] MAX:        1        0 2^-26.163226 2^-24.400087 (2^32 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1312] Round [11 / 14] (   1    0) : T size 65536, H size 40015221 2^25.25 ---
[./tests/simon-xor-threshold-search-tests.cc:1336] 3.813859 min 228.831539 s 228831.539000 ms 228831539.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1337] cnt_iter 6954378315 2^32.70 C 0.000000 2^-24.857132
[./src/simon-xor-threshold-search.cc:464] Round keys from key schedule, 11 R:
[./src/simon-xor-threshold-search.cc:526] p = 2^-26.912537
[./tests/simon-xor-threshold-search-tests.cc:1351] MAX:        4        1 2^-28.115223 2^-26.912537 (2^32 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1312] Round [12 / 14] (   1    0) : T size 65536, H size 40308522 2^25.26 ---
[./tests/simon-xor-threshold-search-tests.cc:1336] 3.866041 min 231.962466 s 231962.466000 ms 231962466.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1337] cnt_iter 7005087707 2^32.71 C 0.000000 2^-24.848008
[./src/simon-xor-threshold-search.cc:464] Round keys from key schedule, 12 R:
[./src/simon-xor-threshold-search.cc:526] p = 2^-27.192645
[./tests/simon-xor-threshold-search-tests.cc:1351] MAX:      100        0 2^-28.960967 2^-27.192645 (2^32 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1312] Round [13 / 14] (   1    0) : T size 65536, H size 40382412 2^25.27 ---
[./tests/simon-xor-threshold-search-tests.cc:1336] 3.880861 min 232.851631 s 232851.631000 ms 232851631.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1337] cnt_iter 7017675802 2^32.71 C 0.000000 2^-24.845079
[./src/simon-xor-threshold-search.cc:464] Round keys from key schedule, 13 R:
[./src/simon-xor-threshold-search.cc:526] p = 2^-29.000000
[./tests/simon-xor-threshold-search-tests.cc:1351] MAX:      400      100 2^-30.949967 2^-29.000000 (2^32 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1312] Round [14 / 14] (   1    0) : T size 65536, H size 40406992 2^25.27 ---
[./tests/simon-xor-threshold-search-tests.cc:1336] 3.880005 min 232.800281 s 232800.281000 ms 232800281.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1337] cnt_iter 7021576716 2^32.71 C 0.000000 2^-24.846199
[./src/simon-xor-threshold-search.cc:464] Round keys from key schedule, 14 R:
[./src/simon-xor-threshold-search.cc:526] p = 2^-inf
[./tests/simon-xor-threshold-search-tests.cc:1351] MAX:     1100      400 2^-32.947555 2^-inf (2^32 CP)

real    81m26.145s
user    81m3.580s
sys     0m10.801s


 */

/* --- */

/* 
#--- [./tests/simon-xor-threshold-search-tests.cc:1851] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:1550] Fill DZ len 2^9.00
[./tests/simon-xor-threshold-search-tests.cc:1563] Fill H len 2^20.00
[./tests/simon-xor-threshold-search-tests.cc:1584] Start search 1381071263496649 H_len 2^20.00 G_len 2^-inf
[./tests/simon-xor-threshold-search-tests.cc:1587] End search 1381071268774385 H_len 2^20.00 G_len 2^5.78
[./tests/simon-xor-threshold-search-tests.cc:1594] 0.087962 min 5.277736 s 5277.736000 ms 5277736.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1595] cnt_iter 537847155 (2^29.00) C 0.000000 (2^-26.602702)
[./tests/simon-xor-threshold-search-tests.cc:1598] Free G

real    0m5.658s
user    0m5.624s
sys     0m0.020s
v
 */

/* 
	[./tests/simon-xor-threshold-search-tests.cc:1547] Fill DZ len 2^9.00
	[./tests/simon-xor-threshold-search-tests.cc:1560] Fill H len 2^20.00
	[./tests/simon-xor-threshold-search-tests.cc:1579] Start search 1381070837357759 H_len 2^20.00 G_len 2^-inf
	[./tests/simon-xor-threshold-search-tests.cc:1582] End search 1381070895637099 H_len 2^20.00 G_len 2^5.78
	[./tests/simon-xor-threshold-search-tests.cc:1589] 0.971322 min 58.279340 s 58279.340000 ms 58279340.000000 mu
	[./tests/simon-xor-threshold-search-tests.cc:1590] cnt_iter 537864597 (2^29.00) C 0.000000 (2^-23.137755)
	[./tests/simon-xor-threshold-search-tests.cc:1593] Free G

real    0m59.788s
user    0m59.664s
sys     0m0.024s

 */

//uint64_t simon_test_code(
/**
 * Extend every difference stored in H by every transition of a DDT row and
 * accumulate the resulting output differences in G.
 *
 * For each entry (dx_in, dy_in, p_in) of H and each row entry with
 * difference dz = entry.dy and probability p:
 *   dx_out = dz ^ dy_in,  dy_out = dx_in,  p_out = p_in * p.
 * If hw32(dx_out & MASK) <= hw_max, the output difference is looked up in G
 * by differential_to_string(): when present its probability is increased by
 * p_out, otherwise a new calloc'ed entry is inserted. In both cases
 * simon_diff_update_max() is called with the resulting entry.
 *
 * Changes with respect to the original version:
 *  - with FULL_DDT the row of T is used in place instead of being deep-copied
 *    into DZ on every outer iteration (DZ is a by-value parameter, so the
 *    caller never observed that copy);
 *  - with FULL_DDT the result of T->find() is asserted to be valid before it
 *    is dereferenced;
 *  - the result of calloc() is asserted to be non-NULL before it is written.
 *
 * \param T DDT: maps an input difference dx to a pointer to its row; read
 *        only when FULL_DDT is non-zero.
 * \param DZ row used for every input difference when FULL_DDT is zero.
 * \param H input differences, keyed by string.
 * \param G output differences, keyed by string; updated in place. New
 *        entries are calloc'ed here and not freed by this function.
 * \param input_diff not used by this function.
 * \param max_output_diff passed through to simon_diff_update_max().
 * \param hw_max maximum Hamming weight of (dx_out & MASK) that is kept.
 * \return number of iterations executed (outer plus inner).
 */
uint64_t simon_test_code(std::unordered_map<uint32_t, std::vector<differential_t>*>* T,
								 std::vector<differential_t> DZ,
								 std::unordered_map<std::string, differential_t *>* H,
								 std::unordered_map<std::string, differential_t *>* G,
								 const differential_t input_diff,
								 differential_t* max_output_diff,
								 const uint32_t hw_max)
{
  uint64_t cnt_iter = 0;
  std::unordered_map<std::string, differential_t *>::const_iterator H_iter = H->begin();
  while(H_iter != H->end()) {

	 cnt_iter++;
	 const uint32_t dx_in = (H_iter->second)->dx;
	 const uint32_t dy_in = (H_iter->second)->dy;
	 const double p_in = (H_iter->second)->p;
	 const differential_t diff_in = {dx_in, dy_in, 0, p_in};

	 // Row of transitions to apply to this input difference.
	 const std::vector<differential_t>* row = &DZ;
#if FULL_DDT
	 std::unordered_map<uint32_t, std::vector<differential_t>*>::const_iterator ddt_iter = T->find(dx_in);
	 assert(ddt_iter != T->end());		 // every dx_in must have a row
	 assert(ddt_iter->second != NULL);
	 row = ddt_iter->second; // dz ^ (dx <<< 2)
#endif

	 std::vector<differential_t>::const_iterator vec_iter;
	 for(vec_iter = row->begin(); vec_iter != row->end(); ++vec_iter) {

		cnt_iter++;
		const uint32_t dz = vec_iter->dy;	  // = (dx_in <<< 2) ^ dz
		const double p = vec_iter->p;

		const uint32_t dx_out = dz ^ dy_in;
		const uint32_t dy_out = dx_in;
		const double p_out = (p_in * p);
		const differential_t diff_out = {dx_out, dy_out, 0, p_out};
		if(hw32(dx_out & MASK) <= hw_max) {

		  std::string s_diff_out = differential_to_string(diff_out);
		  std::unordered_map<std::string, differential_t *>::const_iterator G_iter = G->find(s_diff_out);
		  if(G_iter != G->end()) {  // diff already in G

			 (G_iter->second)->p += diff_out.p;	  // update its probability

			 differential_t new_diff = {(G_iter->second)->dx, (G_iter->second)->dy, 0,  (G_iter->second)->p};

			 simon_diff_update_max(diff_in, new_diff, max_output_diff);

		  } else {						  // first time this output difference is seen

			 differential_t* new_diff = (differential_t *)calloc(1, sizeof(differential_t));
			 assert(new_diff != NULL);
			 new_diff->dx = diff_out.dx;
			 new_diff->dy = diff_out.dy;
			 new_diff->npairs = diff_out.npairs;
			 new_diff->p = diff_out.p;

			 std::pair<std::string, differential_t*> new_pair (s_diff_out, new_diff);
			 G->insert(new_pair);

			 simon_diff_update_max(diff_in, *new_diff, max_output_diff);

		  }
		}
	 }
	 H_iter++;
  }
  return cnt_iter;
}

/* --- */
/**
 * Experimental single-round propagation step used for timing tests
 * (second copy of the same routine kept in this dump).
 *
 * Each differential in \p H is extended by each entry of \p DZ, giving
 * (dx_out, dy_out) = (DZ[j].dy ^ dy_in, dx_in) with probability
 * p_in * DZ[j].p. Results whose dx_out has Hamming weight at most
 * \p hw_max are merged into \p G (probabilities of identical differentials
 * are summed) and simon_diff_update_max() is called for each of them.
 *
 * \param T table indexed by input difference; consulted only if FULL_DDT
 *        is non-zero (DZ is then overwritten with the row of dx_in).
 * \param DZ by-value working list of transitions.
 * \param H differentials entering the round.
 * \param G differentials leaving the round; new entries come from calloc().
 * \param input_diff unused here.
 * \param max_output_diff passed through to simon_diff_update_max().
 * \param hw_max Hamming weight bound on dx_out.
 * \return total count of outer and inner loop iterations.
 */
uint64_t simon_test_code(std::unordered_map<uint32_t, std::vector<differential_t>*>* T,
								 std::vector<differential_t> DZ,
								 std::unordered_map<std::string, differential_t *>* H,
								 std::unordered_map<std::string, differential_t *>* G,
								 const differential_t input_diff,
								 differential_t* max_output_diff,
								 const uint32_t hw_max)
{
  uint64_t iterations = 0;

  for(std::unordered_map<std::string, differential_t *>::const_iterator in_it = H->begin(); in_it != H->end(); ++in_it) {

	 ++iterations;

	 // values of the input differential are copied before G is touched
	 const uint32_t dx_in = (in_it->second)->dx;
	 const uint32_t dy_in = (in_it->second)->dy;
	 const double p_in = (in_it->second)->p;
	 const differential_t diff_in = {dx_in, dy_in, 0, p_in};

#if FULL_DDT
	 // NOTE(review): T->find() is assumed to succeed; no end() check
	 DZ = *((T->find(dx_in))->second); // dz ^ (dx <<< 2)
#endif

	 for(const differential_t& trans : DZ) {

		++iterations;

		const uint32_t dz = trans.dy;	  // = (dx_in <<< 2) ^ dz
		const uint32_t dx_out = dz ^ dy_in;
		const uint32_t dy_out = dx_in;
		const double p_out = (p_in * trans.p);
		const differential_t diff_out = {dx_out, dy_out, 0, p_out};

		// only low-weight output differences are kept
		if(hw32(dx_out & MASK) > hw_max) {
		  continue;
		}

		const std::string out_key = differential_to_string(diff_out);
		std::unordered_map<std::string, differential_t *>::const_iterator out_it = G->find(out_key);

		if(out_it == G->end()) {	  // not seen yet: insert a heap-allocated copy

		  differential_t* entry = (differential_t *)calloc(1, sizeof(differential_t));
		  entry->dx = diff_out.dx;
		  entry->dy = diff_out.dy;
		  entry->npairs = diff_out.npairs;
		  entry->p = diff_out.p;

		  G->insert(std::pair<std::string, differential_t*>(out_key, entry));

		  simon_diff_update_max(diff_in, *entry, max_output_diff);

		} else {							  // seen before: add the probability

		  differential_t* const stored = out_it->second;
		  stored->p += diff_out.p;

		  differential_t summed = {stored->dx, stored->dy, 0, stored->p};

		  simon_diff_update_max(diff_in, summed, max_output_diff);
		}
	 }
  }
  return iterations;
}


/* --- */

/**
 * Serialize the (dx, dy) pair of a differential into the string that is
 * used as a hash-map key.
 *
 * Two encodings are kept, selected at compile time:
 *  - disabled branch: dx and dy as zero-padded hexadecimal words;
 *  - active branch: decimal representation of the packed value
 *    (dx << WORD_SIZE) | dy.
 *
 * Each branch is self-contained. (Previously the active branch referred to
 * the string stream declared only in the disabled branch, which does not
 * compile, and asserted equality of a decimal and a hexadecimal string.)
 *
 * \param diff differential whose dx and dy fields are encoded; npairs and
 *        p do not influence the result.
 * \return the key string.
 */
std::string differential_to_string(const differential_t diff) 
{
#if 0									  // hexadecimal encoding
  std::stringstream oss("");
  oss << std::hex << std::setfill('0') << std::setw(WORD_SIZE / 4) << diff.dx;
  oss << std::hex << std::setfill('0') << std::setw(WORD_SIZE / 4) << diff.dy;
  return oss.str();
#else									  // decimal encoding of the packed pair
  // Pack in 64 bits so that the shift neither truncates dx nor becomes
  // undefined when WORD_SIZE equals the width of a 32-bit word.
  const uint64_t n = (((uint64_t)diff.dx << WORD_SIZE) | (uint64_t)diff.dy);
  return std::to_string(n);
#endif
}

/* --- */


/* 

#--- [./tests/simon-xor-threshold-search-tests.cc:1843] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:1549] Fill DZ len 2^9.00
[./tests/simon-xor-threshold-search-tests.cc:1562] Fill H len 2^25.00
[./tests/simon-xor-threshold-search-tests.cc:1577] Start search 1381059611925423
[./tests/simon-xor-threshold-search-tests.cc:1580] End search 1381062122878991
[./tests/simon-xor-threshold-search-tests.cc:1586] 2510.953568 s 2510953.568000 ms 2510953568.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1587] cnt_iter 17146192914 (2^34.00) C 0.000000 (2^-22.703150)
[./tests/simon-xor-threshold-search-tests.cc:1591] Free G

real    43m7.659s
user    43m0.245s
sys     0m1.132s

	[./tests/simon-xor-threshold-search-tests.cc:1549] Fill DZ len 2^9.00
	[./tests/simon-xor-threshold-search-tests.cc:1562] Fill H len 2^20.00
	[./tests/simon-xor-threshold-search-tests.cc:1577] Start search 1381059580687709
	[./tests/simon-xor-threshold-search-tests.cc:1580] End search 1381059668462944
	[./tests/simon-xor-threshold-search-tests.cc:1586] 87.775235 s 87775.235000 ms 87775235.000000 mu
	[./tests/simon-xor-threshold-search-tests.cc:1587] cnt_iter 537856389 (2^29.00) C 0.000000 (2^-22.546904)
	[./tests/simon-xor-threshold-search-tests.cc:1591] Free G


[./tests/simon-xor-threshold-search-tests.cc:1542] Fill full DDT T
[./tests/simon-xor-threshold-search-tests.cc:1560] Fill H len 2^20.00
[./tests/simon-xor-threshold-search-tests.cc:1575] Start search 1381059643793187
[./tests/simon-xor-threshold-search-tests.cc:1578] End search 1381059713906467
[./tests/simon-xor-threshold-search-tests.cc:1584] 70.113280 s 70113.280000 ms 70113280.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1585] cnt_iter 537851259 (2^29.00) C 0.000000 (2^-22.871016)
[./tests/simon-xor-threshold-search-tests.cc:1589] Free G

real    11m28.272s

 */

/* --- */
/*
Test vector

$ echo smashup | ./bin/tweetcipher-ref  e kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk vvvvvvvvvvvvvvvv | ./bin/tweetcipher-ref d  kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk vvvvvvvvvvvvvvvv 

*v[1] = e
*v[2] = kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk = W[0:3] :  W[0]kkkkkkkk W[1]kkkkkkkk W[2]kkkkkkkk W[3]kkkkkkkk
*v[3] = vvvvvvvvvvvvvvvv

v[1] = 65 e

v[2][0] 6B6B6B6B6B6B6B6B kkk...
v[2][1] 6B6B6B6B6B6B6B6B
v[2][2] 6B6B6B6B6B6B6B6B
v[2][3] 6B6B6B6B6B6B6B6B

v[3][4] 7676767676767676 vvv...
v[3][5] 7676767676767676

*/
//int main(int _,char**v)
//int main(int _,char**v)

// Length in bytes of the random test message processed by main() below.
#define PT_LEN 64

/*
 * Self-contained encrypt-then-decrypt round trip.
 *
 * A random 4-word key, a random 2-word iv and a random PT_LEN-byte message
 * (last byte forced to 0xA) are generated. The message is encrypted in
 * place in ct[], the state is re-initialized with the same key/iv and the
 * ciphertext is decrypted into pt[]. Key, iv, plaintext, ciphertext and
 * decrypted text are printed.
 *
 * ROUNDS (and LOOP in the disabled parts) are macros defined elsewhere;
 * presumably ROUNDS applies the cipher permutation to x[] and uses the
 * locals i and r -- TODO confirm against the macro definition.
 */
int main()
{
  srandom(time(NULL));

  uint64_t x[16];					  // state
  uint64_t i;
  uint64_t c;
  uint64_t r;
  uint64_t f=1;//'e'==*v[1];		  // encrypt or decrypt

  uint64_t key[4] = {0};//{0x6B6B6B6B6B6B6B6B, 0x6B6B6B6B6B6B6B6B, 0x6B6B6B6B6B6B6B6B, 0x6B6B6B6B6B6B6B6B};
  uint64_t iv[2]  = {0};//{0x7676767676767676, 0x7676767676767676};

  // draw and print a random key
  for(uint32_t i = 0; i < 4; i++) {
	 key[i] = random64();
	 printf("%llX ", key[i]);
  }
  printf("\n");
  // draw and print a random iv (used as the tweak below)
  for(uint32_t i = 0; i < 2; i++) {
	 iv[i] = random64();
	 printf("%llX ", iv[i]);
  }
  printf("\n");

  // --- Encryption ---

  // initialize input state
  for(i = 0; i < 16; ++i) {
    x[i] = (i * 0x7477697468617369ULL);
  }
  // add key
  for(i = 0; i < 4; ++i) {		  // LOOP(4) x[i]=W(v[2],i);
	 //	 x[i] = ((uint64_t*)v[2])[i];
	 x[i] = key[i];
  }
  // add tweak
  for(i = 0; i < 2; ++i) {		  // LOOP(2) x[i+4]=W(v[3],i);
	 //	 x[i+4] = ((uint64_t*)v[3])[i];
	 x[i+4] = iv[i];
  }
#if 0									  // DEBUG
  for(i = 0; i < 16; ++i) {
	 printf("x[%lld] %llX\n", i, x[i]);
  }
#endif
  ROUNDS;
#if 0									  // DEBUG
  printf("\n");
  for(i = 0; i < 16; ++i) {
	 printf("x[%lld] %llX\n", i, x[i]);
  }
#endif
  //  while((c=getchar())!=EOF){
  // ct[] first holds the plaintext; every byte is then overwritten with
  // its ciphertext inside the encryption loop below
  uint8_t ct[PT_LEN] = {0x61, 0x61, 0x61, 0x61, 0x61, 0xA};
  for(uint32_t i = 0; i < PT_LEN; i++) {
	 ct[i] = random32() & 0xFF;
  }
  ct[PT_LEN - 1] = 0xA;			  // EOF
  printf(" plaintext: ");
  for(uint32_t i = 0; i < PT_LEN; i++) {
	 printf("%2X ", ct[i]);
  }
  printf("\n");
  for(uint32_t j = 0; j < PT_LEN; j++) {
	 c = ct[j];//getchar();
	 //	 printf("\n%llX\n", c);

	 // f is 1 in this loop, so this decrypt-only exit is never taken here
    if( !f && 10 == (x[0]^c) % 256 ) { // decrypt
		printf("\n");
		assert(1 == 0);
		return 0;
	 }
	 //    putchar(x[0]^c);
	 // output byte = low byte of the state XOR input byte
	 ct[j] = (uint8_t)(0xFF & (x[0]^c));
	 //	 printf("ct[%d] = %2X = %2X xor %2X\n", j, ct[j], (uint8_t)x[0], (uint8_t)c);
	 if(f == 1) {					  // encrypt
		x[0] = c ^ x[0];
	 } else {						  // decrypt
		x[0] = c ^ (x[0] & (~255ULL)); // ~255ULL = FFFFFFFFFFFFFF00
	 }
    ROUNDS;
  }
  x[0]^=1;
  ROUNDS;
  printf("\n");
  //  LOOP(8) putchar(255&((x[4]^x[5])>>8*i));
  //  LOOP(8) putchar(255&((x[6]^x[7])>>8*i));
#if 0									  // DEBUG
  printf("\n\n");
  for(i = 0; i < 16; ++i) {
	 printf("x[%lld] %llX\n", i, x[i]);
  }
#endif
  //  LOOP(8) {printf("%2llX ", 255 & ((x[4]^x[5])>>8*i));  iv[0] |= ((255 & ((x[4]^x[5])>>8*i)) << ((7 - i)*8)); }
  //  printf("\n");
  //  LOOP(8) {printf("%2llX ", 255 & ((x[6]^x[7])>>8*i)); iv[1] |= ((255 & ((x[6]^x[7])>>8*i)) << ((7 - i)*8));}
  //  printf("\n");
#if 0
  LOOP(8) printf("%2llX ", 255 & ((x[4]^x[5])>>8*i));
  printf("\n");
  LOOP(8) printf("%2llX ", 255 & ((x[6]^x[7])>>8*i));
  printf("\n");
#endif

  // --- Decryption ---
  //  printf("\n --- Decryption --- \n");
  f = 0;
  //  iv[0] = (x[4]^x[5]);
  //  iv[1] = (x[6]^x[7]);

  //  printf("iv[0] = %llX\n", iv[0]);
  //  printf("iv[1] = %llX\n", iv[1]);

  printf("ciphertext: ");
  for(uint32_t i = 0; i < PT_LEN; i++) {
	 printf("%2X ", ct[i]);
  }
  printf("\n");

  // initialize input state
  for(i = 0; i < 16; ++i) {
    x[i] = (i * 0x7477697468617369ULL);
  }
  // add key
  for(i = 0; i < 4; ++i) {		  // LOOP(4) x[i]=W(v[2],i);
	 //	 x[i] = ((uint64_t*)v[2])[i];
	 x[i] = key[i];
  }
  // add tweak
  for(i = 0; i < 2; ++i) {		  // LOOP(2) x[i+4]=W(v[3],i);
	 //	 x[i+4] = ((uint64_t*)v[3])[i];
	 x[i+4] = iv[i];
  }
#if 0									  // DEBUG
  printf("\n");
  for(i = 0; i < 16; ++i) {
	 printf("x[%lld] %llX\n", i, x[i]);
  }
#endif
  ROUNDS;
  //  while((c=getchar())!=EOF){
#if 0									  // DEBUG
  printf("\n");
  for(i = 0; i < 16; ++i) {
	 printf("x[%lld] %llX\n", i, x[i]);
  }
#endif
  uint8_t pt[PT_LEN] = {0};
  for(uint32_t j = 0; j < PT_LEN; j++) {
	 c = ct[j];//0x61;//getchar();
	 //	 printf("\n%llX\n", c);

	 //    if( !f && 10 == (x[0]^c) % 256 ) { // decrypt
	 // the last ciphertext byte is not decrypted: pt[PT_LEN - 1] keeps its
	 // initial value 0 and is printed as such below
    if( j == (PT_LEN - 1) ) { // decrypt
		//		printf("END\n");
		//		return 0;
		break;
	 }
	 //    putchar(x[0]^c);
	 //	 printf("%X\n", (uint8_t)(0xFF & (x[0]^c)));
	 pt[j] = (uint8_t)(0xFF & (x[0]^c));
	 //	 printf("pt[%d] = %2X = %2X xor %2X\n", j, pt[j], (uint8_t)x[0], (uint8_t)c);
	 if(f == 1) {					  // encrypt
		x[0] = c ^ x[0];
	 } else {						  // decrypt
		x[0] = c ^ (x[0] & (~255ULL)); // ~255ULL = FFFFFFFFFFFFFF00
	 }
    ROUNDS;
  }
  x[0]^=1;
  ROUNDS;
#if 0									  // DEBUG
  printf("\n");
  for(i = 0; i < 16; ++i) {
	 printf("x[%lld] %llX\n", i, x[i]);
  }
#endif
  printf("\n");
  //  LOOP(8) putchar(255&((x[4]^x[5])>>8*i));
  //  LOOP(8) putchar(255&((x[6]^x[7])>>8*i));
  //  LOOP(8) printf("%2llX ", 255 & ((x[4]^x[5])>>8*i));
  //  printf("\n");
  //  LOOP(8) printf("%2llX ", 255 & ((x[6]^x[7])>>8*i));
  //  printf("\n");
  printf(" decrypted: ");
  for(uint32_t i = 0; i < PT_LEN; i++) {
	 printf("%2X ", pt[i]);
  }
  printf("\n");

  return 0;
}

/* --- */

  // Stray snippet: zero-initialized 4-word key and 2-word iv; the commented
  // values are the bytes 'k' (0x6B) and 'v' (0x76) repeated.
  uint64_t key[4] = {0};//{0x6B6B6B6B6B6B6B6B, 0x6B6B6B6B6B6B6B6B, 0x6B6B6B6B6B6B6B6B, 0x6B6B6B6B6B6B6B6B};
  uint64_t iv[2]  = {0};//{0x7676767676767676, 0x7676767676767676};

/* --- */

/**
 * Build a 64-bit random value from two successive random32() outputs.
 *
 * The two calls are made in separate statements: when both appear as
 * operands of a single `|` expression their relative order is unspecified,
 * so the same generator state could yield different results with
 * different compilers.
 *
 * \return first random32() output in the high 32 bits, second in the low
 *         32 bits.
 */
uint64_t random64()
{
  const uint64_t r_hi = ((uint64_t)random32() << 32);
  const uint64_t r_lo = (uint64_t)random32();
  return (r_hi | r_lo);
}


/* --- */

/*
 * Experimental variant of the command-line cipher driver.
 *
 * v[1][0] == 'e' selects encryption, anything else decryption; v[3] is
 * read as two 64-bit tweak words (it must therefore hold at least 16
 * bytes). The state is started from an all-zero key/iv instead of the
 * usual constant, six bytes are read from stdin and processed, and 16
 * bytes derived from x[4]^x[5] and x[6]^x[7] are written at the end.
 *
 * ROUNDS and LOOP are macros defined elsewhere; presumably they operate on
 * the locals x, i and r -- TODO confirm against the macro definitions.
 *
 * Returns 1 if fewer than three arguments are given, 0 otherwise.
 */
int main(int _,char**v)
{
  // v[1] and v[3] are dereferenced below, so three arguments are required
  if(_ < 4) {
	 printf("usage: <prog> e|d <key> <tweak>\n");
	 return 1;
  }

  uint64_t x[16];					  // state
  uint64_t i;
  uint64_t c;
  uint64_t r;
  uint64_t f='e'==*v[1];		  // encrypt or decrypt

  uint64_t key[4] = {0};
  uint64_t iv[2] = {0};

  // initialize input state: key words first, zero elsewhere
  // (key[] has only 4 elements, so it must not be indexed up to 15)
  for(i = 0; i < 16; ++i) {
	 //    x[i] = (i * 0x7477697468617369ULL);
	 x[i] = (i < 4) ? key[i] : 0;
  }
  // add iv (iv[] has only 2 elements)
  for(i = 0; i < 2; ++i) {		  // LOOP(4) x[i]=W(v[2],i);
	 //	 x[i] = ((uint64_t*)v[2])[i];
	 x[i+4] = iv[i];
  }
  // add tweak (overwrites the iv words just stored in x[4], x[5])
  for(i = 0; i < 2; ++i) {		  // LOOP(2) x[i+4]=W(v[3],i);
	 x[i+4] = ((uint64_t*)v[3])[i];
  }
  ROUNDS;
  //  while((c=getchar())!=EOF){
  // NOTE(review): exactly six bytes are consumed and EOF is not checked
  for(uint32_t j = 0; j < 6; j++) {
	 c = getchar();
    if( !f && 10 == (x[0]^c) % 256 ) { // decrypt
		printf("\n");
		return 0;
	 }
    putchar(x[0]^c);
	 if(f == 1) {					  // encrypt
		x[0] = c ^ x[0];
	 } else {						  // decrypt
		x[0] = c ^ (x[0] & (~255ULL)); // ~255ULL = FFFFFFFFFFFFFF00
	 }
    ROUNDS;
  }
  x[0]^=1;
  ROUNDS;
  printf("\n");
  LOOP(8) putchar(255&((x[4]^x[5])>>8*i));
  LOOP(8) putchar(255&((x[6]^x[7])>>8*i));
  return 0;
}


/* --- */
/*
 * Experimental driver that absorbs the fixed message "smashup" under an
 * all-zero key and tweak and prints 16 bytes derived from x[4]^x[5] and
 * x[6]^x[7] in hexadecimal.
 *
 * ROUNDS and LOOP are macros defined elsewhere; presumably they operate on
 * the locals x, i and r -- TODO confirm against the macro definitions.
 */
int main()
{

  uint64_t key[4] = {0};
  uint64_t v[2] = {0};

  const char* pt = "smashup";
  uint32_t pt_len = 7;

  //  printf("\n");
  //  LOOP(8) putchar(255&((x[4]^x[5])>>8*i));
  //  LOOP(8) putchar(255&((x[6]^x[7])>>8*i));
  //  LOOP(8) printf("%llX ", 255 & (*(pt + (8*i)));
  //  LOOP(8) printf("%llX ", 255 & (*(pt + (8*i)));
  //  printf("\n");

  uint64_t x[16];					  // state
  uint64_t i;
  uint64_t c;
  uint64_t r;
  uint64_t f=1;//'e'==*v[1];		  // encrypt
  // initialize input state
  for(i = 0; i < 16; ++i) {
    x[i] = (i * 0x7477697468617369ULL);
  }
  // add key
  for(i = 0; i < 4; ++i) {		  // LOOP(4) x[i]=W(v[2],i);
	 //	 x[i] = ((uint64_t*)v[2])[i];
	 x[i] = key[i];
  }
  // add tweak
  for(i = 0; i < 2; ++i) {		  // LOOP(2) x[i+4]=W(v[3],i);
	 //	 x[i+4] = ((uint64_t*)v[3])[i];
	 x[i+4] = v[i];
  }
  ROUNDS;
  //  while((c=getchar())!=EOF){
  // Absorb the message one byte at a time. The index is the byte position
  // itself: indexing with cnt*8 (as done before) reads far beyond the
  // 8-byte string literal, and masking with 0xF kept only half a byte.
  for(uint32_t cnt = 0; cnt < pt_len; cnt++) {
	 c = (uint64_t)(0xFF & (unsigned char)pt[cnt]);
    if( !f && 10 == (x[0]^c) % 256 ) { // decrypt
		printf("\n");
		return 0;
	 }
	 //    putchar(x[0]^c);
	 if(f == 1) {					  // encrypt
		x[0] = c ^ x[0];
	 } else {						  // decrypt
		x[0] = c ^ (x[0] & (~255ULL)); // ~255ULL = FFFFFFFFFFFFFF00
	 }
    ROUNDS;
  }
  x[0]^=1;
  ROUNDS;
  // i is 64 bits wide: print it with a matching conversion
  printf("i = %llu\n", (unsigned long long)i);
  //  LOOP(8) putchar(255&((x[4]^x[5])>>8*i));
  //  LOOP(8) putchar(255&((x[6]^x[7])>>8*i));
  LOOP(8) printf("%llX ", (unsigned long long)(255 & ((x[4]^x[5])>>8*i)));
  LOOP(8) printf("%llX ", (unsigned long long)(255 & ((x[6]^x[7])>>8*i)));
  printf("\n");
  return 0;
}


/* --- */

/*
 * Command-line cipher driver (expanded form of a terse original whose
 * statements are kept as comments next to the code).
 *
 * Arguments: v[1][0] == 'e' selects encryption, anything else decryption;
 * v[2] is read as four 64-bit key words (at least 32 bytes) and v[3] as
 * two 64-bit tweak words (at least 16 bytes). The argument count is not
 * checked. Input bytes are read from stdin until EOF; each output byte
 * (low byte of x[0] XOR input byte) is written to stdout. In decrypt mode
 * processing stops as soon as an output byte equals 10 (newline). After
 * the input, 16 bytes derived from x[4]^x[5] and x[6]^x[7] are written.
 *
 * ROUNDS and LOOP are macros defined elsewhere; the commented originals
 * suggest LOOP(n) iterates the local i over 0..n-1 and ROUNDS presumably
 * permutes x[] using i and r -- TODO confirm.
 */
int main(int _,char**v)
{
  uint64_t x[16];					  // state
  uint64_t i;
  uint64_t c;
  uint64_t r;
  uint64_t f='e'==*v[1];		  // encrypt or decrypt
  //  LOOP(16)
  //    x[i]=i*0x7477697468617369ULL;
  // initialize input state
  for(i = 0; i < 16; ++i) {
    x[i] = (i * 0x7477697468617369ULL);
  }
  //  LOOP(4) x[i]=W(v[2],i);
  // add key
  for(i = 0; i < 4; ++i) {		  // LOOP(4) x[i]=W(v[2],i);
	 x[i] = ((uint64_t*)v[2])[i];
  }
  //  LOOP(2) x[i+4]=W(v[3],i);
  // add tweak
  for(i = 0; i < 2; ++i) {		  // LOOP(2) x[i+4]=W(v[3],i);
	 x[i+4] = ((uint64_t*)v[3])[i];
  }
  ROUNDS;
  // c is unsigned 64-bit: the int result of getchar() and EOF are both
  // converted to that type before the comparison
  while((c=getchar())!=EOF){
    if( !f && 10 == (x[0]^c) % 256 ) { // decrypt
		printf("\n");
		return 0;
	 }
    putchar(x[0]^c);
	 //    x[0]=c^(f?x[0]:x[0]&~255ULL);
	 if(f == 1) {					  // encrypt
		x[0] = c ^ x[0];
	 } else {						  // decrypt
		x[0] = c ^ (x[0] & (~255ULL)); // ~255ULL = FFFFFFFFFFFFFF00
	 }
    ROUNDS;
  }
  x[0]^=1;
  ROUNDS;
  printf("\n");
  LOOP(8) putchar(255&((x[4]^x[5])>>8*i));
  LOOP(8) putchar(255&((x[6]^x[7])>>8*i));
  return 0;
}

/* --- */

#if 0 
/*
 * Disabled debugging copy of the command-line cipher driver.
 * v[1][0] == 'e' selects encryption; v[2] is read as four 64-bit key words
 * and v[3] as two 64-bit tweak words. It prints v[1] and the mode flag
 * before processing stdin. ROUNDS and LOOP are macros defined elsewhere.
 */
int main(int _,char**v){
  uint64_t x[16];					  // state
  uint64_t i;
  uint64_t c;
  uint64_t r;
  uint64_t f='e'==*v[1];		  // encrypt or decrypt

  // 'd' = 0x64
  // 'e' = 0x65

#if 1									  // DEBUG
  printf("\nv[1] = %X\n", *v[1]);
  printf("f = %llX\n", f);
#endif

  // initialize input state
  for(i = 0; i < 16; ++i) {
    x[i] = (i * 0x7477697468617369ULL);
  }

  // add key
  for(i = 0; i < 4; ++i) {		  // LOOP(4) x[i]=W(v[2],i);
	 x[i] = ((uint64_t*)v[2])[i];
  }

  // add tweak
  for(i = 0; i < 2; ++i) {		  // LOOP(2) x[i+4]=W(v[3],i);
	 x[i+4] = ((uint64_t*)v[3])[i];
  }

#if 0									  // DEBUG
  for(i = 0; i < 16; ++i) {
	 printf("x[%lld] %llX\n", i, x[i]);
  }
#endif

  ROUNDS;

#if 0									  // DEBUG
  for(i = 0; i < 16; ++i) {
	 printf("x[%lld] %llX\n", i, x[i]);
  }
#endif

  while((c=getchar()) != EOF) {

	 // in decrypt mode stop at the first output byte equal to 10 (newline)
    if( !f && 10 == (x[0]^c) % 256 ) { // decrypt
		return 0;
	 }

	 //	 printf("c = %llX\n", c);

    putchar(x[0]^c);

	 //	 printf("x[0] = %llX\n", x[0]);

	 //	 uint64_t x_temp = 0;
	 // compact form of the if/else kept in the disabled block below
    x[0] = c ^ (f?x[0]:x[0]&~255ULL);
#if 0
	 if(f == 1) {					  // encrypt
		x[0] = c ^ x[0];
	 } else {						  // decrypt
		x[0] = c ^ (x[0] & (~255ULL)); // ~255ULL = FFFFFFFFFFFFFF00
	 }
#endif
	 //	 printf("%llX %llX\n", x_temp, x[0]);
	 //	 assert(x_temp == x[0]);

	 //	 printf("x[0] = %llX\n", x[0]);

    ROUNDS;
  }

  x[0]^=1;

  ROUNDS;

  //  printf("\n");
  LOOP(8) putchar(255&((x[4]^x[5])>>8*i));
  LOOP(8) putchar(255&((x[6]^x[7])>>8*i));
  //  LOOP(8) printf("%llX ", 255 & ((x[4]^x[5])>>8*i));
  //  LOOP(8) printf("%llX ", 255 & ((x[6]^x[7])>>8*i));
  //  printf("\n");
  //  printf("c = %llX\n", c);

  return 0;
}
#endif

/* --- */

/* 
--- [./tests/simon-xor-threshold-search-tests.cc:1306] Round [8 / 14] (   1    0) : T size 65536, H size 12061193 2^23.52 ---
[./tests/simon-xor-threshold-search-tests.cc:1330] 11.395259 min 683.715562 s 683715.562000 ms 683715562.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1331] cnt_iter 2107365984 2^30.97 C 0.000000 2^-21.555541
[./src/simon-xor-threshold-search.cc:424] p = 2^-22.192645
[./tests/simon-xor-threshold-search-tests.cc:1345] MAX:     1100     4000 2^-23.402965 2^-22.192645 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1306] Round [9 / 14] (   1    0) : T size 65536, H size 30636606 2^24.87 ---
[./tests/simon-xor-threshold-search-tests.cc:1330] 31.464828 min 1887.889692 s 1887889.692000 ms 1887889692.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1331] cnt_iter 5331882136 2^32.31 C 0.000000 2^-21.429439
[./src/simon-xor-threshold-search.cc:452] Round keys from key schedule, 9 R:
[ 0]    AB30 [ 1]    BD6D [ 2]    AE08 [ 3]    B3F8 [ 4]    AA56 [ 5]     473 [ 6]    EB38 [ 7]    20AC [ 8]    95FE


--- [./tests/simon-xor-threshold-search-tests.cc:1306] Round [10 / 14] (   1    0) : T size 65536, H size 38529685 2^25.20 ---
[./tests/simon-xor-threshold-search-tests.cc:1329] 2384.106686 s 2384106.686000 ms 2384106686.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1330] cnt_iter 6696241605 232.64 C 0.000000 2^-21.421471
[./src/simon-xor-threshold-search.cc:450] Round keys from key schedule, 10 R:
[ 0]    F586 [ 1]    170B [ 2]    DEDC [ 3]     C18 [ 4]    17B7 [ 5]    CAC9 [ 6]    8EC0 [ 7]    76BD [ 8]    BE5B [ 9]    30BB

 */

/* 

8-th round:

H size = 2^23.52
cnt_iter = 2107365984 = 2^30.97
Time = 683.715562 sec = 11.40 min

9-th round:

H size = 2^24.87
cnt_iter = 5331882136 = 2^32.31
Time = 1887.889692 sec = 31.46 min 

10-th round:

H size = 2^25.20
cnt_iter = 6696241605 = 2^32.64
Time = 2384.106686 sec = 39.73 min


--- [./tests/simon-xor-threshold-search-tests.cc:1306] Round [10 / 14] (   1    0) : T size 65536, H size 38529685 2^25.20 ---
[./tests/simon-xor-threshold-search-tests.cc:1329] 2384.106686 s 2384106.686000 ms 2384106686.000000 mu
[./tests/simon-xor-threshold-search-tests.cc:1330] cnt_iter 6696241605 232.64 C 0.000000 2^-21.421471
[./src/simon-xor-threshold-search.cc:450] Round keys from key schedule, 10 R:
[ 0]    F586 [ 1]    170B [ 2]    DEDC [ 3]     C18 [ 4]    17B7 [ 5]    CAC9 [ 6]    8EC0 [ 7]    76BD [ 8]    BE5B [ 9]    30BB

 */

/* --- */

/**
 * Return the current wall-clock time in milliseconds, as reported by
 * gettimeofday(), and print the intermediate values for debugging.
 *
 * The seconds are widened to 64 bits before being scaled so that the
 * multiplication cannot overflow a narrower time_t, and every printed
 * value is cast to the type its conversion specifier expects.
 *
 * \return seconds * 1000 + microseconds / 1000.
 */
uint64_t get_time_ms64()
{
  struct timeval tv;

  gettimeofday(&tv, NULL);

  uint64_t ret = (uint64_t)tv.tv_usec;

  printf("[%s:%d] %lld \n", __FILE__, __LINE__, (long long)ret);

  /* Convert from micro seconds (10^-6) to milliseconds (10^-3) */
  ret /= 1000;

  printf("[%s:%d] %lld \n", __FILE__, __LINE__, (long long)ret);

  /* Adds the seconds (10^0) after converting them to milliseconds (10^-3) */
  ret += ((uint64_t)tv.tv_sec * 1000);

  printf("[%s:%d] %lld \n", __FILE__, __LINE__, (long long)tv.tv_sec);
  printf("[%s:%d] %lld \n\n", __FILE__, __LINE__, (long long)ret);

  return ret;
}


/* --- */

		  //		  differential_t* diff_temp = (*G)[s_diff_out];


/* --- */
/*

Mazirat: Reading the full table from file, the first 7 rounds take 13 minutes

real    13m3.069s
user    9m1.374s
sys     1m27.621s

Chaos: Computing the full table takes 12 minutes; the first 7 rounds complete in 13 minutes.
From round 8 onwards the search becomes perceptibly slow. 

Looking at the statistics, my explanation is that after 8 rounds the input hash table grows to approx. 2^25 elements. One row of the DDT contains on average 2^9 elements (sometimes 2^8 or 2^10), and therefore one round loops over 2^25 * 2^9 = 2^34 elements on average.


*/

/* --- */

/*
real    13m18.110s
user    13m15.934s
sys     0m0.040s
*/

/* --- */

	 //	 printf("dx = 0x%X: ", dx);
		//		printf("0x%X|%d ", DY[i].dy, DY[i].npairs);
	 //	 printf("\n");


/* --- */

/**
 * For every input difference dx, count over all words x how often each
 * output difference dy of f(x) = (x <<< r1 & x <<< r2) ^ (x <<< r3)
 * occurs, sort the counts with simon_comp_differentials_npairs and print
 * the entries up to the first zero count.
 *
 * The per-dx table lives on the heap and is reused across iterations
 * (an ALL_WORDS-element array on the stack is large for WORD_SIZE = 16),
 * and the printing scan is bounded by the table size.
 *
 * \param T not modified by the active code; it is referenced only in the
 *        disabled section at the end.
 */
void simon_compute_full_ddt(std::unordered_map<uint32_t, std::vector<differential_t>*>* T)
{
  assert(WORD_SIZE <= 16);
  uint32_t r1 = SIMON_LROT_CONST_S; 
  uint32_t r2 = SIMON_LROT_CONST_T;
  uint32_t r3 = SIMON_LROT_CONST_U;

  const differential_t zero_diff = {0, 0, 0, 0.0};
  std::vector<differential_t> DY;

  for(uint32_t dx = 0; dx < ALL_WORDS; dx++) {
	 // reset the counters for this dx (storage is reused)
	 DY.assign(ALL_WORDS, zero_diff);
	 for(uint32_t x = 0; x < ALL_WORDS; x++) {
		uint32_t xx = x ^ dx;

		uint32_t y  = ( LROT(x, r1) &  LROT(x, r2)) ^  LROT(x, r3); 
		uint32_t yy = (LROT(xx, r1) & LROT(xx, r2)) ^ LROT(xx, r3); 
		uint32_t dy = y ^ yy;
		//		DY[dy]++;
		DY[dy].dx = dx;
		DY[dy].dy = dy;
		DY[dy].p = 0.0;
		DY[dy].npairs++;
	 }

	 // presumably orders entries by decreasing npairs, so that the non-zero
	 // counts come first -- TODO confirm against the comparator
	 std::sort(DY.begin(), DY.end(), simon_comp_differentials_npairs);
#if 1									  // DEBUG
	 printf("dx = 0x%X: ", dx);
	 for(size_t i = 0; (i < DY.size()) && (DY[i].npairs != 0); i++) {
		printf("0x%X|%d ", DY[i].dy, DY[i].npairs);
	 }
	 printf("\n");
#endif
  }

#if 0
  // ---
  std::vector<uint32_t> DX;

  std::vector<uint32_t>::iterator vec_iter;
  for(vec_iter = DX.begin(); vec_iter != DX.end(); vec_iter++) {
	 uint32_t dx = *vec_iter;
	 simon_ddt_add_row(T, dx, hw_max);
#if 1									  // DEBUG
	 std::unordered_map<uint32_t, std::vector<differential_t>*>::const_iterator T_iter = T->find(dx);
	 printf("%4X: %d\n", dx, (uint32_t)T_iter->second->size());
	 assert(T_iter != T->end());
	 //	 printf("%4X: %d\n", dx, T[dx].size());
#endif
  }
#endif
}

/* ---- */

// Orphaned fragment from a version in which DY held plain integer counts:
// it prints every non-zero count and a '.' for every zero count. It is not
// part of any function in this dump.
//	 std::sort(DY, DY + ALL_WORDS, std::greater<int>());
//	 uint32_t i = 0;
//	 while(DY[i] != 0) {
//		printf("%X ", DY[i]);
//		i++;
//	 }
	 for(uint32_t i = 0; i < ALL_WORDS; i++) {
		if(DY[i] != 0) {
		  printf("%d ", DY[i]);
		} else {
		  printf(".");
		}
	 }

/* ---- */

/**
 * Round-by-round search for high-probability differentials starting from
 * a fixed input difference.
 *
 * In every round simon_diff_search_oneround() maps the current set H to a
 * new set G, the best differential found is verified experimentally under
 * a random key and printed, and G becomes the H of the next round.
 *
 * Memory handling: the initial entry of H points to the local variable
 * input_diff and is never passed to free(); all other entries are
 * assumed to be heap objects created by simon_diff_search_oneround() and
 * are released as soon as their round is finished -- NOTE(review): this
 * relies on the one-round routine storing fresh allocations in G and not
 * re-using pointers from H; confirm.
 *
 * \param nrounds number of rounds to process (at most SIMON_NROUNDS).
 * \param hw_max Hamming weight bound forwarded to the one-round search.
 * \param D only D[0] is written: it receives a copy of the initial H,
 *        whose single pointer refers to a local of this function and must
 *        not be dereferenced after the call returns.
 */
void simon_diff_search(const uint32_t nrounds, const uint32_t hw_max, std::unordered_map<std::string, differential_t *> D[SIMON_NROUNDS])
{
  assert(nrounds <= SIMON_NROUNDS);
#if 1									  // for experimental verification
  uint32_t key[SIMON_MAX_NROUNDS] = {0};
  key[0] = random32() & MASK;
  key[1] = random32() & MASK;
  key[2] = random32() & MASK;
  key[3] = random32() & MASK;
  uint64_t npairs = (1ULL << 25);
  double p_exp = 0.0;
#endif
#if 1									  // DEBUG
  printf("\n[%s:%s()%d] hw_max %d\n", __FILE__, __FUNCTION__, __LINE__, hw_max);
#endif
  std::unordered_map<uint32_t, std::vector<differential_t>*> T;
  std::unordered_map<std::string, differential_t *> H;
  std::unordered_map<std::string, differential_t *> G;
  //  std::unordered_map<std::string, differential_t *> D[SIMON_NROUNDS]; // all rounds

  //  differential_t input_diff = {0x0001, 0x0000, 0, 1.0}; // DTU
  //  differential_t input_diff  = {0x280, 0xA80, 0, 1.0}; // mydiff-1
  //  differential_t input_diff  = {0x400, 0x1900, 0, 1.0}; // mydiff-2
  differential_t input_diff  = {0x8000, 0x2202, 0, 1.0}; // mydiff-3
  std::string s_diff = differential_to_string(input_diff);
  std::pair<std::string, differential_t *> new_pair (s_diff, &input_diff);
  H.insert(new_pair);

  D[0] = H;

#if 1									  // DEBUG
  printf("\n [%s:%d] INPUT DIFF %8X %8X \n", __FILE__, __LINE__, input_diff.dx, input_diff.dy);
#endif
  for(uint32_t i = 0; i < nrounds; i++) {

#if 1									  // DEBUG
	 printf("\n--- [%s:%d] Round [%d / %d] (%4X %4X) : T size %d, H size %d ---\n", __FILE__, __LINE__, i+1, nrounds, input_diff.dx, input_diff.dy, (uint32_t)T.size(), (uint32_t)H.size());
#endif
#if 1									  // Hash table statistics
	 std::cout << "entries_count = " << H.size() << " = 2^" << log2(H.size()) << std::endl;
	 std::cout << "bucket_count = " << H.bucket_count() << std::endl;
	 std::cout << "max_bucket_count = " << H.max_bucket_count() << " = 2^" << log2(H.max_bucket_count()) << std::endl;
	 std::cout << "load_factor = entries / buckets = " << H.load_factor() << " ( " << floor(H.load_factor() * 100) << "% )" << std::endl;
	 std::cout << "max_load_factor = " << H.max_load_factor() << std::endl;
	 assert(H.load_factor() <= 1);
#endif
	 differential_t max_diff = {0, 0, 0, 0.0};
	 simon_diff_search_oneround(i+1, &T, &H, &G, input_diff, &max_diff, hw_max);

#if 1									  // DEBUG
	 if((i+1) >= 9) {			  // !!!
		npairs = (1ULL << 32);
		p_exp = simon_verify_differential(key, input_diff, max_diff, i+1, npairs); // full search over all 2^32 inputs
	 } else {						  // fewer than 9 rounds
		p_exp = simon_verify_differential_approx(key, input_diff, max_diff, i+1, npairs); // random inputs
	 }
#endif

	 printf("[%s:%d] MAX: %8X %8X 2^%f 2^%f (2^%2.0f CP)\n", __FILE__, __LINE__, max_diff.dx, max_diff.dy, log2(max_diff.p), log2(p_exp), log2(npairs));

	 // Release the differentials of the finished round before H is
	 // replaced; clearing the map alone would leak them. The initial entry
	 // is a stack object and must not be freed.
	 std::unordered_map<std::string, differential_t *>::const_iterator H_old = H.begin();
	 while(H_old != H.end()) {
		if(H_old->second != &input_diff) {
		  free(H_old->second);
		}
		H_old++;
	 }
	 H.clear();
	 // G becomes the next H; swapping leaves G empty and avoids copying
	 // the whole table
	 H.swap(G);

#if 0
	 D[i+1] = H;
#endif
  }

#if 1									  // DEBUG
  printf("\n--- [%s:%d] Round [%d / %d] (%4X %4X) : T size %d, H size %d ---\n", __FILE__, __LINE__, nrounds, nrounds, input_diff.dx, input_diff.dy, (uint32_t)T.size(), (uint32_t)H.size());
#endif
#if 0									  // DEBUG
  differential_t max_diff = {0, 0, 0, 0.0};
  simon_diff_get_max(H, &max_diff);
  p_exp = simon_verify_differential_approx(key, input_diff, max_diff, nrounds, npairs);
  printf("[%s:%d] MAX: %8X %8X 2^%f 2^%f (2^%2.0f CP)\n", __FILE__, __LINE__, max_diff.dx, max_diff.dy, log2(max_diff.p), log2(p_exp), log2(npairs));
#endif

  // free memory
  std::unordered_map<std::string, differential_t *>::const_iterator H_iter = H.begin();
  while(H_iter != H.end()) {
	 // with nrounds == 0 H still holds the pointer to the local input_diff
	 if(H_iter->second != &input_diff) {
		free(H_iter->second);
	 }
	 H_iter++;
  }
  std::unordered_map<std::string, differential_t *>::const_iterator G_iter = G.begin();
  while(G_iter != G.end()) {
	 free(G_iter->second);
	 G_iter++;
  }
  // NOTE(review): the rows of T are std::vector objects released with
  // free(); this is only correct if they were obtained from malloc/calloc
  // without running a constructor -- verify against simon_ddt_add_row
  std::unordered_map<uint32_t, std::vector<differential_t>*>::const_iterator T_iter;
  for(T_iter = T.begin(); T_iter != T.end(); T_iter++) {
	 free(T_iter->second);
  }
#if 1									  // DEBUG
  printf("\n[%s:%s()%d] hw_max %d\n", __FILE__, __FUNCTION__, __LINE__, hw_max);
#endif
}

/**
 * Run simon_diff_search() for 5 rounds with a Hamming weight bound of 3.
 * The per-round tables are passed in through a local array; the code that
 * would dump them is disabled.
 */
void test_simon_diff_search()
{
  std::unordered_map<std::string, differential_t *> D[SIMON_NROUNDS];
  const uint32_t hw_max = 3;//WORD_SIZE;
  const uint32_t nrounds = 5;//14;

  simon_diff_search(nrounds, hw_max, D);

#if 0									  // DEBUG
  for(uint32_t round = 0; round <= nrounds; round++) {
	 printf("\n --- [%s:%d] D[%d] --- \n", __FILE__, __LINE__, round);
	 std::unordered_map<std::string, differential_t *>::const_iterator entry;
	 for(entry = D[round].begin(); entry != D[round].end(); entry++) {
		const differential_t* d = entry->second;
		printf("[%s:%d] D[%d]: %8X %8X 2^%f\n", __FILE__, __LINE__, round, d->dx, d->dy, log2(d->p));
	 }
  }
#endif
}

/* --- */

#if 0
	 // Disabled fragment: makes a heap copy of the first trail_len entries
	 // of diff[] (dx, dy, p) and inserts it into trails_hash_map under the
	 // key s_trail.
	 differential_t** new_trail;
	 new_trail = (differential_t** )calloc(1, sizeof(differential_t*));
	 *new_trail = (differential_t*)calloc(trail_len, sizeof(differential_t));
	 for(uint32_t i = 0; i < trail_len; i++) {
		(*new_trail)[i].dx = diff[i].dx;
		(*new_trail)[i].dy = diff[i].dy;
		(*new_trail)[i].p = diff[i].p;
	 }
	 std::pair<std::string, differential_t**> new_pair (s_trail,new_trail);
	 trails_hash_map->insert(new_pair);
#endif

#if 0
		// Disabled fragment: stores a two-element (input, output) record in
		// diffs_hash_map under the key s_diff. Element 0 is built from
		// diff[0] and diff[1] with probability 1.0, element 1 is the last
		// trail entry with probability p.
		differential_t** new_diff;
		new_diff = (differential_t** )calloc(1, sizeof(differential_t*));
		*new_diff = (differential_t*)calloc(2, sizeof(differential_t));
		(*new_diff)[0].dx = diff[0].dx;
		(*new_diff)[0].dy = diff[0].dy ^ diff[1].dx; // !!
		(*new_diff)[0].p = 1.0;
		(*new_diff)[1].dx = diff[trail_len - 1].dx;
		(*new_diff)[1].dy = diff[trail_len - 1].dy;
		(*new_diff)[1].p = p;

		std::pair<std::string, differential_t**> new_pair (s_diff,new_diff);
		diffs_hash_map->insert(new_pair);
#endif


/* --- */

/**
 * Update the maximum probability differential
 */
/**
 * Compute the probability of a trail as the product of its per-round
 * probabilities and, if it exceeds the probability stored in
 * (*diff_max)[1], overwrite the stored (input, output) pair.
 *
 * (*diff_max)[0] receives the input difference (dx of the first entry,
 * dy of the first entry XOR dx of the second entry) with probability 1.0;
 * (*diff_max)[1] receives the last entry's dx, dy and the trail
 * probability.
 *
 * \param diff the trail; entries 0 .. trail_len - 1 are used, and entry 1
 *        is read even when trail_len is 1.
 * \param trail_len number of entries of the trail, 1 .. NROUNDS. A length
 *        of 0 would make the index trail_len - 1 wrap around, so it is
 *        rejected by an assertion.
 * \param diff_max pointer to a two-element array holding the current
 *        maximum.
 * \return the probability of the trail.
 */
double simon_diffs_update_max(differential_t diff[NROUNDS], 
										const uint32_t trail_len,
										differential_t** diff_max)
{
  assert((trail_len >= 1) && (trail_len <= NROUNDS));
  assert((diff_max != NULL) && (*diff_max != NULL));

  const double p_max = (*diff_max)[1].p;

  double p = 1.0;
  for(uint32_t i = 0; i < trail_len; i++) {
	 p *= diff[i].p;
  }

  if(p > p_max) {
	 (*diff_max)[0].p = 1.0;
	 (*diff_max)[0].dx = diff[0].dx;
	 (*diff_max)[0].dy = diff[0].dy ^ diff[1].dx; // !!
	 (*diff_max)[1].p = p;
	 (*diff_max)[1].dx = diff[trail_len - 1].dx;
	 (*diff_max)[1].dy = diff[trail_len - 1].dy;
#if 0									  // DEBUG
	 printf("\n[%s:%d] Update max for %d R: (%4X %4X) -> (%4X %4X) 2^%f\n", __FILE__, __LINE__,  trail_len, (*diff_max)[0].dx, (*diff_max)[0].dy, (*diff_max)[1].dx, (*diff_max)[1].dy, log2((*diff_max)[1].p));
#endif
  }
  return p;
}



/* --- */

			 if(((G_iter->second)->dx != dx_out) || ((G_iter->second)->dy != dy_out)) {
				// The entry found under this key does not carry the expected output
				// differences: print both sides, then fail on the first assert that
				// does not hold.
				printf("[%s:%d] ERROR! ", __FILE__, __LINE__);
				printf("dx %4X != %4X, ", (G_iter->second)->dx, dx_out);
				printf("dy %4X != %4X, ", (G_iter->second)->dy, dy_out);
				// %s requires a C string. s_diff_out is a string object (it is used
				// with .compare() below), so passing it directly is undefined
				// behaviour. G_iter->first is presumably the std::string key of the
				// hash map, as for the other maps in this file.
				printf(" %s %s", G_iter->first.c_str(), s_diff_out.c_str());
				printf("\n");
				assert(s_diff_out.compare(G_iter->first) == 0);
				assert((G_iter->second)->dx == dx_out);
				assert((G_iter->second)->dy == dy_out);
			 }

/* --- */

/* 
> I thought that HW 3 would be not enough to find correct prob. of differential, does it still find 2^-29.5 probability?
 
Yes, it actually does. It finds the same diff. by DTU with the same probability. All considered differences have max. HW 3.

> Maybe you could save to disk the internal state of the differential after 8-10 rounds (it would take 200-300 Mbytes?) and then start search from 8-10 rounds on directly?
 
Yes, this is a good idea! I'll keep this in mind. In the meantime I was thinking could the following be the problem:

Suppose that we have found a differential for 11 rounds with prob. 2^-28 . We expect that on average 2^4 right pairs will follow this differential. We extend this differential to 12 rounds by appending the best differential that we find for the 12-th round. Suppose the latter happens to have probability P = 2^-4, which implies that 2^28 pairs satisfy this differential and 2^4 does not. What if the 2^4 pairs that do not satisfy the diff. for the 12-th round are precisely the 2^4 pairs that satisfy the diff. for 11 rounds? Then the actual prob. of the full 12 round diff. will be 0 while the algorithm will compute 2^-28 * 2^-4 = 2^-32. Do you think such cases are likely to occur? This may be one explanation why for 11 rounds and more I am unable to verify the probabilities.

> Yes, that's what I didn't like about it, you don't have full control over the search space. 

Yes, I agree.

> Do you keep statistics of how many such differentials  are kept in the hash table at each round?

Yes, here are the number of elements in the hash table at the beginning of every round (max. HW 5):

 1: entries_count = 1 = 2^0
 2: entries_count = 4 = 2^2
 3: entries_count = 66 = 2^6.04439
 4: entries_count = 1203 = 2^10.2324
 5: entries_count = 11180 = 2^13.4486
 6: entries_count = 157124 = 2^17.2615
 7: entries_count = 1866958 = 2^20.8323
 8: entries_count = 11949043 = 2^23.5104
 9: entries_count = 29908191 = 2^24.834
10: entries_count = 37337656 = 2^25.1541
11: entries_count = 38720315 = 2^25.2066
12: entries_count = 38993913 = 2^25.2167
13: entries_count = 39062878 = 2^25.2193


>>    Yes, 2^-30.790547 is definitely hard to exploit, but if we want to improve the DTU result in terms of number of rounds, it is not very likely that there exists a differential with much higher probability on 13 rounds. If there is one, then there must be also something significantly better on 12 rounds.

> I think it is definitely worth mentioning in the paper.

Another interesting thing related to the above. I ran the clustering algorithm for 13 rounds with the same input difference as the one by DTU and it finds the following differential: 13R: (1, 0) -> (400, 100) with probability 2^-29.415037. The prob. was experimentally computed over 2^32 random keys. This is better than the differential that I mentioned in my previous mail (see above).

 */


/* 
13 round trail found with threshold search

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
B[12] = 2^-37.000000
 0:     8000 ->        2 0.250000 (2^-2.000000)
 1:     2200 ->      800 0.062500 (2^-4.000000)
 2:      800 ->      200 0.250000 (2^-2.000000)
 3:      200 ->        0 0.250000 (2^-2.000000)
 4:        0 ->      200 1.000000 (2^0.000000)
 5:      200 ->      800 0.250000 (2^-2.000000)
 6:      800 ->     2200 0.250000 (2^-2.000000)
 7:     2200 ->     8400 0.062500 (2^-4.000000)
 8:     8400 ->     3A02 0.062500 (2^-4.000000)
 9:     3A02 ->      A00 0.003906 (2^-8.000000)
10:      A00 ->      200 0.125000 (2^-3.000000)
11:      200 ->      200 0.250000 (2^-2.000000)
12:      200 ->      A00 0.250000 (2^-2.000000)
p_tot = 0.000000000007276 = 2^-37.000000, Bn = 0.000000 = 2^-37.000000
[./src/simon-xor-threshold-search.cc:1248] nrounds = 13
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():128] dy_init        0
[./src/simon-xor-threshold-search.cc:148] Verify P for one round (2^20.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)     8000 ->        2
EXP  0: 0.249860 (2^-2.000809)     8000 ->        2

THE  1: 0.062500 (2^-4.000000)     2200 ->      800
EXP  1: 0.062596 (2^-3.997778)     2200 ->      800

THE  2: 0.250000 (2^-2.000000)      800 ->      200
EXP  2: 0.250127 (2^-1.999268)      800 ->      200

THE  3: 0.250000 (2^-2.000000)      200 ->        0
EXP  3: 0.250175 (2^-1.998993)      200 ->        0

THE  4: 1.000000 (2^0.000000)        0 ->      200
EXP  4: 1.000000 (2^0.000000)        0 ->      200

THE  5: 0.250000 (2^-2.000000)      200 ->      800
EXP  5: 0.250496 (2^-1.997141)      200 ->      800

THE  6: 0.250000 (2^-2.000000)      800 ->     2200
EXP  6: 0.249667 (2^-2.001922)      800 ->     2200

THE  7: 0.062500 (2^-4.000000)     2200 ->     8400
EXP  7: 0.062938 (2^-3.989931)     2200 ->     8400

THE  8: 0.062500 (2^-4.000000)     8400 ->     3A02
EXP  8: 0.062294 (2^-4.004763)     8400 ->     3A02

THE  9: 0.003906 (2^-8.000000)     3A02 ->      A00
EXP  9: 0.003841 (2^-8.024152)     3A02 ->      A00

THE 10: 0.125000 (2^-3.000000)      A00 ->      200
EXP 10: 0.124774 (2^-3.002611)      A00 ->      200

THE 11: 0.250000 (2^-2.000000)      200 ->      200
EXP 11: 0.250274 (2^-1.998421)      200 ->      200

THE 12: 0.250000 (2^-2.000000)      200 ->      A00
EXP 12: 0.249947 (2^-2.000308)      200 ->      A00

OK

[./src/simon-xor-threshold-search.cc:277] Verify P of differentials (2^20.000000 CPs)...
Input differences:     8000     2202

R# 0 Output differences:     2200     8000
THE  1: 0.250000 (2^-2.000000)     8000 ->     2200
EXP  1: 0.249583 (2^-2.002407)     8000 ->     2200

R# 1 Output differences:      800     2200
THE  2: 0.015625 (2^-6.000000)     2200 ->      800
EXP  2: 0.015491 (2^-6.012381)     2200 ->      800

R# 2 Output differences:      200      800
THE  3: 0.003906 (2^-8.000000)      800 ->      200
EXP  3: 0.003940 (2^-7.987725)      800 ->      200

R# 3 Output differences:        0      200
THE  4: 0.000977 (2^-10.000000)      200 ->        0
EXP  4: 0.001575 (2^-9.310002)      200 ->        0

R# 4 Output differences:      200        0
THE  5: 0.000977 (2^-10.000000)        0 ->      200
EXP  5: 0.001612 (2^-9.277192)        0 ->      200

R# 5 Output differences:      800      200
THE  6: 0.000244 (2^-12.000000)      200 ->      800
EXP  6: 0.000417 (2^-11.228511)      200 ->      800

R# 6 Output differences:     2200      800
THE  7: 0.000061 (2^-14.000000)      800 ->     2200
EXP  7: 0.000058 (2^-14.069263)      800 ->     2200

R# 7 Output differences:     8400     2200
THE  8: 0.000004 (2^-18.000000)     2200 ->     8400
EXP  8: 0.000005 (2^-17.678072)     2200 ->     8400

R# 8 Output differences:     3A02     8400
THE  9: 0.000000 (2^-22.000000)     8400 ->     3A02
EXP  9: 0.000000 (2^-inf)     8400 ->     3A02

R# 9 Output differences:      A00     3A02
THE 10: 0.000000 (2^-30.000000)     3A02 ->      A00
EXP 10: 0.000000 (2^-inf)     3A02 ->      A00

R#10 Output differences:      200      A00
THE 11: 0.000000 (2^-33.000000)      A00 ->      200
EXP 11: 0.000000 (2^-inf)      A00 ->      200

R#11 Output differences:      200      200
THE 12: 0.000000 (2^-35.000000)      200 ->      200
EXP 12: 0.000000 (2^-inf)      200 ->      200

R#12 Output differences:      A00      200
THE 13: 0.000000 (2^-37.000000)      200 ->      A00
EXP 13: 0.000000 (2^-inf)      200 ->      A00

OK

8000    2 2200  800  800  200  200    0    0  200  200  C00  C00 2200 2200 8400 8400 3A02 3A02  A00  A00  200  200  200  200  A00  | 2^-39.000000
8000    2 2200  C00  C00  200  200    0    0  200  200  C00  C00 2200 2200 8400 8400 3A02 3A02  A00  A00  200  200  200  200  A00  | 2^-41.000000
8000    2 2200  800  800  200  200    0    0  200  200  800  800 2200 2200 8400 8400 3A02 3A02  A00  A00  200  200  200  200  A00  | 2^-37.000000
8000    2 2200  C00  C00  200  200    0    0  200  200  800  800 2200 2200 8400 8400 3A02 3A02  A00  A00  200  200  200  200  A00  | 2^-39.000000
[./src/simon-xor-threshold-search.cc:1012] Sum 2^-36.356144

 */


/* --- */

#if 0									  // DEBUG
	 // Dump every entry stored in D[i+1]: both differences in hex and the
	 // base-2 logarithm of the probability.
	 std::unordered_map<std::string, differential_t *>::const_iterator D_iter;
	 for(D_iter = D[i+1].begin(); D_iter != D[i+1].end(); D_iter++) {
		const differential_t* entry = D_iter->second;
		const uint32_t entry_dx = entry->dx;
		const uint32_t entry_dy = entry->dy;
		const double entry_p = entry->p;
		printf("[%s:%d] D[%d]: %8X %8X 2^%f\n", __FILE__, __LINE__, i+1, entry_dx, entry_dy, log2(entry_p));
	 }
#endif

/* ---- */


/* 
	[./tests/simon-xor-threshold-search-tests.cc:721] temp_edp 0.000000 (2^-30.955606) nkeys 31
	[./tests/simon-xor-threshold-search-tests.cc:722] OK

 */

/* --- */

/* 

Mydiff-2: 

hw_max = 5, p_eps =  1.0 / (double)(1ULL << 4);

differential_t input_diff  = {0x400, 0x1900, 0, 1.0}; // mydiff-2

--- [./tests/simon-xor-threshold-search-tests.cc:1048] Round [12 / 14] : T size 4277, H size 2354 ---
entries_count = 2354 = 2^11.2009
bucket_count = 7517
max_bucket_count = 357913941 = 2^28.415
load_factor = entries / buckets = 0.313157 ( 31% )
max_load_factor = 1

if((hw32(dx_out & MASK) <= hw_max) && (p_out >= (max_output_diff->p * p_eps))) { // ! ...

[./tests/simon-xor-threshold-search-tests.cc:857] Update max p : (  51   40 2^-30.129635) -> (  11   40 2^-28.303032)
[./src/simon-xor-threshold-search.cc:441] Round keys from key schedule, 11 R:
[ 0]    6C9D [ 1]    3399 [ 2]    26F9 [ 3]    EB6E [ 4]     AEE [ 5]    5812 [ 6]    295E [ 7]    3CB4 [ 8]    4555 [ 9]    66E0 [10]    FEFF
[./src/simon-xor-threshold-search.cc:503] p = 2^-25.870717
[./tests/simon-xor-threshold-search-tests.cc:1083] MAX:       11       40 2^-28.303032 2^-25.870717 (2^32 CP)

DTU [./tests/simon-xor-threshold-search-tests.cc:1083] MAX:      100        0 2^-30.767293 2^-28.000000 (2^32 CP)

Mydiff-2 12R:

[./tests/simon-xor-threshold-search-tests.cc:857] Update max p : (   4   11 2^-31.521357) -> ( 144    0 2^-31.508147)
[./src/simon-xor-threshold-search.cc:441] Round keys from key schedule, 12 R:
[ 0]    6C9D [ 1]    3399 [ 2]    26F9 [ 3]    EB6E [ 4]     AEE [ 5]    5812 [ 6]    295E [ 7]    3CB4 [ 8]    4555 [ 9]    66E0 [10]    FEFF [11]    2487


Mydiff-2: 11R: (400, 1900) -> (11, 40)  2^-28.303032 2^-25.870717 (2^32 CP)
DTU-diff: 11R: (1, 0)      -> (0, 100) 2^-30.767293 2^-28.000000 (2^32 CP)


Mydiff-2: 12R: (400, 1900)  -> (144, 0) 2^-31.508147 2^-31.000000 (2^32 CP)   or  (4, 11) 2^-31.578362 2^-29.000000 (2^32 CP)
DTU-diff: 12R: (1, 0)       -> (100, 0) 2^-30.767293 2^-28.000000 (2^32 CP)

Mydiff-2: 13R: (400, 1900)  ->      (1, 4) 2^-33.420095 2^-29.000000 (2^32 CP)   : key [ 0]    6C9D [ 1]    3399 [ 2]    26F9 [ 3]    EB6E [ 4]     AEE [ 5]    5812 [ 6]    295E [ 7]    3CB4 [ 8]    4555 [ 9]    66E0 [10]    FEFF [11]    2487 [12]    79E1
DTU-diff: 13R: (1, 0)       ->  (400, 100) 2^-32.767293 2^-31.000000 (2^32 CP)

Mydiff-2: 14R: (400, 1900)  ->      (0, 1) 2^-34.742400 2^-31.000000 (2^32 CP)

---

 */



/* --- */


/* 
void simon_diff_get_max(std::unordered_map<std::string, differential_t *> H, differential_t* max_diff)
{
  max_diff->dx = 0;
  max_diff->dy = 0;
  max_diff->npairs = 0;
  max_diff->p = 0.0;
  std::unordered_map<std::string, differential_t *>::const_iterator H_iter = H.begin();
  while(H_iter != H.end()) {
	 const uint32_t dx = (H_iter->second)->dx;
	 const uint32_t dy = (H_iter->second)->dy;
	 double p = (H_iter->second)->p;
	 const differential_t diff = {dx, dy, 0, p};
	 simon_diff_update_max(diff, max_diff);
	 if(p > max_diff->p) {
		max_diff->p = p;
		max_diff->dx = dx;
		max_diff->dy = dy;
	 } else {
		uint32_t hw_sum = hw32(dx & MASK) + hw32(dy & MASK);
		uint32_t hw_sum_max = hw32(max_diff->dx & MASK) + hw32(max_diff->dy & MASK);
		if((p == max_diff->p) && (hw_sum < hw_sum_max)) { // if current has same prob. but smaller Hamming weight
		  max_diff->p = p;
		  max_diff->dx = dx;
		  max_diff->dy = dy;
		}
	 }
	 H_iter++;
  }
}

 */


/* --- */

#if 0									  // DEBUG
  // Walk H from the current position of H_iter to the end, printing each
  // entry's differences (hex) and log2 of its probability; afterwards
  // rewind H_iter to the beginning of H.
  printf("-------- [%s:%d] Print H ---------\n", __FILE__, __LINE__);
  for(; H_iter != H->end(); H_iter++) {
	 const uint32_t cur_dx = H_iter->second->dx;
	 const uint32_t cur_dy = H_iter->second->dy;
	 const double cur_p = H_iter->second->p;
	 printf("[%s:%d] H: %8X %8X 2^%f\n", __FILE__, __LINE__, cur_dx, cur_dy, log2(cur_p));
  }
  H_iter = H->begin();
#endif



/* --- */

/* 

CHAOS: hw_max 4

vvelichkov@r-cluster1-1:~/skcrypto/trunk/work/src/yaarx$ ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:1121] Tests, WORD_SIZE  = 16, MASK =     FFFF

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()973] hw_max 4

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [0 / 12] : H size 1 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 0 R:

[./src/simon-xor-threshold-search.cc:413] p = 2^0.000000
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:        1        0 2^0.000000 2^0.000000 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [1 / 12] : H size 4 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 1 R:
[ 0]    859E
[./src/simon-xor-threshold-search.cc:413] p = 2^-1.999896
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:        4        1 2^-2.000000 2^-1.999896 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [2 / 12] : H size 56 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 2 R:
[ 0]    859E [ 1]    DDB5
[./src/simon-xor-threshold-search.cc:413] p = 2^-3.999489
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:       11        4 2^-4.000000 2^-3.999489 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [3 / 12] : H size 464 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 3 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415
[./src/simon-xor-threshold-search.cc:413] p = 2^-7.300082
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:       40       11 2^-7.299560 2^-7.300082 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [4 / 12] : H size 1721 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 4 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8
[./src/simon-xor-threshold-search.cc:413] p = 2^-9.303847
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:      111       40 2^-9.299560 2^-9.303847 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [5 / 12] : H size 8271 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 5 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8 [ 4]    D189
[./src/simon-xor-threshold-search.cc:413] p = 2^-14.061891
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:      544      100 2^-14.105182 2^-14.061891 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [6 / 12] : H size 43496 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 6 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8 [ 4]    D189 [ 5]    C37E
[./src/simon-xor-threshold-search.cc:413] p = 2^-16.756826
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:     1101      404 2^-17.734202 2^-16.756826 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [7 / 12] : H size 183404 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 7 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8 [ 4]    D189 [ 5]    C37E [ 6]    8B9C
[./src/simon-xor-threshold-search.cc:413] p = 2^-20.912537
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:     4000     1101 2^-21.738385 2^-20.912537 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [8 / 12] : H size 616241 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 8 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8 [ 4]    D189 [ 5]    C37E [ 6]    8B9C [ 7]    18B2
[./src/simon-xor-threshold-search.cc:413] p = 2^-21.830075
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:     1100     4000 2^-23.726549 2^-21.830075 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [9 / 12] : H size 1424730 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 9 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8 [ 4]    D189 [ 5]    C37E [ 6]    8B9C [ 7]    18B2 [ 8]    EE28
[./src/simon-xor-threshold-search.cc:413] p = 2^-inf
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:     4401        0 2^-26.471785 2^-inf (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [10 / 12] : H size 2337903 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 10 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8 [ 4]    D189 [ 5]    C37E [ 6]    8B9C [ 7]    18B2 [ 8]    EE28 [ 9]    61F7
[./src/simon-xor-threshold-search.cc:413] p = 2^-25.000000
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:        1        0 2^-27.604467 2^-25.000000 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:989] Round [11 / 12] : H size 2991473 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 11 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8 [ 4]    D189 [ 5]    C37E [ 6]    8B9C [ 7]    18B2 [ 8]    EE28 [ 9]    61F7 [10]    FAAB
[./src/simon-xor-threshold-search.cc:413] p = 2^-24.000000
[./tests/simon-xor-threshold-search-tests.cc:1009] MAX:      200      100 2^-29.521775 2^-24.000000 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1017] Round [12 / 12] : H size 3318435 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 12 R:
[ 0]    859E [ 1]    DDB5 [ 2]    8415 [ 3]    87C8 [ 4]    D189 [ 5]    C37E [ 6]    8B9C [ 7]    18B2 [ 8]    EE28 [ 9]    61F7 [10]    FAAB [11]    BE8D
[./src/simon-xor-threshold-search.cc:413] p = 2^-inf
[./tests/simon-xor-threshold-search-tests.cc:1024] MAX:        2      230 2^-29.521795 2^-inf (2^25 CP)

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1037] hw_max 4


 */



/* ---- */
/* 
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:1115] Tests, WORD_SIZE  = 16, MASK =     FFFF

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()967] hw_max 4

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [0 / 12] : H size 1 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 0 R:

[./src/simon-xor-threshold-search.cc:413] p = 2^0.000000
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:        1        0 2^0.000000 2^0.000000 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [1 / 12] : H size 4 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 1 R:
[ 0]    FA59
[./src/simon-xor-threshold-search.cc:413] p = 2^-2.000536
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:        4        1 2^-2.000000 2^-2.000536 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [2 / 12] : H size 56 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 2 R:
[ 0]    FA59 [ 1]    2B5F
[./src/simon-xor-threshold-search.cc:413] p = 2^-3.998117
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:       11        4 2^-4.000000 2^-3.998117 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [3 / 12] : H size 464 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 3 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894
[./src/simon-xor-threshold-search.cc:413] p = 2^-7.294287
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:       40       11 2^-7.299560 2^-7.294287 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [4 / 12] : H size 1721 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 4 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904
[./src/simon-xor-threshold-search.cc:413] p = 2^-9.284038
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:     4111       40 2^-9.299560 2^-9.284038 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [5 / 12] : H size 8271 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 5 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904 [ 4]    76E4
[./src/simon-xor-threshold-search.cc:413] p = 2^-14.252646
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:      544      100 2^-14.105182 2^-14.252646 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [6 / 12] : H size 43496 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 6 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904 [ 4]    76E4 [ 5]    41CE
[./src/simon-xor-threshold-search.cc:413] p = 2^-17.225213
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:     1101      404 2^-17.734202 2^-17.225213 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [7 / 12] : H size 183440 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 7 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904 [ 4]    76E4 [ 5]    41CE [ 6]    D6CA
[./src/simon-xor-threshold-search.cc:413] p = 2^-23.415037
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:     4100     1101 2^-21.738385 2^-23.415037 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [8 / 12] : H size 616400 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 8 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904 [ 4]    76E4 [ 5]    41CE [ 6]    D6CA [ 7]     CDA
[./src/simon-xor-threshold-search.cc:413] p = 2^-24.000000
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:     1100     4000 2^-23.726549 2^-24.000000 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [9 / 12] : H size 1425402 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 9 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904 [ 4]    76E4 [ 5]    41CE [ 6]    D6CA [ 7]     CDA [ 8]     966
[./src/simon-xor-threshold-search.cc:413] p = 2^-inf
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:     4401        0 2^-26.471785 2^-inf (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [10 / 12] : H size 2338379 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 10 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904 [ 4]    76E4 [ 5]    41CE [ 6]    D6CA [ 7]     CDA [ 8]     966 [ 9]    A227
[./src/simon-xor-threshold-search.cc:413] p = 2^-25.000000
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:        1        0 2^-27.604467 2^-25.000000 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:983] Round [11 / 12] : H size 2991105 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 11 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904 [ 4]    76E4 [ 5]    41CE [ 6]    D6CA [ 7]     CDA [ 8]     966 [ 9]    A227 [10]    ADE6
[./src/simon-xor-threshold-search.cc:413] p = 2^-inf
[./tests/simon-xor-threshold-search-tests.cc:1003] MAX:     1402       38 2^-29.521775 2^-inf (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1011] Round [12 / 12] : H size 3318386 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 12 R:
[ 0]    FA59 [ 1]    2B5F [ 2]    6894 [ 3]    4904 [ 4]    76E4 [ 5]    41CE [ 6]    D6CA [ 7]     CDA [ 8]     966 [ 9]    A227 [10]    ADE6 [11]    4191
[./src/simon-xor-threshold-search.cc:413] p = 2^-inf
[./tests/simon-xor-threshold-search-tests.cc:1018] MAX:       19       3C 2^-29.521795 2^-inf (2^25 CP)

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1031] hw_max 4

real    34m0.028s
user    33m51.323s
sys     0m0.324s

*/


/* --- */

/**
 * Experimentally check a fixed differential (input_diff -> output_diff over
 * nrounds rounds) under several random keys.
 *
 * For each of nkeys keys: draw four random key words, call
 * simon_verify_differential() with npairs pairs, print the value it returns
 * (presumably the experimental probability for that key) and the average of
 * the values obtained so far.
 *
 * The differential under test is selected at compile time through the
 * "#if 0" / "#if 1" blocks below; exactly one of them must be enabled.
 */
void test_simon_verify_differential()
{
  uint64_t npairs = (1ULL << 32);
#if 0									  // DTU, 12R, 2^-36 -> 2^-29
  uint32_t nrounds = 12;
  differential_t input_diff = {0x0001, 0x0000, 0, 0.0};
  differential_t output_diff = {0x0100, 0x0000, 0, 0.0};
#endif
#if 1
  uint32_t nrounds = 6;
  differential_t input_diff = {0x0001, 0x0000, 0, 0.0};
  //  differential_t output_diff = {0x0191, 0x0040, 0, 0.0};
  //  differential_t output_diff = {0x410D, 0x0000, 0, 0.0};
  //  differential_t output_diff = {0x0CC2, 0x0000, 0, 0.0};
  differential_t output_diff = {0x1111, 0x0400, 0, 0.0};
#endif
#if 0									  // LUX, 12R, trail 2^-33 -> ?
  //12 R: ( 280  A80) -> ( 200  880) 2^-33.000000
  //./src/simon-xor-threshold-search.cc:simon_xor_trail_search():1260]:
  // Verified 12 R differential (     280      A80) -> (     880      200) | 2^20.00 CP pairs
  uint32_t nrounds = 12;
  differential_t input_diff  = {0x280, 0xA80, 0, 0.0};
  differential_t output_diff = {0x880, 0x200, 0, 0.0}; // 12
#endif
#if 0									  // LUX, 8R
  uint32_t nrounds = 8;
  differential_t input_diff  = {0x280, 0xA80, 0, 0.0};
  differential_t output_diff = {0x80, 0x200, 0, 0.0}; // 12
#endif
#if 0									  // LUX, 9R, 2^-18.67 -> ?
  // Verified 9 R differential (    2000     8880) -> (    8880     2000) | 2^20.00 CP pairs
  uint32_t nrounds = 9;
  differential_t input_diff  = {0x2000, 0x8880, 0, 0.0};
  differential_t output_diff = {0x8880,  0x2000, 0, 0.0}; // 12
#endif
  // 	 uint32_t dy_in = trail[1].dx ^ trail[0].dy;
#if 0									  // LUX, 12R, 2^-34 -> ?
  uint32_t nrounds = 12;
  //  uint32_t dyy_init = 0x1900;
  differential_t input_diff  = { 0x400, 0x1900, 0, 0.0};
  //  differential_t output_diff = {0x500, 0x1500, 0, 0.0}; // 12
  //  differential_t output_diff = {0x1D01, 0x4200, 0, 0.0}; // 7
  //  differential_t output_diff = {0x100, 0x500, 0, 0.0}; // 9
  //  differential_t output_diff = {0x500, 0x100, 0, 0.0}; // 11
  differential_t output_diff = {0x1500, 0x500, 0, 0.0}; // 12
#endif
#if 0									  // LUX, 12R, 2^-36 -> ?
  uint32_t nrounds = 12;
  differential_t input_diff  = {0x8808, 0x2020, 0, 0.0};
  differential_t output_diff = {0x888,  0x2020, 0, 0.0}; // 12
#endif
#if 0									  // LUX, 12R, 2^-35 -> ?
  uint32_t nrounds = 12;
  differential_t input_diff  = {0x1400, 0x7000, 0, 0.0};
  differential_t output_diff = {0x5401, 0x5000, 0, 0.0}; // 12
#endif

  const uint32_t nkeys = (1U << 2);
  // Number of key words that are randomized; the rest of key[] stays zero.
  // NOTE(review): assumes SIMON_MAX_NROUNDS >= 4 -- confirm.
  const uint32_t nkey_words = 4;

  // The differences do not depend on the key: read them once.
  const uint32_t dx_in = input_diff.dx;
  const uint32_t dy_in = input_diff.dy;
  const uint32_t dx_out = output_diff.dx;
  const uint32_t dy_out = output_diff.dy;

  double edp = 0.0;				  // sum of the per-key results so far
  for(uint32_t i = 0; i < nkeys; i++) {

	 // generate random key
	 uint32_t key[SIMON_MAX_NROUNDS] = {0};
	 for(uint32_t j = 0; j < nkey_words; j++) {
		key[j] = random32() & MASK;
	 }

	 printf("--- [%s:%d] Key ", __FILE__, __LINE__);
	 for(uint32_t j = 0; j < nkey_words; j++) {
		printf("%8X ", key[j]);
	 }
	 printf(" --- \n");

	 printf("[%s:%s():%d]:\n Verify %u R differential (%8X %8X) -> (%8X %8X) | 2^%4.2f CP pairs\n", __FILE__, __FUNCTION__, __LINE__, nrounds, dx_in, dy_in, dx_out, dy_out, log2(npairs));

	 //	 npairs = (1ULL << 16);
	 const double p_exp = simon_verify_differential(key, input_diff, output_diff, nrounds, npairs);

	 printf("[%s:%s():%d]:\n Verified %u R differential (%8X %8X) -> (%8X %8X) | 2^%4.2f CP pairs\n Final probability p = 2^%f\n", __FILE__, __FUNCTION__, __LINE__, nrounds, dx_in, dy_in, dx_out, dy_out, log2(npairs), log2(p_exp));

	 edp += p_exp;

	 // Running average over the keys processed so far. Dividing by the total
	 // nkeys would under-report every intermediate value, and the count that
	 // is printed must be the number of keys used, not the 0-based index.
	 const uint32_t nkeys_done = i + 1;
	 const double temp_edp = edp / (double)nkeys_done;

	 printf("[%s:%d] temp_edp %f (2^%f) nkeys %u\n", __FILE__, __LINE__, temp_edp, log2(temp_edp), nkeys_done);
	 printf("[%s:%d] OK\n\n", __FILE__, __LINE__);

  }
}

/* --- */


/* 

Simon32 with max-hw = 6: aborted after 6 rounds

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:1144] Tests, WORD_SIZE  = 16, MASK =     FFFF

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()998] hw_max 6

--- [./tests/simon-xor-threshold-search-tests.cc:1012] Round [0 / 12] : H size 1 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 0 R:

[./src/simon-xor-threshold-search.cc:410] p = 2^0.000000
[./tests/simon-xor-threshold-search-tests.cc:1032] MAX:        1        0 2^0.000000 2^0.000000 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1012] Round [1 / 12] : H size 4 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 1 R:
[ 0]    C88E
[./src/simon-xor-threshold-search.cc:410] p = 2^-2.000382
[./tests/simon-xor-threshold-search-tests.cc:1032] MAX:        4        1 2^-2.000000 2^-2.000382 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1012] Round [2 / 12] : H size 68 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 2 R:
[ 0]    C88E [ 1]    EC1D
[./src/simon-xor-threshold-search.cc:410] p = 2^-3.999948
[./tests/simon-xor-threshold-search-tests.cc:1032] MAX:       11        4 2^-4.000000 2^-3.999948 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1012] Round [3 / 12] : H size 1915 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 3 R:
[ 0]    C88E [ 1]    EC1D [ 2]    FE4B
[./src/simon-xor-threshold-search.cc:410] p = 2^-7.292439
[./tests/simon-xor-threshold-search-tests.cc:1032] MAX:       40       11 2^-7.299560 2^-7.292439 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1012] Round [4 / 12] : H size 46229 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 4 R:
[ 0]    C88E [ 1]    EC1D [ 2]    FE4B [ 3]    B5D1
[./src/simon-xor-threshold-search.cc:410] p = 2^-9.298694
[./tests/simon-xor-threshold-search-tests.cc:1032] MAX:      191       40 2^-9.299560 2^-9.298694 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1012] Round [5 / 12] : H size 1587046 ---
[./src/simon-xor-threshold-search.cc:352] Round keys from key schedule, 5 R:
[ 0]    C88E [ 1]    EC1D [ 2]    FE4B [ 3]    B5D1 [ 4]      87
[./src/simon-xor-threshold-search.cc:410] p = 2^-14.284038
[./tests/simon-xor-threshold-search-tests.cc:1032] MAX:      744      100 2^-14.105182 2^-14.284038 (2^25 CP)

--- [./tests/simon-xor-threshold-search-tests.cc:1012] Round [6 / 12] : H size 32272548 ---
terminate called after throwing an instance of 'std::bad_alloc'
what():  std::bad_alloc
Aborted

real    180m4.241s
user    179m13.156s
sys     0m20.349s

 */

/* 
Found 6815744 right pairs (2^-9.299560) |      191       40 | 1:(    FF7F     7FDF) 2:(    FF7E     7FDF)
[./src/simon.cc:277]  0:        1        0 | 0.250000 (2^-2.000000)
[./src/simon.cc:277]  1:        6        1 | 0.062500 (2^-4.000000)
[./src/simon.cc:277]  2:       11        6 | 0.062500 (2^-4.000000)
[./src/simon.cc:277]  3:       40       11 | 0.250000 (2^-2.000000)
[./src/simon.cc:299]  4:      191       40 |
p_trail = 0.000244 (2^-12.000000)
[./src/simon-xor-threshold-search.cc:416] p = 2^-9.299560
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():735]:
 Verified 4 R differential (       1        0) -> (     191       40) | 2^32.00 CP pairs
 Final probability p = 2^-9.299560

vs 2^-8.47 theoretical

 */

/* 
 * \p dy is the right input to the Simon round.
 * Only those entries for which (dx_lrot ^ dz ^ dy) has Hamming weight <= hw are added.
 */


/* --- */


/* 

12 rounds with max HW = 6


vpv@mazirat:~$ cd skcrypto/trunk/work/src/yaarx/
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:1126] Tests, WORD_SIZE  = 16, MASK =     FFFF

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()985] hw_max 6

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [0 / 12] : H size 1 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:        1        0 2^0.000000

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [1 / 12] : H size 4 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:        4        1 2^-2.000000

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [2 / 12] : H size 68 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:       11        4 2^-4.000000

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [3 / 12] : H size 2045 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:       40       11 2^-7.299560

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [4 / 12] : H size 31891 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:      111       40 2^-8.472523

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [5 / 12] : H size 488492 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:     1544      100 2^-13.454553

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [6 / 12] : H size 3959046 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:     1111      400 2^-14.963384

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [7 / 12] : H size 4687772 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:     249A        0 2^-17.251441

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [8 / 12] : H size 4693138 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:     D842        0 2^-19.169636

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [9 / 12] : H size 4693145 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:      596        0 2^-21.046930

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [10 / 12] : H size 4693151 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:     6603        0 2^-22.917197

--- [./tests/simon-xor-threshold-search-tests.cc:999] Round [11 / 12] : H size 4693149 ---
[./tests/simon-xor-threshold-search-tests.cc:1016] MAX:     430E        0 2^-24.583539

--- [./tests/simon-xor-threshold-search-tests.cc:1025] Round [12 / 12] : H size 4693151 ---

[./tests/simon-xor-threshold-search-tests.cc:1029] MAX:     E40C        0 2^-26.369916

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1042] hw_max 6

real    242m59.940s
user    242m11.912s
sys     0m1.548s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$
v

 */

/* 
5 rounds with max HW = 5

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:1124] Tests, WORD_SIZE  = 16, MASK =     FFFF

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [0 / 5] : H size 1 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:        1        0 2^0.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [1 / 5] : H size 4 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:        4        1 2^-2.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [2 / 5] : H size 66 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:       11        4 2^-4.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [3 / 5] : H size 1430 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:       40       11 2^-7.299560

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [4 / 5] : H size 9526 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:      119       40 2^-8.516184

--- [./tests/simon-xor-threshold-search-tests.cc:1026] Round [5 / 5] : H size 67677 ---

[./tests/simon-xor-threshold-search-tests.cc:1030] MAX:     1541      100 2^-13.418799

real    13m38.474s
user    13m35.943s
sys     0m0.016s
v

After the fix

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:1130] Tests, WORD_SIZE  = 16, MASK =     FFFF

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()989] hw_max 5

--- [./tests/simon-xor-threshold-search-tests.cc:1003] Round [0 / 5] : H size 1 ---
[./tests/simon-xor-threshold-search-tests.cc:1020] MAX:        1        0 2^0.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1003] Round [1 / 5] : H size 4 ---
[./tests/simon-xor-threshold-search-tests.cc:1020] MAX:        4        1 2^-2.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1003] Round [2 / 5] : H size 66 ---
[./tests/simon-xor-threshold-search-tests.cc:1020] MAX:       11        4 2^-4.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1003] Round [3 / 5] : H size 1203 ---
[./tests/simon-xor-threshold-search-tests.cc:1020] MAX:       40       11 2^-7.299560

--- [./tests/simon-xor-threshold-search-tests.cc:1003] Round [4 / 5] : H size 11180 ---
[./tests/simon-xor-threshold-search-tests.cc:1020] MAX:      111       40 2^-9.299560

--- [./tests/simon-xor-threshold-search-tests.cc:1029] Round [5 / 5] : H size 157124 ---

[./tests/simon-xor-threshold-search-tests.cc:1033] MAX:      545      100 2^-14.105182

[./tests/simon-xor-threshold-search-tests.cc:simon_diff_search()1046] hw_max 5

real    14m7.434s
user    14m5.149s
sys     0m0.004s


 */

/* 

12 rounds with max HW = 5

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:1124] Tests, WORD_SIZE  = 16, MASK =     FFFF

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [0 / 12] : H size 1 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:        1        0 2^0.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [1 / 12] : H size 4 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:        4        1 2^-2.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [2 / 12] : H size 66 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:       11        4 2^-4.000000

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [3 / 12] : H size 1430 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:       40       11 2^-7.299560

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [4 / 12] : H size 9526 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:      119       40 2^-8.516184

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [5 / 12] : H size 67677 ---

[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:     1541      100 2^-13.418799

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [6 / 12] : H size 488403 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:     410D        0 2^-15.792404

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [7 / 12] : H size 1090125 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:     8071        0 2^-18.682823

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [8 / 12] : H size 1174348 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:     4310        0 2^-20.652389

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [9 / 12] : H size 1178290 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:     B003        0 2^-22.431358

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [10 / 12] : H size 1179743 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:     A02C        0 2^-25.212620

--- [./tests/simon-xor-threshold-search-tests.cc:1000] Round [11 / 12] : H size 1179347 ---
[./tests/simon-xor-threshold-search-tests.cc:1017] MAX:     AC80        0 2^-27.404453

--- [./tests/simon-xor-threshold-search-tests.cc:1026] Round [12 / 12] : H size 1179088 ---

[./tests/simon-xor-threshold-search-tests.cc:1030] MAX:      CC2        0 2^-29.442763

real    87m11.801s
user    86m53.898s
sys     0m0.320s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$


 */

/* --- */

/* 
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ clear

4 rounds with no limit on Hamming weight

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:1092] Tests, WORD_SIZE  = 16, MASK =     FFFF

--- [./tests/simon-xor-threshold-search-tests.cc:980] Round [0 / 12] : H size 1 ---
[./tests/simon-xor-threshold-search-tests.cc:997] MAX:        1        0 2^0.000000

--- [./tests/simon-xor-threshold-search-tests.cc:980] Round [1 / 12] : H size 4 ---
[./tests/simon-xor-threshold-search-tests.cc:997] MAX:        6        1 2^-2.000000

--- [./tests/simon-xor-threshold-search-tests.cc:980] Round [2 / 12] : H size 68 ---
[./tests/simon-xor-threshold-search-tests.cc:997] MAX:      411        4 2^-4.000000

--- [./tests/simon-xor-threshold-search-tests.cc:980] Round [3 / 12] : H size 2380 ---
[./tests/simon-xor-threshold-search-tests.cc:997] MAX:     1142       11 2^-7.299560

--- [./tests/simon-xor-threshold-search-tests.cc:980] Round [4 / 12] : H size 138164 ---
[./tests/simon-xor-threshold-search-tests.cc:997] MAX:      191       40 2^-8.472523

--- [./tests/simon-xor-threshold-search-tests.cc:980] Round [5 / 12] : H size 10139251 ---


 */

/* --- */
  //  std::unordered_map<std::string, differential_t *>::const_iterator H_iter = H->begin();
  //  while(H_iter != H->end()) {
  //	 free(H_iter->second);
  //	 H_iter++;
  //  }



/* --- */


  //  uint32_t len = ALL_WORDS;
  //  T = (std::vector<differential_t> *)calloc(len, sizeof(std::vector<differential_t>));

/* --- */
  // Orphaned scratch fragment: these statements are not enclosed by any
  // function in this part of the dump.
  // T is a table of differential vectors; it is declared but never filled here.
  std::vector<std::vector<differential_t>> T;

  // X receives the single hand-built differential constructed below.
  std::vector<differential_t> X;

  differential_t diff;

	 // Differences dx = 1, dy = 2; pair counter and probability both start at zero.
	 diff.dx = 1;
	 diff.dy = 2;
	 diff.npairs = 0;
	 diff.p = 0;

	 X.push_back(diff);



/* --- */


// {--- Multiply the DDT table ---

/**
 * Intended to fill the table \p D with xdp_rot_and(delta, dc, s, t) for every
 * pair of differences (delta, dc) below ALL_WORDS, stored as D[delta][dc].
 *
 * NOTE: the entire body is compiled out with #if 0, so as written this
 * function does nothing and leaves \p D untouched.
 *
 * \param D table written as D[delta][dc] (by the disabled code only).
 * \param s forwarded unchanged to xdp_rot_and() (disabled code only).
 * \param t forwarded unchanged to xdp_rot_and() (disabled code only).
 * \param p_thres the disabled code asserts that this is exactly 0.0.
 */
void simon_compute_ddt(double** D, const uint32_t s, const uint32_t t, const double p_thres)
{
#if 0
  // Preconditions checked by the disabled implementation.
  assert(WORD_SIZE <= 16);
  assert(p_thres == 0.0);

  uint32_t row = 0;
  while(row < ALL_WORDS) {
	 // One full row: probabilities for a fixed first difference.
	 for(uint32_t col = 0; col < ALL_WORDS; col++) {
		D[row][col] = xdp_rot_and(row, col, s, t);
	 }
	 // DEBUG: progress indicator, refreshed in place every 1000 rows.
	 if((row % 1000) == 0) {
		printf("row %10d / %10lld\r", row, ALL_WORDS);
		fflush(stdout);
	 }
	 row++;
  }
#endif
}

/**
 * Placeholder for the "multiply the DDT table" experiment.
 *
 * The function body contains no executable statements: the only content is
 * a commented-out sketch that would declare a table and pass it to
 * simon_rot_and_ddt(). Calling this function has no effect.
 */
void simon_ddt_multiply()
{
  // Commented-out sketch; lrot_const_s, lrot_const_t and p_thres are not
  // declared anywhere in this function.
  //  double** DDT;
  //  simon_rot_and_ddt(DDT, lrot_const_s, lrot_const_t, p_thres);

}


// --- Multiply the DDT table ---}




/* --- */

/* 
For Simon32, 16-bit words, the total number of differentials with prob. >= 0.05 is 2753 = 2^{11.427}:

Initial set sizes: Dp 2753, Dxy 2753

#define XDP_ROT_AND_MAX_DIFF_CNT (1ULL << 20)//(1ULL << 8)//(1ULL << 7)
#define XDP_ROT_AND_P_THRES 0.05

#--- [./tests/simon-xor-threshold-search-tests.cc:733] Tests, WORD_SIZE  = 16, MASK =     FFFF
       923 /    1048576
      1147 /    1048576
      2211 /    1048576

Initial set sizes: Dp 2753, Dxy 2753

 */

/* 
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:733] Tests, WORD_SIZE  = 16, MASK =     FFFF
       923 /    1048576
----
xxx
      1147 /    1048576
xxx
      2211 /    1048576
xxx
 */


/* 

2^8 = 256 entries

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
 0:     4000 ->     8041 0.250000 (2^-2.000000)
 1:     1100 ->      400 0.062500 (2^-4.000000)
 2:      400 ->      100 0.250000 (2^-2.000000)
 3:      100 ->        0 0.250000 (2^-2.000000)
 4:        0 ->      100 1.000000 (2^0.000000)
 5:      100 ->      400 0.250000 (2^-2.000000)
 6:      400 ->     1100 0.250000 (2^-2.000000)
 7:     1100 ->     4000 0.062500 (2^-4.000000)
 8:     4000 ->     1101 0.250000 (2^-2.000000)
 9:     1101 ->      404 0.015625 (2^-6.000000)
10:      404 ->      111 0.062500 (2^-4.000000)
p_tot = 0.000000000931323 = 2^-30.000000, Bn = 0.000000 = 2^-30.000000
[./src/simon-xor-threshold-search.cc:1022] nrounds = 12, Bn_init = 2^-36.000000 : key     8CA8     1EDA     E89B     E914
[./src/simon-xor-threshold-search.cc:931] 11 | Update best found Bn: 2^-36.000000 -> 2^-35.000000
[./src/simon-xor-threshold-search.cc:931] 11 | Update best found Bn: 2^-35.000000 -> 2^-34.000000
 */

/* 

Four methods:


- simon_hash_map_add_trail
- simon_hash_map_add_differential
- simon_diff_update_max
- simon_print_trail

 */


/* --- */


#if 0
  // Disabled fragment (never compiled). It relies on names defined outside
  // the fragment: trails_hash_map, diffs_hash_map, trail_len, diff_max.
  //
  // Step 1: walk every trail in trails_hash_map and merge it into
  //         diffs_hash_map, which is keyed by the trail's first and last
  //         differences.
  // Step 2: print the resulting differentials.
  // Step 3: copy the differential with the largest probability into diff_max.
  std::unordered_map<std::string, differential_t**>::const_iterator hash_map_iter = trails_hash_map.begin();
  // NOTE(review): size() is passed to a %d conversion; confirm the format if this code is re-enabled.
  printf("[%s:%d] Found %d trails:\n", __FILE__, __LINE__, trails_hash_map.size());
  while(hash_map_iter != trails_hash_map.end()) {
	 // Probability of the trail: product of the p fields of its trail_len entries.
	 double p_trail = 1.0;
	 for(uint32_t i = 0; i < trail_len; i++) {
		//		printf("%4X %4X ", (*(hash_map_iter->second))[i].dx, (*(hash_map_iter->second))[i].dy);
		p_trail *= (*(hash_map_iter->second))[i].p;
	 }
	 // The differential is identified by the first and the last trail entry.
	 uint32_t dx_in = (*(hash_map_iter->second))[0].dx;
	 uint32_t dy_in = (*(hash_map_iter->second))[0].dy;
	 uint32_t dx_out = (*(hash_map_iter->second))[trail_len - 1].dx;
	 uint32_t dy_out = (*(hash_map_iter->second))[trail_len - 1].dy;

	 // Map key: the four values streamed back to back with no separator.
	 // NOTE(review): without a separator, different tuples can produce the
	 // same key (e.g. 1,23 and 12,3) -- confirm whether this matters.
	 std::stringstream oss("");
	 oss << dx_in;
	 oss << dy_in;
	 oss << dx_out;
	 oss << dy_out;

	 std::string s_diff = oss.str();//diff_to_string(diff, trail_len);
	 std::unordered_map<std::string, differential_t**>::const_iterator diff_hash_map_iter = 
		diffs_hash_map.find(s_diff);

	 if(diff_hash_map_iter == diffs_hash_map.end()) {
		// First trail for this key: allocate a two-element record,
		// [0] = input differences with p = 1.0, [1] = output differences with p = p_trail.
		printf("[%s:%d] Add new differential: %4X %4X -> %4X %4X 2^%f\n", __FILE__, __LINE__, dx_in, dy_in, dx_out, dy_out, log2(p_trail));
		differential_t** new_diff;
		new_diff = (differential_t** )calloc(1, sizeof(differential_t*));
		*new_diff = (differential_t*)calloc(2, sizeof(differential_t));
		(*new_diff)[0].dx = dx_in;
		(*new_diff)[0].dy = dy_in;
		(*new_diff)[0].p = 1.0;
		(*new_diff)[1].dx = dx_out;
		(*new_diff)[1].dy = dy_out;
		(*new_diff)[1].p = p_trail;
		std::pair<std::string, differential_t**> new_pair (s_diff,new_diff);
		diffs_hash_map.insert(new_pair);
	 } else {
		// Key already present: add this trail's probability to the stored one.
		double old_p = (*(diff_hash_map_iter->second))[1].p;
		(*(diff_hash_map_iter->second))[1].p += p_trail;
		double new_p = (*(diff_hash_map_iter->second))[1].p;
		printf("[%s:%d] Improve differential prob:  %4X %4X -> %4X %4X 2^%f -> 2^%f\n", __FILE__, __LINE__, (*(diff_hash_map_iter->second))[0].dx, (*(diff_hash_map_iter->second))[0].dy, (*(diff_hash_map_iter->second))[1].dx, (*(diff_hash_map_iter->second))[1].dy, log2(old_p), log2(new_p));
	 }
	 hash_map_iter++;
  }

  printf("[%s:%d] Diferentials:\n", __FILE__, __LINE__);
  simon_print_diff_hash_table(diffs_hash_map);

  // Scan all stored differentials and remember the one with the largest probability.
  double p_max = 0.0;
  //  std::unordered_map<std::string, differential_t**>::const_iterator 
  hash_map_iter = diffs_hash_map.begin();
  //  printf("[%s:%d] Found %d differentials:\n", __FILE__, __LINE__, diffs_hash_map.size());
  //  uint32_t trail_cnt = 0;
  while(hash_map_iter != diffs_hash_map.end()) {
	 //	 trail_cnt++;
	 //	 printf("[%5d] ", trail_cnt);
	 //	 printf("%4X %4X -> ", (*(hash_map_iter->second))[0].dx, (*(hash_map_iter->second))[0].dy);
	 //	 printf("%4X %4X ", (*(hash_map_iter->second))[1].dx, (*(hash_map_iter->second))[1].dy);
	 double p = (*(hash_map_iter->second))[1].p;
	 if(p > p_max) {
		// New maximum: copy both end points and the probability into diff_max.
		p_max = p;
		diff_max[0].dx = (*(hash_map_iter->second))[0].dx;
		diff_max[0].dy = (*(hash_map_iter->second))[0].dy;
		diff_max[1].dx = (*(hash_map_iter->second))[1].dx;
		diff_max[1].dy = (*(hash_map_iter->second))[1].dy;
		diff_max[1].p = p_max;
	 }
	 //	 printf(" | 2^%f\n", log2(p));
	 hash_map_iter++;
  }

#endif



/* --- */


  // Add differential
  // Disabled fragment (never compiled). It relies on names defined outside
  // the fragment: diff, trail_len, p, p_tot and the pointer diffs_hash_map.
  // It records the differential formed by the first and last entries of the
  // trail `diff` in *diffs_hash_map, adding p to the stored probability when
  // the key already exists.
#if 0
  // NOTE(review): p_tot is printed here but p is what gets stored below;
  // both come from outside this fragment -- confirm they are the same quantity.
  printf("Probability of differential: 2^%f\n", log2(p_tot));
  std::string s_diff = diff_to_string(diff, trail_len);
  std::unordered_map<std::string, differential_t**>::const_iterator diff_hash_map_iter = 
	 diffs_hash_map->find(s_diff);
  if(diff_hash_map_iter == diffs_hash_map->end()) {
	 // Key not present: allocate a two-element record,
	 // [0] = first trail entry with p = 1.0, [1] = last trail entry with p = p.
	 printf("[%s:%d] Add new differential: %4X %4X -> %4X %4X 2^%f\n", __FILE__, __LINE__, diff[0].dx, diff[0].dy, diff[trail_len - 1].dx, diff[trail_len -1].dy, log2(p));
	 differential_t** new_diff;
	 new_diff = (differential_t** )calloc(1, sizeof(differential_t*));
	 *new_diff = (differential_t*)calloc(2, sizeof(differential_t));
	 (*new_diff)[0].dx = diff[0].dx;
	 (*new_diff)[0].dy = diff[0].dy;
	 (*new_diff)[0].p = 1.0;
	 (*new_diff)[1].dx = diff[trail_len - 1].dx;
	 (*new_diff)[1].dy = diff[trail_len - 1].dy;
	 (*new_diff)[1].p = p;
	 std::pair<std::string, differential_t**> new_pair (s_diff,new_diff);
	 diffs_hash_map->insert(new_pair);
  } else {
	 // Key already present: add p to the stored probability.
	 double old_p = (*(diff_hash_map_iter->second))[1].p;
	 (*(diff_hash_map_iter->second))[1].p += p;
	 double new_p = (*(diff_hash_map_iter->second))[1].p;
	 printf("[%s:%d] Improve differential prob:  %4X %4X -> %4X %4X 2^%f -> 2^%f\n", __FILE__, __LINE__, (*(diff_hash_map_iter->second))[0].dx, (*(diff_hash_map_iter->second))[0].dy, (*(diff_hash_map_iter->second))[1].dx, (*(diff_hash_map_iter->second))[1].dy, log2(old_p), log2(new_p));
  }
#endif



/* --- */

// {--- Simon differential search ---

void simon_xor_differential_search(const int n, const int nrounds, 
										  double B[NROUNDS], double* Bn,
										  const differential_t diff_in[NROUNDS], differential_t trail[NROUNDS], 
										  const uint32_t dyy_init,
										  uint32_t lrot_const_s, uint32_t lrot_const_t, uint32_t lrot_const_u,
										  std::multiset<differential_t, struct_comp_diff_p>* diff_mset_p, // highways
										  std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy,
										  std::multiset<differential_t, struct_comp_diff_p>* croads_diff_mset_p, // country roads
										  std::set<differential_t, struct_comp_diff_dx_dy>* croads_diff_set_dx_dy,
										  std::unordered_map<std::string, differential_t**>* trails_hash_map,
										  double p_thres)
{
  double pn = 0.0;

  // make a local copy of the input diff trail
  differential_t diff[NROUNDS] = {{0, 0, 0, 0.0}};
  for(int i = 0; i < n; i++) {
	 diff[i].dx = diff_in[i].dx;
	 diff[i].dy = diff_in[i].dy;
	 diff[i].p = diff_in[i].p;
  }

#if 1
  uint32_t max_lp = n;//4;
  uint32_t cnt_lp = 0;
  uint32_t trail_len = n;
  cnt_lp = simon_xor_threshold_count_lp(diff, trail_len, p_thres);
#endif
  //  printf("[%s:%d] cnt_lp %d / %d\n", __FILE__, __LINE__, cnt_lp, max_lp);

  if((n == 0) && (nrounds == 1)) {						  // Only one round
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 uint32_t cnt = 0;
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx; // alpha
		uint32_t dy = mset_iter->dy; // gamma
		pn = mset_iter->p;;
		uint32_t dxx = dy ^ dyy_init ^ LROT(dx, lrot_const_u); // gamma ^ dy_i ^ (alpha <<< 2)
#if 0									  // DEBUG
		printf("\r[%s:%d] %2d: [%2d / %2d] %8X -> %8X, 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		fflush(stdout);
#endif
		if((pn >= *Bn) && (pn != 0.0)) {
		  trail[n].dx = dx;		  // dx_{i}
		  trail[n].dy = dxx;		  // dx_{i+1} 
		  trail[n].p = pn;
		  *Bn = pn;
		  B[n] = pn;
		} else {
		  b_end = true;
		}
		mset_iter++;
		cnt++;
	 }	// while()
  }

  if((n == 0) && (nrounds > 1)) {						  // Round-0 and not last round
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 uint32_t cnt = 0;
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx; // alpha
		uint32_t dy = mset_iter->dy; // gamma
		pn = mset_iter->p;
		uint32_t dxx = dy ^ dyy_init ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dy_i ^ (alpha <<< 2)
		double p = pn * B[nrounds - 1 - (n + 1)];
		assert(B[nrounds - 1 - (n + 1)] != 0.0);
		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();
#if 0									  // DEBUG
		printf("\r[%s:%d] %2d: [%2d / %2d] %8X -> %8X, 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		fflush(stdout);
#endif
		if((p >= *Bn) && (p != 0.0)) {
		  diff[n].dx = dx;		  // dx_{i}
		  diff[n].dy = dxx;		  // dx_{i+1}
		  diff[n].p = pn;
		  simon_xor_differential_search(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, trails_hash_map, p_thres);
		} else {
		  b_end = true;
		}
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		  cnt = 0;
		} else {
		  mset_iter++;
		  cnt++;
		}
	 }
  }

  if((n == 1) && (n != (nrounds - 1))) {						  // Round-1 and not last round
	 bool b_end = false;
	 uint32_t cnt = 0;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx; // alpha = dx_{i}
		uint32_t dy = mset_iter->dy; // gamma
		pn = mset_iter->p;
		uint32_t dyy = diff[n-1].dx; // dy_{i} = dx_{i-1}
		uint32_t dxx = dy ^ dyy ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dx_{i-1} ^ (alpha <<< 2)
		double p = diff[0].p * pn * B[nrounds - 1 - (n + 1)];
		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();
#if 0									  // DEBUG
		printf("\r[%s:%d] %2d: [%2d / %2d] %8X -> %8X, 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		fflush(stdout);
#endif
		if((p >= *Bn) && (p != 0.0)) {
		  diff[n].dx = dx;		  // dx_{i}
		  diff[n].dy = dxx;		  // dx_{i+1}
		  diff[n].p = pn;
		  simon_xor_differential_search(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, trails_hash_map, p_thres);
		} else {
		  b_end = true;
		} 
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		  cnt = 0;
		} else {
		  mset_iter++;
		  cnt++;
		}
	 }	// while()
  }

  //  if((n >= 2) && (n != (nrounds - 1))) { // Round-i and not last round
  if((n >= 2) && (n != (nrounds - 1)) && (cnt_lp <= max_lp)) {
	 uint32_t dx = diff[n - 1].dy; // dx_{i} = dy_{i - 1}
	 uint32_t dy = 0;					 // gamma

	 differential_t diff_dy;
	 diff_dy.dx = dx;  			  // alpha
	 diff_dy.dy = 0;
	 diff_dy.p = 0.0;

	 //	 std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy;
	 std::multiset<differential_t, struct_comp_diff_p> found_mset_p;

	 // p_i >= p_min = Bn / p1 * p2 ... * p{i-1} * B{n-i} 
	 double p_min = 0.0;
	 p_min = 1.0;
	 for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		p_min *= diff[i].p;
	 }
	 p_min = p_min * 1.0 * B[nrounds - 1 - (n + 1)]; 
	 p_min = *Bn / p_min;
	 assert(p_min <= 1.0);

	 // check if the differential is not already in the set
	 std::set<differential_t, struct_comp_diff_dx_dy>::iterator hway_iter = diff_set_dx_dy->lower_bound(diff_dy);
 	 bool b_found_in_hways = (hway_iter != diff_set_dx_dy->end()) && (hway_iter->dx == dx);
	 bool b_found_in_croads = false;
#if 1									  // DEBUG
	 if(p_thres == 0.0) {
		assert(b_found_in_hways == true);
	 }
#endif
	 if(b_found_in_hways) {
		//		while((hway_iter->dx == dx) && (hway_iter->p >= p_min)) {
		while(hway_iter->dx == dx) {
		  found_mset_p.insert(*hway_iter);
		  hway_iter++;
		}

	 } //else 
	 {

#define CLEAR_CROADS 1
#if CLEAR_CROADS								  // !!!
		croads_diff_set_dx_dy->clear();
		croads_diff_mset_p->clear();
#endif

		std::set<differential_t, struct_comp_diff_dx_dy>::iterator croad_iter = croads_diff_set_dx_dy->lower_bound(diff_dy);
		b_found_in_croads = (croad_iter != croads_diff_set_dx_dy->end()) && (croad_iter->dx == dx);
#if CLEAR_CROADS
		assert(b_found_in_croads == false);
#endif

		uint32_t dx_prev = diff[n - 1].dx; // dy_{i} = dx_{i - 1}
		assert(diff_set_dx_dy->size() != 0);
		const uint64_t max_cnt = XDP_ROT_AND_MAX_DIFF_CNT;  // !!!
		bool b_backto_hway = true;
		uint32_t cnt_new = xdp_rot_and_dx_pddt(diff_dy.dx, dx_prev, diff_set_dx_dy, diff_mset_p, croads_diff_set_dx_dy, croads_diff_mset_p, lrot_const_s, lrot_const_t, lrot_const_u, max_cnt, p_min, b_backto_hway);

		if(cnt_new != 0) {
#if 0									  // DEBUG
		  printf("\r[%s:%d] [%2d / %2d]: Added %d new country roads: p_min = %f (2^%f). New sizes: Dxy %d, Dp %d (cnt_lp %d / %d).", __FILE__, __LINE__, n, NROUNDS, cnt_new, p_min, log2(p_min), croads_diff_set_dx_dy->size(), croads_diff_mset_p->size(), cnt_lp, max_lp);
		  fflush(stdout);
#endif
		  croad_iter = croads_diff_set_dx_dy->lower_bound(diff_dy);
		  b_found_in_croads = (croad_iter != croads_diff_set_dx_dy->end()) && (croad_iter->dx == dx);
		} else {
		  //		printf("[%s:%d] [%2d / %2d]: No new country roads found: p_min = %f (2^%f).\n", __FILE__, __LINE__, n, NROUNDS, p_min, log2(p_min));
		}

		if(b_found_in_croads) {
#if CLEAR_CROADS
		  assert(croad_iter->p >= p_min);
#endif
		  while((croad_iter->dx == dx) && (croad_iter->p >= p_min)) {
			 //		while(croad_iter->dx == dx) {
#if CLEAR_CROADS
			 uint32_t dx = croad_iter->dx;
			 uint32_t dy = croad_iter->dy;
			 uint32_t dx_prev = diff[n - 1].dx;
			 bool b_is_hway = xdp_rot_and_is_dx_in_set_dx_dy(dy, dx, dx_prev, lrot_const_u, *diff_set_dx_dy);
			 assert(b_is_hway);
			 if(b_is_hway) {
				found_mset_p.insert(*croad_iter);
			 }
#else
			 found_mset_p.insert(*croad_iter);
#endif
			 croad_iter++;
		  }
		}
	 }

	 std::multiset<differential_t, struct_comp_diff_p>::iterator find_iter = found_mset_p.begin();

#if 0									  // DEBUG
	 printf("\r[%s:%d] %2d: Temp set size %d ", __FILE__, __LINE__, n, found_mset_p.size());
	 fflush(stdout);
#endif

	 //		while((find_iter->dx < (dx + 1)) && (find_iter != diff_set_dx_dy->end())) {
	 if(find_iter->dx == dx) {
		while((find_iter->dx == dx) && (find_iter != found_mset_p.end())) {
		  assert((find_iter->dx == dx));
		  diff_dy = *find_iter;

		  dx = diff_dy.dx;
		  dy = diff_dy.dy;
		  pn = diff_dy.p;

		  double p = 1.0;
		  for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
			 p *= diff[i].p;
		  }
		  p = p * pn * B[nrounds - 1 - (n + 1)]; 

		  uint32_t dyy = diff[n-1].dx; // dy_{i} = dx_{i-1}
		  uint32_t dxx = dy ^ dyy ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dx_{i-1} ^ (alpha <<< 2)

		  // store the beginnig
#if 0
		  std::set<differential_t, struct_comp_diff_dx_dy>::iterator begin_iter = diff_set_dx_dy->begin();
#endif
		  if((p >= *Bn) && (p != 0.0)) {
			 diff[n].dx = dx;		  // dx_{i}
			 diff[n].dy = dxx;	  // dx_{i+1}
			 diff[n].p = pn;
			 simon_xor_differential_search(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, trails_hash_map, p_thres);
		  }
		  find_iter++;
		}	// while
	 }		// if
  }

  if((n == (nrounds - 1)) && (nrounds > 1)) {		  // Last round

	 uint32_t dx = diff[n - 1].dy; // dx_{i} = dy_{i - 1}
	 uint32_t dy = 0;					 // gamma

#if 1
	 pn = max_xdp_rot_and(dx, &dy, lrot_const_s, lrot_const_t);
#else	 // !!!
	 pn = xdp_rot_and(dx, dy, lrot_const_s, lrot_const_t);
#endif

	 uint32_t dyy = diff[n-1].dx; // dy_{i} = dx_{i-1}
	 uint32_t dxx = dy ^ dyy ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dx_{i-1} ^ (alpha <<< 2)

	 double p = 1.0;
	 for(int i = 0; i < n; i++) {
		p *= diff[i].p;
	 }
	 p *= pn;
	 diff[n].dx = dx;
	 diff[n].dy = dxx;
	 diff[n].p = pn;
#if 1									  // store trail in hash table
	 //	 double p_max = 1.0 / (double)(1UL << 12);
	 //	 if((p != 1.0) && (p == p_max) && (n == (NROUNDS - 1))) {
	 if((p != 1.0) && (p != 0.0) && (n == (NROUNDS - 1))) {
		uint32_t trail_len = n + 1;
		std::string s_trail = trail_to_string(diff, trail_len);
		std::unordered_map<std::string, differential_t**>::const_iterator hash_map_iter = 
		  trails_hash_map->find(s_trail);

		if(hash_map_iter == trails_hash_map->end()) {
		  printf("[%s:%d] Add new trail: 2^%f\n", __FILE__, __LINE__, log2(p));

		  differential_t** new_trail;
		  new_trail = (differential_t** )calloc(1, sizeof(differential_t*));
		  *new_trail = (differential_t*)calloc(trail_len, sizeof(differential_t));
		  for(uint32_t i = 0; i < trail_len; i++) {
			 (*new_trail)[i].dx = diff[i].dx;
			 (*new_trail)[i].dy = diff[i].dy;
			 (*new_trail)[i].p = diff[i].p;
		  }
		  std::pair<std::string, differential_t**> new_pair (s_trail,new_trail);
		  trails_hash_map->insert(new_pair);

		  double p_tot = 1.0;
		  for(uint32_t i = 0; i <= (uint32_t)n; i++) {
			 //			 printf("%4X %4X ", diff[i].dx, diff[i].dy);
			 p_tot *= diff[i].p;
		  }
		  //		  printf("p_tot 2^%f, Bn 2^%f\n", log2(p_tot), log2(*Bn));
		  assert(p_tot == p);

		  simon_print_hash_table(*trails_hash_map, trail_len);
		}
	 }
#endif

	 if((p >= *Bn) && (p != 1.0) && (p != 0.0)) { // skip the 0-diff trail (p = 1.0)
#if 1									  // DEBUG
		if (p > *Bn) {
		  printf("[%s:%d] %d | Update best found Bn: 2^%f -> 2^%f\n", __FILE__, __LINE__, n, log2(*Bn), log2(p));
		}
#endif
		*Bn = p;
		B[n] = p;
		for(int i = 0; i < nrounds; i++) {
		  trail[i].dx = diff[i].dx;
		  trail[i].dy = diff[i].dy;
		  trail[i].p = diff[i].p;
		}
	 }
  }
}

// --- Simon differential search ---}


/* ---- */

/* 
[./tests/simon-xor-threshold-search-tests.cc:712] p = 2^-27.540568
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():774]:

 Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 over 32 keys:
 [./tests/simon-xor-threshold-search-tests.cc:780] temp_edp 0.000000 (2^-28.363375) nkeys 31
 [./tests/simon-xor-threshold-search-tests.cc:781] OK

 */

/* --- */


/* 
Parameters for verifying the code:

#define SIMON_NPAIRS (1ULL << 20)
#define SIMON_EPS (double)(1.0 / (double)(1ULL << 5))
#define NROUNDS 6
#define XDP_ROT_AND_MAX_DIFF_CNT (1ULL << 5)
#define XDP_ROT_AND_P_THRES 0.05



*/

/* --- */

/* 
#if 0									  // compute probabilities
  uint32_t dxx = x ^ xx;
  uint32_t f_dy = dx;
  uint32_t f_dx = dy ^ dxx ^ LROT(dx, SIMON_LROT_CONST_U);
  double p = xdp_rot_and(f_dx, f_dy, SIMON_LROT_CONST_S, SIMON_LROT_CONST_T);
  p_trail *= p;
  printf("%f (2^%f)", p, log2(p));
#endif

 */

/* --- */

/*
vpv@mazirat:~$ cd skcrypto/trunk/work/src/yaarx/
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:782] Tests, WORD_SIZE  = 16, MASK =     FFFF
	  [./tests/simon-xor-threshold-search-tests.cc:757] Key     7D82     C2BB     B6AD      8E7
	  [./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():768]:
	  Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
			 Found 1 right pairs (2^-32.000000) |      100        0 | 1:(     290     3E6E) 2:(     291     3E6E)
	  [./src/simon.cc:273]  0:        1        0
	  [./src/simon.cc:273]  1:      106        1
	  [./src/simon.cc:273]  2:      411      106
	  [./src/simon.cc:273]  3:     1064      411
	  [./src/simon.cc:273]  4:      511     1064
	  [./src/simon.cc:273]  5:      607      511
	  [./src/simon.cc:273]  6:     1101      607
	  [./src/simon.cc:273]  7:     4011     1101
	  [./src/simon.cc:273]  8:     1104     4011
	  [./src/simon.cc:273]  9:      600     1104
	  [./src/simon.cc:273] 10:      100      600
	  [./src/simon.cc:273] 11:        0      100
	  [./src/simon.cc:287] 12:      100        0
	  Found 2 right pairs (2^-31.000000) |      100        0 | 1:(     291     3E6E) 2:(     290     3E6E)
	  [./src/simon.cc:273]  0:        1        0
	  [./src/simon.cc:273]  1:      106        1
	  [./src/simon.cc:273]  2:      411      106
	  [./src/simon.cc:273]  3:     1064      411
	  [./src/simon.cc:273]  4:      511     1064
	  [./src/simon.cc:273]  5:      607      511
	  [./src/simon.cc:273]  6:     1101      607
	  [./src/simon.cc:273]  7:     4011     1101
	  [./src/simon.cc:273]  8:     1104     4011
	  [./src/simon.cc:273]  9:      600     1104
	  [./src/simon.cc:273] 10:      100      600
	  [./src/simon.cc:273] 11:        0      100
	  [./src/simon.cc:287] 12:      100        0
	  Found 3 right pairs (2^-30.415037) |      100        0 | 1:(    1588     124E) 2:(    1589     124E)
	  [./src/simon.cc:273]  0:        1        0
	  [./src/simon.cc:273]  1:      104        1
	  [./src/simon.cc:273]  2:      411      104
	  [./src/simon.cc:273]  3:     1064      411
	  [./src/simon.cc:273]  4:      511     1064
	  [./src/simon.cc:273]  5:      607      511
	  [./src/simon.cc:273]  6:     1101      607
	  [./src/simon.cc:273]  7:     4011     1101
	  [./src/simon.cc:273]  8:     1104     4011
	  [./src/simon.cc:273]  9:      600     1104
	  [./src/simon.cc:273] 10:      100      600
	  [./src/simon.cc:273] 11:        0      100
	  [./src/simon.cc:287] 12:      100        0
	  Found 4 right pairs (2^-30.000000) |      100        0 | 1:(    1589     124E) 2:(    1588     124E)
	  [./src/simon.cc:273]  0:        1        0
	  [./src/simon.cc:273]  1:      104        1
	  [./src/simon.cc:273]  2:      411      104
	  [./src/simon.cc:273]  3:     1064      411
	  [./src/simon.cc:273]  4:      511     1064
	  [./src/simon.cc:273]  5:      607      511
	  [./src/simon.cc:273]  6:     1101      607
	  [./src/simon.cc:273]  7:     4011     1101
	  [./src/simon.cc:273]  8:     1104     4011
	  [./src/simon.cc:273]  9:      600     1104
	  [./src/simon.cc:273] 10:      100      600
	  [./src/simon.cc:273] 11:        0      100
	  [./src/simon.cc:287] 12:      100        0
	  Found 5 right pairs (2^-29.678072) |      100        0 | 1:(    159E     935E) 2:(    159F     935E)
	  [./src/simon.cc:273]  0:        1        0
	  [./src/simon.cc:273]  1:      104        1
	  [./src/simon.cc:273]  2:       10      104
	  [./src/simon.cc:273]  3:     1164       10
	  [./src/simon.cc:273]  4:      511     1164
	  [./src/simon.cc:273]  5:      705      511
	  [./src/simon.cc:273]  6:     1105      705
	  [./src/simon.cc:273]  7:     4001     1105
	  [./src/simon.cc:273]  8:     1100     4001
	  [./src/simon.cc:273]  9:      400     1100
	  [./src/simon.cc:273] 10:      100      400
	  [./src/simon.cc:273] 11:        0      100
	  [./src/simon.cc:287] 12:      100        0
F
*/

/* --- */




/* 

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:782] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:757] Key     F9E8     5A2E     74AC      B3D
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():768]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) |      100        0 | 1:(    9A1A     5207) 2:(    9A1B     5207)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:      619        6
 [./src/simon.cc:273]  3:       62      619
 [./src/simon.cc:273]  4:      711       62
 [./src/simon.cc:273]  5:      504      711
 [./src/simon.cc:273]  6:     1105      504
 [./src/simon.cc:273]  7:     4001     1105
 [./src/simon.cc:273]  8:     1100     4001
 [./src/simon.cc:273]  9:      401     1100
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 2 right pairs (2^-31.000000) |      100        0 | 1:(    9A1B     5207) 2:(    9A1A     5207)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:      619        6
 [./src/simon.cc:273]  3:       62      619
 [./src/simon.cc:273]  4:      711       62
 [./src/simon.cc:273]  5:      504      711
 [./src/simon.cc:273]  6:     1105      504
 [./src/simon.cc:273]  7:     4001     1105
 [./src/simon.cc:273]  8:     1100     4001
 [./src/simon.cc:273]  9:      401     1100
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 3 right pairs (2^-30.415037) |      100        0 | 1:(    CB2E     6DA9) 2:(    CB2F     6DA9)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:      415        6
 [./src/simon.cc:273]  3:      D58      415
 [./src/simon.cc:273]  4:     2379      D58
 [./src/simon.cc:273]  5:     ACCC     2379
 [./src/simon.cc:273]  6:     497F     ACCC
 [./src/simon.cc:273]  7:     5015     497F
 [./src/simon.cc:273]  8:     1902     5015
 [./src/simon.cc:273]  9:      600     1902
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 4 right pairs (2^-30.000000) |      100        0 | 1:(    CB2F     6DA9) 2:(    CB2E     6DA9)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:      415        6
 [./src/simon.cc:273]  3:      D58      415
 [./src/simon.cc:273]  4:     2379      D58
 [./src/simon.cc:273]  5:     ACCC     2379
 [./src/simon.cc:273]  6:     497F     ACCC
 [./src/simon.cc:273]  7:     5015     497F
 [./src/simon.cc:273]  8:     1902     5015
 [./src/simon.cc:273]  9:      600     1902
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 [./tests/simon-xor-threshold-search-tests.cc:712] p = 2^-30.000000
OK
[


vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:782] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:757] Key     4A4F     CA49     C197     D391
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():768]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) |      100        0 | 1:(    492A     C8A1) 2:(    492B     C8A1)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       42       11
 [./src/simon.cc:273]  4:      119       42
 [./src/simon.cc:273]  5:      407      119
 [./src/simon.cc:273]  6:     1001      407
 [./src/simon.cc:273]  7:     4411     1001
 [./src/simon.cc:273]  8:     1000     4411
 [./src/simon.cc:273]  9:      401     1000
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 2 right pairs (2^-31.000000) |      100        0 | 1:(    492B     C8A1) 2:(    492A     C8A1)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       42       11
 [./src/simon.cc:273]  4:      119       42
 [./src/simon.cc:273]  5:      407      119
 [./src/simon.cc:273]  6:     1001      407
 [./src/simon.cc:273]  7:     4411     1001
 [./src/simon.cc:273]  8:     1000     4411
 [./src/simon.cc:273]  9:      401     1000
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 3 right pairs (2^-30.415037) |      100        0 | 1:(    D026     CAE8) 2:(    D027     CAE8)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1042       11
 [./src/simon.cc:273]  4:      119     1042
 [./src/simon.cc:273]  5:      504      119
 [./src/simon.cc:273]  6:     1101      504
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 4 right pairs (2^-30.000000) |      100        0 | 1:(    D027     CAE8) 2:(    D026     CAE8)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1042       11
 [./src/simon.cc:273]  4:      119     1042
 [./src/simon.cc:273]  5:      504      119
 [./src/simon.cc:273]  6:     1101      504
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 [./tests/simon-xor-threshold-search-tests.cc:712] p = 2^-30.000000
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():772]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-30.000000
 [./tests/simon-xor-threshold-search-tests.cc:774] OK

real    6m18.738s
user    6m17.600s
sys     0m0.036s


 */

/* 
12 round clustering for Simon32 2^-35

0:     1400 ->     7000 0.125000 (2^-3.000000)
 1:        0 ->     1400 1.000000 (2^0.000000)
 2:     1400 ->     5000 0.125000 (2^-3.000000)
 3:     5000 ->     D401 0.125000 (2^-3.000000)
 4:     D401 ->        0 0.007812 (2^-7.000000)
 5:        0 ->     D401 1.000000 (2^0.000000)
 6:     D401 ->     5000 0.007812 (2^-7.000000)
 7:     5000 ->     1400 0.125000 (2^-3.000000)
 8:     1400 ->        0 0.125000 (2^-3.000000)
 9:        0 ->     1400 1.000000 (2^0.000000)
10:     1400 ->     5000 0.125000 (2^-3.000000)
11:     5000 ->     5401 0.125000 (2^-3.000000)

[./src/simon-xor-threshold-search.cc:629] Found 17 trails:
[    1] 1400    0    0 1400 1400 7000 7000 1401 1401 2000 2000 9401 9401 7000 7000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-39.000000
[    2] 1400    0    0 1400 1400 7000 7000 1401 1401 2000 2000 9401 9401 7004 7004 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-41.000000
[    3] 1400    0    0 1400 1400 7000 7000 9401 9401    0    0 9401 9401 7810 7810 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-41.000000
[    4] 1400    0    0 1400 1400 7000 7000 9401 9401 2000 2000 1401 1401 7004 7004 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-41.000000
[    5] 1400    0    0 1400 1400 7000 7000 1401 1401 2000 2000 9401 9401 7810 7810 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-41.000000
[    6] 1400    0    0 1400 1400 7000 7000 1401 1401    0    0 1401 1401 7000 7000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-35.000000
[    7] 1400    0    0 1400 1400 5000 5000 D401 D401    0    0 D401 D401 5000 5000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-35.000000
[    8] 1400    0    0 1400 1400 7000 7000 9401 9401 2000 2000 1401 1401 7000 7000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-39.000000
[    9] 1400    0    0 1400 1400 5000 5000 D411 D411 2007 2007 5401 5401 7000 7000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-44.000000
[   10] 1400    0    0 1400 1400 7000 7000 9401 9401    0    0 9401 9401 7000 7000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-39.000000
[   11] 1400    0    0 1400 1400 7000 7000 1401 1401    0    0 1401 1401 7810 7810 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-37.000000
[   12] 1400    0    0 1400 1400 7000 7000 1401 1401    0    0 1401 1401 7004 7004 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-37.000000
[   13] 1400    0    0 1400 1400 5000 5000 D401 D401    7    7 D411 D411 7000 7000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-44.000000
[   14] 1400    0    0 1400 1400 7000 7000 D411 D411    7    7 D401 D401 5000 5000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-44.000000
[   15] 1400    0    0 1400 1400 7000 7000 5401 5401 2007 2007 D411 D411 5000 5000 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-44.000000
[   16] 1400    0    0 1400 1400 7000 7000 9401 9401    0    0 9401 9401 7004 7004 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-41.000000
[   17] 1400    0    0 1400 1400 7000 7000 9401 9401 2000 2000 1401 1401 7810 7810 1400 1400    0    0 1400 1400 5000 5000 5401  | 2^-41.000000
Probability of differential: 2^-33.520220
[./src/simon-xor-threshold-search.cc:791] Penultimate round does not match output diff:     2BF8 vs.     50005401)


vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:782] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:757] Key     F0E7     36DD     4032     C890
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():768]:
Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
Found 1 right pairs (2^-32.000000) |      100        0 | 1:(     396     9313) 2:(     397     9313)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:      106        1
[./src/simon.cc:273]  2:       11      106
[./src/simon.cc:273]  3:     1040       11
[./src/simon.cc:273]  4:      111     1040
[./src/simon.cc:273]  5:      604      111
[./src/simon.cc:273]  6:     1901      604
[./src/simon.cc:273]  7:     4000     1901
[./src/simon.cc:273]  8:     1900     4000
[./src/simon.cc:273]  9:      601     1900
[./src/simon.cc:273] 10:      100      601
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 2 right pairs (2^-31.000000) |      100        0 | 1:(     397     9313) 2:(     396     9313)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:      106        1
[./src/simon.cc:273]  2:       11      106
[./src/simon.cc:273]  3:     1040       11
[./src/simon.cc:273]  4:      111     1040
[./src/simon.cc:273]  5:      604      111
[./src/simon.cc:273]  6:     1901      604
[./src/simon.cc:273]  7:     4000     1901
[./src/simon.cc:273]  8:     1900     4000
[./src/simon.cc:273]  9:      601     1900
[./src/simon.cc:273] 10:      100      601
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 3 right pairs (2^-30.415037) |      100        0 | 1:(    344A     253F) 2:(    344B     253F)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       19        4
[./src/simon.cc:273]  3:       50       19
[./src/simon.cc:273]  4:     11D9       50
[./src/simon.cc:273]  5:     E424     11D9
[./src/simon.cc:273]  6:     2507     E424
[./src/simon.cc:273]  7:     7010     2507
[./src/simon.cc:273]  8:     1506     7010
[./src/simon.cc:273]  9:      600     1506
[./src/simon.cc:273] 10:      100      600
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 4 right pairs (2^-30.000000) |      100        0 | 1:(    344B     253F) 2:(    344A     253F)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       19        4
[./src/simon.cc:273]  3:       50       19
[./src/simon.cc:273]  4:     11D9       50
[./src/simon.cc:273]  5:     E424     11D9
[./src/simon.cc:273]  6:     2507     E424
[./src/simon.cc:273]  7:     7010     2507
[./src/simon.cc:273]  8:     1506     7010
[./src/simon.cc:273]  9:      600     1506
[./src/simon.cc:273] 10:      100      600
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 5 right pairs (2^-29.678072) |      100        0 | 1:(    7056     747E) 2:(    7057     747E)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       11        4
[./src/simon.cc:273]  3:       40       11
[./src/simon.cc:273]  4:      111       40
[./src/simon.cc:273]  5:      405      111
[./src/simon.cc:273]  6:     1901      405
[./src/simon.cc:273]  7:     6001     1901
[./src/simon.cc:273]  8:     1804     6001
[./src/simon.cc:273]  9:      401     1804
[./src/simon.cc:273] 10:      100      401
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 6 right pairs (2^-29.415037) |      100        0 | 1:(    7057     747E) 2:(    7056     747E)
Found 6 right pairs (2^-29.415037) |      100        0 | 1:(    7057     747E) 2:(    7056     747E)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       11        4
[./src/simon.cc:273]  3:       40       11
[./src/simon.cc:273]  4:      111       40
[./src/simon.cc:273]  5:      405      111
[./src/simon.cc:273]  6:     1901      405
[./src/simon.cc:273]  7:     6001     1901
[./src/simon.cc:273]  8:     1804     6001
[./src/simon.cc:273]  9:      401     1804
[./src/simon.cc:273] 10:      100      401
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 7 right pairs (2^-29.192645) |      100        0 | 1:(    7412     246A) 2:(    7413     246A)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       11        4
[./src/simon.cc:273]  3:       40       11
[./src/simon.cc:273]  4:      111       40
[./src/simon.cc:273]  5:      405      111
[./src/simon.cc:273]  6:     1901      405
[./src/simon.cc:273]  7:     6009     1901
[./src/simon.cc:273]  8:     1004     6009
[./src/simon.cc:273]  9:      401     1004
[./src/simon.cc:273] 10:      100      401
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 8 right pairs (2^-29.000000) |      100        0 | 1:(    7413     246A) 2:(    7412     246A)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       11        4
[./src/simon.cc:273]  3:       40       11
[./src/simon.cc:273]  4:      111       40
[./src/simon.cc:273]  5:      405      111
[./src/simon.cc:273]  6:     1901      405
[./src/simon.cc:273]  7:     6009     1901
[./src/simon.cc:273]  8:     1004     6009
[./src/simon.cc:273]  9:      401     1004
[./src/simon.cc:273] 10:      100      401
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 9 right pairs (2^-28.830075) |      100        0 | 1:(    C73E     CABA) 2:(    C73F     CABA)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        6        1
[./src/simon.cc:273]  2:       11        6
[./src/simon.cc:273]  3:       40       11
[./src/simon.cc:273]  4:      111       40
[./src/simon.cc:273]  5:      405      111
[./src/simon.cc:273]  6:     1901      405
[./src/simon.cc:273]  7:     4000     1901
[./src/simon.cc:273]  8:     1900     4000
[./src/simon.cc:273]  9:      400     1900
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 10 right pairs (2^-28.678072) |      100        0 | 1:(    C73F     CABA) 2:(    C73E     CABA)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        6        1
[./src/simon.cc:273]  2:       11        6
[./src/simon.cc:273]  3:       40       11
[./src/simon.cc:273]  4:      111       40
[./src/simon.cc:273]  5:      405      111
[./src/simon.cc:273]  6:     1901      405
[./src/simon.cc:273]  7:     4000     1901
[./src/simon.cc:273]  8:     1900     4000
[./src/simon.cc:273]  9:      400     1900
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 11 right pairs (2^-28.540568) |      100        0 | 1:(    E27A     204A) 2:(    E27B     204A)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        6        1
[./src/simon.cc:273]  2:      211        6
[./src/simon.cc:273]  3:      C42      211
[./src/simon.cc:273]  4:     7919      C42
[./src/simon.cc:273]  5:     731C     7919
[./src/simon.cc:273]  6:     8D01     731C
[./src/simon.cc:273]  7:     441D     8D01
[./src/simon.cc:273]  8:     1006     441D
[./src/simon.cc:273]  9:      601     1006
[./src/simon.cc:273] 10:      100      601
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 12 right pairs (2^-28.415037) |      100        0 | 1:(    E27B     204A) 2:(    E27A     204A)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        6        1
[./src/simon.cc:273]  2:      211        6
[./src/simon.cc:273]  3:      C42      211
[./src/simon.cc:273]  4:     7919      C42
[./src/simon.cc:273]  5:     731C     7919
[./src/simon.cc:273]  6:     8D01     731C
[./src/simon.cc:273]  7:     441D     8D01
[./src/simon.cc:273]  8:     1006     441D
[./src/simon.cc:273]  9:      601     1006
[./src/simon.cc:273] 10:      100      601
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 13 right pairs (2^-28.299560) |      100        0 | 1:(    E436     40B9) 2:(    E437     40B9)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       11        4
[./src/simon.cc:273]  3:       40       11
[./src/simon.cc:273]  4:      111       40
[./src/simon.cc:273]  5:     1607      111
[./src/simon.cc:273]  6:     5901     1607
[./src/simon.cc:273]  7:     7000     5901
[./src/simon.cc:273]  8:     1900     7000
[./src/simon.cc:273]  9:      400     1900
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 14 right pairs (2^-28.192645) |      100        0 | 1:(    E437     40B9) 2:(    E436     40B9)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       11        4
[./src/simon.cc:273]  3:       40       11
[./src/simon.cc:273]  4:      111       40
[./src/simon.cc:273]  5:     1607      111
[./src/simon.cc:273]  6:     5901     1607
[./src/simon.cc:273]  7:     7000     5901
[./src/simon.cc:273]  8:     1900     7000
[./src/simon.cc:273]  9:      400     1900
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
[./tests/simon-xor-threshold-search-tests.cc:712] p = 2^-28.192645
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():772]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-28.192645
 [./tests/simon-xor-threshold-search-tests.cc:774] OK

real    10m34.820s
user    8m20.455s
sys     0m0.048s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$

vpv@mazirat:~$ cd skcrypto/trunk/work/src/yaarx/
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:782] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:757] Key     DE0B     600E     45E4     28B7
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():768]:
Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
Found 1 right pairs (2^-32.000000) |      100        0 | 1:(    403A     46C1) 2:(    403B     46C1)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       19        4
[./src/simon.cc:273]  3:       40       19
[./src/simon.cc:273]  4:      119       40
[./src/simon.cc:273]  5:      506      119
[./src/simon.cc:273]  6:     1901      506
[./src/simon.cc:273]  7:     4200     1901
[./src/simon.cc:273]  8:     1100     4200
[./src/simon.cc:273]  9:      400     1100
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 2 right pairs (2^-31.000000) |      100        0 | 1:(    403B     46C1) 2:(    403A     46C1)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       19        4
[./src/simon.cc:273]  3:       40       19
[./src/simon.cc:273]  4:      119       40
[./src/simon.cc:273]  5:      506      119
[./src/simon.cc:273]  6:     1901      506
[./src/simon.cc:273]  7:     4200     1901
[./src/simon.cc:273]  8:     1100     4200
[./src/simon.cc:273]  9:      400     1100
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 3 right pairs (2^-30.415037) |      100        0 | 1:(    4F30     FB47) 2:(    4F31     FB47)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        6        1
[./src/simon.cc:273]  2:      211        6
[./src/simon.cc:273]  3:     1840      211
[./src/simon.cc:273]  4:      311     1840
[./src/simon.cc:273]  5:      607      311
[./src/simon.cc:273]  6:     1101      607
[./src/simon.cc:273]  7:     4010     1101
[./src/simon.cc:273]  8:     1100     4010
[./src/simon.cc:273]  9:      400     1100
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 4 right pairs (2^-30.000000) |      100        0 | 1:(    4F31     FB47) 2:(    4F30     FB47)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        6        1
[./src/simon.cc:273]  2:      211        6
[./src/simon.cc:273]  3:     1840      211
[./src/simon.cc:273]  4:      311     1840
[./src/simon.cc:273]  5:      607      311
[./src/simon.cc:273]  6:     1101      607
[./src/simon.cc:273]  7:     4010     1101
[./src/simon.cc:273]  8:     1100     4010
[./src/simon.cc:273]  9:      400     1100
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 5 right pairs (2^-29.678072) |      100        0 | 1:(    6840     4ADC) 2:(    6841     4ADC)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       11        4
[./src/simon.cc:273]  3:     1040       11
[./src/simon.cc:273]  4:     2101     1040
[./src/simon.cc:273]  5:     9466     2101
[./src/simon.cc:273]  6:     780B     9466
[./src/simon.cc:273]  7:     5E05     780B
[./src/simon.cc:273]  8:     1C04     5E05
[./src/simon.cc:273]  9:      601     1C04
[./src/simon.cc:273] 10:      100      601
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 6 right pairs (2^-29.415037) |      100        0 | 1:(    6841     4ADC) 2:(    6840     4ADC)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:       11        4
[./src/simon.cc:273]  3:     1040       11
[./src/simon.cc:273]  4:     2101     1040
[./src/simon.cc:273]  5:     9466     2101
[./src/simon.cc:273]  6:     780B     9466
[./src/simon.cc:273]  7:     5E05     780B
[./src/simon.cc:273]  8:     1C04     5E05
[./src/simon.cc:273]  9:      601     1C04
[./src/simon.cc:273] 10:      100      601
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 7 right pairs (2^-29.192645) |      100        0 | 1:(    ADC2     3851) 2:(    ADC3     3851)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:      104        1
[./src/simon.cc:273]  2:       11      104
[./src/simon.cc:273]  3:       42       11
[./src/simon.cc:273]  4:      119       42
[./src/simon.cc:273]  5:      405      119
[./src/simon.cc:273]  6:     1801      405
[./src/simon.cc:273]  7:     4411     1801
[./src/simon.cc:273]  8:     1000     4411
[./src/simon.cc:273]  9:      401     1000
[./src/simon.cc:273] 10:      100      401
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 8 right pairs (2^-29.000000) |      100        0 | 1:(    ADC3     3851) 2:(    ADC2     3851)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:      104        1
[./src/simon.cc:273]  2:       11      104
[./src/simon.cc:273]  3:       42       11
[./src/simon.cc:273]  4:      119       42
[./src/simon.cc:273]  5:      405      119
[./src/simon.cc:273]  6:     1801      405
[./src/simon.cc:273]  7:     4411     1801
[./src/simon.cc:273]  8:     1000     4411
[./src/simon.cc:273]  9:      401     1000
[./src/simon.cc:273] 10:      100      401
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 9 right pairs (2^-28.830075) |      100        0 | 1:(    E150     E5B9) 2:(    E151     E5B9)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:      419        4
[./src/simon.cc:273]  3:      140      419
[./src/simon.cc:273]  4:      119      140
[./src/simon.cc:273]  5:      604      119
[./src/simon.cc:273]  6:     1905      604
[./src/simon.cc:273]  7:     4201     1905
[./src/simon.cc:273]  8:     1100     4201
[./src/simon.cc:273]  9:      400     1100
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 10 right pairs (2^-28.678072) |      100        0 | 1:(    E151     E5B9) 2:(    E150     E5B9)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:        4        1
[./src/simon.cc:273]  2:      419        4
[./src/simon.cc:273]  3:      140      419
[./src/simon.cc:273]  4:      119      140
[./src/simon.cc:273]  5:      604      119
[./src/simon.cc:273]  6:     1905      604
[./src/simon.cc:273]  7:     4201     1905
[./src/simon.cc:273]  8:     1100     4201
[./src/simon.cc:273]  9:      400     1100
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 11 right pairs (2^-28.540568) |      100        0 | 1:(    E4EC     9E30) 2:(    E4ED     9E30)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:      104        1
[./src/simon.cc:273]  2:       19      104
[./src/simon.cc:273]  3:     1040       19
[./src/simon.cc:273]  4:      119     1040
[./src/simon.cc:273]  5:      406      119
[./src/simon.cc:273]  6:     1901      406
[./src/simon.cc:273]  7:     4200     1901
[./src/simon.cc:273]  8:     1100     4200
[./src/simon.cc:273]  9:      400     1100
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
Found 12 right pairs (2^-28.415037) |      100        0 | 1:(    E4ED     9E30) 2:(    E4EC     9E30)
[./src/simon.cc:273]  0:        1        0
[./src/simon.cc:273]  1:      104        1
[./src/simon.cc:273]  2:       19      104
[./src/simon.cc:273]  3:     1040       19
[./src/simon.cc:273]  4:      119     1040
[./src/simon.cc:273]  5:      406      119
[./src/simon.cc:273]  6:     1901      406
[./src/simon.cc:273]  7:     4200     1901
[./src/simon.cc:273]  8:     1100     4200
[./src/simon.cc:273]  9:      400     1100
[./src/simon.cc:273] 10:      100      400
[./src/simon.cc:273] 11:        0      100
[./src/simon.cc:287] 12:      100        0
[./tests/simon-xor-threshold-search-tests.cc:712] p = 2^-28.415037
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():772]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-28.415037
 [./tests/simon-xor-threshold-search-tests.cc:774] OK

 */


/* --- */

/* 
Ours: 12-round differential (400 1900) -> (1500 500), two runs with different keys:

vpv@igor:~/skcrypto/trunk/work/src/yaarx$ ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:771] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:746] Key     9971     49D3     D8A6     D8B8
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():757]:
 Verify 12 R differential (     400     1900) -> (    1500      500) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) |     1500      500 | 1:(    1888     A301) 2:(    1C88     BA01)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      600      100
 [./src/simon.cc:273]  5:     1104      600
 [./src/simon.cc:273]  6:     4001     1104
 [./src/simon.cc:273]  7:     1143     4001
 [./src/simon.cc:273]  8:      71D     1143
 [./src/simon.cc:273]  9:        6      71D
 [./src/simon.cc:273] 10:      101        6
 [./src/simon.cc:273] 11:      500      101
 [./src/simon.cc:287] 12:     1500      500
 Found 2 right pairs (2^-31.000000) |     1500      500 | 1:(    1C88     BA01) 2:(    1888     A301)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      600      100
 [./src/simon.cc:273]  5:     1104      600
 [./src/simon.cc:273]  6:     4001     1104
 [./src/simon.cc:273]  7:     1143     4001
 [./src/simon.cc:273]  8:      71D     1143
 [./src/simon.cc:273]  9:        6      71D
 [./src/simon.cc:273] 10:      101        6
 [./src/simon.cc:273] 11:      500      101
 [./src/simon.cc:287] 12:     1500      500
 Found 3 right pairs (2^-30.415037) |     1500      500 | 1:(    29A9     2502) 2:(    2DA9     3C02)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:      200      100
 [./src/simon.cc:273]  3:      D00      200
 [./src/simon.cc:273]  4:     2E01      D00
 [./src/simon.cc:273]  5:     E526     2E01
 [./src/simon.cc:273]  6:     5C3F     E526
 [./src/simon.cc:273]  7:     22D5     5C3F
 [./src/simon.cc:273]  8:     C661     22D5
 [./src/simon.cc:273]  9:     3B11     C661
 [./src/simon.cc:273] 10:      B04     3B11
 [./src/simon.cc:273] 11:      500      B04
 [./src/simon.cc:287] 12:     1500      500
 Found 4 right pairs (2^-30.000000) |     1500      500 | 1:(    2DA9     3C02) 2:(    29A9     2502)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:      200      100
 [./src/simon.cc:273]  3:      D00      200
 [./src/simon.cc:273]  4:     2E01      D00
 [./src/simon.cc:273]  5:     E526     2E01
 [./src/simon.cc:273]  6:     5C3F     E526
 [./src/simon.cc:273]  7:     22D5     5C3F
 [./src/simon.cc:273]  8:     C661     22D5
 [./src/simon.cc:273]  9:     3B11     C661
 [./src/simon.cc:273] 10:      B04     3B11
 [./src/simon.cc:273] 11:      500      B04
 [./src/simon.cc:287] 12:     1500      500
 Found 5 right pairs (2^-29.678072) |     1500      500 | 1:(    5929     F585) 2:(    5D29     EC85)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1100      400
 [./src/simon.cc:273]  6:     4200     1100
 [./src/simon.cc:273]  7:     1D01     4200
 [./src/simon.cc:273]  8:      500     1D01
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 6 right pairs (2^-29.415037) |     1500      500 | 1:(    5D29     EC85) 2:(    5929     F585)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1100      400
 [./src/simon.cc:273]  6:     4200     1100
 [./src/simon.cc:273]  7:     1D01     4200
 [./src/simon.cc:273]  8:      500     1D01
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 7 right pairs (2^-29.192645) |     1500      500 | 1:(    7368      350) 2:(    7768     1A50)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1100      400
 [./src/simon.cc:273]  6:     4200     1100
 [./src/simon.cc:273]  7:     1D01     4200
 [./src/simon.cc:273]  8:      500     1D01
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 8 right pairs (2^-29.000000) |     1500      500 | 1:(    7768     1A50) 2:(    7368      350)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1100      400
 [./src/simon.cc:273]  6:     4200     1100
 [./src/simon.cc:273]  7:     1D01     4200
 [./src/simon.cc:273]  8:      500     1D01
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 9 right pairs (2^-28.830075) |     1500      500 | 1:(    8A4D     C289) 2:(    8E4D     DB89)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1104      400
 [./src/simon.cc:273]  6:     4211     1104
 [./src/simon.cc:273]  7:     1941     4211
 [./src/simon.cc:273]  8:      61D     1941
 [./src/simon.cc:273]  9:      105      61D
 [./src/simon.cc:273] 10:      101      105
 [./src/simon.cc:273] 11:      500      101
 [./src/simon.cc:287] 12:     1500      500
 Found 10 right pairs (2^-28.678072) |     1500      500 | 1:(    8E4D     DB89) 2:(    8A4D     C289)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1104      400
 [./src/simon.cc:273]  6:     4211     1104
 [./src/simon.cc:273]  7:     1941     4211
 [./src/simon.cc:273]  8:      61D     1941
 [./src/simon.cc:273]  9:      105      61D
 [./src/simon.cc:273] 10:      101      105
 [./src/simon.cc:273] 11:      500      101
 [./src/simon.cc:287] 12:     1500      500
 Found 11 right pairs (2^-28.540568) |     1500      500 | 1:(    A929     2477) 2:(    AD29     3D77)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1104      400
 [./src/simon.cc:273]  6:     4201     1104
 [./src/simon.cc:273]  7:     1941     4201
 [./src/simon.cc:273]  8:      61D     1941
 [./src/simon.cc:273]  9:      105      61D
 [./src/simon.cc:273] 10:      101      105
 [./src/simon.cc:273] 11:      500      101
 [./src/simon.cc:287] 12:     1500      500
 Found 12 right pairs (2^-28.415037) |     1500      500 | 1:(    AD29     3D77) 2:(    A929     2477)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1104      400
 [./src/simon.cc:273]  6:     4201     1104
 [./src/simon.cc:273]  7:     1941     4201
 [./src/simon.cc:273]  8:      61D     1941
 [./src/simon.cc:273]  9:      105      61D
 [./src/simon.cc:273] 10:      101      105
 [./src/simon.cc:273] 11:      500      101
 [./src/simon.cc:287] 12:     1500      500
 Found 13 right pairs (2^-28.299560) |     1500      500 | 1:(    D87D     81F7) 2:(    DC7D     98F7)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1104      400
 [./src/simon.cc:273]  6:     4201     1104
 [./src/simon.cc:273]  7:     1941     4201
 [./src/simon.cc:273]  8:      61D     1941
 [./src/simon.cc:273]  9:      105      61D
 [./src/simon.cc:273] 10:      101      105
 [./src/simon.cc:273] 11:      500      101
 [./src/simon.cc:287] 12:     1500      500
 Found 14 right pairs (2^-28.192645) |     1500      500 | 1:(    DC7D     98F7) 2:(    D87D     81F7)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1104      400
 [./src/simon.cc:273]  6:     4201     1104
 [./src/simon.cc:273]  7:     1941     4201
 [./src/simon.cc:273]  8:      61D     1941
 [./src/simon.cc:273]  9:      105      61D
 [./src/simon.cc:273] 10:      101      105
 [./src/simon.cc:273] 11:      500      101
 [./src/simon.cc:287] 12:     1500      500
 Found 15 right pairs (2^-28.093109) |     1500      500 | 1:(    FA28     633F) 2:(    FE28     7A3F)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      601      100
 [./src/simon.cc:273]  5:     1502      601
 [./src/simon.cc:273]  6:     7A18     1502
 [./src/simon.cc:273]  7:     1D01     7A18
 [./src/simon.cc:273]  8:      500     1D01
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 16 right pairs (2^-28.000000) |     1500      500 | 1:(    FE28     7A3F) 2:(    FA28     633F)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      601      100
 [./src/simon.cc:273]  5:     1502      601
 [./src/simon.cc:273]  6:     7A18     1502
 [./src/simon.cc:273]  7:     1D01     7A18
 [./src/simon.cc:273]  8:      500     1D01
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 [./tests/simon-xor-threshold-search-tests.cc:712] p = 2^-28.000000
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():761]:
Verified 12 R differential (     400     1900) -> (    1500      500) | 2^32.00 CP pairs
 Final probability p = 2^-28.000000
 [./tests/simon-xor-threshold-search-tests.cc:763] OK

 vpv@igor:~/skcrypto/trunk/work/src/yaarx$ ./bin/simon-xor-threshold-search-tests
 #--- [./tests/simon-xor-threshold-search-tests.cc:771] Tests, WORD_SIZE  = 16, MASK =     FFFF
 [./tests/simon-xor-threshold-search-tests.cc:746] Key     822D     B743     F460     D0D1
 [./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():757]:
 Verify 12 R differential (     400     1900) -> (    1500      500) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) |     1500      500 | 1:(    5979     D9E5) 2:(    5D79     C0E5)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      600      100
 [./src/simon.cc:273]  5:     1100      600
 [./src/simon.cc:273]  6:     4200     1100
 [./src/simon.cc:273]  7:     1D01     4200
 [./src/simon.cc:273]  8:      500     1D01
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 2 right pairs (2^-31.000000) |     1500      500 | 1:(    5D79     C0E5) 2:(    5979     D9E5)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      600      100
 [./src/simon.cc:273]  5:     1100      600
 [./src/simon.cc:273]  6:     4200     1100
 [./src/simon.cc:273]  7:     1D01     4200
 [./src/simon.cc:273]  8:      500     1D01
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 3 right pairs (2^-30.415037) |     1500      500 | 1:(    9151     6D2A) 2:(    9551     742A)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      900      400
 [./src/simon.cc:273]  2:     2001      900
 [./src/simon.cc:273]  3:     8824     2001
 [./src/simon.cc:273]  4:     2053     8824
 [./src/simon.cc:273]  5:     594E     2053
 [./src/simon.cc:273]  6:     B9BF     594E
 [./src/simon.cc:273]  7:     8097     B9BF
 [./src/simon.cc:273]  8:     AE4F     8097
 [./src/simon.cc:273]  9:     2D1B     AE4F
 [./src/simon.cc:273] 10:      B04     2D1B
 [./src/simon.cc:273] 11:      500      B04
 [./src/simon.cc:287] 12:     1500      500
 Found 4 right pairs (2^-30.000000) |     1500      500 | 1:(    9551     742A) 2:(    9151     6D2A)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      900      400
 [./src/simon.cc:273]  2:     2001      900
 [./src/simon.cc:273]  3:     8824     2001
 [./src/simon.cc:273]  4:     2053     8824
 [./src/simon.cc:273]  5:     594E     2053
 [./src/simon.cc:273]  6:     B9BF     594E
 [./src/simon.cc:273]  7:     8097     B9BF
 [./src/simon.cc:273]  8:     AE4F     8097
 [./src/simon.cc:273]  9:     2D1B     AE4F
 [./src/simon.cc:273] 10:      B04     2D1B
 [./src/simon.cc:273] 11:      500      B04
 [./src/simon.cc:287] 12:     1500      500
 Found 5 right pairs (2^-29.678072) |     1500      500 | 1:(    A08D     1E66) 2:(    A48D      766)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      401      100
 [./src/simon.cc:273]  5:     1100      401
 [./src/simon.cc:273]  6:     6200     1100
 [./src/simon.cc:273]  7:     1D03     6200
 [./src/simon.cc:273]  8:      700     1D03
 [./src/simon.cc:273]  9:      100      700
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 6 right pairs (2^-29.415037) |     1500      500 | 1:(    A48D      766) 2:(    A08D     1E66)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      401      100
 [./src/simon.cc:273]  5:     1100      401
 [./src/simon.cc:273]  6:     6200     1100
 [./src/simon.cc:273]  7:     1D03     6200
 [./src/simon.cc:273]  8:      700     1D03
 [./src/simon.cc:273]  9:      100      700
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 [./tests/simon-xor-threshold-search-tests.cc:712] p = 2^-29.415037
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():761]:
Verified 12 R differential (     400     1900) -> (    1500      500) | 2^32.00 CP pairs
 Final probability p = 2^-29.415037
 [./tests/simon-xor-threshold-search-tests.cc:763] OK


 */

/* 
DTU: 12-round differential (1 0) -> (100 0):

vpv@igor:~/skcrypto/trunk/work/src/yaarx$ ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:771] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:746] Key     6E5C     FDEC     4223     A065
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():757]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) |      100        0 | 1:(       8     A3B1) 2:(       9     A3B1)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       19        4
 [./src/simon.cc:273]  3:       62       19
 [./src/simon.cc:273]  4:      111       62
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1005      407
 [./src/simon.cc:273]  7:     6001     1005
 [./src/simon.cc:273]  8:     1100     6001
 [./src/simon.cc:273]  9:      600     1100
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 2 right pairs (2^-31.000000) |      100        0 | 1:(       9     A3B1) 2:(       8     A3B1)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       19        4
 [./src/simon.cc:273]  3:       62       19
 [./src/simon.cc:273]  4:      111       62
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1005      407
 [./src/simon.cc:273]  7:     6001     1005
 [./src/simon.cc:273]  8:     1100     6001
 [./src/simon.cc:273]  9:      600     1100
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 3 right pairs (2^-30.415037) |      100        0 | 1:(    8D30     8241) 2:(    8D31     8241)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       19        4
 [./src/simon.cc:273]  3:       62       19
 [./src/simon.cc:273]  4:      111       62
 [./src/simon.cc:273]  5:      404      111
 [./src/simon.cc:273]  6:     1101      404
 [./src/simon.cc:273]  7:     6000     1101
 [./src/simon.cc:273]  8:     1100     6000
 [./src/simon.cc:273]  9:      601     1100
 [./src/simon.cc:273] 10:      100      601
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 4 right pairs (2^-30.000000) |      100        0 | 1:(    8D31     8241) 2:(    8D30     8241)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       19        4
 [./src/simon.cc:273]  3:       62       19
 [./src/simon.cc:273]  4:      111       62
 [./src/simon.cc:273]  5:      404      111
 [./src/simon.cc:273]  6:     1101      404
 [./src/simon.cc:273]  7:     6000     1101
 [./src/simon.cc:273]  8:     1100     6000
 [./src/simon.cc:273]  9:      601     1100
 [./src/simon.cc:273] 10:      100      601
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 5 right pairs (2^-29.678072) |      100        0 | 1:(    DB0E     7C02) 2:(    DB0F     7C02)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:       11        6
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      404      111
 [./src/simon.cc:273]  6:     1105      404
 [./src/simon.cc:273]  7:     4001     1105
 [./src/simon.cc:273]  8:     1100     4001
 [./src/simon.cc:273]  9:      600     1100
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 6 right pairs (2^-29.415037) |      100        0 | 1:(    DB0F     7C02) 2:(    DB0E     7C02)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:       11        6
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      404      111
 [./src/simon.cc:273]  6:     1105      404
 [./src/simon.cc:273]  7:     4001     1105
 [./src/simon.cc:273]  8:     1100     4001
 [./src/simon.cc:273]  9:      600     1100
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0

Ours: 12-round differential (400 1900) -> (1500 500):

vpv@igor:~/skcrypto/trunk/work/src/yaarx$ ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:771] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:746] Key     F8DE     F237     C103     133C
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():757]:
 Verify 12 R differential (     400     1900) -> (    1500      500) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) |     1500      500 | 1:(    C29C     4EB0) 2:(    C69C     57B0)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1900      400
 [./src/simon.cc:273]  6:     7200     1900
 [./src/simon.cc:273]  7:     1501     7200
 [./src/simon.cc:273]  8:      500     1501
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500
 Found 2 right pairs (2^-31.000000) |     1500      500 | 1:(    C69C     57B0) 2:(    C29C     4EB0)
 [./src/simon.cc:273]  0:      400     1900
 [./src/simon.cc:273]  1:      100      400
 [./src/simon.cc:273]  2:        0      100
 [./src/simon.cc:273]  3:      100        0
 [./src/simon.cc:273]  4:      400      100
 [./src/simon.cc:273]  5:     1900      400
 [./src/simon.cc:273]  6:     7200     1900
 [./src/simon.cc:273]  7:     1501     7200
 [./src/simon.cc:273]  8:      500     1501
 [./src/simon.cc:273]  9:      100      500
 [./src/simon.cc:273] 10:      100      100
 [./src/simon.cc:273] 11:      500      100
 [./src/simon.cc:287] 12:     1500      500


 vpv@igor:~/skcrypto/trunk/work/src/yaarx$ ./bin/simon-xor-threshold-search-tests
 #--- [./tests/simon-xor-threshold-search-tests.cc:771] Tests, WORD_SIZE  = 16, MASK =     FFFF
 [./tests/simon-xor-threshold-search-tests.cc:746] Key     F65B      A8E     452E     1D02
 [./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():757]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) |      100        0 | 1:(    2428     3C42) 2:(    2429     3C42)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:     1040      411
 [./src/simon.cc:273]  4:     4511     1040
 [./src/simon.cc:273]  5:      406     4511
 [./src/simon.cc:273]  6:     5901      406
 [./src/simon.cc:273]  7:     6200     5901
 [./src/simon.cc:273]  8:     1100     6200
 [./src/simon.cc:273]  9:      601     1100
 [./src/simon.cc:273] 10:      100      601
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 2 right pairs (2^-31.000000) |      100        0 | 1:(    2429     3C42) 2:(    2428     3C42)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:     1040      411
 [./src/simon.cc:273]  4:     4511     1040
 [./src/simon.cc:273]  5:      406     4511
 [./src/simon.cc:273]  6:     5901      406
 [./src/simon.cc:273]  7:     6200     5901
 [./src/simon.cc:273]  8:     1100     6200
 [./src/simon.cc:273]  9:      601     1100
 [./src/simon.cc:273] 10:      100      601
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 3 right pairs (2^-30.415037) |      100        0 | 1:(    3C6A     3933) 2:(    3C6B     3933)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:      160      411
 [./src/simon.cc:273]  4:     4191      160
 [./src/simon.cc:273]  5:      445     4191
 [./src/simon.cc:273]  6:     5101      445
 [./src/simon.cc:273]  7:     6000     5101
 [./src/simon.cc:273]  8:     1100     6000
 [./src/simon.cc:273]  9:      401     1100
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 4 right pairs (2^-30.000000) |      100        0 | 1:(    3C6B     3933) 2:(    3C6A     3933)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:      160      411
 [./src/simon.cc:273]  4:     4191      160
 [./src/simon.cc:273]  5:      445     4191
 [./src/simon.cc:273]  6:     5101      445
 [./src/simon.cc:273]  7:     6000     5101
 [./src/simon.cc:273]  8:     1100     6000
 [./src/simon.cc:273]  9:      401     1100
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 5 right pairs (2^-29.678072) |      100        0 | 1:(    7C28     1F16) 2:(    7C29     1F16)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       40       11
 [./src/simon.cc:273]  4:      111       40
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      600     1100
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 6 right pairs (2^-29.415037) |      100        0 | 1:(    7C29     1F16) 2:(    7C28     1F16)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       40       11
 [./src/simon.cc:273]  4:      111       40
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      600     1100
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 7 right pairs (2^-29.192645) |      100        0 | 1:(    A500     5366) 2:(    A501     5366)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1040       11
 [./src/simon.cc:273]  4:     4111     1040
 [./src/simon.cc:273]  5:      605     4111
 [./src/simon.cc:273]  6:     5901      605
 [./src/simon.cc:273]  7:     6009     5901
 [./src/simon.cc:273]  8:     1006     6009
 [./src/simon.cc:273]  9:      401     1006
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 8 right pairs (2^-29.000000) |      100        0 | 1:(    A501     5366) 2:(    A500     5366)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1040       11
 [./src/simon.cc:273]  4:     4111     1040
 [./src/simon.cc:273]  5:      605     4111
 [./src/simon.cc:273]  6:     5901      605
 [./src/simon.cc:273]  7:     6009     5901
 [./src/simon.cc:273]  8:     1006     6009
 [./src/simon.cc:273]  9:      401     1006
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 9 right pairs (2^-28.830075) |      100        0 | 1:(    AA44     F88F) 2:(    AA45     F88F)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:      619        6
 [./src/simon.cc:273]  3:       42      619
 [./src/simon.cc:273]  4:      511       42
 [./src/simon.cc:273]  5:      407      511
 [./src/simon.cc:273]  6:     1007      407
 [./src/simon.cc:273]  7:     6001     1007
 [./src/simon.cc:273]  8:     1100     6001
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 10 right pairs (2^-28.678072) |      100        0 | 1:(    AA45     F88F) 2:(    AA44     F88F)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:      619        6
 [./src/simon.cc:273]  3:       42      619
 [./src/simon.cc:273]  4:      511       42
 [./src/simon.cc:273]  5:      407      511
 [./src/simon.cc:273]  6:     1007      407
 [./src/simon.cc:273]  7:     6001     1007
 [./src/simon.cc:273]  8:     1100     6001
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 11 right pairs (2^-28.540568) |      100        0 | 1:(    B510     1306) 2:(    B511     1306)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1040       11
 [./src/simon.cc:273]  4:     4111     1040
 [./src/simon.cc:273]  5:      605     4111
 [./src/simon.cc:273]  6:     5901      605
 [./src/simon.cc:273]  7:     6009     5901
 [./src/simon.cc:273]  8:     1006     6009
 [./src/simon.cc:273]  9:      401     1006
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 12 right pairs (2^-28.415037) |      100        0 | 1:(    B511     1306) 2:(    B510     1306)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1040       11
 [./src/simon.cc:273]  4:     4111     1040
 [./src/simon.cc:273]  5:      605     4111
 [./src/simon.cc:273]  6:     5901      605
 [./src/simon.cc:273]  7:     6009     5901
 [./src/simon.cc:273]  8:     1006     6009
 [./src/simon.cc:273]  9:      401     1006
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 13 right pairs (2^-28.299560) |      100        0 | 1:(    B84C     4070) 2:(    B84D     4070)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:      160      411
 [./src/simon.cc:273]  4:     4191      160
 [./src/simon.cc:273]  5:      445     4191
 [./src/simon.cc:273]  6:     5101      445
 [./src/simon.cc:273]  7:     6000     5101
 [./src/simon.cc:273]  8:     1100     6000
 [./src/simon.cc:273]  9:      401     1100
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 14 right pairs (2^-28.192645) |      100        0 | 1:(    B84D     4070) 2:(    B84C     4070)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:      160      411
 [./src/simon.cc:273]  4:     4191      160
 [./src/simon.cc:273]  5:      445     4191
 [./src/simon.cc:273]  6:     5101      445
 [./src/simon.cc:273]  7:     6000     5101
 [./src/simon.cc:273]  8:     1100     6000
 [./src/simon.cc:273]  9:      401     1100
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 15 right pairs (2^-28.093109) |      100        0 | 1:(    C5BA     7901) 2:(    C5BB     7901)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:      104        1
 [./src/simon.cc:273]  2:       11      104
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 16 right pairs (2^-28.000000) |      100        0 | 1:(    C5BB     7901) 2:(    C5BA     7901)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:      104        1
 [./src/simon.cc:273]  2:       11      104
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 17 right pairs (2^-27.912537) |      100        0 | 1:(    C82A     C704) 2:(    C82B     C704)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 18 right pairs (2^-27.830075) |      100        0 | 1:(    C82B     C704) 2:(    C82A     C704)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 19 right pairs (2^-27.752072) |      100        0 | 1:(    CC06     84BE) 2:(    CC07     84BE)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:       40      411
 [./src/simon.cc:273]  4:     4511       40
 [./src/simon.cc:273]  5:      406     4511
 [./src/simon.cc:273]  6:     5901      406
 [./src/simon.cc:273]  7:     6200     5901
 [./src/simon.cc:273]  8:     1100     6200
 [./src/simon.cc:273]  9:      600     1100
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 20 right pairs (2^-27.678072) |      100        0 | 1:(    CC07     84BE) 2:(    CC06     84BE)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:       40      411
 [./src/simon.cc:273]  4:     4511       40
 [./src/simon.cc:273]  5:      406     4511
 [./src/simon.cc:273]  6:     5901      406
 [./src/simon.cc:273]  7:     6200     5901
 [./src/simon.cc:273]  8:     1100     6200
 [./src/simon.cc:273]  9:      600     1100
 [./src/simon.cc:273] 10:      100      600
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 21 right pairs (2^-27.607683) |      100        0 | 1:(    D5AC     1F4C) 2:(    D5AD     1F4C)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:      104        1
 [./src/simon.cc:273]  2:       11      104
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 22 right pairs (2^-27.540568) |      100        0 | 1:(    D5AD     1F4C) 2:(    D5AC     1F4C)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:      104        1
 [./src/simon.cc:273]  2:       11      104
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 23 right pairs (2^-27.476438) |      100        0 | 1:(    D83E     B34D) 2:(    D83F     B34D)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 24 right pairs (2^-27.415037) |      100        0 | 1:(    D83F     B34D) 2:(    D83E     B34D)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0

 Found 25 right pairs (2^-27.356144) |      100        0 | 1:(    DD2A     4D74) 2:(    DD2B     4D74)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       40       11
 [./src/simon.cc:273]  4:     4191       40
 [./src/simon.cc:273]  5:      445     4191
 [./src/simon.cc:273]  6:     5101      445
 [./src/simon.cc:273]  7:     6000     5101
 [./src/simon.cc:273]  8:     1100     6000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 26 right pairs (2^-27.299560) |      100        0 | 1:(    DD2B     4D74) 2:(    DD2A     4D74)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       40       11
 [./src/simon.cc:273]  4:     4191       40
 [./src/simon.cc:273]  5:      445     4191
 [./src/simon.cc:273]  6:     5101      445
 [./src/simon.cc:273]  7:     6000     5101
 [./src/simon.cc:273]  8:     1100     6000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 27 right pairs (2^-27.245112) |      100        0 | 1:(    E050     46C2) 2:(    E051     46C2)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1040       11
 [./src/simon.cc:273]  4:     4111     1040
 [./src/simon.cc:273]  5:      605     4111
 [./src/simon.cc:273]  6:     5901      605
 [./src/simon.cc:273]  7:     6009     5901
 [./src/simon.cc:273]  8:     1006     6009
 [./src/simon.cc:273]  9:      401     1006
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 28 right pairs (2^-27.192645) |      100        0 | 1:(    E051     46C2) 2:(    E050     46C2)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1040       11
 [./src/simon.cc:273]  4:     4111     1040
 [./src/simon.cc:273]  5:      605     4111
 [./src/simon.cc:273]  6:     5901      605
 [./src/simon.cc:273]  7:     6009     5901
 [./src/simon.cc:273]  8:     1006     6009
 [./src/simon.cc:273]  9:      401     1006
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 29 right pairs (2^-27.142019) |      100        0 | 1:(    E76E     2FDD) 2:(    E76F     2FDD)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:      411        6
 [./src/simon.cc:273]  3:     1040      411
 [./src/simon.cc:273]  4:     4511     1040
 [./src/simon.cc:273]  5:      404     4511
 [./src/simon.cc:273]  6:     5901      404
 [./src/simon.cc:273]  7:     6210     5901
 [./src/simon.cc:273]  8:     1100     6210
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 30 right pairs (2^-27.093109) |      100        0 | 1:(    E76F     2FDD) 2:(    E76E     2FDD)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        6        1
 [./src/simon.cc:273]  2:      411        6
 [./src/simon.cc:273]  3:     1040      411
 [./src/simon.cc:273]  4:     4511     1040
 [./src/simon.cc:273]  5:      404     4511
 [./src/simon.cc:273]  6:     5901      404
 [./src/simon.cc:273]  7:     6210     5901
 [./src/simon.cc:273]  8:     1100     6210
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 31 right pairs (2^-27.045804) |      100        0 | 1:(    ED7E     18A9) 2:(    ED7F     18A9)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 32 right pairs (2^-27.000000) |      100        0 | 1:(    ED7F     18A9) 2:(    ED7E     18A9)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 33 right pairs (2^-26.955606) |      100        0 | 1:(    F040      6A2) 2:(    F041      6A2)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1040       11
 [./src/simon.cc:273]  4:     4111     1040
 [./src/simon.cc:273]  5:      605     4111
 [./src/simon.cc:273]  6:     5901      605
 [./src/simon.cc:273]  7:     6009     5901
 [./src/simon.cc:273]  8:     1006     6009
 [./src/simon.cc:273]  9:      401     1006
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 34 right pairs (2^-26.912537) |      100        0 | 1:(    F041      6A2) 2:(    F040      6A2)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:     1040       11
 [./src/simon.cc:273]  4:     4111     1040
 [./src/simon.cc:273]  5:      605     4111
 [./src/simon.cc:273]  6:     5901      605
 [./src/simon.cc:273]  7:     6009     5901
 [./src/simon.cc:273]  8:     1006     6009
 [./src/simon.cc:273]  9:      401     1006
 [./src/simon.cc:273] 10:      100      401
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 35 right pairs (2^-26.870717) |      100        0 | 1:(    F13A     126D) 2:(    F13B     126D)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:     1060      411
 [./src/simon.cc:273]  4:     4511     1060
 [./src/simon.cc:273]  5:      406     4511
 [./src/simon.cc:273]  6:     5901      406
 [./src/simon.cc:273]  7:     6200     5901
 [./src/simon.cc:273]  8:     1100     6200
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 36 right pairs (2^-26.830075) |      100        0 | 1:(    F13B     126D) 2:(    F13A     126D)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:      411        4
 [./src/simon.cc:273]  3:     1060      411
 [./src/simon.cc:273]  4:     4511     1060
 [./src/simon.cc:273]  5:      406     4511
 [./src/simon.cc:273]  6:     5901      406
 [./src/simon.cc:273]  7:     6200     5901
 [./src/simon.cc:273]  8:     1100     6200
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 37 right pairs (2^-26.790547) |      100        0 | 1:(    FD6A     6CC0) 2:(    FD6B     6CC0)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 Found 38 right pairs (2^-26.752072) |      100        0 | 1:(    FD6B     6CC0) 2:(    FD6A     6CC0)
 [./src/simon.cc:273]  0:        1        0
 [./src/simon.cc:273]  1:        4        1
 [./src/simon.cc:273]  2:       11        4
 [./src/simon.cc:273]  3:       60       11
 [./src/simon.cc:273]  4:      111       60
 [./src/simon.cc:273]  5:      407      111
 [./src/simon.cc:273]  6:     1101      407
 [./src/simon.cc:273]  7:     4000     1101
 [./src/simon.cc:273]  8:     1100     4000
 [./src/simon.cc:273]  9:      400     1100
 [./src/simon.cc:273] 10:      100      400
 [./src/simon.cc:273] 11:        0      100
 [./src/simon.cc:287] 12:      100        0
 [./tests/simon-xor-threshold-search-tests.cc:712] p = 2^-26.752072
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():761]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-26.752072
[

 */

/* 

 0:      400 ->     1800 0.250000 (2^-2.000000)
 1:      100 ->        0 0.250000 (2^-2.000000)
 2:        0 ->      100 1.000000 (2^0.000000)
 3:      100 ->      400 0.250000 (2^-2.000000)
 4:      400 ->     1100 0.250000 (2^-2.000000)
 5:     1100 ->     4200 0.062500 (2^-4.000000)
 6:     4200 ->     1D01 0.062500 (2^-4.000000)
 7:     1D01 ->      500 0.003906 (2^-8.000000)
 8:      500 ->      100 0.125000 (2^-3.000000)
 9:      100 ->      100 0.250000 (2^-2.000000)
10:      100 ->      500 0.250000 (2^-2.000000)
11:      500 ->     1500 0.125000 (2^-3.000000)
p_tot = 0.000000000058208 = 2^-34.000000, Bn = 0.000000 = 2^-34.000000
[./src/simon-xor-threshold-search.cc:1137] nrounds = 12

 0:     8808 ->     2022 0.015625 (2^-6.000000)
 1:        2 ->     8800 0.250000 (2^-2.000000)
 2:     8800 ->     2000 0.062500 (2^-4.000000)
 3:     2000 ->      800 0.250000 (2^-2.000000)
 4:      800 ->        0 0.250000 (2^-2.000000)
 5:        0 ->      800 1.000000 (2^0.000000)
 6:      800 ->     2000 0.250000 (2^-2.000000)
 7:     2000 ->     8800 0.250000 (2^-2.000000)
 8:     8800 ->        2 0.062500 (2^-4.000000)
 9:        2 ->     8808 0.250000 (2^-2.000000)
10:     8808 ->     2020 0.015625 (2^-6.000000)
11:     2020 ->      888 0.062500 (2^-4.000000)
p_tot = 0.000000000014552 = 2^-36.000000
[./tests/simon-xor-threshold-search-tests.cc:273]

 */


/* 
[    1]  400   100  100     0    0   100  100   400  400  1100  | 2^-8.000000
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():116] dy_init     1900
[./src/simon-xor-threshold-search.cc:136] Verify P for one round (2^22.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)      400 ->      100
EXP  0: 0.250353 (2^-1.997965)      400 ->      100

THE  1: 0.250000 (2^-2.000000)      100 ->        0
EXP  1: 0.249724 (2^-2.001593)      100 ->        0

THE  2: 1.000000 (2^0.000000)        0 ->      100
EXP  2: 1.000000 (2^0.000000)        0 ->      100

THE  3: 0.250000 (2^-2.000000)      100 ->      400
EXP  3: 0.249679 (2^-2.001853)      100 ->      400

THE  4: 0.250000 (2^-2.000000)      400 ->     1100
EXP  4: 0.250226 (2^-1.998699)      400 ->     1100

OK
[./src/simon-xor-threshold-search.cc:264] Verify P of differentials (2^22.000000 CPs)...
Input differences:      400     1900

R# 0 Output differences:      100      400
THE  1: 0.250000 (2^-2.000000)      400 ->      100
EXP  1: 0.250234 (2^-1.998648)      400 ->      100

R# 1 Output differences:        0      100
THE  2: 0.062500 (2^-4.000000)      100 ->        0
EXP  2: 0.062526 (2^-3.999395)      100 ->        0

R# 2 Output differences:      100        0
THE  3: 0.062500 (2^-4.000000)        0 ->      100
EXP  3: 0.062568 (2^-3.998421)        0 ->      100

R# 3 Output differences:      400      100
THE  4: 0.015625 (2^-6.000000)      100 ->      400
EXP  4: 0.015654 (2^-5.997361)      100 ->      400

R# 4 Output differences:     1100      400
THE  5: 0.003906 (2^-8.000000)      400 ->     1100
EXP  5: 0.002181 (2^-8.840602)      400 ->     1100


 */


/* --- */

// Scratch fragment: selects how the trail handed to the later verification
// code is obtained. The enclosing function and the declarations of key, B,
// trail, trail_len, lowp_trail and lowp_trail_len are not part of this excerpt.
#if 0
  // Disabled path: obtain the trail by calling simon_xor_trail_search(); its
  // uint32_t return value is stored as the round count nrounds_full.
  uint32_t nrounds_full = simon_xor_trail_search(key, B, trail, &trail_len, lowp_trail, &lowp_trail_len);
#else
  // Active path: skip the search and hard-code a 5-round trail.
  trail_len = 5;
  uint32_t nrounds_full = 5;
  // Each entry is a four-value brace initializer. The first, second and fourth
  // values coincide with rounds 0..4 of the trail printed in the log comment
  // earlier in this file (left difference -> right difference, probability).
  // NOTE(review): presumably the fields are (dx, dy, npairs, p) with the third
  // value an unused counter -- confirm against the trail element declaration.
  trail[0] = {0x400, 0x1800, 0, 0.250000};
  trail[1] = {0x100,      0, 0, 0.250000};
  trail[2] = {0,      0x100, 0, 1.000000};
  trail[3] = {0x100,  0x400, 0, 0.250000};
  trail[4] = {0x400, 0x1100, 0, 0.250000};
  // B[0..4] are set to 2^0, 2^-4, 2^-4, 2^-6 and 2^-8 respectively.
  // NOTE(review): presumably these are the per-round probability bounds that
  // the search/verification routines read -- TODO confirm with the callee.
  B[0] = 1.0;
  B[1] = 1.0 / (double)(1UL << 4);
  B[2] = 1.0 / (double)(1UL << 4);
  B[3] = 1.0 / (double)(1UL << 6);
  B[4] = 1.0 / (double)(1UL << 8);
  // Abandoned alternative: a single aggregate initializer for the same trail.
  // NOTE(review): as written below, 100, 400 and 1100 lack the 0x prefix and
  // would be decimal values, unlike the hexadecimal ones assigned above.
  //  trail[5] = {
  //	 {0x400, 0x1800, 0, 0.250000},
  //	 {0x100,         0, 0, 0.250000},
  //	 {0,       100, 0, 1.000000},
  //	 {0x100,      400, 0, 0.250000},
  //	 {0x400,     1100, 0, 0.250000}
  //  };
#endif

/* --- */




/* 
Verification of the DTU results:

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:721] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:696] Key     D596     ED8B     730A     1FF2
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():707]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) | 1:(    B5CE      952) 2:(    B4CE      952)
 Found 2 right pairs (2^-31.000000) | 1:(    B4CE      952) 2:(    B5CE      952)
 Found 3 right pairs (2^-30.415037) | 1:(    61CC     13AE) 2:(    60CC     13AE)
 Found 4 right pairs (2^-30.000000) | 1:(    60CC     13AE) 2:(    61CC     13AE)
 Found 5 right pairs (2^-29.678072) | 1:(    23FB      92A) 2:(    22FB      92A)
 Found 6 right pairs (2^-29.415037) | 1:(    22FB      92A) 2:(    23FB      92A)
 [./tests/simon-xor-threshold-search-tests.cc:668] p = 2^-29.415037
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():711]:
 Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-29.415037
 [./tests/simon-xor-threshold-search-tests.cc:713] OK

real    9m4.168s
user    8m20.011s
sys     0m0.032s


vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:721] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:696] Key     ED1E     DF92     D070     D6D8
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():707]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000)  | 1:(    5457     7667) 2:(    5557     7667)
 Found 2 right pairs (2^-31.000000)  | 1:(    5557     7667) 2:(    5457     7667)
 Found 3 right pairs (2^-30.415037)  | 1:(    8C3F     7D98) 2:(    8D3F     7D98)
 Found 4 right pairs (2^-30.000000)  | 1:(    8D3F     7D98) 2:(    8C3F     7D98)
 Found 5 right pairs (2^-29.678072)  | 1:(    825E      4B9) 2:(    835E      4B9)
 Found 6 right pairs (2^-29.415037)  | 1:(    835E      4B9) 2:(    825E      4B9)
 Found 7 right pairs (2^-29.192645)  | 1:(    8721     1D6A) 2:(    8621     1D6A)
 Found 8 right pairs (2^-29.000000)  | 1:(    8621     1D6A) 2:(    8721     1D6A)
 Found 9 right pairs (2^-28.830075)  | 1:(    AD13     9EC9) 2:(    AC13     9EC9)
 Found 10 right pairs (2^-28.678072) | 1:(    AC13     9EC9) 2:(    AD13     9EC9)
 Found 11 right pairs (2^-28.540568) | 1:(    5780     EC73) 2:(    5680     EC73)
 Found 12 right pairs (2^-28.415037) | 1:(    5680     EC73) 2:(    5780     EC73)
 Found 13 right pairs (2^-28.299560) | 1:(    F6FC     49D1) 2:(    F7FC     49D1)
 Found 14 right pairs (2^-28.192645) | 1:(    F7FC     49D1) 2:(    F6FC     49D1)
 Found 15 right pairs (2^-28.093109) | 1:(    524F     5485) 2:(    534F     5485)
 Found 16 right pairs (2^-28.000000) | 1:(    534F     5485) 2:(    524F     5485)
 [./tests/simon-xor-threshold-search-tests.cc:668] p = 2^-28.000000
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():711]:
 Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-28.000000
 [./tests/simon-xor-threshold-search-tests.cc:713] OK

real    9m25.798s
user    8m17.587s
sys     0m0.124s


vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:721] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:696] Key     2227      9E3     6372     98B8
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():707]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) | 1:(    1169     5ABB) 2:(    1069     5ABB)
 Found 2 right pairs (2^-31.000000) | 1:(    1069     5ABB) 2:(    1169     5ABB)
 Found 3 right pairs (2^-30.415037) | 1:(    4F5C     68D0) 2:(    4E5C     68D0)
 Found 4 right pairs (2^-30.000000) | 1:(    4E5C     68D0) 2:(    4F5C     68D0)
 [./tests/simon-xor-threshold-search-tests.cc:668] p = 2^-30.000000
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():711]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-30.000000
 [./tests/simon-xor-threshold-search-tests.cc:713] OK

real    9m37.752s
user    8m36.148s
sys     0m0.368s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$

vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
#--- [./tests/simon-xor-threshold-search-tests.cc:721] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:696] Key     47DC     86AF      4D3     1BB4
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():707]:
Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
Found 1 right pairs (2^-32.000000) | 1:(    849A     30D7) 2:(    859A     30D7)
Found 2 right pairs (2^-31.000000) | 1:(    859A     30D7) 2:(    849A     30D7)
Found 3 right pairs (2^-30.415037) | 1:(    A3DF     2BDB) 2:(    A2DF     2BDB)
Found 4 right pairs (2^-30.000000) | 1:(    A2DF     2BDB) 2:(    A3DF     2BDB)
Found 5 right pairs (2^-29.678072) | 1:(    F2F7     ABE4) 2:(    F3F7     ABE4)
Found 6 right pairs (2^-29.415037) | 1:(    F3F7     ABE4) 2:(    F2F7     ABE4)
Found 7 right pairs (2^-29.192645) | 1:(    8495     259F) 2:(    8595     259F)
Found 8 right pairs (2^-29.000000) | 1:(    8595     259F) 2:(    8495     259F)
Found 9 right pairs (2^-28.830075) | 1:(    BE08     757B) 2:(    BF08     757B)
Found 10 right pairs (2^-28.678072) | 1:(    BF08     757B) 2:(    BE08     757B)
Found 11 right pairs (2^-28.540568) | 1:(    42A5     1E68) 2:(    43A5     1E68)
Found 12 right pairs (2^-28.415037) | 1:(    43A5     1E68) 2:(    42A5     1E68)
[./tests/simon-xor-threshold-search-tests.cc:668] p = 2^-28.415037
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():711]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-28.415037
 [./tests/simon-xor-threshold-search-tests.cc:713] OK

real    9m55.189s
user    8m34.132s
sys     0m0.052s




---
#--- [./tests/simon-xor-threshold-search-tests.cc:720] Tests, WORD_SIZE  = 16, MASK =     FFFF
[./tests/simon-xor-threshold-search-tests.cc:695] Key     7DF0     20FA     735D     551D
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():706]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 1 right pairs (2^-32.000000) for diff (       1        0) -> (     100        0) |     3687     6BC6
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 2 right pairs (2^-31.000000) for diff (       1        0) -> (     100        0) |     3787     6BC6
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 3 right pairs (2^-30.415037) for diff (       1        0) -> (     100        0) |     F1D6     96F2
 [./tests/simon-xor-threshold-search-tests.cc:674] p = 2^-30.415037
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():710]:
 Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-30.415037



 vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
 #--- [./tests/simon-xor-threshold-search-tests.cc:720] Tests, WORD_SIZE  = 16, MASK =     FFFF
 [./tests/simon-xor-threshold-search-tests.cc:695] Key     EE12     4536     2256     E6F5
 [./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():706]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 1 right pairs (2^-32.000000) for diff (       1        0) -> (     100        0) |     2415     BDF7
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 2 right pairs (2^-31.000000) for diff (       1        0) -> (     100        0) |     D5C2     5C38
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 3 right pairs (2^-30.415037) for diff (       1        0) -> (     100        0) |     5F51     C096
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 4 right pairs (2^-30.000000) for diff (       1        0) -> (     100        0) |     5E51     C096
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 5 right pairs (2^-29.678072) for diff (       1        0) -> (     100        0) |     5E51     C096
 [./tests/simon-xor-threshold-search-tests.cc:674] p = 2^-29.678072
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():710]:
 Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-29.678072
 [./tests/simon-xor-threshold-search-tests.cc:712] OK


 vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
 #--- [./tests/simon-xor-threshold-search-tests.cc:720] Tests, WORD_SIZE  = 16, MASK =     FFFF
 [./tests/simon-xor-threshold-search-tests.cc:695] Key     220C     B9DF      E1B     6139
 [./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():706]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 [./tests/simon-xor-threshold-search-tests.cc:667] REV Found 1 right pairs (2^-32.000000) for diff (       1        0) -> (       0      100) |     6F93     4E66
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 1 right pairs (2^-32.000000) for diff (       1        0) -> (     100        0) |     2B7E     59C5
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 2 right pairs (2^-31.000000) for diff (       1        0) -> (     100        0) |     2A7E     59C5
 [./tests/simon-xor-threshold-search-tests.cc:674] p = 2^-31.000000
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():710]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-31.000000
 [./tests/simon-xor-threshold-search-tests.cc:712] OK


 vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ time ./bin/simon-xor-threshold-search-tests
 #--- [./tests/simon-xor-threshold-search-tests.cc:720] Tests, WORD_SIZE  = 16, MASK =     FFFF
 [./tests/simon-xor-threshold-search-tests.cc:695] Key     40D7     301B     E140     A81A
 [./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():706]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 1 right pairs (2^-32.000000) for diff (       1        0) -> (     100        0) |     60F6     4A58
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 2 right pairs (2^-31.000000) for diff (       1        0) -> (     100        0) |     B9A4     314D
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 3 right pairs (2^-30.415037) for diff (       1        0) -> (     100        0) |     2450     8BD5
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 4 right pairs (2^-30.000000) for diff (       1        0) -> (     100        0) |     B8A4     314D
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 5 right pairs (2^-29.678072) for diff (       1        0) -> (     100        0) |     2550     8BD5
 [./tests/simon-xor-threshold-search-tests.cc:661] Found 6 right pairs (2^-29.415037) for diff (       1        0) -> (     100        0) |     2450     8BD5
 [./tests/simon-xor-threshold-search-tests.cc:674] p = 2^-29.415037
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():710]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-29.415037
 [./tests/simon-xor-threshold-search-tests.cc:712] OK

 #--- [./tests/simon-xor-threshold-search-tests.cc:717] Tests, WORD_SIZE  = 16, MASK =     FFFF
 [./tests/simon-xor-threshold-search-tests.cc:692] Key     D24C     246E     3A57     3D57
 [./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():703]:
 Verify 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Found 1 right pairs (2^-32.000000) | 1:(    9A27     6CC9) 2:(    9B27     6CC9)
 Found 2 right pairs (2^-31.000000) | 1:(    11B0     9246) 2:(    10B0     9246)
 Found 3 right pairs (2^-30.415037) | 1:(    5601     6B8A) 2:(    5701     6B8A)
 Found 4 right pairs (2^-30.000000) | 1:(    5601     6B8A) 2:(    5701     6B8A)
 Found 5 right pairs (2^-29.678072) | 1:(    E694     7F1C) 2:(    E794     7F1C)
 Found 6 right pairs (2^-29.415037) | 1:(    CE93     924C) 2:(    CF93     924C)
 Found 7 right pairs (2^-29.192645) | 1:(    9A27     6CC9) 2:(    9B27     6CC9)
 Found 8 right pairs (2^-29.000000) | 1:(    F6D4      71F) 2:(    F7D4      71F)
 Found 9 right pairs (2^-28.830075) | 1:(    D7A9     49D2) 2:(    D6A9     49D2)
 [./tests/simon-xor-threshold-search-tests.cc:665] p = 2^-28.830075
OK
[./tests/simon-xor-threshold-search-tests.cc:test_simon_verify_differential():707]:
Verified 12 R differential (       1        0) -> (     100        0) | 2^32.00 CP pairs
 Final probability p = 2^-28.830075
 [./tests/simon-xor-threshold-search-tests.cc:709] OK


 */


/* 

Simon32 12 round 2^-34 clustering


B[ 0] = 1.0
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
 0:      400 ->     1800 0.250000 (2^-2.000000)
 1:      100 ->        0 0.250000 (2^-2.000000)
 2:        0 ->      100 1.000000 (2^0.000000)
 3:      100 ->      400 0.250000 (2^-2.000000)
 4:      400 ->     1100 0.250000 (2^-2.000000)
 5:     1100 ->     4200 0.062500 (2^-4.000000)
 6:     4200 ->     1D01 0.062500 (2^-4.000000)
 7:     1D01 ->      500 0.003906 (2^-8.000000)
 8:      500 ->      100 0.125000 (2^-3.000000)
 9:      100 ->      100 0.250000 (2^-2.000000)
10:      100 ->      500 0.250000 (2^-2.000000)
11:      500 ->     1500 0.125000 (2^-3.000000)
p_tot = 0.000000000058208 = 2^-34.000000, Bn = 0.000000 = 2^-34.000000
[./src/simon-xor-threshold-search.cc:1137] nrounds = 12
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():116] dy_init        0
[./src/simon-xor-threshold-search.cc:136] Verify P for one round (2^22.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)      400 ->     1800
EXP  0: 0.249771 (2^-2.001323)      400 ->     1800

THE  1: 0.250000 (2^-2.000000)      100 ->        0
EXP  1: 0.249737 (2^-2.001518)      100 ->        0

THE  2: 1.000000 (2^0.000000)        0 ->      100
EXP  2: 1.000000 (2^0.000000)        0 ->      100

THE  3: 0.250000 (2^-2.000000)      100 ->      400
EXP  3: 0.249857 (2^-2.000824)      100 ->      400

THE  4: 0.250000 (2^-2.000000)      400 ->     1100
EXP  4: 0.250050 (2^-1.999710)      400 ->     1100

THE  5: 0.062500 (2^-4.000000)     1100 ->     4200
EXP  5: 0.062714 (2^-3.995066)     1100 ->     4200

THE  6: 0.062500 (2^-4.000000)     4200 ->     1D01
EXP  6: 0.062641 (2^-3.996740)     4200 ->     1D01

THE  7: 0.003906 (2^-8.000000)     1D01 ->      500
EXP  7: 0.003901 (2^-8.002027)     1D01 ->      500

THE  8: 0.125000 (2^-3.000000)      500 ->      100
EXP  8: 0.124738 (2^-3.003025)      500 ->      100

THE  9: 0.250000 (2^-2.000000)      100 ->      100
EXP  9: 0.250225 (2^-1.998703)      100 ->      100

THE 10: 0.250000 (2^-2.000000)      100 ->      500
EXP 10: 0.249919 (2^-2.000468)      100 ->      500

OK
[./src/simon-xor-threshold-search.cc:264] Verify P of differentials (2^22.000000 CPs)...
Input differences:      400     1900

R# 0 Output differences:      100      400
THE  1: 0.250000 (2^-2.000000)      400 ->      100
EXP  1: 0.250029 (2^-1.999831)      400 ->      100

R# 1 Output differences:        0      100
THE  2: 0.062500 (2^-4.000000)      100 ->        0
EXP  2: 0.062429 (2^-4.001630)      100 ->        0

R# 2 Output differences:      100        0
THE  3: 0.062500 (2^-4.000000)        0 ->      100
EXP  3: 0.062600 (2^-3.997701)        0 ->      100

R# 3 Output differences:      400      100
THE  4: 0.015625 (2^-6.000000)      100 ->      400
EXP  4: 0.015647 (2^-5.997954)      100 ->      400

R# 4 Output differences:     1100      400
THE  5: 0.003906 (2^-8.000000)      400 ->     1100
EXP  5: 0.002230 (2^-8.808787)      400 ->     1100

R# 5 Output differences:     4200     1100
THE  6: 0.000244 (2^-12.000000)     1100 ->     4200
EXP  6: 0.000224 (2^-12.121949)     1100 ->     4200

R# 6 Output differences:     1D01     4200
THE  7: 0.000015 (2^-16.000000)     4200 ->     1D01
EXP  7: 0.000015 (2^-15.977632)     4200 ->     1D01

R# 7 Output differences:      500     1D01
THE  8: 0.000000 (2^-24.000000)     1D01 ->      500
EXP  8: 0.000000 (2^-22.000000)     1D01 ->      500

R# 8 Output differences:      100      500
THE  9: 0.000000 (2^-27.000000)      500 ->      100
EXP  9: 0.000000 (2^-inf)      500 ->      100

R# 9 Output differences:      100      100
THE 10: 0.000000 (2^-29.000000)      100 ->      100
EXP 10: 0.000000 (2^-inf)      100 ->      100

R#10 Output differences:      500      100
THE 11: 0.000000 (2^-31.000000)      100 ->      500
EXP 11: 0.000000 (2^-inf)      100 ->      500

OK
[./src/simon-xor-threshold-search.cc:1149] num_rounds 11
[./tests/simon-xor-threshold-search-tests.cc:253]
----- Begin search for clusters of trails -----
[./src/simon-xor-threshold-search.cc:887] trail_len 12
[./src/simon-xor-threshold-search.cc:934] Initial trail:
[./src/simon-xor-threshold-search.cc:629] Found 1 trails:
[    1]  400  100  100    0    0  100  100  400  400 1100 1100 4200 4200 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-34.000000
Probability of differential: 2^-34.000000

vpv-20130925


[./src/simon-xor-threshold-search.cc:629] Found 38 trails:
[    1]  400  100  100    0    0  100  100  600  600 1500 1500 7200 7200 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-39.000000
[    2]  400  100  100    0    0  100  100  400  400 1100 1100 6200 6200 1903 1903  700  700  100  100  100  100  500  500 1500  | 2^-39.000000
[    3]  400  100  100    0    0  100  100  400  400 1100 1100 4200 4200 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-34.000000
[    4]  400  100  100    0    0  100  100  600  600 1500 1500 7A10 7A10 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-40.000000
[    5]  400  100  100    0    0  100  100  600  600 1100 1100 4200 4200 1D03 1D03  700  700  100  100  100  100  500  500 1500  | 2^-39.000000
[    6]  400  100  100    0    0  100  100  600  600 1D02 1D02 4000 4000 1D03 1D03  700  700  100  100  100  100  500  500 1500  | 2^-40.000000
[    7]  400  100  100    0    0  100  100  600  600 1D02 1D02 7200 7200 1501 1501  500  500  100  100  100  100  500  500 1500  | 2^-40.000000
[    8]  400  100  100    0    0  100  100  600  600 1D00 1D00 7A10 7A10 1501 1501  500  500  100  100  100  100  500  500 1500  | 2^-40.000000
[    9]  400  100  100    0    0  100  100  400  400 1100 1100 6210 6210 1903 1903  700  700  100  100  100  100  500  500 1500  | 2^-40.000000
[   10]  400  100  100    0    0  100  100  400  400 1100 1100 4210 4210 1903 1903  700  700  100  100  100  100  500  500 1500  | 2^-39.000000
[   11]  400  100  100    0    0  100  100  400  400 1100 1100 4200 4200 1D03 1D03  700  700  100  100  100  100  500  500 1500  | 2^-37.000000
[   12]  400  100  100    0    0  100  100  400  400 1100 1100 4211 4211 1D07 1D07  501  501  100  100  100  100  500  500 1500  | 2^-39.000000
[   13]  400  100  100    0    0  100  100  400  400 1900 1900 7208 7208 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-40.000000
[   14]  400  100  100    0    0  100  100  400  400 1100 1100 4200 4200 1903 1903  700  700  100  100  100  100  500  500 1500  | 2^-37.000000
[   15]  400  100  100    0    0  100  100  600  600 1102 1102 4018 4018 1103 1103  700  700  100  100  100  100  500  500 1500  | 2^-40.000000
[   16]  400  100  100    0    0  100  100  600  600 1100 1100 4200 4200 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-36.000000
[   17]  400  100  100    0    0  100  100  600  600 1100 1100 4200 4200 1903 1903  700  700  100  100  100  100  500  500 1500  | 2^-39.000000
[   18]  400  100  100    0    0  100  100  400  400 1900 1900 4201 4201 1107 1107  701  701  100  100  100  100  500  500 1500  | 2^-39.000000
[   19]  400  100  100    0    0  100  100  400  400 1100 1100 4011 4011 1107 1107  701  701  100  100  100  100  500  500 1500  | 2^-38.000000
[   20]  400  100  100    0    0  100  100  400  400 1900 1900 4001 4001 1907 1907  701  701  100  100  100  100  500  500 1500  | 2^-39.000000
[   21]  400  100  100    0    0  100  100  400  400 1100 1100 4001 4001 1107 1107  701  701  100  100  100  100  500  500 1500  | 2^-36.000000
[   22]  400  100  100    0    0  100  100  400  400 1100 1100 6200 6200 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-36.000000
[   23]  400  100  100    0    0  100  100  400  400 1900 1900 6208 6208 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-40.000000
[   24]  400  100  100    0    0  100  100  600  600 1100 1100 6200 6200 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-38.000000
[   25]  400  100  100    0    0  100  100  600  600 1902 1902 7200 7200 1501 1501  500  500  100  100  100  100  500  500 1500  | 2^-40.000000
[   26]  400  100  100    0    0  100  100  600  600 1100 1100 4001 4001 1107 1107  701  701  100  100  100  100  500  500 1500  | 2^-38.000000
[   27]  400  100  100    0    0  100  100  400  400 1104 1104 4019 4019 1103 1103  700  700  100  100  100  100  500  500 1500  | 2^-40.000000
[   28]  400  100  100    0    0  100  100  600  600 1902 1902 4000 4000 1903 1903  700  700  100  100  100  100  500  500 1500  | 2^-40.000000
[   29]  400  100  100    0    0  100  100  600  600 1102 1102 6008 6008 1103 1103  700  700  100  100  100  100  500  500 1500  | 2^-40.000000
[   30]  400  100  100    0    0  100  100  400  400 1900 1900 7200 7200 1501 1501  500  500  100  100  100  100  500  500 1500  | 2^-37.000000
[   31]  400  100  100    0    0  100  100  600  600 1502 1502 7008 7008 1503 1503  700  700  100  100  100  100  500  500 1500  | 2^-40.000000
[   32]  400  100  100    0    0  100  100  600  600 1D00 1D00 7200 7200 1501 1501  500  500  100  100  100  100  500  500 1500  | 2^-39.000000
[   33]  400  100  100    0    0  100  100  400  400 1100 1100 4201 4201 1907 1907  701  701  100  100  100  100  500  500 1500  | 2^-38.000000
[   34]  400  100  100    0    0  100  100  600  600 1500 1500 7004 7004 1501 1501  500  500  100  100  100  100  500  500 1500  | 2^-37.000000
[   35]  400  100  100    0    0  100  100  400  400 1100 1100 4211 4211 1D05 1D05  501  501  100  100  100  100  500  500 1500  | 2^-38.000000
[   36]  400  100  100    0    0  100  100  600  600 1900 1900 7200 7200 1501 1501  500  500  100  100  100  100  500  500 1500  | 2^-39.000000
[   37]  400  100  100    0    0  100  100  600  600 1500 1500 7814 7814 1501 1501  500  500  100  100  100  100  500  500 1500  | 2^-38.000000
[   38]  400  100  100    0    0  100  100  400  400 1100 1100 6200 6200 1D03 1D03  700  700  100  100  100  100  500  500 1500  | 2^-39.000000
Probability of differential: 2^-32.334664

real    29693m30.452s
user    29601m45.948s
sys     6m40.077s

New set of trails for Simon32 (12 rounds), found using clustering algorithm 2 with the following parameters:

#define SIMON_EPS (double)(1.0 / (double)(1ULL << 5))
#define XDP_ROT_AND_MAX_DIFF_CNT (1ULL << 8)
#define XDP_ROT_AND_P_THRES 0.05
#define NROUNDS 9

[./src/simon-xor-threshold-search.cc:883] Update max for 12 R: ( 280  A80) -> ( 200  880) 2^-33.000000
 280  A00   80   80   80  280  280 8E80 8E80 2100 2100  880  880  200  200   80   80    0    0   80   80  200  200  880  | 2^-34.000000
 280  A00   80   80   80  280  280 8E80 8E80 A100 A100  882  882 8200 8200   80   80    0    0   80   80  200  200  880  | 2^-38.000000
 280  A00   80   80   80  280  280 8E80 8E80 2100 2100  880  880 8300 8300   80   80    0    0   80   80  200  200  880  | 2^-37.000000
 280  A00   80   80   80  280  280 8E80 8E80 2100 2100  880  880  300  300   80   80    0    0   80   80  200  200  880  | 2^-36.000000
 280  A00   80   80   80  280  280 8E80 8E80 2100 2100  880  880 8200 8200   80   80    0    0   80   80  200  200  880  | 2^-36.000000
 280  A00   80   80   80  280  280 8E80 8E80 A100 A100  882  882  200  200   80   80    0    0   80   80  200  200  880  | 2^-36.000000
 280  A00   80   80   80  280  280 8E80 8E80 A100 A100  882  882  300  300   80   80    0    0   80   80  200  200  880  | 2^-38.000000
 [./src/simon-xor-threshold-search.cc:914] Sum 2^-33.000000
 [./src/simon-xor-threshold-search.cc:837] Add new differential: 4100  401 -> DA40 6D83 2^-46.000000 | #trails 36864142019 | #trails 36856


./src/simon-xor-threshold-search.cc:simon_xor_trail_search():1260]:
 Verified 12 R differential (     280      A80) -> (     880      200) | 2^20.00 CP pairs
 Final probability p = 2^-inf

real    1093m56.953s
user    1090m40.374s
sys     0m3.096s


 Another 12-round Simon32 differential, but with a worse probability:

 [./src/simon-xor-threshold-search.cc:883] Update max for 12 R: (4000 4101) -> (1000 4440) 2^-33.356144
4000    1 4100 C441 C441 5000 5000  440  440 4100 4100   40   40    0    0   40   40  180  180  440  440 1000 1000 4440  | 2^-39.000000
4000    1 4100 C441 C441 5000 5000  440  440  100  100   40   40    0    0   40   40  180  180  440  440 1000 1000 4440  | 2^-37.000000
4000    1 4100 C641 C641 5000 5000  640  640  100  100   40   40    0    0   40   40  180  180  440  440 1000 1000 4440  | 2^-40.000000
4000    1 4100 C641 C641 5080 5080  440  440  100  100   40   40    0    0   40   40  100  100  440  440 1000 1000 4440  | 2^-37.000000
4000    1 4100 C441 C441 5000 5000  440  440 4100 4100   40   40    0    0   40   40 4100 4100  440  440 1000 1000 4440  | 2^-39.000000
4000    1 4100 C441 C441 5000 5000  440  440  100  100   40   40    0    0   40   40 4100 4100  440  440 1000 1000 4440  | 2^-37.000000
4000    1 4100 C441 C441 5000 5000  440  440  100  100   40   40    0    0   40   40  100  100  440  440 1000 1000 4440  | 2^-35.000000
4000    1 4100 C641 C641 5000 5000  640  640  100  100   40   40    0    0   40   40 4100 4100  440  440 1000 1000 4440  | 2^-40.000000
4000    1 4100 C441 C441 5080 5080  640  640  100  100   40   40    0    0   40   40  180  180  440  440 1000 1000 4440  | 2^-40.000000
4000    1 4100 C441 C441 5000 5000  440  440  180  180   40   40    0    0   40   40 4100 4100  440  440 1000 1000 4440  | 2^-39.000000
4000    1 4100 C641 C641 5080 5080  440  440  100  100   40   40    0    0   40   40 4100 4100  440  440 1000 1000 4440  | 2^-39.000000
4000    1 4100 C441 C441 5000 5000  440  440 4100 4100   40   40    0    0   40   40  100  100  440  440 1000 1000 4440  | 2^-37.000000
4000    1 4100 C641 C641 5000 5000  640  640  100  100   40   40    0    0   40   40  100  100  440  440 1000 1000 4440  | 2^-38.000000
4000    1 4100 C441 C441 5080 5080  640  640  100  100   40   40    0    0   40   40 4100 4100  440  440 1000 1000 4440  | 2^-40.000000
4000    1 4100 C441 C441 5000 5000  440  440  180  180   40   40    0    0   40   40  100  100  440  440 1000 1000 4440  | 2^-37.000000
4000    1 4100 C441 C441 1000 1000 8441 8441 4100 4100   40   40    0    0   40   40  100  100  440  440 1000 1000 4440  | 2^-39.000000
4000    1 4100 C641 C641 5080 5080  440  440  180  180   40   40    0    0   40   40  100  100  440  440 1000 1000 4440  | 2^-39.000000
4000    1 4100 C641 C641 5080 5080  440  440  100  100   40   40    0    0   40   40  180  180  440  440 1000 1000 4440  | 2^-39.000000
4000    1 4100 C441 C441 5080 5080  640  640  100  100   40   40    0    0   40   40  100  100  440  440 1000 1000 4440  | 2^-38.000000
4000    1 4100 C441 C441 5000 5000  440  440  180  180   40   40    0    0   40   40  180  180  440  440 1000 1000 4440  | 2^-39.000000
[./src/simon-xor-threshold-search.cc:914] Sum 2^-33.356144


 */

 


/* --- */


/* 
Simon64 17 round trails
(note: the trail listed below has 18 transitions, indexed 0-17, and the log reports nrounds = 18)
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-24.000000
B[ 9] = 2^-28.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-62.000000
B[17] = 2^-68.000000
 0: 20002020 -> 80008080 0.015625 (2^-6.000000)
 1:  8000888 ->      200 0.003906 (2^-8.000000)
 2:      200 ->  8000088 0.250000 (2^-2.000000)
 3:  8000088 -> 20000020 0.015625 (2^-6.000000)
 4: 20000020 -> 88000008 0.062500 (2^-4.000000)
 5: 88000008 ->        2 0.015625 (2^-6.000000)
 6:        2 -> 88000000 0.250000 (2^-2.000000)
 7: 88000000 -> 20000000 0.062500 (2^-4.000000)
 8: 20000000 ->  8000000 0.250000 (2^-2.000000)
 9:  8000000 ->        0 0.250000 (2^-2.000000)
10:        0 ->  8000000 1.000000 (2^0.000000)
11:  8000000 -> 20000000 0.250000 (2^-2.000000)
12: 20000000 -> 88000000 0.250000 (2^-2.000000)
13: 88000000 ->        2 0.062500 (2^-4.000000)
14:        2 -> 88000008 0.250000 (2^-2.000000)
15: 88000008 -> 20000020 0.015625 (2^-6.000000)
16: 20000020 ->  8000088 0.062500 (2^-4.000000)
17:  8000088 ->      200 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-68.000000, Bn = 0.000000 = 2^-68.000000
[./src/simon-xor-threshold-search.cc:1137] nrounds = 18
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():116] dy_init        0
[./src/simon-xor-threshold-search.cc:136] Verify P for one round (2^22.000000 CPs)...
THE  0: 0.015625 (2^-6.000000) 20002020 -> 80008080
EXP  0: 0.015582 (2^-6.003946) 20002020 -> 80008080

THE  1: 0.003906 (2^-8.000000)  8000888 ->      200
EXP  1: 0.003897 (2^-8.003262)  8000888 ->      200

THE  2: 0.250000 (2^-2.000000)      200 ->  8000088
EXP  2: 0.250220 (2^-1.998732)      200 ->  8000088

THE  3: 0.015625 (2^-6.000000)  8000088 -> 20000020
EXP  3: 0.015645 (2^-5.998130)  8000088 -> 20000020

THE  4: 0.062500 (2^-4.000000) 20000020 -> 88000008
EXP  4: 0.062351 (2^-4.003455) 20000020 -> 88000008

THE  5: 0.015625 (2^-6.000000) 88000008 ->        2
EXP  5: 0.015569 (2^-6.005138) 88000008 ->        2

THE  6: 0.250000 (2^-2.000000)        2 -> 88000000
EXP  6: 0.249741 (2^-2.001494)        2 -> 88000000

THE  7: 0.062500 (2^-4.000000) 88000000 -> 20000000
EXP  7: 0.062524 (2^-3.999455) 88000000 -> 20000000

THE  8: 0.250000 (2^-2.000000) 20000000 ->  8000000
EXP  8: 0.249997 (2^-2.000018) 20000000 ->  8000000

THE  9: 0.250000 (2^-2.000000)  8000000 ->        0
EXP  9: 0.249952 (2^-2.000277)  8000000 ->        0

THE 10: 1.000000 (2^0.000000)        0 ->  8000000
EXP 10: 1.000000 (2^0.000000)        0 ->  8000000

THE 11: 0.250000 (2^-2.000000)  8000000 -> 20000000
EXP 11: 0.250238 (2^-1.998628)  8000000 -> 20000000

THE 12: 0.250000 (2^-2.000000) 20000000 -> 88000000
EXP 12: 0.249965 (2^-2.000204) 20000000 -> 88000000

THE 13: 0.062500 (2^-4.000000) 88000000 ->        2
EXP 13: 0.062506 (2^-3.999862) 88000000 ->        2

THE 14: 0.250000 (2^-2.000000)        2 -> 88000008
EXP 14: 0.250073 (2^-1.999579)        2 -> 88000008

THE 15: 0.015625 (2^-6.000000) 88000008 -> 20000020
EXP 15: 0.015679 (2^-5.995055) 88000008 -> 20000020

THE 16: 0.062500 (2^-4.000000) 20000020 ->  8000088
EXP 16: 0.062444 (2^-4.001294) 20000020 ->  8000088

OK
[./src/simon-xor-threshold-search.cc:264] Verify P of differentials (2^22.000000 CPs)...
Input differences: 20002020 88008808

R# 0 Output differences:  8000888 20002020
THE  1: 0.015625 (2^-6.000000) 20002020 ->  8000888
EXP  1: 0.015544 (2^-6.007482) 20002020 ->  8000888

R# 1 Output differences:      200  8000888
THE  2: 0.000061 (2^-14.000000)  8000888 ->      200
EXP  2: 0.000063 (2^-13.961081)  8000888 ->      200

R# 2 Output differences:  8000088      200
THE  3: 0.000015 (2^-16.000000)      200 ->  8000088
EXP  3: 0.000015 (2^-16.069263)      200 ->  8000088

R# 3 Output differences: 20000020  8000088
THE  4: 0.000000 (2^-22.000000)  8000088 -> 20000020
EXP  4: 0.000000 (2^-inf)  8000088 -> 20000020


R# 4 Output differences: 88000008 20000020
THE  5: 0.000000 (2^-26.000000) 20000020 -> 88000008
EXP  5: 0.000000 (2^-inf) 20000020 -> 88000008

R# 5 Output differences:        2 88000008
THE  6: 0.000000 (2^-32.000000) 88000008 ->        2
EXP  6: 0.000000 (2^-inf) 88000008 ->        2

R# 6 Output differences: 88000000        2
THE  7: 0.000000 (2^-34.000000)        2 -> 88000000
EXP  7: 0.000000 (2^-inf)        2 -> 88000000

R# 7 Output differences: 20000000 88000000
THE  8: 0.000000 (2^-38.000000) 88000000 -> 20000000
EXP  8: 0.000000 (2^-inf) 88000000 -> 20000000

R# 8 Output differences:  8000000 20000000
THE  9: 0.000000 (2^-40.000000) 20000000 ->  8000000
EXP  9: 0.000000 (2^-inf) 20000000 ->  8000000

R# 9 Output differences:        0  8000000
THE 10: 0.000000 (2^-42.000000)  8000000 ->        0
EXP 10: 0.000000 (2^-inf)  8000000 ->        0

R#10 Output differences:  8000000        0
THE 11: 0.000000 (2^-42.000000)        0 ->  8000000
EXP 11: 0.000000 (2^-inf)        0 ->  8000000

R#11 Output differences: 20000000  8000000
THE 12: 0.000000 (2^-44.000000)  8000000 -> 20000000
EXP 12: 0.000000 (2^-inf)  8000000 -> 20000000

R#12 Output differences: 88000000 20000000
THE 13: 0.000000 (2^-46.000000) 20000000 -> 88000000
EXP 13: 0.000000 (2^-inf) 20000000 -> 88000000

R#13 Output differences:        2 88000000
THE 14: 0.000000 (2^-50.000000) 88000000 ->        2
EXP 14: 0.000000 (2^-inf) 88000000 ->        2

R#14 Output differences: 88000008        2
THE 15: 0.000000 (2^-52.000000)        2 -> 88000008
EXP 15: 0.000000 (2^-inf)        2 -> 88000008

R#15 Output differences: 20000020 88000008
THE 16: 0.000000 (2^-58.000000) 88000008 -> 20000020
EXP 16: 0.000000 (2^-inf) 88000008 -> 20000020

R#16 Output differences:  8000088 20000020
THE 17: 0.000000 (2^-62.000000) 20000020 ->  8000088
EXP 17: 0.000000 (2^-inf) 20000020 ->  8000088

OK
[./src/simon-xor-threshold-search.cc:1149] num_rounds 17
[./tests/simon-xor-threshold-search-tests.cc:253]
----- Begin search for clusters of trails -----
[./src/simon-xor-threshold-search.cc:887] trail_len 17
[./src/simon-xor-threshold-search.cc:934] Initial trail:
[./src/simon-xor-threshold-search.cc:629] Found 1 trails:
[    1] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000020 20000020 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-62.000000
Probability of differential: 2^-62.000000


[./src/simon-xor-threshold-search.cc:629] Found 7 trails:
[    1] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000820 20000820 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-63.000000
[    2] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 30000820 30000820 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-65.000000
[    3] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 30000030 30000030 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-65.000000
[    4] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 30000020 30000020 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-64.000000
[    5] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000830 20000830 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-64.000000
[    6] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000030 20000030 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-63.000000
[    7] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000020 20000020 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-62.000000
Probability of differential: 2^-60.540568


vpv-20130924

Simon64 17 rounds, 14 trails

[./src/simon-xor-threshold-search.cc:629] Found 14 trails:
[    1] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000820 20000820 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-63.000000
[    2] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 30000030 30000030 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-65.000000
[    3] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000020 20000020 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-62.000000
[    4] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000   82   82 88000008 88000008 20000020 20000020 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-64.000000
[    5] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000030 20000030 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-63.000000
[    6] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000   82   82 88000008 88000008 20000030 20000030 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-65.000000
[    7] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 30000020 30000020 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-64.000000
[    8] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000   82   82 88000008 88000008 20000820 20000820 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-65.000000
[    9] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000   82   82 88000008 88000008 20000830 20000830 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-66.000000
[   10] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000   82   82 88000008 88000008 30000820 30000820 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-67.000000
[   11] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 20000830 20000830 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-64.000000
[   12] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000   82   82 88000008 88000008 30000030 30000030 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-67.000000
[   13] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000   82   82 88000008 88000008 30000020 30000020 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-66.000000
[   14] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 30000820 30000820 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-65.000000
[   14] 8000088 20000020 20000020 88000008 88000008    2    2 88000000 88000000 20000000 20000000 8000000 8000000    0    0 8000000 8000000 20000000 20000000 88000000 88000000    2    2 88000008 88000008 30000820 30000820 8000088 8000088  200  200 8000888 8000888 20002020  | 2^-65.000000
Probability of differential: 2^-60.218640

real    29693m30.452s
user    29601m45.948s
sys     6m40.077s

 */

/* 
Parameters with which we confirm the 9 round trail on Simon32 by Abed et al. using clustering algorithm 1

Size of Hway table: 2^7
Cluster epsilon: 2^-5
Threshold: 0.05
Rounds 9

#define SIMON_EPS (double)(1.0 / (double)(1ULL << 5))
#define XDP_ROT_AND_MAX_DIFF_CNT (1ULL << 7)
#define XDP_ROT_AND_P_THRES 0.05
#define NROUNDS 9

- With clustering algorithm 1

[./src/simon-xor-threshold-search.cc:629] Found 16 trails:
[    1] 8000 2200 2200  800  800  200  200    0    0  200  200  C00  C00 2200 2200 8000 8000 2202  | 2^-22.000000
[    2] 8000 2200 2200  802  802  200  200    0    0  200  200  802  802 2200 2200 8000 8000 2202  | 2^-24.000000
[    3] 8000 2200 2200  C00  C00  200  200    0    0  200  200  C00  C00 2200 2200 8000 8000 2202  | 2^-24.000000
[    4] 8000 2200 2200  800  800  200  200    0    0  200  200  C02  C02 2200 2200 8000 8000 2202  | 2^-23.000000
[    5] 8000 2200 2200  800  800  200  200    0    0  200  200  800  800 2200 2200 8000 8000 2202  | 2^-20.000000
[    6] 8000 2200 2200  802  802  200  200    0    0  200  200  C00  C00 2200 2200 8000 8000 2202  | 2^-24.000000
[    7] 8000 2200 2200  C02  C02  200  200    0    0  200  200  800  800 2200 2200 8000 8000 2202  | 2^-23.000000
[    8] 8000 2200 2200  800  800  200  200    0    0  200  200  802  802 2200 2200 8000 8000 2202  | 2^-22.000000
[    9] 8000 2200 2200  802  802  200  200    0    0  200  200  800  800 2200 2200 8000 8000 2202  | 2^-22.000000
[   10] 8000 2200 2200  C02  C02  200  200    0    0  200  200  C02  C02 2200 2200 8000 8000 2202  | 2^-26.000000
[   11] 8000 2200 2200  C00  C00  200  200    0    0  200  200  C02  C02 2200 2200 8000 8000 2202  | 2^-25.000000
[   12] 8000 2200 2200  C02  C02  200  200    0    0  200  200  C00  C00 2200 2200 8000 8000 2202  | 2^-25.000000
[   13] 8000 2200 2200  802  802  200  200    0    0  200  200  C02  C02 2200 2200 8000 8000 2202  | 2^-25.000000
[   14] 8000 2200 2200  C00  C00  200  200    0    0  200  200  802  802 2200 2200 8000 8000 2202  | 2^-24.000000
[   15] 8000 2200 2200  C00  C00  200  200    0    0  200  200  800  800 2200 2200 8000 8000 2202  | 2^-22.000000
[   16] 8000 2200 2200  C02  C02  200  200    0    0  200  200  802  802 2200 2200 8000 8000 2202  | 2^-25.000000
Probability of differential: 2^-18.599121

real    4m34.825s
user    3m49.190s
sys     0m0.316s

Parameters with which we confirm the 9 round trail on Simon32 by Abed et al. using clustering algorithm 2

Size of Hway table: 2^7
Cluster epsilon: 2^-5
Threshold: 0.05
Rounds 9

#define SIMON_EPS (double)(1.0 / (double)(1ULL << 5))
#define XDP_ROT_AND_MAX_DIFF_CNT (1ULL << 7)
#define XDP_ROT_AND_P_THRES 0.05
#define NROUNDS 9

- With clustering algorithm 2

#define SIMON_EPS (double)(1.0 / (double)(1ULL << 5))
#define XDP_ROT_AND_MAX_DIFF_CNT (1ULL << 8)
#define XDP_ROT_AND_P_THRES 0.05
#define NROUNDS 9

[./src/simon-xor-threshold-search.cc:866] Improve differential prob:  2000 8880 -> 2000 8880 2^-18.607683 -> 2^-18.599121 | #trails 418
[./src/simon-xor-threshold-search.cc:883] Update max for 9 R: (2000 8880) -> (2000 8880) 2^-18.599121
2000 8000  880 8300 8300   80   80    0    0   80   80 8300 8300  880  880 2000 2000 8880  | 2^-26.000000
2000 8000  880 8200 8200   80   80    0    0   80   80  300  300  880  880 2000 2000 8880  | 2^-24.000000
2000 8000  880  300  300   80   80    0    0   80   80 8300 8300  880  880 2000 2000 8880  | 2^-25.000000
2000 8000  880 8300 8300   80   80    0    0   80   80  300  300  880  880 2000 2000 8880  | 2^-25.000000
2000 8000  880 8300 8300   80   80    0    0   80   80  200  200  880  880 2000 2000 8880  | 2^-23.000000
2000 8000  880 8200 8200   80   80    0    0   80   80 8300 8300  880  880 2000 2000 8880  | 2^-25.000000
2000 8000  880  200  200   80   80    0    0   80   80  200  200  880  880 2000 2000 8880  | 2^-20.000000
2000 8000  880 8200 8200   80   80    0    0   80   80 8200 8200  880  880 2000 2000 8880  | 2^-24.000000
2000 8000  880  300  300   80   80    0    0   80   80 8200 8200  880  880 2000 2000 8880  | 2^-24.000000
2000 8000  880  200  200   80   80    0    0   80   80 8300 8300  880  880 2000 2000 8880  | 2^-23.000000
2000 8000  880  300  300   80   80    0    0   80   80  300  300  880  880 2000 2000 8880  | 2^-24.000000
2000 8000  880  300  300   80   80    0    0   80   80  200  200  880  880 2000 2000 8880  | 2^-22.000000
2000 8000  880 8300 8300   80   80    0    0   80   80 8200 8200  880  880 2000 2000 8880  | 2^-25.000000
2000 8000  880  200  200   80   80    0    0   80   80  300  300  880  880 2000 2000 8880  | 2^-22.000000
2000 8000  880  200  200   80   80    0    0   80   80 8200 8200  880  880 2000 2000 8880  | 2^-22.000000
2000 8000  880 8200 8200   80   80    0    0   80   80  200  200  880  880 2000 2000 8880  | 2^-22.000000
[./src/simon-xor-threshold-search.cc:914] Sum 2^-18.599121

[./src/simon-xor-threshold-search.cc:simon_xor_trail_search():1245]:
Verified 9 R differential (    2000     8880) -> (    8880     2000) | 2^20.00 CP pairs
 Final probability p = 2^-16.299560

real    27m13.500s
user    27m8.474s
sys     0m0.644s

 */

 /* 

Improved characteristic on 12 rounds on Simon32: from 2^-36 to 2^-34
found with the following parameters

#define SIMON_EPS (double)(1.0 / (double)(1ULL << 5))
#define XDP_ROT_AND_MAX_DIFF_CNT (1ULL << 7)
#define XDP_ROT_AND_P_THRES 0.05
#define NROUNDS 44

[./src/simon-xor-threshold-search.cc:511] [ 2 / 44]: Added 1 new country roads: p_min = 0.015625 (2^-6.000000). New sizes: Dxy 1, Dp 1 (cnt_lp 0 / 2).B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
 0:      400 ->     1800 0.250000 (2^-2.000000)
 1:      100 ->        0 0.250000 (2^-2.000000)
 2:        0 ->      100 1.000000 (2^0.000000)
 3:      100 ->      400 0.250000 (2^-2.000000)
 4:      400 ->     1100 0.250000 (2^-2.000000)
 5:     1100 ->     4200 0.062500 (2^-4.000000)
 6:     4200 ->     1D01 0.062500 (2^-4.000000)
 7:     1D01 ->      500 0.003906 (2^-8.000000)
 8:      500 ->      100 0.125000 (2^-3.000000)
 9:      100 ->      100 0.250000 (2^-2.000000)
10:      100 ->      500 0.250000 (2^-2.000000)
11:      500 ->     1500 0.125000 (2^-3.000000)
p_tot = 0.000000000058208 = 2^-34.000000, Bn = 0.000000 = 2^-34.000000
[./src/simon-xor-threshold-search.cc:1137] nrounds = 12
[./src/simon-xor-threshold-search.cc:simon_verify_xor_trail():116] dy_init        0
[./src/simon-xor-threshold-search.cc:136] Verify P for one round (2^22.000000 CPs)...
THE  0: 0.250000 (2^-2.000000)      400 ->     1800
EXP  0: 0.249771 (2^-2.001323)      400 ->     1800

THE  1: 0.250000 (2^-2.000000)      100 ->        0
EXP  1: 0.249737 (2^-2.001518)      100 ->        0

THE  2: 1.000000 (2^0.000000)        0 ->      100
EXP  2: 1.000000 (2^0.000000)        0 ->      100

THE  3: 0.250000 (2^-2.000000)      100 ->      400
EXP  3: 0.249857 (2^-2.000824)      100 ->      400

THE  4: 0.250000 (2^-2.000000)      400 ->     1100
EXP  4: 0.250050 (2^-1.999710)      400 ->     1100

THE  5: 0.062500 (2^-4.000000)     1100 ->     4200
EXP  5: 0.062714 (2^-3.995066)     1100 ->     4200

THE  6: 0.062500 (2^-4.000000)     4200 ->     1D01
EXP  6: 0.062641 (2^-3.996740)     4200 ->     1D01

THE  7: 0.003906 (2^-8.000000)     1D01 ->      500
EXP  7: 0.003901 (2^-8.002027)     1D01 ->      500

THE  8: 0.125000 (2^-3.000000)      500 ->      100
EXP  8: 0.124738 (2^-3.003025)      500 ->      100

THE  9: 0.250000 (2^-2.000000)      100 ->      100
EXP  9: 0.250225 (2^-1.998703)      100 ->      100

THE 10: 0.250000 (2^-2.000000)      100 ->      500
EXP 10: 0.249919 (2^-2.000468)      100 ->      500

OK

----- Begin search for clusters of trails -----
[./src/simon-xor-threshold-search.cc:887] trail_len 12
[./src/simon-xor-threshold-search.cc:934] Initial trail:
[./src/simon-xor-threshold-search.cc:629] Found 1 trails:
[    1]  400  100  100    0    0  100  100  400  400 1100 1100 4200 4200 1D01 1D01  500  500  100  100  100  100  500  500 1500  | 2^-34.000000
Probability of differential: 2^-34.000000

 */


/* --- */

	 // Debug fragment: compute the lower bound p_min and print every
	 // intermediate value. The steps below evaluate
	 //   p_min = (B[nrounds - 1] * eps) / (p[0] * ... * p[n-1] * B[nrounds - 1 - (n + 1)])
	 // p_i >= p_min = Bn / p1 * p2 ... * p{i-1} * B{n-i} 
	 // NOTE(review): the index nrounds - 1 - (n + 1) presumably relies on
	 // n + 1 < nrounds being guaranteed by the caller -- confirm.
	 double p_min = 1.0;
	 for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		p_min *= diff[i].p;
		printf("diff[%d] %f\n", i, diff[i].p);
	 }
	 printf("[%s:%d] p_min %f, B[%d] 2^%f\n", __FILE__, __LINE__, p_min, nrounds - 1 - (n + 1), log2(B[nrounds - 1 - (n + 1)]));
	 // multiply the accumulated product by the bound B[nrounds - 1 - (n + 1)]
	 p_min = p_min * 1.0 * B[nrounds - 1 - (n + 1)]; 
	 printf("[%s:%d] p_min 2^%f\n", __FILE__, __LINE__, log2(p_min));
	 // divide the eps-scaled bound B[nrounds - 1] by that product
	 p_min = (B[nrounds - 1] * eps) / p_min;
	 printf("[%s:%d] p_min 2^%f\n", __FILE__, __LINE__, log2(p_min));
	 // the resulting bound must not exceed 1
	 assert(p_min <= 1.0);

#if 1									  // DEBUG
	 // one-line summary of the inputs and the resulting bound (all as log2)
	 printf("[%s:%d] n %d, B[%d] 2^%f, Bn_eps 2^%f, p_min 2^%f", __FILE__, __LINE__, n, nrounds-1, log2(B[nrounds - 1]), log2(B[nrounds - 1] * eps), log2(p_min));
	 printf("\n");
	 //	 exit(1);
#endif


/* --- */

#if 1									  // DEBUG
		  uint32_t key[SIMON_MAX_NROUNDS] = {0};
		  key[0] = random32() & MASK;
		  key[1] = random32() & MASK;
		  key[2] = random32() & MASK;
		  key[3] = random32() & MASK;
		  simon_verify_xor_trail(n, SIMON_NPAIRS, key, diff, input_diff.dy, lrot_const_s, lrot_const_t, lrot_const_u);
#endif


/* --- */

/* 
10 Round clustering Simon32

[./src/simon-xor-threshold-search.cc:629] Found 4 trails:
[    1] 8000 2200 2200  800  800  200  200    0    0  200  200  802  802 2200 2200 8000 8000 2202  | 2^-22.000000
[    2] 8000 2200 2200  800  800  200  200    0    0  200  200  800  800 2200 2200 8000 8000 2202  | 2^-20.000000
[    3] 8000 2200 2200  800  800  200  200    0    0  200  200  C00  C00 2200 2200 8000 8000 2202  | 2^-22.000000
[    4] 8000 2200 2200  800  800  200  200    0    0  200  200  C02  C02 2200 2200 8000 8000 2202  | 2^-23.000000
Probability of differential: 2^-19.299560


 */

/* --- */
#if 0
		  // Disabled sanity check: report and then assert that the
		  // probability p is not below the threshold Bn_eps.
		  // NOTE(review): begin_iter is not used within this fragment.
		  std::set<differential_t, struct_comp_diff_dx_dy>::iterator begin_iter = diff_set_dx_dy->begin();
		  if(p < Bn_eps) {
			 printf("[%s:%d] p %f, Bn_eps %f\n", __FILE__, __LINE__, p, Bn_eps);
		  }
		  assert(p >= Bn_eps);
#endif

/* --- */

	 // Fragment: product of the probabilities of the first n rounds,
	 // multiplied by the bound B[nrounds - 1 - (n + 1)], plus the threshold
	 // Bn_eps = B[nrounds - 1] * eps.
	 // NOTE(review): the fragment is cut off -- the final #if 0 / #else below
	 // has no matching #endif or closing brace inside this fragment.
	 // p_i >= p_min = Bn / p1 * p2 ... * p{i-1} * B{n-i} 
	 double p_min = 1.0;
	 for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		p_min *= diff[i].p;
	 }

	 //	 p_min = p_min * 1.0 * B[nrounds - 1 - (n + 1)]; 
	 //	 p_min = B[n] / p_min;
#if 1
	 // active variant: fold the bound B[nrounds - 1 - (n + 1)] into the product
	 p_min = p_min * 1.0 * B[nrounds - 1 - (n + 1)]; 
#endif
	 // !!!
	 // p[0] * p[1] * p[n-1] * p_min <= (B[n] * eps)
#if 0
	 double Bn_eps = (B[n] * eps);
#else
	 // active variant: the threshold uses B[nrounds - 1] rather than B[n]
	 double Bn_eps = (B[nrounds - 1] * eps);
	 //	 p_min = Bn_eps / p_min;
#endif

#if 0
	 if(p_min >= Bn_eps) {
#else


/* --- */
/*
11 Round clustering on Simon32

[./src/simon-xor-threshold-search.cc:629] Found 20 trails:
[    1]    2 8800 8800 2000 2000  800  800    0    0  800  800 2008 2008 8800 8800   82   82 8808 8808 2020 2020  888  | 2^-34.000000
[    2]    2 8800 8800 2000 2000  800  800    0    0  800  800 2000 2000 8820 8820   8A   8A 8808 8808 2020 2020  888  | 2^-35.000000
[    3]    2 8800 8800 2000 2000  800  800    0    0  800  800 2008 2008 8800 8800    2    2 8808 8808 2020 2020  888  | 2^-32.000000
[    4]    2 8800 8800 2000 2000  800  800    0    0  800  800 2000 2000 8800 8800    2    2 8808 8808 2020 2020  888  | 2^-30.000000
[    5]    2 8800 8800 2000 2000  800  800    0    0  800  800 2000 2000 8820 8820    A    A 8808 8808 2020 2020  888  | 2^-33.000000
[    6]    2 8800 8800 2000 2000  800  800    0    0  800  800 3008 3008 8820 8820    A    A 8808 8808 2020 2020  888  | 2^-36.000000
[    7]    2 8800 8800 2000 2000  800  800    0    0  800  800 3000 3000 8820 8820    A    A 8808 8808 2020 2020  888  | 2^-35.000000
[    8]    2 8800 8800 2000 2000  800  800    0    0  800  800 3008 3008 8800 8800    2    2 8808 8808 2020 2020  888  | 2^-33.000000
[    9]    2 8800 8800 2000 2000  800  800    0    0  800  800 3008 3008 8830 8830    A    A 8808 8808 2020 2020  888  | 2^-37.000000
[   10]    2 8800 8800 2000 2000  800  800    0    0  800  800 3000 3000 8800 8800    2    2 8808 8808 2020 2020  888  | 2^-32.000000
[   11]    2 8800 8800 2000 2000  800  800    0    0  800  800 2000 2000 8800 8800   82   82 8808 8808 2020 2020  888  | 2^-32.000000
[   12]    2 8800 8800 2000 2000  800  800    0    0  800  800 2008 2008 8830 8830   8A   8A 8808 8808 2020 2020  888  | 2^-38.000000
[   13]    2 8800 8800 2000 2000  800  800    0    0  800  800 3008 3008 8820 8820   8A   8A 8808 8808 2020 2020  888  | 2^-38.000000
[   14]    2 8800 8800 2000 2000  800  800    0    0  800  800 2008 2008 8820 8820    A    A 8808 8808 2020 2020  888  | 2^-35.000000
[   15]    2 8800 8800 2000 2000  800  800    0    0  800  800 3008 3008 8830 8830   8A   8A 8808 8808 2020 2020  888  | 2^-39.000000
[   16]    2 8800 8800 2000 2000  800  800    0    0  800  800 3000 3000 8800 8800   82   82 8808 8808 2020 2020  888  | 2^-34.000000
[   17]    2 8800 8800 2000 2000  800  800    0    0  800  800 2008 2008 8830 8830    A    A 8808 8808 2020 2020  888  | 2^-36.000000
[   18]    2 8800 8800 2000 2000  800  800    0    0  800  800 3000 3000 8820 8820   8A   8A 8808 8808 2020 2020  888  | 2^-37.000000
[   19]    2 8800 8800 2000 2000  800  800    0    0  800  800 3008 3008 8800 8800   82   82 8808 8808 2020 2020  888  | 2^-35.000000
[   20]    2 8800 8800 2000 2000  800  800    0    0  800  800 2008 2008 8820 8820   8A   8A 8808 8808 2020 2020  888  | 2^-37.000000
Sum: 2^-28.789329
*/

/* --- */
#if 0
			 // Print every trail stored in the hash map: the (dx, dy) pair of
			 // each of its trail_len entries followed by log2 of the product of
			 // the per-entry probabilities.
			 hash_map_iter = trails_hash_map->begin();
			 // size() returns an unsigned size type, so it is printed with %zu
			 // (passing it to %d is a format/argument mismatch).
			 printf("[%s:%d] Found %zu trails:\n", __FILE__, __LINE__, trails_hash_map->size());
			 uint32_t trail_cnt = 0;
			 while(hash_map_iter != trails_hash_map->end()) {
				double p_tot = 1.0; // probability of the current trail
				trail_cnt++;
				printf("[%5d] ", trail_cnt);
				for(uint32_t i = 0; i < trail_len; i++) {
				  printf("%4X  %4X ", (*(hash_map_iter->second))[i].dx, (*(hash_map_iter->second))[i].dy);
				  p_tot *= (*(hash_map_iter->second))[i].p;
				}
				printf(" | 2^%f\n", log2(p_tot));
				hash_map_iter++;
			 }
#endif


/* --- */

#if 0
	 // Print the (dx, dy) pairs of every trail currently stored in the hash map.
	 std::unordered_map<std::string, differential_t**>::const_iterator hash_map_iter = trails_hash_map->begin();
	 //  hash_map_iter = trails_hash_map.begin();
	 // size() returns size_t, so it is printed with %zu (passing it to %d is
	 // a format/argument mismatch).
	 printf("[%s:%d] Found %zu trails:\n", __FILE__, __LINE__, trails_hash_map->size());
	 uint32_t trail_cnt = 0;
	 while(hash_map_iter != trails_hash_map->end()) {
		trail_cnt++;
		printf("[%5d] ", trail_cnt);
		for(uint32_t i = 0; i < trail_len; i++) {
		  printf("%4X %4X ", (*(hash_map_iter->second))[i].dx, (*(hash_map_iter->second))[i].dy);
		}
		printf("\n");
		hash_map_iter++;
	 }
#endif

#if 0
	 // Convert the trail to per-round differences with
	 // simon_trail_to_round_diffs(), print each round and finally the
	 // product of all round probabilities.
	 differential_t round_diffs[NROUNDS + 1] = {{0, 0, 0, 0.0}};
	 simon_trail_to_round_diffs(trail, round_diffs, trail_len, lrot_const_s, lrot_const_t, lrot_const_u);
	 printf("[%s:%d] Final trail (round differences):\n", __FILE__, __LINE__);
	 double p_tot = 1.0;
	 for(uint32_t r = 0; r < (trail_len); ++r) {
		const differential_t& rd = round_diffs[r];
		printf("%2d: %8X -> %8X %f (2^%f)\n", r, rd.dx, rd.dy, rd.p, log2(rd.p));
		p_tot *= rd.p;
	 }
	 printf("p_tot = %16.15f = 2^%f\n", p_tot, log2(p_tot));
#endif

	 printf("[%s:%d]  After simon_xor_cluster_trails()\n", __FILE__, __LINE__);

#if 1
	 // Print the (dx, dy) pairs of every trail stored in the hash map.
	 // hash_map_iter and trail_cnt are declared outside this fragment.
	 //  std::unordered_map<std::string, differential_t*>::const_iterator hash_map_iter = trails_hash_map.begin();
	 hash_map_iter = trails_hash_map->begin();
	 // size() returns an unsigned size type, so it is printed with %zu
	 // (passing it to %d is a format/argument mismatch).
	 printf("[%s:%d] Found %zu trails:\n", __FILE__, __LINE__, trails_hash_map->size());
	 trail_cnt = 0;
	 while(hash_map_iter != trails_hash_map->end()) {
		trail_cnt++;
		printf("[%5d] ", trail_cnt);
		for(uint32_t i = 0; i < trail_len; i++) {
		  printf("%4X %4X ", (*(hash_map_iter->second))[i].dx, (*(hash_map_iter->second))[i].dy);
		}
		printf("\n");
		hash_map_iter++;
	 }
#endif



/* --- */

/* 
11 Round clustering on Simon32

[./src/simon-xor-threshold-search.cc942] Found 20 trails:
[    1]    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8800 8800    82   82  8808 8808  2020 2020   888  | 2^-34.000000
[    2]    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8820 8820    8A   8A  8808 8808  2020 2020   888  | 2^-35.000000
[    3]    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8800 8800     2    2  8808 8808  2020 2020   888  | 2^-32.000000
[    4]    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8800 8800     2    2  8808 8808  2020 2020   888  | 2^-30.000000
[    5]    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8820 8820     A    A  8808 8808  2020 2020   888  | 2^-33.000000
[    6]    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8820 8820     A    A  8808 8808  2020 2020   888  | 2^-36.000000
[    7]    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8820 8820     A    A  8808 8808  2020 2020   888  | 2^-35.000000
[    8]    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8800 8800     2    2  8808 8808  2020 2020   888  | 2^-33.000000
[    9]    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8830 8830     A    A  8808 8808  2020 2020   888  | 2^-37.000000
[   10]    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8800 8800     2    2  8808 8808  2020 2020   888  | 2^-32.000000
[   11]    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8800 8800    82   82  8808 8808  2020 2020   888  | 2^-32.000000
[   12]    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8830 8830    8A   8A  8808 8808  2020 2020   888  | 2^-38.000000
[   13]    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8820 8820    8A   8A  8808 8808  2020 2020   888  | 2^-38.000000
[   14]    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8820 8820     A    A  8808 8808  2020 2020   888  | 2^-35.000000
[   15]    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8830 8830    8A   8A  8808 8808  2020 2020   888  | 2^-39.000000
[   16]    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8800 8800    82   82  8808 8808  2020 2020   888  | 2^-34.000000
[   17]    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8830 8830     A    A  8808 8808  2020 2020   888  | 2^-36.000000
[   18]    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8820 8820    8A   8A  8808 8808  2020 2020   888  | 2^-37.000000
[   19]    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8800 8800    82   82  8808 8808  2020 2020   888  | 2^-35.000000
[   20]    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8820 8820    8A   8A  8808 8808  2020 2020   888  | 2^-37.000000

 */

/* --- */



// Disabled fragment: seed a hash map of trails with the current trail, call
// simon_xor_cluster_trails() on it and print the map before and after.
// NOTE(review): trails_hash_map is declared below as an object but is
// accessed with '->' throughout; this would not compile if re-enabled --
// confirm whether a pointer or the object itself was intended.
#if 0									  // cluster trails
  std::unordered_map<std::string, differential_t**> trails_hash_map;
  uint32_t init_round = 0;
  // input difference: dx of the first trail entry, dy = trail[1].dx ^ trail[0].dy
  uint32_t dx_in = trail[0].dx;
  uint32_t dy_in = trail[1].dx ^ trail[0].dy;//trail[0].dy;
  // the first entry's dy is then overwritten with the next entry's dx
  trail[0].dy = trail[1].dx;

  differential_t input_diff = {dx_in, dy_in};
  // output difference: taken from the last trail entry
  uint32_t dx_out = trail[num_rounds - 1].dx;
  uint32_t dy_out = trail[num_rounds - 1].dy;
  differential_t output_diff = {dx_out, dy_out};
  double eps = SIMON_EPS;//1.0 / (double)(1UL << 10);//0.125;//SIMON_EPS

  uint32_t trail_len = num_rounds;
  // string form of the trail, used as the hash-map key
  std::string s_trail = trail_to_string(trail, trail_len);
  //  std::pair<std::string, differential_t*> new_trail(s_trail, trail);
  //  trails_hash_map->insert(new_trail);
  //  (*trails_hash_map)[s_trail] = trail;

  // Heap copy of the trail (pointer to a calloc'ed array of trail_len
  // entries) that is stored as the mapped value.
  // NOTE(review): no matching free() appears in this fragment.
  differential_t** new_trail;
  new_trail = (differential_t** )calloc(1, sizeof(differential_t*));
  *new_trail = (differential_t*)calloc(trail_len, sizeof(differential_t));
  for(uint32_t i = 0; i < trail_len; i++) {
	 (*new_trail)[i].dx = trail[i].dx;
	 (*new_trail)[i].dy = trail[i].dy;
	 (*new_trail)[i].p = trail[i].p;
  }

  std::pair<std::string, differential_t**> new_pair (s_trail,new_trail);
  trails_hash_map->insert(new_pair);


  printf("[%s:%d] OLD Before simon_xor_cluster_trails()\n", __FILE__, __LINE__);

#if 1
  // print the (dx, dy) pairs of every trail in the map before clustering
  // NOTE(review): size() is passed to %d below; %zu would match its type.
  std::unordered_map<std::string, differential_t**>::const_iterator hash_map_iter = trails_hash_map->begin();
  //  hash_map_iter = trails_hash_map.begin();
  printf("[%s%d] Found %d trails:\n", __FILE__, __LINE__, trails_hash_map->size());
  uint32_t trail_cnt = 0;
  while(hash_map_iter != trails_hash_map->end()) {
	 trail_cnt++;
	 printf("[%5d] ", trail_cnt);
	 for(uint32_t i = 0; i < trail_len; i++) {
		printf("%4X %4X ", (*(hash_map_iter->second))[i].dx, (*(hash_map_iter->second))[i].dy);
	 }
	 printf("\n");
	 hash_map_iter++;
  }
#endif

#if 0
  // convert the trail to per-round differences and print them with the
  // product of their probabilities
  double p_tot = 1.0;
  differential_t round_diffs[NROUNDS + 1] = {{0, 0, 0, 0.0}};
  simon_trail_to_round_diffs(trail, round_diffs, num_rounds, lrot_const_s, lrot_const_t, lrot_const_u);
  printf("[%s:%d] Final trail (round differences):\n", __FILE__, __LINE__);
  p_tot = 1.0;
  for(uint32_t i = 0; i < (num_rounds); i++) {
	 printf("%2d: %8X -> %8X %f (2^%f)\n", i, round_diffs[i].dx, round_diffs[i].dy, round_diffs[i].p, log2(round_diffs[i].p));
	 p_tot *= round_diffs[i].p;
  }
  printf("p_tot = %16.15f = 2^%f\n", p_tot, log2(p_tot));
#endif

  assert(dyy_init == 0);

  // run the clustering on the seeded map
  simon_xor_cluster_trails(init_round, num_rounds, B, diff, trail, trails_hash_map, dyy_init, input_diff, output_diff, lrot_const_s, lrot_const_t, lrot_const_u, &diff_mset_p, &diff_set_dx_dy, &croads_diff_mset_p, &croads_diff_set_dx_dy, eps);
  //  simon_xor_cluster_trails(init_round, num_rounds, B, diff, round_diffs, trails_hash_map, dyy_init, input_diff, output_diff, lrot_const_s, lrot_const_t, lrot_const_u, &diff_mset_p, &diff_set_dx_dy, &croads_diff_mset_p, &croads_diff_set_dx_dy, eps);

  printf("[%s:%d]  After simon_xor_cluster_trails()\n", __FILE__, __LINE__);

#if 1
  // print the (dx, dy) pairs of every trail in the map after clustering
  //  std::unordered_map<std::string, differential_t*>::const_iterator hash_map_iter = trails_hash_map.begin();
  hash_map_iter = trails_hash_map->begin();
  printf("[%s%d] Found %d trails:\n", __FILE__, __LINE__, trails_hash_map->size());
  trail_cnt = 0;
  while(hash_map_iter != trails_hash_map->end()) {
	 trail_cnt++;
	 printf("[%5d] ", trail_cnt);
	 for(uint32_t i = 0; i < trail_len; i++) {
		printf("%4X %4X ", (*(hash_map_iter->second))[i].dx, (*(hash_map_iter->second))[i].dy);
	 }
	 printf("\n");
	 hash_map_iter++;
  }
#endif


#endif
  printf("[%s:%d] num_rounds %d\n", __FILE__, __LINE__, num_rounds);



/* --- */

#if 0
  // Print every trail stored in trails_hash_map: one line per trail holding the
  // (dx, dy) pair of each of its trail_len rounds, followed by log2 of the product
  // of the per-round probabilities.
  std::unordered_map<std::string, differential_t**>::const_iterator hash_map_iter = trails_hash_map.begin();
  // size() returns size_t, so it is printed with %zu (it was %d, a format/argument mismatch).
  printf("[%s:%d] Found %zu trails:\n", __FILE__, __LINE__, trails_hash_map.size());
  uint32_t trail_cnt = 0;
  uint32_t trail_len = nrounds_full;
  while(hash_map_iter != trails_hash_map.end()) {
	 trail_cnt++;
	 printf("[%5u] ", trail_cnt); // 1-based running index of the trail (unsigned, hence %u)
	 double p = 1.0;				  // probability of the whole trail
	 for(uint32_t i = 0; i < trail_len; i++) {
		//		printf("%4X %4X ", hash_map_iter->second[i].dx, hash_map_iter->second[i].dy);
		// The mapped value is a pointer to the trail array, hence the extra dereference.
		printf("%4X %4X ", (*(hash_map_iter->second))[i].dx, (*(hash_map_iter->second))[i].dy);
		p *= (*(hash_map_iter->second))[i].p;
	 }
	 printf(" | 2^%f\n", log2(p));
	 hash_map_iter++;
  }
#endif


/* --- */


/*
  [./src/simon-xor-threshold-search.cc924] Found 20 trails:

2^-36 + 3*2^-38 + 2*2^-39 + 2*2^-40 + 2*2^-41 + 2*2^-42 + 3*2^-43 + 2*2^-44 + 2^-45


[   14] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8800 8800     2    2  8808 8808  2020 2020   888  | 2^-36.000000

[    4] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8800 8800    82   82  8808 8808  2020 2020   888  | 2^-38.000000
[    5] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8800 8800     2    2  8808 8808  2020 2020   888  | 2^-38.000000
[    8] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8800 8800     2    2  8808 8808  2020 2020   888  | 2^-38.000000

[   17] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8820 8820     A    A  8808 8808  2020 2020   888  | 2^-39.000000
[   18] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8800 8800     2    2  8808 8808  2020 2020   888  | 2^-39.000000

[    3] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8800 8800    82   82  8808 8808  2020 2020   888  | 2^-40.000000
[   11] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8800 8800    82   82  8808 8808  2020 2020   888  | 2^-40.000000

[    9] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8820 8820     A    A  8808 8808  2020 2020   888  | 2^-41.000000
[   13] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8800 8800    82   82  8808 8808  2020 2020   888  | 2^-41.000000
[   16] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8820 8820    8A   8A  8808 8808  2020 2020   888  | 2^-41.000000
[   20] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8820 8820     A    A  8808 8808  2020 2020   888  | 2^-41.000000

[   15] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8820 8820     A    A  8808 8808  2020 2020   888  | 2^-42.000000
[   10] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8830 8830     A    A  8808 8808  2020 2020   888  | 2^-42.000000

[    6] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8820 8820    8A   8A  8808 8808  2020 2020   888  | 2^-43.000000
[    2] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8820 8820    8A   8A  8808 8808  2020 2020   888  | 2^-43.000000
[   19] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8830 8830     A    A  8808 8808  2020 2020   888  | 2^-43.000000

[    7] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8830 8830    8A   8A  8808 8808  2020 2020   888  | 2^-44.000000
[   12] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8820 8820    8A   8A  8808 8808  2020 2020   888  | 2^-44.000000

[    1] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3008 3008  8830 8830    8A   8A  8808 8808  2020 2020   888  | 2^-45.000000

*/
/* --- */

/* 
	[./src/simon-xor-threshold-search.cc:924] Does not match output diffs: (    20B8,     AE8) vs. (    2020,     888)[./src/simon-xor-threshold-search.cc:885] Add new trail: 2^-40.000000
	[./src/simon-xor-threshold-search.cc:900]     8808        2 0.015625
	[./src/simon-xor-threshold-search.cc:900]        2     8800 0.250000
	[./src/simon-xor-threshold-search.cc:900]     8800     2000 0.062500
	[./src/simon-xor-threshold-search.cc:900]     2000      800 0.250000
	[./src/simon-xor-threshold-search.cc:900]      800        0 0.250000
	[./src/simon-xor-threshold-search.cc:900]        0      800 1.000000
	[./src/simon-xor-threshold-search.cc:900]      800     2008 0.250000
	[./src/simon-xor-threshold-search.cc:900]     2008     8800 0.062500
	[./src/simon-xor-threshold-search.cc:900]     8800       82 0.062500
	[./src/simon-xor-threshold-search.cc:900]       82     8808 0.062500
	[./src/simon-xor-threshold-search.cc:900]     8808     2020 0.015625
	[./src/simon-xor-threshold-search.cc:900]     2020      888 0.062500

	[./src/simon-xor-threshold-search.cc906] Found 14 trails:

[    1] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8820 8820    8A   8A  8808 8808  2020 2020   888
[    2] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8800 8800    82   82  8808 8808  2020 2020   888
[    3] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8800 8800    82   82  8808 8808  2020 2020   888
[    4] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8800 8800     2    2  8808 8808  2020 2020   888
[    5] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8820 8820    8A   8A  8808 8808  2020 2020   888
[    6] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8830 8830    8A   8A  8808 8808  2020 2020   888
[    7] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8800 8800     2    2  8808 8808  2020 2020   888
[    8] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8820 8820     A    A  8808 8808  2020 2020   888
[    9] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2008 2008  8830 8830     A    A  8808 8808  2020 2020   888
[   10] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8800 8800    82   82  8808 8808  2020 2020   888
[   11] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8800 8800     2    2  8808 8808  2020 2020   888
[   12] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8820 8820    8A   8A  8808 8808  2020 2020   888
[   13] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  2000 2000  8820 8820     A    A  8808 8808  2020 2020   888
[   14] 8808     2    2  8800 8800  2000 2000   800  800     0    0   800  800  3000 3000  8820 8820     A    A  8808 8808  2020 2020   888


 */
/* --- */

/**
 * Experimentally verify the differential probability of one round of Simon.
 *
 * Draws \p npairs random input pairs with input difference (dx_in, dy_in),
 * encrypts both members of every pair with simon_encrypt() under \p key for
 * \p one_round rounds and counts how many pairs produce the output difference
 * (dx_out, dy_out) = (trail[i].dy, trail[i].dx).
 *
 * \return the fraction of sampled pairs that hit the expected output difference.
 *
 * NOTE(review): unfinished scrap -- the only parameter is unnamed, and trail, i,
 * npairs, dx_in, dy_in, key, one_round and cnt are not declared inside this block;
 * they must come from the surrounding context before this can compile.
 */
double dp_simon_rpind_exper(uint32_t )
{
	 uint32_t dx_out = trail[i].dy;
	 uint32_t dy_out = trail[i].dx;

	 for(uint64_t j = 0; j < npairs; j++) {
		// First pair member is random, second differs from it by the input difference.
		uint32_t x1 = random32() & MASK;
		uint32_t x2 = XOR(x1, dx_in);

		uint32_t y1 = random32() & MASK;
		uint32_t y2 = XOR(y1, dy_in);

		// The words are encrypted in place.
		simon_encrypt(key, one_round, &x1, &y1);
		simon_encrypt(key, one_round, &x2, &y2);

		uint32_t dx_ctext = XOR(x1, x2);
		uint32_t dy_ctext = XOR(y1, y2);

		if((dx_ctext == dx_out) && (dy_ctext == dy_out)) {
		  cnt++;
		}

	 }
	 double p_exp = (double)cnt / (double)npairs;
	 // The original fell off the end of a non-void function, which is undefined
	 // behaviour as soon as the caller reads the result.
	 return p_exp;
}


/* --- */

  // Round 0 of the recursive trail search, taken only when the trail has more
  // than one round. Every element of diff_mset_p is tried in container order as
  // the (alpha, gamma) transition of this round.
  // NOTE(review): pn, diff, B, Bn and the remaining arguments are declared by the
  // enclosing function, which is not part of this fragment.
  if((n == 0) && (nrounds > 1)) {						  // Round-0 and not last round
	 // Set when a candidate fails the bound test below; terminates the scan.
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 // Position within the current scan; only read by the disabled DEBUG print.
	 uint32_t cnt = 0;
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx; // alpha
		uint32_t dy = mset_iter->dy; // gamma
		pn = mset_iter->p;
		uint32_t dxx = dy ^ dyy_init ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dy_i ^ (alpha <<< 2)
		// Probability of this round multiplied by the stored bound B[] for the
		// nrounds - 1 - (n + 1) index, i.e. the estimate that is compared with *Bn.
		double p = pn * B[nrounds - 1 - (n + 1)];
		assert(B[nrounds - 1 - (n + 1)] != 0.0);
		// Remember the first element so that a change made by the recursive call
		// can be detected afterwards.
		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();
#if 0									  // DEBUG
		printf("\r[%s:%d] %2d: [%2d / %2d] %8X -> %8X, 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		fflush(stdout);
#endif
		if((p >= *Bn) && (p != 0.0)) {
		  // Candidate can still reach the current best bound: record it and recurse
		  // into the next round.
		  diff[n].dx = dx;		  // dx_{i}
		  diff[n].dy = dxx;		  // dx_{i+1}
		  diff[n].p = pn;
		  simon_xor_threshold_search(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, p_thres);
		} else {
		  // Stop at the first failing candidate.
		  // NOTE(review): presumably valid because struct_comp_diff_p orders the
		  // multiset by decreasing probability -- confirm against its definition.
		  b_end = true;
		}
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		  cnt = 0;
		} else {
		  mset_iter++;
		  cnt++;
		}
	 }
  }

  // Round 1 of the recursive trail search, taken only when round 1 is not the
  // last round. Same scan over diff_mset_p as in round 0, except that the right
  // input word is the left input of round 0 (diff[n-1].dx) and the estimate also
  // includes the probability already fixed for round 0 (diff[0].p).
  // NOTE(review): pn, diff, B, Bn and the remaining arguments are declared by the
  // enclosing function, which is not part of this fragment.
  if((n == 1) && (n != (nrounds - 1))) {						  // Round-1 and not last round
	 // Set when a candidate fails the bound test below; terminates the scan.
	 bool b_end = false;
	 // Position within the current scan; only read by the disabled DEBUG print.
	 uint32_t cnt = 0;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx; // alpha = dx_{i}
		uint32_t dy = mset_iter->dy; // gamma
		pn = mset_iter->p;
		uint32_t dyy = diff[n-1].dx; // dy_{i} = dx_{i-1}
		uint32_t dxx = dy ^ dyy ^ LROT(dx, lrot_const_u); // dx_{i+1} = gamma ^ dx_{i-1} ^ (alpha <<< 2)
		// Round-0 probability * this round's probability * stored bound B[] for
		// the nrounds - 1 - (n + 1) index; compared with *Bn below.
		double p = diff[0].p * pn * B[nrounds - 1 - (n + 1)];
		// Remember the first element so that a change made by the recursive call
		// can be detected afterwards.
		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();
#if 0									  // DEBUG
		printf("\r[%s:%d] %2d: [%2d / %2d] %8X -> %8X, 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		fflush(stdout);
#endif
		if((p >= *Bn) && (p != 0.0)) {
		  // Candidate can still reach the current best bound: record it and recurse
		  // into the next round.
		  diff[n].dx = dx;		  // dx_{i}
		  diff[n].dy = dxx;		  // dx_{i+1}
		  diff[n].p = pn;
		  simon_xor_threshold_search(n+1, nrounds, B, Bn, diff, trail, dyy_init, lrot_const_s, lrot_const_t, lrot_const_u, diff_mset_p, diff_set_dx_dy, croads_diff_mset_p, croads_diff_set_dx_dy, p_thres);
		} else {
		  // Stop at the first failing candidate.
		  // NOTE(review): presumably valid because struct_comp_diff_p orders the
		  // multiset by decreasing probability -- confirm against its definition.
		  b_end = true;
		} 
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		  cnt = 0;
		} else {
		  mset_iter++;
		  cnt++;
		}
	 }	// while()
  }




/* --- */


 // Scratch code: use the string form of a trail as a hash-map key.
 // Convert the trail (trail_len entries) to a string and print it.
 std::string s = trail_to_string(trail, trail_len);
 printf("%s\n", s.c_str());
 // std::cout << std::hex << s;
 // Print the std::hash value of that string.
 std::hash<std::string> H;
 std::cout << H(s) << "\n";

 // Minimal std::unordered_map usage example with a string key.
 std::unordered_map<std::string, int> map;
 map["string"] = 10;

 // Map from the trail string to the trail pointer itself.
 std::unordered_map<std::string, differential_t*> trail_hash_map;
 // map["string"] = 10;

 // The map stores only the pointer; the trail array is not copied.
 trail_hash_map[s] = trail;



/* --- */

/* 
	[./tests/speck-xor-threshold-search-tests.cc:66] Final trail:
 0: 40020092 -> 10420000 1.000000
 1: 82020200 ->   120200 0.031250 (2^-5.000000)
 2:   900002 ->     1002 0.031250 (2^-5.000000)
 3:  2008002 ->  2000012 0.062500 (2^-4.000000)
 4:    20092 -> 10020002 0.031250 (2^-5.000000)
 5: 82020202 ->  2120212 0.031250 (2^-5.000000)
 6:   900010 -> 10001080 0.007812 (2^-7.000000)
 7:     8080 -> 80000480 0.062500 (2^-4.000000)
 8:      400 ->     2004 0.250000 (2^-2.000000)
 9:     2000 ->    12020 0.250000 (2^-2.000000)
10:    12000 ->    82100 0.125000 (2^-3.000000)
11:    82020 ->   492820 0.062500 (2^-4.000000)
12: 20492000 -> 22006100 0.007812 (2^-7.000000)
13: 22202820 -> 32232021 0.003906 (2^-8.000000)
p_tot = 0.000000000000000 = 2^-61.000000
[./tests/speck-xor-threshold-search-tests.cc:91] key
key[0] = 0xCE0F0240;
key[1] = 0xA0D6CE40;
key[2] = 0x4FF69FE1;
key[3] = 0xF304056B;
[./tests/speck-xor-threshold-search-tests.cc:95] Print in LaTeX in file log.txt:

real    439m4.346s
user    352m52.651s
sys     19m16.860s

 */

/* 
SPECK XOR THRES, p_thres 0.1, max diffs 2^30, word size = 24, 32

%------------------------
\toprule
$r$ & $\Delta X_{\mathrm{L}}$ & $\Delta X_{\mathrm{R}}$ & $\mathrm{log}_2 p$\\
\midrule
$ 0$ & \texttt{80000410} & \texttt{90000000} & $-0.00$ \\
$ 1$ & \texttt{80800004} & \texttt{  800000} & $-3.00$ \\
$ 2$ & \texttt{ 4008000} & \texttt{    8000} & $-3.00$ \\
$ 3$ & \texttt{   48080} & \texttt{    8080} & $-3.00$ \\
$ 4$ & \texttt{80008400} & \texttt{80048000} & $-3.00$ \\
$ 5$ & \texttt{80848084} & \texttt{80A08080} & $-5.00$ \\
$ 6$ & \texttt{ 4200400} & \texttt{ 1240004} & $-6.00$ \\
$ 7$ & \texttt{ 1202000} & \texttt{ 8002020} & $-5.00$ \\
$ 8$ & \texttt{ 8010000} & \texttt{48000100} & $-4.00$ \\
$ 9$ & \texttt{48080000} & \texttt{ 8080802} & $-4.00$ \\
$10$ & \texttt{ 8400002} & \texttt{48004012} & $-5.00$ \\
\midrule
 $\sum_{r}\mathrm{log}_2 p_r$ & & & $-41.00$ \\
 $\mathrm{log}_2 (p_{\mathrm{thres}})$ & & & $-3.32$ \\
 $\#{\mathrm{hways}}$ & & & $1073741824$ \\
 Time: & & & $0.0$ min.\\
\bottomrule
% WORD_SIZE = 32, SPECK_P_THRES = 0.100000, SPECK_MAX_DIFF_CNT = 2^30.000000, RIGHT_ROT_CONST = 8, LEFT_ROT_CONST = 3, NROUNDS = 10

%------------------------
\toprule
$r$ & $\Delta X_{\mathrm{L}}$ & $\Delta X_{\mathrm{R}}$ & $\mathrm{log}_2 p$\\
\midrule
$ 0$ & \texttt{   20082} & \texttt{  120200} & $-0.00$ \\
$ 1$ & \texttt{  900000} & \texttt{    1000} & $-3.00$ \\
$ 2$ & \texttt{    8000} & \texttt{       0} & $-2.00$ \\
$ 3$ & \texttt{      80} & \texttt{      80} & $-1.00$ \\
$ 4$ & \texttt{  800080} & \texttt{  800480} & $-1.00$ \\
$ 5$ & \texttt{    8480} & \texttt{    A084} & $-3.00$ \\
$ 6$ & \texttt{  80A000} & \texttt{  85A420} & $-4.00$ \\
$ 7$ & \texttt{  842480} & \texttt{  A90584} & $-7.00$ \\
$ 8$ & \texttt{  2880A0} & \texttt{  60AC85} & $-9.00$ \\
$ 9$ & \texttt{    8405} & \texttt{   5E02E} & $-9.00$ \\
\midrule
 $\sum_{r}\mathrm{log}_2 p_r$ & & & $-39.00$ \\
 $\mathrm{log}_2 (p_{\mathrm{thres}})$ & & & $-3.32$ \\
 $\#{\mathrm{hways}}$ & & & $1073741824$ \\
 Time: & & & $0.0$ min.\\
\bottomrule
% WORD_SIZE = 24, SPECK_P_THRES = 0.100000, SPECK_MAX_DIFF_CNT = 2^30.000000, RIGHT_ROT_CONST = 8, LEFT_ROT_CONST = 3, NROUNDS = 44


 */



/* --- */

/* 
--- 20130620, 10:30am ---

 0:     8000 ->       83 0.250000 (2^-2.000000)
 1:     A200 ->      800 0.031250 (2^-5.000000)
 2:      800 ->     8200 0.250000 (2^-2.000000)
 3:     8200 ->       80 0.062500 (2^-4.000000)
 4:       80 ->        0 0.250000 (2^-2.000000)
 5:        0 ->       80 1.000000 (2^0.000000)
 6:       80 ->      200 0.250000 (2^-2.000000)
 7:      200 ->      880 0.250000 (2^-2.000000)
 8:      880 ->     2000 0.062500 (2^-4.000000)
 9:     2000 ->     8880 0.250000 (2^-2.000000)
p_tot = 0.000000029802322 = 2^-25.000000, Bn = 0.000000 = 2^-25.000000
[./src/simon-xor-ddt-search.cc:430] nrounds = 11, Bn_init = 2^-31.000000 : key     B30C     3FF7     ECC5     C54C
[./src/simon-xor-ddt-search.cc:330] 10 | Update best found Bn: 2^-31.000000 -> 2^-30.000000
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
0:     A000 ->     8002 0.125000 (2^-3.000000)
1:     2200 ->     2800 0.062500 (2^-4.000000)
2:     2800 ->     8200 0.125000 (2^-3.000000)
3:     8200 ->     2080 0.062500 (2^-4.000000)
4:     2080 ->       20 0.062500 (2^-4.000000)
5:       20 ->        0 0.250000 (2^-2.000000)
6:        0 ->       20 1.000000 (2^0.000000)
7:       20 ->       80 0.250000 (2^-2.000000)
8:       80 ->      220 0.250000 (2^-2.000000)
9:      220 ->      800 0.062500 (2^-4.000000)
10:      800 ->     2220 0.250000 (2^-2.000000)
p_tot = 0.000000000931323 = 2^-30.000000, Bn = 0.000000 = 2^-30.000000
[./src/simon-xor-ddt-search.cc:430] nrounds = 12, Bn_init = 2^-36.000000 : key     B30C     3FF7     ECC5     C54C
[./src/simon-xor-ddt-search.cc:330] 11 | Update best found Bn: 2^-36.000000 -> 2^-35.000000
[./src/simon-xor-ddt-search.cc:330] 11 | Update best found Bn: 2^-35.000000 -> 2^-34.000000


B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:142] Final trail (round differences):
0:     A000 ->     A002 1.000000 (2^0.000000)
1:     2000 ->     A000 0.125000 (2^-3.000000)
2:     2000 ->     2000 0.250000 (2^-2.000000)
3:     A000 ->     2000 0.250000 (2^-2.000000)
4:     A023 ->     A000 0.125000 (2^-3.000000)
5:     4008 ->     A023 0.003906 (2^-8.000000)
6:     2002 ->     4008 0.062500 (2^-4.000000)
7:     8000 ->     2002 0.062500 (2^-4.000000)
8:     2000 ->     8000 0.250000 (2^-2.000000)
9:        0 ->     2000 0.250000 (2^-2.000000)
10:     2000 ->        0 1.000000 (2^0.000000)
11:     8000 ->     2000 0.250000 (2^-2.000000)
12:     2083 ->     8000 0.250000 (2^-2.000000)
p_tot = 0.000000000058208 = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:151] key
key[0] = 0xB30C;
key[1] = 0x3FF7;
key[2] = 0xECC5;
key[3] = 0xC54C;
[./tests/simon-xor-threshold-search-tests.cc:test_simon_xor_ddt_trail_search():155] Print in LaTeX in file log.txt:

real    714m28.825s
user    710m5.443s
sys     2m13.368s

 */


/* --- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
 0:     A000 ->     8002 0.125000 (2^-3.000000)
 1:     2200 ->     2800 0.062500 (2^-4.000000)
 2:     2800 ->     8200 0.125000 (2^-3.000000)
 3:     8200 ->     2080 0.062500 (2^-4.000000)
 4:     2080 ->       20 0.062500 (2^-4.000000)
 5:       20 ->        0 0.250000 (2^-2.000000)
 6:        0 ->       20 1.000000 (2^0.000000)
 7:       20 ->       80 0.250000 (2^-2.000000)
 8:       80 ->      220 0.250000 (2^-2.000000)
 9:      220 ->      800 0.062500 (2^-4.000000)
10:      800 ->     2220 0.250000 (2^-2.000000)
p_tot = 0.000000000931323 = 2^-30.000000, Bn = 0.000000 = 2^-30.000000
[./src/simon-xor-ddt-search.cc:430] nrounds = 12, Bn_init = 2^-36.000000 : key     B30C     3FF7     ECC5     C54C
[./src/simon-xor-ddt-search.cc:330] 11 | Update best found Bn: 2^-36.000000 -> 2^-35.000000
[./src/simon-xor-ddt-search.cc:330] 11 | Update best found Bn: 2^-35.000000 -> 2^-34.000000


 */


/* --- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
 0:     8000 ->       83 0.250000 (2^-2.000000)
 1:     A200 ->      800 0.031250 (2^-5.000000)
 2:      800 ->     8200 0.250000 (2^-2.000000)
 3:     8200 ->       80 0.062500 (2^-4.000000)
 4:       80 ->        0 0.250000 (2^-2.000000)
 5:        0 ->       80 1.000000 (2^0.000000)
 6:       80 ->      200 0.250000 (2^-2.000000)
 7:      200 ->      880 0.250000 (2^-2.000000)
 8:      880 ->     2000 0.062500 (2^-4.000000)
 9:     2000 ->     8880 0.250000 (2^-2.000000)
p_tot = 0.000000029802322 = 2^-25.000000, Bn = 0.000000 = 2^-25.000000
[./src/simon-xor-ddt-search.cc:429] nrounds = 11, Bn_init = 2^-31.000000 : key     70A5     44D8     9FE7     CAB3

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
0:     A000 ->     8002 0.125000 (2^-3.000000)
1:     2200 ->     2800 0.062500 (2^-4.000000)
2:     2800 ->     8200 0.125000 (2^-3.000000)
3:     8200 ->     2080 0.062500 (2^-4.000000)
4:     2080 ->       20 0.062500 (2^-4.000000)
5:       20 ->        0 0.250000 (2^-2.000000)
6:        0 ->       20 1.000000 (2^0.000000)
7:       20 ->       80 0.250000 (2^-2.000000)
8:       80 ->      220 0.250000 (2^-2.000000)
9:      220 ->      800 0.062500 (2^-4.000000)
10:      800 ->     2220 0.250000 (2^-2.000000)
p_tot = 0.000000000931323 = 2^-30.000000, Bn = 0.000000 = 2^-30.000000
[./src/simon-xor-ddt-search.cc:429] nrounds = 12, Bn_init = 2^-36.000000 : key     70A5     44D8     9FE7     CAB3


----- End search -----
[./tests/simon-xor-threshold-search-tests.cc:130] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:142] Final trail (round differences):
0:     A000 ->     A002 1.000000 (2^0.000000)
1:     2000 ->     A000 0.125000 (2^-3.000000)
2:     2000 ->     2000 0.250000 (2^-2.000000)
3:     A000 ->     2000 0.250000 (2^-2.000000)
4:     A023 ->     A000 0.125000 (2^-3.000000)
5:     4008 ->     A023 0.003906 (2^-8.000000)
6:     2002 ->     4008 0.062500 (2^-4.000000)
7:     8000 ->     2002 0.062500 (2^-4.000000)
8:     2000 ->     8000 0.250000 (2^-2.000000)
9:        0 ->     2000 0.250000 (2^-2.000000)
10:     2000 ->        0 1.000000 (2^0.000000)
11:     8000 ->     2000 0.250000 (2^-2.000000)
12:     2083 ->     8000 0.250000 (2^-2.000000)
p_tot = 0.000000000058208 = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:151] key
key[0] = 0x70A5;
key[1] = 0x44D8;
key[2] = 0x9FE7;
key[3] = 0xCAB3;
[./tests/simon-xor-threshold-search-tests.cc:test_simon_xor_ddt_trail_search():155] Print in LaTeX in file log.txt:

real    602m36.468s
user    599m14.975s
sys     2m6.232s
vvelichkov@r-cluster1-1:~/skcrypto/trunk/work/src/yaarx$ 

----- End search -----
[./tests/simon-xor-threshold-search-tests.cc:130] Final bounds:
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-25.000000
B[10] = 2^-30.000000
B[11] = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:142] Final trail (round differences):
0:     A000 ->     A002 1.000000 (2^0.000000)
1:     2000 ->     A000 0.125000 (2^-3.000000)
2:     2000 ->     2000 0.250000 (2^-2.000000)
3:     A000 ->     2000 0.250000 (2^-2.000000)
4:     A023 ->     A000 0.125000 (2^-3.000000)
5:     4008 ->     A023 0.003906 (2^-8.000000)
6:     2002 ->     4008 0.062500 (2^-4.000000)
7:     8000 ->     2002 0.062500 (2^-4.000000)
8:     2000 ->     8000 0.250000 (2^-2.000000)
9:        0 ->     2000 0.250000 (2^-2.000000)
10:     2000 ->        0 1.000000 (2^0.000000)
11:     8000 ->     2000 0.250000 (2^-2.000000)
12:     2083 ->     8000 0.250000 (2^-2.000000)
p_tot = 0.000000000058208 = 2^-34.000000
[./tests/simon-xor-threshold-search-tests.cc:151] key
key[0] = 0x70A5;
key[1] = 0x44D8;
key[2] = 0x9FE7;
key[3] = 0xCAB3;
[./tests/simon-xor-threshold-search-tests.cc:test_simon_xor_ddt_trail_search():155] Print in LaTeX in file log.txt:

real    602m36.468s
user    599m14.975s
sys     2m6.232s
vvelichkov@r-cluster1-1:~/skcrypto/trunk/work/src/yaarx$ 


 */

/* ---- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
 0:     2200 ->     8800 0.062500 (2^-4.000000)
 1:      800 ->      200 0.250000 (2^-2.000000)
 2:      200 ->        0 0.250000 (2^-2.000000)
 3:        0 ->      200 1.000000 (2^0.000000)
 4:      200 ->      800 0.250000 (2^-2.000000)
 5:      800 ->     2200 0.250000 (2^-2.000000)
 6:     2200 ->     8000 0.062500 (2^-4.000000)
 7:     8000 ->     2202 0.250000 (2^-2.000000)
p_tot = 0.000003814697266 = 2^-18.000000, Bn = 0.000004 = 2^-18.000000
[./src/simon-xor-threshold-search.cc:654] nrounds = 9, Bn_init = 2^-24.000000 : key     6B90     F09F     AEA2     1292                                   
 */

/* --- */

/**
 * Write the GSL vector *mv in text form to the file "sum.asc" in the current
 * directory, one element per line in "%.5g" format.
 *
 * If the file cannot be opened, an error is reported and nothing is written.
 *
 * \param fp not used by the current body.
 *
 * NOTE(review): mv is not declared in this block; it is presumably a
 * gsl_vector** from the surrounding context -- confirm.
 */
void simon_one_round_ddt_file(FILE* fp)
{
     FILE *wfp1;
     wfp1 = fopen ("sum.asc", "w");
     // fopen() returns NULL on failure; passing NULL on to gsl_vector_fprintf()
     // or fclose() would crash.
     if(wfp1 == NULL) {
       printf("[%s:%d] ERROR: cannot open file sum.asc for writing\n", __FILE__, __LINE__);
       return;
     }
     gsl_vector_fprintf (wfp1, *mv, "%.5g");
     fclose (wfp1); 
}


/* --- */

/*

Connection to e-cluster1-11

DDT for n = 16 bits, p_thres = 0.0

Initial set sizes: Dp 118359745, Dxy 118359745

B[ 0] = 2^0.000000
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
 0:     4004 ->       11 0.062500 (2^-4.000000)
 1:        1 ->     4000 0.250000 (2^-2.000000)
 2:     4000 ->        0 0.250000 (2^-2.000000)
 3:        0 ->     4000 1.000000 (2^0.000000)
 4:     4000 ->        1 0.250000 (2^-2.000000)
 5:        1 ->     4004 0.250000 (2^-2.000000)
p_tot = 0.000244140625000 = 2^-12.000000, Bn = 0.000244 = 2^-12.000000

*/
/* --- */

/**
 * Empty placeholder: the body contains no statements, so \p D is left untouched.
 *
 * \param D square table of ALL_WORDS x ALL_WORDS doubles.
 *
 * NOTE(review): judging by the name, this was meant to fill D with the
 * difference distribution table of one Simon round -- confirm before use.
 */
void simon_one_round_ddt(double D[ALL_WORDS][ALL_WORDS])
{

}




/* --- */

// Number of rounds covered by the trail stored in g_trail.
#define TRAIL_LEN 18

// Round differences of an 18-round trail: one {left, right} row per round
// boundary, i.e. TRAIL_LEN + 1 rows (indices 0..18). Row i+1 repeats the left
// word of row i as its right word.
// The array used to be declared with 18 rows while 19 were initialized, and row
// 12 contained the malformed literal "0x 8000000"; both are compile errors.
uint32_t g_trail[TRAIL_LEN + 1][2] = {
  {0x20002020, 0x88008808},
  { 0x8000888, 0x20002020},
  {     0x200,  0x8000888},
  { 0x8000088,      0x200},
  {0x20000020,  0x8000088},
  {0x88000008, 0x20000020},
  {       0x2, 0x88000008},
  {0x88000000,        0x2},
  {0x20000000, 0x88000000},
  { 0x8000000, 0x20000000},
  {       0x0,  0x8000000},
  { 0x8000000,        0x0},
  {0x20000000,  0x8000000},
  {0x88000000, 0x20000000},
  {       0x2, 0x88000000},
  {0x88000008,        0x2},
  {0x20000020, 0x88000008},
  { 0x8000088, 0x20000020},
  {     0x200,  0x8000088}
};



/* --- */

/* 


[./tests/simon-xor-threshold-search-tests.cc:81] Final trail (round differences):
0: 20002020 -> 88008808 1.000000 (2^0.000000)
1:  8000888 -> 20002020 0.015625 (2^-6.000000)
2:      200 ->  8000888 0.003906 (2^-8.000000)
3:  8000088 ->      200 0.250000 (2^-2.000000)
4: 20000020 ->  8000088 0.015625 (2^-6.000000)
5: 88000008 -> 20000020 0.062500 (2^-4.000000)
6:        2 -> 88000008 0.015625 (2^-6.000000)
7: 88000000 ->        2 0.250000 (2^-2.000000)
8: 20000000 -> 88000000 0.062500 (2^-4.000000)
9:  8000000 -> 20000000 0.250000 (2^-2.000000)
10:        0 ->  8000000 0.250000 (2^-2.000000)
11:  8000000 ->        0 1.000000 (2^0.000000)
12: 20000000 ->  8000000 0.250000 (2^-2.000000)
13: 88000000 -> 20000000 0.250000 (2^-2.000000)
14:        2 -> 88000000 0.062500 (2^-4.000000)
15: 88000008 ->        2 0.250000 (2^-2.000000)
16: 20000020 -> 88000008 0.015625 (2^-6.000000)
17:  8000088 -> 20000020 0.062500 (2^-4.000000)
18:      200 ->  8000088 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-68.000000

---

B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-24.000000
B[ 9] = 2^-28.000000
B[10] = 2^-30.000000
B[11] = 2^-36.000000
B[12] = 2^-38.000000
B[13] = 2^-44.000000
B[14] = 2^-48.000000
B[15] = 2^-54.000000
B[16] = 2^-62.000000
B[17] = 2^-68.000000
 0: 20002020 -> 80008080 0.015625 (2^-6.000000)
 1:  8000888 ->      200 0.003906 (2^-8.000000)
 2:      200 ->  8000088 0.250000 (2^-2.000000)
 3:  8000088 -> 20000020 0.015625 (2^-6.000000)
 4: 20000020 -> 88000008 0.062500 (2^-4.000000)
 5: 88000008 ->        2 0.015625 (2^-6.000000)
 6:        2 -> 88000000 0.250000 (2^-2.000000)
 7: 88000000 -> 20000000 0.062500 (2^-4.000000)
 8: 20000000 ->  8000000 0.250000 (2^-2.000000)
 9:  8000000 ->        0 0.250000 (2^-2.000000)
10:        0 ->  8000000 1.000000 (2^0.000000)
11:  8000000 -> 20000000 0.250000 (2^-2.000000)
12: 20000000 -> 88000000 0.250000 (2^-2.000000)
13: 88000000 ->        2 0.062500 (2^-4.000000)
14:        2 -> 88000008 0.250000 (2^-2.000000)
15: 88000008 -> 20000020 0.015625 (2^-6.000000)
16: 20000020 ->  8000088 0.062500 (2^-4.000000)
17:  8000088 ->      200 0.015625 (2^-6.000000)
p_tot = 0.000000000000000 = 2^-68.000000, Bn = 0.000000 = 2^-68.000000
[./src/simon-xor-threshold-search.cc:732] nrounds = 18
[./src/simon-xor-threshold-search.cc:115] Verify P for one round (2^15.000000 CPs)...
THE  0: 0.015625 (2^-6.000000) 20002020 -> 80008080
EXP  0: 0.016357 (2^-5.933911) 20002020 -> 80008080

THE  1: 0.003906 (2^-8.000000)  8000888 ->      200
EXP  1: 0.003754 (2^-8.057485)  8000888 ->      200

THE  2: 0.250000 (2^-2.000000)      200 ->  8000088
EXP  2: 0.251740 (2^-1.989996)      200 ->  8000088

THE  3: 0.015625 (2^-6.000000)  8000088 -> 20000020
EXP  3: 0.015869 (2^-5.977632)  8000088 -> 20000020

THE  4: 0.062500 (2^-4.000000) 20000020 -> 88000008
EXP  4: 0.065979 (2^-3.921849) 20000020 -> 88000008

THE  5: 0.015625 (2^-6.000000) 88000008 ->        2
EXP  5: 0.015289 (2^-6.031333) 88000008 ->        2

THE  6: 0.250000 (2^-2.000000)        2 -> 88000000
EXP  6: 0.247437 (2^-2.014870)        2 -> 88000000

THE  7: 0.062500 (2^-4.000000) 88000000 -> 20000000
EXP  7: 0.062469 (2^-4.000705) 88000000 -> 20000000

THE  8: 0.250000 (2^-2.000000) 20000000 ->  8000000
EXP  8: 0.248657 (2^-2.007770) 20000000 ->  8000000

THE  9: 0.250000 (2^-2.000000)  8000000 ->        0
EXP  9: 0.250122 (2^-1.999296)  8000000 ->        0

THE 10: 1.000000 (2^0.000000)        0 ->  8000000
EXP 10: 1.000000 (2^0.000000)        0 ->  8000000

THE 11: 0.250000 (2^-2.000000)  8000000 -> 20000000
EXP 11: 0.248505 (2^-2.008655)  8000000 -> 20000000

THE 12: 0.250000 (2^-2.000000) 20000000 -> 88000000
EXP 12: 0.245056 (2^-2.028816) 20000000 -> 88000000

THE 13: 0.062500 (2^-4.000000) 88000000 ->        2
EXP 13: 0.060760 (2^-4.040722) 88000000 ->        2

THE 14: 0.250000 (2^-2.000000)        2 -> 88000008
EXP 14: 0.253998 (2^-1.977112)        2 -> 88000008

THE 15: 0.015625 (2^-6.000000) 88000008 -> 20000020
EXP 15: 0.016144 (2^-5.952876) 88000008 -> 20000020

THE 16: 0.062500 (2^-4.000000) 20000020 ->  8000088
EXP 16: 0.062561 (2^-3.998592) 20000020 ->  8000088

THE 17: 0.015625 (2^-6.000000)  8000088 ->      200
EXP 17: 0.015411 (2^-6.019860)  8000088 ->      200

OK
[./src/simon-xor-threshold-search.cc:238] Verify P of differentials (2^15.000000 CPs)...
Input differences: 20002020 88008808

R# 0 Output differences:  8000888 20002020
THE  1: 0.015625 (2^-6.000000) 20002020 ->  8000888
EXP  1: 0.016052 (2^-5.961081) 20002020 ->  8000888

R# 1 Output differences:      200  8000888
THE  2: 0.000061 (2^-14.000000)  8000888 ->      200
EXP  2: 0.000061 (2^-14.000000)  8000888 ->      200

R# 2 Output differences:  8000088      200
THE  3: 0.000015 (2^-16.000000)      200 ->  8000088
EXP  3: 0.000031 (2^-15.000000)      200 ->  8000088

R# 3 Output differences: 20000020  8000088
THE  4: 0.000000 (2^-22.000000)  8000088 -> 20000020
EXP  4: 0.000000 (2^-inf)  8000088 -> 20000020

R# 4 Output differences: 88000008 20000020
THE  5: 0.000000 (2^-26.000000) 20000020 -> 88000008
EXP  5: 0.000000 (2^-inf) 20000020 -> 88000008

R# 5 Output differences:        2 88000008
THE  6: 0.000000 (2^-32.000000) 88000008 ->        2
EXP  6: 0.000000 (2^-inf) 88000008 ->        2

R# 6 Output differences: 88000000        2
THE  7: 0.000000 (2^-34.000000)        2 -> 88000000
EXP  7: 0.000000 (2^-inf)        2 -> 88000000

R# 7 Output differences: 20000000 88000000
THE  8: 0.000000 (2^-38.000000) 88000000 -> 20000000
EXP  8: 0.000000 (2^-inf) 88000000 -> 20000000

R# 8 Output differences:  8000000 20000000
THE  9: 0.000000 (2^-40.000000) 20000000 ->  8000000
EXP  9: 0.000000 (2^-inf) 20000000 ->  8000000

R# 9 Output differences:        0  8000000
THE 10: 0.000000 (2^-42.000000)  8000000 ->        0
EXP 10: 0.000000 (2^-inf)  8000000 ->        0

R#10 Output differences:  8000000        0
THE 11: 0.000000 (2^-42.000000)        0 ->  8000000
EXP 11: 0.000000 (2^-inf)        0 ->  8000000

R#11 Output differences: 20000000  8000000
THE 12: 0.000000 (2^-44.000000)  8000000 -> 20000000
EXP 12: 0.000000 (2^-inf)  8000000 -> 20000000

R#12 Output differences: 88000000 20000000
THE 13: 0.000000 (2^-46.000000) 20000000 -> 88000000
EXP 13: 0.000000 (2^-inf) 20000000 -> 88000000

R#13 Output differences:        2 88000000
THE 14: 0.000000 (2^-50.000000) 88000000 ->        2
EXP 14: 0.000000 (2^-inf) 88000000 ->        2

R#14 Output differences: 88000008        2
THE 15: 0.000000 (2^-52.000000)        2 -> 88000008
EXP 15: 0.000000 (2^-inf)        2 -> 88000008

R#15 Output differences: 20000020 88000008
THE 16: 0.000000 (2^-58.000000) 88000008 -> 20000020
EXP 16: 0.000000 (2^-inf) 88000008 -> 20000020

R#16 Output differences:  8000088 20000020
THE 17: 0.000000 (2^-62.000000) 20000020 ->  8000088
EXP 17: 0.000000 (2^-inf) 20000020 ->  8000088

R#17 Output differences:      200  8000088
THE 18: 0.000000 (2^-68.000000)  8000088 ->      200
EXP 18: 0.000000 (2^-inf)  8000088 ->      200

OK
[./tests/simon-xor-threshold-search-tests.cc:57]

 */



/* --- */
#if 0
  // Disabled, orphaned fragment (not inside any function here): sets every
  // element (row, col) of the GSL matrix A to random32() % (all_blocks + 1).
  for(uint32_t row = 0; row < A_nrows; row++) {
	 for(uint32_t col = 0; col < A_ncols; col++) {
		uint32_t e = random32() % (all_blocks + 1);
		gsl_matrix_set(A, row, col, e);
	 }
  }
#endif


/* --- */

#if 1									  // DEBUG
	 // Orphaned debug trace: prints B[nrounds - 1] as a value and as its log2, followed by Bn.
	 printf("[%s:%d] B[%d] %f 2^%f, Bn %f\n", __FILE__, __LINE__, nrounds - 1, B[nrounds - 1], log2(B[nrounds - 1]), Bn);
#endif


/* --- */

/** 
 * Same as \ref xdp_add_pddt with additional input parameters.
 *
 * Allocates and initializes the matrices A (xdp_add_alloc_matrices,
 * xdp_add_sf, xdp_add_normalize_matrices) and the vector C, then calls
 * \ref xdp_add_pddt_i starting at bit position k = 0 with all three
 * differences and the probability set to zero. Afterwards it prints the
 * size of \p diff_set and, in the DEBUG section, asserts that the
 * probability stored with every element equals the value returned by
 * xdp_add for the same differences.
 *
 * \param n word size.
 * \param p_thres probability threshold.
 * \param diff_set set handed to xdp_add_pddt_i. NOTE(review): the set is
 *        taken by value, so anything inserted by this call lands in a local
 *        copy and is not visible to the caller -- confirm this is intended.
 * \see xdp_add_pddt, xdp_add_pddt_i.
 */
void xdp_add_pddt(uint32_t n, double p_thres, 
						std::multiset<differential_3d_t, struct_comp_diff_3d_p> diff_set)
{
  uint32_t k = 0;
  double p = 0.0;

  // init A
  gsl_matrix* A[2][2][2];
  xdp_add_alloc_matrices(A);
  xdp_add_sf(A);
  xdp_add_normalize_matrices(A);

  // init C
  gsl_vector* C = gsl_vector_calloc(XDP_ADD_MSIZE);
  gsl_vector_set(C, XDP_ADD_ISTATE, 1.0);

  uint32_t da = 0;
  uint32_t db = 0;
  uint32_t dc = 0;

  xdp_add_pddt_i(k, n, p_thres, A, C, &da, &db, &dc, &p, &diff_set);

  // size() returns size_t, so it must be printed with %zu (passing it to %d
  // is undefined behaviour); the value labelled "n" is the parameter n.
  printf("[%s:%d] p_thres = %f (2^%f), n = %u, #diffs = %zu\n", __FILE__, __LINE__, 
			p_thres, log2(p_thres), n, diff_set.size());

#if 1									  // DEBUG
  // Cross-check every stored probability against a direct xdp_add computation.
  // NOTE(review): elsewhere in this file differential_3d_t is accessed through
  // dx/dy/dz; the da/db/dc member names used here should be confirmed.
  uint32_t cnt = 0;
  std::multiset<differential_3d_t, struct_comp_diff_3d_p>::iterator set_iter;
  for(set_iter = diff_set.begin(); set_iter != diff_set.end(); set_iter++) {
	 differential_3d_t i_diff = *set_iter;
	 double p_the = xdp_add(A, i_diff.da, i_diff.db, i_diff.dc);
#if 0									  // print all
	 printf("[%s:%d] %4d: XDP_ADD_THRES[(%8X,%8X)->%8X] = %6.5f\n", 
			  __FILE__, __LINE__, cnt, i_diff.da, i_diff.db, i_diff.dc, i_diff.p);
#endif				 // #if 0
	 assert(p_the == i_diff.p);
	 cnt++;
  }
#endif

  gsl_vector_free(C);
  xdp_add_free_matrices(A);
}

/* --- */

/**
 * Stub: performs no computation yet. It only prints an "OK" trace line with
 * the current file and line; none of the arguments is read or modified.
 * Presumably intended to fill the per-round matrices in \p A -- TODO confirm.
 *
 * \param word_size word size in bits (currently unused).
 * \param A array of matrices (currently unused).
 * \param A_nmatrix number of matrices in \p A (currently unused).
 * \param A_nrows number of rows of each matrix (currently unused).
 * \param A_ncols number of columns of each matrix (currently unused).
 */
void simon_compute_round_diff_matrix(uint32_t word_size, 
												 gsl_matrix** A, uint32_t A_nmatrix, uint32_t A_nrows, uint32_t A_ncols)
{
  // The former local `all_words` was never read and has been removed; the
  // casts keep the compiler quiet about the still-unused parameters.
  (void)word_size;
  (void)A;
  (void)A_nmatrix;
  (void)A_nrows;
  (void)A_ncols;

  printf("[%s:%d] OK\n", __FILE__, __LINE__);
}

/**
 * Test driver for simon_compute_round_diff_matrix: allocates `nrounds`
 * square matrices of dimension 2^word_size, fills them with values
 * random32() % (all_words + 1), calls simon_compute_round_diff_matrix,
 * prints every matrix element and finally frees the matrices.
 */
void test_simon_compute_round_diff_matrix()
{
  const uint32_t word_size = 10;
  const uint32_t all_words = (1U << word_size);
  // const so that A below is an ordinary fixed-size array rather than a
  // variable-length array, which is not standard C++.
  const uint32_t nrounds = 2;
  gsl_matrix* A[nrounds];
  const uint32_t A_nrows = all_words;
  const uint32_t A_ncols = all_words;
  const uint32_t A_nmatrix = nrounds;

#if 1
  printf("[%s:%d] A[%d][%d x %d]\n", __FILE__, __LINE__, A_nmatrix, A_nrows, A_ncols);
#endif

  for(uint32_t i = 0; i < A_nmatrix; i++) {
	 A[i] = gsl_matrix_calloc(A_nrows, A_ncols);
  }

  // Fill every matrix with pseudo-random entries in [0, all_words].
  for(uint32_t i = 0; i < A_nmatrix; i++) {
	 for(uint32_t row = 0; row < A_nrows; row++) {
		for(uint32_t col = 0; col < A_ncols; col++) {
		  uint32_t e = random32() % (all_words + 1);
		  gsl_matrix_set(A[i], row, col, e);
		}
	 }
  }

  simon_compute_round_diff_matrix(word_size, A, A_nmatrix, A_nrows, A_ncols);

  // Dump the matrices; gsl_matrix_get returns double, the entries are
  // converted back to integers for printing.
  for(uint32_t i = 0; i < A_nmatrix; i++) {
	 printf("A[%d] =\n", i);
	 for(uint32_t row = 0; row < A_nrows; row++) {
		for(uint32_t col = 0; col < A_ncols; col++) {
		  uint32_t e = gsl_matrix_get(A[i], row, col);
		  printf("%4d ", e);
		}
		printf("\n");
	 }
	 printf("\n");
  }

  for(uint32_t i = 0; i < A_nmatrix; i++) {
	 gsl_matrix_free(A[i]);
  }
}

/* --- */

  //  uint32_t dy_max = 0;
  //  max_xdp_rot_and(dx_in, &dy_max, lrot_const_s, lrot_const_t);
  //  dy_init = dy_max ^ trail[0].dy ^ LROT(trail[0].dx, lrot_const_u);
  //  uint32_t dy_in = trail[0].dy;
  //  uint32_t dy_in = LROT(trail[0].dx, lrot_const_u) ^ trail[1].dx;


/* --- */

/* 
Input differences:     1000     4400 (       0)

R# 0 Output differences:     6000     1000
THE  1: 0.250000 (2^-2.000000)     1000 ->     6000
EXP  1: 0.248566 (2^-2.008301)     1000 ->     6000

R# 1 Output differences:        0      400
THE  2: 0.062500 (2^-4.000000)      400 ->        0
EXP  2: 0.063599 (2^-3.974860)      400 ->        0

dy_in =     4400
xdp-rot-and-tests: ./tests/xdp-rot-and-tests.cc:653: uint32_t simon_verify_xor_differential(uint32_t, uint32_t, uint32_t*, differential_t*, uint32_t, uint32_t, uint32_t, uint32_t): Assertion `1 == 0' failed.
Aborted

r
 */
/* --- */
/* 
B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
 0:        1 ->        4 0.250000 (2^-2.000000)
 1:     4400 ->     1000 0.062500 (2^-4.000000)
 2:     1000 ->      400 0.250000 (2^-2.000000)
 3:      400 ->        0 0.250000 (2^-2.000000)
 4:        0 ->      400 1.000000 (2^0.000000)
 5:      400 ->     1000 0.250000 (2^-2.000000)
 6:     1000 ->     4400 0.250000 (2^-2.000000)
 7:     4400 ->        1 0.062500 (2^-4.000000)
 8:        1 ->     4404 0.250000 (2^-2.000000)
 9:     4404 ->     1010 0.015625 (2^-6.000000)
10:     1010 ->      444 0.062500 (2^-4.000000)
p_tot = 0.000000000931323 = 2^-30.000000, Bn = 0.000000 = 2^-30.000000
[./tests/xdp-rot-and-tests.cc:991] nrounds = 12, Bn_init = 2^-36.000000 : key     ABC9     8ABC     5FBA     B81E

 */
/* --- */

  //  if(delta != 0) {
	 //  } 
//else {
//	 uint32_t new_dc = 0;
//	 xdp_rot_and_dx_pddt_i(n, n, s, t, u, delta, delta_prev, new_dc, hways_diff_set_dx_dy, hways_diff_mset_p, croads_diff_set_dx_dy, croads_diff_mset_p, cnt_diff, max_cnt, p_thres);
//  }

/* --- */
/*
16 bit

B[ 1] = 2^-4.000000
B[ 2] = 2^-4.000000
B[ 3] = 2^-6.000000
B[ 4] = 2^-8.000000
B[ 5] = 2^-12.000000
B[ 6] = 2^-14.000000
B[ 7] = 2^-18.000000
B[ 8] = 2^-20.000000
B[ 9] = 2^-26.000000
B[10] = 2^-30.000000
	  pDDT sizes: Dp 73, Dxy 68
	  0:     2000 ->     C000 0.250000 (2^-2.000000)
	  1:      880 ->      200 0.062500 (2^-4.000000)
	  2:      200 ->       80 0.250000 (2^-2.000000)
	  3:       80 ->        0 0.250000 (2^-2.000000)
	  4:        0 ->       80 1.000000 (2^0.000000)
	  5:       80 ->      200 0.250000 (2^-2.000000)
	  6:      200 ->      880 0.250000 (2^-2.000000)
	  7:      880 ->     2000 0.062500 (2^-4.000000)
	  8:     2000 ->     8880 0.250000 (2^-2.000000)
	  9:     8880 ->      202 0.015625 (2^-6.000000)
	  10:      202 ->     8088 0.062500 (2^-4.000000)
	  p_tot = 0.000000000931323 = 2^-30.000000, Bn = 0.000000 = 2^-30.000000
	  [./tests/xdp-rot-and-tests.cc:989] nrounds = 12, Bn_init = 2^-36.000000 : key     F2AC     3A9C     A2F3     4C24
*/

/* --- */
/*
void tea_add_threshold_search_full(const int n, const int nrounds, const uint32_t npairs, const uint32_t key[4],
											  gsl_matrix* A[2][2][2][2], double B[NROUNDS], double* Bn,
											  const differential_t diff_in[NROUNDS], differential_t trail[NROUNDS], 
											  uint32_t lsh_const, uint32_t rsh_const,
											  std::multiset<differential_t, struct_comp_diff_p>* diff_mset_p, // highways
											  std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy,
											  std::multiset<differential_t, struct_comp_diff_p>* croads_diff_mset_p, // country roads
											  std::set<differential_t, struct_comp_diff_dx_dy>* croads_diff_set_dx_dy)
*/


/* --- */

		//		printf("[%s:%d] %f %f\n", __FILE__, __LINE__, p, p_thres);
		//		assert(p > p_thres);



/* --- */

// ----{ PDDT ---
/*
  std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
  std::multiset<differential_t, struct_comp_diff_p>* hways_diff_mset_p,
  std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy,
  std::multiset<differential_t, struct_comp_diff_p>* diff_mset_p,
*/

/*
// diff_state[da_i|db_i|dc_i][da_ii|db_ii|dc_ii]
*/
// Lookup table indexed as xdp_rot_and_diff_state[row][col] with
//   row = (da_i  << 2) | (db_i  << 1) | dc_i
//   col = (da_ii << 2) | (db_ii << 1) | dc_ii
// The code below reads an entry as `npaths` and skips the transition when it is 0.
uint32_t xdp_rot_and_diff_state[8][8] = {
  /* row 0 */ {8, 0, 0, 0, 4, 4, 0, 0},
  /* row 1 */ {0, 0, 0, 0, 0, 0, 0, 0},   // row is impossible state
  /* row 2 */ {4, 0, 0, 0, 4, 0, 0, 0},
  /* row 3 */ {4, 0, 0, 0, 0, 4, 0, 0},
  /* row 4 */ {0, 0, 2, 2, 0, 0, 2, 2},
  /* row 5 */ {0, 0, 2, 2, 0, 0, 2, 2},
  /* row 6 */ {0, 0, 2, 2, 0, 0, 2, 2},
  /* row 7 */ {0, 0, 2, 2, 0, 0, 2, 2}
};

// One flag per bit position, all initialized to false. Within this section the
// array is touched only by commented-out code in xdp_rot_and_pddt_i (an assert
// and an assignment on g_da_marked[da_idx[k]]); presumably it was meant to
// detect a bit position being assigned twice -- TODO confirm.
bool g_da_marked[WORD_SIZE] = {false};

/**
 * Experimental recursive step of a bit-by-bit enumeration of differences
 * (da, db, dc) for the ROT-AND component.
 *
 * At k == n - 1 (end of recursion) it prints, for every pair of consecutive
 * index positions, the table row/column and the entry of
 * xdp_rot_and_diff_state, then prints the probability returned by
 * xdp_rot_and(RROT(da, s), RROT(dc, s), s, t) next to the sum over j of
 * L[j] . C[j]. Nothing is stored or returned.
 *
 * Otherwise it tries four candidates w for the next bits (da_ii = bit 1 of w,
 * dc_ii = bit 0 of w, db_ii fixed to da_i), discards candidates whose table
 * entry is 0, multiplies the subgraph matrix into C, and recurses with k + 1
 * when the resulting probability is non-zero.
 *
 * \param k current position in the index arrays.
 * \param n the recursion stops when k == n - 1.
 * \param s rotation amount applied with RROT in the final step; also passed to xdp_rot_and.
 * \param t rotation amount used for new_db and passed to xdp_rot_and.
 * \param A scratch matrix; it is zeroed and recomputed for every candidate.
 * \param C vectors multiplied by A; the recursion receives the products R instead.
 * \param L vectors used in the final dot products.
 * \param da_idx bit positions used for da and dc at each step.
 * \param db_idx bit positions used for the db lookups at each step.
 * \param db_i_start value that da_ii is forced to when k == n - 2.
 * \param da_i current bit of da (row of the table).
 * \param db_i current bit of db (row of the table).
 * \param dc_i current bit of dc (row of the table).
 * \param da_in bits of da assigned so far.
 * \param db_in bits of db assigned so far.
 * \param dc_in bits of dc assigned so far.
 */
void xdp_rot_and_pddt_i(uint32_t k, uint32_t n, uint32_t s, uint32_t t,
								gsl_matrix* A, gsl_vector* C[2], gsl_vector* L[2],
								uint32_t da_idx[WORD_SIZE], uint32_t db_idx[WORD_SIZE], uint32_t db_i_start,
								uint32_t da_i, uint32_t db_i, uint32_t dc_i, 
								const uint32_t da_in, const uint32_t db_in, const uint32_t dc_in)
{

  uint32_t da = da_in;
  uint32_t db = db_in;
  uint32_t dc = dc_in;

  // End of recursion: diagnostics only.
  if(k == (n - 1)) {

	 // Re-derive every (row, col) pair from the accumulated words and report its table entry.
	 for(uint32_t i = 0; i < (n - 1); i++) {
#if 1
		// These locals shadow the function parameters of the same name.
		// NOTE(review): the db bits are read from da (not db) at db_idx[] -- confirm intended.
		uint32_t da_i  = (da >> da_idx[i]) & 1;
		uint32_t db_i  = (da >> db_idx[i]) & 1;
		uint32_t dc_i  = (dc >> da_idx[i]) & 1;
		uint32_t da_ii = (da >> da_idx[i+1]) & 1;
		uint32_t db_ii = (da >> db_idx[i+1]) & 1;
		uint32_t dc_ii = (dc >> da_idx[i+1]) & 1;
#endif
		uint32_t i_row = (da_i << 2) | (db_i << 1) | (dc_i << 0);
		uint32_t i_col = (da_ii << 2) | (db_ii << 1) | (dc_ii << 0);
		uint32_t npaths = xdp_rot_and_diff_state[i_row][i_col];
		printf("[%s:%d] %2d: %d %d | %d\n", __FILE__, __LINE__, i, i_row, i_col, npaths);
		// A zero entry is only flagged in the output; the assert is disabled.
		if(npaths == 0)
		  printf("\n\n=============================> !!\n\n\n");
		//		assert(npaths != 0);
	 }

	 uint32_t new_da = 0;
	 //	 uint32_t new_db = 0;
	 uint32_t new_dc = 0;
#if 0
	 for(uint32_t i = 0; i < WORD_SIZE; i++) {
		uint32_t x = ((da) >> i) & 1;
		new_da |= (x << da_idx[i]);
		//		uint32_t y = ((dc) >> i) & 1;
		//		new_dc |= (y << da_idx[i]);
		//		printf("%d ", x);
	 }
#endif
	 //	 printf("\n");
	 new_da = RROT(da, s);
	 //	 new_db = RROT(db, t);
	 new_dc = RROT(dc, s);
	 //	 new_da = RROT(new_da, s);

	 // Probability accumulated by the recursion: sum over j of L[j] . C[j].
	 double new_p = 0.0;
	 for(uint32_t j = 0; j < XDP_ROT_AND_NISTATES; j++) {
		double p_j = 0.0;
		gsl_blas_ddot(L[j], C[j], &p_j);
		new_p += p_j;
	 }
	 //	 uint32_t db_true = db;//LROT(new_da, t);

	 //	 if(db_true == db) {
		// Print the reference value from xdp_rot_and next to the accumulated one.
		double p_th = xdp_rot_and(new_da, new_dc, s, t);
		printf("[%s:%d] %8X | %8X %8X %8X | %f %f\n\n", __FILE__, __LINE__, new_da, da, db, new_dc, p_th, new_p);
		//	 }
	 return;
  }

  // Two free bits per step (da_ii, dc_ii); db_ii is tied to da_i.
  uint32_t N = (1UL << 2);
  for(uint32_t w = 0; w < N; w++) {
	 uint32_t da_ii = (w >> 1) & 1;
	 uint32_t db_ii = da_i;//(w >> 1) & 1;
	 uint32_t dc_ii = (w >> 0) & 1;

	 //	 uint32_t da_i_prev = (da >> da_idx[k - 1]) & 1;
	 //	 uint32_t db_i_prev = (db >> db_idx[k - 1]) & 1;
	 //	 uint32_t dc_i_prev = (dc >> da_idx[k - 1]) & 1;

	 // At the second-to-last step da_ii is overridden with db_i_start, so the
	 // two values of bit 1 of w lead to the same candidate.
	 if(k == (n - 2)) {
		da_ii = db_i_start;
	 }

	 // Check the transitions between positions 0..k-1 that are already fixed in da/dc.
	 bool b_is_valid = true;
	 if(k > 0) {
		for(uint32_t i = 0; i < (k-1); i++) {
#if 1
		  // Shadow both the parameters and the candidate bits of the enclosing loop.
		  uint32_t da_i  = (da >> da_idx[i]) & 1;
		  uint32_t db_i  = (da >> db_idx[i]) & 1;
		  uint32_t dc_i  = (dc >> da_idx[i]) & 1;
		  uint32_t da_ii = (da >> da_idx[i+1]) & 1;
		  uint32_t db_ii = (da >> db_idx[i+1]) & 1;
		  uint32_t dc_ii = (dc >> da_idx[i+1]) & 1;
#endif
		  uint32_t i_row = (da_i << 2) | (db_i << 1) | (dc_i << 0);
		  uint32_t i_col = (da_ii << 2) | (db_ii << 1) | (dc_ii << 0);
		  uint32_t npaths = xdp_rot_and_diff_state[i_row][i_col];
		  if(npaths == 0)
			 b_is_valid = false;
		}
	 }
	 //	 assert(b_is_valid);
	 if(!b_is_valid)
		continue;

#if 1
	 // Skip candidates whose transition from the current triple has a zero table entry.
	 uint32_t i_row = (da_i << 2) | (db_i << 1) | (dc_i << 0);
	 uint32_t i_col = (da_ii << 2) | (db_ii << 1) | (dc_ii << 0);
	 uint32_t npaths = xdp_rot_and_diff_state[i_row][i_col];
	 if(npaths == 0)
		continue;
#endif

	 //	 if(da_i != db_ii)
	 //		continue;
	 // Always true here because db_ii was initialized from da_i above.
	 assert(da_i == db_ii);

	 // Cannot trigger: da_ii was already set to db_i_start above for k == n - 2.
	 if(k == (n - 2)) {
		if(da_ii != db_i_start)
		  continue;
	 }

	 gsl_matrix_set_all(A, 0.0);	  // init
	 xdp_rot_and_compute_subgraph(A, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);

	 // temp
	 // R[j] = A * C[j]; allocated per candidate and freed at the end of the iteration.
	 gsl_vector* R[XDP_ROT_AND_NISTATES];
	 for(uint32_t j = 0; j < XDP_ROT_AND_NISTATES; j++) {
		R[j] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
	 }
	 double new_p = 0.0;

	 // L A C
	 for(uint32_t j = 0; j < XDP_ROT_AND_NISTATES; j++) { // initial states
		// L A C
		double p_j = 0.0;
		gsl_blas_dgemv(CblasNoTrans, 1.0, A, C[j], 0.0, R[j]);
		gsl_blas_ddot(L[j], R[j], &p_j);
		new_p += p_j;
		//		new_p += (p_j / 32.0);
	 }

	 // Recurse only along candidates with non-zero probability.
	 if(new_p != 0.0) {
		//		assert(g_da_marked[da_idx[k]] == false);
		//		g_da_marked[da_idx[k]] = true;
		// Record the CURRENT bits (da_i, dc_i) at position da_idx[k]; the
		// candidate bits become the current bits of the next level.
		const uint32_t new_da = da_in | (da_i << da_idx[k]);
		//		const uint32_t new_db = db_in | (db_i << db_idx[k]);
		// new_db is derived from new_da by rotation instead of being accumulated bit by bit.
		const uint32_t new_db = RROT(new_da, t);//db_in | (db_i << k);
		const uint32_t new_dc = dc_in | (dc_i << da_idx[k]);
		uint32_t new_da_i = da_ii;
		uint32_t new_db_i = da_i;//db_ii;
		uint32_t new_dc_i = dc_ii;

		//		printf("[%s:%d]\n", __FILE__, __LINE__);
		//	 printf("%2d: %d%d%d -> %d%d%d\n", i, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);
		//		xdp_rot_and_compute_subgraph(A[i], da_i, db_i, dc_i, da_ii, db_ii, dc_ii);
		// R takes the place of C in the next level; A is shared and overwritten there.
		xdp_rot_and_pddt_i(k+1, n, s, t, A, R, L, da_idx, db_idx, db_i_start, new_da_i, new_db_i, new_dc_i, new_da, new_db, new_dc);
	 }

	 for(uint32_t j = 0; j < XDP_ROT_AND_NISTATES; j++) {
		gsl_vector_free(R[j]);
	 }
  }
}

void xdp_rot_and_pddt()
{
  const uint32_t s = 1;
  const uint32_t t = 3;//8 % WORD_SIZE;

  gsl_matrix* A;
  gsl_vector* C[2];
  gsl_vector* L[2];
  for(uint32_t i = 0; i < 2; i++) {
	 C[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
	 L[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
  }
  A = gsl_matrix_calloc(XDP_ROT_AND_MSIZE, XDP_ROT_AND_MSIZE);

  uint32_t da_idx[WORD_SIZE];
  uint32_t db_idx[WORD_SIZE];

  uint32_t i_start = 0;
  bool b_is_marked[WORD_SIZE] = {false};
  uint32_t start_idx = 0;
  uint32_t j = 0;
  bool b_all_marked = true;
  while((b_all_marked) && (j < WORD_SIZE)) {
	 b_all_marked = b_is_marked[j];
	 if(b_all_marked == false) {
		start_idx = j;
	 }
	 j++;
  }
  uint32_t cycle_len = xdp_rot_compute_indices(s, t, b_is_marked, i_start, start_idx, da_idx, db_idx);
  assert(cycle_len == WORD_SIZE);
  //  uint32_t cycle_len = WORD_SIZE;

  assert(start_idx == i_start);
  assert(start_idx == 0);

  uint32_t k = i_start;// + 1;
  uint32_t n = WORD_SIZE;//(i_start + (cycle_len - 1));

  printf("[%s:%d] da_idx = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < WORD_SIZE; i++) {
	 printf("%d ", da_idx[i]);
  }
  printf("\n");
  printf("[%s:%d] db_idx = ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < WORD_SIZE; i++) {
	 printf("%d ", db_idx[i]);
  }
  printf("\n");

  uint32_t N = (1UL << 3);

  for(uint32_t w_init = 0; w_init < N; w_init++) {
	 assert(i_start == 0);
	 uint32_t da_i  = (w_init >> 2) & 1;
	 uint32_t db_i  = (w_init >> 1) & 1;
	 uint32_t dc_i  = (w_init >> 0) & 1;
	 uint32_t db_i_start = db_i;

	 const uint32_t da = 0;
	 const uint32_t db = 0;
	 const  uint32_t dc = 0;
	 //	 da |= (da_i << da_idx[i_start]);
	 //	 db |= (db_i << db_idx[i_start]);
	 //	 dc |= (dc_i << da_idx[i_start]);

	 printf("[%s:%d] %d %d %d | Init diffs: %8X %8X %8X\n", __FILE__, __LINE__, da_i, db_i, dc_i, da, db, dc);

	 gsl_vector_set_all(C[0], 0.0);
	 gsl_vector_set_all(L[0], 0.0);

	 gsl_vector_set_all(C[1], 0.0);
	 gsl_vector_set_all(L[1], 0.0);

	 gsl_vector_set(C[0], 0, 1.0);
	 gsl_vector_set(C[0], 2, 1.0);
	 gsl_vector_set(L[0], 0, 1.0);
	 gsl_vector_set(L[0], 1, 1.0);

	 gsl_vector_set(C[1], 1, 1.0);
	 gsl_vector_set(C[1], 3, 1.0);
	 gsl_vector_set(L[1], 2, 1.0);
	 gsl_vector_set(L[1], 3, 1.0);

	 xdp_rot_and_pddt_i(k, n, s, t, A, C, L, da_idx, db_idx, db_i_start, da_i, db_i, dc_i, da, db, dc);

  }

  gsl_matrix_free(A);
  for(uint32_t i = 0; i < 2; i++) {
	 gsl_vector_free(C[i]);
	 gsl_vector_free(L[i]);
  }
}

// --- PDDT }---


/* --- */

	 // Orphaned loop-body fragment (the enclosing loop is not part of this file
	 // section): skips the current candidate unless da_i, db_i and dc_i each
	 // equal the bit already stored in *da, *db and *dc at position
	 // da_idx[k - 1], db_idx[k - 1] and da_idx[k - 1] respectively.
	 uint32_t da_i_prev = (*da >> da_idx[k - 1]) & 1;
	 if(da_i != da_i_prev)
		continue;
	 uint32_t db_i_prev = (*db >> db_idx[k - 1]) & 1;
	 if(db_i != db_i_prev)
		continue;
	 uint32_t dc_i_prev = (*dc >> da_idx[k - 1]) & 1;
	 if(dc_i != dc_i_prev)
		continue;



/* --- */

/**
 * Early skeleton of the pDDT enumeration: for each of the 8 initial bit
 * triples (da_i, db_i, dc_i) and for each position i of the cycle it prints
 * every next triple (da_ii, db_ii, dc_ii) whose entry in
 * xdp_rot_and_diff_state is non-zero. No probabilities are computed.
 *
 * NOTE(review): the current triple is never advanced to the chosen next
 * triple, so every position i prints the transitions of the same initial row.
 */
void xdp_rot_and_pddt()
{
  gsl_matrix* A[WORD_SIZE];	  // referenced only by the commented-out subgraph call below
  uint32_t i_start = 0;
  uint32_t cycle_len = WORD_SIZE;
  uint32_t da_idx[WORD_SIZE];	  // referenced only by the disabled (#if 0) code below
  uint32_t db_idx[WORD_SIZE];
  //  uint32_t da;
  //  uint32_t db;
  //  uint32_t dc;

  uint32_t N = (1UL << 3);

  for(uint32_t w_init = 0; w_init < N; w_init++) {
	 // w_init is a 3-bit counter, so the triple lives in bits 2, 1 and 0.
	 // (The former shifts by 5, 4 and 3 always produced 0.)
	 uint32_t da_i  = (w_init >> 2) & 1;
	 uint32_t db_i  = (w_init >> 1) & 1;
	 uint32_t dc_i  = (w_init >> 0) & 1;

	 // Placeholder values; overwritten before first use in the inner loop.
	 uint32_t da_ii = ALL_WORDS;
	 uint32_t db_ii = ALL_WORDS;
	 uint32_t dc_ii = ALL_WORDS;

	 assert((i_start + (cycle_len - 1)) <= (WORD_SIZE - 1));
	 for(uint32_t i = i_start + 1; i < (i_start + (cycle_len - 1)); i++) {
#if 0
		uint32_t da_i  = (da >> da_idx[i]) & 1;
		uint32_t db_i  = (da >> db_idx[i]) & 1;
		uint32_t dc_i  = (dc >> da_idx[i]) & 1;
		uint32_t da_ii = (da >> da_idx[i+1]) & 1;
		uint32_t db_ii = (da >> db_idx[i+1]) & 1;
		uint32_t dc_ii = (dc >> da_idx[i+1]) & 1;
#endif

		for(uint32_t w = 0; w < N; w++) {
		  da_ii = (w >> 2) & 1;
		  db_ii = (w >> 1) & 1;
		  dc_ii = (w >> 0) & 1;

		  uint32_t i_row = (da_i << 2) | (db_i << 1) | (dc_i << 0);
		  uint32_t i_col = (da_ii << 2) | (db_ii << 1) | (dc_ii << 0);

		  // A zero entry means this transition is skipped.
		  uint32_t npaths = xdp_rot_and_diff_state[i_row][i_col];
		  if(npaths == 0)
			 continue;

		  // An empty definition of xdp_rot_and_pddt_i() used to be nested here;
		  // function definitions inside a function body are not valid C++, so
		  // it was removed.

		  //		printf("[%s:%d]\n", __FILE__, __LINE__);
		  printf("%2d: %d%d%d -> %d%d%d\n", i, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);

#if 0									  // DEBUG
		  printf("--- [%2d] ---\n", i);
#endif
		  //		xdp_rot_and_compute_subgraph(A[i], da_i, db_i, dc_i, da_ii, db_ii, dc_ii);
		}
		printf("\n");
	 }
  }

}


/* --- */

/**
 * Recursive step of the threshold-bounded pDDT enumeration for ROT-AND.
 *
 * When k == n the differential (*da -> *dc) with probability *p is inserted
 * into both output containers, *cnt_new is incremented and debug output is
 * printed, including the value of xdp_rot_and(*da, *dc, s, t) for comparison.
 *
 * Otherwise the current bits are read back from *da, *db, *dc at the
 * positions of step k - 1, every next triple (da_ii, db_ii = da_i, dc_ii)
 * with a non-zero entry in xdp_rot_and_diff_state is tried, and the search
 * recurses with k + 1 whenever the new probability exceeds \p p_thres.
 *
 * \param k current position in the index arrays (callers in this file start at k_start + 1).
 * \param k_start first position of the cycle; asserted to be 0 at the last step.
 * \param n the recursion ends when k == n.
 * \param p_thres probability threshold.
 * \param A scratch matrix, zeroed and recomputed for every candidate.
 * \param C vectors multiplied by A; the next level receives the products.
 * \param L vectors used in the dot products with the products A * C[j].
 * \param da_idx bit positions used for da and dc at each step.
 * \param db_idx bit positions used for db at each step.
 * \param da, db, dc differences assigned so far (read only; copies are extended and passed down).
 * \param p probability of the differences assigned so far.
 * \param s, t rotation constants, forwarded to xdp_rot_and.
 * \param hways_diff_set_dx_dy output set.
 * \param hways_diff_mset_p output multiset.
 * \param cnt_new counter of inserted differentials.
 */
void xdp_rot_and_pddt_i(uint32_t k, const uint32_t k_start, const uint32_t n, const double p_thres,
								gsl_matrix* A, gsl_vector* C[2], gsl_vector* L[2],
								uint32_t da_idx[WORD_SIZE], uint32_t db_idx[WORD_SIZE],
								uint32_t* da, uint32_t* db, uint32_t* dc, double* p, uint32_t s, uint32_t t,
								std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
								std::multiset<differential_t, struct_comp_diff_p>* hways_diff_mset_p,
								uint32_t* cnt_new)
{

  if(k == n) {
	 assert(*p > p_thres);

	 //	 *p /= (double)(1ULL << WORD_SIZE);
	 differential_t diff;
	 diff.dx = *da;
	 diff.dy = *dc;
	 diff.p = *p;

#if 1									  // DEBUG
	 for(uint32_t j = 0; j < XDP_ROT_AND_NISTATES; j++) { // initial states
		printf("%d: C%d ", k, j);
		xdp_rot_and_print_vector(C[j]);
		printf("\n");
	 }
#endif

	 hways_diff_mset_p->insert(diff);
	 hways_diff_set_dx_dy->insert(diff);
	 (*cnt_new)++;

	 printf("[%s:%d]\n", __FILE__, __LINE__);
	 print_binary(*da);
	 printf("\n");
	 print_binary(*dc);
	 printf("\n");
#if 1									  // DEBUG
	 // Compare the accumulated probability with the directly computed one.
	 double p_th = xdp_rot_and(*da, *dc, s, t);
	 printf("[%s:%d] %2d %2d | XDP_AND_TH1[%8X->%8X] = %6.5f 2^%f\n", 
			  __FILE__, __LINE__, s, t, *da, *dc, *p, log2(*p));
	 printf("[%s:%d] %2d %2d | XDP_AND_TH2[%8X->%8X] = %6.5f 2^%f\n", 
			  __FILE__, __LINE__, s, t, *da, *dc, p_th, log2(p_th));
	 //	 printf("%f 2^%f\n", p_th, log2(p_th));
	 printf("\n");
	 assert(p_th <= 1.0);
#endif
	 return;
  } 

#if 0									  // DEBUG
  printf("[%s:%d] %d %d %d\n", __FILE__, __LINE__, k, k_start, n);
#endif
  assert(k < WORD_SIZE);

  // Current bits of da and db, taken from the positions of the previous step.
  // (A separate outer `dc_i` used to be declared here as well; it was dead
  // because the loop variable below shadowed it, so it has been removed.)
  const uint32_t da_i = (*da >> da_idx[k - 1]) & 1;
  const uint32_t db_i = (*db >> db_idx[k - 1]) & 1;

  for(uint32_t dc_i = 0; dc_i < 2; dc_i++) { 
#if 1
	 // Past the first position, dc_i must agree with the bit already stored in *dc.
	 if(k > k_start) {
		uint32_t dc_i_prev = (*dc >> da_idx[k - 1]) & 1;
		if(dc_i != dc_i_prev)
		  continue;
	 }
#endif
	 for(uint32_t da_ii = 0; da_ii < 2; da_ii++) { 
		// At the last step the new da bit must equal the db bit at the start position.
		if(k == (n - 1)) {
		  assert(k_start == 0);
		  uint32_t db_k_start = (*db >> db_idx[k_start]) & 1;
		  if(da_ii != db_k_start)
			 continue;
		}
		// The next db bit is always the current da bit.
		uint32_t db_ii = da_i;
		for(uint32_t dc_ii = 0; dc_ii < 2; dc_ii++) { 

		  uint32_t i_row = (da_i << 2) | (db_i << 1) | (dc_i << 0);
		  uint32_t i_col = (da_ii << 2) | (db_ii << 1) | (dc_ii << 0);

		  // Skip transitions with a zero table entry.
		  uint32_t npaths = xdp_rot_and_diff_state[i_row][i_col];
		  if(npaths == 0)
			 continue;

		  gsl_matrix_set_all(A, 0.0);	  // init
		  xdp_rot_and_compute_subgraph(A, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);

		  // temp: R[j] = A * C[j], allocated per candidate and freed below
		  gsl_vector* R[XDP_ROT_AND_NISTATES];
		  for(uint32_t j = 0; j < XDP_ROT_AND_NISTATES; j++) {
			 R[j] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
		  }
		  double new_p = 0.0;

		  // L A C
		  for(uint32_t j = 0; j < XDP_ROT_AND_NISTATES; j++) { // initial states
#if 1									  // DEBUG
			 printf("%d: C%d ", k, j);
			 xdp_rot_and_print_vector(C[j]);
			 printf("\n");
#endif
			 // L A C
			 double p_j = 0.0;
			 gsl_blas_dgemv(CblasNoTrans, 1.0, A, C[j], 0.0, R[j]);
			 gsl_blas_ddot(L[j], R[j], &p_j);
			 //		  new_p += p_j;
			 // NOTE(review): the divisor 32.0 is an unexplained constant -- confirm its meaning.
			 new_p += (p_j / 32.0);
		  }

		  // continue only if the probability so far is still bigger than the best found so far
		  if(new_p > p_thres) {
#if 1											  // DEBUG
			 // The bit about to be assigned must not have been set before.
			 uint32_t t_i = (*da >> da_idx[k]) & 1;
			 assert(t_i == 0);
#endif
			 printf("%2d: Assign bits %2d: %d %d %d\n", k, da_idx[k], da_ii, db_ii, dc_ii);
			 uint32_t new_da = *da | (da_ii << da_idx[k]);
			 uint32_t new_db = *db | (db_ii << db_idx[k]);
			 uint32_t new_dc = *dc | (dc_ii << da_idx[k]);
			 //		  printf("%d %d\n", da_idx[k - 1], da_idx[k]);
			 //		  printf("%d %d\n", db_idx[k - 1], db_idx[k]);
			 //		  printf("%d %d\n", da_idx[k - 1], da_idx[k]);

			 if(k == (n - 1)) {
				printf("[%s:%d]\n", __FILE__, __LINE__);
				print_binary(new_da);
				printf("\n");
				print_binary(new_db);
				printf("\n");
				print_binary(new_dc);
				printf("\n");
			 }
			 // R takes the place of C in the next level.
			 xdp_rot_and_pddt_i(k+1, k_start, n, p_thres, A, R, L, da_idx, db_idx, &new_da, &new_db, &new_dc, &new_p, s, t, hways_diff_set_dx_dy, hways_diff_mset_p, cnt_new);
		  }

		  for(uint32_t j = 0; j < XDP_ROT_AND_NISTATES; j++) {
			 gsl_vector_free(R[j]);
		  }
		}
	 }
  } // dc_i -- this closing brace was missing, leaving the function body unterminated
  return;
}

/*
LSB -> MSB
da[0..15] =  0  7 14  5 12  3 10  1  8 15  6 13  4 11  2  9
db[0..15] =  9  0  7 14  5 12  3 10  1  8 15  6 13  4 11  2
*/
/**
 * Compute pDDT for XOR differences for the ROT-AND component of Simon.
 *
 * For every cycle of bit positions returned by xdp_rot_compute_indices
 * (asserted to be a single cycle covering all WORD_SIZE positions) the
 * function allocates the work matrix and the start/final state vectors,
 * places the initial bit triple into the differences and runs the recursive
 * search xdp_rot_and_pddt_i, which fills the two output containers.
 *
 * \param s first rotation constant.
 * \param t second rotation constant.
 * \param p_thres probability threshold passed to the recursive search.
 * \param hways_diff_set_dx_dy output set, passed through to xdp_rot_and_pddt_i.
 * \param hways_diff_mset_p output multiset, passed through to xdp_rot_and_pddt_i.
 * \return the counter that xdp_rot_and_pddt_i increments for every inserted differential.
 * \see max_xdp_rot_and_bounds_0
 */
uint32_t xdp_rot_and_pddt(const uint32_t s, const uint32_t t, const double p_thres,
								  std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
								  std::multiset<differential_t, struct_comp_diff_p>* hways_diff_mset_p)
{
  uint32_t ndiff = 0;
  // Only the all-zero initial triple (w = 0) is explored at the moment.
  for(uint32_t w = 0; w < 1; w++) {
	 uint32_t da_idx[WORD_SIZE] = {0};
	 uint32_t db_idx[WORD_SIZE] = {0};

	 uint32_t cycle_len = WORD_SIZE;
	 uint32_t i_start = 0;
	 bool b_is_marked[WORD_SIZE] = {false};
	 while(i_start != WORD_SIZE) {
		// Find the first position that is not marked yet.
		uint32_t start_idx = 0;
		uint32_t j = 0;
		bool b_all_marked = true;
		while((b_all_marked) && (j < WORD_SIZE)) {
		  b_all_marked = b_is_marked[j];
		  if(b_all_marked == false) {
			 start_idx = j;
		  }
		  j++;
		}
		cycle_len = xdp_rot_compute_indices(s, t, b_is_marked, i_start, start_idx, da_idx, db_idx);
		assert(cycle_len == WORD_SIZE);
		assert((i_start + cycle_len) <= WORD_SIZE);

		gsl_matrix* A = gsl_matrix_calloc(XDP_ROT_AND_MSIZE, XDP_ROT_AND_MSIZE);
		gsl_vector* C[XDP_ROT_AND_NISTATES];
		gsl_vector* L[XDP_ROT_AND_NISTATES];

		for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) {
		  C[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
		  L[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
		}

		for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) { // start states
		  uint32_t ss[2] = {0};
		  if(i == 0) {
			 ss[0] = 0;
			 ss[1] = 2;
		  } 
		  if(i == 1) {
			 ss[0] = 1;
			 ss[1] = 3;
		  } 
		  gsl_vector_set(C[i], ss[0], 1.0); 
		  gsl_vector_set(C[i], ss[1], 1.0); 
		}

		for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) { // final states
		  uint32_t fs[2] = {0};
		  if(i == 0) {
			 fs[0] = 0;
			 fs[1] = 1;
		  } 
		  if(i == 1) {
			 fs[0] = 2;
			 fs[1] = 3;
		  } 
		  gsl_vector_set(L[i], fs[0], 1.0); 
		  gsl_vector_set(L[i], fs[1], 1.0); 
		}

		uint32_t n = i_start + cycle_len;
		uint32_t k = i_start + 1;	  // !!
		assert(n == WORD_SIZE);
		assert(k == 1);
		uint32_t k_start = k - 1;

		uint32_t da_0 = ((w >> 2) & 1);
		uint32_t db_0 = ((w >> 1) & 1);
		uint32_t dc_0 = ((w >> 0) & 1);

		// Place the initial bits where xdp_rot_and_pddt_i reads them back:
		// da and dc bits at da_idx[], db bits at db_idx[]. (db and dc used to
		// be placed with the two index arrays swapped.)
		uint32_t da_init = (da_0 << da_idx[i_start]);
		uint32_t db_init = (db_0 << db_idx[i_start]);
		uint32_t dc_init = (dc_0 << da_idx[i_start]);
		//		printf("%d: %8X %8X %8X %d %d %d\n", w, da_init, db_init, dc_init, ((w << 2) & 1), ((w << 1) & 1), ((w << 0) & 1));
		printf("%d: %8X %8X %8X | %d %d %d\n", w, da_init, db_init, dc_init, da_0, db_0, dc_0);
		double p_init = 0.0;
		xdp_rot_and_pddt_i(k, k_start, n, p_thres, A, C, L, da_idx, db_idx, &da_init, &db_init, &dc_init, &p_init, s, t, hways_diff_set_dx_dy, hways_diff_mset_p, &ndiff);

		for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) {
		  gsl_vector_free(C[i]);
		  gsl_vector_free(L[i]);
		}
		gsl_matrix_free(A);

		i_start += cycle_len;

	 } // cycle
  }	// w
  return ndiff;
}


/* --- */

/**
 * Unfinished iterative counterpart of the recursive pDDT search for ROT-AND.
 *
 * Sets up the index arrays, the work matrix and the start/final state
 * vectors, then walks over the positions k of the cycle and, for every
 * candidate next pair (da_ii, dc_ii) with db_ii = da_i, computes the
 * probability contribution and ORs the candidate bits into the running
 * differences.
 *
 * NOTE(review): the computed probability is not compared against \p p_thres,
 * nothing is inserted into the output containers and the returned counter is
 * never incremented, so the function always returns 0. The current bits are
 * read once per position while the differences are modified inside the
 * candidate loops -- the search logic is incomplete.
 *
 * \param s first rotation constant.
 * \param t second rotation constant.
 * \param p_thres probability threshold (currently unused).
 * \param hways_diff_set_dx_dy output set (currently unused).
 * \param hways_diff_mset_p output multiset (currently unused).
 * \return number of differentials found (currently always 0).
 */
uint32_t xdp_rot_and_pddt_iterative(const uint32_t s, const uint32_t t, const double p_thres,
												std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
												std::multiset<differential_t, struct_comp_diff_p>* hways_diff_mset_p)
{
  (void)p_thres;
  (void)hways_diff_set_dx_dy;
  (void)hways_diff_mset_p;

  uint32_t ndiff = 0;
  // Only the all-zero initial triple (w = 0) is explored at the moment.
  for(uint32_t w = 0; w < 1; w++) {
	 uint32_t da_idx[WORD_SIZE] = {0};
	 uint32_t db_idx[WORD_SIZE] = {0};

	 uint32_t cycle_len = WORD_SIZE;
	 uint32_t i_start = 0;
	 bool b_is_marked[WORD_SIZE] = {false};

	 while(i_start != WORD_SIZE) {
		// Find the first position that is not marked yet.
		uint32_t start_idx = 0;
		uint32_t j = 0;
		bool b_all_marked = true;
		while((b_all_marked) && (j < WORD_SIZE)) {
		  b_all_marked = b_is_marked[j];
		  if(b_all_marked == false) {
			 start_idx = j;
		  }
		  j++;
		}
		cycle_len = xdp_rot_compute_indices(s, t, b_is_marked, i_start, start_idx, da_idx, db_idx);
		assert(cycle_len == WORD_SIZE);
		assert((i_start + cycle_len) <= WORD_SIZE);

		gsl_matrix* A = gsl_matrix_calloc(XDP_ROT_AND_MSIZE, XDP_ROT_AND_MSIZE);
		gsl_vector* C[XDP_ROT_AND_NISTATES];
		gsl_vector* L[XDP_ROT_AND_NISTATES];

		for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) {
		  C[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
		  L[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
		}

		for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) { // start states
		  uint32_t ss[2] = {0};
		  if(i == 0) {
			 ss[0] = 0;
			 ss[1] = 2;
		  } 
		  if(i == 1) {
			 ss[0] = 1;
			 ss[1] = 3;
		  } 
		  gsl_vector_set(C[i], ss[0], 1.0); 
		  gsl_vector_set(C[i], ss[1], 1.0); 
		}

		for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) { // final states
		  uint32_t fs[2] = {0};
		  if(i == 0) {
			 fs[0] = 0;
			 fs[1] = 1;
		  } 
		  if(i == 1) {
			 fs[0] = 2;
			 fs[1] = 3;
		  } 
		  gsl_vector_set(L[i], fs[0], 1.0); 
		  gsl_vector_set(L[i], fs[1], 1.0); 
		}

		uint32_t n = i_start + cycle_len;
		uint32_t k = i_start + 1;	  // !!
		assert(n == WORD_SIZE);
		assert(k == 1);
		uint32_t k_start = k - 1;

		uint32_t da_0 = ((w >> 2) & 1);
		uint32_t db_0 = ((w >> 1) & 1);
		uint32_t dc_0 = ((w >> 0) & 1);

		// Place the initial bits where the loop below reads them back:
		// da and dc bits at da_idx[], db bits at db_idx[]. (db and dc used to
		// be placed with the two index arrays swapped.)
		uint32_t da_init = (da_0 << da_idx[i_start]);
		uint32_t db_init = (db_0 << db_idx[i_start]);
		uint32_t dc_init = (dc_0 << da_idx[i_start]);
		//		printf("%d: %8X %8X %8X %d %d %d\n", w, da_init, db_init, dc_init, ((w << 2) & 1), ((w << 1) & 1), ((w << 0) & 1));
		printf("%d: %8X %8X %8X | %d %d %d\n", w, da_init, db_init, dc_init, da_0, db_0, dc_0);

		//		xdp_rot_and_pddt_i(k, k_start, n, p_thres, A, C, L, da_idx, db_idx, &da_init, &db_init, &dc_init, &p_init, s, t, hways_diff_set_dx_dy, hways_diff_mset_p, &ndiff);
		for(k = (i_start + 1); k < n; k++) {

		  // Current bits, taken from the positions of the previous step.
		  uint32_t da_i = (da_init >> da_idx[k - 1]) & 1;
		  uint32_t db_i = (db_init >> db_idx[k - 1]) & 1;
		  uint32_t dc_i = (dc_init >> da_idx[k - 1]) & 1;
		  for(uint32_t da_ii = 0; da_ii < 2; da_ii++) { 
			 // At the last step the new da bit must equal the db bit at the start position.
			 if(k == (n - 1)) {
				assert(k_start == 0);
				uint32_t db_k_start = (db_init >> db_idx[k_start]) & 1;
				if(da_ii != db_k_start)
				  continue;
			 }
			 // The next db bit is always the current da bit.
			 uint32_t db_ii = da_i;
			 for(uint32_t dc_ii = 0; dc_ii < 2; dc_ii++) { 

				gsl_matrix_set_all(A, 0.0);	  // init
				xdp_rot_and_compute_subgraph(A, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);

				// temp: R[i] = A * C[i], allocated per candidate and freed below
				gsl_vector* R[XDP_ROT_AND_NISTATES];
				for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) {
				  R[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
				}
				double new_p = 0.0;
				for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) { // initial states
#if 1									  // DEBUG
				  printf("%d: C%d ", k, i);
				  xdp_rot_and_print_vector(C[i]);
#endif
				  // L A C
				  double p_i = 0.0;
				  gsl_blas_dgemv(CblasNoTrans, 1.0, A, C[i], 0.0, R[i]);
				  gsl_blas_ddot(L[i], R[i], &p_i);
				  //		  new_p += p_i;
				  new_p += (p_i / 32.0);
				}
				(void)new_p;	  // not used yet: no threshold test is implemented

				da_init = da_init | (da_ii << da_idx[k]);
				db_init = db_init | (db_ii << db_idx[k]);
				// The dc bit belongs in dc_init (it used to overwrite db_init).
				dc_init = dc_init | (dc_ii << da_idx[k]);

				// The loop must be bounded by its own index; it used to test the
				// rotation constant s, which either never terminated (freeing
				// past the end of R) or never freed anything.
				for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) {
				  gsl_vector_free(R[i]);
				}

			 }
		  }
		}
	 

		for(uint32_t i = 0; i < XDP_ROT_AND_NISTATES; i++) {
		  gsl_vector_free(C[i]);
		  gsl_vector_free(L[i]);
		}
		gsl_matrix_free(A);

		i_start += cycle_len;

	 } // cycle
  }	// w
  return ndiff;
}


/* --- */


// Lookup table indexed as xdp_rot_and_diff_state[row][col] with
//   row = (da_i  << 2) | (db_i  << 1) | dc_i
//   col = (da_ii << 2) | (db_ii << 1) | dc_ii
// The search code reads an entry as `npaths` and skips the transition when it is 0.
uint32_t xdp_rot_and_diff_state[8][8] = {
  /* row 0 */ {8, 0, 0, 0, 4, 4, 0, 0},
  /* row 1 */ {0, 0, 0, 0, 0, 0, 0, 0},   // row is impossible state
  /* row 2 */ {4, 0, 0, 0, 4, 0, 0, 0},
  /* row 3 */ {4, 0, 0, 0, 0, 4, 0, 0},
  /* row 4 */ {0, 0, 2, 2, 0, 0, 2, 2},
  /* row 5 */ {0, 0, 2, 2, 0, 0, 2, 2},
  /* row 6 */ {0, 0, 2, 2, 0, 0, 2, 2},
  /* row 7 */ {0, 0, 2, 2, 0, 0, 2, 2}
};

/**
 * Recursive step of an experimental pDDT enumeration for ROT-AND that tries
 * all values of the current bit triple (da_i, db_i, dc_i) and of the next
 * triple (da_ii, db_ii, dc_ii) at every level.
 *
 * When k == n - 1 the differential (*da -> *dc) with probability *p is
 * inserted into both output containers, *cnt_new is incremented and the
 * value is printed next to xdp_rot_and(*da, *dc, s, t).
 *
 * Otherwise every pair of triples with a non-zero entry in
 * xdp_rot_and_diff_state is evaluated and the search recurses with k + 1
 * whenever the new probability exceeds \p p_thres.
 *
 * \param k current position in the index arrays.
 * \param k_start first position of the cycle.
 * \param n the recursion ends when k == n - 1.
 * \param p_thres probability threshold.
 * \param A scratch matrix, zeroed and recomputed for every candidate.
 * \param C vectors multiplied by A; the next level receives the products.
 * \param L vectors used in the dot products with the products A * C[s].
 * \param da_idx bit positions used for da and dc.
 * \param db_idx bit positions used for db.
 * \param da, db, dc differences assigned so far.
 * \param p probability of the differences assigned so far.
 * \param s, t rotation constants, forwarded to xdp_rot_and and to the recursion.
 * \param hways_diff_set_dx_dy output set.
 * \param hways_diff_mset_p output multiset.
 * \param cnt_new counter of inserted differentials.
 */
void xdp_rot_and_pddt_i(uint32_t k, const uint32_t k_start, const uint32_t n, const double p_thres,
								gsl_matrix* A, gsl_vector* C[2], gsl_vector* L[2],
								uint32_t da_idx[WORD_SIZE], uint32_t db_idx[WORD_SIZE],
								uint32_t* da, uint32_t* db, uint32_t* dc, double* p, uint32_t s, uint32_t t,
								std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
								std::multiset<differential_t, struct_comp_diff_p>* hways_diff_mset_p,
								uint32_t* cnt_new)
{
  // Local working copies of the differences.
  // NOTE(review): they are declared once, outside all loops, and are only ever
  // OR-ed into, so bits set for one candidate remain set for all later
  // candidates at this level -- confirm whether they should be reset.
  uint32_t new_da = *da;
  uint32_t new_db = *db;
  uint32_t new_dc = *dc;

  // End of recursion: store the differential and print it.
  if(k == (n  - 1)) {
	 assert(*p > p_thres);		  // !!!

	 differential_t diff;
	 diff.dx = *da;
	 diff.dy = *dc;
	 diff.p = *p;

	 hways_diff_mset_p->insert(diff);
	 hways_diff_set_dx_dy->insert(diff);
	 (*cnt_new)++;

#if 1									  // DEBUG
	 // Compare the accumulated probability with the directly computed one.
	 double p_th = xdp_rot_and(*da, *dc, s, t);
	 printf("[%s:%d] %2d %2d | XDP_AND_TH1[%8X->%8X] = %6.5f 2^%f\n", 
			  __FILE__, __LINE__, s, t, *da, *dc, *p, log2(*p));
	 printf("[%s:%d] %2d %2d | XDP_AND_TH2[%8X->%8X] = %6.5f 2^%f\n", 
			  __FILE__, __LINE__, s, t, *da, *dc, p_th, log2(p_th));
	 //	 printf("%f 2^%f\n", p_th, log2(p_th));
	 printf("\n");
	 assert(p_th <= 1.0);
#endif

	 return;
  } 

#if 0									  // DEBUG
  printf("[%s:%d] %d %d %d\n", __FILE__, __LINE__, k, k_start, n);
#endif
  assert((k+1) < WORD_SIZE);

  // cycle over the possible values of the k-th and (k+1)-st bits of *dc
  // (the loops below in fact enumerate both triples, i.e. also the da and db bits)
  for(uint32_t da_i = 0; da_i < 2; da_i++) { 
	 for(uint32_t db_i = 0; db_i < 2; db_i++) { 
		for(uint32_t dc_i = 0; dc_i < 2; dc_i++) { 
		  for(uint32_t da_ii = 0; da_ii < 2; da_ii++) { 
#if 1
			 // At the second-to-last step the new da bit must equal the db bit at the start position.
			 if(k == (n - 2)) {
				uint32_t db_k_start = (new_db >> db_idx[k_start]) & 1;
				if(da_ii != db_k_start)
				  continue;
			 }
#endif
			 for(uint32_t db_ii = 0; db_ii < 2; db_ii++) { 
				for(uint32_t dc_ii = 0; dc_ii < 2; dc_ii++) { 

				  uint32_t i_row = (da_i << 2) | (db_i << 1) | (dc_i << 0);
				  uint32_t i_col = (da_ii << 2) | (db_ii << 1) | (dc_ii << 0);

				  // Skip transitions with a zero table entry.
				  uint32_t npaths = xdp_rot_and_diff_state[i_row][i_col];
				  if(npaths == 0)
					 continue;

				  gsl_matrix_set_all(A, 0.0);	  // init
				  xdp_rot_and_compute_subgraph(A, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);

				  // temp
				  // The loop variable `s` in the three loops below shadows the parameter `s`.
				  gsl_vector* R[XDP_ROT_AND_NISTATES];
				  for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) {
					 R[s] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
				  }
				  double new_p = 0.0;

				  // L A C
				  for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) { // initial states
					 // L A C
					 double p_s = 0.0;
					 gsl_blas_dgemv(CblasNoTrans, 1.0, A, C[s], 0.0, R[s]);
					 gsl_blas_ddot(L[s], R[s], &p_s);
					 //				  new_p += p_s;
					 // NOTE(review): the divisor 32.0 is an unexplained constant -- confirm its meaning.
					 new_p += (p_s / 32.0);
				  }

				  // continue only if the probability so far is still bigger than the best found so far
				  if(new_p > p_thres) {	  // !!!
#if 1											  // DEBUG
					 // t_ii is computed but never used; only t_i is checked.
					 uint32_t t_i = (new_da >> da_idx[k]) & 1;
					 uint32_t t_ii = (new_da >> da_idx[k+1]) & 1;
				  assert(t_i == 0);
#endif
					 printf("%2d: %2d %2d | %2d %2d\n", k, da_idx[k], da_idx[k+1], db_idx[k], db_idx[k+1]);

					 // Both the current and the next triple are written, at positions k and k + 1.
					 new_da = new_da | (da_i << da_idx[k]) | (da_ii << da_idx[k+1]);
					 new_db = new_db | (db_i << db_idx[k]) | (db_ii << db_idx[k+1]);
					 new_dc = new_dc | (dc_i << da_idx[k]) | (dc_ii << da_idx[k+1]);
					 // Here `s` is the function parameter again (the shadowing loops have ended);
					 // R takes the place of C in the next level.
					 xdp_rot_and_pddt_i(k+1, k_start, n, p_thres, A, R, L, da_idx, db_idx, &new_da, &new_db, &new_dc, &new_p, s, t, hways_diff_set_dx_dy, hways_diff_mset_p, cnt_new);
				  }

				  for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) {
					 gsl_vector_free(R[s]);
				  }
				}
			 }
		  }
		}
	 }
  }
}

/* --- */
/**
 * Transition table between two consecutive 3-bit difference states.
 * Both the row and the column index pack three difference bits as
 * (da << 2) | (db << 1) | dc. A zero entry marks a transition that is
 * skipped by the search; row 1 is an impossible state (all zeros).
 */
uint32_t xdp_rot_and_diff_state[8][8] = {
  {8, 0, 0, 0, 4, 4, 0, 0},
  {0, 0, 0, 0, 0, 0, 0, 0},	  // row is impossible state
  {4, 0, 0, 0, 4, 0, 0, 0},
  {4, 0, 0, 0, 0, 4, 0, 0},
  {0, 0, 2, 2, 0, 0, 2, 2},
  {0, 0, 2, 2, 0, 0, 2, 2},
  {0, 0, 2, 2, 0, 0, 2, 2},
  {0, 0, 2, 2, 0, 0, 2, 2}
};

/**
 * Recursive step of a threshold search over differences (da, db -> dc).
 *
 * At depth \p k the function enumerates candidate values for the bits at
 * positions da_idx[k], da_idx[k+1] (of da and dc) and db_idx[k], db_idx[k+1]
 * (of db), computes a probability for the candidate and recurses to depth
 * k+1 only if that probability is strictly above \p p_thres. When
 * k == n - 1 the accumulated differential (dx = *da, dy = *dc, p = *p) is
 * inserted into both output containers and *cnt_new is incremented.
 *
 * \param k current depth.
 * \param k_start depth at which the search started; depths above it are
 *        subject to the consistency checks against previously set bits.
 * \param n terminal depth is n - 1.
 * \param p_thres probability threshold (strict).
 * \param A scratch matrix, overwritten for every candidate.
 * \param C right-hand vectors multiplied by A for every initial state.
 * \param L left-hand vectors used in the final dot product.
 * \param da_idx, db_idx bit positions addressed at each depth.
 * \param da, db, dc partial differences built so far (read only here).
 * \param p probability of the partial differences built so far.
 * \param s, t passed through to xdp_rot_and() in the debug check and to the recursion.
 * \param hways_diff_set_dx_dy, hways_diff_mset_p containers receiving completed differentials.
 * \param cnt_new counter of inserted differentials.
 */
void xdp_rot_and_pddt_i(uint32_t k, const uint32_t k_start, const uint32_t n, const double p_thres,
								gsl_matrix* A, gsl_vector* C[2], gsl_vector* L[2],
								uint32_t da_idx[WORD_SIZE], uint32_t db_idx[WORD_SIZE],
								uint32_t* da, uint32_t* db, uint32_t* dc, double* p, uint32_t s, uint32_t t,
								std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
								std::multiset<differential_t, struct_comp_diff_p>* hways_diff_mset_p,
								uint32_t* cnt_new)
{
  // Terminal depth: store the completed differential and stop recursing.
  if(k == (n  - 1)) {
	 assert(*p > p_thres);		  // !!!

	 differential_t diff;
	 diff.dx = *da;
	 diff.dy = *dc;
	 diff.p = *p;

	 hways_diff_mset_p->insert(diff);
	 hways_diff_set_dx_dy->insert(diff);
	 (*cnt_new)++;

#if 1									  // DEBUG
	 // Recompute the probability with xdp_rot_and() and print it next to the
	 // value accumulated by the search so the two can be compared by eye.
	 double p_th = xdp_rot_and(*da, *dc, s, t);
	 printf("[%s:%d] %2d %2d | XDP_AND_TH1[%8X->%8X] = %6.5f 2^%f\n", 
			  __FILE__, __LINE__, s, t, *da, *dc, *p, log2(*p));
	 printf("[%s:%d] %2d %2d | XDP_AND_TH2[%8X->%8X] = %6.5f 2^%f\n", 
			  __FILE__, __LINE__, s, t, *da, *dc, p_th, log2(p_th));
	 //	 printf("%f 2^%f\n", p_th, log2(p_th));
	 printf("\n");
	 assert(p_th <= 1.0);
#endif

	 return;
  } 

  // Working copies of the partial differences; candidate bits are OR-ed into
  // them further down.
  uint32_t new_da = *da;
  uint32_t new_db = *db;
  uint32_t new_dc = *dc;

#if 0									  // DEBUG
  printf("[%s:%d] %d %d %d\n", __FILE__, __LINE__, k, k_start, n);
#endif
  assert((k+1) < WORD_SIZE);

  // cycle over the possible values of the k-th and (k+1)-st bits of *dc
  for(uint32_t da_i = 0; da_i < 2; da_i++) { 
	 // Past the first depth, the k-th bit must equal the bit already stored
	 // at position da_idx[k - 1].
	 if(k > k_start) {
		uint32_t da_i_prev = (new_da >> da_idx[k - 1]) & 1;
		if(da_i != da_i_prev)
		  continue;
	 }
	 for(uint32_t db_i = 0; db_i < 2; db_i++) { 
		// Same constraint for db, using db_idx.
		if(k > k_start) {
		  uint32_t db_i_prev = (new_db >> db_idx[k - 1]) & 1;
		  if(db_i != db_i_prev)
			 continue;
		}
		for(uint32_t dc_i = 0; dc_i < 2; dc_i++) { 
		  // Same constraint for dc; note that dc is addressed through da_idx.
		  if(k > k_start) {
			 uint32_t dc_i_prev = (new_dc >> da_idx[k - 1]) & 1;
			 if(dc_i != dc_i_prev)
				continue;
		  }
		  for(uint32_t da_ii = 0; da_ii < 2; da_ii++) { 
			 // At the last pair, the (k+1)-st bit of da must match the bit of
			 // db stored at the starting position.
			 if(k == (n - 2)) {
				uint32_t db_k_start = (new_db >> db_idx[k_start]) & 1;
				if(da_ii != db_k_start)
				  continue;
			 }
			 // db_ii is not enumerated: it is tied to da_i.
			 uint32_t db_ii = da_i;
			 for(uint32_t dc_ii = 0; dc_ii < 2; dc_ii++) { 

				gsl_matrix_set_all(A, 0.0);	  // init
				xdp_rot_and_compute_subgraph(A, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);

				// temp
				// R[s] holds A * C[s]; allocated per candidate and freed at the
				// end of this iteration.
				gsl_vector* R[XDP_ROT_AND_NISTATES];
				for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) {
				  R[s] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
				}
				double new_p = 0.0;

				// L A C
				for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) { // initial states
				  // L A C
				  double p_s = 0.0;
				  gsl_blas_dgemv(CblasNoTrans, 1.0, A, C[s], 0.0, R[s]);
				  gsl_blas_ddot(L[s], R[s], &p_s);
				  //				  new_p += p_s;
				  // NOTE(review): the fixed divisor 32 is not derived anywhere in
				  // this function - confirm the intended normalization.
				  new_p += (p_s / 32.0);
				}

				// continue only if the probability so far is still bigger than the best found so far
				if(new_p > p_thres) {	  // !!!
#if 0											  // DEBUG
				  uint32_t t_i = (new_da >> da_idx[k]) & 1;
				  uint32_t t_ii = (new_da >> da_idx[k+1]) & 1;
				  //				  assert(t_i == 0);
#endif
				  printf("%2d: %2d %2d | %2d %2d\n", k, da_idx[k], da_idx[k+1], db_idx[k], db_idx[k+1]);

				  // NOTE(review): the bits are OR-ed into new_da/new_db/new_dc and
				  // never cleared, so bits set for an earlier candidate remain set
				  // for later candidates of the same call - confirm this is intended.
				  new_da = new_da | (da_i << da_idx[k]) | (da_ii << da_idx[k+1]);
				  new_db = new_db | (db_i << db_idx[k]) | (db_ii << db_idx[k+1]);
				  new_dc = new_dc | (dc_i << da_idx[k]) | (dc_ii << da_idx[k+1]);
				  // The freshly computed R vectors become the callee's C vectors.
				  // Here s and t are the function parameters (the loop-local s is
				  // out of scope).
				  xdp_rot_and_pddt_i(k+1, k_start, n, p_thres, A, R, L, da_idx, db_idx, &new_da, &new_db, &new_dc, &new_p, s, t, hways_diff_set_dx_dy, hways_diff_mset_p, cnt_new);
				}

				for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) {
				  gsl_vector_free(R[s]);
				}
			 }
		  }
		}
	 }
  }
}


/* --- */

/**
 * For a fixed input difference da, search recursively for output
 * differences dc whose probability is above a certain threshold.
 *
 * At depth \p k the bits of dc at positions da_idx[k] and da_idx[k+1] are
 * enumerated; the recursion continues to depth k+1 only while the
 * probability of the partial assignment is strictly above \p p_thres.
 * When k == n - 1 the current (*dc, *p) pair is written to
 * (*dc_max, *p_max).
 *
 * \param k current depth.
 * \param k_start depth at which the search started.
 * \param n terminal depth is n - 1.
 * \param p probability of the partial output difference built so far.
 * \param dc partial output difference built so far.
 * \param A scratch matrix, overwritten for every candidate.
 * \param B per-state, per-depth vectors used in the final dot product.
 * \param C right-hand vectors multiplied by A for every initial state.
 * \param da_idx, db_idx bit positions addressed at each depth.
 * \param da input difference.
 * \param db only forwarded to the recursive call.
 * \param dc_max, p_max receive the result at the terminal depth.
 * \param p_thres probability threshold (strict).
 * \param hways_diff_set_dx_dy, hways_diff_mset_p, cnt_new only forwarded to
 *        the recursive call; nothing is inserted by this function.
 */
void xdp_rot_and_dc_set(uint32_t k, const uint32_t k_start, const uint32_t n, double* p, uint32_t* dc,
								gsl_matrix* A, gsl_vector* B[XDP_ROT_AND_NISTATES][WORD_SIZE], gsl_vector* C[2],
								uint32_t da_idx[WORD_SIZE], uint32_t db_idx[WORD_SIZE],
								const uint32_t da, const uint32_t db, 
								uint32_t* dc_max, double* p_max,
								const double p_thres,
								std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
								std::multiset<differential_t, struct_comp_diff_p>* hways_diff_mset_p,
								uint32_t* cnt_new)
{
  // Terminal depth: report the completed output difference.
  if(k == (n  - 1)) {
	 assert(*p > p_thres);		  // !!!
	 *p_max = *p;
	 *dc_max = *dc;

	 // NOTE(review): diff is filled in but never inserted into either
	 // container - confirm whether the insert calls are missing.
	 differential_t diff;
	 diff.dx = da;
	 diff.dy = *dc;
	 diff.p = *p;
	 return;
  } 

#if 0									  // DEBUG
  printf("[%s:%d] %d %d %d\n", __FILE__, __LINE__, k, k_start, n);
#endif
  assert((k+1) < WORD_SIZE);

  // Both the "a" and the "b" bits are read from da, at the positions given
  // by da_idx and db_idx respectively.
  uint32_t da_i  = (da >> da_idx[k]) & 1;
  uint32_t db_i  = (da >> db_idx[k]) & 1;
  //	 uint32_t dc_i  = (dc >> da_idx[k]) & 1;

  uint32_t da_ii = (da >> da_idx[k+1]) & 1;
  uint32_t db_ii = (da >> db_idx[k+1]) & 1;
  //	 uint32_t dc_ii = (dc >> da_idx[k+1]) & 1;

  // cycle over the possible values of the k-th and (k+1)-st bits of *dc
  for(uint32_t dc_i = 0; dc_i < 2; dc_i++) { 
#if 1
	 // Past the first depth, the k-th bit must equal the bit of *dc already
	 // stored at position da_idx[k - 1].
	 if(k > k_start) {
		uint32_t dc_i_prev = (*dc >> da_idx[k - 1]) & 1;
		if(dc_i != dc_i_prev)
		  continue;
	 }
#endif
	 for(uint32_t dc_ii = 0; dc_ii < 2; dc_ii++) { 

		gsl_matrix_set_all(A, 0.0);	  // init
		xdp_rot_and_compute_subgraph(A, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);
		//		double f = 1.0;
		//		xdp_rot_and_normalize_matrix(A, f);

		// temp
		// R[s] holds A * C[s]; allocated per candidate and freed at the end
		// of this iteration.
		gsl_vector* R[XDP_ROT_AND_NISTATES];
		for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) {
		  R[s] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
		}
		double new_p = 0.0;

		// L A C
		for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) { // initial states
		  // L A C
		  double p_s = 0.0;
		  gsl_blas_dgemv(CblasNoTrans, 1.0, A, C[s], 0.0, R[s]);
		  gsl_blas_ddot(B[s][k + 1], R[s], &p_s);
		  new_p += p_s;
#if 0									  // DEBUG
		  printf("[%s:%d] %d[%d]: ", __FILE__, __LINE__, k, s);
		  printf("R%d x B%d[%d] = ", s, s, k+1);
		  xdp_rot_and_print_vector(R[s]);
		  printf(" X ");
		  xdp_rot_and_print_vector(B[s][k+1]);
		  printf(" | %f\n", p_s);
#endif
		}

		// continue only if the probability so far is still bigger than the best found so far
		if(new_p > p_thres) {	  // !!!
		  // The candidate is built in a fresh local, so *dc itself is not modified.
		  uint32_t new_dc = *dc | (dc_i << da_idx[k]) | (dc_ii << da_idx[k+1]);
		  // The freshly computed R vectors become the callee's C vectors.
		  xdp_rot_and_dc_set(k+1, k_start, n, &new_p, &new_dc, A, B, R, da_idx, db_idx, da, db, dc_max, p_max, p_thres, hways_diff_set_dx_dy, hways_diff_mset_p, cnt_new);
		}

		for(uint32_t s = 0; s < XDP_ROT_AND_NISTATES; s++) {
		  gsl_vector_free(R[s]);
		}
	 }
  }
}


/* --- */

/* 
					 xdp_rot_and_pddt_i(
k+1, 
k_start, 
n, 
&new_p, 
&new_da, 
&new_db, 
&new_dc, 
A, 
B, 
R, 
da_idx, 
db_idx, 
p_thres, 
hways_diff_set_dx_dy, 
hways_diff_mset_p, 
cnt_new);
 */

/* --- */

/**
 * First step of a recursive search for the output difference dc with the
 * highest probability for a fixed input difference da.
 *
 * The bits of dc at positions da_idx[k] and da_idx[k+1] are enumerated; a
 * candidate is followed only while its probability is strictly above the
 * best value found so far (*p_max). When k == n the current (*dc, *p) pair
 * becomes the new best.
 *
 * \param k current depth.
 * \param k_start depth at which the search started.
 * \param n terminal depth.
 * \param p probability of the partial output difference built so far.
 * \param dc partial output difference built so far.
 * \param A scratch matrix, overwritten for every candidate.
 * \param B per-depth vectors used in the final dot product.
 * \param C right-hand input of the matrix-vector product.
 * \param da_idx, db_idx bit positions addressed at each depth.
 * \param da input difference.
 * \param db only forwarded to the recursive call.
 * \param dc_max, p_max best output difference and probability found so far.
 *
 * NOTE(review): C is declared as an array of vector pointers but is passed
 * as-is where a single vector is expected, and the single vector R is
 * forwarded in its place on recursion - confirm the intended type of C.
 */
void max_xdp_rot_and_bounds_0(uint32_t k, const uint32_t k_start, const uint32_t n, double* p, uint32_t* dc,
										gsl_matrix* A, gsl_vector* B[WORD_SIZE + 1], gsl_vector* C[2],
										uint32_t da_idx[WORD_SIZE], uint32_t db_idx[WORD_SIZE],
										const uint32_t da, const uint32_t db, 
										uint32_t* dc_max, double* p_max)
{
  // Terminal depth: a complete candidate only gets here if it beats the best.
  if(k == n) {
	 assert(*p > *p_max);
	 *p_max = *p;
	 *dc_max = *dc;
	 return;
  } 

  // Both the "a" and the "b" bits are read from da, at the positions given
  // by da_idx and db_idx respectively.
  // NOTE(review): index k+1 is read for every k < n; whether da_idx/db_idx
  // are long enough for k == n - 1 depends on the caller - verify.
  uint32_t da_i  = (da >> da_idx[k]) & 1;
  uint32_t db_i  = (da >> db_idx[k]) & 1;
  //	 uint32_t dc_i  = (dc >> da_idx[k]) & 1;

  uint32_t da_ii = (da >> da_idx[k+1]) & 1;
  uint32_t db_ii = (da >> db_idx[k+1]) & 1;
  //	 uint32_t dc_ii = (dc >> da_idx[k+1]) & 1;

  // cycle over the possible values of the k-th and (k+1)-st bits of *dc
  for(uint32_t dc_i = 0; dc_i < 2; dc_i++) { 
	 // Past the first depth, the k-th bit must equal the bit of *dc already
	 // stored at position da_idx[k - 1].
	 if(k > k_start) {
		uint32_t dc_i_prev = (*dc >> da_idx[k - 1]) & 1;
		if(dc_i != dc_i_prev)
		  continue;
	 }
	 for(uint32_t dc_ii = 0; dc_ii < 2; dc_ii++) { 

		gsl_matrix_set_all(A, 0.0);	  // init
		xdp_rot_and_compute_subgraph(A, da_i, db_i, dc_i, da_ii, db_ii, dc_ii);
		double f = 1.0;
		xdp_rot_and_normalize_matrix(A, f);

		// temp
		// R holds A * C; allocated per candidate and freed below.
		gsl_vector* R = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
		double new_p = 0.0;

		// L A C
		gsl_blas_dgemv(CblasNoTrans, 1.0, A, C, 0.0, R);
		gsl_blas_ddot(B[k + 1], R, &new_p);

		// continue only if the probability so far is still bigger than the best found so far
		if(new_p > *p_max) {
		  uint32_t new_dc = *dc | (dc_i << da_idx[k]) | (dc_ii << da_idx[k+1]);

		  // The deeper levels are handled by max_xdp_rot_and_bounds_i (a
		  // different function, not defined in this block).
		  max_xdp_rot_and_bounds_i(k+1, k_start, n, &new_p, &new_dc, A, B, R, da_idx, db_idx, da, db, dc_max, p_max);
		  //		 max_adp_arx_bounds_i(k+1, n, lrot_const, &new_p, &new_de, A, B, R, dc, dd, de_max, p_max);
		}
		gsl_vector_free(R);

	 }
  }
}

/* --- */

/**
 * Propagate two fixed initial state vectors through the transition matrices
 * A[i_start] ... A[i_start + cycle_len - 2] and return the sum of the two
 * resulting probabilities, each scaled by 2^{-cycle_len}.
 *
 * \param A per-position transition matrices.
 * \param B not used by this function.
 * \param i_start index of the first matrix that is applied.
 * \param cycle_len cycle length; (cycle_len - 1) matrices are applied and the
 *        result is divided by 2^{cycle_len}.
 * \param s, t, delta, dc not used by this function.
 * \return prob[0] + prob[1], where prob[j] belongs to initial state j.
 *
 * Debug output: the summed partial probabilities after every position are
 * printed, and a warning is printed whenever they increase from one
 * position to the next.
 */
double max_xdp_rot_and_one_cycle(gsl_matrix* A[WORD_SIZE], gsl_vector* B[XDP_ROT_AND_NISTATES][WORD_SIZE + 1],
											const uint32_t i_start, const uint32_t cycle_len,
											const uint32_t s, const uint32_t t,
											const uint32_t delta, uint32_t* dc)
{
  double p = 0.0;

  //  assert(WORD_SIZE == 5);
  gsl_vector* C[2];
  gsl_vector* L[2];
  gsl_vector* R = gsl_vector_calloc(XDP_ROT_AND_MSIZE);

  for(uint32_t i = 0; i < 2; i++) {
	 C[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
	 L[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
  }

  // Initial (C) and final (L) state selectors for the two cases.
  gsl_vector_set(C[0], 0, 1.0);
  gsl_vector_set(C[0], 2, 1.0);
  gsl_vector_set(L[0], 0, 1.0);
  gsl_vector_set(L[0], 1, 1.0);

  gsl_vector_set(C[1], 1, 1.0);
  gsl_vector_set(C[1], 3, 1.0);
  gsl_vector_set(L[1], 2, 1.0);
  gsl_vector_set(L[1], 3, 1.0);
#if 1									  // DEBUG
  // p_tmp[j][i]: scaled partial probability of case j after position i
  double p_tmp[2][WORD_SIZE] = {{0.0}};
#endif
  double prob[2] = {0.0, 0.0};
  for(uint32_t j = 0; j < 2; j++) {

	 gsl_vector_set_all(R, 0.0);

	 assert((i_start + (cycle_len - 1)) <= (WORD_SIZE - 1));
	 for(uint32_t i = i_start; i < (i_start + (cycle_len - 1)); i++) {
#if 0									  // DEBUG
		printf("[%s:%d] %d|%2d:\n", __FILE__, __LINE__, j, i);
		printf("C%d[%d] = \n", j, i);
		xdp_rot_and_print_vector(C[j]);
		printf("\nA[%d] = \n", i);
		xdp_rot_and_print_matrix(A[i]);
#endif
		// C[j] <- A[i] * C[j] (R is the scratch output of the product)
		gsl_blas_dgemv(CblasNoTrans, 1.0, A[i], C[j], 0.0, R);
		gsl_vector_memcpy(C[j], R);
#if 1									  // DEBUG
		double p_i = 0.0;
		gsl_blas_ddot(L[j], C[j], &p_i);
		p_i /= (double)(1ULL << cycle_len);
		p_tmp[j][i] = p_i;
#endif
	 }
	 gsl_blas_ddot(L[j], C[j], &prob[j]);
#if 1									  // DEBUG
	 // unscaled value, kept only for the diagnostic print below
	 uint32_t tmp = prob[j];
	 prob[j] /= (double)(1ULL << cycle_len);
#endif
#if 1									  // DEBUG
	 if(prob[j] > 1.0) {
		printf("[%s:%d] %d %f %d %f\n", __FILE__, __LINE__, tmp, prob[j], (1U << cycle_len), (double)(1U << cycle_len));
	 }
#endif
	 assert(prob[j] <= 1.0);
#if 0									  // DEBUG
	 //	 printf("C%d[%d] = \n", j, WORD_SIZE - 1);
	 printf("C%d[%d] = \n", j, cycle_len - 1);
	 xdp_rot_and_print_vector(C[j]);
	 printf(" | p = %f\n", prob[j]);
#endif
  }
#if 0									  // DEBUG
  printf("[%s:%d] p[0] %f, p[1] %f\n", __FILE__, __LINE__, prob[0], prob[1]);
#endif
  p = prob[0] + prob[1];
#if 1									  // DEBUG
  // The partial probability at a position is the sum over BOTH cases, in
  // the same way as p = prob[0] + prob[1] above.
  for(uint32_t i = 0; i < WORD_SIZE ; i++) {
	 double p_i = p_tmp[0][i] + p_tmp[1][i];
	 printf("[%2d]%f ", i, p_i);
  }
  printf("\n");
  double p_prev = p_tmp[0][0] + p_tmp[1][0];
  for(uint32_t i = 1; i < WORD_SIZE ; i++) {
	 double p_i = p_tmp[0][i] + p_tmp[1][i];
	 if(p_i > p_prev) {
		printf("[%s:%d] WARNING!! %f %f\n", __FILE__, __LINE__, p_i, p_prev);
	 }
	 //	 assert(p_i <= p_prev);
	 p_prev = p_i;
  }
#endif
  for(uint32_t i = 0; i < 2; i++) {
	 gsl_vector_free(C[i]);
	 gsl_vector_free(L[i]);
  }
  gsl_vector_free(R);

  return p;
}

#if 0
/**
 * Recursive search for the output difference de with the highest
 * probability, one bit per depth. This definition sits inside an "#if 0"
 * region and is therefore not compiled.
 *
 * At depth \p k both values of one bit of de are tried; a candidate is
 * followed only while its probability is strictly above *p_max. When
 * k == n the current (*de, *p) pair becomes the new best.
 *
 * \param k current depth (bit position of dc).
 * \param n terminal depth.
 * \param lrot_const rotation constant; the bit of dd and of de addressed at
 *        depth k is (k + lrot_const) mod WORD_SIZE.
 * \param p probability of the partial output difference built so far.
 * \param de partial output difference built so far.
 * \param A transition matrices indexed by [special position][x][y][z].
 * \param B per-state, per-depth vectors used in the final dot product.
 * \param C right-hand vectors multiplied by the selected matrix.
 * \param dc, dd input differences.
 * \param de_max, p_max best output difference and probability found so far.
 */
void max_adp_arx_bounds_0(uint32_t k, const uint32_t n, const uint32_t lrot_const,
								  double* p, uint32_t* de,
								  gsl_matrix* A[2][2][2][2], gsl_vector* B[ADP_ARX_NISTATES][WORD_SIZE + 1], gsl_vector* C[ADP_ARX_NISTATES],
								  const uint32_t dc, const uint32_t dd, uint32_t* de_max, double* p_max)
{
  // Terminal depth: a complete candidate only gets here if it beats the best.
  if(k == n) {
	 assert(*p > *p_max);
	 *p_max = *p;
	 *de_max = *de;
	 return;
  } 

  // The position is "special" when the rotated index wraps around to 0.
  uint32_t spos = 0;			  // special position
  uint32_t k_rot = ((k + lrot_const) % WORD_SIZE); // (i+r) mod n
  if(k_rot == 0) {
	 spos = 1;
  }

  // get the k-th bit of dc and the (k+r)-th bit of dd
  uint32_t x = (dc >> k) & 1;
  uint32_t y = (dd >> k_rot) & 1;

  // cycle over the possible values of the k-th bits of *de
  for(uint32_t z = 0; z < 2; z++) { 

	 double new_p = 0.0;

	 // temp
	 // R[s] holds A * C[s]; allocated per candidate and freed below.
	 gsl_vector* R[ADP_ARX_NISTATES];
	 for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
		R[s] = gsl_vector_calloc(ADP_ARX_MSIZE);
	 }

	 for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) { // initial states
		// L A C
		double p_s = 0.0;
		gsl_blas_dgemv(CblasNoTrans, 1.0, A[spos][x][y][z], C[s], 0.0, R[s]);
		gsl_blas_ddot(B[s][k + 1], R[s], &p_s);
		new_p += p_s;
	 }

	 // continue only if the probability so far is still bigger than the best found so far
	 if(new_p > *p_max) {
		//		uint32_t new_de = *de | (z << k);
		// The candidate bit is placed at the rotated position k_rot.
		uint32_t new_de = *de | (z << k_rot);
		// The freshly computed R vectors become the callee's C vectors.
		max_adp_arx_bounds_0(k+1, n, lrot_const, &new_p, &new_de, A, B, R, dc, dd, de_max, p_max);
	 }

	 for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
		gsl_vector_free(R[s]);
	 }

  } // z

}
#endif

/* --- */

/**
 * Fill A with the transition matrix of the subgraph in which every input
 * node has maximum degree (two): node i is linked to node j whenever bit 1
 * of i equals bit 0 of j. Rows of A are output nodes, columns are input
 * nodes. Every added link is also printed.
 *
 * For a 4 x 4 matrix the result is:
 *
 * 1.00, 1.00, 0.00, 0.00,
 * 0.00, 0.00, 1.00, 1.00,
 * 1.00, 1.00, 0.00, 0.00,
 * 0.00, 0.00, 1.00, 1.00,
 *
 * \see xdp_rot_and_compute_subgraph
 */
void max_xdp_rot_and_compute_maxdegree_subgraph(gsl_matrix* A)
{
  for(uint32_t src = 0; src < XDP_ROT_AND_MSIZE; src++) {
	 const uint32_t src_upper = (src >> 1) & 1;
	 for(uint32_t dst = 0; dst < XDP_ROT_AND_MSIZE; dst++) {
		const uint32_t dst_lower = (dst >> 0) & 1;
		const bool b_linked = (src_upper == dst_lower);
#if 1									  // DEBUG
		if(b_linked) {
		  printf("Add link: %d -> %d\n", src, dst);
		}
#endif
		// row = output (dst), col = input (src)
		gsl_matrix_set(A, dst, src, b_linked ? 1.0 : 0.0);
	 }
  }
}


void test_max_xdp_rot_and_compute_maxdegree_subgraph()
{
  gsl_matrix* A = gsl_matrix_calloc(XDP_ROT_AND_MSIZE, XDP_ROT_AND_MSIZE);
  max_xdp_rot_and_compute_maxdegree_subgraph(A);
  xdp_rot_and_print_matrix(A);
  gsl_matrix_free(A);
}


  test_max_xdp_rot_and_compute_maxdegree_subgraph();


/* --- */

// For one fixed input difference, compare xdp_rot_and() against
// xdp_rot_and_exper() for every output difference of WORD_SIZE bits.
// The value of xdp_rot_and() is printed for each output difference and the
// two results are required to be equal.
void test_xdp_rot_and_gamma_all()
{
  const uint32_t s = 1;
  const uint32_t t = 3;
  const uint32_t da = 0x5;//random32() & MASK;//0xE;//random32() & MASK;	  // 0x1F;
  //  uint32_t dc = random32() & MASK;

  for(uint32_t dc = 0; dc < (1U << WORD_SIZE); dc++) {
#if 0
	 uint32_t b1 = (dc >> 2) & 1;
	 uint32_t b2 = (dc >> 4) & 1;
	 if(b1 || b2)
		continue;
	 assert((b1 == 0) && (b2 == 0));
#endif
	 const double p_th = xdp_rot_and(da, dc, s, t);
	 const double p_ex = xdp_rot_and_exper(da, dc, s, t);
	 printf("[%s:%d] XDP_AND_TH[%8X->%8X] = %6.5f\n", 
			  __FILE__, __LINE__, da, dc, p_th);
	 assert(p_th == p_ex);
#if 0 
	 printf("[%s:%d] XDP_AND_EX[%8X->%8X] = %6.5f\n", 
			  __FILE__, __LINE__, da, dc, p_ex);
#endif
  }
}


/* --- */

// Placeholder test for the right rotation of a 64-bit word. The whole body
// is disabled with "#if 0", so calling this function currently does nothing.
void test_rot64()
{
#if 0
  uint64_t x = 1;
  uint32_t r = 1;
  //  uint64_t tmp = (((x >> r) | (x << (WORD_SIZE - r))));//RROT(x, 1);
  uint64_t tmp = RROT(x, 1);
  //  tmp = x >> 100;
#endif
}


/* --- */


// Print the probability computed by xdp_rot_and() next to the one computed
// by xdp_rot_and_exper() for a single hard-coded (da -> dc) pair with
// rotation constants (1, 3).
void test_xdp_rot_and()
{
  const uint32_t s = 1;
  const uint32_t t = 3;
  // Other pairs that were tried:
  //  da = 0x13, dc = 0x4
  //  da = 0x16, dc = 0x10
  //  da = 0x1A, dc = 0x14
#if 0
  const uint32_t da = 0x1F;//0x1F;//random32() & MASK;
  const uint32_t dc = 0xB;//0;//random32() & MASK;
#else
  const uint32_t da = 0;//0x1F;//random32() & MASK;
  const uint32_t dc = 0;//random32() & MASK;
#endif

  // Both values are computed before anything is printed.
  const double p_th = xdp_rot_and(da, dc, s, t);
  const double p_ex = xdp_rot_and_exper(da, dc, s, t);

  printf("[%s:%d] XDP_AND_TH[%8X->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, dc, p_th);
  printf("[%s:%d] XDP_AND_EX[%8X->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, dc, p_ex);

#if 0									  // independent inputs
  uint32_t da_rot_1 = LROT(da, s);
  uint32_t da_rot_2 = LROT(da, t);
  uint32_t A[2][2][2] = {{{0}}};
  xdp_and_bf(A);
  double p3= xdp_and(A, da_rot_1, da_rot_2, dc);
  printf("[%s:%d] XDP_AND_TH[%8X=(%8X,%8X)->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, da_rot_1, da_rot_2, dc, p3);
#endif
}


/* --- */

// 
// Transform a 2D array in which a single bit is stored in a 32-bit word
// into a matrix in which 32 bits are packed into a 32-bit word, suitable
// to be manipulated with the solve_gf2_* routines.
// 
// The conversion itself is not implemented yet (see the TODO below): the
// function only checks the dimensions of the destination matrix and leaves
// both A and M untouched.
// 
// A      - source array, one bit per word, WORD_SIZE rows of WORD_SIZE + 1 entries
// M      - destination matrix (not written)
// M_rows - must be equal to WORD_SIZE
// M_cols - must be equal to (WORD_SIZE + 1) / WORD_SIZE (integer division)
// 
void xdp_and_arrey_to_matrix_gf2(uint32_t A[WORD_SIZE][WORD_SIZE + 1], 
											uint32_t** M, uint32_t M_rows, uint32_t M_cols)
{
  assert(M_rows == WORD_SIZE);
  // NOTE(review): this is not the number of 32-bit words needed to hold
  // WORD_SIZE + 1 packed bits in general - confirm the intended column count.
  assert(M_cols == ((WORD_SIZE + 1) / WORD_SIZE));

  // TODO:
  //  for()
}


/* --- */

  for(uint32_t i = 0; i < XDP_ROT_AND_MSIZE; i++) {
	 if(V[idx_in][i]) {
		for(uint32_t j = 0; j < XDP_ROT_AND_MSIZE; j++) {
		  if(V[idx_out][j]) {
			 uint32_t upper = (i >> 1) & 1;
			 uint32_t lower = (j >> 0) & 1;
			 if(upper == lower) {
				assert(s[i][j] == 0);
				s[i][j] = 1;
			 }
		  }
		}
	 }
  }


/* --- */

// Build the transition matrix A selected by the input difference bits
// (da_in, db_in, dc_in).
//
// Nodes are numbered diff_indx=(x,y): 0=(0,0), 1=(0,1), 2=(1,0), 3=(1,1).
// A source node [x]y is linked to the two nodes 0[x] and 1[x], i.e. to the
// nodes x and (2 | x). Which source nodes are active depends on
// idx_in = (da_in << 2) | (db_in << 1) | dc_in:
//
//   000: x == *, y == *  (taken from the table V)
//   001: impossible      (no links)
//   010: x == 0          sources 0, 1
//   011: x == 1          sources 2, 3
//   100: y == 0          sources 0, 2
//   101: y == 1          sources 1, 3
//   110: x != y          sources 1, 2
//   111: x == y          sources 0, 3
//
// The arguments da_out, db_out and dc_out do not influence the result.
// A is completely overwritten: row = output node, col = input node.
void xdp_rot_and_compute_subgraph(gsl_matrix* A, 
											 uint32_t da_in, uint32_t db_in, uint32_t dc_in,
											 uint32_t da_out, uint32_t db_out, uint32_t dc_out)
{
  const uint32_t idx_in = (da_in << 2) | (db_in << 1) | (dc_in << 0);

  // link[src][dst] == 1 <=> there is an edge src -> dst
  uint32_t link[XDP_ROT_AND_MSIZE][XDP_ROT_AND_MSIZE] = {{0}};

  if(idx_in == 0) {
	 // NOTE(review): both the source and the destination test use row
	 // idx_in of V - confirm that the destination should not use the
	 // output index instead.
	 for(uint32_t src = 0; src < XDP_ROT_AND_MSIZE; src++) {
		for(uint32_t dst = 0; dst < XDP_ROT_AND_MSIZE; dst++) {
		  if(V[idx_in][src] && V[idx_in][dst]) {
			 link[src][dst] = 1;
		  }
		}
	 }
  } else if((idx_in >= 2) && (idx_in <= 7)) {
	 // active source nodes for idx_in = 2, 3, ..., 7
	 static const uint32_t src_nodes[6][2] = {
		{0, 1},						  // 010: x == 0
		{2, 3},						  // 011: x == 1
		{0, 2},						  // 100: y == 0
		{1, 3},						  // 101: y == 1
		{1, 2},						  // 110: x != y
		{0, 3}						  // 111: x == y
	 };
	 for(uint32_t w = 0; w < 2; w++) {
		const uint32_t src = src_nodes[idx_in - 2][w];
		const uint32_t x = src >> 1;
		// [x]y -> 0[x], 1[x]
		link[src][x] = 1;
		link[src][x | 2] = 1;
	 }
  }
  // idx_in == 1 is impossible: all entries stay zero

  // 
  //                 col = input
  //                         |
  //                         V
  //                  [x] [x] [x] [x]  
  // row = output <-  [x] [x] [x] [x]  
  //                  [x] [x] [x] [x]  
  // 
  for(uint32_t row = 0; row < XDP_ROT_AND_MSIZE; row++) {
	 for(uint32_t col = 0; col < XDP_ROT_AND_MSIZE; col++) {
		gsl_matrix_set(A, row, col, link[col][row]);
	 }
  }
} 

/* --- */

// delta - input difference; dc - output difference; s, t - rotation constants
double adp_rot_and_v2(const uint32_t delta, const uint32_t dc, 
							 const uint32_t s, const uint32_t t)
{
  double p = 0.0;
  uint32_t da = LROT(delta, s);
  uint32_t db = LROT(delta, t);

  gsl_matrix* A[WORD_SIZE];
  xdp_rot_and_alloc_matrices(A);
  xdp_rot_and_compute_graph(A, da, db, dc);

  gsl_vector* C[2];
  gsl_vector* L[2];
  gsl_vector* R = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
  gsl_matrix* T = gsl_matrix_calloc(XDP_ROT_AND_MSIZE, XDP_ROT_AND_MSIZE);

  for(uint32_t i = 0; i < 2; i++) {
	 C[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
	 L[i] = gsl_vector_calloc(XDP_ROT_AND_MSIZE);
  }

  gsl_vector_set(C[0], 1, 1.0);
  gsl_vector_set(C[0], 2, 1.0);
  gsl_vector_set(L[0], 0, 1.0);
  gsl_vector_set(L[0], 3, 1.0);

  gsl_vector_set(C[1], 0, 1.0);
  gsl_vector_set(C[1], 3, 1.0);
  gsl_vector_set(L[1], 1, 1.0);
  gsl_vector_set(L[1], 2, 1.0);

  double prob[2] = {0.0, 0.0};
  for(uint32_t j = 0; j < 2; j++) {
	 gsl_vector_set_all(R, 0.0);
	 gsl_matrix_set_all(T, 0.0);
	//	 for(uint32_t i = 0; i < WORD_SIZE; i++) {
	 for(uint32_t i = 1; i < WORD_SIZE; i++) {
#if 1									  // DEBUG
		printf("[%s:%d] %d|%2d:\n", __FILE__, __LINE__, j, i);
		printf("C%d[%d] = \n", j, i);
		xdp_rot_and_print_vector(C[j]);
		printf("\nA[%d] = \n", i);
		xdp_rot_and_print_matrix(A[i]);
#endif
		gsl_blas_dgemm(CblasNoTrans, 1.0, A[i], T, 0.0, T);
		//		gsl_blas_dgemv(CblasNoTrans, 1.0, A[i], C[j], 0.0, R);
		//		gsl_vector_memcpy(C[j], R);
	 }
	 //	 gsl_blas_ddot(L[j], C[j], &prob[j]);
	 gsl_blas_dgemv(CblasNoTrans, 1.0, T, C[j], 0.0, R);
	 gsl_blas_ddot(L[j], R, &prob[j]);

  }

  p = prob[0] + prob[1];

  for(uint32_t i = 0; i < 2; i++) {
	 gsl_vector_free(C[i]);
	 gsl_vector_free(L[i]);
  }
  gsl_matrix_free(T);
  gsl_vector_free(R);
  xdp_rot_and_free_matrices(A);

  return p;
}

/* --- */

// Check a new condition XCOND against the state stored in *X_in.
//
// Stored state 0 or 1: consistent unless XCOND is the opposite bit value.
// Stored state 2:      the stored state is replaced by XCOND; consistent.
// Stored state 3, 4, 5: always consistent, nothing is changed.
// Any other stored state: not consistent.
//
// Returns true if consistent. *X_in is modified only for stored state 2.
bool xdp_and_rot_check_consistency_i(uint32_t* X_in, uint32_t XCOND)
{
  const uint32_t state = *X_in;

  if(state > 5) {
	 return false;
  }
  if(state == 2) {
	 *X_in = XCOND;				  // !
	 return true;
  }
  if(state <= 1) {
	 // a fixed bit conflicts only with the opposite fixed bit
	 return (XCOND != (state ^ 1));
  }
  return true;						  // states 3, 4, 5
}

/* --- */

// Check a new condition XCOND against the state stored in *X_in.
//
// Only a condition that is a fixed bit value (0 or 1) can change the
// outcome:
//   stored 0 or 1: not consistent if XCOND is the opposite bit value.
//   stored 2:      the stored state is replaced by the bit value XCOND.
//   stored 3:      not consistent if the stored state equals XCOND.
//   stored 4:      not consistent if the stored state differs from XCOND.
//   stored 5:      not expected (asserts); not consistent.
//   anything else: not consistent.
//
// Returns true if consistent. *X_in is modified only for stored state 2.
bool xdp_and_rot_check_consistency_i(uint32_t* X_in, uint32_t XCOND)
{
  const uint32_t state = *X_in;
  const bool b_cond_is_bit = ((XCOND == 0) || (XCOND == 1));

  if(state <= 1) {
	 // conflict only with the opposite fixed bit
	 return !(b_cond_is_bit && (XCOND != state));
  }
  if(state == 2) {
	 if(b_cond_is_bit) {
		*X_in = XCOND;
	 }
	 return true;
  }
  if(state == 3) {
	 // NOTE(review): the state code 3 is compared with a bit value, so the
	 // two can never be equal and this state is always consistent - confirm
	 // which value was meant to be compared.
	 return !(b_cond_is_bit && (state == XCOND));
  }
  if(state == 4) {
	 // NOTE(review): the state code 4 always differs from a bit value, so
	 // any fixed bit is rejected - confirm which value was meant to be compared.
	 return !(b_cond_is_bit && (state != XCOND));
  }
  if(state == 5) {
	 assert(0 == 1);				  // normally shouldn't be here
  }
  return false;					  // state 5 and unknown states
}

/* --- */

// XDP-ROT-AND with system of equations
//
// Unfinished draft: for every bit position i it looks up the conditions on
// the two input bits x_i and y_i and is meant to turn them into one row of
// the equation system E (WORD_SIZE rows, WORD_SIZE coefficient columns plus
// one right-hand-side column). The conversion is not implemented: var_x,
// var_y and var_eq stay 0, so E remains all zeros and the function returns
// 0.0 for every input.
//
// delta      - input difference
// dc         - output difference
// s_in, t_in - rotation constants (swapped internally so that s <= t)
double xdp_rot_and(const uint32_t delta, const uint32_t dc,
						 const uint32_t s_in, const uint32_t t_in)
{
  uint32_t s = s_in;
  uint32_t t = t_in;
  if(s > t) {
	 std::swap(s, t);
  }
  assert(s <= t);

  // the two inputs of the AND are rotations of the same difference
  uint32_t da = LROT(delta, s);
  uint32_t db = LROT(delta, t);
  double p = 0.0;

#if 1									  // DEBUG
  printf("[%s:%d] %8X = (%8X %8X) -> %8X\n", __FILE__, __LINE__, delta, da, db, dc);
#endif

  // early exit if the differential is rejected by is_xdp_and_nonzero()
  bool b_is_possible = is_xdp_and_nonzero(da, db, dc);
  if(!b_is_possible) {
	 return 0.0;
  }

  // XCOND[da_i][db_i][dc_i][0|1]: condition on the x (index 0) and the
  // y (index 1) input bit for the given difference bits
  uint32_t XCOND[2][2][2][2] = {{{{0}}}};
  xdp_rot_and_xcond_init(XCOND);

  uint32_t E[WORD_SIZE][WORD_SIZE + 1] = {{0}};

  uint32_t n = WORD_SIZE;
  const uint32_t x_start = (n - s);
  const uint32_t y_start = (n - t);

  for(uint32_t i = 0; i < n; i++) {
	 // input bit positions that feed output position i through each rotation
	 uint32_t x_i = (x_start + i) % n;
	 uint32_t y_i = (y_start + i) % n;
	 uint32_t da_i = (da >> i) & 1;
	 uint32_t db_i = (db >> i) & 1;
	 uint32_t dc_i = (dc >> i) & 1;

	 uint32_t x_cond = XCOND[da_i][db_i][dc_i][0];
	 uint32_t var_x = 0;

	 uint32_t y_cond = XCOND[da_i][db_i][dc_i][1];
	 uint32_t var_y = 0;

	 uint32_t var_eq = 0;

	 // NOTE(review): a function definition inside a function body is not
	 // valid C++, and this one has an empty body despite its return type.
	 // It looks like a placeholder for the missing condition-to-equation
	 // step; x_cond and y_cond are currently never used.
	 uint32_t xdp_and_cond_to_eq(const uint32_t x_cond, const uint32_t y_cond, uint32_t* eq)
	 {

	 }

	 // row i of the system: coefficients of x_i and y_i and the right-hand side
	 E[i][x_i] = var_x;
	 E[i][y_i] = var_y;
	 E[i][WORD_SIZE] = var_eq;

#if 1									  // DEBUG
	 printf("[%s:%d] %3d %3d | %d %d %d\n", __FILE__, __LINE__, x_i, y_i, da_i, db_i, dc_i);
#endif
  }
#if 1									  // DEBUG
	 // print the augmented matrix, right-hand side after the " | " separator
	 for(uint32_t row = 0; row < WORD_SIZE; row++) {
		for(uint32_t col = 0; col < WORD_SIZE + 1; col++) {
		  if(col == WORD_SIZE) {
			 printf(" | ");
		  }
		  printf("%d ", E[row][col]);
		}
		printf("\n");
	 }
#endif

  // p is never updated above, so this is always 0.0
  return p;
}

// Print the probability computed by xdp_rot_and() for the hard-coded pair
// da = 0x1F, dc = 0 with rotation constants (1, 3). The comparison against
// the experimental and the independent-input computations is disabled.
void test_xdp_rot_and()
{
  const uint32_t s = 1;
  const uint32_t t = 3;
#if 1
  const uint32_t da = 0x1F;
  const uint32_t dc = 0;
#endif
  //  uint32_t da = random32() & MASK;
  //  uint32_t dc = random32() & MASK;

  const double p_th = xdp_rot_and(da, dc, s, t);
  printf("[%s:%d] XDP_AND_TH[%8X->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, dc, p_th);
#if 0
  uint32_t A[2][2][2] = {{{0}}};
  xdp_and_bf(A);
  uint32_t da_rot_1 = LROT(da, s);
  uint32_t da_rot_2 = LROT(da, t);
  double p2= xdp_rot_and_exper(da, dc, s, t);
  double p3= xdp_and(A, da_rot_1, da_rot_2, dc);
  printf("[%s:%d] XDP_AND_EX[%8X->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, dc, p2);
  printf("[%s:%d] XDP_AND_TH[%8X=(%8X,%8X)->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, da_rot_1, da_rot_2, dc, p3);
#endif
}


/* --- */

// Check the value x_i against the condition stored in *xcond_in.
//
// Only x_i equal to 0 or 1 can change the outcome:
//   stored 0 or 1: not consistent if x_i is the opposite bit value.
//   stored 2:      the stored condition is replaced by the bit value x_i.
//   stored 3:      not consistent if the stored condition equals x_i.
//   stored 4:      not consistent if the stored condition differs from x_i.
//   stored 5:      not expected (asserts); not consistent.
//   anything else: not consistent.
//
// Returns true if consistent. *xcond_in is modified only for stored
// condition 2.
bool xdp_and_rot_check_consistency_i(uint32_t* xcond_in, uint32_t x_i)
{
  const uint32_t cond = *xcond_in;

  // unknown condition codes are never consistent
  if(cond > 5) {
	 return false;
  }
  if(cond == 5) {
	 assert(0 == 1);				  // normally shouldn't be here
	 return false;
  }

  // a value other than a fixed bit never conflicts and never updates
  if(x_i > 1) {
	 return true;
  }

  // from here on x_i is 0 or 1
  if(cond == 2) {
	 *xcond_in = x_i;
	 return true;
  }
  if(cond == 3) {
	 // NOTE(review): the condition code 3 is compared with a bit value, so
	 // the two can never be equal - confirm which value was meant.
	 return (cond != x_i);
  }
  if(cond == 4) {
	 // NOTE(review): the condition code 4 always differs from a bit value,
	 // so every fixed bit is rejected - confirm which value was meant.
	 return (cond == x_i);
  }
  // cond is 0 or 1: consistent only with the same bit value
  return (cond == x_i);
}

// XOR differential probability of c = (a <<< s) & (a <<< t)
//
// Every output position i puts one condition (looked up in XCOND) on each of
// the two input bits that reach it through the two rotations. The
// conditions are collected per input bit in X[]: the positions are processed
// in three consecutive ranges of lengths u, v and m (u + v + m = WORD_SIZE).
// In the first range both conditions are stored directly, in the second
// range the x condition is stored and the y condition is checked against
// what is already stored, and in the third range both conditions are
// checked. If any check fails the probability is 0. Otherwise every input
// bit whose final condition is 2 doubles the number of solutions, and the
// probability is that number divided by ALL_WORDS.
//
// delta      - input difference
// dc         - output difference
// s_in, t_in - rotation constants (swapped internally so that s <= t)
//
// The function prints extensive debug output.
double xdp_rot_and(const uint32_t delta, const uint32_t dc,
						 const uint32_t s_in, const uint32_t t_in)
{
  uint32_t s = s_in;
  uint32_t t = t_in;
  if(s > t) {
	 std::swap(s, t);
  }
  assert(s <= t);

  // the two inputs of the AND are rotations of the same difference
  uint32_t da = LROT(delta, s);
  uint32_t db = LROT(delta, t);
  double p = 0.0;

#if 1									  // DEBUG
  printf("[%s:%d] %8X = (%8X %8X) -> %8X\n", __FILE__, __LINE__, delta, da, db, dc);
#endif

  // early exit if the differential is rejected by is_xdp_and_nonzero()
  bool b_is_possible = is_xdp_and_nonzero(da, db, dc);
  if(!b_is_possible) {
	 return 0.0;
  }

  // XCOND[da_i][db_i][dc_i][0|1]: condition on the x (index 0) and the
  // y (index 1) input bit for the given difference bits
  uint32_t XCOND[2][2][2][2] = {{{{0}}}};
  xdp_rot_and_xcond_init(XCOND);

  // X[i]: condition currently stored for input bit i
  uint32_t X[WORD_SIZE] = {0};
#if 1									  // DEBUG
  for(uint32_t w = 0; w < 8; w++) {
	 uint32_t i = (w >> 2) & 1;
	 uint32_t j = (w >> 1) & 1;
	 uint32_t k = (w >> 0) & 1;
	 uint32_t x = XCOND[i][j][k][0];
	 uint32_t y = XCOND[i][j][k][1];
	 printf("A%d%d%d(%d %d)\n", i, j, k, x, y);
  }
  //	 assert(0 == 1);
#endif

#if 1									  // DEBUG
  debug_xdp_rot_and(s, t);
#endif

  uint32_t n = WORD_SIZE;
  uint32_t l_s = (s - t + n) % n;
  uint32_t l_t = (t - s) % n;
  uint32_t u = 0;					  // both bits are assigned
  uint32_t v = 0;					  // one bit is assigned
  uint32_t m = 0;					  // no bits are assigned

#if 1									  // DEBUG
  printf("[%s:%d] (s, t) = %2d %2d\n", __FILE__, __LINE__, s, t);
#endif

  if(l_t >= l_s) {
	 u = l_s;
	 v = l_t - l_s;
	 m = n - l_t;
  } else {
	 u = l_t;
	 v = l_s - l_t;

	 m = n - l_s;
  }

#if 1									  // DEBUG
  printf("[%s:%d] (l_s, l_t) = (%2d %2d) | (u, v, m) = (%2d %2d %2d)\n", __FILE__, __LINE__, l_s, l_t, u, v, m);
#endif
  assert((u + v + m) == n);

  // input bit positions that feed output position 0 through each rotation
  const uint32_t x_start = (n - s);
  const uint32_t y_start = (n - t);

  // Range 1: store both conditions without checking.
  for(uint32_t i = 0; i < u; i++) {
	 uint32_t x_i = (x_start + i) % n;
	 uint32_t y_i = (y_start + i) % n;
	 uint32_t da_i = (da >> i) & 1;
	 uint32_t db_i = (db >> i) & 1;
	 uint32_t dc_i = (dc >> i) & 1;
	 X[x_i] = XCOND[da_i][db_i][dc_i][0];
	 X[y_i] = XCOND[da_i][db_i][dc_i][1];

#if 1									  // DEBUG
	 printf("[%s:%d] %3d %3d | %d %d %d ", __FILE__, __LINE__, x_i, y_i, da_i, db_i, dc_i);
	 printf("| X[%2d %2d] %d %d\n", x_i, y_i, X[x_i], X[y_i]);
#endif

  }
  // Range 2: store the x condition, check the y condition.
  for(uint32_t i = u; i < (u + v); i++) {
	 uint32_t x_i = (x_start + i) % n;
	 uint32_t y_i = (y_start + i) % n;
	 uint32_t da_i = (da >> i) & 1;
	 uint32_t db_i = (db >> i) & 1;
	 uint32_t dc_i = (dc >> i) & 1;
	 X[x_i] = XCOND[da_i][db_i][dc_i][0];
	 uint32_t y_cond = XCOND[da_i][db_i][dc_i][1];

	 b_is_possible = xdp_and_rot_check_consistency_i(&X[y_i], y_cond);
#if 1									  // DEBUG
	 printf("[%s:%d] %3d %3d | %d %d %d ", __FILE__, __LINE__, x_i, y_i, da_i, db_i, dc_i);
	 printf("| X[%2d %2d] %d %d | b_is_possible %d\n", x_i, y_i, X[x_i], X[y_i], b_is_possible);
#endif
	 if(!b_is_possible)
		break;

  }
  if(!b_is_possible)
	 return 0.0;
  // Range 3: check both conditions.
  for(uint32_t i = (u + v); i < (u + v + m); i++) {
	 uint32_t x_i = (x_start + i) % n;
	 uint32_t y_i = (y_start + i) % n;
	 uint32_t da_i = (da >> i) & 1;
	 uint32_t db_i = (db >> i) & 1;
	 uint32_t dc_i = (dc >> i) & 1;
	 uint32_t x_cond = XCOND[da_i][db_i][dc_i][0];
	 uint32_t y_cond = XCOND[da_i][db_i][dc_i][1];

#if 1									  // DEBUG
	 printf("[%s:%d] %3d %3d | %d %d %d\n", __FILE__, __LINE__, x_i, y_i, da_i, db_i, dc_i);
#endif
	 b_is_possible = xdp_and_rot_check_consistency_i(&X[x_i], x_cond);
#if 1									  // DEBUG
	 printf("[%s:%d] %2d: X[%2d]%2d %2d | b_is_possible %d\n", __FILE__, __LINE__, i, x_i, X[x_i], x_cond, b_is_possible);
#endif
	 if(!b_is_possible)
		break;

	 b_is_possible = xdp_and_rot_check_consistency_i(&X[y_i], y_cond);
#if 1									  // DEBUG
	 printf("[%s:%d] %2d: X[%2d]%2d %2d | b_is_possible %d\n", __FILE__, __LINE__, i, y_i, X[y_i], y_cond, b_is_possible);
#endif
	 if(!b_is_possible)
		break;
  }
  if(!b_is_possible)
	 return 0.0;
  // Number of solutions: doubled for every input bit with condition 2. It
  // is kept as a double because it can reach 2^WORD_SIZE, which does not
  // fit in a 32-bit counter once WORD_SIZE >= 32; powers of two of this
  // size are represented exactly.
  double cnt = 1.0;
  for(uint32_t i = 0; i < WORD_SIZE; i++) {
	 uint32_t x_i = X[i];
	 if(x_i == 2) {
		cnt *= 2.0;
	 }
	 assert(x_i <= 4);
  }
  p = cnt / (double)ALL_WORDS;
#if 1									  // DEBUG
  printf("[%s:%d] ", __FILE__, __LINE__);
  for(uint32_t i = 0; i < WORD_SIZE; i++) {
	 printf("[%2d]%2d ", i, X[i]);
  }
  printf(" | %.0f %f\n", cnt, p);
#endif
  return p;
}

// Print three probabilities for the hard-coded pair da = 0x1F, dc = 0 with
// rotation constants (1, 3): the one from xdp_rot_and(), the one from
// xdp_rot_and_exper(), and the one from xdp_and() applied to the two rotated
// copies of da as if they were independent inputs.
void test_xdp_rot_and()
{
  const uint32_t s = 1;
  const uint32_t t = 3;
  // Other pairs that were tried:
  //  da = 0x13, dc = 0x4
  //  da = 0x16, dc = 0x10
  //  da = 0x1A, dc = 0x14
#if 1
  const uint32_t da = 0x1F;//random32() & MASK;
  const uint32_t dc = 0;//random32() & MASK;
#endif
  //  uint32_t da = random32() & MASK;
  //  uint32_t dc = random32() & MASK;
  const uint32_t da_rot_s = LROT(da, s);
  const uint32_t da_rot_t = LROT(da, t);

  // table consumed by xdp_and(), filled by xdp_and_bf()
  uint32_t A[2][2][2] = {{{0}}};
  xdp_and_bf(A);

  // All three values are computed before anything is printed.
  const double p_th = xdp_rot_and(da, dc, s, t);
  const double p_ex = xdp_rot_and_exper(da, dc, s, t);
  const double p_indep = xdp_and(A, da_rot_s, da_rot_t, dc);

  printf("[%s:%d] XDP_AND_TH[%8X->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, dc, p_th);
  printf("[%s:%d] XDP_AND_EX[%8X->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, dc, p_ex);
  printf("[%s:%d] XDP_AND_TH[%8X=(%8X,%8X)->%8X] = %6.5f\n", 
			__FILE__, __LINE__, da, da_rot_s, da_rot_t, dc, p_indep);

}

/* --- */

	 // Scratch fragment (no enclosing function in this dump): reconcile the
	 // stored state X[y_i] with the required condition y_cond and clear
	 // b_is_possible when the two conflict.
	 // NOTE(review): the meaning of the state codes 0..4 is not defined in
	 // this fragment -- confirm against xdp_and_rot_check_consistency_i().
	 if(X[y_i] == 2) {
		// state 2: the stored value is simply replaced by the condition
		X[y_i] = y_cond;
	 } else {

		// state 0 conflicts with condition 1
		if(X[y_i] == 0) {
		  if(y_cond == 1) {
			 b_is_possible = false;
		  }
		} 

		// state 1 conflicts with condition 0
		if(X[y_i] == 1) {
		  if(y_cond == 0) {
			 b_is_possible = false;
		  }
		} 

		// state 3 is rejected when the condition is also 3
		if(X[y_i] == 3) {
		  if(X[y_i] == y_cond) {
			 b_is_possible = false;
		  }
		} 

		// state 4 is rejected unless the condition is also 4
		if(X[y_i] == 4) {
		  if(X[y_i] != y_cond) {
			 b_is_possible = false;
		  }
		}

		// report the conflicting position
		if(!b_is_possible) {
		  printf("[%s:%d] X[%2d] %d != %d\n", __FILE__, __LINE__, y_i, X[y_i], y_cond);
		}

	 }


/* --- */

/*

A000: 0 0 | 1
A000: 1 0 | 2
A000: 0 1 | 3
A000: 1 1 | 4

A010: 0 0 | 1
A010: 0 1 | 2

A011: 1 0 | 1
A011: 1 1 | 2

A100: 0 0 | 1
A100: 1 0 | 2

A101: 0 1 | 1
A101: 1 1 | 2

A110: 1 0 | 1
A110: 0 1 | 2

A111: 0 0 | 1
A111: 1 1 | 2

*/

/* --- */

#if(WORD_SIZE == 64)
/*
Simon128/256
Key: 1f1e1d1c1b1a1918 1716151413121110 0f0e0d0c0b0a0908 0706050403020100
Plaintext: 74206e69206d6f6f 6d69732061207369
Ciphertext: 8d2b5579afc8a3a0 3bf72a87efe7b868
*/
// Test vector from the comment above, only compiled for 64-bit words.
// The key words are stored in the reverse order of the "Key:" line
// (the last printed word is tv_key[0]); plaintext and ciphertext keep
// the printed order.
uint64_t tv_key[4] = {0x0706050403020100LL, 0x0f0e0d0c0b0a0908LL, 0x1716151413121110LL, 0x1f1e1d1c1b1a1918LL};
uint64_t tv_pt[2] = {0x74206e69206d6f6fLL, 0x6d69732061207369LL}; // {x, y}
uint64_t tv_ct[2] = {0x8d2b5579afc8a3a0LL, 0x3bf72a87efe7b868LL};
#endif

/* ---- */

/**
 * The XOR differential probability (XDP) of bitwise AND, computed bit by bit
 * from a closed Boolean expression:
 *
 * xdp-and(da,db->dc) = 2^{-n} * prod_i f_i, where
 * f_i = 2 (~da_i & ~db_i & ~dc_i) + ~(~da_i & ~db_i)
 *
 * Per bit: f_i = 2 when all three difference bits are zero, f_i = 0 when
 * da_i = db_i = 0 but dc_i = 1 (impossible), and f_i = 1 otherwise.
 *
 * The product is accumulated directly as a double (each bit contributes a
 * factor f_i / 2), so it cannot overflow. The factors are powers of two,
 * hence the result is exactly the same as count / 2^n whenever the integer
 * count fits; the former 32-bit counter wrapped to 0 for n = 32 with
 * da = db = dc = 0.
 *
 * \param da first input XOR difference.
 * \param db second input XOR difference.
 * \param dc output XOR difference.
 * \return the probability xdp-and(da, db -> dc) over WORD_SIZE bits.
 */
double xdp_and(uint32_t da, uint32_t db, uint32_t dc)
{
  double p = 1.0;
  for(int pos = 0; pos < WORD_SIZE; pos++) {
    const uint32_t x = (da >> pos) & 1;
    const uint32_t y = (db >> pos) & 1;
    const uint32_t z = (dc >> pos) & 1;

    const uint32_t not_x = (~x) & 1;
    const uint32_t not_y = (~y) & 1;
    const uint32_t not_z = (~z) & 1;

    // f is 0, 1 or 2 (see the header comment)
    const uint32_t f = (2 * (not_x & not_y & not_z)) + (~(not_x & not_y) & 1);
    if(f == 0) {
      return 0.0;                 // impossible bit position
    }
    if(f == 1) {
      p *= 0.5;                   // f / 2 = 1/2; f == 2 contributes a factor 1
    }
  }
  return p;
}

/* --- */

  // Scratch fragment: one fixed (da, db, dc) triple; the random alternative
  // is kept as a trailing comment on each line.
  uint32_t da = 0x5C;//random32() & MASK;
  uint32_t db = 0x5C;//random32() & MASK;
  uint32_t dc = 0x44;//random32() & MASK;


/* --- */
  // Scratch fragment: start of a loop over all N = 2^ninputs input
  // combinations.
  // NOTE(review): the loop body is unfinished (`uint32_t x` has no
  // initializer or terminating semicolon), so this does not compile as is.
  uint32_t ninputs = 2;
  uint32_t N = (1U << ninputs);

  for(uint32_t i = 0; i < N; i++) {
uint32_t x
  }


/* --- */

       1 32768
       2 16384
       4 8192
       8 4096
      10 2048
      20 1024
      40 512
      80 256
     100 128
     200 64
     400 32
     800 16
    1000 8
    2000 4
    4000 2


/* --- */

[./src/idea.cc:181] ADP_IDEA_MUL_TH[(    556B,[    FD01])->    C000] = 0.50000 2^-1.000000

/* --- */

/* vpv@mazirat:~/temp$ ./../skcrypto/trunk/work/src/yaarx/bin/idea > plot.dat; less plot.dat |grep "#---"; gnuplot plot.plt */


/* --- */


/* 
       0        0    1    1    1
       0        0    1    1    1
       0       40    2    2    2
       0       40    2    2    2
       0       80    1    1    1
       0       80    1    1    1
       0       C0    2    2    2
       0       C0    2    2    2
      40        0    2    2    2
      40        0    2    2    2
      40       40    2    2    2
      40       40    2    2    2
      40       80    2    2    2
      40       80    2    2    2
      40       C0    2    2    2
      40       C0    2    2    2
      80        0    1    1    1
      80        0    1    1    1
      80       40    2    2    2
      80       40    2    2    2
      80       80    1    1    1
      80       80    1    1    1
      80       C0    2    2    2
      80       C0    2    2    2
      C0        0    2    2    2
      C0        0    2    2    2
      C0       40    2    2    2
      C0       40    2    2    2
      C0       80    2    2    2
      C0       80    2    2    2
      C0       C0    2    2    2
      C0       C0    2    2    2
      FF       FF  128  128  128
[
 */

/* --- */

/*

ADP_XOR_COUNT

s =   0: 0 0 0 0 0 0 0 0 
s =   1: 1 0 0 0 0 0 0 0 
s =   2: 0 1 0 0 0 0 0 0 
s =   3: 1 1 0 0 0 0 0 0 
s =   4: 0 0 1 0 0 0 0 0 
s =   5: 1 0 1 0 0 0 0 0 
s =   6: 0 1 1 0 0 0 0 0 
s =   7: 1 1 1 0 0 0 0 0 
s =   8: 0 0 0 1 0 0 0 0 
s =   9: 1 0 0 1 0 0 0 0 
s =  10: 0 1 0 1 0 0 0 0 
s =  11: 1 1 0 1 0 0 0 0 
s =  12: 0 0 1 1 0 0 0 0 
s =  13: 1 0 1 1 0 0 0 0 
s =  14: 0 1 1 1 0 0 0 0 
s =  15: 1 1 1 1 0 0 0 0 
s =  16: 0 0 0 0 1 0 0 0 
s =  17: 1 0 0 0 1 0 0 0 
s =  18: 0 1 0 0 1 0 0 0 
s =  19: 1 1 0 0 1 0 0 0 
s =  20: 0 0 1 0 1 0 0 0 
s =  21: 1 0 1 0 1 0 0 0 
s =  22: 0 1 1 0 1 0 0 0 
s =  23: 1 1 1 0 1 0 0 0 
s =  24: 0 0 0 1 1 0 0 0 
s =  25: 1 0 0 1 1 0 0 0 
s =  26: 0 1 0 1 1 0 0 0 
s =  27: 1 1 0 1 1 0 0 0 
s =  28: 0 0 1 1 1 0 0 0 
s =  29: 1 0 1 1 1 0 0 0 
s =  30: 0 1 1 1 1 0 0 0 
s =  31: 1 1 1 1 1 0 0 0 
s =  32: 0 0 0 0 0 1 0 0 
s =  33: 1 0 0 0 0 1 0 0 
s =  34: 0 1 0 0 0 1 0 0 
s =  35: 1 1 0 0 0 1 0 0 
s =  36: 0 0 1 0 0 1 0 0 
s =  37: 1 0 1 0 0 1 0 0 
s =  38: 0 1 1 0 0 1 0 0 
s =  39: 1 1 1 0 0 1 0 0 
s =  40: 0 0 0 1 0 1 0 0 
s =  41: 1 0 0 1 0 1 0 0 
s =  42: 0 1 0 1 0 1 0 0 
s =  43: 1 1 0 1 0 1 0 0 
s =  44: 0 0 1 1 0 1 0 0 
s =  45: 1 0 1 1 0 1 0 0 
s =  46: 0 1 1 1 0 1 0 0 
s =  47: 1 1 1 1 0 1 0 0 
s =  48: 0 0 0 0 1 1 0 0 
s =  49: 1 0 0 0 1 1 0 0 
s =  50: 0 1 0 0 1 1 0 0 
s =  51: 1 1 0 0 1 1 0 0 
s =  52: 0 0 1 0 1 1 0 0 
s =  53: 1 0 1 0 1 1 0 0 
s =  54: 0 1 1 0 1 1 0 0 
s =  55: 1 1 1 0 1 1 0 0 
s =  56: 0 0 0 1 1 1 0 0 
s =  57: 1 0 0 1 1 1 0 0 
s =  58: 0 1 0 1 1 1 0 0 
s =  59: 1 1 0 1 1 1 0 0 
s =  60: 0 0 1 1 1 1 0 0 
s =  61: 1 0 1 1 1 1 0 0 
s =  62: 0 1 1 1 1 1 0 0 
s =  63: 1 1 1 1 1 1 0 0 
s =  64: 0 0 0 0 0 0 1 0 
s =  65: 1 0 0 0 0 0 1 0 
s =  66: 0 1 0 0 0 0 1 0 
s =  67: 1 1 0 0 0 0 1 0 
s =  68: 0 0 1 0 0 0 1 0 
s =  69: 1 0 1 0 0 0 1 0 
s =  70: 0 1 1 0 0 0 1 0 
s =  71: 1 1 1 0 0 0 1 0 
s =  72: 0 0 0 1 0 0 1 0 
s =  73: 1 0 0 1 0 0 1 0 
s =  74: 0 1 0 1 0 0 1 0 
s =  75: 1 1 0 1 0 0 1 0 
s =  76: 0 0 1 1 0 0 1 0 
s =  77: 1 0 1 1 0 0 1 0 
s =  78: 0 1 1 1 0 0 1 0 
s =  79: 1 1 1 1 0 0 1 0 
s =  80: 0 0 0 0 1 0 1 0 
s =  81: 1 0 0 0 1 0 1 0 
s =  82: 0 1 0 0 1 0 1 0 
s =  83: 1 1 0 0 1 0 1 0 
s =  84: 0 0 1 0 1 0 1 0 
s =  85: 1 0 1 0 1 0 1 0 
s =  86: 0 1 1 0 1 0 1 0 
s =  87: 1 1 1 0 1 0 1 0 
s =  88: 0 0 0 1 1 0 1 0 
s =  89: 1 0 0 1 1 0 1 0 
s =  90: 0 1 0 1 1 0 1 0 
s =  91: 1 1 0 1 1 0 1 0 
s =  92: 0 0 1 1 1 0 1 0 
s =  93: 1 0 1 1 1 0 1 0 
s =  94: 0 1 1 1 1 0 1 0 
s =  95: 1 1 1 1 1 0 1 0 
s =  96: 0 0 0 0 0 1 1 0 
s =  97: 1 0 0 0 0 1 1 0 
s =  98: 0 1 0 0 0 1 1 0 
s =  99: 1 1 0 0 0 1 1 0 
s = 100: 0 0 1 0 0 1 1 0 
s = 101: 1 0 1 0 0 1 1 0 
s = 102: 0 1 1 0 0 1 1 0 
s = 103: 1 1 1 0 0 1 1 0 
s = 104: 0 0 0 1 0 1 1 0 
s = 105: 1 0 0 1 0 1 1 0 
s = 106: 0 1 0 1 0 1 1 0 
s = 107: 1 1 0 1 0 1 1 0 
s = 108: 0 0 1 1 0 1 1 0 
s = 109: 1 0 1 1 0 1 1 0 
s = 110: 0 1 1 1 0 1 1 0 
s = 111: 1 1 1 1 0 1 1 0 
s = 112: 0 0 0 0 1 1 1 0 
s = 113: 1 0 0 0 1 1 1 0 
s = 114: 0 1 0 0 1 1 1 0 
s = 115: 1 1 0 0 1 1 1 0 
s = 116: 0 0 1 0 1 1 1 0 
s = 117: 1 0 1 0 1 1 1 0 
s = 118: 0 1 1 0 1 1 1 0 
s = 119: 1 1 1 0 1 1 1 0 
s = 120: 0 0 0 1 1 1 1 0 
s = 121: 1 0 0 1 1 1 1 0 
s = 122: 0 1 0 1 1 1 1 0 
s = 123: 1 1 0 1 1 1 1 0 
s = 124: 0 0 1 1 1 1 1 0 
s = 125: 1 0 1 1 1 1 1 0 
s = 126: 0 1 1 1 1 1 1 0 
s = 127: 1 1 1 1 1 1 1 0 
s = 128: 0 0 0 0 0 0 0 1 
s = 129: 1 0 0 0 0 0 0 1 
s = 130: 0 1 0 0 0 0 0 1 
s = 131: 1 1 0 0 0 0 0 1 
s = 132: 0 0 1 0 0 0 0 1 
s = 133: 1 0 1 0 0 0 0 1 
s = 134: 0 1 1 0 0 0 0 1 
s = 135: 1 1 1 0 0 0 0 1 
s = 136: 0 0 0 1 0 0 0 1 
s = 137: 1 0 0 1 0 0 0 1 
s = 138: 0 1 0 1 0 0 0 1 
s = 139: 1 1 0 1 0 0 0 1 
s = 140: 0 0 1 1 0 0 0 1 
s = 141: 1 0 1 1 0 0 0 1 
s = 142: 0 1 1 1 0 0 0 1 
s = 143: 1 1 1 1 0 0 0 1 
s = 144: 0 0 0 0 1 0 0 1 
s = 145: 1 0 0 0 1 0 0 1 
s = 146: 0 1 0 0 1 0 0 1 
s = 147: 1 1 0 0 1 0 0 1 
s = 148: 0 0 1 0 1 0 0 1 
s = 149: 1 0 1 0 1 0 0 1 
s = 150: 0 1 1 0 1 0 0 1 
s = 151: 1 1 1 0 1 0 0 1 
s = 152: 0 0 0 1 1 0 0 1 
s = 153: 1 0 0 1 1 0 0 1 
s = 154: 0 1 0 1 1 0 0 1 
s = 155: 1 1 0 1 1 0 0 1 
s = 156: 0 0 1 1 1 0 0 1 
s = 157: 1 0 1 1 1 0 0 1 
s = 158: 0 1 1 1 1 0 0 1 
s = 159: 1 1 1 1 1 0 0 1 
s = 160: 0 0 0 0 0 1 0 1 
s = 161: 1 0 0 0 0 1 0 1 
s = 162: 0 1 0 0 0 1 0 1 
s = 163: 1 1 0 0 0 1 0 1 
s = 164: 0 0 1 0 0 1 0 1 
s = 165: 1 0 1 0 0 1 0 1 
s = 166: 0 1 1 0 0 1 0 1 
s = 167: 1 1 1 0 0 1 0 1 
s = 168: 0 0 0 1 0 1 0 1 
s = 169: 1 0 0 1 0 1 0 1 
s = 170: 0 1 0 1 0 1 0 1 
s = 171: 1 1 0 1 0 1 0 1 
s = 172: 0 0 1 1 0 1 0 1 
s = 173: 1 0 1 1 0 1 0 1 
s = 174: 0 1 1 1 0 1 0 1 
s = 175: 1 1 1 1 0 1 0 1 
s = 176: 0 0 0 0 1 1 0 1 
s = 177: 1 0 0 0 1 1 0 1 
s = 178: 0 1 0 0 1 1 0 1 
s = 179: 1 1 0 0 1 1 0 1 
s = 180: 0 0 1 0 1 1 0 1 
s = 181: 1 0 1 0 1 1 0 1 
s = 182: 0 1 1 0 1 1 0 1 
s = 183: 1 1 1 0 1 1 0 1 
s = 184: 0 0 0 1 1 1 0 1 
s = 185: 1 0 0 1 1 1 0 1 
s = 186: 0 1 0 1 1 1 0 1 
s = 187: 1 1 0 1 1 1 0 1 
s = 188: 0 0 1 1 1 1 0 1 
s = 189: 1 0 1 1 1 1 0 1 
s = 190: 0 1 1 1 1 1 0 1 
s = 191: 1 1 1 1 1 1 0 1 
s = 192: 0 0 0 0 0 0 1 1 
s = 193: 1 0 0 0 0 0 1 1 
s = 194: 0 1 0 0 0 0 1 1 
s = 195: 1 1 0 0 0 0 1 1 
s = 196: 0 0 1 0 0 0 1 1 
s = 197: 1 0 1 0 0 0 1 1 
s = 198: 0 1 1 0 0 0 1 1 
s = 199: 1 1 1 0 0 0 1 1 
s = 200: 0 0 0 1 0 0 1 1 
s = 201: 1 0 0 1 0 0 1 1 
s = 202: 0 1 0 1 0 0 1 1 
s = 203: 1 1 0 1 0 0 1 1 
s = 204: 0 0 1 1 0 0 1 1 
s = 205: 1 0 1 1 0 0 1 1 
s = 206: 0 1 1 1 0 0 1 1 
s = 207: 1 1 1 1 0 0 1 1 
s = 208: 0 0 0 0 1 0 1 1 
s = 209: 1 0 0 0 1 0 1 1 
s = 210: 0 1 0 0 1 0 1 1 
s = 211: 1 1 0 0 1 0 1 1 
s = 212: 0 0 1 0 1 0 1 1 
s = 213: 1 0 1 0 1 0 1 1 
s = 214: 0 1 1 0 1 0 1 1 
s = 215: 1 1 1 0 1 0 1 1 
s = 216: 0 0 0 1 1 0 1 1 
s = 217: 1 0 0 1 1 0 1 1 
s = 218: 0 1 0 1 1 0 1 1 
s = 219: 1 1 0 1 1 0 1 1 
s = 220: 0 0 1 1 1 0 1 1 
s = 221: 1 0 1 1 1 0 1 1 
s = 222: 0 1 1 1 1 0 1 1 
s = 223: 1 1 1 1 1 0 1 1 
s = 224: 0 0 0 0 0 1 1 1 
s = 225: 1 0 0 0 0 1 1 1 
s = 226: 0 1 0 0 0 1 1 1 
s = 227: 1 1 0 0 0 1 1 1 
s = 228: 0 0 1 0 0 1 1 1 
s = 229: 1 0 1 0 0 1 1 1 
s = 230: 0 1 1 0 0 1 1 1 
s = 231: 1 1 1 0 0 1 1 1 
s = 232: 0 0 0 1 0 1 1 1 
s = 233: 1 0 0 1 0 1 1 1 
s = 234: 0 1 0 1 0 1 1 1 
s = 235: 1 1 0 1 0 1 1 1 
s = 236: 0 0 1 1 0 1 1 1 
s = 237: 1 0 1 1 0 1 1 1 
s = 238: 0 1 1 1 0 1 1 1 
s = 239: 1 1 1 1 0 1 1 1 
s = 240: 0 0 0 0 1 1 1 1 
s = 241: 1 0 0 0 1 1 1 1 
s = 242: 0 1 0 0 1 1 1 1 
s = 243: 1 1 0 0 1 1 1 1 
s = 244: 0 0 1 0 1 1 1 1 
s = 245: 1 0 1 0 1 1 1 1 
s = 246: 0 1 1 0 1 1 1 1 
s = 247: 1 1 1 0 1 1 1 1 
s = 248: 0 0 0 1 1 1 1 1 
s = 249: 1 0 0 1 1 1 1 1 
s = 250: 0 1 0 1 1 1 1 1 
s = 251: 1 1 0 1 1 1 1 1 
s = 252: 0 0 1 1 1 1 1 1 
s = 253: 1 0 1 1 1 1 1 1 
s = 254: 0 1 1 1 1 1 1 1 
s = 255: 1 1 1 1 1 1 1 1 

*/

/* --- */
/* 
    FFFF     FFFF 1.000000 1.000000 : 3333333333333333 | 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0  |              2097152 2^21.000000

real    4m31.416s
user    0m40.215s
sys     0m7.988s
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
 */
/* --- */

// Scratch fragment: when max_cnt is 1, assert that the value 3 occurs in
// S_in[] only at index n-2 or n-1.
#if 1									  // DEBUG
	 if(max_cnt == 1) {
		for(uint32_t i = 0; i < WORD_SIZE; i++) {
		  if (S_in[i] == 3) {
			 assert((i == (n-2)) || (i == (n-1)));
		  }
		}
	 }
#endif

/* --- */
		// Scratch fragment: print the entries of R from index 1 on, count the
		// non-zero ones in t and assert that t does not exceed max_cnt.
		uint32_t t = 0;
		for(int i = 1; i < ADP_XOR_FI_COUNT_MSIZE; i++) {
		  double e = gsl_vector_get(R, i);
		  printf("%1.0f ", e);
		  if(e != 0) {
			 t++;
		  }
		}
		printf(" | %d %f\n", t, max_cnt);
		assert(t <= max_cnt);


/* --- */

  // Scratch fragment: terminal case k == n of a function not shown here.
  // Sums the entries of C_in except index 0, checks the sum against
  // max_cnt, validates and prints S_in[], prints C_in and returns.
  if(k == n) {

	 // L = (0, 1, 1, ...): the dot product with C_in skips entry 0
	 gsl_vector* L = gsl_vector_calloc(ADP_XOR_FI_COUNT_MSIZE);
	 double cnt = 0;
	 gsl_vector_set_all(L, 1.0);
	 gsl_vector_set(L, 0, 0.0);
	 gsl_blas_ddot(L, C_in, &cnt);
	 printf("cnt %f\n", cnt);
	 assert(cnt <= (max_cnt));

	 for(uint32_t i = 0; i < WORD_SIZE; i++) {
		printf("%d", S_in[i]);
		// the value 3 may only appear at index n-2 or n-1 ...
		if (S_in[i] == 3) {
		  assert((i == (n-2)) || (i == (n-1)));
		}
		// ... and, below index n-1, must be followed by a 0
		if((S_in[i] == 3) && (i < (n-1))) {
		  assert((S_in[i+1] == 0));
		}
	 }
	 printf(" | ");
	 for(int i = 1; i < ADP_XOR_FI_COUNT_MSIZE; i++) {
		double e = gsl_vector_get(C_in, i);
		printf("%1.0f ", e);
	 }
	 printf("\n");
	 gsl_vector_free(L);
	 return;
  }

/* --- */

	 // Scratch fragment: count the non-zero entries of R, skipping index 0.
	 uint32_t nz_cnt = 0;
	 for(uint32_t i = 1; i < ADP_XOR_FI_COUNT_MSIZE; i++) {
		double e = gsl_vector_get(R, i);
		if(e != 0) {
		  nz_cnt++;
		}
	 }


/* --- */

/**
 * Exhaustive experiment on the four matrices P[a][da].
 *
 * For every non-zero bit pattern s below 2^ADP_XOR_FI_COUNT_MSIZE an
 * indicator vector is built from bits 1 and up of s (bit 0 is ignored, so
 * entry 0 stays zero). The vector is multiplied by each matrix, and every
 * product that has more than one non-zero entry at an index >= 1 is printed
 * together with the matrix label and the input vector.
 *
 * \param P the four matrices, indexed as P[a][da].
 */
void adp_xor_fi_count_odiff_min_set_size(gsl_matrix* P[2][2])
{
  gsl_vector* vec_in = gsl_vector_calloc(ADP_XOR_FI_COUNT_MSIZE);
  gsl_vector* vec_out = gsl_vector_calloc(ADP_XOR_FI_COUNT_MSIZE);

  const uint32_t n_patterns = (1U << ADP_XOR_FI_COUNT_MSIZE);
  for(uint32_t pattern = 1; pattern < n_patterns; pattern++) {

    // indicator vector of the bits of the pattern (index 0 left at zero)
    gsl_vector_set_all(vec_in, 0.0);
    for(uint32_t idx = 1; idx < ADP_XOR_FI_COUNT_MSIZE; idx++) {
      if(((pattern >> idx) & 1) != 0) {
        gsl_vector_set(vec_in, idx, 1.0);
      }
    }

    // same visiting order as a two-bit counter with a in bit 0, da in bit 1
    for(uint32_t da = 0; da < 2; da++) {
      for(uint32_t a = 0; a < 2; a++) {
        gsl_vector_set_all(vec_out, 0.0);
        gsl_blas_dgemv(CblasNoTrans, 1.0, P[a][da], vec_in, 0.0, vec_out);

        uint32_t n_nonzero = 0;
        for(uint32_t idx = 1; idx < ADP_XOR_FI_COUNT_MSIZE; idx++) {
          if(gsl_vector_get(vec_out, idx) != 0) {
            n_nonzero++;
          }
        }
#if 1									  // DEBUG
        if(n_nonzero > 1) {
          printf("R^t ");
          for(int idx = 0; idx < ADP_XOR_FI_COUNT_MSIZE; idx++) {
            printf("%1.0f ", gsl_vector_get(vec_out, idx));
          }
          printf(" A%d%d ", da, a);
          printf("C^t ");
          for(int idx = 0; idx < ADP_XOR_FI_COUNT_MSIZE; idx++) {
            printf("%1.0f ", gsl_vector_get(vec_in, idx));
          }
          printf("\n");
        }
#endif
      }
    }
  }

  gsl_vector_free(vec_out);
  gsl_vector_free(vec_in);
}


/* --- */

/*
     [./tests/adp-xor-fi-tests.cc:160] s =  0: 0 0 0 0
	  [./tests/adp-xor-fi-tests.cc:160] s =  1: 1 0 0 0
	  [./tests/adp-xor-fi-tests.cc:160] s =  2: 0 1 0 0
	  [./tests/adp-xor-fi-tests.cc:160] s =  3: 1 1 0 0
	  [./tests/adp-xor-fi-tests.cc:160] s =  4: 0 0 1 0
	  [./tests/adp-xor-fi-tests.cc:160] s =  5: 1 0 1 0
	  [./tests/adp-xor-fi-tests.cc:160] s =  6: 0 1 1 0
	  [./tests/adp-xor-fi-tests.cc:160] s =  7: 1 1 1 0
	  [./tests/adp-xor-fi-tests.cc:160] s =  8: 0 0 0 1
	  [./tests/adp-xor-fi-tests.cc:160] s =  9: 1 0 0 1
	  [./tests/adp-xor-fi-tests.cc:160] s = 10: 0 1 0 1
	  [./tests/adp-xor-fi-tests.cc:160] s = 11: 1 1 0 1
	  [./tests/adp-xor-fi-tests.cc:160] s = 12: 0 0 1 1
	  [./tests/adp-xor-fi-tests.cc:160] s = 13: 1 0 1 1
	  [./tests/adp-xor-fi-tests.cc:160] s = 14: 0 1 1 1
	  [./tests/adp-xor-fi-tests.cc:160] s = 15: 1 1 1 1
*/

/* --- */

sage: list(powerset([0,1,2,3]))
[[], [0], [1], [0, 1], [2], [0, 2], [1, 2], [0, 1, 2], [3], [0, 3], [1, 3], [0, 1, 3], [2, 3], [0, 2, 3], [1, 2, 3], [0, 1, 2, 3]]
sage:

[
[], 
[0], 
[1], 
[2], 
[3], 
[0, 1], 
[0, 2], 
[0, 3], 
[1, 2], 
[1, 3], 
[2, 3], 
[0, 1, 2], 
[0, 1, 3], 
[0, 2, 3], 
[1, 2, 3], 
[0, 1, 2, 3]
]

/* --- */

#ifndef COUNT_H
#define COUNT_H

/**
 * Matrices to count the number of output differences.
 *
 * c[in1][in2][out][state_in][state_out] is treated as a 0 / non-zero
 * transition table over N states. For every (in1, in2, out) and every set
 * of input states mask_in (a bit mask over the N states), the set mask_out
 * of all states reachable from mask_in is computed, and
 * d[in1][in2][mask_in][mask_out] is incremented. After the call, d[in1][in2]
 * therefore counts, summed over both values of out, the transitions between
 * state sets.
 *
 * Note: output state zero (empty mask_out) means 'no output state'; do not
 * include it in a summation over d.
 *
 * The output array is cleared with explicit loops, so this header does not
 * depend on <cstring> being included by the user.
 *
 * \tparam N number of states; 1 <= N <= 30 so that 1<<N fits in an int.
 * \param c input transition tables.
 * \param d output count matrices; fully overwritten.
 */
template <int N>
void num_diff(const int c[2][2][2][N][N], int d[2][2][1<<N][1<<N])
{
  static_assert((N > 0) && (N < 31), "num_diff: N must be in [1, 30]");
  const int nmasks = (1 << N);

  // clear the output
  for (int in1=0; in1!=2; in1++) {
    for (int in2=0; in2!=2; in2++) {
      for (int mask_in=0; mask_in!=nmasks; mask_in++) {
        for (int mask_out=0; mask_out!=nmasks; mask_out++) {
          d[in1][in2][mask_in][mask_out] = 0;
        }
      }
    }
  }

  for (int in1=0; in1!=2; in1++) { // first input
    for (int in2=0; in2!=2; in2++) { // second input
      for (int out=0; out!=2; out++) { // for all output values
        int valid_outputs[N] = {0}; // mask of valid outputs for all states
        for (int state_in=0; state_in!=N; state_in++) {
          for (int state_out=0; state_out!=N; state_out++) {
            if (c[in1][in2][out][state_in][state_out]) {
              valid_outputs[state_in] |= (1<<state_out);
            }
          }
        }
        for (int mask_in=0; mask_in!=nmasks; mask_in++) { // input state mask
          int mask_out = 0; // union of the outputs of all states in mask_in
          for (int state=0; state!=N; state++) {
            if ((mask_in >> state) & 1) {
              mask_out |= valid_outputs[state];
            }
          }
          d[in1][in2][mask_in][mask_out]++;
        }
      }
    }
  }

}

#endif /* COUNT_H */


/* --- */

/**
 * Ordering predicate on differences: true when \p a has strictly higher
 * probability than \p b (i.e. sorts in descending order of p).
 */
bool diff_comp_p(difference_t a, difference_t b) 
{
  const bool b_a_first = (b.p < a.p);
  return b_a_first;
}


/* --- */

/**
  * Comparing differences by probability.
  */
struct struct_comp_difference_p : public std::binary_function<difference_t, difference_t, bool>
{
  bool operator()(difference_t a, difference_t b) const
  {
	 bool b_more = (a.p > b.p);	  // higher probability first
	 return b_more;
  }
};


/* --- */

#--- [./src/idea.cc:580] Tests, WORD_SIZE  = 16, MASK =     FFFF
	  #--- [./src/idea.cc:275] const     5001  20481 , dx     8354  33620
v
/* ---- */

#if 1									  // dp-matrix-minimize.cc5C

namespace equivalent {
  /**
   * Compare a candidate signature with a stored one.
   * \return true iff q[k][j] == c[k][i][j] for every k < M and j < C.
   */
  template <uint32_t M, uint32_t C>
  inline bool equal(const uint32_t q[M][C], const uint32_t c[M][C][C], const uint32_t i) {
    bool b_same = true;
    for (uint32_t k = 0; b_same && (k < M); ++k) {
      const uint32_t* candidate = q[k];
      const uint32_t* stored = c[k][i];
      for (uint32_t j = 0; b_same && (j < C); ++j) {
        b_same = (candidate[j] == stored[j]);
      }
    }
    return b_same;
  }

  /**
   * Search the first n stored signatures for one equal to q.
   * \return the index of the first match, or n when there is none.
   */
  template <uint32_t M, uint32_t C>
  inline uint32_t find(const uint32_t q[M][C], const uint32_t c[M][C][C], const uint32_t n) {
    uint32_t i = 0;
    while ((i < n) && !equivalent::equal<M>(q, c, i)) {
      ++i;
    }
    return i;
  }
}

/**
 * Iteratively group the N states of M square matrices m[k] (N x N each)
 * into classes and write the per-class rows into c.
 *
 * In every pass each state i gets a signature q: for every matrix k, the
 * entries m[k][i][j] are summed per class of state j (classes taken from
 * the previous pass). States with an equal signature receive the same class
 * index; a new signature opens a new class and is stored as row n of c[k].
 * The passes stop as soon as a pass produces the same number of classes as
 * the one before it.
 *
 * \tparam M number of matrices.
 * \tparam N number of states (size of the input matrices).
 * \tparam C capacity of c; the code asserts that no more than C classes
 *         are created.
 * \param m input matrices.
 * \param c output: c[k][class][*] holds the signature of each class; rows
 *        are overwritten on every pass.
 * \return the number of classes found in the last pass.
 */
template <uint32_t M, uint32_t N, uint32_t C>
uint32_t combine_equiv(const uint32_t m[M][N][N], uint32_t c[M][C][C]) {
  uint32_t r[2][N] = {{0}};
  uint32_t* s = r[0];  // class of every state from the previous pass
  uint32_t* t = r[1];  // class of every state assigned in the current pass
  uint32_t n = 0;      // number of classes in the current pass

  while (true) {
    const uint32_t p = n;  // class count of the previous pass
    n = 0;

    for (uint32_t i = 0; i < N; ++i) {
      // signature of state i: per matrix, entries summed by class of column j
      uint32_t q[M][C] = {{0}};

      for (uint32_t k = 0; k < M; ++k) {
        for (uint32_t j = 0; j < N; ++j) {
          q[k][s[j]] += m[k][i][j];
		  }
		}

      // index of an existing class with this signature, or n if none
      t[i] = equivalent::find<M>(q, c, n);

      if (t[i] == n) {
        // new class: store its signature as row n
        assert(n < C);

        for (uint32_t k = 0; k < M; ++k) {
          for (uint32_t j = 0; j < C; ++j) { 
            c[k][n][j] = q[k][j];
			 }
		  }

        ++n;
      }
    }

    // stop when the number of classes did not change
    if (n == p) {
      return n;
	 }

    // the labels just computed become the reference for the next pass
    std::swap(s, t);
  }
}


/**
 * Convert the eight matrices in A to integer arrays with
 * adp_xor_fi_matrix_to_arrey(), run combine_equiv<8>() on them and print
 * the arrays before and after, followed by the original and the new size.
 *
 * \param A input matrices, indexed by three bits.
 * \param C output arrays; zeroed here and then filled by combine_equiv().
 * \return the value returned by combine_equiv().
 */
uint32_t adp_xor_fi_minimize_matrix(gsl_matrix* A[2][2][2],
                                    uint32_t C[2][2][2][ADP_XOR_FI_MSIZE][ADP_XOR_FI_MSIZE])
{
  uint32_t M[2][2][2][ADP_XOR_FI_MSIZE][ADP_XOR_FI_MSIZE];

  // clear both the working copy and the output
  for(uint32_t i = 0; i < 2; i++) {
    for(uint32_t j = 0; j < 2; j++) {
      for(uint32_t k = 0; k < 2; k++) {
        for(uint32_t row = 0; row < ADP_XOR_FI_MSIZE; row++) {
          for(uint32_t col = 0; col < ADP_XOR_FI_MSIZE; col++) {
            M[i][j][k][row][col] = 0;
            C[i][j][k][row][col] = 0;
          }
        }
      }
    }
  }

  adp_xor_fi_matrix_to_arrey(A, M);

#if 1									  // DEBUG
  // k is the slowest and i the fastest index, as in a 3-bit counter (k j i)
  for(uint32_t k = 0; k < 2; k++) {
    for(uint32_t j = 0; j < 2; j++) {
      for(uint32_t i = 0; i < 2; i++) {
        printf("[%s:%d] M%d%d%d\n", __FILE__, __LINE__, k, j, i);
        for(uint32_t row = 0; row < ADP_XOR_FI_MSIZE; row++) {
          for(uint32_t col = 0; col < ADP_XOR_FI_MSIZE; col++) {
            printf("%2d", M[i][j][k][row][col]);
          }
          printf("\n");
        }
        printf("\n");
      }
    }
  }
#endif

  const uint32_t n_new = combine_equiv<8>(&M[0][0][0], &C[0][0][0]);

#if 1									  // DEBUG
  for(uint32_t k = 0; k < 2; k++) {
    for(uint32_t j = 0; j < 2; j++) {
      for(uint32_t i = 0; i < 2; i++) {
        printf("[%s:%d] C%d%d%d\n", __FILE__, __LINE__, k, j, i);
        for(uint32_t row = 0; row < ADP_XOR_FI_MSIZE; row++) {
          for(uint32_t col = 0; col < ADP_XOR_FI_MSIZE; col++) {
            printf("%2d", C[i][j][k][row][col]);
          }
          printf("\n");
        }
        printf("\n");
      }
    }
  }
#endif

  printf("[%s:%d] Size: original %d, new %d\n", __FILE__, __LINE__, ADP_XOR_FI_MSIZE, n_new);
  return n_new;
}

// Prototype of adp_xor_fi_minimize_matrix(): takes the eight matrices A and
// fills the integer arrays C; returns an unsigned count.
uint32_t adp_xor_fi_minimize_matrix(gsl_matrix* A[2][2][2], 
												uint32_t C[2][2][2][ADP_XOR_FI_MSIZE][ADP_XOR_FI_MSIZE]);


void test_adp_xor_fi_minimize_matrix()
{
  gsl_matrix* A[2][2][2];
  uint32_t C[2][2][2][ADP_XOR_FI_MSIZE][ADP_XOR_FI_MSIZE];
  adp_xor_fixed_input_alloc_matrices(A);
  adp_xor_fixed_input_sf(A);
  //  adp_xor_fixed_input_normalize_matrices(A);
  //  adp_xor_fixed_input_print_matrices_sage(A);
  adp_xor_fi_minimize_matrix(A, C);
  adp_xor_fixed_input_free_matrices(A);
}


/* --- */

// {--- COUNT possible outputs start ---

// Allocate the four matrices A[a][db].
void adp_xor_fi_count_odiffs_alloc_matrices(gsl_matrix* A[2][2]);

// Free the four matrices A[a][db].
void adp_xor_fi_count_odiffs_free_matrices(gsl_matrix* A[2][2]);

// Fill the matrices A[a][db] (a is a value bit, db a difference bit).
void adp_xor_fi_count_odiffs_sf(gsl_matrix* A[2][2]);

// Print the matrices in plain form.
void adp_xor_fi_count_odiffs_print_matrices(gsl_matrix* A[2][2]);

// Print the vectors L, C and the matrices as Sage definitions.
void adp_xor_fi_count_odiffs_print_matrices_sage(gsl_matrix* A[2][2]);

// Count computed from the matrices for the fixed value a and difference db.
double adp_xor_fi_count_odiffs(gsl_matrix* A[2][2], uint32_t a, uint32_t db);

// Count obtained experimentally by enumerating all words.
double adp_xor_fi_count_odiffs_exper(const uint32_t a, const uint32_t db);

void test_adp_xor_fi_count_odiffs()
{
  gsl_matrix* B[2][2][2];
  adp_xor_fixed_input_alloc_matrices(B);
  adp_xor_fixed_input_sf(B);
  adp_xor_fixed_input_normalize_matrices(B);

  gsl_matrix* A[2][2];
  adp_xor_fi_count_odiffs_alloc_matrices(A);
  adp_xor_fi_count_odiffs_sf(A);
  //  adp_xor_fi_count_odiffs_print_matrices(A);
  adp_xor_fi_count_odiffs_print_matrices_sage(A);
  uint32_t a = random32() & MASK;
  uint32_t db  = random32() & MASK;
  double cnt1 = adp_xor_fi_count_odiffs(A, a, db);
  double cnt2 = adp_xor_fi_count_odiffs_exper(a, db);
  double cnt3 = 0.0;
  for(uint32_t dc = 0; dc < ALL_WORDS; dc++) {
	 double p = adp_xor_fixed_input(B, a, db, dc);
	 if(p) {
		cnt3 += 1.0;
	 }
  }
  printf("[%s:%d] %f %f %f\n", __FILE__, __LINE__, cnt1, cnt2, cnt3);
  adp_xor_fi_count_odiffs_free_matrices(A);
  adp_xor_fixed_input_free_matrices(B);
}

#define ADP_XOR_FI_COUNT_MSIZE 2
#define ADP_XOR_FI_COUNT_ISTATE 0

/**
 * Allocate the four zero-initialized 2x2 matrices A[a][b].
 * Release them with adp_xor_fi_count_odiffs_free_matrices().
 */
void adp_xor_fi_count_odiffs_alloc_matrices(gsl_matrix* A[2][2])
{
  // a is the fast index, matching the original two-bit counter order
  for(int b = 0; b < 2; b++) {
    for(int a = 0; a < 2; a++) {
      A[a][b] = gsl_matrix_calloc(2, 2);
    }
  }
}

/**
 * Free the four matrices A[a][b].
 */
void adp_xor_fi_count_odiffs_free_matrices(gsl_matrix* A[2][2])
{
  // a is the fast index, matching the original two-bit counter order
  for(int b = 0; b < 2; b++) {
    for(int a = 0; a < 2; a++) {
      gsl_matrix_free(A[a][b]);
    }
  }
}

/**
 * Fill the four matrices A[a][db] with 0/1 entries.
 *
 * For every bit pair (a, db), every one of the 4 input states u and both
 * values of the bit b1, the code derives the carry out_s1 of b1 + db + in_s1
 * and sets A[a][db][out_s1][in_s1] = 1. Only the carry in_s1/out_s1 is used
 * as matrix index; the second state component (in_s2/out_s2) and the output
 * difference bit dc are computed and checked by the asserts but do not
 * influence the matrix.
 *
 * \param A matrices to fill; must already be allocated.
 */
void adp_xor_fi_count_odiffs_sf(gsl_matrix* A[2][2])
{
  // number of possible input differences
  uint32_t ndiffs = (1UL << ADP_XOR_FI_NINPUTS);
  assert(ndiffs == 4);
  uint32_t nstates = 4;//ADP_XOR_FI_MSIZE;

  for(uint32_t i = 0; i < ndiffs; i++) {
	 uint32_t a = (i >> 0) & 1; // value, not difference!
	 uint32_t db = (i >> 1) & 1;  
	 //			 printf("%d%d\n", db, a);

	 for(int32_t u = 0; u < (int)nstates; u++) {
		// decode the state: in_s1 in {0, 1}, in_s2 in {-1, 0}
		int32_t t = u;
		int32_t in_s1 = t & 1;
		t /= 2;
		int32_t in_s2 = (t & 1) - 1;
		t /= 2;
		//					printf("[%2d] %2d%2d \n", u, in_s2, in_s1);

		for(uint32_t j = 0; j < 2; j++) {
		  // second value of the pair and the carry of b1 + db + in_s1
		  uint32_t b1 = j;
		  uint32_t b2 = b1 ^ db ^ in_s1;
		  int32_t out_s1 = (b1 + db + in_s1) >> 1;

		  // xor with three inputs
		  uint32_t c1 = a ^ b1;
		  uint32_t c2 = a ^ b2;
		  uint32_t dc = (c2 - c1 + in_s2) & 1;
		  int32_t out_s2 = (int32_t)(c2 - c1 + in_s2) >> 1; // signed shift i.e. -1 >> 1 == -1
		  assert((dc == 0) || (dc == 1));
		  assert((c2 - c1 + in_s2) == ((out_s2 * 2) + dc));

		  // checks
		  assert((out_s1 == 0) || (out_s1 == 1));
		  assert((out_s2 == 0) || (out_s2 == -1));

		  uint32_t v = 0;

		  // compose the output state
		  //		  v = out_s2 + 1;
		  //		  v *= 2;
		  v = out_s1;

		  // add a link between U and V in the adjacency matrix
		  // 
		  //                   input u
		  //                     |
		  //                     V
		  //              [x] [x] [x] [x]  
		  // output v <-  [x] [x] [x] [x]  
		  //              [x] [x] [x] [x]  
		  // 
		  uint32_t col = in_s1;
		  uint32_t row = v;
		  // the value read here is discarded: the entry is always set to 1
		  uint32_t e = gsl_matrix_get(A[a][db], row, col);
		  //		  e = e + 1;
		  //		  if(e == 0) {
			 e = 1;
			 gsl_matrix_set(A[a][db], row, col, e);
			 //		  }

		} // vals
	 }		  // states
  }			  // diffs
}

/**
 * Print the four 2x2 matrices A[a][b], each under a label "A<b><a>".
 */
void adp_xor_fi_count_odiffs_print_matrices(gsl_matrix* A[2][2])
{
  // a is the fast index, matching the original two-bit counter order
  for(int b = 0; b < 2; b++) {
    for(int a = 0; a < 2; a++) {
      printf("A%d%d \n", b, a);
      for(int row = 0; row < 2; row++) {
        for(int col = 0; col < 2; col++) {
          printf("%3.2f, ", gsl_matrix_get(A[a][b], row, col));
        }
        printf("\n");
      }
      printf("\n");
    }
  }
}

/**
 * Print the vectors L (all ones) and C (1 at index 0, zero elsewhere) and
 * the four matrices A[a][b] as Sage definitions (L, C, AA00..AA11 and the
 * list AA).
 *
 * The two temporary vectors are now released before returning; previously
 * they were leaked on every call.
 *
 * \param A the four matrices to print, indexed as A[a][b].
 */
void adp_xor_fi_count_odiffs_print_matrices_sage(gsl_matrix* A[2][2])
{
  printf("# [%s:%d] Matrices for ADP-XOR-FI-COUNT generated with %s() \n", __FILE__, __LINE__, __FUNCTION__);

  // print L
  gsl_vector* L = gsl_vector_calloc(ADP_XOR_FI_COUNT_MSIZE);
  gsl_vector_set_all(L, 1.0);
  printf("#--- Vector L --- \n");
  printf("L = vector(QQ,[ ");
  for(int col = 0; col < ADP_XOR_FI_COUNT_MSIZE; col++){
    double e = gsl_vector_get(L, col);
    printf("%4.3f", e);
    // no separator after the last element
    if(col == ADP_XOR_FI_COUNT_MSIZE - 1) {
      printf(" ");
    } else {
      printf(", ");
    }
  }
  printf("])\n\n");
  gsl_vector_free(L);

  // print C
  gsl_vector* C = gsl_vector_calloc(ADP_XOR_FI_COUNT_MSIZE);
  gsl_vector_set_zero(C);
  gsl_vector_set(C, 0, 1.0);
  printf("#--- Vector C --- \n");
  printf("C = vector(QQ,[ ");
  for(int col = 0; col < ADP_XOR_FI_COUNT_MSIZE; col++){
    double e = gsl_vector_get(C, col);
    printf("%4.3f", e);
    if(col == ADP_XOR_FI_COUNT_MSIZE - 1) {
      printf(" ");
    } else {
      printf(", ");
    }
  }
  printf("])\n\n");
  gsl_vector_free(C);

  // print A
  for(int i = 0; i < 4; i++){
    int a = (i >> 0) & 1;
    int b = (i >> 1) & 1;
    printf("#---AA%d%d--- \n", b, a);
    printf("AA%d%d = matrix(QQ,%d,%d,[\n", b, a, ADP_XOR_FI_COUNT_MSIZE, ADP_XOR_FI_COUNT_MSIZE);
    for(int row = 0; row < ADP_XOR_FI_COUNT_MSIZE; row++){
      for(int col = 0; col < ADP_XOR_FI_COUNT_MSIZE; col++){
        double e = gsl_matrix_get(A[a][b], row, col);
        printf("%3.2f", e);
        // no separator after the very last matrix entry
        if((row == ADP_XOR_FI_COUNT_MSIZE - 1) && (col == ADP_XOR_FI_COUNT_MSIZE - 1)) {
          printf(" ");
        } else {
          printf(", ");
        }
      }
      printf("\n");
    }
    printf("])\n\n");
  }
  printf("\n");
  printf("AA = [AA00, AA01, AA10, AA11]\n");
}

/**
 * Experimentally count the distinct output differences
 * dx = ((a ^ (b1 + db)) - (a ^ b1)) mod MOD over all 2^WORD_SIZE words b1.
 * Every difference is printed the first time it is seen.
 *
 * The "seen" table lives on the heap (it has ALL_WORDS entries and would
 * overflow the stack for larger word sizes), and the loop counter is 64 bits
 * wide so that the loop also terminates when WORD_SIZE is 32.
 *
 * \param a fixed input value.
 * \param db input additive difference.
 * \return the number of distinct output differences.
 */
double adp_xor_fi_count_odiffs_exper(const uint32_t a, const uint32_t db)
{
  const uint64_t N = (1ULL << WORD_SIZE);
  uint64_t cnt = 0;
  std::vector<bool> b_W(ALL_WORDS, false); // b_W[dx]: dx was already seen

  for(uint64_t w = 0; w < N; w++) {
    const uint32_t b1 = (uint32_t)w;
    uint32_t b2 = (b1 + db) % MOD;
    uint32_t c1 = a ^ b1;
    uint32_t c2 = a ^ b2;
    uint32_t dx = (c2 - c1 + MOD) % MOD;
    assert(dx < MOD);
    if(b_W[dx] == false) {
      printf("%8X\n", dx);
      b_W[dx] = true;
      cnt++;
    }
  }
  double p = (double)cnt;
  return p;
}

/**
 * Evaluate L * A[a_{n-1}][db_{n-1}] * ... * A[a_0][db_0] * C, where
 * C is the unit vector of the initial state ADP_XOR_FI_COUNT_ISTATE, L is
 * the all-one vector and the matrices are selected by the bits of a and db
 * from the least significant bit upwards. The result is also printed.
 *
 * \param A the four matrices, indexed as A[a_bit][db_bit].
 * \param a fixed input value.
 * \param db input difference.
 * \return the resulting scalar.
 */
double adp_xor_fi_count_odiffs(gsl_matrix* A[2][2], uint32_t a, uint32_t db)
{
  gsl_vector* ones = gsl_vector_calloc(ADP_XOR_FI_COUNT_MSIZE);
  gsl_vector* state = gsl_vector_calloc(ADP_XOR_FI_COUNT_MSIZE);

  // start in the initial state; the final sum runs over all states
  gsl_vector_set(state, ADP_XOR_FI_COUNT_ISTATE, 1.0);
  for(int idx = 0; idx < ADP_XOR_FI_COUNT_MSIZE; idx++) {
    gsl_vector_set(ones, idx, 1.0);
  }

  gsl_vector* next = gsl_vector_calloc(ADP_XOR_FI_COUNT_MSIZE);

  int pos = 0;
  while(pos < WORD_SIZE) {
    const uint32_t a_bit = (a >> pos) & 1;
    const uint32_t db_bit = (db >> pos) & 1;

    assert(a_bit < 2);
    assert(db_bit < 2);

    // state <- A[a_bit][db_bit] * state
    gsl_blas_dgemv(CblasNoTrans, 1.0, A[a_bit][db_bit], state, 0.0, next);
    gsl_vector_memcpy(state, next);
    pos++;
  }

  double result = 1.0;
  gsl_blas_ddot(ones, state, &result);

  gsl_vector_free(next);
  gsl_vector_free(state);
  gsl_vector_free(ones);

#if 1									  // DEBUG
  printf("%8X %8X -> * : %f 2^%f\n", a, db, result, log2(result));
#endif

  return result;
}
 

/* count possible outputs end} */


/* --- */

// 
// A[a[i]][db[i]][j]
// 
// j = 0: dc[i] = 0   (exactly one possible)
// j = 1: dc[i] = 1   (exactly one possible)
// j = 2: dc[i] = 0/1 (both are possible)
// j = 3: dc[i] = -   (none are possible)
// 

/* --- */
// Two disabled (#if 0) scratch fragments. The first prints the current
// differential when da == 0xF and, for diff.dy == 0, the threshold p_thres;
// the second inserts diff into hways_diff_mset_p when p_f is at least the
// probability of that container's last element.
#if 0									  // DEBUG
			 if(da == 0xF) {
				printf("\n[%s:%d] CNT %d: Dxy add %8X -> %8X  | %f = 2^%4.2f | %15d\n", __FILE__, __LINE__, *cnt_new, diff.dx, diff.dy, diff.p, log2(diff.p), diff_set_dx_dy->size());
				if(diff.dy == 0) {
				  printf("p_min = 2^%f\n", log2(p_thres));
				  //				  assert(0 == 1);
				}
			 }
#endif
#if 0									  // DEBUG
			 double p_min = hways_diff_mset_p->rbegin()->p;
			 if(p_f >= p_min) {
				hways_diff_mset_p->insert(diff);
			 }
			 //#else
#endif


/* --- */

/**
 * Check, for every x in [0, MOD), that idea_mul(0, x) equals SUB(1, x).
 * Each pair of values is printed before it is asserted.
 */
void test_temp()
{
  const uint32_t n_words = MOD;
  uint32_t x = 0;
  while(x < n_words) {
    const uint32_t y_mul = idea_mul(0, x);
    const uint32_t y_sub = SUB(1, x);
    printf("[%s:%d] %d | %d %d\n", __FILE__, __LINE__, x, y_mul, y_sub);
    assert(y_mul == y_sub);
    x++;
  }
}


/* --- */

/* 
20130604

- Differential trail for Raiden

B[ 0] = 2^0.000000
B[ 1] = 2^-0.997976
B[ 2] = 2^-2.003092
B[ 3] = 2^-4.650513
B[ 4] = 2^-6.904208
B[ 5] = 2^-9.169075
B[ 6] = 2^-9.157518 <-
B[ 7] = 2^-12.040834
B[ 8] = 2^-14.024897
B[ 9] = 2^-14.292030
B[10] = 2^-16.278184
B[11] = 2^-18.262943
B[12] = 2^-18.331495
B[13] = 2^-20.341391
B[14] = 2^-22.351996
B[15] = 2^-22.253144 <-
B[16] = 2^-24.251735
B[17] = 2^-26.279474
B[18] = 2^-26.258336 <-
B[19] = 2^-28.253063
B[20] = 2^-30.272388
B[21] = 2^-30.280357
B[22] = 2^-32.306301
B[23] = 2^-34.318149
B[24] = 2^-34.273955 <-
B[25] = 2^-36.302411
B[26] = 2^-38.273297
B[27] = 2^-38.225304 <-
B[28] = 2^-40.215825
B[29] = 2^-42.211429
B[30] = 2^-42.388128
B[31] = 2^-44.395013
pDDT sizes: Dp 200, Dxy 336
 0:        0 <-        0 1.000000 (2^0.000000)
 1: 7FFFFF00 <- 7FFFFF00 0.161896 (2^-2.626863)
 2: 80000100 <- 7FFFFF00 0.245026 (2^-2.028995)
 3:        0 <-        0 1.000000 (2^0.000000)
 4: 7FFFFF00 <- 7FFFFF00 0.211670 (2^-2.240112)
 5: 80000100 <- 7FFFFF00 0.207306 (2^-2.270167)
 6:        0 <-        0 1.000000 (2^0.000000)
 7: 7FFFFF00 <- 7FFFFF00 0.113983 (2^-3.133107)
 8: 80000100 <- 7FFFFF00 0.248840 (2^-2.006708)
 9:        0 <-        0 1.000000 (2^0.000000)
10: 7FFFFF00 <- 7FFFFF00 0.248138 (2^-2.010783)
11: 80000100 <- 7FFFFF00 0.250977 (2^-1.994375)
12:        0 <-        0 1.000000 (2^0.000000)
13: 7FFFFF00 <- 7FFFFF00 0.250061 (2^-1.999648)
14: 80000100 <- 7FFFFF00 0.250763 (2^-1.995604)
15:        0 <-        0 1.000000 (2^0.000000)
16: 7FFFFF00 <- 7FFFFF00 0.249146 (2^-2.004940)
17: 80000100 <- 7FFFFF00 0.246033 (2^-2.023078)
18:        0 <-        0 1.000000 (2^0.000000)
19: 7FFFFF00 <- 7FFFFF00 0.248596 (2^-2.008124)
20: 80000100 <- 7FFFFF00 0.247284 (2^-2.015760)
21:        0 <-        0 1.000000 (2^0.000000)
22: 7FFFFF00 <- 7FFFFF00 0.245056 (2^-2.028816)
23: 80000100 <- 7FFFFF00 0.251495 (2^-1.991396)
24:        0 <-        0 1.000000 (2^0.000000)
25: 7FFFFF00 <- 7FFFFF00 0.247375 (2^-2.015226)
26: 80000100 <- 7FFFFF00 0.253876 (2^-1.977806)
27:        0 <-        0 1.000000 (2^0.000000)
28: 7FFFFF00 <- 7FFFFF00 0.249481 (2^-2.002997)
29: 80000100 <- 7FFFFF00 0.247650 (2^-2.013625)
30:        0 <-        0 1.000000 (2^0.000000)
31: 7FFFFF00 <- 7FFFFF00 0.248810 (2^-2.006885)
p_tot = 0.000000000000043 = 2^-44.395013, Bn = 0.000000 = 2^-44.395013
[./src/tea-add-threshold-search.cc:1007] nrounds = 32

Iterative for 3 rounds. 

 */

/* ---- */

/**
 * Compare the output differences of IDEA_LIN_encryption() with those of
 * IDEA_encryption() under one fixed key.
 *
 * Requires WORD_SIZE == 16. For the input difference DX = (1, 0, 0, 0) and
 * for every value i of the first input word, a pair (X1, X2 = ADD(DX, X1))
 * is encrypted with both functions; the remaining three words of X1 are
 * random. The two output differences SUB(Y2, Y1) are printed side by side.
 * Nothing is asserted.
 */
void test_idea_lin()
{
  assert(WORD_SIZE == 16);

  long **K;
  // fix key to random
  ushort key[8] = {0xFD01, 0x3631, 0xFF19, 0x6C15, 0x8F26, 0x96BE, 0xCAE8, 0x15FE};

  // generate random key
#if 0
  for(uint32_t j = 0; j < 8; j++) {
	 key[j] = random32() & 0xFFFF;
	 printf("0x%4X, ", key[j]);
  }
  printf("\n");
#endif

  // alloc K
  // 9 rows of 6 subkeys each; calloc already zeroes them, the inner loop
  // only repeats that explicitly
  K = (long **)calloc(9, sizeof(long *));
  for(uint32_t i = 0; i < 9; i++) {
    K[i] = (long *)calloc(6, sizeof(long));
    for(uint32_t j = 0; j < 6; j++) {
		K[i][j] = 0;
	 }
  }
  IDEA_encryption_key_schedule(key, K);

  // only q = 0 is exercised: the difference is in the first word
  for(uint32_t q = 0; q < 1; q++) { // index of active difference

	 ushort DX[4] = {0, 0, 0, 0};
	 for(uint32_t j = 0; j < 4; j++) {
		DX[j] = 0;
	 }
	 DX[q] = 1;						  // D[0] = alpha

	 printf("[%s:%d] DX = (%8X %8X %8X %8X)\n", __FILE__, __LINE__, DX[0], DX[1], DX[2], DX[3]);

	 uint32_t N = (1U << WORD_SIZE);
	 for(uint32_t i = 0; i < N; i++) {

		ushort DY[4] = {0, 0, 0, 0};
		ushort DY_lin[4] = {0, 0, 0, 0};
		ushort X1[4] = {0, 0, 0, 0};
		ushort X2[4] = {0, 0, 0, 0};
		ushort Y1[4] = {0, 0, 0, 0};
		ushort Y2[4] = {0, 0, 0, 0};

		// random first input and its partner at difference DX
		for(uint32_t j = 0; j < 4; j++) {
		  X1[j] = random32() & 0xFFFF;
		  X2[j] = ADD(DX[j], X1[j]);
		}
		// the first word is not random: it runs over all values i
		X1[0] = i;
		X2[0] = ADD(DX[0], X1[0]);

		// output difference of IDEA_LIN_encryption()
		IDEA_LIN_encryption(X1, Y1, K);
		IDEA_LIN_encryption(X2, Y2, K);

		for(uint32_t j = 0; j < 4; j++) {
		  DY_lin[j] = SUB(Y2[j], Y1[j]);
		}

		// reset the outputs before the second pair of encryptions
		for(uint32_t j = 0; j < 4; j++) {
		  Y1[j] = 0;
		  Y2[j] = 0;
		}

		// output difference of IDEA_encryption() on the same inputs
		IDEA_encryption(X1, Y1, K);
		IDEA_encryption(X2, Y2, K);

		for(uint32_t j = 0; j < 4; j++) {
		  DY[j] = SUB(Y2[j], Y1[j]);
		}

		// one line per i: DY_lin | DY
		printf("[%s:%d] ", __FILE__, __LINE__);
		for(uint32_t j = 0; j < 4; j++) {
		  printf("%8X ", DY_lin[j]);
		}
		printf(" | ");
		for(uint32_t j = 0; j < 4; j++) {
		  printf("%8X ", DY[j]);
		}
#if 0
		printf(" | ");
		for(uint32_t j = 0; j < 4; j++) {
		  printf("%8X ", X1[j]);
		}
		printf(" | ");
		for(uint32_t j = 0; j < 4; j++) {
		  printf("%8X ", X2[j]);
		}
#endif
		printf("\n");
	 }
  }

  // free K
  for(uint32_t i = 0; i < 9; i++) {
    free(K[i]);
  }
  free(K);
}

/* --- */

/*
 * Multiplication, modulo (2**16)+1, on 16-bit words.
 * The word 0 stands for 2**16 (both as an operand and as the result),
 * which is what the zero-operand branches below rely on.
 * Adapted from the original GPG implementation.
 *
 * a, b    : the two factors
 * returns : (a * b) mod (2**16 + 1), with 2**16 encoded as 0
 */
#define low16(x) ((x) & 0xFFFF)

uint16_t idea_mul_orig(uint16_t a, uint16_t b)
{
  // Widen BEFORE multiplying: uint16_t operands are promoted to (signed)
  // int, and e.g. 0xFFFF * 0xFFFF does not fit in a 32-bit int.
  const uint32_t p = (uint32_t)a * b;

  if (p) {
	 // 2^16 == -1 (mod 2^16 + 1), hence p == low - high;
	 // add 1 (i.e. the modulus, in 16-bit arithmetic) when that is negative
	 b = low16(p);
	 a = p >> 16;
	 return (b - a) + (b < a);
  } else if (a) {
	 // b == 0 encodes 2^16 == -1, so the product is -a == 1 - a
	 return 1 - a;
  } else {
	 // a == 0 encodes 2^16 == -1, so the product is -b == 1 - b
	 return 1 - b;
  }
}

/* --- */
/*
 * Multiplication, modulo (2**16)+1
 * Note that this code is structured on the assumption that
 * untaken branches are cheaper than taken branches, and the
 * compiler doesn't schedule branches.
 *
 * The word 0 stands for 2**16 == -1 (mod 2**16 + 1): a zero factor
 * therefore negates the other one, i.e. the result is 1 - (other factor).
 * Only compiled when SMALL_CACHE is defined.
 */
#ifdef SMALL_CACHE
CONST static uint16 mul(uint16 a, uint16 b)
{
	  word32 p;

	  p = (word32) a *b;
	  if (p) {
			 /* p == low - high (mod 2**16 + 1) */
			 b = low16(p);
			 a = p >> 16;
			 return (b - a) + (b < a);
	  } else if (a) {
			 /* b == 0: the product is -a */
			 return 1 - a;
	  } else {
			 /* a == 0: the product is -b */
			 return 1 - b;
	  }
}				/* mul */
#endif				/* SMALL_CACHE */



/* --- */

/*
 * Experimental probability estimate for the differences (da, db -> dc)
 * through multiplication -- presumably the additive differential
 * probability, going by the name; confirm against callers.
 *
 * Works on the linear equation (db x) + (da y) = dc - (da db):
 * returns 0 when gcd(da, db) is 0 or does not divide the right-hand side,
 * otherwise 1 / 2^WORD_SIZE, doubled when reducing the right-hand side
 * with MOD changes its value.
 *
 * The GCD is computed twice -- with gcd() and with GNU MP -- and the two
 * results are asserted to be equal. Intermediate values are printed to
 * stdout.
 */
double adp_mul(const uint32_t da, const uint32_t db, const uint32_t dc)
{
  // (db x) + (da y) = dc - (da db)
  double p = 0.0;
  uint32_t d = gcd(da, db);
#if 1									  // GMP Test
  // Compute GCD with the GNU MP library
  mpz_t z_da, z_db, z_g;
  mpz_init_set_ui(z_da, da);
  mpz_init_set_ui(z_db, db);
  mpz_init(z_g);
  mpz_gcd(z_g, z_da, z_db);
  uint32_t g = mpz_get_ui(z_g);

  printf("[%s:%d] g = gcd(%d, %d) %d\n", __FILE__, __LINE__, da, db, g);

  // gcd(0, 0) is 0: the reduced coefficients are undefined in that case
  // and computing them would divide by zero, so they are only derived
  // (and printed) for a non-zero GCD.
  if(g != 0) {
	 int32_t A = da / (int32_t)g;
	 int32_t B = db / (int32_t)g;
	 int32_t c = (dc - (da * db));
	 int32_t C = c / (int32_t)g;

	 printf("[%s:%d] A (%d/%d) %d, B (%d/%d) %d, C (%d/%d) %d\n", __FILE__, __LINE__, da, g, A, db, g, B, c, g, C);
  }

  mpz_clear(z_da);
  mpz_clear(z_db);
  mpz_clear(z_g);
#endif
  printf("[%s:%d] gcd(%d,%d) = %d %d\n", __FILE__, __LINE__, da, db, d, g);
  assert(d == g);				  // both GCD implementations must agree
  if(d == 0) {						  // da == db == 0
	 return 0.0;
  }
  int32_t e = dc - (da * db);  // gamma - (alpha * beta)
  int32_t r = (e % d);			  // remainder from e / d
  printf("[%s:%d] gcd(%d,%d) = %d, e = %d, r = %d\n", __FILE__, __LINE__, da, db, d, e, r);
  if(r != 0) {						  // d does not divide e
	 return 0.0;
  }
  uint32_t n = (1UL << WORD_SIZE);
  p = 1.0 / (double)n;			  // 1 / 2^WORD_SIZE
  int32_t e_mod = e % MOD;		  // (gamma - (alpha * beta)) mod 2^n
  if(e != e_mod) {				  // the reduction changed e
	 p *= 2;
  }
  return p;
}

/* --- */

/* 
	[./src/tea.cc:201] R 1 key F691432E 777F2DD4 delta 9E3779B9
 1: 0.080665 (2^-3.631920)        F <- FFFFFFFF | 2^-3.631920
------------------------------------
[./src/tea.cc:201] R 2 key D059DD11 3E61E99B delta 9E3779B9
2: 1.000000 (2^0.000000)        0 <-        0 | 2^-3.631920
------------------------------------
[./src/tea.cc:201] R 3 key F691432E 777F2DD4 delta 3C6EF372
3: 0.140709 (2^-2.829214)        F <- FFFFFFFF | 2^-6.461134
------------------------------------
[./src/tea.cc:201] R 4 key D059DD11 3E61E99B delta 3C6EF372
4: 0.003910 (2^-7.998592)        0 <-        F | 2^-14.459726
------------------------------------
[./src/tea.cc:201] R 5 key F691432E 777F2DD4 delta DAA66D2B
5: 0.080753 (2^-3.630343) FFFFFFF1 <- FFFFFFFF | 2^-18.090069
------------------------------------
[./src/tea.cc:201] R 6 key D059DD11 3E61E99B delta DAA66D2B
6: 1.000000 (2^0.000000)        0 <-        0 | 2^-18.090069
------------------------------------
[./src/tea.cc:201] R 7 key F691432E 777F2DD4 delta 78DDE6E4
7: 0.136083 (2^-2.877440) FFFFFFF1 <- FFFFFFFF | 2^-20.967509
------------------------------------
[./src/tea.cc:201] R 8 key D059DD11 3E61E99B delta 78DDE6E4
8: 0.000125 (2^-12.961081)        2 <- FFFFFFF1 | 2^-33.928590
------------------------------------
[./src/tea.cc:201] R 9 key F691432E 777F2DD4 delta 1715609D
9: 0.080970 (2^-3.626472)        F <-        1 | 2^-37.555062
------------------------------------
[./src/tea.cc:201] R10 key D059DD11 3E61E99B delta 1715609D
10: 1.000000 (2^0.000000)        0 <-        0 | 2^-37.555062
------------------------------------
[./src/tea.cc:201] R11 key F691432E 777F2DD4 delta B54CDA56
11: 0.139676 (2^-2.839843) FFFFFFF1 <-        1 | 2^-40.394905
------------------------------------
[./src/tea.cc:201] R12 key D059DD11 3E61E99B delta B54CDA56
12: 0.000000 (2^-inf) FFFFFFFE <- FFFFFFF1 | 2^-inf
------------------------------------
[./src/tea.cc:201] R13 key F691432E 777F2DD4 delta 5384540F
13: 0.080468 (2^-3.635446)        F <- FFFFFFFF | 2^-inf
------------------------------------
[./src/tea.cc:201] R14 key D059DD11 3E61E99B delta 5384540F
14: 1.000000 (2^0.000000)        0 <-        0 | 2^-inf
------------------------------------
[./src/tea.cc:201] R15 key F691432E 777F2DD4 delta F1BBCDC8
15: 0.135598 (2^-2.882590)       11 <- FFFFFFFF | 2^-inf
------------------------------------
[./src/tea.cc:201] R16 key D059DD11 3E61E99B delta F1BBCDC8
16: 0.001965 (2^-8.991221)        0 <-       11 | 2^-inf
------------------------------------
[./src/tea.cc:201] R17 key F691432E 777F2DD4 delta 8FF34781
17: 0.079695 (2^-3.649363) FFFFFFEF <- FFFFFFFF | 2^-inf
------------------------------------
[./src/tea.cc:201] R18 key D059DD11 3E61E99B delta 8FF34781
18: 1.000000 (2^0.000000)        0 <-        0 | 2^-inf
------------------------------------
[./tests/tea-add-threshold-search-tests.cc:304] Total: 2^-inf
[./tests/tea-add-threshold-search-tests.cc:305] key
key[0] = 0xF691432E;
key[1] = 0x777F2DD4;
key[2] = 0xD059DD11;
key[3] = 0x3E61E99B;
cnt_good = [ 0 /  1]
OK

real    0m1.009s
user    0m1.000s
sys     0m0.004s

 */

/* --- */
/* 
	[./tests/tea-add-threshold-search-tests.cc:150] Final full trail:
 0:        0 <-        0 1.000000 (2^0.000000)
 1: FFFFFFF1 <-        1 0.126862 (2^-2.978673)
 2: FFFFFFFF <- FFFFFFF1 0.005249 (2^-7.573735)
 3:        0 <-        0 1.000000 (2^0.000000)
 4: FFFFFFFF <- FFFFFFF1 0.005493 (2^-7.508147)
 5:        F <- FFFFFFFF 0.132599 (2^-2.914860)
 6:        0 <-        0 1.000000 (2^0.000000)
 7:        F <- FFFFFFFF 0.081238 (2^-3.621705)
 8:        2 <-        F 0.004303 (2^-7.860449)
 9: FFFFFFF1 <-        1 0.127808 (2^-2.967954)
10:        0 <-        0 1.000000 (2^0.000000)
11:       11 <-        1 0.079956 (2^-3.644649)
12:        0 <-       11 0.002228 (2^-8.810175)
13: FFFFFFEF <-        1 0.134216 (2^-2.897368)
14:        0 <-        0 1.000000 (2^0.000000)
15:        F <-        1 0.084229 (2^-3.569547)
16: FFFFFFFF <-        F 0.001709 (2^-9.192645)
17:        0 <-        0 1.000000 (2^0.000000)
p_tot = 0.000000000000000 = 2^-63.539908
[./tests/tea-add-threshold-search-tests.cc:164] key
key[0] = 0x72AB3584;
key[1] = 0xBC1123CF;
key[2] = 0x1487D2B6;
key[3] = 0x70F2DE4;
[./tests/tea-add-threshold-search-tests.cc:168] Print in LaTeX:

real    13m15.865s
user    13m13.482s
sys     0m0.128s

 */
/* --- */

/*

add-threshold-search.cc:1134] tea_add_trail_search_full()
B[ 0] = 2^0.000000
B[ 1] = 2^-1.018968
B[ 2] = 2^-2.957259
B[ 3] = 2^-6.069958
B[ 4] = 2^-11.801666
B[ 5] = 2^-17.265197
B[ 6] = 2^-20.945037
B[ 7] = 2^-24.207170
B[ 8] = 2^-28.035174
B[ 9] = 2^-32.487802
B[10] = 2^-35.770491
B[11] = 2^-39.306786
B[12] = 2^-42.749709
B[13] = 2^-46.157456
B[14] = 2^-49.921512
B[15] = 2^-53.963286
B[16] = 2^-57.424109
B[17] = 2^-60.899134
B[18] = 2^-64.618808
pDDT sizes: Dp 65, Dxy 65 | hway 356511, croad 345961
 0:        F <-        1 0.136597 (2^-2.872006)
 1:        0 <-        0 1.000000 (2^0.000000)
 2:        F <-        1 0.131805 (2^-2.923518)
 3:        0 <-        F 0.002106 (2^-8.891476)
 4: FFFFFFF1 <-        1 0.135040 (2^-2.888538)
 5:        0 <-        0 1.000000 (2^0.000000)
 6:        F <-        1 0.136627 (2^-2.871683)
 7: FFFFFFFF <-        F 0.003967 (2^-7.977632)
 8:        0 <-        0 1.000000 (2^0.000000)
 9: FFFFFFFF <-        F 0.004028 (2^-7.955606)
 10: FFFFFFF1 <- FFFFFFFF 0.134735 (2^-2.891802)
 11:        0 <-        0 1.000000 (2^0.000000)
 12: FFFFFFF1 <- FFFFFFFF 0.134674 (2^-2.892456)
 13:        1 <- FFFFFFF1 0.004150 (2^-7.912537)
 14:        0 <-        0 1.000000 (2^0.000000)
 15:        1 <- FFFFFFF1 0.004028 (2^-7.955606)
 16:        F <-        1 0.134186 (2^-2.897696)
 17:        0 <-        0 1.000000 (2^0.000000)
 18: FFFFFFF1 <-        1 0.077576 (2^-3.688252)
 p_tot = 0.000000000000000 = 2^-64.618808, Bn = 0.000000 = 2^-64.618808
 [./src/tea-add-threshold-search.cc:1208] nrounds = 19
 [./tests/tea-add-threshold-search-tests.cc:137]
----- End search -----
 [./tests/tea-add-threshold-search-tests.cc:140] Final trail:
 0: FFFFFFF1 <-        1 0.135956 (2^-2.878790)
 1:        0 <-        0 1.000000 (2^0.000000)
 2:        F <-        1 0.134949 (2^-2.889517)
 3:        0 <-        F 0.001892 (2^-9.045804)
 4: FFFFFFF1 <-        1 0.133392 (2^-2.906252)
 5:        0 <-        0 1.000000 (2^0.000000)
 6:        F <-        1 0.135376 (2^-2.884956)
 7:        0 <-        F 0.002014 (2^-8.955606)
 8: FFFFFFF1 <-        1 0.132843 (2^-2.912206)
 9:        0 <-        0 1.000000 (2^0.000000)
 10:       11 <-        1 0.135498 (2^-2.883656)
 11:        0 <-       11 0.002136 (2^-8.870717)
 12: FFFFFFEF <-        1 0.134552 (2^-2.893764)
 13:        0 <-        0 1.000000 (2^0.000000)
 14:       11 <-        1 0.133301 (2^-2.907243)
 15:        0 <-       11 0.002380 (2^-8.714598)
 16: FFFFFFEF <-        1 0.131958 (2^-2.921849)
 17:        0 <-        0 1.000000 (2^0.000000)
 18: FFFFFFF1 <-        1 0.135559 (2^-2.883006)
 p_tot = 0.000000000000000 = 2^-64.547964, Bn = 0.000000 = 2^-64.547964
 [./tests/tea-add-threshold-search-tests.cc:150] Final full trail:
 0:        F <-        1 0.136597 (2^-2.872006)
 1:        0 <-        0 1.000000 (2^0.000000)
 2:        F <-        1 0.131805 (2^-2.923518)
 3:        0 <-        F 0.002106 (2^-8.891476)
 4: FFFFFFF1 <-        1 0.135040 (2^-2.888538)
 5:        0 <-        0 1.000000 (2^0.000000)
 6:        F <-        1 0.136627 (2^-2.871683)
 7: FFFFFFFF <-        F 0.003967 (2^-7.977632)
 8:        0 <-        0 1.000000 (2^0.000000)
 9: FFFFFFFF <-        F 0.004028 (2^-7.955606)
 10: FFFFFFF1 <- FFFFFFFF 0.134735 (2^-2.891802)
 11:        0 <-        0 1.000000 (2^0.000000)
 12: FFFFFFF1 <- FFFFFFFF 0.134674 (2^-2.892456)
 13:        1 <- FFFFFFF1 0.004150 (2^-7.912537)
 14:        0 <-        0 1.000000 (2^0.000000)
 15:        1 <- FFFFFFF1 0.004028 (2^-7.955606)
 16:        F <-        1 0.134186 (2^-2.897696)
 17:        0 <-        0 1.000000 (2^0.000000)
 18: FFFFFFF1 <-        1 0.077576 (2^-3.688252)
p_tot = 0.000000000000000 = 2^-64.618808
 [./tests/tea-add-threshold-search-tests.cc:164] key
 key[0] = 0xE028DF9A;
key[1] = 0x8819B4C3;
key[2] = 0x3AB116AF;
key[3] = 0x3C50723;
[./tests/tea-add-threshold-search-tests.cc:168] Print in LaTeX:

real    37m11.116s
user    35m43.542s
sys     0m1.232s
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
vpv@igor:~/skcrypto/trunk/work/src/yaarx$
*/

/* --- */
/* 

%------------------------
\texttt{key} & \texttt{1B2F30BF} & & \texttt{A8922EEA} & \texttt{DB39318C} & \texttt{FF5F3C72} \\
\toprule
$r$ & $\Delta y$ & & $\Delta x$ & $p$ & $\mathrm{log}_2 p$\\
\midrule
$ 0$ & \texttt{       F} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.130249$ & $2^{-2.94}$ \\
$ 1$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$ 2$ & \texttt{FFFFFFEF} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.079865$ & $2^{-3.65}$ \\
$ 3$ & \texttt{       0} & $\leftarrow$ & \texttt{FFFFFFEF} & $0.001068$ & $2^{-9.87}$ \\
$ 4$ & \texttt{      11} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.135529$ & $2^{-2.88}$ \\
$ 5$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$ 6$ & \texttt{FFFFFFEF} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.082092$ & $2^{-3.61}$ \\
$ 7$ & \texttt{       2} & $\leftarrow$ & \texttt{FFFFFFEF} & $0.001953$ & $2^{-9.00}$ \\
$ 8$ & \texttt{      11} & $\leftarrow$ & \texttt{       1} & $0.130005$ & $2^{-2.94}$ \\
$ 9$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$10$ & \texttt{      11} & $\leftarrow$ & \texttt{       1} & $0.080048$ & $2^{-3.64}$ \\
$11$ & \texttt{FFFFFFFE} & $\leftarrow$ & \texttt{      11} & $0.000793$ & $2^{-10.30}$ \\
$12$ & \texttt{FFFFFFEF} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.136047$ & $2^{-2.88}$ \\
$13$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$14$ & \texttt{       F} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.080658$ & $2^{-3.63}$ \\
$15$ & \texttt{FFFFFF01} & $\leftarrow$ & \texttt{       F} & $0.005402$ & $2^{-7.53}$ \\
$16$ & \texttt{FFFFF0F8} & $\leftarrow$ & \texttt{FFFFFF00} & $0.046509$ & $2^{-4.43}$ \\
\midrule
 $\prod_{r}$ & & & & & $2^{-67.30}$ \\
\bottomrule
% TEA_ADD_P_THRES = 0.050000, TEA_ADD_MAX_PDDT_SIZE = 2^25.000000, NROUNDS = 20
% Time: 12.4 min.
 */

/* --- */

/* 
%------------------------
\texttt{key} & \texttt{E028DF9A} & & \texttt{8819B4C3} & \texttt{3AB116AF} & \texttt{ 3C50723} \\
\toprule
$r$ & $\Delta y$ & & $\Delta x$ & $p$ & $\mathrm{log}_2 p$\\
\midrule
$ 0$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$ 1$ & \texttt{FFFFFFEF} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.078400$ & $2^{-3.67}$ \\
$ 2$ & \texttt{       0} & $\leftarrow$ & \texttt{FFFFFFEF} & $0.000214$ & $2^{-12.19}$ \\
$ 3$ & \texttt{      11} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.142639$ & $2^{-2.81}$ \\
$ 4$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$ 5$ & \texttt{       F} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.079102$ & $2^{-3.66}$ \\
$ 6$ & \texttt{       2} & $\leftarrow$ & \texttt{       F} & $0.001984$ & $2^{-8.98}$ \\
$ 7$ & \texttt{FFFFFFF1} & $\leftarrow$ & \texttt{       1} & $0.134491$ & $2^{-2.89}$ \\
$ 8$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$ 9$ & \texttt{       F} & $\leftarrow$ & \texttt{       1} & $0.082306$ & $2^{-3.60}$ \\
$10$ & \texttt{FFFFFFFF} & $\leftarrow$ & \texttt{       F} & $0.004089$ & $2^{-7.93}$ \\
$11$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$12$ & \texttt{FFFFFFFF} & $\leftarrow$ & \texttt{       F} & $0.006287$ & $2^{-7.31}$ \\
$13$ & \texttt{FFFFFFF1} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.078949$ & $2^{-3.66}$ \\
$14$ & \texttt{       0} & $\leftarrow$ & \texttt{       0} & $1.000000$ & $2^{0.00}$ \\
$15$ & \texttt{FFFFFFF1} & $\leftarrow$ & \texttt{FFFFFFFF} & $0.136536$ & $2^{-2.87}$ \\
$16$ & \texttt{FFFFFF01} & $\leftarrow$ & \texttt{FFFFFFF1} & $0.047241$ & $2^{-4.40}$ \\
\midrule
 $\prod_{r}$ & & & & & $2^{-64.00}$ \\
\bottomrule
% TEA_ADD_P_THRES = 0.050000, TEA_ADD_MAX_PDDT_SIZE = 2^25.000000, NROUNDS = 20

real    8m31.372s
user    8m30.260s
sys     0m0.124s

 */

/* ---- */

/* 

--- 20130504 ---

B[ 0] = 2^0.000000
B[ 1] = 2^-1.015582
B[ 2] = 2^-2.968580
B[ 3] = 2^-5.347015
B[ 4] = 2^-11.040928
B[ 5] = 2^-16.269840
B[ 6] = 2^-22.465169
B[ 7] = 2^-26.007645
B[ 8] = 2^-29.952421
B[ 9] = 2^-37.059350
B[10] = 2^-41.835323
B[11] = 2^-45.627288
B[12] = 2^-50.860549
B[13] = 2^-54.583891
B[14] = 2^-57.284255
B[15] = 2^-60.032646
B[16] = 2^-64.456162
pDDT sizes: Dp 59, Dxy 59 | Cp 0, Cxy 0
 0:        0 <-        0 1.000000 (2^0.000000)
 1: FFFFFFF1 <-        1 0.083160 (2^-3.587959)
 2:        0 <- FFFFFFF1 0.000305 (2^-11.678072)
 3:        F <-        1 0.141174 (2^-2.824450)
 4:        0 <-        0 1.000000 (2^0.000000)
 5: FFFFFFF1 <-        1 0.080292 (2^-3.638604)
 6: FFFFFFFE <- FFFFFFF1 0.001709 (2^-9.192645)
 7:        F <- FFFFFFFF 0.132507 (2^-2.915856)
 8:        0 <-        0 1.000000 (2^0.000000)
 9: FFFFFFF1 <- FFFFFFFF 0.081970 (2^-3.608756)
10:        1 <- FFFFFFF1 0.003906 (2^-8.000000)
11:        0 <-        0 1.000000 (2^0.000000)
12:        1 <- FFFFFFF1 0.006317 (2^-7.306513)
13:        F <-        1 0.081543 (2^-3.616296)
14:        0 <-        0 1.000000 (2^0.000000)
15: FFFFFFF1 <-        1 0.078918 (2^-3.663493)
16: FFFFFF01 <- FFFFFFF1 0.046600 (2^-4.423516)
p_tot = 0.000000000000000 = 2^-64.456162, Bn = 0.000000 = 2^-64.456162
[./src/tea-add-threshold-search.cc:1202] nrounds = 17
[./tests/tea-add-threshold-search-tests.cc:128]
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:131] Final trail:
0:        0 <-        0 1.000000 (2^0.000000)
1:       11 <-        1 0.080658 (2^-3.632039)
2:        0 <-       11 0.000244 (2^-12.000000)
3: FFFFFFEF <-        1 0.143524 (2^-2.800634)
4:        0 <-        0 1.000000 (2^0.000000)
5:       11 <-        1 0.079285 (2^-3.656814)
6:        0 <-       11 0.000061 (2^-14.000000)
7: FFFFFFEF <-        1 0.138153 (2^-2.855660)
8:        0 <-        0 1.000000 (2^0.000000)
9:       11 <-        1 0.080872 (2^-3.628223)
10:        0 <-       11 0.000244 (2^-12.000000)
11: FFFFFFEF <-        1 0.141815 (2^-2.817916)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.080353 (2^-3.637508)
14: FFFFFF01 <- FFFFFFF1 0.011353 (2^-6.460841)
p_tot = 0.000000000000000 = 2^-67.489637, Bn = 0.000000 = 2^-67.489637
[./tests/tea-add-threshold-search-tests.cc:141] Final full trail:
0:        0 <-        0 1.000000 (2^0.000000)
1: FFFFFFF1 <-        1 0.083160 (2^-3.587959)
2:        0 <- FFFFFFF1 0.000305 (2^-11.678072)
3:        F <-        1 0.141174 (2^-2.824450)
4:        0 <-        0 1.000000 (2^0.000000)
5: FFFFFFF1 <-        1 0.080292 (2^-3.638604)
6: FFFFFFFE <- FFFFFFF1 0.001709 (2^-9.192645)
7:        F <- FFFFFFFF 0.132507 (2^-2.915856)
8:        0 <-        0 1.000000 (2^0.000000)
9: FFFFFFF1 <- FFFFFFFF 0.081970 (2^-3.608756)
10:        1 <- FFFFFFF1 0.003906 (2^-8.000000)
11:        0 <-        0 1.000000 (2^0.000000)
12:        1 <- FFFFFFF1 0.006317 (2^-7.306513)
13:        F <-        1 0.081543 (2^-3.616296)
14:        0 <-        0 1.000000 (2^0.000000)
15: FFFFFFF1 <-        1 0.078918 (2^-3.663493)
16: FFFFFF01 <- FFFFFFF1 0.046600 (2^-4.423516)
p_tot = 0.000000000000000 = 2^-64.456162
[./tests/tea-add-threshold-search-tests.cc:155] key
key[0] = 0xE028DF9A;
key[1] = 0x8819B4C3;
key[2] = 0x3AB116AF;
key[3] = 0x3C50723;

real    12m5.335s
user    12m3.229s
sys     0m0.092s

 */

/* --- */

  // Disabled experiments that seed the differential sets. This is a detached
  // fragment: the function it was cut from is not part of this dump.
  //  std::multiset<differential_t, struct_comp_diff_p>::iterator find_iter = croads_init_mset_p.begin();
#if 0
  // Copy every entry of croads_init_set_dx_dy into diff_mset_p and
  // diff_set_dx_dy, logging each one. Note the assert(0 == 1): with
  // assertions enabled the loop aborts on the first entry -- presumably the
  // set was expected to be empty at this point.
  std::set<differential_t, struct_comp_diff_dx_dy>::iterator find_iter = croads_init_set_dx_dy.begin();
  while(find_iter != croads_init_set_dx_dy.end()) {
	 assert(0 == 1);
	 printf("[%s:%d] New entry: %8X %8X 2^%f\n", __FILE__, __LINE__, find_iter->dx, find_iter->dy, log2(find_iter->p));
	 diff_mset_p.insert(*find_iter);
	 diff_set_dx_dy.insert(*find_iter);
	 find_iter++;
  }
#endif

  // init croads with hways
#if 0
  // Copy all of diff_set_dx_dy into croads_diff_set_dx_dy ...
  std::set<differential_t, struct_comp_diff_dx_dy>::iterator hway_dx_dy = diff_set_dx_dy.begin();
  while(hway_dx_dy != diff_set_dx_dy.end()) {
	 croads_diff_set_dx_dy.insert(*hway_dx_dy);
	 hway_dx_dy++;
  }
  // ... and all of diff_mset_p into croads_diff_mset_p.
  // NOTE(review): hway_p is declared as a std::set iterator ordered by
  // struct_comp_diff_dx_dy although it walks diff_mset_p -- confirm that
  // this matches the container's actual type.
  std::set<differential_t, struct_comp_diff_dx_dy>::iterator hway_p = diff_mset_p.begin();
  while(hway_p != diff_mset_p.end()) {
	 croads_diff_mset_p.insert(*hway_p);
	 hway_p++;
  }
#endif

#if 0									  // TEST
  // Insert one hand-written differential into both croads sets.
  // (The meaning of the initializer fields is defined by differential_t,
  // which is not visible here.)
  //  3:        0 <-        F 0.003937 (2^-7.988773)
  //  differential_t tmp_diff = {0xFFFFFFF1, 0xFFFFFFFF, 0, 0.00381};
  differential_t tmp_diff = {0xF, 0x0, 0, 0.003973};
  croads_diff_set_dx_dy.insert(tmp_diff);
  croads_diff_mset_p.insert(tmp_diff);
#endif

  // Report the sizes of the two differential sets. size() yields a size_t,
  // so it is printed with %zu (passing it to %d is a format mismatch).
  printf("AFTER Initial set sizes: Dp %zu, Dxy %zu\n", diff_mset_p.size(), diff_set_dx_dy.size());
  //  assert(diff_set_dx_dy.size() == diff_mset_p.size());


/* --- */

#if 0
	 // Disabled debug aid (detached fragment): when nrounds equals 7,
	 // dump the contents of diff_set_dx_dy via print_set().
	 if(nrounds == 7) {
		//	 if(nrounds == (NROUNDS - 1)) {
	 //		printf("[%s:%d] Dp:\n", __FILE__, __LINE__);
	 //		print_mset(diff_mset_p);
		printf("[%s:%d] Dxy:\n", __FILE__, __LINE__);
		print_set(diff_set_dx_dy);
		printf("\n");
	 }
#endif


/* --- */

  //  for(uint32_t nrounds = 1; nrounds <= NROUNDS; nrounds++ ) {
  //  bool b_start = false;
	 //	 for(uint32_t i = 0; i < 2; i++) { // first two rounds
	 //		if(trail[i].p < TEA_ADD_P_THRES) {
	 //		  croads_init_set_dx_dy->insert(trail[i]);
	 //		}
	 //	 }


/* --- */

/* 
B[ 1] = 2^-1.452382
B[ 2] = 2^-2.904283
B[ 3] = 2^-6.655170
B[ 4] = 2^-10.950524
B[ 5] = 2^-16.377181
B[ 6] = 2^-25.154256
B[ 7] = 2^-28.753910
B[ 8] = 2^-35.549339
B[ 9] = 2^-38.775611
B[10] = 2^-41.594769
B[11] = 2^-47.266094
B[12] = 2^-53.137958
B[13] = 2^-53.137958
B[14] = 2^-56.009320
B[15] = 2^-62.198748
B[16] = 2^-63.905545
pDDT sizes: Dp 66, Dxy 66 | Cp 0, Cxy 0
 0: FFFFFFF1 <- FFFFFFFF 0.082031 (2^-3.607683)
 1:        0 <-        0 1.000000 (2^0.000000)
 2: FFFFFFF1 <- FFFFFFFF 0.143433 (2^-2.801555)
 3:        1 <- FFFFFFF1 0.001312 (2^-9.573735)
 4:        0 <-        0 1.000000 (2^0.000000)
 5:        1 <- FFFFFFF1 0.005005 (2^-7.642448)
 6:        F <-        1 0.134827 (2^-2.890822)
 7:        0 <-        0 1.000000 (2^0.000000)
 8: FFFFFFEF <-        1 0.077148 (2^-3.696219)
 9: FFFFFFFE <- FFFFFFEF 0.001984 (2^-8.977632)
10:       11 <- FFFFFFFF 0.141052 (2^-2.825698)
11:        0 <-        0 1.000000 (2^0.000000)
12: FFFFFFF1 <- FFFFFFFF 0.079926 (2^-3.645200)
13:        2 <- FFFFFFF1 0.000305 (2^-11.678072)
14:        F <-        1 0.136169 (2^-2.876525)
15:        0 <-        0 1.000000 (2^0.000000)
16: FFFFFFF1 <-        1 0.077484 (2^-3.689955)
p_tot = 0.000000000000000 = 2^-63.905545, Bn = 0.000000 = 2^-63.905545
[./src/tea-add-threshold-search.cc:1257] nrounds = 17
[./tests/tea-add-threshold-search-tests.cc:120]
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:123] Final trail:
0: FFFFFF01 <- FFFFFFF1 0.008209 (2^-6.928538)
1:        F <-        1 0.140564 (2^-2.830701)
2:        0 <-        0 1.000000 (2^0.000000)
3:        F <-        1 0.079834 (2^-3.646853)
4:        0 <-        F 0.002136 (2^-8.870717)
5: FFFFFFF1 <-        1 0.127441 (2^-2.972094)
6:        0 <-        0 1.000000 (2^0.000000)
7:        F <-        1 0.083679 (2^-3.578987)
8:        0 <-        F 0.004242 (2^-7.881059)
9: FFFFFFF1 <-        1 0.134552 (2^-2.893764)
10:        0 <-        0 1.000000 (2^0.000000)
11:       11 <-        1 0.081360 (2^-3.619539)
12:        0 <-       11 0.000092 (2^-13.415037)
13: FFFFFFEF <-        1 0.124725 (2^-3.003173)
14:        0 <-        0 1.000000 (2^0.000000)
15: FFFFFFF1 <-        1 0.084503 (2^-3.564851)
16: FFFFFF01 <- FFFFFFF1 0.017334 (2^-5.850253)
p_tot = 0.000000000000000 = 2^-69.055567, Bn = 0.000000 = 2^-69.055567
[./tests/tea-add-threshold-search-tests.cc:133] Final full trail:
0: FFFFFFF1 <- FFFFFFFF 0.082031 (2^-3.607683)
1:        0 <-        0 1.000000 (2^0.000000)
2: FFFFFFF1 <- FFFFFFFF 0.143433 (2^-2.801555)
3:        1 <- FFFFFFF1 0.001312 (2^-9.573735)
4:        0 <-        0 1.000000 (2^0.000000)
5:        1 <- FFFFFFF1 0.005005 (2^-7.642448)
6:        F <-        1 0.134827 (2^-2.890822)
7:        0 <-        0 1.000000 (2^0.000000)
8: FFFFFFEF <-        1 0.077148 (2^-3.696219)
9: FFFFFFFE <- FFFFFFEF 0.001984 (2^-8.977632)
10:       11 <- FFFFFFFF 0.141052 (2^-2.825698)
11:        0 <-        0 1.000000 (2^0.000000)
12: FFFFFFF1 <- FFFFFFFF 0.079926 (2^-3.645200)
13:        2 <- FFFFFFF1 0.000305 (2^-11.678072)
14:        F <-        1 0.136169 (2^-2.876525)
15:        0 <-        0 1.000000 (2^0.000000)
16: FFFFFFF1 <-        1 0.077484 (2^-3.689955)
p_tot = 0.000000000000000 = 2^-63.905545
[./tests/tea-add-threshold-search-tests.cc:147] key
key[0] = 0xD7A62B66;
key[1] = 0x6E8BE71C;
key[2] = 0x80ABE91A;
key[3] = 0x90CF01B8;

real    25m56.346s
user    25m51.521s
sys     0m0.260s

 */


/* --- */

/* 
--- 20130502 ---

----- End search -----


B[ 0] = 2^0.000000
B[ 1] = 2^-1.025675
B[ 2] = 2^-2.047143
B[ 3] = 2^-5.374837
B[ 4] = 2^-11.139214
B[ 5] = 2^-16.349632
B[ 6] = 2^-24.744992
B[ 7] = 2^-34.341126
B[ 8] = 2^-37.832785
B[ 9] = 2^-41.460464
B[10] = 2^-47.746218
B[11] = 2^-53.235311
B[12] = 2^-58.940618
B[13] = 2^-62.552062
B[14] = 2^-69.068246
pDDT sizes: Dp 70, Dxy 970
 0:        0 <-        0 1.000000 (2^0.000000)
 1:       11 <-        1 0.081573 (2^-3.615756)
 2:        0 <-       11 0.000366 (2^-11.415037)
 3: FFFFFFEF <-        1 0.137573 (2^-2.861728)
 4:        0 <-        0 1.000000 (2^0.000000)
 5:       11 <-        1 0.080048 (2^-3.642998)
 6:        0 <-       11 0.000031 (2^-15.000000)
 7: FFFFFFEF <-        1 0.136169 (2^-2.876525)
 8:        0 <-        0 1.000000 (2^0.000000)
 9:       11 <-        1 0.078735 (2^-3.666845)
10:        0 <-       11 0.000122 (2^-13.000000)
11: FFFFFFEF <-        1 0.137573 (2^-2.861728)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.081818 (2^-3.611444)
14: FFFFFF01 <- FFFFFFF1 0.010925 (2^-6.516184)
p_tot = 0.000000000000000 = 2^-69.068246, Bn = 0.000000 = 2^-69.068246
[./src/tea-add-threshold-search.cc:1007] nrounds = 15

B[ 1] = 2^-1.025675
B[ 2] = 2^-2.947447
B[ 3] = 2^-5.358153
B[ 4] = 2^-11.073147
B[ 5] = 2^-16.258083
B[ 6] = 2^-22.527780
B[ 7] = 2^-26.176521
B[ 8] = 2^-29.813514
B[ 9] = 2^-36.823907
B[10] = 2^-41.821149
B[11] = 2^-44.885821
B[12] = 2^-51.655097
B[13] = 2^-54.023713
B[14] = 2^-58.803057
B[15] = 2^-62.489731
B[16] = 2^-67.229988
pDDT sizes: Dp 58, Dxy 58 | Cp 0, Cxy 0
 0:        0 <-        0 1.000000 (2^0.000000)
 1:        F <-        1 0.079376 (2^-3.655149)
 2:        0 <-        F 0.000305 (2^-11.678072)
 3: FFFFFFF1 <-        1 0.141052 (2^-2.825698)
 4:        0 <-        0 1.000000 (2^0.000000)
 5: FFFFFFF1 <-        1 0.079895 (2^-3.645751)
 6: FFFFFFFE <- FFFFFFF1 0.002106 (2^-8.891476)
 7:        F <- FFFFFFFF 0.132599 (2^-2.914860)
 8:        0 <-        0 1.000000 (2^0.000000)
 9: FFFFFFF1 <- FFFFFFFF 0.083618 (2^-3.580040)
10:        2 <- FFFFFFF1 0.000763 (2^-10.356144)
11:        F <-        1 0.141510 (2^-2.821024)
12:        0 <-        0 1.000000 (2^0.000000)
13:        F <-        1 0.080811 (2^-3.629313)
14: FFFFFFFF <-        F 0.002777 (2^-8.492205)
15:        0 <-        0 1.000000 (2^0.000000)
16: FFFFFF01 <-        F 0.037415 (2^-4.740257)
p_tot = 0.000000000000000 = 2^-67.229988, Bn = 0.000000 = 2^-67.229988
[./src/tea-add-threshold-search.cc:1257] nrounds = 17

key[0] = 0xE028DF9A;
key[1] = 0x8819B4C3;
key[2] = 0x3AB116AF;
key[3] = 0x3C50723;

real    10m25.708s
user    10m23.783s
sys     0m0.120s


 */

/* --- */

#if 0									  // DEBUG
  // Disabled debug aid (detached fragment): once at least 5 entries are
  // present, print diff[0..n-1] (dy <- dx, p and log2 p), followed by the
  // product of the probabilities and the value *Bn.
  if(n >= 5) {
	 double p_tot = 1.0;
	 printf("[%s:%d] diff[%2d]:\n", __FILE__, __LINE__, n);
	 for(int i = 0; i < n; i++) {
		printf("%2d: %8X <- %8X %f (2^%f)\n", i, diff[i].dy, diff[i].dx, diff[i].p, log2(diff[i].p));
		p_tot *= diff[i].p;
	 }
	 printf("p_tot = %16.15f = 2^%f, Bn = %f = 2^%f\n", p_tot, log2(p_tot), *Bn, log2(*Bn));
  }
#endif  // #if 0									  // DEBUG


/* --- */

#if 0									  // OLD
	 // Disabled older version (detached fragment) of the code that fills
	 // found_mset_p with candidate differentials for the input difference dx.
	 //
	 // - dx found among the hways: take every hway entry with this dx and
	 //   p >= p_min; in addition, while cnt_lp <= max_lp, take those croad
	 //   entries (same dx, p >= p_min) for which
	 //   is_dx_in_set_dx_dy(dy, dx_prev, *diff_set_dx_dy) holds.
	 // - otherwise: take only the croad entries passing that same check.
	 //
	 // NOTE(review): none of the while loops compares its iterator against
	 // end() before dereferencing it -- confirm that the containers
	 // guarantee a terminating entry.
	 if(b_found_in_hways) {
		//		while(hway_iter->dx == dx) {
		while((hway_iter->dx == dx) && (hway_iter->p >= p_min)) {
		  found_mset_p.insert(*hway_iter);
		  hway_iter++;
		}
#if 1
		assert(cnt_lp <= max_lp);
		if((b_found_in_croads) && (cnt_lp <= max_lp)) {
		  //		  while(croad_iter->dx == dx) {
		  while((croad_iter->dx == dx) && (croad_iter->p >= p_min)) {

			 // keep the croad only if the check on (dy, previous dx) succeeds
			 uint32_t dy = croad_iter->dy;
			 uint32_t dx_prev = diff[n - 1].dx;
			 bool b_is_hway = is_dx_in_set_dx_dy(dy, dx_prev, *diff_set_dx_dy);
			 if(b_is_hway) {
				found_mset_p.insert(*croad_iter);
			 }
			 croad_iter++;
		  }
		}
#endif
	 } else {
		if(b_found_in_croads) {
		  //		  while(croad_iter->dx == dx) {
		  while((croad_iter->dx == dx) && (croad_iter->p >= p_min)) {

			 // same filter as above, without the cnt_lp limit
			 uint32_t dy = croad_iter->dy;
			 uint32_t dx_prev = diff[n - 1].dx;
			 bool b_is_hway = is_dx_in_set_dx_dy(dy, dx_prev, *diff_set_dx_dy);
			 //			 assert(b_is_hway);
			 if(b_is_hway) {
				found_mset_p.insert(*croad_iter);
			 }
			 croad_iter++;
		  }
		}
	 }
#endif  // OLD



/* --- */

#if 0
	 // Disabled filter (detached fragment): for the croads and then for the
	 // hways, scan the entries whose dx matches, starting at the iterator
	 // found earlier, and record their smallest probability. If even that
	 // minimum exceeds p_min, the corresponding "found" flag is cleared.
	 //
	 // NOTE(review): the scans do not compare tmp_iter against end() --
	 // confirm that a non-matching entry always terminates them.
	 if(b_found_in_croads) {
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator tmp_iter = croad_iter;
		double p_tmp_min = 1.0;
		while(tmp_iter->dx == dx) {
		  if(tmp_iter->p < p_tmp_min) {
			 p_tmp_min = tmp_iter->p;
		  }
		  tmp_iter++;
		}
		if(p_tmp_min > p_min) {
		  b_found_in_croads = false;
		}
	 }
	 if(b_found_in_hways) {
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator tmp_iter = hway_iter;
		double p_tmp_min = 1.0;
		while(tmp_iter->dx == dx) {
		  if(tmp_iter->p < p_tmp_min) {
			 p_tmp_min = tmp_iter->p;
		  }
		  tmp_iter++;
		}
		if(p_tmp_min > p_min) {
		  b_found_in_hways = false;
		}
	 }
#endif



/* --- */

/* 
	[./src/tea-add-threshold-search.cc:663] [ 2 / 20]: Added 1 new country roads: p_min = 0.001154 (2^-9.758922). New sizes: Dxy 30011, Dp 30014 (cnt_lp 0 / 2).B[ 0] = 2^0.000000
B[ 1] = 2^-1.157159
B[ 2] = 2^-2.323873
B[ 3] = 2^-6.085142
B[ 4] = 2^-9.979185
B[ 5] = 2^-14.746161
B[ 6] = 2^-22.618143
B[ 7] = 2^-30.641573
B[ 8] = 2^-34.309724
B[ 9] = 2^-38.402913
B[10] = 2^-45.613117
B[11] = 2^-48.407404
B[12] = 2^-50.295533
B[13] = 2^-53.175619
B[14] = 2^-59.553567
B[15] = 2^-67.373658
pDDT sizes: Dp 69, Dxy 254 | Cp 30014, Cxy 30011
 0:        0 <-        0 1.000000 (2^0.000000)
 1: FFFFFFF1 <- FFFFFFFF 0.126526 (2^-2.982496)
 2:        1 <- FFFFFFF1 0.004120 (2^-7.923184)
 3:        0 <-        0 1.000000 (2^0.000000)
 4:        0 <- FFFFFFF1 0.000092 (2^-13.415037)
 5:        0 <-        0 1.000000 (2^0.000000)
 6:        1 <- FFFFFFF1 0.003876 (2^-8.011315)
 7:        F <-        1 0.083649 (2^-3.579513)
 8:        0 <-        0 1.000000 (2^0.000000)
 9: FFFFFFF1 <-        1 0.129517 (2^-2.948791)
10:        0 <- FFFFFFF1 0.004517 (2^-7.790547)
11:        F <-        1 0.079956 (2^-3.644649)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.135834 (2^-2.880086)
14: FFFFFF01 <- FFFFFFF1 0.012024 (2^-6.377948)
15: FFFFF0E6 <- FFFFFF02 0.004425 (2^-7.820091)
p_tot = 0.000000000000000 = 2^-67.373658, Bn = 0.000000 = 2^-67.373658
[./src/tea-add-threshold-search.cc:1313] nrounds = 16
[./tests/tea-add-threshold-search-tests.cc:120]
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:123] Final trail:
0:        0 <-        0 1.000000 (2^0.000000)
1: FFFFFFF1 <- FFFFFFFF 0.127716 (2^-2.968988)
2:        1 <- FFFFFFF1 0.004883 (2^-7.678072)
3:        0 <-        0 1.000000 (2^0.000000)
4:        1 <- FFFFFFF1 0.000061 (2^-14.000000)
5:        F <-        1 0.135651 (2^-2.882032)
6:        0 <-        0 1.000000 (2^0.000000)
7: FFFFFFF1 <-        1 0.083832 (2^-3.576359)
8:        1 <- FFFFFFF1 0.000930 (2^-10.071235)
9: FFFFFFE2 <-        2 0.080872 (2^-3.628223)
10: FFFFFD04 <- FFFFFFD3 0.001434 (2^-9.445411)
11:     2C82 <- FFFFFD06 0.000946 (2^-10.045804)
p_tot = 0.000000000000000 = 2^-64.296124, Bn = 0.000000 = 2^-64.296124
[./tests/tea-add-threshold-search-tests.cc:133] Final full trail:
0:        0 <-        0 1.000000 (2^0.000000)
1: FFFFFFF1 <- FFFFFFFF 0.126526 (2^-2.982496)
2:        1 <- FFFFFFF1 0.004120 (2^-7.923184)
3:        0 <-        0 1.000000 (2^0.000000)
4:        0 <- FFFFFFF1 0.000092 (2^-13.415037)
5:        0 <-        0 1.000000 (2^0.000000)
6:        1 <- FFFFFFF1 0.003876 (2^-8.011315)
7:        F <-        1 0.083649 (2^-3.579513)
8:        0 <-        0 1.000000 (2^0.000000)
9: FFFFFFF1 <-        1 0.129517 (2^-2.948791)
10:        0 <- FFFFFFF1 0.004517 (2^-7.790547)
11:        F <-        1 0.079956 (2^-3.644649)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.135834 (2^-2.880086)
14: FFFFFF01 <- FFFFFFF1 0.012024 (2^-6.377948)
15: FFFFF0E6 <- FFFFFF02 0.004425 (2^-7.820091)
p_tot = 0.000000000000000 = 2^-67.373658
[./tests/tea-add-threshold-search-tests.cc:147] key
key[0] = 0xEBFC4336;
key[1] = 0xD0D3E14E;
key[2] = 0xE11CB47B;
key[3] = 0x2FFCBD53;

real    100m3.717s
user    99m52.683s
sys     0m0.076s

 */

/* ---- */

/* 
B[ 1] = 2^-1.780831
B[ 2] = 2^-3.568639
B[ 3] = 2^-6.662124
B[ 4] = 2^-10.401919
B[ 5] = 2^-16.199817
B[ 6] = 2^-20.882737
B[ 7] = 2^-26.076804
B[ 8] = 2^-29.293863
B[ 9] = 2^-33.559550
B[10] = 2^-39.028401
B[11] = 2^-44.447186
B[12] = 2^-50.559373
B[13] = 2^-53.949501
B[14] = 2^-57.907229
B[15] = 2^-62.249923
B[16] = 2^-67.311782
pDDT sizes: Dp 61, Dxy 61 | Cp 1157, Cxy 1157
 0:        F <- FFFFFFFF 0.128540 (2^-2.959710)
 1:        0 <-        0 1.000000 (2^0.000000)
 2:        F <- FFFFFFFF 0.081024 (2^-3.625504)
 3:        0 <-        F 0.003723 (2^-8.069263)
 4: FFFFFFF1 <- FFFFFFFF 0.132751 (2^-2.913200)
 5:        0 <-        0 1.000000 (2^0.000000)
 6: FFFFFFEF <- FFFFFFFF 0.080933 (2^-3.627135)
 7:        0 <- FFFFFFEF 0.000153 (2^-12.678072)
 8:       11 <- FFFFFFFF 0.125732 (2^-2.991571)
 9:        0 <-        0 1.000000 (2^0.000000)
10: FFFFFFF1 <- FFFFFFFF 0.081848 (2^-3.610906)
11:        1 <- FFFFFFF1 0.000275 (2^-11.830075)
12:        0 <-        0 1.000000 (2^0.000000)
13:        1 <- FFFFFFF1 0.004486 (2^-7.800328)
14:        F <-        1 0.086456 (2^-3.531885)
15:        0 <-        0 1.000000 (2^0.000000)
16: FFFFFFF1 <-        1 0.078339 (2^-3.674132)
p_tot = 0.000000000000000 = 2^-67.311782, Bn = 0.000000 = 2^-67.311782
[./src/tea-add-threshold-search.cc:1289] nrounds = 17
[./tests/tea-add-threshold-search-tests.cc:108]
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:110] Final trail:
0: FFFFFFF1 <-        1 0.129089 (2^-2.953558)
1:        0 <-        0 1.000000 (2^0.000000)
2:        F <-        1 0.080170 (2^-3.640800)
3:        0 <-        F 0.004059 (2^-7.944718)
4: FFFFFFF1 <-        1 0.132782 (2^-2.912869)
5:        0 <-        0 1.000000 (2^0.000000)
6:       11 <-        1 0.083099 (2^-3.589019)
7:        0 <-       11 0.000122 (2^-13.000000)
8: FFFFFFEF <-        1 0.125000 (2^-3.000000)
9:        0 <-        0 1.000000 (2^0.000000)
10:       11 <-        1 0.079346 (2^-3.655704)
11:        0 <-       11 0.000153 (2^-12.678072)
12: FFFFFFEF <-        1 0.135986 (2^-2.878466)
13:        0 <-        0 1.000000 (2^0.000000)
14: FFFFFFF1 <-        1 0.084381 (2^-3.566936)
15: FFFFFF01 <- FFFFFFF1 0.005829 (2^-7.422571)
p_tot = 0.000000000000000 = 2^-67.242712, Bn = 0.000000 = 2^-67.242712
[./tests/tea-add-threshold-search-tests.cc:120] Final full trail:
0:        F <- FFFFFFFF 0.128540 (2^-2.959710)
1:        0 <-        0 1.000000 (2^0.000000)
2:        F <- FFFFFFFF 0.081024 (2^-3.625504)
3:        0 <-        F 0.003723 (2^-8.069263)
4: FFFFFFF1 <- FFFFFFFF 0.132751 (2^-2.913200)
5:        0 <-        0 1.000000 (2^0.000000)
6: FFFFFFEF <- FFFFFFFF 0.080933 (2^-3.627135)
7:        0 <- FFFFFFEF 0.000153 (2^-12.678072)
8:       11 <- FFFFFFFF 0.125732 (2^-2.991571)
9:        0 <-        0 1.000000 (2^0.000000)
10: FFFFFFF1 <- FFFFFFFF 0.081848 (2^-3.610906)
11:        1 <- FFFFFFF1 0.000275 (2^-11.830075)
12:        0 <-        0 1.000000 (2^0.000000)
13:        1 <- FFFFFFF1 0.004486 (2^-7.800328)
14:        F <-        1 0.086456 (2^-3.531885)
15:        0 <-        0 1.000000 (2^0.000000)
16: FFFFFFF1 <-        1 0.078339 (2^-3.674132)
p_tot = 0.000000000000000 = 2^-67.311782
[./tests/tea-add-threshold-search-tests.cc:134] key
key[0] = 0xD0C6E176;
key[1] = 0x35C21E2;
key[2] = 0xA52FFD16;
key[3] = 0x22075F;

real    22m15.883s
user    22m13.675s
sys     0m0.004s

 */

/* --- */

#if 0
	 // Disabled debugging aid for the single case dx == 0xF: print both
	 // "found" flags, list every entry with that dx reachable from the
	 // corresponding iterator, then stop the program via assert(1 == 0).
	 // NOTE(review): the listing loops have no end() check.
	 if(dx == 0xF) {
		printf("[%s:%d] dx = %8X, b_found_in_hways = %d, b_found_in_croads = %d\n", __FILE__, __LINE__, dx, b_found_in_hways, b_found_in_croads);
		if(b_found_in_hways) {
		  // Walk a copy so hway_iter keeps its position.
		  std::set<differential_t, struct_comp_diff_dx_dy>::iterator tmp_iter = hway_iter;
		  while(tmp_iter->dx == dx) {
			 printf("Hway: %8X %8X 2^%f\n", tmp_iter->dx, tmp_iter->dy, log2(tmp_iter->p));
			 tmp_iter++;
		  }
		  // Deliberate abort; with assertions enabled the croad branch below
		  // is not reached when this branch runs.
		  assert(1 == 0);
		}
		if(b_found_in_croads) {
		  std::set<differential_t, struct_comp_diff_dx_dy>::iterator tmp_iter = croad_iter;
		  while(tmp_iter->dx == dx) {
			 printf("Croad: %8X %8X 2^%f\n", tmp_iter->dx, tmp_iter->dy, log2(tmp_iter->p));
			 tmp_iter++;
		  }
		  assert(1 == 0); // deliberate abort after the dump
		}
	 }
#endif



/* --- */

#if 0
		  // Disabled fragment: record diff_max_dy in both containers.
		  // p_min is the p of the last element of *diff_mset_p; presumably
		  // that is the smallest stored probability, but this depends on the
		  // multiset's comparator, which is not visible here -- TODO confirm.
		  double p_min = diff_mset_p->rbegin()->p;
		  // Add to the probability-keyed container only when p reaches p_min.
		  if(diff_max_dy.p >= p_min) {
			 diff_mset_p->insert(diff_max_dy);
		  }

		  // Always add to the (dx, dy) set, then reposition find_iter at the
		  // first element not ordered before diff_max_dy.
		  diff_set_dx_dy->insert(diff_max_dy);
		  find_iter = diff_set_dx_dy->lower_bound(diff_max_dy);
#endif

/* --- */

/* 
B[ 0] = 2^0.000000
B[ 1] = 2^-1.774943
B[ 2] = 2^-3.567166
B[ 3] = 2^-6.612243
B[ 4] = 2^-10.358473
B[ 5] = 2^-16.049647
pDDT sizes: Dp 61, Dxy 792
 0: 40200000 <- 84000000 0.053040 (2^-4.236788)
 1: 7C000000 <- 80000000 0.492554 (2^-1.021647)
 2:        0 <-        0 1.000000 (2^0.000000)
 3: 84000000 <- 80000000 0.493835 (2^-1.017898)
 4: 40200000 <- 84000000 0.064117 (2^-3.963140)
 5: 3FE10000 <- C0200000 0.017822 (2^-5.810175)
p_tot = 0.000014742621785 = 2^-16.049647, Bn = 0.000015 = 2^-16.049647
[./src/tea-add-threshold-search.cc:922] nrounds = 7, Bn_init = 2^-25.692095 : key D0C6E176  35C21E2 A52FFD16   22075F
[./src/tea-add-threshold-search.cc:415] 6 | Update best found Bn: 2^-25.692095 -> 2^-25.493739
[./src/tea-add-threshold-search.cc:415] 6 | Update best found Bn: 2^-25.493739 -> 2^-21.316415
[./src/tea-add-threshold-search.cc:415] 6 | Update best found Bn: 2^-21.316415 -> 2^-21.188798
[./src/tea-add-threshold-search.cc:415] 6 | Update best found Bn: 2^-21.188798 -> 2^-21.145673
[./src/tea-add-threshold-search.cc:415] 6 | Update best found Bn: 2^-21.145673 -> 2^-21.084973
B[ 0] = 2^0.000000
B[ 1] = 2^-1.774943
B[ 2] = 2^-3.567166
B[ 3] = 2^-6.612243
B[ 4] = 2^-10.358473
B[ 5] = 2^-16.049647
B[ 6] = 2^-21.084973
pDDT sizes: Dp 62, Dxy 833
0:       11 <-        1 0.129028 (2^-2.954240)
1:        0 <-        0 1.000000 (2^0.000000)
2:        F <-        1 0.078583 (2^-3.669643)
3:        0 <-        F 0.004150 (2^-7.912537)
4: FFFFFFF1 <-        1 0.135681 (2^-2.881708)
5:        0 <-        0 1.000000 (2^0.000000)
6: FFFFFFF1 <-        1 0.078735 (2^-3.666845)
p_tot = 0.000000449563051 = 2^-21.084973, Bn = 0.000000 = 2^-21.084973
[
 */

/* --- */

/* 
	[./src/tea-add-threshold-search.cc:1154] nrounds = 17, Bn_init = 2^-63.963444 (B[16] = 2^-63.963444) : key E028DF9A 8819B4C3 3AB116AF  3C50723
B[ 0] = 2^0.000000
B[ 1] = 2^-1.025765
B[ 2] = 2^-2.054760
B[ 3] = 2^-5.340930
B[ 4] = 2^-11.095515
B[ 5] = 2^-16.449275
B[ 6] = 2^-22.433299
B[ 7] = 2^-26.481824
B[ 8] = 2^-30.985529
B[ 9] = 2^-35.000608
B[10] = 2^-42.426625
B[11] = 2^-45.555558
B[12] = 2^-50.359177
B[13] = 2^-54.416056
B[14] = 2^-56.691743
B[15] = 2^-59.593711
B[16] = 2^-63.963444
pDDT sizes: Dp 57, Dxy 123 | Cp 892, Cxy 892
 0:        0 <-        0 1.000000 (2^0.000000)
 1:        F <-        1 0.080200 (2^-3.640250)
 2:        0 <-        F 0.000214 (2^-12.192645)
 3: FFFFFFF1 <-        1 0.142303 (2^-2.812957)
 4:        0 <-        0 1.000000 (2^0.000000)
 5: FFFFFFF1 <-        1 0.082031 (2^-3.607683)
 6: FFFFFFFE <- FFFFFFF1 0.002258 (2^-8.790547)
 7:        F <- FFFFFFFF 0.139465 (2^-2.842022)
 8:        0 <-        0 1.000000 (2^0.000000)
 9: FFFFFFF1 <- FFFFFFFF 0.077820 (2^-3.683718)
10:        1 <- FFFFFFF1 0.004272 (2^-7.870717)
11:        0 <-        0 1.000000 (2^0.000000)
12:        1 <- FFFFFFF1 0.005005 (2^-7.642448)
13:        F <-        1 0.081970 (2^-3.608756)
14:        0 <-        0 1.000000 (2^0.000000)
15: FFFFFFF1 <-        1 0.133789 (2^-2.901968)
16: FFFFFF01 <- FFFFFFF1 0.048370 (2^-4.369733)
p_tot = 0.000000000000000 = 2^-63.963444, Bn = 0.000000 = 2^-63.963444
[./src/tea-add-threshold-search.cc:1238] nrounds = 17
[./tests/tea-add-threshold-search-tests.cc:101]
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:103] Final trail:
0:        0 <-        0 1.000000 (2^0.000000)
1:        F <-        1 0.079254 (2^-3.657370)
2:        0 <-        F 0.000183 (2^-12.415037)
3: FFFFFFF1 <-        1 0.139069 (2^-2.846131)
4:        0 <-        0 1.000000 (2^0.000000)
5:       11 <-        1 0.079773 (2^-3.647957)
6:        0 <-       11 0.000031 (2^-15.000000)
7: FFFFFFEF <-        1 0.134125 (2^-2.898353)
8:        0 <-        0 1.000000 (2^0.000000)
9:       11 <-        1 0.080170 (2^-3.640800)
10:        0 <-       11 0.000153 (2^-12.678072)
11: FFFFFFEF <-        1 0.142181 (2^-2.814195)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.082947 (2^-3.591670)
p_tot = 0.000000000000000 = 2^-63.189585, Bn = 0.000000 = 2^-63.189585
[./tests/tea-add-threshold-search-tests.cc:113] Final full trail:
0:        0 <-        0 1.000000 (2^0.000000)
1:        F <-        1 0.080200 (2^-3.640250)
2:        0 <-        F 0.000214 (2^-12.192645)
3: FFFFFFF1 <-        1 0.142303 (2^-2.812957)
4:        0 <-        0 1.000000 (2^0.000000)
5: FFFFFFF1 <-        1 0.082031 (2^-3.607683)
6: FFFFFFFE <- FFFFFFF1 0.002258 (2^-8.790547)
7:        F <- FFFFFFFF 0.139465 (2^-2.842022)
8:        0 <-        0 1.000000 (2^0.000000)
9: FFFFFFF1 <- FFFFFFFF 0.077820 (2^-3.683718)
10:        1 <- FFFFFFF1 0.004272 (2^-7.870717)
11:        0 <-        0 1.000000 (2^0.000000)
12:        1 <- FFFFFFF1 0.005005 (2^-7.642448)
13:        F <-        1 0.081970 (2^-3.608756)
14:        0 <-        0 1.000000 (2^0.000000)
15: FFFFFFF1 <-        1 0.133789 (2^-2.901968)
16: FFFFFF01 <- FFFFFFF1 0.048370 (2^-4.369733)
p_tot = 0.000000000000000 = 2^-63.963444
[./tests/tea-add-threshold-search-tests.cc:127] key
key[0] = 0xE028DF9A;
key[1] = 0x8819B4C3;
key[2] = 0x3AB116AF;
key[3] = 0x3C50723;

real    3m3.881s
user    3m3.555s
sys     0m0.012s

 */


/* --- */

#if 0									  // DEBUG
		// Disabled consistency check: b_found_in_croads must be false exactly
		// when cnt_new is zero, and true otherwise.
		if(cnt_new == 0)
		  assert(b_found_in_croads == false);
		else
		  assert(b_found_in_croads == true);
#endif

/* --- */

#if 0
	 // Disabled experiment: clear b_found_in_croads when the smallest
	 // probability among the entries matching dx falls below p_min.
	 // NOTE(review): the scan advances tmp_iter without an end() check.
	 if(b_found_in_croads) {
		// Scan a copy of the iterator so croad_iter itself is left untouched.
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator tmp_iter = croad_iter;
		double p_tmp_min = 1.0; // running minimum of p; starts at 1.0
		while(tmp_iter->dx == dx) {
		  if(tmp_iter->p < p_tmp_min) {
			 p_tmp_min = tmp_iter->p;
		  }
		  tmp_iter++;
		}
		// The minimum is strictly less than p_min: drop the flag.
		if(p_tmp_min < p_min) {
		  b_found_in_croads = false;
		}
	 }
#endif


/* --- */

#if 1									  // TEST
  // Test-only probe value. The initializer supplies four members in
  // declaration order; presumably dx, dy, a counter and p -- TODO confirm
  // against the definition of differential_t.
  // The insertions into the two croads containers are commented out, so
  // this block only defines tmp_diff.
  //  differential_t tmp_diff = {0xFFFFFFF1, 0xFFFFFFFF, 0, 0.00381};
  differential_t tmp_diff = {0xF, 0x1, 0, 0.003973};
  //  croads_diff_set_dx_dy.insert(tmp_diff);
  //  croads_diff_mset_p.insert(tmp_diff);
#endif


/* ---- */
	 // Debug probe: look tmp_diff up in both (dx, dy) sets and stop the
	 // program as soon as it is present in either of them.
	 std::set<differential_t, struct_comp_diff_dx_dy>::iterator croad_dx_dy = croads_diff_set_dx_dy.find(tmp_diff);
	 std::set<differential_t, struct_comp_diff_dx_dy>::iterator hway_dx_dy = diff_set_dx_dy.find(tmp_diff);

	 // Each find() is repeated here instead of reusing the iterators above.
	 bool b_croad_found = (croads_diff_set_dx_dy.find(tmp_diff) != croads_diff_set_dx_dy.end());
	 bool b_hway_found = (diff_set_dx_dy.find(tmp_diff) != diff_set_dx_dy.end());

	 if(b_croad_found) {
		// Print the stored entry next to the probe, then abort deliberately.
		printf("\n[%s:%d] R%2d %8X %8X 2^%f | ", __FILE__, __LINE__, nrounds, croad_dx_dy->dx, croad_dx_dy->dy, log2(croad_dx_dy->p));
		printf("%8X %8X 2^%f\n", tmp_diff.dx, tmp_diff.dy, log2(tmp_diff.p));
		assert(1 == 0);
	 }
	 if(b_hway_found) {
		printf("\n[%s:%d] R%2d %8X %8X 2^%f | ", __FILE__, __LINE__, nrounds, hway_dx_dy->dx, hway_dx_dy->dy, log2(hway_dx_dy->p));
		printf("%8X %8X 2^%f\n", tmp_diff.dx, tmp_diff.dy, log2(tmp_diff.p));
		assert(1 == 0);
	 }

	 // Commented-out dump of both containers at nrounds == NROUNDS - 1.
	 //	 if(nrounds == (NROUNDS - 1)) {
	 //		printf("[%s:%d] Dp:\n", __FILE__, __LINE__);
	 //		print_mset(diff_mset_p);
	 //		printf("[%s:%d] Dxy:\n", __FILE__, __LINE__);
	 //		print_set(diff_set_dx_dy);
	 //		printf("\n");
	 //	 }



/* --- */

/*

----- End search -----
[./tests/tea-add-threshold-search-tests.cc:103] Final trail:
0:       11 <-        1 0.080353 (2^-3.637508)
1:        0 <-        0 1.000000 (2^0.000000)
2:        F <-        1 0.138214 (2^-2.855023)
3:        0 <-        F 0.000031 (2^-15.000000)
4: FFFFFFF1 <-        1 0.081268 (2^-3.621163)
5:        0 <-        0 1.000000 (2^0.000000)
6: FFFFFFF1 <-        1 0.136566 (2^-2.872328)
7:        1 <- FFFFFFF1 0.000930 (2^-10.071235)
8: FFFFFFE2 <-        2 0.099609 (2^-3.327575)
9: FFFFFD04 <- FFFFFFD3 0.000549 (2^-10.830075)
10:     2C82 <- FFFFFD06 0.000854 (2^-10.192645)
11:        0 <-     2C55 0.000000 (2^-inf)
p_tot = 0.000000000000000 = 2^-inf, Bn = 0.000000 = 2^-inf
	  [./tests/tea-add-threshold-search-tests.cc:113] Final full trail:
	  0: FFFFFFF1 <-        1 0.082092 (2^-3.606610)
	  1:        0 <-        0 1.000000 (2^0.000000)
	  2:        F <-        1 0.139954 (2^-2.836979)
	  3:        0 <-        F 0.000092 (2^-13.415037)
	  4: FFFFFFF1 <-        1 0.084351 (2^-3.567458)
	  5:        0 <-        0 1.000000 (2^0.000000)
	  6: FFFFFFF1 <-        1 0.129852 (2^-2.945057)
	  7: FFFFFFFE <- FFFFFFF1 0.003601 (2^-8.117357)
	  8:        F <- FFFFFFFF 0.083252 (2^-3.586372)
	  9:        0 <-        0 1.000000 (2^0.000000)
	  10:        F <- FFFFFFFF 0.146271 (2^-2.773287)
	  11:        1 <-        F 0.003967 (2^-7.977632)
	  12:        0 <-        0 1.000000 (2^0.000000)
	  13:        1 <-        F 0.004730 (2^-7.723876)
	  14: FFFFFFF1 <-        1 0.134918 (2^-2.889843)
	  15:        0 <-        0 1.000000 (2^0.000000)
	  16: FFFFFFF1 <-        1 0.078949 (2^-3.662936)
p_tot = 0.000000000000000 = 2^-63.102443
	  [./tests/tea-add-threshold-search-tests.cc:127] key
	  key[0] = 0xD3DCBA64;
key[1] = 0xF1ACBEA;
key[2] = 0x5D98E5A4;
key[3] = 0xBA65798A;

real    10m21.061s
user    10m2.354s
sys     0m0.156s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$

*/

/* --- */

/* 
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:103] Final trail:
0: C0200000 <- 7C000000 0.053436 (2^-4.226037)
1: 84000000 <- 80000000 0.488068 (2^-1.034847)
2:        0 <-        0 1.000000 (2^0.000000)
3: 84000000 <- 80000000 0.490662 (2^-1.027200)
4: 40200000 <- 84000000 0.060303 (2^-4.051633)
5: 3FE10000 <- C0200000 0.015961 (2^-5.969333)
6:  3EFF800 <- C3E10000 0.001617 (2^-9.272080)
7: 7EF007C0 <- C40FF800 0.000275 (2^-11.830075)
8: 6DD6FBFE <- 42D107C0 0.000061 (2^-14.000000)
9:        2 <- 31E6F3FE 0.000000 (2^-35.590418)
p_tot = 0.000000000000000 = 2^-87.001622, Bn = 0.000000 = 2^-87.001622
[./tests/tea-add-threshold-search-tests.cc:113] Final full trail:
0: FFFFFFDE <- FFFFFFFE 0.078857 (2^-3.664610)
1: FFFFFFE2 <-        2 0.133301 (2^-2.907243)
2: FFFFFE1F <- FFFFFFE0 0.028198 (2^-5.148251)
3:       40 <- FFFFFE21 0.000061 (2^-14.000000)
4:      1E1 <-       20 0.028351 (2^-5.140465)
5: FFFFFFE2 <-        2 0.077148 (2^-3.696219)
6: FFFFFFDE <-        2 0.105469 (2^-3.245112)
7: FFFFFE1F <- FFFFFFE0 0.045441 (2^-4.459872)
8:       40 <- FFFFFE21 0.000641 (2^-10.607683)
9:      1E1 <-       20 0.050079 (2^-4.319640)
10: FFFFFFE2 <-        2 0.103577 (2^-3.271229)
11: FFFFFFE2 <-        2 0.077515 (2^-3.689387)
p_tot = 0.000000000000000 = 2^-64.149712
[./tests/tea-add-threshold-search-tests.cc:127] key
key[0] = 0xD0C6E176;
key[1] = 0x35C21E2;
key[2] = 0xA52FFD16;
key[3] = 0x22075F;

real    12m38.923s
user    12m36.491s
sys     0m0.216s

 */

/* --- */

/* 
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:103] Final trail:
0:        0 <-        0 1.000000 (2^0.000000)
1:        F <-        1 0.079620 (2^-3.650719)
2:        0 <-        F 0.000214 (2^-12.192645)
3: FFFFFFF1 <-        1 0.137634 (2^-2.861088)
4:        0 <-        0 1.000000 (2^0.000000)
5:       11 <-        1 0.081055 (2^-3.624961)
6:        0 <-       11 0.000031 (2^-15.000000)
7: FFFFFFEF <-        1 0.136505 (2^-2.872973)
8:        0 <-        0 1.000000 (2^0.000000)
9:       11 <-        1 0.077850 (2^-3.683153)
10:        0 <-       11 0.000275 (2^-11.830075)
11: FFFFFFEF <-        1 0.139771 (2^-2.838868)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.081818 (2^-3.611444)
14: FFFFFF01 <- FFFFFFF1 0.011566 (2^-6.433946)
p_tot = 0.000000000000000 = 2^-68.599872, Bn = 0.000000 = 2^-68.599872

B[ 0] = 2^0.000000
B[ 1] = 2^-1.021379
B[ 2] = 2^-2.074929
B[ 3] = 2^-5.340905
B[ 4] = 2^-11.133891
B[ 5] = 2^-16.452602
B[ 6] = 2^-22.584944
B[ 7] = 2^-26.316624
B[ 8] = 2^-30.841128
B[ 9] = 2^-35.299585
B[10] = 2^-39.350357
B[11] = 2^-42.164186
B[12] = 2^-47.611362
B[13] = 2^-52.917016
B[14] = 2^-53.310186
B[15] = 2^-56.655188
B[16] = 2^-61.075872
pDDT sizes: Dp 172, Dxy 171 | Cp 1455, Cxy 1455
[./tests/tea-add-threshold-search-tests.cc:113] Final full trail:
0: FFFFFF01 <- FFFFFFF1 0.042664 (2^-4.550851)
1:        F <- FFFFFFFF 0.079102 (2^-3.660150)
2:        0 <-        0 1.000000 (2^0.000000)
3:        F <- FFFFFFFF 0.140930 (2^-2.826948)
4:        1 <-        F 0.005219 (2^-7.582147)
5:        0 <-        0 1.000000 (2^0.000000)
6:        1 <-        F 0.001862 (2^-9.069263)
7: FFFFFFF1 <-        1 0.133026 (2^-2.910219)
8:        0 <-        0 1.000000 (2^0.000000)
9:        F <-        1 0.081909 (2^-3.609831)
10: FFFFFFFF <-        F 0.003723 (2^-8.069263)
11:        0 <-        0 1.000000 (2^0.000000)
12: FFFFFFFF <-        F 0.005463 (2^-7.516184)
13: FFFFFFF1 <- FFFFFFFF 0.083862 (2^-3.575834)
14:        0 <-        0 1.000000 (2^0.000000)
15: FFFFFFF1 <- FFFFFFFF 0.134003 (2^-2.899666)
16: FFFFFF01 <- FFFFFFF1 0.049194 (2^-4.345364)
17: FFFFF0F8 <- FFFFFF00 0.025635 (2^-5.285754)
p_tot = 0.000000000000000 = 2^-65.901474

[./tests/tea-add-threshold-search-tests.cc:127] key
key[0] = 0xE028DF9A;
key[1] = 0x8819B4C3;
key[2] = 0x3AB116AF;
key[3] = 0x3C50723;

real    11m32.323s
user    11m31.111s
sys     0m0.016s

 */



/* --- */

/* 
pDDT sizes: Dp 137, Dxy 139 | Cp 236, Cxy 236
 
	[./tests/tea-add-threshold-search-tests.cc:101] Final trail:
 0:       11 <-        1 0.130646 (2^-2.936268)
 1:        0 <-        0 1.000000 (2^0.000000)
 2:        F <-        1 0.082886 (2^-3.592732)
 3:        0 <-        F 0.004028 (2^-7.955606)
 4: FFFFFFF1 <-        1 0.132355 (2^-2.917518)
 5:        0 <-        0 1.000000 (2^0.000000)
 6:       11 <-        1 0.079895 (2^-3.645751)
 7:        0 <-       11 0.000122 (2^-13.000000)
 8: FFFFFFEF <-        1 0.127167 (2^-2.975207)
 9:        0 <-        0 1.000000 (2^0.000000)
10:       11 <-        1 0.080322 (2^-3.638056)
11:        0 <-       11 0.000122 (2^-13.000000)
12: FFFFFFEF <-        1 0.134613 (2^-2.893110)
13:        0 <-        0 1.000000 (2^0.000000)
14: FFFFFFF1 <-        1 0.085724 (2^-3.544159)
15: FFFFFF01 <- FFFFFFF1 0.004974 (2^-7.651272)
p_tot = 0.000000000000000 = 2^-67.749679, Bn = 0.000000 = 2^-67.749679
[./tests/tea-add-threshold-search-tests.cc:111] Final full trail:
0:        F <- FFFFFFFF 0.125854 (2^-2.990171)
1:        0 <-        0 1.000000 (2^0.000000)
2:        F <- FFFFFFFF 0.081512 (2^-3.616836)
3:        0 <-        F 0.004486 (2^-7.800328)
4: FFFFFFF1 <- FFFFFFFF 0.132568 (2^-2.915192)
5:        0 <-        0 1.000000 (2^0.000000)
6:       11 <- FFFFFFFF 0.081238 (2^-3.621705)
7:        0 <-       11 0.000183 (2^-12.415037)
8: FFFFFFEF <- FFFFFFFF 0.126373 (2^-2.984237)
9:        0 <-        0 1.000000 (2^0.000000)
10: FFFFFFF1 <- FFFFFFFF 0.081573 (2^-3.615756)
11:        1 <- FFFFFFF1 0.000275 (2^-11.830075)
12:        0 <-        0 1.000000 (2^0.000000)
13:        1 <- FFFFFFF1 0.004150 (2^-7.912537)
14:        F <-        1 0.080475 (2^-3.635318)
15:        0 <-        0 1.000000 (2^0.000000)
p_tot = 0.000000000000000 = 2^-63.337192
[./tests/tea-add-threshold-search-tests.cc:125] key
key[0] = 0xD0C6E176;
key[1] = 0x35C21E2;
key[2] = 0xA52FFD16;
key[3] = 0x22075F;

real    6m6.487s
user    6m5.851s
sys     0m0.004s

 */

/* --- */

/* 
pDDT sizes: Dp 57, Dxy 160 | Cp 1080, Cxy 1080
 
	[./tests/tea-add-threshold-search-tests.cc:93]
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:95] Final trail:
0:       22 <- FFFFFFFE 0.137299 (2^-2.864611)
1:       11 <-        1 0.076904 (2^-3.700792)
2:        0 <-        F 0.000244 (2^-12.000000)
3: FFFFFFF1 <-        1 0.139587 (2^-2.840759)
4:        0 <-        0 1.000000 (2^0.000000)
5:       11 <-        1 0.081055 (2^-3.624961)
6:        0 <-       11 0.000092 (2^-13.415037)
7: FFFFFFEF <-        1 0.136902 (2^-2.868786)
8:        0 <-        0 1.000000 (2^0.000000)
9:       11 <-        1 0.080627 (2^-3.632585)
10:        0 <-       11 0.000122 (2^-13.000000)
11: FFFFFFEF <-        1 0.139343 (2^-2.843285)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.080048 (2^-3.642998)
p_tot = 0.000000000000000 = 2^-64.433815, Bn = 0.000000 = 2^-64.433815

[./tests/tea-add-threshold-search-tests.cc:105] Final full trail:
0: FFFFFFDE <- FFFFFFFE 0.134216 (2^-2.897368)
1:        1 <- FFFFFFF1 0.004028 (2^-7.955606)
2:        F <- FFFFFFFF 0.082092 (2^-3.606610)
3:        0 <-        0 1.000000 (2^0.000000)
4: FFFFFFF1 <- FFFFFFFF 0.126434 (2^-2.983540)
5:        1 <- FFFFFFF1 0.004578 (2^-7.771181)
6:        0 <-        0 1.000000 (2^0.000000)
7:        1 <- FFFFFFF1 0.000458 (2^-11.093109)
8: FFFFFFF1 <-        1 0.133972 (2^-2.899995)
9:        1 <- FFFFFFE2 0.001862 (2^-9.069263)
10:       1E <-        2 0.099915 (2^-3.323161)
11:        0 <-        0 1.000000 (2^0.000000)
12:       22 <-        2 0.079895 (2^-3.645751)
13:      1FF <-       22 0.018707 (2^-5.740257)
14: FFFFE201 <-      201 0.015686 (2^-5.994375)
p_tot = 0.000000000000000 = 2^-66.980216
[./tests/tea-add-threshold-search-tests.cc:119] key
key[0] = 0xE028DF9A;
key[1] = 0x8819B4C3;
key[2] = 0x3AB116AF;
key[3] = 0x3C50723;

real    6m31.264s
user    6m30.460s
sys     0m0.040s
vpv@igor:~/skcrypto/trunk/work/src/yaarx$

*/

/* --- */

/*

0: FFFFFFF1 <-        1 0.081024 (2^-3.625504)
1:        0 <-        0 1.000000 (2^0.000000)
2:       11 <-        1 0.141785 (2^-2.818227)
3:        0 <-       11 0.000031 (2^-15.000000)
4: FFFFFFEF <-        1 0.077271 (2^-3.693938)
5:        0 <-        0 1.000000 (2^0.000000)
6:        F <-        1 0.135376 (2^-2.884956)
7:        0 <-        F 0.001953 (2^-9.000000)
8: FFFFFFF1 <-        1 0.079315 (2^-3.656259)
9:        0 <-        0 1.000000 (2^0.000000)
10:        F <-        1 0.140167 (2^-2.834779)
11:        0 <-        F 0.003510 (2^-8.154510)
12: FFFFFFF1 <-        1 0.079834 (2^-3.646853)
13:        0 <-        0 1.000000 (2^0.000000)
14: FFFFFFF1 <-        1 0.076324 (2^-3.711711)
15: FFFFFF01 <- FFFFFFF1 0.013336 (2^-6.228511)
p_tot = 0.000000000000000 = 2^-65.255247, Bn = 0.000000 = 2^-65.255247

*/

/* --- */

	 //	 if()

	 //	 if(!b_found_in_hways) {				  // if not a Highway, search in the Country roads table
	 //		croad_iter = croads_diff_set_dx_dy->lower_bound(diff_dy);
	 //		b_found_in_croads = (croad_iter != croads_diff_set_dx_dy->end()) && (croad_iter->dx == dx);
		//	 }


/* --- */

/* 
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:83] Final trail:
0:        0 <-        0 1.000000 (2^0.000000)
1:        F <-        1 0.081299 (2^-3.620622)
2:        0 <-        F 0.000214 (2^-12.192645)
3: FFFFFFF1 <-        1 0.135620 (2^-2.882357)
4:        0 <-        0 1.000000 (2^0.000000)
5:        F <-        1 0.081299 (2^-3.620622)
6:        0 <-        F 0.002258 (2^-8.790547)
7:        F <-        1 0.138367 (2^-2.853431)
8:        1 <-       1E 0.002991 (2^-8.385290)
9: FFFFFFE2 <-        2 0.103882 (2^-3.266985)
10:        0 <-        0 1.000000 (2^0.000000)
11: FFFFFFE2 <-        2 0.128754 (2^-2.957315)
12: FFFFFE01 <- FFFFFFE2 0.015930 (2^-5.972094)
13: FFFFE1C3 <- FFFFFE03 0.001923 (2^-9.022720)
p_tot = 0.000000000000000 = 2^-63.564627, Bn = 0.000000 = 2^-63.564627
[./tests/tea-add-threshold-search-tests.cc:93] Final full trail:
0: FFFFFFF1 <-        1 0.080414 (2^-3.636413)
1:        0 <-        0 1.000000 (2^0.000000)
2:        F <-        1 0.140656 (2^-2.829762)
3: FFFFFFFF <-        F 0.004608 (2^-7.761595)
4:        0 <-        0 1.000000 (2^0.000000)
5: FFFFFFFF <-        F 0.001801 (2^-9.117357)
6: FFFFFFF1 <- FFFFFFFF 0.137665 (2^-2.860768)
7:        0 <-        0 1.000000 (2^0.000000)
8: FFFFFFF1 <- FFFFFFFF 0.078369 (2^-3.673571)
9:        0 <- FFFFFFF1 0.000946 (2^-10.045804)
10:        F <- FFFFFFFF 0.143158 (2^-2.804320)
11:        0 <-        0 1.000000 (2^0.000000)
12:        F <- FFFFFFFF 0.083984 (2^-3.573735)
13:        1 <-        F 0.005554 (2^-7.492205)
14:        0 <-        0 1.000000 (2^0.000000)
15:        0 <-        F 0.004120 (2^-7.923184)
16:        0 <-        0 1.000000 (2^0.000000)
17: FFFFFF01 <-        F 0.027954 (2^-5.160796)
p_tot = 0.000000000000000 = 2^-66.879511
[./tests/tea-add-threshold-search-tests.cc:107] key
key[0] = 0x2CDFA327;
key[1] = 0xBF180421;
key[2] = 0x278E5FEC;
key[3] = 0x120C8854;

real    3m41.065s
user    3m40.682s
sys     0m0.008s


 */



/* --- */
/* 
0: FFFFFF01 <- FFFFFFF1 0.008698 (2^-6.845182)
4:        0 <-        F 0.002075 (2^-8.912537) 
 */

/* --- */

  // Scratch test: add one hand-picked differential to both containers.
  // The dx, dy and p values match a recorded trail line of the form
  // "FFFFFF01 <- FFFFFFF1 0.008698" (trail lines print dy before dx).
  // NOTE(review): only dx, dy and p are assigned; any other member of
  // differential_t keeps whatever value the declaration leaves it with.
  differential_t diff_tmp;
  diff_tmp.dx = 0xFFFFFFF1;
  diff_tmp.dy = 0xFFFFFF01;
  diff_tmp.p = 0.008698;
  // size() returns the container's unsigned size type, so it is printed
  // with %zu; passing it to %d is a format/argument type mismatch.
  // Assumes the containers are standard set/multiset types -- confirm.
  printf("BEFORE Initial set sizes: Dp %zu, Dxy %zu\n", diff_mset_p.size(), diff_set_dx_dy.size());
  // Both containers are expected to hold the same number of entries before
  // the new differential is added.
  assert(diff_set_dx_dy.size() == diff_mset_p.size());
  diff_mset_p.insert(diff_tmp);
  diff_set_dx_dy.insert(diff_tmp);


/* --- */
/* 

 0: FFFFFF01 <- FFFFFFF1 0.008881 (2^-6.815125)
 1:        F <-        1 0.134735 (2^-2.891802)
 2:        0 <-        0 1.000000 (2^0.000000)
 3:       11 <-        1 0.079346 (2^-3.655704)
 4:        0 <-       11 0.002136 (2^-8.870717)
 5: FFFFFFEF <-        1 0.131226 (2^-2.929879)
 6:        0 <-        0 1.000000 (2^0.000000)
 7: FFFFFFF1 <-        1 0.079834 (2^-3.646853)
p_tot = 0.000000002124720 = 2^-28.810080, Bn = 0.000000 = 2^-28.810080

 */


/* --- */
/* 
  key[0] = 0xAAAEDCB2;
  key[1] = 0x46E15B91;
  key[2] = 0x17889304;
  key[3] = 0xDCCC9FBB;

----- End search -----
[./tests/tea-add-threshold-search-tests.cc:82] Final trail:
0:        0 <-        0 1.000000 (2^0.000000)
1:        1 <-       2D 0.000031 (2^-15.000000)
2: FFFFFFF1 <-        1 0.080170 (2^-3.640800)
3:        1 <-       1E 0.001740 (2^-9.167110)
4: FFFFFFE2 <-        2 0.135071 (2^-2.888212)
5:        0 <-        0 1.000000 (2^0.000000)
6:       22 <-        2 0.107269 (2^-3.220691)
7:        0 <-       22 0.000977 (2^-10.000000)
8: FFFFFFDE <-        2 0.075348 (2^-3.730289)
9:        0 <-        0 1.000000 (2^0.000000)
10: FFFFFFE2 <-        2 0.096222 (2^-3.377491)
11: FFFFFE01 <- FFFFFFE2 0.016449 (2^-5.925859)
12: FFFFE1C3 <- FFFFFE03 0.001892 (2^-9.045804)
p_tot = 0.000000000000000 = 2^-65.996254, Bn = 0.000000 = 2^-65.996254
[./tests/tea-add-threshold-search-tests.cc:92] Final full trail:
0: FFFFFFF1 <-        1 0.126282 (2^-2.985282)
1:        0 <-        0 1.000000 (2^0.000000)
2: FFFFFFF1 <-        1 0.079254 (2^-3.657370)
3: FFFFFFFF <- FFFFFFF1 0.003967 (2^-7.977632)
4:        0 <-        0 1.000000 (2^0.000000)
5: FFFFFFFF <- FFFFFFF1 0.003143 (2^-8.313499)
6:        F <- FFFFFFFF 0.085571 (2^-3.546729)
7:        0 <-        0 1.000000 (2^0.000000)
8: FFFFFFF1 <- FFFFFFFF 0.126587 (2^-2.981800)
9:        1 <- FFFFFFF1 0.003601 (2^-8.117357)
10:        0 <-        0 1.000000 (2^0.000000)
11:        1 <- FFFFFFF1 0.005066 (2^-7.624961)
12:        F <-        1 0.134827 (2^-2.890822)
13:        0 <-        0 1.000000 (2^0.000000)
14: FFFFFFEF <-        1 0.084930 (2^-3.557575)
15: FFFFFFFE <- FFFFFFEF 0.001221 (2^-9.678072)
16:       11 <- FFFFFFFF 0.128113 (2^-2.964514)
17:        0 <-        0 1.000000 (2^0.000000)
p_tot = 0.000000000000000 = 2^-64.295613

real    5m5.624s
user    5m5.083s
sys     0m0.008s
vpv@igor:~/skcrypto/trunk/work/src/yaarx$


 */


/* --- */

/* 

B[ 0] = 2^0.000000
B[ 1] = 2^-1.019236
B[ 2] = 2^-2.053181
B[ 3] = 2^-5.332826
B[ 4] = 2^-11.128117
B[ 5] = 2^-16.346259
B[ 6] = 2^-22.695802
B[ 7] = 2^-26.421499
B[ 8] = 2^-31.240675
B[ 9] = 2^-35.271060
B[10] = 2^-43.644738
B[11] = 2^-49.339029
B[12] = 2^-56.219195
B[13] = 2^-60.963055
B[14] = 2^-62.960625
B[15] = 2^-65.703011
pDDT sizes: Dp 58, Dxy 162 | Cp 3704, Cxy 3704
 0:        0 <-        0 1.000000 (2^0.000000)
 1: FFFFFFF1 <- FFFFFFFF 0.080841 (2^-3.628768)
 2: FFFFFFFF <- FFFFFFF1 0.003845 (2^-8.022720)
 3: FFFFFFE2 <- FFFFFFFE 0.124390 (2^-3.007062)
 4:        4 <- FFFFFFD3 0.001404 (2^-9.476438)
 5:       1E <-        2 0.100342 (2^-3.317005)
 6: FFFFFFFF <- FFFFFFF1 0.001953 (2^-9.000000)
 7:        F <-        1 0.133392 (2^-2.906252)
 8:        0 <-        0 1.000000 (2^0.000000)
 9:        F <-        1 0.080688 (2^-3.631494)
10: FFFFFFFF <-        F 0.003632 (2^-8.105182)
11:        0 <-        0 1.000000 (2^0.000000)
12: FFFFFFFF <-        F 0.006287 (2^-7.313499)
13: FFFFFFF1 <- FFFFFFFF 0.082367 (2^-3.601791)
14:        0 <-        0 1.000000 (2^0.000000)
15:        F <- FFFFFFFF 0.077332 (2^-3.692799)
p_tot = 0.000000000000000 = 2^-65.703011, Bn = 0.000000 = 2^-65.703011
[./src/tea-add-threshold-search.cc:1182] nrounds = 16
[./tests/tea-add-threshold-search-tests.cc:69]
----- End search -----
[./tests/tea-add-threshold-search-tests.cc:71] Final trail:
0:       22 <- FFFFFFFE 0.133911 (2^-2.900652)
1:       11 <-        1 0.080933 (2^-3.627135)
2:        0 <-        F 0.000458 (2^-11.093109)
3: FFFFFFF1 <-        1 0.143463 (2^-2.801248)
4:        0 <-        0 1.000000 (2^0.000000)
5:       11 <-        1 0.079010 (2^-3.661821)
6:        0 <-       11 0.000031 (2^-15.000000)
7: FFFFFFEF <-        1 0.135742 (2^-2.881059)
8:        0 <-        0 1.000000 (2^0.000000)
9:       11 <-        1 0.081696 (2^-3.613599)
10:        0 <-       11 0.000214 (2^-12.192645)
11: FFFFFFEF <-        1 0.140320 (2^-2.833209)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.077271 (2^-3.693938)
p_tot = 0.000000000000000 = 2^-64.298415, Bn = 0.000000 = 2^-64.298415
[./tests/tea-add-threshold-search-tests.cc:81] Final full trail:
0:        0 <-        0 1.000000 (2^0.000000)
1: FFFFFFF1 <- FFFFFFFF 0.080841 (2^-3.628768)
2: FFFFFFFF <- FFFFFFF1 0.003845 (2^-8.022720)
3: FFFFFFE2 <- FFFFFFFE 0.124390 (2^-3.007062)
4:        4 <- FFFFFFD3 0.001404 (2^-9.476438)
5:       1E <-        2 0.100342 (2^-3.317005)
6: FFFFFFFF <- FFFFFFF1 0.001953 (2^-9.000000)
7:        F <-        1 0.133392 (2^-2.906252)
8:        0 <-        0 1.000000 (2^0.000000)
9:        F <-        1 0.080688 (2^-3.631494)
10: FFFFFFFF <-        F 0.003632 (2^-8.105182)
11:        0 <-        0 1.000000 (2^0.000000)
12: FFFFFFFF <-        F 0.006287 (2^-7.313499)
13: FFFFFFF1 <- FFFFFFFF 0.082367 (2^-3.601791)
14:        0 <-        0 1.000000 (2^0.000000)
15:        F <- FFFFFFFF 0.077332 (2^-3.692799)
p_tot = 0.000000000000000 = 2^-65.703011

real    5m36.633s
user    5m35.697s
sys     0m0.000s
v
 */

 /*


----- End search -----
[./tests/tea-add-threshold-search-tests.cc:71] Final trail:
 0:        0 <-        0 1.000000 (2^0.000000)
 1:        F <-        1 0.082855 (2^-3.593264)
 2:        0 <-        F 0.000183 (2^-12.415037)
 3: FFFFFFF1 <-        1 0.139984 (2^-2.836665)
 4:        0 <-        0 1.000000 (2^0.000000)
 5:       11 <-        1 0.080383 (2^-3.636960)
 6:        0 <-       11 0.000031 (2^-15.000000)
 7: FFFFFFEF <-        1 0.134766 (2^-2.891476)
 8:        0 <-        0 1.000000 (2^0.000000)
 9:       11 <-        1 0.079834 (2^-3.646853)
10:        0 <-       11 0.000183 (2^-12.415037)
11: FFFFFFEF <-        1 0.140839 (2^-2.827885)
12:        0 <-        0 1.000000 (2^0.000000)
13: FFFFFFF1 <-        1 0.079163 (2^-3.659037)
14: FFFFFF01 <- FFFFFFF1 0.012695 (2^-6.299560)
p_tot = 0.000000000000000 = 2^-69.221775, Bn = 0.000000 = 2^-69.221775
[./tests/tea-add-threshold-search-tests.cc:81] Final full trail:
 0:       1E <- FFFFFFFE 0.132050 (2^-2.920849)
 1:        0 <-        0 1.000000 (2^0.000000)
 2: FFFFFFE2 <- FFFFFFFE 0.102386 (2^-3.287903)
 3:        1 <- FFFFFFE2 0.002319 (2^-8.752072)
 4:        F <- FFFFFFFF 0.128265 (2^-2.962796)
 5:        1 <- FFFFFFF1 0.004333 (2^-7.850253)
 6:        0 <-        0 1.000000 (2^0.000000)
 7:        1 <- FFFFFFF1 0.000427 (2^-11.192645)
 8: FFFFFFF1 <-        1 0.136627 (2^-2.871683)
 9:        1 <- FFFFFFE2 0.002808 (2^-8.476438)
10:       1E <-        2 0.098785 (2^-3.339558)
11:        0 <-        0 1.000000 (2^0.000000)
12:       22 <-        2 0.079681 (2^-3.649613)
13:      1FF <-       22 0.019165 (2^-5.705379)
14: FFFFE201 <-      201 0.009033 (2^-6.790547)
p_tot = 0.000000000000000 = 2^-67.799737

real    5m47.511s
user    5m46.542s
sys     0m0.004s
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$

---

B[ 0] = 2^0.000000
B[ 1] = 2^-1.018790
B[ 2] = 2^-2.960642
B[ 3] = 2^-5.373575
B[ 4] = 2^-11.112446
B[ 5] = 2^-16.281079
B[ 6] = 2^-22.674470
B[ 7] = 2^-26.110358
B[ 8] = 2^-31.377126
B[ 9] = 2^-34.175885
B[10] = 2^-43.506585
B[11] = 2^-45.871746
B[12] = 2^-51.574971
B[13] = 2^-55.571639
B[14] = 2^-56.953454
pDDT sizes: Dp 57, Dxy 114 | Cp 190, Cxy 190
 0:        0 <-        0 1.000000 (2^0.000000)
 1: FFFFFFF1 <-        1 0.082855 (2^-3.593264)
 2:        0 <- FFFFFFF1 0.000214 (2^-12.192645)
 3:        F <-        1 0.137421 (2^-2.863329)
 4:        0 <-        0 1.000000 (2^0.000000)
 5: FFFFFFF1 <-        1 0.080505 (2^-3.634771)
 6: FFFFFFFE <- FFFFFFF1 0.001617 (2^-9.272080)
 7:        F <- FFFFFFFF 0.134003 (2^-2.899666)
 8:        0 <-        0 1.000000 (2^0.000000)
 9: FFFFFFF1 <- FFFFFFFF 0.081696 (2^-3.613599)
10:        1 <- FFFFFFF1 0.004211 (2^-7.891476)
11:        0 <-        0 1.000000 (2^0.000000)
12:        1 <- FFFFFFF1 0.006012 (2^-7.377948)
13:        F <-        1 0.081635 (2^-3.614677)
14:        0 <-        0 1.000000 (2^0.000000)

 */

/* --- */
/*
----- End search -----
uint32_t max_lp = 2;

[./tests/tea-add-threshold-search-tests.cc:71] Final trail:
 0: FFFFFFEF <-        1 0.136322 (2^-2.874909)
 1:        0 <-        0 1.000000 (2^0.000000)
 2:        F <-        1 0.082489 (2^-3.599654)
 3:        0 <-        F 0.000153 (2^-12.678072)
 4:        F <-        1 0.125580 (2^-2.993323)
 5:        1 <-       1E 0.002716 (2^-8.524267)
 6: FFFFFFE2 <-        2 0.101990 (2^-3.293504)
 7:        0 <-        0 1.000000 (2^0.000000)
 8: FFFFFFE2 <-        2 0.076019 (2^-3.717491)
p_tot = 0.000000000004538 = 2^-37.681220, Bn = 0.000000 = 2^-37.681220

[./tests/tea-add-threshold-search-tests.cc:81] Final full trail:
 0:        0 <-        0 1.000000 (2^0.000000)
 1:        F <- FFFFFFFF 0.077698 (2^-3.685983)
 2:        1 <-        F 0.003967 (2^-7.977632)
 3:        0 <-        0 1.000000 (2^0.000000)
 4:        1 <-        F 0.005951 (2^-7.392670)
 5: FFFFFFF1 <-        1 0.081696 (2^-3.613599)
 6:        0 <-        0 1.000000 (2^0.000000)
 7: FFFFFFEF <-        1 0.139801 (2^-2.838553)
 8: FFFFFF00 <- FFFFFFEF 0.020599 (2^-5.601256)
p_tot = 0.000000000431568 = 2^-31.109693

real    20m33.862s
user    20m30.309s
sys     0m0.084s

*/

/* --- */
// Disabled debug snippet (orphaned from its enclosing function): when
// nrounds == 8, print the set diff_set_dx_dy (labelled "highway table")
// followed by croads_diff_set_dx_dy (labelled "croads table"), bracketed by
// start/end marker lines.
#if 0
	 if(nrounds == 8) {
		printf("[%s:%d] Start highway table R#[%2d]\n", __FILE__, __LINE__, nrounds);
		print_set(diff_set_dx_dy);
		printf("[%s:%d] Start croads table R#[%2d]\n", __FILE__, __LINE__, nrounds);
		print_set(croads_diff_set_dx_dy);
		printf("[%s:%d] End tables R#[%2d]\n", __FILE__, __LINE__, nrounds);
	 }
#endif 



/* --- */

	 //	 std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy;
	 // Gather into found_mset_p (ordered by struct_comp_diff_p) every stored
	 // differential whose input difference equals dx, starting from the
	 // positions hway_iter / croad_iter located earlier.
	 std::multiset<differential_t, struct_comp_diff_p> found_mset_p;
	 if(b_found_in_hways) {
		// Test against end() BEFORE dereferencing: reading through the
		// past-the-end iterator is undefined behaviour.
		while((hway_iter != diff_set_dx_dy->end()) && (hway_iter->dx == dx)) {
		  found_mset_p.insert(*hway_iter);
		  hway_iter++;
		}
	 }
	 // Entries from the croads set are taken when no highway entry matched,
	 // or - if one did - only while cnt_lp <= max_lp.
	 //		assert(b_found_in_croads == true);
	 if(b_found_in_croads && ((!b_found_in_hways) || (cnt_lp <= max_lp))) {
		while((croad_iter != croads_diff_set_dx_dy->end()) && (croad_iter->dx == dx)) {
		  found_mset_p.insert(*croad_iter);
		  croad_iter++;
		}
	 }

/* --- */

bool is_dx_in_set_dx_dy(uint32_t dx, std::set<differential_t, struct_comp_diff_dx_dy> diff_set_dx_dy)
{
  bool b_is_inset = false;
  std::set<differential_t, struct_comp_diff_dx_dy>::iterator set_iter = diff_set_dx_dy.begin();;
  while((set_iter != diff_set_dx_dy.end()) && (!b_is_inset)) {
	 b_is_inset = (dx == set_iter->dx);
	 set_iter++;
  }
  return b_is_inset;
}

/* --- */

/**
 * Add entries to the pDDT for fixed input diference da. The same as 
 * \ref tea_f_add_pddt_i , but da is fixed .
 * \p cnt_new is the number of new entries that were added .
 */
void tea_f_da_add_pddt_i(const uint32_t k, const uint32_t n, 
								 const uint32_t lsh_const,  const uint32_t rsh_const,
								 gsl_matrix* A[2][2][2][2], gsl_vector* C,
								 const uint32_t da, uint32_t* db, uint32_t* dc, uint32_t* dd, 
								 double* p, const double p_thres,  
								 std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
								 std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy,
								 std::multiset<differential_t, struct_comp_diff_p>* diff_mset_p,
								 uint32_t* cnt_new)
{
  if(k == n) {
	 // check for property (1)
	 double p_xor3 = adp_xor3(A, da, *db, *dc, *dd);
	 assert((p_xor3 >= 0.0) && (p_xor3 <= 1.0));
	 assert(p_xor3 == *p);
	 bool b_xor3 = (*p >= p_thres);
	 assert(b_xor3);
	 // check for property (2)
	 bool b_lsh = (*db) == (LSH(da, lsh_const));
	 assert(b_lsh);
	 // check for property (3)
	 uint32_t dx[4] = {0, 0, 0, 0};
	 adp_rsh_odiffs(dx, da, rsh_const);
	 bool b_rsh = (*dc == dx[0]) || (*dc == dx[1]) || (*dc == dx[2]) || (*dc == dx[3]);
	 assert(b_rsh);

	 bool b_is_valid = (b_xor3 && b_lsh && b_rsh);
	 assert(b_is_valid);

	 // check if the output difference *dd is in the Highway set 
	 bool b_is_inset = is_dx_in_set_dx_dy(*dd, *hways_diff_set_dx_dy);

	 double p_f = eadp_tea_f(A, da, *dd, &p_f, lsh_const, rsh_const); // eadp_tea_f
	 //	 if(p_f >= p_thres) {
	 if((p_f >= p_thres) && (b_is_inset)){

		differential_t diff;
		diff.dx = da;
		diff.dy = *dd;
		diff.p = p_f;

		if(diff_set_dx_dy->size() < TEA_ADD_MAX_PDDT_SIZE) {
#if 0									  // DEBUG
		  bool b_found = (diff_set_dx_dy->find(diff) != diff_set_dx_dy->end());
		  if(!b_found) {
			 printf("[%s:%d] CNT %d: Dxy add %8X -> %8X  | %f = 2^%4.2f | %15d\n", __FILE__, __LINE__, *cnt_new, diff.dx, diff.dy, diff.p, log2(diff.p), diff_set_dx_dy->size());
		  }
#endif
#if 0
		  double p_min = diff_mset_p->rbegin()->p;
		  if(p_f >= p_min) {
			 diff_mset_p->insert(diff);
		  }
		  //#else
#endif
		  diff_mset_p->insert(diff);
		  diff_set_dx_dy->insert(diff);
		  (*cnt_new)++;
		}
	 }
	 return;
  }

#if 0									  // DEBUG
  printf("\r[%s:%d] %s() [%2d]: 2^%f >? 2%f", __FILE__, __LINE__, __FUNCTION__, k, log2(*p), log2(p_thres));
  fflush(stdout);
#endif

  // init L
  gsl_vector* L = gsl_vector_calloc(ADP_XOR3_MSIZE);
  gsl_vector_set_all(L, 1.0);

  //  for(uint32_t x = 0; x < 2; x++) {
  uint32_t x = (da >> k) & 1;

	 for(uint32_t y = 0; y < 2; y++) {
		for(uint32_t z = 0; z < 2; z++) {
		  for(uint32_t t = 0; t < 2; t++) {
			 gsl_vector* R = gsl_vector_calloc(ADP_XOR3_MSIZE);
			 double new_p = 0.0;

			 // L A C
			 gsl_blas_dgemv(CblasNoTrans, 1.0, A[x][y][z][t], C, 0.0, R);
			 gsl_blas_ddot(L, R, &new_p);

			 // 
			 // For the averaged case adp-f (no-fixed-key) a sufficient condition
			 // for adp-f(da->dd) >= p_thres is adp-xor3(da,db,dc_i->dd) >= p_thres
			 // for every dc_i : dc_i = RSH(da);
			 //			 if(new_p != 0.0) { // <- this finds all differences, but is *slow*
			 if(new_p >= p_thres) {
				uint32_t new_da =  da;//*da | (x << k);
				uint32_t new_db = *db | (y << k);
				uint32_t new_dc = *dc | (z << k);
				uint32_t new_dd = *dd | (t << k);

				bool b_lsh_con = lsh_condition_is_sat(k, new_da, new_db);
				bool b_rsh_con = rsh_condition_is_sat(k, new_da, new_dc);

				if(b_lsh_con && b_rsh_con) {
				  tea_f_da_add_pddt_i(k+1, n, lsh_const, rsh_const, A, R, new_da, &new_db, &new_dc, &new_dd, &new_p, p_thres, hways_diff_set_dx_dy, diff_set_dx_dy, diff_mset_p, cnt_new);
				}
			 }
			 gsl_vector_free(R);

		  } // t
		}	 // z
	 }		 // y
	 //  }		 // x
  gsl_vector_free(L);
}

/**
 * Wrapper for \ref tea_f_da_add_pddt_i : allocates the XOR3 matrices and the
 * initial vector, runs the recursion from bit 0 with all-zero db, dc, dd,
 * then releases what it allocated.
 *
 * \param n word size in bits (must equal WORD_SIZE).
 * \param p_thres probability threshold.
 * \param lsh_const left-shift constant.
 * \param rsh_const right-shift constant.
 * \param da fixed input difference.
 * \param hways_diff_set_dx_dy set consulted to accept an output difference.
 * \param diff_set_dx_dy set that receives new differentials.
 * \param diff_mset_p multiset that receives the same new differentials.
 * \return the number of new entries that were added.
 */
uint32_t tea_f_da_add_pddt(uint32_t n, double p_thres, 
									uint32_t lsh_const, uint32_t rsh_const, const uint32_t da,
									std::set<differential_t, struct_comp_diff_dx_dy>* hways_diff_set_dx_dy,
									std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy,
									std::multiset<differential_t, struct_comp_diff_p>* diff_mset_p)
{
  assert(n == WORD_SIZE);

  // transition matrices
  gsl_matrix* A[2][2][2][2];
  adp_xor3_alloc_matrices(A);
  adp_xor3_sf(A);
  adp_xor3_normalize_matrices(A);

  // initial vector: 1.0 at position ADP_XOR3_ISTATE, 0.0 elsewhere
  gsl_vector* C = gsl_vector_calloc(ADP_XOR3_MSIZE);
  gsl_vector_set(C, ADP_XOR3_ISTATE, 1.0);

  // the recursion starts at bit 0 with nothing assigned yet
  const uint32_t first_bit = 0;
  uint32_t db = 0;
  uint32_t dc = 0;
  uint32_t dd = 0;
  double p_init = 0.0;
  uint32_t num_added = 0;

  // compute Dxy
  tea_f_da_add_pddt_i(first_bit, n, lsh_const, rsh_const, A, C, da, &db, &dc, &dd, &p_init, p_thres, hways_diff_set_dx_dy, diff_set_dx_dy, diff_mset_p, &num_added);

  gsl_vector_free(C);
  adp_xor3_free_matrices(A);

  return num_added;
}

/* --- */

		//		double p_max = 0.0;
		//		uint32_t dy_max = 0;
		//		max_eadp_tea_f(A, dx, &dy_max, &p_max, lsh_const, rsh_const);
		//		if(p_max >= p_min) {}

		// Add the new diff to Dp only if it has better prob. than the min.
		//		uint32_t cnt_new = tea_f_da_add_pddt(WORD_SIZE, p_min, lsh_const, rsh_const, diff_dy.dx, diff_set_dx_dy, croads_diff_set_dx_dy, croads_diff_mset_p);


/* --- */

// Orphaned progress print: rewrites the current console line (leading '\r')
// with n, the differential dx -> dy, and log2 of pn and of the bound *Bn.
// Flushed immediately because the line has no trailing newline.
#if 1
			 printf("\r[%s:%d] %2d*: %8X -> %8X 2^%f, 2^%f", __FILE__, __LINE__, n, dx, dy, log2(pn), log2(*Bn));
			 fflush(stdout);
#endif

/* --- */
// Two disabled variants of a one-line progress print (both start with '\r').
// Variant 1 additionally shows a counter cnt and the size of diff_mset_p.
#if 0
		  printf("\r[%s:%d] %2d [%3d / %3d]: %8X -> %8X 2^%f, 2^%f", __FILE__, __LINE__, n, cnt, diff_mset_p->size(), dx, dy, log2(pn), log2(*Bn));
		  fflush(stdout);
#endif
// Variant 2 shows only n, dx -> dy, log2(pn) and log2(*Bn).
#if 0
		  printf("\r[%s:%d] %2d: %8X -> %8X 2^%f, 2^%f", __FILE__, __LINE__, n, dx, dy, log2(pn), log2(*Bn));
		  fflush(stdout);
#endif


/* --- */

// Disabled, orphaned snippet: obtains (dy_max, p_max) for dx from
// max_eadp_tea_f(), prints them next to p_min, and - once cnt_lp has reached
// max_lp - raises p_min to at least p_thres.
#if 0	 // {----
		double p_max = 0.0;
		uint32_t dy_max = 0;
		max_eadp_tea_f(A, dx, &dy_max, &p_max, lsh_const, rsh_const);
#if 1
		printf("\r[%s:%d] %s() %8X -> %8X 2^%f 2^%f", __FILE__, __LINE__, __FUNCTION__, dx, dy_max, log2(p_max), log2(p_min));
		fflush(stdout);
#endif

		// p_min never drops below p_thres from this point on
		if(cnt_lp >= max_lp) {
		  //		  double p_min_orig = p_min;
		  p_min = std::max(p_min, p_thres);
		  //		  if(p_min_orig < p_min) {
		  //			 printf("[%s:%d] cnt_lp %d / %d: adjust min 2^%f -> 2^%f (2^%f)\n", __FILE__, __LINE__, cnt_lp, max_lp, log2(p_min_orig), log2(p_min), log2(p_thres));
		  //		  }
		  //		  fflush(stdout);
		}
#endif  // ---}


/* --- */

/**
 * Recursive threshold search for an \p nrounds -round differential trail.
 *
 * Round \p n is processed by exactly one of the branches below:
 *  - n == 0, nrounds == 1 : take the best differential of \p diff_mset_p;
 *  - n == 0 or n == 1 (not last) : try every differential of \p diff_mset_p;
 *  - n >= 2 (not last) : the input difference is fixed to
 *    ADD(diff[n-2].dx, diff[n-1].dy); try every differential with this dx in
 *    \p diff_set_dx_dy, first extending the set through tea_f_da_add_pddt()
 *    if it has none. Skipped entirely when cnt_lp > max_lp;
 *  - n == nrounds - 1 (last) : pick one differential for the fixed dx and,
 *    if the whole trail is at least as probable as \p *Bn, record it.
 * A candidate for a non-last round is followed only if
 * p[0] * ... * p[n] * B[nrounds - 1 - (n + 1)] >= *Bn and is non-zero.
 *
 * \param n index of the current round (0-based).
 * \param nrounds total number of rounds of the trail.
 * \param npairs passed to tea_add_diff_adjust_to_key().
 * \param key the four key words, passed to tea_add_diff_adjust_to_key().
 * \param A matrices passed to max_eadp_tea_f().
 * \param B bounds; B[i] is read as the bound for the remaining rounds and
 *        B[n] is written when a better trail is found in the last round.
 * \param Bn in/out: best probability found so far for \p nrounds rounds.
 * \param diff_in differentials already chosen for rounds 0 .. n-1.
 * \param trail out: receives the best trail found.
 * \param lsh_const left-shift constant.
 * \param rsh_const right-shift constant.
 * \param diff_mset_p multiset of differentials; may receive new entries.
 * \param diff_set_dx_dy set of differentials; may receive new entries.
 */
void tea_add_threshold_search_full(const int n, const int nrounds, const uint32_t npairs, const uint32_t key[4],
											  gsl_matrix* A[2][2][2][2], double B[NROUNDS], double* Bn,
											  const differential_t diff_in[NROUNDS], differential_t trail[NROUNDS], 
											  uint32_t lsh_const, uint32_t rsh_const,
											  std::multiset<differential_t, struct_comp_diff_p>* diff_mset_p,
											  std::set<differential_t, struct_comp_diff_dx_dy>* diff_set_dx_dy)
{
  double pn = 0.0;

  // make a local copy of the input diff trail
  differential_t diff[NROUNDS] = {{0, 0, 0, 0.0}};
  for(int i = 0; i < n; i++) {
	 diff[i].dx = diff_in[i].dx;
	 diff[i].dy = diff_in[i].dy;
	 diff[i].p = diff_in[i].p;
  }

  // cnt_lp is whatever tea_add_threshold_count_lp() reports for the partial
  // trail with threshold p_thres; rounds n >= 2 are explored only while
  // cnt_lp <= max_lp.
  uint32_t max_lp = 1;
  uint32_t cnt_lp = 0;
  uint32_t trail_len = n;
  double p_thres = TEA_ADD_P_THRES;
  cnt_lp = tea_add_threshold_count_lp(diff, trail_len, p_thres);
  //  printf("[%s:%d] cnt_lp %d / %d\n", __FILE__, __LINE__, cnt_lp, max_lp);

  if((n == 0) && (nrounds == 1)) {						  // Only one round
	 //	 assert(*Bn == 0.0);
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx;
		uint32_t dy = mset_iter->dy;
		pn = tea_add_diff_adjust_to_key(npairs, n, dx, dy, key); // adjust the probability to the fixed round key
		if((pn >= *Bn) && (pn != 0.0)) {
		  trail[n].dx = dx;
		  trail[n].dy = dy;
		  trail[n].p = pn;
		  *Bn = pn;
		  B[n] = pn;
		} else {
		  b_end = true;
		}
		mset_iter++;
	 }	// while()
  }

  if((n == 0) && (nrounds > 1)) {						  // Round-0 and not last round
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx;
		uint32_t dy = mset_iter->dy;
		pn = tea_add_diff_adjust_to_key(npairs, n, dx, dy, key); // adjust the probability to the fixed round key
		double p = pn * B[nrounds - 1 - (n + 1)];
		assert(B[nrounds - 1 - (n + 1)] != 0.0);
		// remember the first element to detect insertions in front of it
		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();
		if((p >= *Bn) && (p != 0.0)) {
		  diff[n].dx = dx;
		  diff[n].dy = dy;
		  diff[n].p = pn;
		  tea_add_threshold_search_full(n+1, nrounds, npairs, key, A, B, Bn, diff, trail, lsh_const, rsh_const, diff_mset_p, diff_set_dx_dy);
		} else {
		  b_end = true;
		}
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		} else {
		  mset_iter++;
		}
	 }
  }

  if((n == 1) && (n != (nrounds - 1))) {						  // Round-1 and not last round
	 bool b_end = false;
	 std::multiset<differential_t, struct_comp_diff_p>::iterator mset_iter = diff_mset_p->begin();
	 while((mset_iter != diff_mset_p->end()) && (!b_end)) {
		uint32_t dx = mset_iter->dx;
		uint32_t dy = mset_iter->dy;
		pn = tea_add_diff_adjust_to_key(npairs, n, dx, dy, key); // adjust the probability to the fixed round key
		double p = diff[0].p * pn * B[nrounds - 1 - (n + 1)];
		std::multiset<differential_t, struct_comp_diff_p>::iterator begin_iter = diff_mset_p->begin();
		if((p >= *Bn) && (p != 0.0)) {
		  diff[n].dx = dx;
		  diff[n].dy = dy;
		  diff[n].p = pn;
		  tea_add_threshold_search_full(n+1, nrounds, npairs, key, A, B, Bn, diff, trail, lsh_const, rsh_const, diff_mset_p, diff_set_dx_dy);
		} else {
		  b_end = true;
		} 
		if(begin_iter != diff_mset_p->begin()) { // if the root was updated, start from beginning
		  mset_iter = diff_mset_p->begin();
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		} else {
		  mset_iter++;
		}
	 }	// while()
  }

  //  if((n >= 2) && (n != (nrounds - 1))) { // Round-i and not last round
  if((n >= 2) && (n != (nrounds - 1)) && (cnt_lp <= max_lp)) {
	 uint32_t dx = ADD(diff[n - 2].dx, diff[n - 1].dy);
	 uint32_t dy = 0;

	 differential_t diff_dy;
	 diff_dy.dx = dx;  
	 diff_dy.dy = 0;
	 diff_dy.p = 0.0;

	 // check if the differential is not already in the set
	 std::set<differential_t, struct_comp_diff_dx_dy>::iterator find_iter = diff_set_dx_dy->lower_bound(diff_dy);
	 bool b_found = (find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx);
	 if(!b_found) {				  // if not found, add new
		// p_i >= p_min = Bn / p1 * p2 ... * p{i-1} * B{n-i} 
		double p_min = 1.0;
		for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		  p_min *= diff[i].p;
		}
		p_min = p_min * 1.0 * B[nrounds - 1 - (n + 1)]; 
		p_min = *Bn / p_min;
		assert(p_min <= 1.0);

		// Add the new diff to Dp only if it has better prob. than the min.
		// NOTE(review): seven arguments are passed here; confirm that this
		// matches the declaration of tea_f_da_add_pddt that is in scope.
		uint32_t cnt_new = tea_f_da_add_pddt(WORD_SIZE, p_min, lsh_const, rsh_const, diff_dy.dx, diff_set_dx_dy, diff_mset_p);
		if(cnt_new != 0) {
		  // size() returns size_t; cast so that the arguments agree with %d
		  printf("[%s:%d] Added %d new elements: p_min = %f (2^%f). New sizes: Dxy %d, Dp %d (cnt_lp %d / %d).\n", 
					__FILE__, __LINE__, cnt_new, p_min, log2(p_min), 
					static_cast<uint32_t>(diff_set_dx_dy->size()), static_cast<uint32_t>(diff_mset_p->size()), cnt_lp, max_lp);
		}

		find_iter = diff_set_dx_dy->lower_bound(diff_dy);
	 } 

	 // Visit every stored differential with input difference dx. The end()
	 // test comes first so that the past-the-end iterator is never
	 // dereferenced, and the comparison is "== dx" rather than "< dx + 1",
	 // which wrapped to "< 0" (never true) for dx == 0xFFFFFFFF.
	 while((find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx)) {
		diff_dy = *find_iter;

		dx = diff_dy.dx;
		dy = diff_dy.dy;
		pn = tea_add_diff_adjust_to_key(npairs, n, dx, dy, key); // adjust the probability to the fixed round key

		double p = 1.0;
		for(int i = 0; i < n; i++) { // p[0] * p[1] * p[n-1]
		  p *= diff[i].p;
		}
		p = p * pn * B[nrounds - 1 - (n + 1)]; 

		// store the beginning
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator begin_iter = diff_set_dx_dy->begin();

		if((p >= *Bn) && (p != 0.0)) {
		  diff[n].dx = dx;
		  diff[n].dy = dy;
		  diff[n].p = pn;
		  tea_add_threshold_search_full(n+1, nrounds, npairs, key, A, B, Bn, diff, trail, lsh_const, rsh_const, diff_mset_p, diff_set_dx_dy);
		}

		if(begin_iter != diff_set_dx_dy->begin()) { // if the root was updated, start from beginning
		  diff_dy.dx = dx;  
		  diff_dy.dy = 0;
		  diff_dy.p = 0.0;
		  find_iter = diff_set_dx_dy->lower_bound(diff_dy);
		  printf("[%s:%d] Return to beginning\n", __FILE__, __LINE__);
		  assert((find_iter->dx == dx));
		  assert(1 == 0);			  // this case is not expected to occur
		} else {
		  find_iter++;
		}
	 }	// while
  }

  if((n == (nrounds - 1)) && (nrounds > 1)) {		  // Last round

	 uint32_t dx = 0;
	 uint32_t dy = 0;

	 if(nrounds == 2) { // Last round (n = 1) AND only two rounds - freely choose dx
		dx = diff_mset_p->begin()->dx;
		dy = diff_mset_p->begin()->dy;
		pn = diff_mset_p->begin()->p;
	 } else {

		dx = ADD(diff[n - 2].dx, diff[n - 1].dy);
		dy = 0;

		differential_t diff_max_dy;
		diff_max_dy.dx = dx;  
		diff_max_dy.dy = 0;
		diff_max_dy.p = 0.0;

		// check if a diff with the same dx is already in the set
		std::set<differential_t, struct_comp_diff_dx_dy>::iterator find_iter = diff_set_dx_dy->lower_bound(diff_max_dy);
		bool b_found = (find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx);
		if(!b_found) {				  // if not found, add new

		  max_eadp_tea_f(A, dx, &dy, &pn, lsh_const, rsh_const); // max_dy eadp_tea_f
		  pn = tea_add_diff_adjust_to_key(npairs, n, dx, dy, key); // adjust the probability to the fixed key

		  diff_max_dy.dx = dx; 
		  diff_max_dy.dy = dy;
		  diff_max_dy.p = pn;

		  // Add the new diff to Dp only if it has better prob. than the min.
		  double p_min = diff_mset_p->rbegin()->p;
		  if(diff_max_dy.p >= p_min) {
			 diff_mset_p->insert(diff_max_dy);
		  }

		  diff_set_dx_dy->insert(diff_max_dy);
		  find_iter = diff_set_dx_dy->lower_bound(diff_max_dy);
		} 
		assert((find_iter->dx == dx));

		diff_max_dy = *find_iter;
		// Get the max among the available differentials with this dx. As
		// above: end() is tested before dereferencing and "== dx" replaces
		// the wrapping "< dx + 1".
		// NOTE(review): the key-adjusted probability is compared with the
		// stored .p of the current maximum - confirm that this is intended.
		while((find_iter != diff_set_dx_dy->end()) && (find_iter->dx == dx)) {
		  double find_iter_p = tea_add_diff_adjust_to_key(npairs, n, find_iter->dx, find_iter->dy, key); // adjust the probability to the fixed key
		  if(find_iter_p > diff_max_dy.p) {
			 diff_max_dy = *find_iter;
		  }
		  find_iter++;
		}
		dx = diff_max_dy.dx;
		dy = diff_max_dy.dy;
		pn = diff_max_dy.p;
	 }

	 // probability of the complete trail
	 double p = 1.0;
	 for(int i = 0; i < n; i++) {
		p *= diff[i].p;
	 }
	 p *= pn;

	 if((p >= *Bn) && (p != 1.0) && (p != 0.0)) { // skip the 0-diff trail (p = 1.0)
		if (p > *Bn) {
		  printf("[%s:%d] %d | Update best found Bn: 2^%f -> 2^%f\n", __FILE__, __LINE__, n, log2(*Bn), log2(p));
		}
		diff[n].dx = dx;
		diff[n].dy = dy;
		diff[n].p = pn;
		*Bn = p;
		B[n] = p;
		for(int i = 0; i < nrounds; i++) {
		  trail[i].dx = diff[i].dx;
		  trail[i].dy = diff[i].dy;
		  trail[i].p = diff[i].p;
		}
	 }
  }
}

/* --- */


		//		if(p_max >= p_min) {
		//		  p_max = tea_add_diff_adjust_to_key(npairs, n, dx, dy_max, key); // adjust the probability to the fixed round key
		//		}
		//max_dy_adp_f_fk

/* --- */

#if 0	// {---
/**
 * Recursive helper of max_dy_adp_f_fk() (currently compiled out together
 * with it). Starting from bit index \p i, it extends both the value \p x and
 * the output difference \p dy by one bit per level, follows a branch only
 * while adp_f_is_sat() accepts the bits selected by \p mask_i, and
 * accumulates the values returned by the recursive calls in x_cnt[dy].
 *
 * \param n number of bits handled (at most WORD_SIZE, at least 2*rsh_const).
 * \param i index of the most recently assigned bit.
 * \param mask_i mask passed to adp_f_is_sat() for the current check.
 * \param x partially assigned value.
 * \param lsh_const left-shift constant.
 * \param rsh_const right-shift constant.
 * \param k0,k1,delta forwarded unchanged to adp_f_is_sat() / adp_f_check_x()
 *        (presumably round-key words and the round constant - TODO confirm).
 * \param dx input difference.
 * \param dy partially assigned output difference.
 * \param x_cnt array of counters indexed by output difference.
 * \param ret_prob in/out: largest probability recorded so far.
 * \param ret_dy in/out: output difference recorded together with *ret_prob.
 * \return 1 when \p i has reached n + rsh_const, 0 in every other case.
 */
uint32_t adp_f_assign_bit_x_dy(const uint32_t n, const uint32_t i, const uint32_t mask_i, const uint32_t x, 
										 const uint32_t lsh_const, const uint32_t rsh_const,
										 const uint32_t k0, const uint32_t k1, const uint32_t delta,
										 const uint32_t dx, const uint32_t dy, uint64_t* x_cnt, 
										 double* ret_prob, uint32_t* ret_dy)
{
  assert(n <= WORD_SIZE);
  assert(n >= (rsh_const * 2));
  assert(i <= (n + rsh_const));
  // Zero input difference: dy == 0 is given probability 1, any other dy probability 0.
  if((i == WORD_SIZE) && (dx == 0)) {
	 double p = 0.0;
	 if(dy == 0) {
		x_cnt[dy] = ALL_WORDS;	  // ! dy
		p = 1.0;
	 } else {
		x_cnt[dy] = 0;				  // ! dy
		p = 0.0;
	 }
	 if(p >= *ret_prob) {
		*ret_prob = p;
		*ret_dy = dy;				  // ! dy
	 }
	 return 0;
  } else {
	 // End of the recursion: report one hit to the caller, which adds it to x_cnt.
	 if(i == (n + rsh_const)) {
#if DEBUG_ADP_TEA_F_FK
		double p = *ret_prob;
		printf("[%s:%d] %2d: # %8llX: %8X -> %8X | x = %8X  %f 2^%f\n", __FILE__, __LINE__, n, x_cnt[dy], dx, dy, x, p, log2(p));
#endif  // DEBUG_ADP_TEA_F_FK
		assert(dy < MOD);
		if(n == (WORD_SIZE)) {
		  bool b_ok = adp_f_check_x(lsh_const, rsh_const, k0, k1, delta, dx, dy, x);
		  assert(b_ok);
		}
		return 1;
	 }
  }
  bool b_adp_f_is_sat = adp_f_is_sat(mask_i, lsh_const, rsh_const, k0, k1, delta, dx, dy, x); // check x[i]
  if(b_adp_f_is_sat) {
	 if(i < (WORD_SIZE - 1)) { // x[30:0] are assigned and we shall assign the last bit x[31]
		// NOTE: this local mask_i shadows the parameter of the same name.
		uint32_t mask_i = ~(0xffffffff << ((i + 1) - rsh_const)); // select x[(i+1)-R:0]
		for(uint32_t next_bit_dy = 0; next_bit_dy < 2; next_bit_dy++) { // ! dy
		  uint32_t new_dy = (next_bit_dy << (i + 1)) | dy; // assign dx[i+1]
		  for(uint32_t next_bit_x = 0; next_bit_x < 2; next_bit_x++) {
			 uint32_t new_x = (next_bit_x << (i + 1)) | x; // assign x[i+1]
			 uint32_t ret = 
			 adp_f_assign_bit_x_dy(n, i + 1, mask_i, new_x, lsh_const, rsh_const, k0, k1, delta, dx, new_dy, x_cnt, ret_prob, ret_dy);
			 x_cnt[new_dy] += ret;
		  }
		}
	 } else {
		// No further bits of x or dy to assign: recurse with the full mask.
		uint32_t mask_i = MASK;
		uint32_t new_dy = dy;
		uint32_t new_x = x;
		uint32_t ret =  
		adp_f_assign_bit_x_dy(n, i + 1, mask_i, new_x, lsh_const, rsh_const, k0, k1, delta, dx, new_dy, x_cnt, ret_prob, ret_dy);
		x_cnt[new_dy] += ret;
		if((i + 1) == (n + rsh_const)) {
		  // probability = counter for this dy divided by the number of words
		  double p = (double)x_cnt[new_dy] / (double)ALL_WORDS;
		  if(p >= *ret_prob) {
			 *ret_prob = p;
			 *ret_dy = dy;
		  }
#if DEBUG_ADP_TEA_F_FK
		  printf("\r[%s:%d] %2d: # %8llX: %8X -> %8X | x = %8X  %f 2^%f", __FILE__, __LINE__, n, x_cnt[dy], dx, dy, x, *ret_prob, log2(*ret_prob));
		  fflush(stdout);
#endif
		}
	 }
  } 
  return 0;
}

/**
 * For the input difference \p dx, search for the output difference with the
 * largest probability as computed by adp_f_assign_bit_x_dy() (currently
 * compiled out). All assignments of the 2*rsh_const least-significant bits
 * of dy and of x are enumerated here; the remaining bits are assigned by the
 * recursive helper.
 *
 * \param n number of bits handled (at most WORD_SIZE, at least 2*rsh_const).
 * \param dx input difference; for dx == 0 the result is dy = 0 with
 *        probability 1.0.
 * \param ret_dy out: the output difference that was found.
 * \param k0,k1,delta forwarded unchanged to adp_f_assign_bit_x_dy()
 *        (presumably round-key words and the round constant - TODO confirm).
 * \param lsh_const left-shift constant (must be smaller than \p rsh_const).
 * \param rsh_const right-shift constant.
 * \return the probability recorded for \p *ret_dy.
 */
double max_dy_adp_f_fk(const uint32_t n, const uint32_t dx, uint32_t* ret_dy,
							  const uint32_t k0, const uint32_t k1, const uint32_t delta,
							  const uint32_t lsh_const, const uint32_t rsh_const)
{

#if DEBUG_ADP_TEA_F_FK
  printf("[%s:%d] %s() Input: %d %d %8X %8X %8X \n", __FILE__, __LINE__, __FUNCTION__, 
			lsh_const, rsh_const, k0, k1, delta);
#endif  // DEBUG_ADP_TEA_F_FK

  assert(lsh_const < rsh_const);
  assert(n <= WORD_SIZE);
  assert(n >= (rsh_const * 2));
  if(dx == 0) {					  // zero input difference
	 *ret_dy = 0;
	 return 1.0;
  }
  // number of initial LSB bits
  uint32_t nlsb_init = (rsh_const * 2);
  if(nlsb_init > WORD_SIZE)
	 nlsb_init = WORD_SIZE;
  // all nlsb_init-bit values (10 bits when rsh_const is 5)
  uint32_t N = (1U << nlsb_init);
  uint32_t x = 0;
  uint32_t dy = 0;
  double max_p = 0.0;
  uint32_t max_dy = 0;

  // one 64-bit counter per possible output difference, zero-initialized
  //  uint32_t x_cnt[ALL_WORDS] = {0};
  uint64_t* x_cnt = (uint64_t *)calloc((size_t)ALL_WORDS, sizeof(uint64_t));
  if(x_cnt == NULL) {
	 printf("[%s:%d] ERROR: Bad calloc. Not enough memory. Exiting...\n", __FILE__, __LINE__);
	 exit(1);
  }

  //  const uint32_t n = WORD_SIZE; 
  // NOTE(review): the original comment here read "skip the zero difference",
  // yet j starts at 0; the p == 1.0 case is filtered after the inner loop.
  for(uint32_t j = 0; j < N; j++) {
	 dy = j;
	 // per-j working copies of the running maximum
	 uint32_t dyy = max_dy;
	 double pp = max_p;
#if DEBUG_ADP_TEA_F_FK
	 printf("[%s:%d] dy[%d:0] = %8X\n", __FILE__, __LINE__, (nlsb_init - 1), j);
#endif  // DEBUG_ADP_TEA_F_FK
	 for(uint32_t l = 0; l < N; l++) {
		x = l;							  // assign x[9:0]
		uint32_t i = nlsb_init - 1; // start at x[9]
		uint32_t mask_i = ~(0xffffffff << ((i + 1) - rsh_const)); 
		adp_f_assign_bit_x_dy(n, i, mask_i, x, lsh_const, rsh_const, k0, k1, delta, dx, dy, x_cnt, &pp, &dyy);
#if DEBUG_ADP_TEA_F_FK
		printf("[%s:%d] %8X -> %8X %f 2^%f | max_p = %f\n", __FILE__, __LINE__, dyy, dy, pp, log2(pp), max_p);
#endif  // DEBUG_ADP_TEA_F_FK
	 }
	 if((pp >= max_p) && (pp != 1.0)) { // skip the zero difference (p == 1.0)
#if DEBUG_ADP_TEA_F_FK
		if(max_dy != dyy) {
		  printf("[%s:%d] Update max dy[%d:0] = %8X | %8X -> %8X %f 2^%f\n", __FILE__, __LINE__, (nlsb_init - 1), j, dyy, dy, pp, log2(pp));
		}
#endif  // DEBUG_ADP_TEA_F_FK
		max_p = pp;
		max_dy = dyy;
	 }
  }
  free(x_cnt);
  *ret_dy = max_dy;
  return max_p;
}
#endif // ---}

/* --- */
		// Orphaned snippet: keep the incoming threshold in p_min_orig, then
		// raise p_min by half of its value unless the result would exceed 1.0.
		double p_min_orig = p_min;
		double scale_fact = (p_min * 0.5);
		if((p_min + scale_fact) <= 1.0) {
		  p_min += scale_fact;
		}

/* --- */

// XXX ---
void tea_add_trail_search_full(uint32_t key[4], double BB[NROUNDS], uint32_t num_rounds)
{
  uint32_t lsh_const = TEA_LSH_CONST; 
  uint32_t rsh_const = TEA_RSH_CONST;
  double p_thres = TEA_ADD_P_THRES;
  uint32_t word_size = WORD_SIZE;
  uint32_t npairs = NPAIRS;
  uint32_t num_rounds = NROUNDS;

  gsl_matrix* A[2][2][2][2];	  // matrices to compute ADP
  differential_t diff[NROUNDS];	  // arrey of differences
  differential_t trail[NROUNDS];  // a differential trail
  double B[NROUNDS];				  // arey of bounds

  // init matrices
  adp_xor3_alloc_matrices(A);
  adp_xor3_sf(A);
  adp_xor3_normalize_matrices(A);

  // init bounds
  for(int i = 0; i < NROUNDS; i++) {
	 B[i] = 0.0;
  }

  std::set<differential_t, struct_comp_diff_dx_dy> diff_set_dx_dy; // Dxy
  std::multiset<differential_t, struct_comp_diff_p> diff_mset_p;	 // Dp

  tea_f_add_pddt(word_size, p_thres, lsh_const, rsh_const, &diff_set_dx_dy);
#if 0									  // DEBUG
  printf("[%s:%d] Dxy before adjust key\n", __FILE__, __LINE__);
  print_set(diff_set_dx_dy);
#endif

#if 1
  tea_f_add_pddt_adjust_to_key(num_rounds, npairs, key, p_thres, &diff_set_dx_dy);
#endif
#if 0									  // DEBUG
  printf("[%s:%d] Dxy after adjust key, p_thres = %f 2^%f\n", __FILE__, __LINE__, p_thres, log2(p_thres));
  print_set(diff_set_dx_dy);
#endif

  tea_f_add_pddt_dxy_to_dp(&diff_mset_p, diff_set_dx_dy);
#if 0									  // DEBUG
  printf("[%s:%d] Dp , p_thres = %f 2^%f\n", __FILE__, __LINE__, p_thres, log2(p_thres));
  print_mset(diff_mset_p);
#endif

  printf("Initial set sizes: Dp %d, Dxy %d\n", diff_mset_p.size(), diff_set_dx_dy.size());
  assert(diff_set_dx_dy.size() == diff_mset_p.size());

  double Bn_init = 0.0;

  //  for(uint32_t nrounds = 1; nrounds <= NROUNDS; nrounds++ ) {
  double p_rand = 1.0 / (double)(1ULL << ((2 * WORD_SIZE) - 1));
  printf("[%s:%d] p_rand 2^%f\n", __FILE__, __LINE__, log2(p_rand));

  uint32_t nrounds = 0;
  do {
	 nrounds++;
	 printf("[%s:%d] nrounds = %d, Bn_init = 2^%f : key %8X %8X %8X %8X\n", __FILE__, __LINE__, nrounds, log2(Bn_init), key[0], key[1], key[2], key[3]);
	 double Bn = Bn_init;
	 B[nrounds - 1] = Bn_init;
	 int r = 0;						  // initial round

	 // init diffs
	 for(int i = 0; i < NROUNDS; i++) {
		diff[i].dx = 0;
		diff[i].dy = 0;
		diff[i].p = 0.0;
	 }

	 tea_add_threshold_search(r, nrounds, npairs, key, A, B, &Bn, diff, trail, lsh_const, rsh_const, &diff_mset_p, &diff_set_dx_dy);

	 assert(B[nrounds - 1] == Bn);

#if 1									  // DEBUG
	 for(uint32_t i = 0; i < nrounds; i++) {
		printf("B[%2d] = 2^%f", i, log2(B[i]));
		if(i > 0) {
		  if(B[i-1] < B[i]) {
			 printf(" <-");
		  }
		}
		printf("\n");
	 }
	 printf("pDDT sizes: Dp %d, Dxy %d\n", diff_mset_p.size(), diff_set_dx_dy.size());
#endif
#if 1									  // DEBUG
	 double p_tot = 1.0;
	 for(uint32_t i = 0; i < nrounds; i++) {
		printf("%2d: %8X <- %8X %f (2^%f)\n", i, trail[i].dy, trail[i].dx, trail[i].p, log2(trail[i].p));
		p_tot *= trail[i].p;
	 }
	 printf("p_tot = %16.15f = 2^%f, Bn = %f = 2^%f\n", p_tot, log2(p_tot), Bn, log2(Bn));
#endif  // #if 0									  // DEBUG
#if 1	  // VERIFY
	 if(nrounds >=3) {
		for(uint32_t i = (nrounds - 1); i >= 2; i--) {
		  assert(trail[i].dx == ADD(trail[i - 2].dx, trail[i - 1].dy));
		}
	 }
#endif  // #if 1	  // VERIFY

	 // Compute an initial bound for the next round
	 uint32_t next_round = nrounds;
	 if((next_round >= 2) && (next_round < NROUNDS)) {
		uint32_t dx = ADD(trail[next_round - 2].dx, trail[next_round - 1].dy);
		uint32_t dy = 0;
		double p = 0.0;

		max_eadp_tea_f(A, dx, &dy, &p, lsh_const, rsh_const); // max_dy eadp_tea_f
		p = tea_add_diff_adjust_to_key(npairs, next_round, dx, dy, key); // adjust the probability to the fixed key
		if(p == 0.0) {
		  p = nz_eadp_tea_f(A, 0.0, dx, &dy); // just get an arbitrary non-zero dy
		  p = tea_add_diff_adjust_to_key(npairs, next_round, dx, dy, key); // adjust the probability to the fixed key
		}
		//		assert(p != 0.0);

		Bn_init = B[next_round - 1] * p;
		B[next_round] = Bn_init;

		//		printf("[%s:%d] Set B[%d] = 2^%f\n", __FILE__, __LINE__, next_round, log2(Bn_init));

		trail[next_round].dx = dx;
		trail[next_round].dy = dy;
		trail[next_round].p = p;

		differential_t diff;
		diff.dx = dx;
		diff.dy = dy;
		diff.p = p;
		diff_set_dx_dy.insert(diff);
		diff_mset_p.insert(diff);
	 } else {
		Bn_init = 0.0;
	 }
	 //	 Bn_init = 0.0;

	 // If the bound for i rounds is better than the bound for (i - 1) rounds -- start the search again from round 1
	 uint32_t i = nrounds - 1;
	 if(i > 0) {
		if(B[i-1] < B[i]) {
		  nrounds = 0;
		  Bn_init = 0.0;
		  for(int j = 0; j < NROUNDS; j++) {
			 B[j] = 0.0;
		  }
		  printf("[%s:%d] Start again from round 1\n", __FILE__, __LINE__);
		}
	 }
  } while((nrounds < NROUNDS) && ((B[nrounds - 1] != 0.0) || (nrounds == 0) ) && (B[nrounds - 1] > p_rand));
	 //  } // for(int nrounds = 1 ...

  printf("[%s:%d] nrounds = %d\n", __FILE__, __LINE__, nrounds);
  assert(nrounds <= NROUNDS);

  num_rounds = nrounds;
  tea_add_verify_trail(num_rounds, npairs, key, trail);
  tea_add_verify_differential(num_rounds, npairs, key, trail);

#if 1									  // PATCH
  double BB[NROUNDS] = {0.0};				  // copy original bounds
  differential_t ttrail[NROUNDS] = {{0, 0, 0, 0.0}};  // copy original differential trail

  printf("[%s:%d] Final bounds:\n", __FILE__, __LINE__);
  for(uint32_t i = 0; i < num_rounds; i++) {
	 BB[i] = B[i];
	 ttrail[i] = trail[i];
	 printf("B[%2d] 2^%f\n", i, log2(B[i]));
  }

  for(int i = 0; i < NROUNDS; i++) {
	 trail[i].dx = 0;
	 trail[i].dy = 0;
	 trail[i].p = 0.0;
  }

  // re-init DDTs
#if 1
  diff_set_dx_dy.clear();
  diff_mset_p.clear();			  // re-init
  tea_f_add_pddt(word_size, p_thres, lsh_const, rsh_const, &diff_set_dx_dy);
  tea_f_add_pddt_adjust_to_key(num_rounds, npairs, key, p_thres, &diff_set_dx_dy);
  tea_f_add_pddt_dxy_to_dp(&diff_mset_p, diff_set_dx_dy);
  printf("Initial set sizes: Dp %d, Dxy %d\n", diff_mset_p.size(), diff_set_dx_dy.size());
#endif

  //  uint32_t N = nrounds - 1;
  printf("[%s:%d] num_rounds for second pass: %d\n", __FILE__, __LINE__, num_rounds);

  // SECOND ROUND SEARCH
  double scale_fact = 1.0;
  for(uint32_t nrounds = 1; nrounds <= num_rounds; nrounds++ ) {

#if 0
	 if(nrounds > 7) {
		scale_fact = 0.25;
	 }
	 if(nrounds > 12) {
		scale_fact = 0.01;
	 }
#endif
	 double Bn = BB[nrounds - 1] * scale_fact; // !!!
	 int r = 0;		  // initial round

	 printf("[%s:%d] nrounds = %d, Bn_init = 2^%f (B[%d] = 2^%f) : key %8X %8X %8X %8X\n", __FILE__, __LINE__, nrounds, log2(Bn), nrounds - 1, log2(B[nrounds - 1]), key[0], key[1], key[2], key[3]);

	 // init diffs
	 for(int i = 0; i < NROUNDS; i++) {
		diff[i].dx = 0;
		diff[i].dy = 0;
		diff[i].p = 0.0;
	 }

	 tea_add_threshold_search_full(r, nrounds, npairs, key, A, B, &Bn, diff, trail, lsh_const, rsh_const, &diff_mset_p, &diff_set_dx_dy);

	 //	 assert(B[nrounds - 1] == Bn);

#if 1									  // DEBUG
	 for(uint32_t i = 0; i < nrounds; i++) {
		printf("B[%2d] = 2^%f", i, log2(B[i]));
		if(i > 0) {
		  if(B[i-1] < B[i]) {
			 printf(" <-");
		  }
		}
		printf("\n");
	 }
	 printf("pDDT sizes: Dp %d, Dxy %d\n", diff_mset_p.size(), diff_set_dx_dy.size());
#endif
#if 1									  // DEBUG
	 double p_tot = 1.0;
	 for(uint32_t i = 0; i < nrounds; i++) {
		printf("%2d: %8X <- %8X %f (2^%f)\n", i, trail[i].dy, trail[i].dx, trail[i].p, log2(trail[i].p));
		p_tot *= trail[i].p;
	 }
	 printf("p_tot = %16.15f = 2^%f, Bn = %f = 2^%f\n", p_tot, log2(p_tot), Bn, log2(Bn));
#endif  // #if 0									  // DEBUG
#if 1	  // VERIFY
	 if(nrounds >=3) {
		for(uint32_t i = (nrounds - 1); i >= 2; i--) {
		  if(trail[i].p != 0.0) {
			 assert(trail[i].dx == ADD(trail[i - 2].dx, trail[i - 1].dy));
		  }
		}
	 }
#endif  // #if 1	  // VERIFY

	 // If the bound for i rounds is better than the bound for (i - 1) rounds -- start the search again from round 1
	 uint32_t i = nrounds - 1;
	 if(i > 0) {
		//		if((B[i-1] < B[i]) || (trail[i].p == 0.0)) {
		if((B[i-1] < B[i]) || (scale_fact < 0.00005)) {
		  nrounds = 0;
		  Bn_init = 0.0;
		  for(int j = 0; j < NROUNDS; j++) { // copy the original bounds
			 B[j] = BB[j];
			 trail[j].dx = 0;
			 trail[j].dy = 0;
			 trail[j].p = 0;
		  }
		  printf("[%s:%d] Start again from round 1: trail[%d].p = 2^%f\n", __FILE__, __LINE__, i, log2(trail[i].p));
		} else {
		  if(trail[i].p == 0) {
			 nrounds -= 1;
			 scale_fact *= 0.5;
			 for(int j = 0; j < NROUNDS; j++) { // copy the original bounds
				B[j] = BB[j];
				//				trail[j].dx = 0;
				//				trail[j].dy = 0;
				//				trail[j].p = 0;
			 }
			 printf("[%s:%d] Start again from round %d: scale_fact = %f\n", __FILE__, __LINE__, i, scale_fact);
		  } else {
			 if(scale_fact < 1.0) {
				scale_fact = 1.0;
			 }
		  }
		}
	 }

  } // 2-nd round search

  //  for(uint32_t i = 0; i < NROUNDS; i++) {
  for(uint32_t i = 0; i < num_rounds; i++) {
	 printf("%2d: %8X <- %8X (2^%f) | ", i, ttrail[i].dy, ttrail[i].dx, log2(ttrail[i].p));
	 printf("%8X <- %8X (2^%f)\n", trail[i].dy, trail[i].dx, log2(trail[i].p));
  }
  //  printf("[%s:%d] BB[%2d] 2^%f, B[%2d] 2^%f\n", __FILE__, __LINE__, NROUNDS-1, log2(BB[NROUNDS - 1]), NROUNDS-1, log2(B[NROUNDS - 1]));
  printf("[%s:%d] BB[%2d] 2^%f, B[%2d] 2^%f\n", __FILE__, __LINE__, num_rounds-1, log2(BB[num_rounds - 1]), num_rounds-1, log2(B[num_rounds - 1]));

#endif  // #if 1 // PATCH

  adp_xor3_free_matrices(A);
}

/* --- */

void tea_add_trail_search(uint32_t key[4])
{
  uint32_t lsh_const = TEA_LSH_CONST; 
  uint32_t rsh_const = TEA_RSH_CONST;
  double p_thres = TEA_ADD_P_THRES;
  uint32_t word_size = WORD_SIZE;
  uint32_t npairs = NPAIRS;
  uint32_t num_rounds = NROUNDS;

  gsl_matrix* A[2][2][2][2];	  // matrices to compute ADP
  differential_t diff[NROUNDS];	  // arrey of differences
  differential_t trail[NROUNDS];  // a differential trail
  double B[NROUNDS];				  // arey of bounds

  // init matrices
  adp_xor3_alloc_matrices(A);
  adp_xor3_sf(A);
  adp_xor3_normalize_matrices(A);

  // init bounds
  for(int i = 0; i < NROUNDS; i++) {
	 B[i] = 0.0;
  }

  std::set<differential_t, struct_comp_diff_dx_dy> diff_set_dx_dy; // Dxy
  std::multiset<differential_t, struct_comp_diff_p> diff_mset_p;	 // Dp

  tea_f_add_pddt(word_size, p_thres, lsh_const, rsh_const, &diff_set_dx_dy);
#if 0									  // DEBUG
  printf("[%s:%d] Dxy before adjust key\n", __FILE__, __LINE__);
  print_set(diff_set_dx_dy);
#endif

#if 1
  tea_f_add_pddt_adjust_to_key(num_rounds, npairs, key, p_thres, &diff_set_dx_dy);
#endif
#if 0									  // DEBUG
  printf("[%s:%d] Dxy after adjust key, p_thres = %f 2^%f\n", __FILE__, __LINE__, p_thres, log2(p_thres));
  print_set(diff_set_dx_dy);
#endif

  tea_f_add_pddt_dxy_to_dp(&diff_mset_p, diff_set_dx_dy);
#if 0									  // DEBUG
  printf("[%s:%d] Dp , p_thres = %f 2^%f\n", __FILE__, __LINE__, p_thres, log2(p_thres));
  print_mset(diff_mset_p);
#endif

  printf("Initial set sizes: Dp %d, Dxy %d\n", diff_mset_p.size(), diff_set_dx_dy.size());
  assert(diff_set_dx_dy.size() == diff_mset_p.size());

  double Bn_init = 0.0;

  for(uint32_t nrounds = 1; nrounds <= NROUNDS; nrounds++ ) {

	 printf("[%s:%d] nrounds = %d, Bn_init = 2^%f : key %8X %8X %8X %8X\n", __FILE__, __LINE__, nrounds, log2(Bn_init), key[0], key[1], key[2], key[3]);
	 double Bn = Bn_init;
	 B[nrounds - 1] = Bn_init;
	 int r = 0;						  // initial round

	 // init diffs
	 for(int i = 0; i < NROUNDS; i++) {
		diff[i].dx = 0;
		diff[i].dy = 0;
		diff[i].p = 0.0;
	 }

	 tea_add_threshold_search(r, nrounds, npairs, key, A, B, &Bn, diff, trail, lsh_const, rsh_const, &diff_mset_p, &diff_set_dx_dy);

	 assert(B[nrounds - 1] == Bn);

#if 1									  // DEBUG
	 for(uint32_t i = 0; i < nrounds; i++) {
		printf("B[%2d] = 2^%f", i, log2(B[i]));
		if(i > 0) {
		  if(B[i-1] < B[i]) {
			 printf(" <-");
		  }
		}
		printf("\n");
	 }
	 printf("pDDT sizes: Dp %d, Dxy %d\n", diff_mset_p.size(), diff_set_dx_dy.size());
#endif
#if 1									  // DEBUG
	 double p_tot = 1.0;
	 for(uint32_t i = 0; i < nrounds; i++) {
		printf("%2d: %8X <- %8X %f (2^%f)\n", i, trail[i].dy, trail[i].dx, trail[i].p, log2(trail[i].p));
		p_tot *= trail[i].p;
	 }
	 printf("p_tot = %16.15f = 2^%f, Bn = %f = 2^%f\n", p_tot, log2(p_tot), Bn, log2(Bn));
#endif  // #if 0									  // DEBUG
#if 1	  // VERIFY
	 if(nrounds >=3) {
		for(uint32_t i = (nrounds - 1); i >= 2; i--) {
		  assert(trail[i].dx == ADD(trail[i - 2].dx, trail[i - 1].dy));
		}
	 }
#endif  // #if 1	  // VERIFY

	 // Compute an initial bound for the next round
	 uint32_t next_round = nrounds;
	 if((next_round >= 2) && (next_round < NROUNDS)) {
		uint32_t dx = ADD(trail[next_round - 2].dx, trail[next_round - 1].dy);
		uint32_t dy = 0;
		double p = 0.0;

		max_eadp_tea_f(A, dx, &dy, &p, lsh_const, rsh_const); // max_dy eadp_tea_f
		p = tea_add_diff_adjust_to_key(npairs, next_round, dx, dy, key); // adjust the probability to the fixed key
		if(p == 0.0) {
		  p = nz_eadp_tea_f(A, 0.0, dx, &dy); // just get an arbitrary non-zero dy
		  p = tea_add_diff_adjust_to_key(npairs, next_round, dx, dy, key); // adjust the probability to the fixed key
		}
		//		assert(p != 0.0);

		Bn_init = B[next_round - 1] * p;
		B[next_round] = Bn_init;

		//		printf("[%s:%d] Set B[%d] = 2^%f\n", __FILE__, __LINE__, next_round, log2(Bn_init));

		trail[next_round].dx = dx;
		trail[next_round].dy = dy;
		trail[next_round].p = p;

		differential_t diff;
		diff.dx = dx;
		diff.dy = dy;
		diff.p = p;
		diff_set_dx_dy.insert(diff);
		diff_mset_p.insert(diff);
	 } else {
		Bn_init = 0.0;
	 }

	 // If the bound for i rounds is better than the bound for (i - 1) rounds -- start the search again from round 1
	 uint32_t i = nrounds - 1;
	 if(i > 0) {
		if(B[i-1] < B[i]) {
		  nrounds = 0;
		  Bn_init = 0.0;
		  for(int j = 0; j < NROUNDS; j++) {
			 B[j] = 0.0;
		  }
		  printf("[%s:%d] Start again from round 1\n", __FILE__, __LINE__);
		}
	 }

  } // for(int nrounds = 1 ...

  tea_add_verify_trail(num_rounds, npairs, key, trail);
  tea_add_verify_differential(num_rounds, npairs, key, trail);
  adp_xor3_free_matrices(A);
}


/* --- */

	 // Scratch fragment (the enclosing loop is not part of this snippet).
	 // NOTE(review): appears to be an earlier variant of the restart check of a
	 // search loop that keeps a backup BB[] of the bounds -- confirm before reuse.
	 // b_none_found records whether any round of the trail violates the relation
	 // trail[i].dx == ADD(trail[i - 2].dx, trail[i - 1].dy).
	 bool b_none_found = false;
#if 1	  // VERIFY
	 if(nrounds >=3) {
		// i is unsigned: the loop stops at i == 1 and therefore never wraps around
		for(uint32_t i = (nrounds - 1); i >= 2; i--) {
		  if(trail[i].dx != ADD(trail[i - 2].dx, trail[i - 1].dy)) {
			 b_none_found = true;
		  }
		  //		  assert(trail[i].dx == ADD(trail[i - 2].dx, trail[i - 1].dy));
		}
	 }
#endif  // #if 1	  // VERIFY

	 // If the bound for i rounds is better than the bound for (i - 1) rounds -- start the search again from round 1
	 uint32_t i = nrounds - 1;
	 if(i > 0) {
		//		if((B[i-1] < B[i]) || (b_none_found == true) || (trail[i].p == 0.0)) {
		// In the active condition b_none_found is not tested; it is only printed below.
		if((B[i-1] < B[i]) || (trail[i].p == 0.0)) {
		  nrounds = 0;
		  Bn_init = 0.0;
		  for(int j = 0; j < NROUNDS; j++) { // copy the original bounds
			 B[j] = BB[j];
		  }
		  printf("[%s:%d] Start again from round 1: trail[%d].p = 2^%f, b_none_found = %d\n", __FILE__, __LINE__, i, log2(trail[i].p), b_none_found);
		}
	 }


/* --- */


First round:

B[ 0] = 2^0.000000
B[ 1] = 2^-1.018254
B[ 2] = 2^-2.055810
B[ 3] = 2^-5.361799
B[ 4] = 2^-11.098062
B[ 5] = 2^-16.388564
B[ 6] = 2^-24.567849
B[ 7] = 2^-34.117833
B[ 8] = 2^-38.116154
B[ 9] = 2^-43.146782
B[10] = 2^-49.195497
B[11] = 2^-56.622590
B[12] = 2^-60.524357
B[13] = 2^-64.153125
	  pDDT sizes: Dp 70, Dxy 1062
	  0:        0 <-        0 1.000000 (2^0.000000)
	  1:       11 <-        1 0.078339 (2^-3.674132)
	  2:        0 <-       11 0.000122 (2^-13.000000)
	  3: FFFFFFEF <-        1 0.141754 (2^-2.818537)
	  4:        0 <-        0 1.000000 (2^0.000000)
	  5:       11 <-        1 0.079132 (2^-3.659594)
	  6:        0 <-       11 0.000061 (2^-14.000000)
	  7: FFFFFFEF <-        1 0.136230 (2^-2.875879)
	  8:        0 <-        0 1.000000 (2^0.000000)
	  9:       11 <-        1 0.078979 (2^-3.662378)
	  10:        0 <-       11 0.000061 (2^-14.000000)
	  11: FFFFFFEF <-        1 0.140259 (2^-2.833837)
	  12:        0 <-        0 1.000000 (2^0.000000)
	  13: FFFFFFF1 <-        1 0.080841 (2^-3.628768)
	  p_tot = 0.000000000000000 = 2^-64.153125, Bn = 0.000000 = 2^-64.153125



Second round:

B[ 1] = 2^-1.025227
B[ 2] = 2^-2.956967
B[ 3] = 2^-5.357931
B[ 4] = 2^-11.086067
B[ 5] = 2^-16.339391
B[ 6] = 2^-24.574305
B[ 7] = 2^-31.805116
B[ 8] = 2^-37.241295
B[ 9] = 2^-42.073683
B[10] = 2^-49.707038
B[11] = 2^-56.045881
B[12] = 2^-60.944141
B[13] = 2^-65.315758
	  pDDT sizes: Dp 71, Dxy 1100
	  0:       1E <-        2 0.134735 (2^-2.891802)
	  1:        F <-        1 0.081085 (2^-3.624417)
	  2:        0 <-       11 0.000397 (2^-11.299560)
	  3: FFFFFFEF <-        1 0.139343 (2^-2.843285)
	  4:        0 <-        0 1.000000 (2^0.000000)
	  5:       11 <-        1 0.080109 (2^-3.641898)
	  6:        0 <-       11 0.000061 (2^-14.000000)
	  7: FFFFFFEF <-        1 0.133911 (2^-2.900652)
	  8:        0 <-        0 1.000000 (2^0.000000)
	  9:       11 <-        1 0.082184 (2^-3.605001)
	  10:        0 <-       11 0.000061 (2^-14.000000)
	  11: FFFFFFEF <-        1 0.139709 (2^-2.839498)
	  12:        0 <-        0 1.000000 (2^0.000000)
	  13: FFFFFFF1 <-        1 0.078583 (2^-3.669643)
	  p_tot = 0.000000000000000 = 2^-65.315758, Bn = 0.000000 = 2^-65.315758


/* --- */
[./src/tea-add-threshold-search.cc:893] Final bounds:
B[ 0] 2^0.000000
B[ 1] 2^-1.013003
B[ 2] 2^-2.051010
B[ 3] 2^-5.326119
B[ 4] 2^-11.092500
B[ 5] 2^-16.420890
B[ 6] 2^-24.819300
B[ 7] 2^-33.665341
B[ 8] 2^-37.764419
B[ 9] 2^-42.717709
B[10] 2^-50.842433
B[11] 2^-55.295289
B[12] 2^-59.329377
B[13] 2^-62.969627
[./src/tea-add-threshold-search.cc:910] nrounds = 1, Bn_init = 2^-2.000000 : key E028DF9A 8819B4C3 3AB116AF  3C50723
B

/* --- */

B[ 0] 2^0.000000
B[ 1] 2^-1.014158
B[ 2] 2^-2.040550
B[ 3] 2^-5.386258
B[ 4] 2^-11.121815
B[ 5] 2^-16.419670
B[ 6] 2^-24.497897
B[ 7] 2^-32.064099
B[ 8] 2^-38.278724
B[ 9] 2^-43.208812
B[10] 2^-48.800461
B[11] 2^-56.974479
B[12] 2^-57.280305
B[13] 2^-60.896061


/* --- */

[ 0] 1.000000 (2^ 0.000) [ 1] 1.000000 (2^ 0.000) [ 2] 1.000000 (2^ 0.000) [ 3] 1.000000 (2^ 0.000)
[ 4] 1.000000 (2^ 0.000) [ 5] 1.000000 (2^ 0.000) [ 6] 1.000000 (2^ 0.000) [ 7] 1.000000 (2^ 0.000)
[ 8] 1.000000 (2^ 0.000) [ 9] 1.000000 (2^ 0.000) [10] 1.000000 (2^ 0.000) [11] 0.118164 (2^-3.081)
[12] 1.000000 (2^ 0.000) [13] 1.000000 (2^ 0.000) [14] 1.000000 (2^ 0.000) [15] 1.000000 (2^ 0.000)
[./tests/salsa-tests.cc:212] PW_exp vs. P_rand:
[ 0]       X (2^ 0.000) [ 1]       X (2^ 0.000) [ 2]       X (2^ 0.000) [ 3]       X (2^ 0.000)
[ 4]       X (2^ 0.000) [ 5]       X (2^ 0.000) [ 6]       X (2^ 0.000) [ 7]       X (2^ 0.000)
[ 8]       X (2^ 0.000) [ 9]       X (2^ 0.000) [10]       X (2^ 0.000) [11]       X (2^-3.000)
[12]       X (2^ 0.000) [13]       X (2^ 0.000) [14]       X (2^ 0.000) [15]       X (2^ 0.000)
[./tests/salsa-tests.cc:214] p = 0.000000 (2^-104.016596), p = 0.118164 (2^-3.081137)
[./tests/salsa-tests.cc:216] S:        0        0       73       28
[./tests/salsa-tests.cc:210] PW_exp:
[ 0] 0.121826 (2^-3.037) [ 1] 0.124512 (2^-3.006) [ 2] 0.034668 (2^-4.850) [ 3] 0.007812 (2^-7.000)
[ 4] 0.250000 (2^-2.000) [ 5] 0.033203 (2^-4.913) [ 6] 0.250977 (2^-1.994) [ 7] 0.062500 (2^-4.000)
[ 8] 0.120361 (2^-3.055) [ 9] 0.251953 (2^-1.989) [10] 1.000000 (2^ 0.000) [11] 0.009277 (2^-6.752)
[12] 0.505371 (2^-0.985) [13] 0.508057 (2^-0.977) [14] 1.000000 (2^ 0.000) [15] 1.000000 (2^ 0.000)
[./tests/salsa-tests.cc:212] PW_exp vs. P_rand:
[ 0]       X (2^-3.000) [ 1]       X (2^-3.000) [ 2]       X (2^-5.000) [ 3]       X (2^-7.000)
[ 4]       X (2^-2.000) [ 5]       X (2^-5.000) [ 6]       X (2^-2.000) [ 7]       X (2^-4.000)
salsa-tests: ./src/salsa.cc:492: void salsa_print_prob_vs_rand(double*, double*): Assertion `0 == 1' failed.
[ 8]       X (2^-3.000) [ 9]       X (2^-2.000) [10]       X (2^ 0.000) [11] 2^-6.752 (2^-8.000) Aborted
vpv@mazirat:~/skcrypto/trunk/work/src/yaarx$ 


/* --- */

		//		double eps = (2 * P[i]) - 1.0;
		//		double eps = 0.5 - P[i];
		//		if(P[i] > 0.5) {
		//		  eps = P[i] - 0.5;
		//		}
		//		printf("[%2d] %6.3f (2^%6.3f) ", i, eps, log2(eps));

/* ---- */

[./tests/salsa-tests.cc:184] PW_the:
[ 0] 0.000000 (2^-37.438) [ 1] 0.000000 (2^-37.708) [ 2] 0.000000 (2^-37.930) [ 3] 0.000000 (2^-40.293)
[ 4] 0.000000 (2^-49.086) [ 5] 0.000000 (2^-51.256) [ 6] 0.000000 (2^-43.802) [ 7] 0.000000 (2^-52.359)
[ 8] 0.000000 (2^-31.563) [ 9] 0.000000 (2^-31.438) [10] 0.000000 (2^-26.956) [11] 0.000000 (2^-32.978)
[12] 0.000000 (2^-30.086) [13] 0.000000 (2^-30.086) [14] 0.000000 (2^-27.823) [15] 0.000000 (2^-34.086)
[./tests/salsa-tests.cc:187] PW_exp:
[ 0] 0.124805 (2^-3.002) [ 1] 0.000512 (2^-10.931) [ 2] 0.001894 (2^-9.045) [ 3] 0.001013 (2^-9.947)
[ 4] 0.000246 (2^-11.992) [ 5] 0.000496 (2^-10.976) [ 6] 0.000032 (2^-14.934) [ 7] 0.000032 (2^-14.934)
[ 8] 0.000479 (2^-11.028) [ 9] 0.001968 (2^-8.989) [10] 0.000992 (2^-9.977) [11] 0.000131 (2^-12.902)
[12] 0.000064 (2^-13.923) [13] 0.000025 (2^-15.272) [14] 0.000248 (2^-11.975) [15] 0.001988 (2^-8.974)
[
	  [./tests/salsa-tests.cc:189] PW_exp vs. P_rand:
	  [ 0]       X (2^-3) [ 1]       X (2^-11) [ 2]       X (2^-9) [ 3]       X (2^-10)
	  [ 4]       X (2^-12) [ 5]       X (2^-11) [ 6]       X (2^-15) [ 7]       X (2^-15)
	  [ 8]       X (2^-11) [ 9]       X (2^-9) [10]       X (2^-10) [11]       X (2^-13)
	  [12]       X (2^-14) [13]       X (2^-15) [14]       X (2^-12) [15]       X (2^-9)
[

	  [./tests/salsa-tests.cc:184] PW_the:
	  [ 0] 0.000000 (2^-42.000) [ 1] 0.000000 (2^-38.000) [ 2] 0.000000 (2^-40.000) [ 3] 0.000000 (2^-45.000)
	  [ 4] 0.000000 (2^-61.000) [ 5] 0.000000 (2^-53.000) [ 6] 0.000000 (2^-56.000) [ 7] 0.000000 (2^-67.000)
	  [ 8] 0.000000 (2^-42.000) [ 9] 0.000000 (2^-42.000) [10] 0.000000 (2^-40.000) [11] 0.000000 (2^-47.000)
	  [12] 0.000000 (2^-35.000) [13] 0.000000 (2^-32.000) [14] 0.000000 (2^-29.000) [15] 0.000000 (2^-36.000)
	  [./tests/salsa-tests.cc:187] PW_exp:
	  [ 0] 0.000017 (2^-15.830) [ 1] 0.000019 (2^-15.715) [ 2] 0.000016 (2^-15.956) [ 3] 0.000016 (2^-15.913)
	  [ 4] 0.000015 (2^-16.000) [ 5] 0.000013 (2^-16.193) [ 6] 0.000020 (2^-15.608) [ 7] 0.000015 (2^-16.000)
	  [ 8] 0.000016 (2^-15.913) [ 9] 0.000016 (2^-15.913) [10] 0.000013 (2^-16.193) [11] 0.000012 (2^-16.300)
	  [12] 0.000014 (2^-16.142) [13] 0.000014 (2^-16.093) [14] 0.000014 (2^-16.142) [15] 0.000015 (2^-16.000)
[
/* --- */

  // Scratch fragment: choose the state word that receives the input difference.
  //  S[9] = random32() & MASK;//1U << (WORD_SIZE);
  uint32_t i_w = random32() % 4; // random index
#if 0
  // Disabled variant with a fixed difference in the most significant bit.
  // NOTE(review): i_w is set to 7 here while S[9] is written -- confirm which word is intended.
  //  i_w = 9;						  // Crowley
  i_w = 7;							  // Aumasson et al.
  S[9] = 1U << (WORD_SIZE - 1);
#else
  // Shift the random index from 0..3 to 6..9 (checked by the assert below).
  i_w += 6;
  assert((i_w == 6) || (i_w == 7) || (i_w == 8) || (i_w == 9));
  S[i_w] = gen_sparse(1, WORD_SIZE); // set 1 bit difference at random position
#endif

/* --- */

  // random32() & MASK;
#if 0
  S[6] = gen_sparse(8, WORD_SIZE);
  S[7] = gen_sparse(8, WORD_SIZE);
  S[8] = gen_sparse(8, WORD_SIZE);
  S[9] = gen_sparse(8, WORD_SIZE);
#endif


/* --- */
  // WARNING!!! This results in sub-optimal probability,
  // but improves the efficiency.
  //  if((WORD_SIZE == 32)) {
  //	 double p_thres = 1.0/(double)(1ULL << 15);
  //	 if(*r_max >= p_thres)
  //		return;
  //  }



/* --- */

/**
 * Fill the table \p dep with, for every state word, the set of additions
 * (one bit per addition, one table column per round) that the word depends on.
 *
 * \param nrounds not referenced in the body: the computation always covers
 *        MAX_NROUNDS rounds.
 * \param e table of index rows; rows 0..15 are used in even rounds and
 *        rows 16..31 in odd rounds. In each row, entry 0 is the index of
 *        the word being updated and entries 1..3 are the indices of the
 *        words it is computed from.
 * \param dep output table, fully overwritten: dep[w][s] is the bit mask of
 *        additions of round s on which word w depends.
 */
void salsa_gen_word_deps(const uint32_t nrounds, 
								 const uint32_t e[SALSA_STATE + SALSA_STATE][5], 
								 uint32_t dep[SALSA_STATE][MAX_NROUNDS])
{  
  // start from an empty dependency table
  for(uint32_t word = 0; word < SALSA_STATE; word++) {
	 for(uint32_t round = 0; round < MAX_NROUNDS; round++) {
		dep[word][round] = 0;
	 }
  }

  for(uint32_t r = 0; r < MAX_NROUNDS; r++) {
	 // Even rounds (column rounds according to the original notes) read rows
	 // 0..15 of e[]; odd rounds (row rounds) read rows 16..31.
	 const uint32_t row_offset = (r & 1) ? 16 : 0;

	 for(uint32_t i = 0; i < SALSA_STATE; i++) {
		const uint32_t* const row = e[i + row_offset];
		const uint32_t dst = row[0];	  // index of the word being updated
		const uint32_t src_a = row[1];  // indices of the words it is computed from,
		const uint32_t src_b = row[2];  // following the salsa round function
		const uint32_t src_c = row[3];  // f[0] = f[1] ^ ((f[2]+f[3]) <<< const)

		// The new dependency of the updated word is the composition of the
		// dependencies accumulated so far (rounds 0..r) by its three source
		// words; composition of bit masks is a bitwise OR.
		for(uint32_t s = 0; s <= r; s++) {
		  const uint32_t merged = dep[src_a][s] | dep[src_b][s] | dep[src_c][s];
		  dep[dst][s] = merged;

		  //printf("round# %d, word[%d], dep[%d] ", r, dst, s);
		  //print_bits32(dep[dst][s]);
		  //printf("\n");
		}

		// The updated word also depends on the addition used to compute it.
		// That addition carries the same index as the word, so its bit is
		// set in the mask of the current round.
		dep[dst][r] |= 1 << dst; //!!! 20091206 vpv
	 }
  }

  // test statistics
#if 0
  // iterate over the words
  for (int i = 0; i < SALSA_STATE; ++i)
	 {
		printf("word %d after round %d depends on:\n", i, MAX_NROUNDS - 1);
		// iterate over the per-round dependency masks
		for (int s = 0; s < MAX_NROUNDS; ++s)
		  //printf("dep[%d][%d]=0x%08x\n", i, s, dep[i][s]);
		  // iterate over the bits of one dependency mask
		  // (only the 16 least significant bits are used)
		  for (int j = 0; j < WORD_SIZE/2; ++j)
			 if ((dep[i][s] >> j) & 1)
				printf("  addition %d of round %d\n", j, s);
	 }
#endif  // #if 0
}

/* --- */

/*

- Salsa 5 rounds, 2 stars max, 45 min.

[./tests/salsa-tests.cc:189] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
	  [./tests/salsa-tests.cc:154] S[ 9] 80000000
	  [./src/salsa.cc:127] round# 0 / 4
	  [./src/salsa.cc:127] round# 1 / 4
	  [./src/salsa.cc:127] round# 2 / 4
	  [./src/salsa.cc:127] round# 3 / 4
	  [./src/salsa.cc:127] round# 4 / 4
R[-1]
	  [ 0]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 1]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 2]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 3]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 4]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 5]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 6]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 7]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 8]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 9] 80000000 10000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [10]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [11]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [13]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [14]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [15]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |

R[ 0]
	  [ 0]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 1]   201000 00000000001000000001000000000000 | 0.500000 (2^-1.000000) |
	  [ 2]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 3]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 4]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 5] 44000080 01000100000000000000000010000000 | 0.125000 (2^-3.000000) |
	  [ 6]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 7]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 8]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [ 9] 80000000 10000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [10]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [11]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [13]      100 00000000000000000000000100000000 | 1.000000 (2^0.000000) |
	  [14]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [15]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |

R[ 1]
	  [ 0] 20010880 00100000000000*10000100010000000 | 0.093750 (2^-3.415037) |
	  [ 1]   201000 00000000001000000001000000000000 | 1.000000 (2^0.000000) |
	  [ 2] 40200000 01000000001000000000000000000000 | 0.250000 (2^-2.000000) |
	  [ 3]  2000800 0000001000000000000*100000000000 | 0.250000 (2^-2.000000) |
	  [ 4] 20954010 00100000100101010100000000010000 | 0.002197 (2^-8.830075) |
	  [ 5] 562080D0 010101100*1000001000000011010000 | 0.001465 (2^-9.415037) |
	  [ 6]     4022 0000000000000000010000000*100010 | 0.250000 (2^-2.000000) |
	  [ 7]   814488 00000000100000010100010*10001000 | 0.023438 (2^-5.415037) |
	  [ 8]     8000 00000000000000001000000000000000 | 0.500000 (2^-1.000000) |
	  [ 9] 90080000 10010000000010000000000000000000 | 0.250000 (2^-2.000000) |
	  [10]    24022 00000000000000100100000000100010 | 0.125000 (2^-3.000000) |
	  [11]       40 00000000000000000000000001000000 | 1.000000 (2^0.000000) |
	  [12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
	  [13]      100 00000000000000000000000100000000 | 1.000000 (2^0.000000) |
	  [14]   200000 00000000001000000000000000000000 | 0.500000 (2^-1.000000) |
	  [15]  4000080 00000100000000000000000010000000 | 0.250000 (2^-2.000000) |

R[ 2]
	  [ 0] 200148C2 00100000000000*1*100100011000*10 | 0.125000 (2^-3.000000) |
	  [ 1]  114BA22 0000000100*1010*1011101000100010 | 0.000057 (2^-14.093109) |
	  [ 2]  4A04402 00000100101000000100010000000010 | 0.015625 (2^-6.000000) |
	  [ 3]  2002802 00000010000000000*1*100000000010 | 0.250000 (2^-2.000000) |
	  [ 4] 20110000 00100000000100010000000000000000 | 0.046875 (2^-4.415037) |
	  [ 5] 70A981C2 01110*001*1*10011000000111000010 | 0.000051 (2^-14.245112) |
	  [ 6]  8800092 0000100010000000000000001**10010 | 0.015625 (2^-6.000000) |
	  [ 7]   D04084 00000000110100000100000*1000*100 | 0.017578 (2^-5.830075) |
	  [ 8] 20118000 00100000000100011000000000000000 | 0.023438 (2^-5.415037) |
	  [ 9] 90402829 10010000010000000*101000001010*1 | 0.002930 (2^-8.415037) |
	  [10] 124250A3 000100100100001001*1000010100011 | 0.001709 (2^-9.192645) |
	  [11]  510400A 0000010100010000*1000000000*1010 | 0.001648 (2^-9.245112) |
	  [12] 10000000 00010000000000000000000000000000 | 0.062500 (2^-4.000000) |
	  [13] 41501384 01000001010100000**1001110000100 | 0.000069 (2^-13.830075) |
	  [14]  1200000 00000001001000000000000000000000 | 0.046875 (2^-4.415037) |
	  [15]  6081180 00000110000*1000000100*110000000 | 0.001648 (2^-9.245112) |

R[ 3]
	  [ 0] 2905424A 001*10*1000001*1*100001001001*10 | 0.000028 (2^-15.129635) |
	  [ 1]  1849A33 0000000110*0010*1*01101000110011 | 0.001236 (2^-9.660150) |
	  [ 2]  CA42643 0000110010100100**10011001000011 | 0.000009 (2^-16.830075) |
	  [ 3]  2022882 000000100000**100*1*100010000010 | 0.000006 (2^-17.415037) |
	  [ 4] 3A02A28A 00111*10000000101*10001010001010 | 0.000002 (2^-19.245112) |
	  [ 5] 3A25E589 00111*100*1**1011110010110001001 | 0.000009 (2^-16.830075) |
	  [ 6] 44C0218A 0100010011000000**1000011**01010 | 0.000549 (2^-10.830075) |
	  [ 7] D1179023 110100010001011110*1000*0010**11 | 0.000039 (2^-14.660150) |
	  [ 8] A4B85024 101001001011100**101000000100100 | 0.000023 (2^-15.437758) |
	  [ 9] 1AC41C38 0001101011000100**011100001110*0 | 0.000033 (2^-14.907243) |
	  [10] C21058B3 110000100**1000001*1100010110011 | 0.000002 (2^-19.299560) |
	  [11]  404044B 00000100000**100*0000100010*1011 | 0.000488 (2^-11.000000) |
	  [12] 84084000 1000010000001000*10000000000000* | 0.008789 (2^-6.830075) |
	  [13] 41F11280 01000001111100*10**1001010000000 | 0.003296 (2^-8.245112) |
	  [14] 23700881 00100*110111000000001000100000*1 | 0.000069 (2^-13.830075) |
	  [15] 2E0C9404 0*101110000*110*100101*000000100 | 0.000360 (2^-11.437758) |

R[ 4]
	  [ 0] A811013A 101*10*000010**1*000000100111*10 | 0.000003 (2^-18.508147) |
	  [ 1] 808A3B63 1000000*10**101*0*11101101100011 | 0.000003 (2^-18.490225) |
	  [ 2]  55D04D1 0*00010101011101**00010011010001 | 0.000011 (2^-16.508147) |
	  [ 3]  2020811 000000100000**100*0*10**00010001 | 0.000032 (2^-14.923184) |
	  [ 4] B88387D8 10111*00100000111*00011111011*00 | 0.000097 (2^-13.338222) |
	  [ 5] BA84E78A 10111*101*0**1001110011110001010 | 0.000003 (2^-18.148251) |
	  [ 6] 60CA0110 01100000110*1010**0000010**1000* | 0.000011 (2^-16.437758) |
	  [ 7] D46F9A63 1101*100011*111110*1101*0110**11 | 0.000011 (2^-16.490225) |
	  [ 8] A3BC4CC5 10100011101111***100110011000101 | 0.000038 (2^-14.700792) |
	  [ 9] 5AA4103C 0101101010100100**01*00*001111*0 | 0.000000 (2^-23.245112) |
	  [10] D393C957 11010*111**10*1111*0100101010111 | 0.000006 (2^-17.370643) |
	  [11] 14020E0F 00010100000***10**001110000*1111 | 0.000000 (2^-22.215365) |
	  [12] F4982100 111101001*0110*0*01000010000000* | 0.000001 (2^-20.923184) |
	  [13] 401808B9 010000000*0110*00**0100010111001 | 0.000002 (2^-18.630403) |
	  [14] 1B746400 00011*1101110100011**100000000*0 | 0.000017 (2^-15.830075) |
	  [15]  E84947F 0*001110100**10*100101*001111111 | 0.000001 (2^-20.660150) |

	  [ 0] 0.000000 (2^  -inf) [ 1] 0.000000 (2^  -inf) [ 2] 0.000000 (2^  -inf) [ 3] 0.000000 (2^  -inf)
	  [ 4] 0.000000 (2^  -inf) [ 5] 0.000000 (2^  -inf) [ 6] 0.000000 (2^  -inf) [ 7] 0.000000 (2^  -inf)
	  [ 8] 0.000000 (2^  -inf) [ 9] 0.000000 (2^  -inf) [10] 0.000000 (2^  -inf) [11] 0.000000 (2^  -inf)
	  [12] 0.000000 (2^  -inf) [13] 0.000000 (2^  -inf) [14] 0.000000 (2^  -inf) [15] 0.000000 (2^  -inf)
	  [./tests/salsa-tests.cc:177] p = 0.000000 (2^-673.424354), p = 0.000000 (2^-inf)
	  [./tests/salsa-tests.cc:178] S[ 9] 80000000

real    44m47.357s
user    44m42.720s
sys     0m0.024s

*/

/* --- */
/* 
Salsa 4 rounds: 3 stars max, 2 hours

vpv@igor:~/skcrypto/trunk/work/src/yaarx$ time ./bin/salsa-tests
[./tests/salsa-tests.cc:175] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
[./src/salsa.cc:127] round# 0 / 3
[./src/salsa.cc:127] round# 1 / 3
[./src/salsa.cc:127] round# 2 / 3
[./src/salsa.cc:127] round# 3 / 3
R[-1]
 [ 0]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 1]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 2]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 3]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 4]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 5]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 6]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 7]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 8]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 9] 80000000 10000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [10]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [11]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [13]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [14]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [15]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |

R[ 0]
 [ 0]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 1]   201000 00000000001000000001000000000000 | 0.500000 (2^-1.000000) |
 [ 2]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 3]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 4]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 5] 44000080 01000100000000000000000010000000 | 0.125000 (2^-3.000000) |
 [ 6]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 7]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 8]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [ 9] 80000000 10000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [10]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [11]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [13]      100 00000000000000000000000100000000 | 1.000000 (2^0.000000) |
 [14]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [15]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |

R[ 1]
 [ 0] 20010880 00100000000000*10000100010000000 | 0.093750 (2^-3.415037) |
 [ 1]   201000 00000000001000000001000000000000 | 1.000000 (2^0.000000) |
 [ 2] 40200000 01000000001000000000000000000000 | 0.250000 (2^-2.000000) |
 [ 3]  2000800 0000001000000000000*100000000000 | 0.250000 (2^-2.000000) |
 [ 4] 20954010 00100000100101010100000000010000 | 0.002197 (2^-8.830075) |
 [ 5] 562080D0 010101100*1000001000000011010000 | 0.001465 (2^-9.415037) |
 [ 6]     4022 0000000000000000010000000*100010 | 0.250000 (2^-2.000000) |
 [ 7]   814488 00000000100000010100010*10001000 | 0.023438 (2^-5.415037) |
 [ 8]     8000 00000000000000001000000000000000 | 0.500000 (2^-1.000000) |
 [ 9] 90080000 10010000000010000000000000000000 | 0.250000 (2^-2.000000) |
 [10]    24022 00000000000000100100000000100010 | 0.125000 (2^-3.000000) |
 [11]       40 00000000000000000000000001000000 | 1.000000 (2^0.000000) |
 [12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
 [13]      100 00000000000000000000000100000000 | 1.000000 (2^0.000000) |
 [14]   200000 00000000001000000000000000000000 | 0.500000 (2^-1.000000) |
 [15]  4000080 00000100000000000000000010000000 | 0.250000 (2^-2.000000) |

R[ 2]
 [ 0] 200148C2 00100000000000*1*100100011000*10 | 0.125000 (2^-3.000000) |
 [ 1]  D14BA22 000011*10**101001011101000100010 | 0.000047 (2^-14.370643) |
 [ 2]  4A04402 00000100101000000100010000000010 | 0.015625 (2^-6.000000) |
 [ 3]  2002802 00000010000000000*1*100000000010 | 0.250000 (2^-2.000000) |
 [ 4] 20110000 00100000000100010000000000000000 | 0.046875 (2^-4.415037) |
 [ 5] 30A990C0 0*110*001*1*10011001000011000000 | 0.000013 (2^-16.245112) |
 [ 6]  8800092 0000100010000000000000001**10010 | 0.015625 (2^-6.000000) |
 [ 7]   D04084 00000000110100000100000*1000*100 | 0.017578 (2^-5.830075) |
 [ 8] 20118000 00100000000100011000000000000000 | 0.023438 (2^-5.415037) |
 [ 9] 90402829 10010000010000000*1010000*1010*1 | 0.005859 (2^-7.415037) |
 [10] 124250A3 000100100100001001*1000010100011 | 0.001709 (2^-9.192645) |
 [11]  510400A 0000010100010000*1000000000*1010 | 0.001648 (2^-9.245112) |
 [12] 10000000 00010000000000000000000000000000 | 0.062500 (2^-4.000000) |
 [13] 41509384 01000001010100001**100111000*100 | 0.000206 (2^-12.245112) |
 [14]  1200000 00000001001000000000000000000000 | 0.046875 (2^-4.415037) |
 [15]  6081180 00000110000*100000010**110000000 | 0.003296 (2^-8.245112) |

R[ 3]
[ 0]  8054C42 00*01**0000001*1*100110001000*10 | 0.000004 (2^-17.830075) |
[ 1]  D849A33 000011*11**001001*01101000110011 | 0.001236 (2^-9.660150) |
[ 2]  D64264B 000011010110010***10011001001011 | 0.000003 (2^-18.437758) |
[ 3]  2012BCA 000000100000***10*1*101111001010 | 0.000004 (2^-17.830075) |
[ 4] 12806099 0**100101000000001100000100110*1 | 0.000011 (2^-16.490225) |
[ 5] A2005380 1*100*10**0*00000101001110000**0 | 0.000007 (2^-17.075187) |
[ 6] 44C8209A 01000100110010000*1000001**11010 | 0.000412 (2^-11.245112) |
[ 7] C1B39427 110000*11*11001110*1010*0010*111 | 0.000029 (2^-15.075187) |
[ 8] A4B85024 1010*1001011100**101000000100100 | 0.000045 (2^-14.437758) |
[ 9] 1AC41C38 000110101100010***0111000*1110*0 | 0.000049 (2^-14.322280) |
[10] C21058B3 11*000100**1000001*1100010110011 | 0.000003 (2^-18.437758) |
[11]  404044B 00000100000**100*0000100010*1011 | 0.000366 (2^-11.415037) |
[12] 84084000 1000010000001000*1000000000000** | 0.010254 (2^-6.607683) |
[13] 41F19280 01000001111100*11**100101000*000 | 0.000961 (2^-10.022720) |
[14] 1F700881 0001111101110000000010001000***1 | 0.000040 (2^-14.607683) |
[15] 2E0D0002 0*101110000*1101000*0**000000*10 | 0.000029 (2^-15.084121) |

[ 0] 0.000000 (2^  -inf) [ 1] 0.000000 (2^  -inf) [ 2] 0.000000 (2^  -inf) [ 3] 0.000000 (2^  -inf)
[ 4] 0.000000 (2^  -inf) [ 5] 0.000000 (2^  -inf) [ 6] 0.000000 (2^  -inf) [ 7] 0.000000 (2^  -inf)
[ 8] 0.000000 (2^  -inf) [ 9] 0.000000 (2^  -inf) [10] 0.000000 (2^  -inf) [11] 0.000000 (2^  -inf)
[12] 0.000000 (2^  -inf) [13] 0.000000 (2^  -inf) [14] 0.000000 (2^  -inf) [15] 0.000000 (2^  -inf)
[./tests/salsa-tests.cc:164] p = 0.000000 (2^-392.687960), p = 0.000000 (2^-inf)

real    122m21.380s
user    122m8.146s
sys     0m0.020s

 */

/* --- */


/* 

Salsa 4 rounds, max 2 stars, 2 min

[./tests/salsa-tests.cc:175] Tests, WORD_SIZE  = 32, MASK = FFFFFFFF
R[-1]
[ 0]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 1]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 2]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 3]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 4]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 5]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 6]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 7]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 8]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 9] 80000000 10000000000000000000000000000000 | 1.000000 (2^0.000000) |
[10]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[11]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[13]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[14]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[15]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |

R[ 0]
[ 0]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 1]   201000 00000000001000000001000000000000 | 0.500000 (2^-1.000000) |
[ 2]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 3]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 4]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 5] 44000080 01000100000000000000000010000000 | 0.125000 (2^-3.000000) |
[ 6]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 7]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 8]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[ 9] 80000000 10000000000000000000000000000000 | 1.000000 (2^0.000000) |
[10]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[11]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[13]      100 00000000000000000000000100000000 | 1.000000 (2^0.000000) |
[14]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[15]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |

R[ 1]
[ 0] 20010880 00100000000000*10000100010000000 | 0.093750 (2^-3.415037) |
[ 1]   201000 00000000001000000001000000000000 | 1.000000 (2^0.000000) |
[ 2] 40200000 01000000001000000000000000000000 | 0.250000 (2^-2.000000) |
[ 3]  2000800 0000001000000000000*100000000000 | 0.250000 (2^-2.000000) |
[ 4] 20954010 00100000100101010100000000010000 | 0.002197 (2^-8.830075) |
[ 5] 562080D0 010101100*1000001000000011010000 | 0.001465 (2^-9.415037) |
[ 6]     4022 0000000000000000010000000*100010 | 0.250000 (2^-2.000000) |
[ 7]   814488 00000000100000010100010*10001000 | 0.023438 (2^-5.415037) |
[ 8]     8000 00000000000000001000000000000000 | 0.500000 (2^-1.000000) |
[ 9] 90080000 10010000000010000000000000000000 | 0.250000 (2^-2.000000) |
[10]    24022 00000000000000100100000000100010 | 0.125000 (2^-3.000000) |
[11]       40 00000000000000000000000001000000 | 1.000000 (2^0.000000) |
[12]        0 00000000000000000000000000000000 | 1.000000 (2^0.000000) |
[13]      100 00000000000000000000000100000000 | 1.000000 (2^0.000000) |
[14]   200000 00000000001000000000000000000000 | 0.500000 (2^-1.000000) |
[15]  4000080 00000100000000000000000010000000 | 0.250000 (2^-2.000000) |

R[ 2]
[ 0] 200148C2 00100000000000*1*100100011000*10 | 0.125000 (2^-3.000000) |
[ 1]  114BA22 0000000100*1010*1011101000100010 | 0.000057 (2^-14.093109) |
[ 2]  4A04402 00000100101000000100010000000010 | 0.015625 (2^-6.000000) |
[ 3]  2002802 00000010000000000*1*100000000010 | 0.250000 (2^-2.000000) |
[ 4] 20110000 00100000000100010000000000000000 | 0.046875 (2^-4.415037) |
[ 5] 70A981C2 01110*001*1*10011000000111000010 | 0.000051 (2^-14.245112) |
[ 6]  8800092 0000100010000000000000001**10010 | 0.015625 (2^-6.000000) |
[ 7]   D04084 00000000110100000100000*1000*100 | 0.017578 (2^-5.830075) |
[ 8] 20118000 00100000000100011000000000000000 | 0.023438 (2^-5.415037) |
[ 9] 90402829 10010000010000000*101000001010*1 | 0.002930 (2^-8.415037) |
[10] 124250A3 000100100100001001*1000010100011 | 0.001709 (2^-9.192645) |
[11]  510400A 0000010100010000*1000000000*1010 | 0.001648 (2^-9.245112) |
[12] 10000000 00010000000000000000000000000000 | 0.062500 (2^-4.000000) |
[13] 41501384 01000001010100000**1001110000100 | 0.000069 (2^-13.830075) |
[14]  1200000 00000001001000000000000000000000 | 0.046875 (2^-4.415037) |
[15]  6081180 00000110000*1000000100*110000000 | 0.001648 (2^-9.245112) |

R[ 3]
[ 0] 2905424A 001*10*1000001*1*100001001001*10 | 0.000028 (2^-15.129635) |
[ 1]  1849A33 0000000110*0010*1*01101000110011 | 0.001236 (2^-9.660150) |
[ 2]  CA42643 0000110010100100**10011001000011 | 0.000009 (2^-16.830075) |
[ 3]  2022882 000000100000**100*1*100010000010 | 0.000006 (2^-17.415037) |
[ 4] 3A02A28A 00111*10000000101*10001010001010 | 0.000002 (2^-19.245112) |
[ 5] 3A25E589 00111*100*1**1011110010110001001 | 0.000009 (2^-16.830075) |
[ 6] 44C0218A 0100010011000000**1000011**01010 | 0.000549 (2^-10.830075) |
[ 7] D1179023 110100010001011110*1000*0010**11 | 0.000039 (2^-14.660150) |
[ 8] A4B85024 101001001011100**101000000100100 | 0.000023 (2^-15.437758) |
[ 9] 1AC41C38 0001101011000100**011100001110*0 | 0.000033 (2^-14.907243) |
[10] C21058B3 110000100**1000001*1100010110011 | 0.000002 (2^-19.299560) |
[11]  404044B 00000100000**100*0000100010*1011 | 0.000488 (2^-11.000000) |
[12] 84084000 1000010000001000*10000000000000* | 0.008789 (2^-6.830075) |
[13] 41F11280 01000001111100*10**1001010000000 | 0.003296 (2^-8.245112) |
[14] 23700881 00100*110111000000001000100000*1 | 0.000069 (2^-13.830075) |
[15] 2E0C9404 0*101110000*110*100101*000000100 | 0.000360 (2^-11.437758) |

[ 0] 0.000000 (2^  -inf) [ 1] 0.000000 (2^  -inf) [ 2] 0.000000 (2^  -inf) [ 3] 0.000000 (2^  -inf)
[ 4] 0.000000 (2^  -inf) [ 5] 0.000000 (2^  -inf) [ 6] 0.000000 (2^  -inf) [ 7] 0.000000 (2^  -inf)
[ 8] 0.000000 (2^  -inf) [ 9] 0.000000 (2^  -inf) [10] 0.000000 (2^  -inf) [11] 0.000000 (2^  -inf)
[12] 0.000000 (2^  -inf) [13] 0.000000 (2^  -inf) [14] 0.000000 (2^  -inf) [15] 0.000000 (2^  -inf)
[./tests/salsa-tests.cc:164] p = 0.000000 (2^-387.004471), p = 0.000000 (2^-inf)

real    2m34.510s
user    2m34.270s
sys     0m0.000s


*/

/* --- */
//
// Test input states for the columnround() from the Salsa20
// specification document (spec.pdf)
//
// Every state is 16 words, written here four words per line.
//

// Word 0x00000001 at indices 0, 4, 8 and 12; every other word is zero.
uint32_t test_state_1[16] = {
  0x00000001U, 0x00000000U, 0x00000000U, 0x00000000U,
  0x00000001U, 0x00000000U, 0x00000000U, 0x00000000U,
  0x00000001U, 0x00000000U, 0x00000000U, 0x00000000U,
  0x00000001U, 0x00000000U, 0x00000000U, 0x00000000U
};

// Dense state: all 16 words are non-zero.
uint32_t test_state_2[16] = {
  0x08521BD6U, 0x1FE88837U, 0xBB2AA576U, 0x3AA26365U,
  0xC54C6A5BU, 0x2FC74C2FU, 0x6DD39CC3U, 0xDA0A64F6U,
  0x90A2F23DU, 0x067F95A6U, 0x06B35F61U, 0x41E4732EU,
  0xE859C100U, 0xEA4D84B7U, 0x0F619BFFU, 0xBC6E965AU
};

// Words 1, 5, 9 and 13 of test_state_2; every other word is zero.
uint32_t test_state_3[16] = {
  0x00000000U, 0x1FE88837U, 0x00000000U, 0x00000000U,
  0x00000000U, 0x2FC74C2FU, 0x00000000U, 0x00000000U,
  0x00000000U, 0x067F95A6U, 0x00000000U, 0x00000000U,
  0x00000000U, 0xEA4D84B7U, 0x00000000U, 0x00000000U
};

// Same contents as test_state_3.
uint32_t test_state_4[16] = {
  0x00000000U, 0x1FE88837U, 0x00000000U, 0x00000000U,
  0x00000000U, 0x2FC74C2FU, 0x00000000U, 0x00000000U,
  0x00000000U, 0x067F95A6U, 0x00000000U, 0x00000000U,
  0x00000000U, 0xEA4D84B7U, 0x00000000U, 0x00000000U
};


/* --- */

#if 0
			 // Disabled snippet: take the .diff words of da_set and db_set and pass
			 // them to max_xdp_add() together with A; the call returns a probability
			 // and writes through &max_dc_temp (presumably the maximizing output
			 // difference -- confirm against max_xdp_add()).
			 uint32_t da_temp = da_set.diff;
			 uint32_t db_temp = db_set.diff;
			 uint32_t max_dc_temp = 0;
			 double p_max_temp = max_xdp_add(A, da_temp, db_temp, &max_dc_temp);
#endif

/* --- */

  //  uint32_t nstar = hw32(da_set.fixed) + hw32(db_set.fixed) + hw32(dc_set->fixed);


/* --- */

#if 1									  // TEST
			 // Cross-check, run only when both input sets have .fixed == 0:
			 // the value of max_xdp_add_lm() on the two .diff words, divided by
			 // the size of dc_set_2, must equal p_max_2.
			 // NOTE(review): the assert compares doubles with ==; confirm exact
			 // equality is really expected here.
			 if((da_set.fixed == 0) && (db_set.fixed == 0)) {
				uint32_t da = da_set.diff;
				uint32_t db = db_set.diff;
				uint32_t dc_max = 0;
				double p_max_tmp = max_xdp_add_lm(da, db, &dc_max);
				p_max_tmp /= xdp_add_dset_size(dc_set_2);
				printf("%f %f (%8X %8X) %8X\n", p_max_tmp, p_max_2, dc_set_2.diff, dc_set_2.fixed, dc_max);
				assert(p_max_tmp == p_max_2);
			 }
#endif


/* --- */

#if 1									  // TEST
			 // Cross-check, run only when both input sets have .fixed == 0:
			 // the value of max_xdp_add_lm() on the two .diff words, divided by
			 // the size of dc_set_max_2, must equal p_max_2.
			 // NOTE(review): the assert compares doubles with ==; confirm exact
			 // equality is really expected here.
			 if((da_set.fixed == 0) && (db_set.fixed == 0)) {
				uint32_t da = da_set.diff;
				uint32_t db = db_set.diff;
				uint32_t dc_max = 0;
				double p_max_tmp = max_xdp_add_lm(da, db, &dc_max);
				p_max_tmp /= xdp_add_dset_size(dc_set_max_2);
				printf("%f %f (%8X %8X) %8X\n", p_max_tmp, p_max_2, dc_set_max_2.diff, dc_set_max_2.fixed, dc_max);
				assert(p_max_tmp == p_max_2);
			 }
#endif


/* --- */

		// Add the new diff to Dp only if it has better prob. than the min.
#if 0									  // ORIGINAL
		// Disabled variant: the threshold is the probability of the element
		// returned by diff_mset_p->rbegin(); diff_dy is inserted only if its
		// probability is at least that value.
		double p_min = diff_mset_p->rbegin()->p;
		if(diff_dy.p >= p_min) {
		  diff_mset_p->insert(diff_dy);
		}
#else
		// Active variant: recompute the threshold as
		//   p_min = *Bn / (diff[0].p * diff[1].p * ... * diff[n-1].p * B[nrounds - 1 - (n + 1)])
		// and hand it to tea_f_add_pddt().
		// NOTE(review): the index nrounds - 1 - (n + 1) is not range-checked in
		// this fragment -- confirm it is valid at the place this was cut from.
		p_min = 1.0;
		for(int i = 0; i < n; i++) { // p[0] * p[1] * ... * p[n-1]
		  p_min *= diff[i].p;
		}
		p_min = p_min * 1.0 * B[nrounds - 1 - (n + 1)]; 
		p_min = *Bn / p_min;
		printf("[%s:%d] New: %f, p_min = %f\n", __FILE__, __LINE__, diff_dy.p, p_min);
		// the resulting threshold is asserted to be at most 1
		assert(p_min <= 1.0);
		tea_f_add_pddt(WORD_SIZE, p_min, lsh_const, rsh_const, diff_set_dx_dy);
#endif

/* --- */

/**
 * One layer of two Threefish-style MIX operations on sets of differences.
 *
 * Lane 0 works on (DX[0], DX[1]) with \p rot_const_0, lane 1 on
 * (DX[2], DX[3]) with \p rot_const_1. For a lane with indices s = 2*lane
 * and r = s + 1:
 *  - if DX[s].fixed and DX[r].fixed both equal (0xFFFFFFFF & MASK), DY[s] is
 *    set to {1 << (WORD_SIZE - 1), 0} and P[s] is taken from
 *    xdp_add_dset_all(); otherwise P[s] and DY[s] come from
 *    rmax_xdp_add_dset();
 *  - DX[r] is overwritten in place by lrot_dset(DX[r], rot);
 *  - DY[r] = xor_dset(DX[r], DY[s]) and P[r] = 1.0.
 *
 * \param A matrices forwarded unchanged to the probability routines.
 * \param DX four input sets; DX[1] and DX[3] are replaced by their rotations.
 * \param DY four output sets (all written).
 * \param P four output probabilities (all written).
 * \param rot_const_0 rotation amount for lane 0.
 * \param rot_const_1 rotation amount for lane 1.
 * \param b_single_diff forwarded to rmax_xdp_add_dset().
 */
void xdp_add_dset_threefish_mix(gsl_matrix* A[3][3][3], 
									 diff_set_t DX[4], diff_set_t DY[4], double P[4],
									 uint32_t rot_const_0, uint32_t rot_const_1,
									 bool b_single_diff)
{
#if 0									  // DEBUG
  // dump the four input sets
  for(uint32_t w = 0; w < 4; w++) {
	 printf("[%s:%d] ", __FILE__, __LINE__);
	 xdp_add_dset_print_set(DX[w]);
	 printf("\n");
  }
#endif
  const uint32_t free_word = 0xFFFFFFFF & MASK;
  const uint32_t msb_only = (1U << (WORD_SIZE - 1));
  const uint32_t lane_rot[2] = {rot_const_0, rot_const_1};

  // lane 0 is "MIX 0/0", lane 1 is "MIX 0/1"
  for(uint32_t lane = 0; lane < 2; lane++) {
	 const uint32_t s = 2 * lane;	  // word receiving the sum
	 const uint32_t r = s + 1;		  // word that is rotated and XOR-ed

	 const bool b_both_free = (DX[s].fixed == free_word) && (DX[r].fixed == free_word);
	 if(!b_both_free) {
		P[s] = rmax_xdp_add_dset(A, DX[s], DX[r], &DY[s], b_single_diff);
	 } else {
		DY[s] = {msb_only, 0};
		P[s] = xdp_add_dset_all(A, WORD_SIZE, DX[s], DX[r], DY[s]);
	 }

	 DX[r] = lrot_dset(DX[r], lane_rot[lane]);
	 DY[r] = xor_dset(DX[r], DY[s]);
	 P[r] = 1.0;
  }

#if 0									  // DEBUG
  // dump the four output sets
  printf("\n");
  for(uint32_t w = 0; w < 4; w++) {
	 printf("[%s:%d] ", __FILE__, __LINE__);
	 xdp_add_dset_print_set(DY[w]);
	 printf("\n");
  }
#endif
}


/* --- */

/**
 * XOR of two sets of differences.
 *
 * In a set, a 0 bit in \p fixed marks a position whose value is given by the
 * same bit of \p diff; a 1 bit marks a free position. An output position is
 * free when it is free in either input; at the remaining positions the
 * output difference is the XOR of the two input differences, and \p diff is
 * 0 at free positions.
 *
 * \param da_set_in first input set (not modified).
 * \param db_set_in second input set (not modified).
 * \param p output: 1.0, or 1 / (|da_set| * |db_set|) when \p b_single_diff
 *        is set.
 * \param b_single_diff if true, all bits of both inputs are treated as fixed
 *        so that the result is a single difference, and \p p is divided by
 *        the product of the two set sizes.
 * \return the output set.
 */
diff_set_t xor_dset(diff_set_t da_set_in, diff_set_t db_set_in, double* p, bool b_single_diff) 
{
  *p = 1.0;

  diff_set_t da_set = {da_set_in.diff, da_set_in.fixed};
  diff_set_t db_set = {db_set_in.diff, db_set_in.fixed};
  diff_set_t dc_set = {0, 0};
  // if a single difference is required on the output, 
  // then fix all bits of the input differences and divide
  // the probability by the product of the set sizes
  if(b_single_diff == true) {	
	 uint32_t s_da = xdp_add_dset_size(da_set);
	 uint32_t s_db = xdp_add_dset_size(db_set);
	 da_set.fixed = 0;			  // fix all
	 db_set.fixed = 0;			  // fix all
	 // Multiply as doubles: the product of two 32-bit sizes can exceed 32 bits,
	 // and a wrapped product (possibly 0) would corrupt the probability.
	 *p /= ((double)s_da * (double)s_db);  // the prob drops by the set size
  }
  dc_set.fixed = (da_set.fixed | db_set.fixed) & MASK;
  dc_set.diff = ((~dc_set.fixed) & (XOR(da_set.diff, db_set.diff))) & MASK;
  return dc_set;
}


/**
 * One layer of two Threefish-style MIX operations on sets of differences
 * (variant that also obtains the XOR probabilities from xor_dset()).
 *
 * MIX 0/0 uses (DX[0], DX[1]) and \p rot_const_0, MIX 0/1 uses
 * (DX[2], DX[3]) and \p rot_const_1. In each MIX the first word's output set
 * and probability come from rmax_xdp_add_dset(), the second input word is
 * overwritten in place by its rotation, and the second output set and
 * probability come from xor_dset() applied to the rotated word and the first
 * output.
 *
 * With the debug sections enabled, the input and output sets are printed
 * and, when \p b_single_diff is set, DY[1].fixed and DY[3].fixed are
 * asserted to be 0.
 *
 * \param A matrices forwarded unchanged to rmax_xdp_add_dset().
 * \param DX four input sets; DX[1] and DX[3] are replaced by their rotations.
 * \param DY four output sets (all written).
 * \param P four output probabilities (all written).
 * \param rot_const_0 rotation amount for MIX 0/0.
 * \param rot_const_1 rotation amount for MIX 0/1.
 * \param b_single_diff forwarded to rmax_xdp_add_dset() and xor_dset().
 */
void xdp_add_dset_threefish_mix(gsl_matrix* A[3][3][3], 
									 diff_set_t DX[4], diff_set_t DY[4], double P[4],
									 uint32_t rot_const_0, uint32_t rot_const_1,
									 bool b_single_diff)
{
#if 1									  // DEBUG
  // dump the four input sets
  for(uint32_t w = 0; w < 4; ++w) {
	 printf("[%s:%d] ", __FILE__, __LINE__);
	 xdp_add_dset_print_set(DX[w]);
	 printf("\n");
  }
#endif

  // MIX 0/0: addition first, then rotate the second word and XOR it in
  P[0] = rmax_xdp_add_dset(A, DX[0], DX[1], &DY[0], b_single_diff);
  DX[1] = lrot_dset(DX[1], rot_const_0);
  DY[1] = xor_dset(DX[1], DY[0], &P[1], b_single_diff);
#if 1									  // DEBUG
  // a single output difference must have no free bits
  if(b_single_diff) {
	 assert(DY[1].fixed == 0);
  }
#endif

  // MIX 0/1: same steps on the second pair of words
  P[2] = rmax_xdp_add_dset(A, DX[2], DX[3], &DY[2], b_single_diff);
  DX[3] = lrot_dset(DX[3], rot_const_1);
  DY[3] = xor_dset(DX[3], DY[2], &P[3], b_single_diff);
#if 1									  // DEBUG
  // a single output difference must have no free bits
  if(b_single_diff) {
	 assert(DY[3].fixed == 0);
  }
#endif

#if 1									  // DEBUG
  // dump the four output sets
  printf("\n");
  for(uint32_t w = 0; w < 4; ++w) {
	 printf("[%s:%d] ", __FILE__, __LINE__);
	 xdp_add_dset_print_set(DY[w]);
	 printf("\n");
  }
#endif
}

/* --- */

  // ---
  // Debug dump: DX[3] followed by rot_const_0, then DX[3] again and DY[2],
  // each printed with xdp_add_dset_print_set().
  // NOTE(review): DX[3] is printed twice, and it is paired with rot_const_0
  // although the MIX code in this file rotates DX[3] by rot_const_1 --
  // confirm which word/constant was intended.
  printf("DX[3] ");
  xdp_add_dset_print_set(DX[3]);
  printf("| rot sonst %d\n", rot_const_0);
  printf("DX[3] ");
  xdp_add_dset_print_set(DX[3]);
  printf("\n");
  printf("DY[2] ");
  xdp_add_dset_print_set(DY[2]);
  printf("\n");
  // ---


/* --- */

  // Load row 0 of DX_in into the four working input sets DX[] (copying the
  // .diff and .fixed fields) and reset the four output sets DY[] to {0, 0}.
  uint32_t round_zero = 0;
  for(uint32_t i = 0; i < 4; i++) {
	 DX[i] = {DX_in[round_zero][i].diff, DX_in[round_zero][i].fixed};
	 DY[i] = {0, 0};
  }

/* --- */

  // Copy the .diff word of each of the four input sets into DX[] and print
  // the four words on one line.
  printf("[%s:%d] Input diff:\n", __FILE__, __LINE__);
  for(uint32_t j = 0; j < 4; j++) {
	 DX[j] = DX_set[j].diff;
	 printf("%8X ", DX[j]);
  }
  printf("\n");
  // Same for the output sets, except that each word is followed by the full
  // set (via xdp_add_dset_print_set()) and a newline.
  printf("[%s:%d] Output diff:\n", __FILE__, __LINE__);
  for(uint32_t j = 0; j < 4; j++) {
	 DY[j] = DY_set[j].diff;
	 printf("%8X ", DY[j]);
	 xdp_add_dset_print_set(DY_set[j]);
	 printf("\n");
  }
  printf("\n");

/* --- */

#if 0
  // Disabled experiment: build two input sets from random words masked with
  // MASK, run rmax_xdp_add_dset() on them and print the inputs, the returned
  // output set and three numbers.
  diff_set_t da_set = {0,0};
  diff_set_t db_set = {0,0};
  da_set.diff  = random32() & MASK;
  da_set.fixed = random32() & MASK;
  db_set.diff  = random32() & MASK;
  db_set.fixed = random32() & MASK;

  bool b_single_diff = false;
  diff_set_t dc_set = {0,0};
  double p_max = rmax_xdp_add_dset(A, da_set, db_set, &dc_set, b_single_diff);
  // r_max is the returned probability divided by the size of the output set
  uint32_t s_max = xdp_add_dset_size(dc_set);
  double r_max = p_max / (double)s_max; 

  printf("[%s:%d] XDP_ADD_DIFF_SET ", __FILE__, __LINE__);
  printf("\n da = ");
  xdp_add_dset_print_set(da_set);
  printf("\n db = ");
  xdp_add_dset_print_set(db_set);
  printf("\n dc = ");
  xdp_add_dset_print_set(dc_set);
  printf("\n");
  // printed in the order: r_max, s_max, p_max
  printf("[%s:%d] THE   %f, %d, %f \n", __FILE__, __LINE__, r_max, s_max, p_max);
#endif


/* --- */


/**
 * Apply two rounds of MIX operations to a four-word state, in place.
 *
 * Each MIX takes a pair (a, b): a becomes a + b, b is rotated left by a
 * round- and pair-specific amount and then XOR-ed with the new a.
 * Round 0 mixes the pairs (X[0], X[1]) and (X[2], X[3]); round 1 mixes the
 * pairs (X[0], X[3]) and (X[2], X[1]).
 *
 * NOTE(review): round 0 goes through the ADD/XOR macros while round 1 uses
 * the built-in + and ^ operators. If those macros do more than a plain
 * 32-bit add/xor (e.g. reduce to WORD_SIZE bits), the two rounds behave
 * differently -- confirm against the macro definitions.
 *
 * \param X state of four words, updated in place.
 */
void skein256_2r(uint32_t X[4])
{
  // rotation amounts, indexed as rot[round][mix]
  const uint32_t rot[2][2] = {{7, 9}, {26, 28}};

  // round 0, first pair: (X[0], X[1])
  X[0] = ADD(X[0], X[1]);
  X[1] = LROT(X[1], rot[0][0]);
  X[1] = XOR(X[1], X[0]);

  // round 0, second pair: (X[2], X[3])
  X[2] = ADD(X[2], X[3]);
  X[3] = LROT(X[3], rot[0][1]);
  X[3] = XOR(X[3], X[2]);

  // round 1, first pair: (X[0], X[3])
  X[0] = X[0] + X[3];
  X[3] = LROT(X[3], rot[1][0]);
  X[3] = X[3] ^ X[0];

  // round 1, second pair: (X[2], X[1])
  X[2] = X[2] + X[1];
  X[1] = LROT(X[1], rot[1][1]);
  X[1] = X[1] ^ X[2];
}

/* --- */
/**
 * Recursive search over the bits of the output difference set of XDP-ADD
 * with difference-set inputs (see also \ref max_xdp_add_i).
 *
 * At bit position k the function tries candidate symbols z for the k-th
 * bit of the output set (0, 1, or 2 meaning '*'), evaluates
 * new_p = B[k+1] . (A[x][y][z] * C) and recurses to bit k+1 only when the
 * resulting ratio new_r is strictly larger than the best ratio recorded
 * so far (*r_max). When k == n the candidate handed in by the caller is
 * stored as the new best.
 *
 * NOTE(review): `b_is_lsb` is read below but is neither a parameter nor
 * declared anywhere in this function; the block depends on a declaration
 * that is not visible here.
 *
 * \param k_init bit position used in the debug print and in the
 *        (k == k_init) scaling step.
 * \param k current bit position.
 * \param n the recursion terminates when k == n.
 * \param r ratio of the partial solution handed in by the caller.
 * \param p probability of the partial solution handed in by the caller.
 * \param dc_set output set constructed so far by the caller.
 * \param A matrices indexed by the k-th symbols (x, y, z) of da_set,
 *        db_set and the candidate output set; index 2 stands for '*'.
 * \param B vectors; B[k + 1] is the left operand of the dot product at
 *        bit k. B[k + 1] is overwritten here when k == WORD_SIZE - 1.
 * \param C_in right-hand vector for this bit; it is copied, not modified.
 * \param da_set first input difference set.
 * \param db_set second input difference set.
 * \param dc_set_max receives the best output set found.
 * \param r_max in/out: best ratio found so far.
 * \param p_max out: probability that belongs to *r_max.
 */
void rmax_xdp_add_dset_i(const uint32_t k_init, const uint32_t k, const uint32_t n, 
								 double* r, double* p, diff_set_t* dc_set,
								 gsl_matrix* A[3][3][3], gsl_vector* B[WORD_SIZE + 1], gsl_vector* C_in,  
								 const diff_set_t da_set, const diff_set_t db_set, diff_set_t* dc_set_max, 
								 double* r_max, double* p_max)
{
  // all n bits are assigned: the caller only recursed because *r > *r_max,
  // so record the candidate as the new best and stop
  if(k == n) {
	 assert(*r > *r_max);
	 *r_max = *r;
	 *p_max = *p;
	 *dc_set_max = {dc_set->diff, dc_set->fixed};
#if 1									  // DEBUG
	 printf("[%s:%d] Update bound [%2d]: r %f (%f), p %f (%f) | ", __FILE__, __LINE__, 
			  k_init, *r_max, log2(*r_max), *p_max, log2(*p_max));
#if 0
	 printf("\n");
	 xdp_add_dset_print_set(da_set);
	 printf("\n");
	 xdp_add_dset_print_set(db_set);
	 printf("\n");
#endif
	 xdp_add_dset_print_set(*dc_set_max);
	 printf("\n");
#endif
	 return;
  } 

  // get the k-th bit of da_set, db_set:
  // x, y = 2 means '*', otherwise the fixed bit value 0 or 1
  uint32_t x = 2;					  // *
  bool b_da_is_fixed = (((da_set.fixed >> k) & 1) == FIXED);
  if(b_da_is_fixed) {
	 x = ((da_set.diff >> k) & 1); // 0 or 1
  }
  uint32_t y = 2;					  // *
  bool b_db_is_fixed = (((db_set.fixed >> k) & 1) == FIXED);
  if(b_db_is_fixed) {
	 y = ((db_set.diff >> k) & 1); // 0 or 1
  }

  // cycle over the possible values of the k-th bits of *dc
  //  for(int z = 0; z < 2; z++) { 
  // z runs from hi_lim down to lo_lim; z == 2 ('*') is only tried when
  // b_is_lsb is set
  int hi_lim = 1;
  int lo_lim = 0;
  // NOTE(review): b_is_lsb is not declared in this block
  if(b_is_lsb) {
	 hi_lim = 2;
  }

  for(int z = hi_lim; z >= lo_lim; z--) { 

	 // candidate output set = caller's set with bit k set according to z
	 diff_set_t new_dc_set = {dc_set->diff, dc_set->fixed};

	 // set the k-th bit of dc_set
	 if((z == 0) || (z == 1)) {	// -
		new_dc_set.diff |= (z << k);
		new_dc_set.fixed |= (FIXED << k);
	 }
	 if(z == 2) {				   // *
		new_dc_set.diff |= (0 << k);
		new_dc_set.fixed |= (STAR << k);
	 }

	 // temp: R receives A[x][y][z] * C
	 gsl_vector* R = gsl_vector_calloc(XDP_ADD_DSET_MSIZE);
	 double new_p = 0.0;

	 // private copy of the incoming vector so that C_in stays untouched
	 gsl_vector* C = gsl_vector_calloc(XDP_ADD_DSET_MSIZE);
	 gsl_vector_memcpy(C, C_in);

	 // at the most significant bit the left vector B[k + 1] is rebuilt for
	 // this candidate and then normalized
	 if(k == (WORD_SIZE - 1)) {  // L
		bool b_da_msb_is_fixed = (((da_set.fixed >> k) & 1) == FIXED); 
		bool b_db_msb_is_fixed = (((db_set.fixed >> k) & 1) == FIXED); 
		bool b_dc_msb_is_fixed = (((new_dc_set.fixed >> k) & 1) == FIXED); 
		gsl_vector_set_all(B[k + 1], 0.0);
		xdp_add_dset_init_states(k, B[k + 1], da_set, db_set, new_dc_set);
		xdp_add_dset_final_states_norm(B[k + 1], b_da_msb_is_fixed, b_db_msb_is_fixed, b_dc_msb_is_fixed);
	 }
	 // at bit 0 the right vector is rebuilt for this candidate
	 if(k == 0) {  // C
		gsl_vector_set_all(C, 0.0);
		xdp_add_dset_init_states(k, C, da_set, db_set, new_dc_set);
	 }
#if 0
	 if((k == k_init) && (k != 0) && (k != (WORD_SIZE - 1))) {
		double f = (1U << (((da_set.fixed >> k_init) & 1) 
								 + ((db_set.fixed >> k_init) & 1)
								 + ((new_dc_set.fixed >> k_init) & 1)));
		//		f = 1.0;
		gsl_vector_scale(C, f);
	 }
#else
	 // starting bit strictly inside the word: scale C by 2^(fixed bit of the
	 // candidate at k_init); the assert requires that factor to be 1
	 if((k == k_init) && (k != 0) && (k != (WORD_SIZE - 1))) {
		double f = (1U << ((new_dc_set.fixed >> k_init) & 1));
		gsl_vector_scale(C, f);
		assert(f == 1.0);
	 }
#endif
	 // L A C: R = A[x][y][z] * C, new_p = B[k + 1] . R
	 gsl_blas_dgemv(CblasNoTrans, 1.0, A[x][y][z], C, 0.0, R);
	 gsl_blas_ddot(B[k + 1], R, &new_p);

	 uint64_t s = xdp_add_dset_size(new_dc_set);
	 //	 double new_r = new_p / (double)s;
	 double new_r = new_p;
	 if(k == (WORD_SIZE - 1)) {  // MSB => divide by the set size
		new_r = new_p / (double)s;
	 }

	 // continue only if the probability so far is still bigger than the max. prob.
	 // (R, not C, is handed down as the next level's C_in)
	 if(new_r > *r_max) {
		rmax_xdp_add_dset_i(k_init, k+1, n, &new_r, &new_p, &new_dc_set, A, B, R, da_set, db_set, dc_set_max, r_max, p_max);
	 }

	 gsl_vector_free(C);
	 gsl_vector_free(R);
  }
  return;
}

/* --- */
  // Orphaned fragment (no enclosing function in this dump): selects the
  // range [lo_lim, hi_lim] of candidate values. When b_is_lsb is set the
  // upper limit becomes 2, unless both input bits are fixed and equal, in
  // which case the range collapses to the single value x.
  // NOTE(review): b_is_lsb, b_da_is_fixed, b_db_is_fixed, x and y are not
  // declared in this fragment.
  int hi_lim = 1;
  int lo_lim = 0;
  if(b_is_lsb) {
	 hi_lim = 2;
	 if(b_da_is_fixed && b_db_is_fixed) {
		if(x == y) {
		  hi_lim = lo_lim = x;
		}
	 }
  }

/* --- */

  // Orphaned fragment (no enclosing function in this dump): reads bit (k - 1)
  // of the diff and fixed words of da_set, db_set and *dc_set. All six
  // values stay 0 unless k > k_init.
  // NOTE(review): k, k_init, da_set, db_set and dc_set are not declared here.
  uint32_t da_diff_prev_i = 0;
  uint32_t da_fixed_prev_i = 0;
  uint32_t db_diff_prev_i = 0;
  uint32_t db_fixed_prev_i = 0;
  uint32_t dc_diff_prev_i = 0;
  uint32_t dc_fixed_prev_i = 0;
  if(k > k_init) {
	 da_diff_prev_i = (da_set.diff >> (k - 1)) & 1; 
	 da_fixed_prev_i = (da_set.fixed >> (k - 1)) & 1;
	 db_diff_prev_i = (db_set.diff >> (k - 1)) & 1; 
	 db_fixed_prev_i = (db_set.fixed >> (k - 1)) & 1;
	 dc_diff_prev_i = (dc_set->diff >> (k - 1)) & 1; 
	 dc_fixed_prev_i = (dc_set->fixed >> (k - 1)) & 1;
  }




/* --- */

/**
 * Greedy construction of the output difference set dc_set, one bit at a
 * time starting from bit 0.
 *
 * For every bit position the three candidates '*', 1 and 0 (tried in that
 * order) are appended to the set built so far, and the candidate with the
 * strictly largest ratio
 *
 *   r = xdp_add_dset(AA, WORD_SIZE, da_set, db_set, candidate) / xdp_add_dset_size(candidate)
 *
 * is kept. If no candidate reaches r > 0 the set is reset to {0, 0}.
 *
 * \param AA matrices passed through to xdp_add_dset.
 * \param da_set first input difference set.
 * \param db_set second input difference set.
 * \param dc_set output: the constructed difference set.
 */
void xdp_add_input_dset_to_output_dset(gsl_matrix* AA[2][2][2],
													const diff_set_t da_set, 
													const diff_set_t db_set,
													diff_set_t* dc_set)
{
  // start from the empty set
  dc_set->diff = 0;
  dc_set->fixed = 0;

  uint32_t bit = 0;
  while(bit < WORD_SIZE) {

	 uint32_t nbits = WORD_SIZE;	  // always evaluate over the full word
	 double best_ratio = 0.0;
	 diff_set_t best_set = {0, 0};

	 // candidates in the order 2 ('*'), 1, 0
	 for(int cand = 2; cand > -1; cand--) {
		diff_set_t trial = {dc_set->diff, dc_set->fixed};
		if(cand == 2) {				  // *
		  trial.fixed |= (STAR << bit);
		} else {						  // - : fixed bit with value cand
		  trial.diff |= (cand << bit);
		  trial.fixed |= (FIXED << bit);
		}

		double prob = xdp_add_dset(AA, nbits, da_set, db_set, trial);
		uint32_t set_size = xdp_add_dset_size(trial);
		double ratio = prob / (double)set_size;

		// strict comparison: on ties the earlier candidate wins
		if(ratio > best_ratio) {
		  best_ratio = ratio;
		  best_set = {trial.diff, trial.fixed};
		}
	 }

	 *dc_set = {best_set.diff, best_set.fixed};
	 bit++;
  }
}


/* --- */
/**
 * Constructs the output difference set dc_set of XDP-ADD from the input
 * difference sets da_set and db_set, one bit at a time from the LSB.
 *
 * Bit 0 is always marked FIXED; its value is da[0] ^ db[0] when both input
 * bits are FIXED and 0 otherwise. For every higher bit i:
 *  - if bits (i-1) of da, db and dc are all FIXED and all equal, bit i is
 *    FIXED and derived directly from the input bits;
 *  - otherwise, if exactly one of the two input bits is FIXED, bit i is
 *    FIXED and copies that input bit;
 *  - otherwise (both FIXED or both STAR) the candidates '*', 1, 0 are tried
 *    in that order and the one with the strictly largest ratio
 *    r = xdp_add_dset(...) / xdp_add_dset_size(...) is kept.
 *
 * NOTE(review): if none of the three candidates reaches r > 0, dc_set is
 * reset to {0, 0} for this iteration, discarding the bits chosen so far.
 * This mirrors the original behavior -- confirm that it is intended.
 *
 * \param AA matrices passed through to xdp_add_dset.
 * \param da_set first input difference set.
 * \param db_set second input difference set.
 * \param dc_set output: the constructed difference set.
 */
void xdp_add_input_dset_to_output_dset(gsl_matrix* AA[2][2][2],
													const diff_set_t da_set, 
													const diff_set_t db_set,
													diff_set_t* dc_set)
{
  dc_set->diff = 0;
  dc_set->fixed = 0;

  // --- bit 0 ---
  uint32_t i = 0;
  uint32_t da_diff_i = (da_set.diff >> i) & 1; 
  uint32_t da_fixed_i = (da_set.fixed >> i) & 1;
  uint32_t db_diff_i = (db_set.diff >> i) & 1; 
  uint32_t db_fixed_i = (db_set.fixed >> i) & 1;

  if((da_fixed_i == FIXED) && (db_fixed_i == FIXED)) { // (-,-)
	 dc_set->diff |= ((da_diff_i ^ db_diff_i) << i);
  }
  dc_set->fixed |= (FIXED << i);

  // --- bits 1 .. WORD_SIZE-1 ---
  for(i = 1; i < WORD_SIZE; i++) {

	 uint32_t word_size = WORD_SIZE; // always evaluate over the full word
	 double r_max = 0.0;
	 diff_set_t dc_set_max = {0, 0};

	 // symbols of the inputs at the current bit
	 da_diff_i = (da_set.diff >> i) & 1;
	 da_fixed_i = (da_set.fixed >> i) & 1;
	 db_diff_i = (db_set.diff >> i) & 1;
	 db_fixed_i = (db_set.fixed >> i) & 1;

	 // symbols at the previous bit (i >= 1 here, so bit i-1 always exists)
	 const uint32_t da_diff_prev_i = (da_set.diff >> (i - 1)) & 1; 
	 const uint32_t da_fixed_prev_i = (da_set.fixed >> (i - 1)) & 1;
	 const uint32_t db_diff_prev_i = (db_set.diff >> (i - 1)) & 1; 
	 const uint32_t db_fixed_prev_i = (db_set.fixed >> (i - 1)) & 1;
	 const uint32_t dc_diff_prev_i = (dc_set->diff >> (i - 1)) & 1; 
	 const uint32_t dc_fixed_prev_i = (dc_set->fixed >> (i - 1)) & 1;

	 const bool b_is_prev_eq = 
		(is_eq(da_diff_prev_i, db_diff_prev_i, dc_diff_prev_i)) && 
		((da_fixed_prev_i == FIXED) && (db_fixed_prev_i == FIXED) && (dc_fixed_prev_i == FIXED));

	 const bool b_fixed_fixed = ((da_fixed_i == FIXED) && (db_fixed_i == FIXED)); // (-,-)
	 const bool b_fixed_star = ((da_fixed_i == FIXED) && (db_fixed_i == STAR));	  // (-,*)
	 const bool b_star_fixed = ((da_fixed_i == STAR) && (db_fixed_i == FIXED));	  // (*,-)
	 const bool b_star_star = ((da_fixed_i == STAR) && (db_fixed_i == STAR));	  // (*,*)

	 if(b_is_prev_eq) {

		// previous bits are fixed and equal => this output bit is fixed
		diff_set_t dc_set_i = {dc_set->diff, dc_set->fixed};
		uint32_t dc_i = 0;			  // stays 0 for (*,*)
		if(b_fixed_fixed) {
		  dc_i = da_diff_i ^ db_diff_i ^ da_diff_prev_i;
		} 
		if(b_fixed_star) {
		  dc_i = da_diff_i;
		} 
		if(b_star_fixed) {
		  dc_i = db_diff_i;
		} 
		dc_set_i.diff |= (dc_i << i);
		dc_set_i.fixed |= (FIXED << i);
		dc_set_max = {dc_set_i.diff, dc_set_i.fixed};

	 } else {

		if(b_fixed_star || b_star_fixed) {
		  // exactly one input bit is fixed: copy that bit to the output.
		  // For (*,-) the fixed bit is db[i]; the earlier code copied da[i]
		  // here, which is the bit of the STAR position.
		  diff_set_t dc_set_i = {dc_set->diff, dc_set->fixed};
		  const uint32_t dc_i = b_fixed_star ? da_diff_i : db_diff_i;
		  dc_set_i.diff |= (dc_i << i);
		  dc_set_i.fixed |= (FIXED << i);
		  dc_set_max = {dc_set_i.diff, dc_set_i.fixed};
		} 

		if(b_fixed_fixed || b_star_star) {

		  // try '*', 1, 0 and keep the candidate with the largest ratio
		  for(int j = 2; j >= 0; j--) {
			 diff_set_t dc_set_i = {dc_set->diff, dc_set->fixed};
			 if((j == 0) || (j == 1)){				  // -
				dc_set_i.diff |= (j << i);
				dc_set_i.fixed |= (FIXED << i);
			 }
			 if(j == 2) {				  // *
				dc_set_i.fixed |= (STAR << i);
			 }
			 double p = xdp_add_dset(AA, word_size, da_set, db_set, dc_set_i);
			 uint32_t s = xdp_add_dset_size(dc_set_i);
			 double r = p / (double)s;
			 if(r > r_max) {
				r_max = r;
				dc_set_max = {dc_set_i.diff, dc_set_i.fixed};
			 }
			 printf("[%s:%d] %d|%d: %f %d  %f\n", __FILE__, __LINE__, i, j, p, s, r);
		  }
		}
	 }
	 *dc_set = {dc_set_max.diff, dc_set_max.fixed};
  }

}



/* ---- */
/**
 * Constructs dc_set by maximizing the ratio r:
 *
 * r = p / s = xdp-add(da_set, db_set, dc_set) / dc_set_size .
 *
 * The set is built one bit at a time from the LSB. Bit 0 is always marked
 * FIXED; its value is da[0] ^ db[0] when both input bits are FIXED and 0
 * otherwise. For every higher bit i, evaluated on the truncated word of
 * bits 0..i:
 *  - if bits (i-1) of da, db and dc are all FIXED and all equal, bit i is
 *    FIXED to da[i] ^ db[i] ^ da[i-1];
 *  - otherwise the candidates '*', 1, 0 are tried in that order and the one
 *    with the strictly largest ratio r is kept.
 *
 * The bits da[i] and db[i] are re-read on every iteration; previously they
 * were read once for bit 0 and then reused for all higher bits.
 *
 * NOTE(review): if none of the three candidates reaches r > 0, dc_set is
 * reset to {0, 0} for this iteration -- confirm that this is intended.
 *
 * \param AA matrices passed through to xdp_add_dset.
 * \param da_set first input difference set.
 * \param db_set second input difference set.
 * \param dc_set output: the constructed difference set.
 */
void xdp_add_input_dset_to_output_dset(gsl_matrix* AA[2][2][2],
													const diff_set_t da_set, 
													const diff_set_t db_set,
													diff_set_t* dc_set)
{
  dc_set->diff = 0;
  dc_set->fixed = 0;

  // --- bit 0 ---
  uint32_t i = 0;
  uint32_t da_diff_i = (da_set.diff >> i) & 1; 
  uint32_t da_fixed_i = (da_set.fixed >> i) & 1;
  uint32_t db_diff_i = (db_set.diff >> i) & 1; 
  uint32_t db_fixed_i = (db_set.fixed >> i) & 1;

  if((da_fixed_i == FIXED) && (db_fixed_i == FIXED)) { // (-,-)
	 dc_set->diff |= ((da_diff_i ^ db_diff_i) << i);
  }
  dc_set->fixed |= (FIXED << i);

  // --- bits 1 .. WORD_SIZE-1 ---
  for(i = 1; i < WORD_SIZE; i++) {

	 uint32_t word_size = i + 1; // bits 0, 1, ..., i
	 double r_max = 0.0;
	 diff_set_t dc_set_max = {0, 0};

	 // input bits at the current position
	 da_diff_i = (da_set.diff >> i) & 1;
	 db_diff_i = (db_set.diff >> i) & 1;

	 // symbols at the previous bit (i >= 1 here, so bit i-1 always exists)
	 const uint32_t da_diff_prev_i = (da_set.diff >> (i - 1)) & 1; 
	 const uint32_t da_fixed_prev_i = (da_set.fixed >> (i - 1)) & 1;
	 const uint32_t db_diff_prev_i = (db_set.diff >> (i - 1)) & 1; 
	 const uint32_t db_fixed_prev_i = (db_set.fixed >> (i - 1)) & 1;
	 const uint32_t dc_diff_prev_i = (dc_set->diff >> (i - 1)) & 1; 
	 const uint32_t dc_fixed_prev_i = (dc_set->fixed >> (i - 1)) & 1;

	 const bool b_is_prev_eq = 
		(is_eq(da_diff_prev_i, db_diff_prev_i, dc_diff_prev_i)) && 
		((da_fixed_prev_i == FIXED) && (db_fixed_prev_i == FIXED) && (dc_fixed_prev_i == FIXED));

	 if(b_is_prev_eq) {

		// previous bits are fixed and equal => this output bit is determined
		diff_set_t dc_set_i = {dc_set->diff, dc_set->fixed};
		const uint32_t dc_i = da_diff_i ^ db_diff_i ^ da_diff_prev_i;
		dc_set_i.diff |= (dc_i << i);
		dc_set_i.fixed |= (FIXED << i);
		// NOTE(review): the result of this call is not used; it is kept in
		// case xdp_add_dset has side effects (e.g. debug output).
		xdp_add_dset(AA, word_size, da_set, db_set, dc_set_i);
		dc_set_max = {dc_set_i.diff, dc_set_i.fixed};

	 } else {

		// try '*', 1, 0 and keep the candidate with the largest ratio
		for(int j = 2; j >= 0; j--) {
		  diff_set_t dc_set_i = {dc_set->diff, dc_set->fixed};
		  if((j == 0) || (j == 1)){				  // -
			 dc_set_i.diff |= (j << i);
			 dc_set_i.fixed |= (FIXED << i);
		  }
		  if(j == 2) {				  // *
			 dc_set_i.fixed |= (STAR << i);
		  }
		  double p = xdp_add_dset(AA, word_size, da_set, db_set, dc_set_i);
		  uint32_t s = xdp_add_dset_size(dc_set_i);
		  double r = p / (double)s;
		  if(r > r_max) {
			 r_max = r;
			 dc_set_max = {dc_set_i.diff, dc_set_i.fixed};
		  }
		  printf("[%s:%d] %d|%d: %f %d  %f\n", __FILE__, __LINE__, i, j, p, s, r);
		}
	 }

	 *dc_set = {dc_set_max.diff, dc_set_max.fixed};
  }

}

/* --- */

/**
 * Rescales entries of the vector L in place, depending on how many of the
 * three most significant bits (of da, db, dc) are fixed.
 *
 *  fixed bits | entries scaled | factor (WORD_SIZE != 1) | factor (WORD_SIZE == 1)
 *  -----------+----------------+-------------------------+------------------------
 *      0      |     0 .. 7     |          0.25           |          0.4
 *      1      |     0 .. 7     |          0.5            |        1.0 / 1.5
 *      2      |     1 .. 6     |       (no scaling)      |          2.0
 *      3      |     1 .. 6     |          2.0            |          2.0
 *
 * All other entries of L are multiplied by 1.0.
 *
 * \param L vector to be rescaled in place.
 * \param b_da_msb_is_fixed true if the MSB of da is fixed (not '*').
 * \param b_db_msb_is_fixed true if the MSB of db is fixed (not '*').
 * \param b_dc_msb_is_fixed true if the MSB of dc is fixed (not '*').
 */
void xdp_add_dset_final_states_norm(gsl_vector* L, 
												bool b_da_msb_is_fixed, bool b_db_msb_is_fixed, bool b_dc_msb_is_fixed)
{
  gsl_vector* V = gsl_vector_calloc(XDP_ADD_DSET_MSIZE);

  const int n_fixed = (b_da_msb_is_fixed ? 1 : 0) + (b_db_msb_is_fixed ? 1 : 0) + (b_dc_msb_is_fixed ? 1 : 0);
  const bool b_one_bit_word = (WORD_SIZE == 1);

  bool b_rescale = true;	  // false => L is left untouched
  uint32_t idx_first = 0;	  // first entry that receives the factor
  uint32_t idx_last = 7;	  // last entry that receives the factor
  double factor = 1.0;

  switch(n_fixed) {
  case 0:						  // three STAR
	 factor = b_one_bit_word ? 0.4 : 0.25;
	 break;
  case 1:						  // two STAR
	 factor = b_one_bit_word ? (1.0 / 1.5) : 0.5;
	 break;
  case 2:						  // one STAR: only rescaled for 1-bit words
	 b_rescale = b_one_bit_word;
	 factor = 2.0;
	 idx_first = 1;
	 idx_last = 6;
	 break;
  default:						  // all fixed (no STAR)
	 factor = 2.0;
	 idx_first = 1;
	 idx_last = 6;
	 break;
  }

  if(b_rescale) {
	 gsl_vector_set_all(V, 1.0);
	 for(uint32_t idx = idx_first; idx <= idx_last; idx++) {
		gsl_vector_set(V, idx, factor);
	 }
	 gsl_vector_mul(L, V);	  // element-wise L[i] *= V[i]
  }

  gsl_vector_free(V);
}


/* --- */
// 
// Older construction of the output difference set: builds two candidate
// sets dc_set[0] and dc_set[1] from da_set and db_set, bit by bit from the
// LSB. The index j (0 or 1) is the bit value written wherever the rules
// below leave the value of the output bit open.
//
// Per bit i the following independent checks are applied in order (more
// than one may fire, in which case their results are OR-ed together):
//  - (*,*): the output bit is j, marked FIXED;
//  - (-,*) or (*,-) while the previous bits of da, db, dc are not all
//    equal: the output bit copies the fixed input bit and is marked FIXED
//    for i == 0 and STAR otherwise;
//  - i > 0 and the previous bits of da, db, dc are all FIXED and equal:
//    the output bit is da[i] ^ db[i] ^ da[i-1], marked FIXED;
//  - otherwise, for (-,-): at i == 0 the output bit is da[0] ^ db[0]
//    (FIXED); at i > 0 it copies the input bit (FIXED) when
//    da[i] == db[i], else it is j and marked STAR.
//
// \param da_set first input difference set.
// \param db_set second input difference set.
// \param dc_set output: the two constructed sets.
// 
void xdp_add_input_dset_to_output_dset_old(const diff_set_t da_set, 
														 const diff_set_t db_set,
														 diff_set_t dc_set[2])
{
  for(uint32_t j = 0; j <= 1; j++) {

	 // start each candidate from the empty set
	 dc_set[j].diff = 0;
	 dc_set[j].fixed = 0;

	 for(uint32_t i = 0; i < WORD_SIZE; i++) {

		// symbols of the inputs at the current bit
		uint32_t da_diff_i = (da_set.diff >> i) & 1; 
		uint32_t da_fixed_i = (da_set.fixed >> i) & 1;
		uint32_t db_diff_i = (db_set.diff >> i) & 1; 
		uint32_t db_fixed_i = (db_set.fixed >> i) & 1;

		// symbols at the previous bit; all 0 for i == 0
		uint32_t da_diff_prev_i = 0;
		uint32_t da_fixed_prev_i = 0;
		uint32_t db_diff_prev_i = 0;
		uint32_t db_fixed_prev_i = 0;
		uint32_t dc_diff_prev_i = 0;
		uint32_t dc_fixed_prev_i = 0;

		if(i > 0) {
		  da_diff_prev_i = (da_set.diff >> (i - 1)) & 1; 
		  da_fixed_prev_i = (da_set.fixed >> (i - 1)) & 1;
		  db_diff_prev_i = (db_set.diff >> (i - 1)) & 1; 
		  db_fixed_prev_i = (db_set.fixed >> (i - 1)) & 1;
		  dc_diff_prev_i = (dc_set[j].diff >> (i - 1)) & 1; 
		  dc_fixed_prev_i = (dc_set[j].fixed >> (i - 1)) & 1;
		}
		if((da_fixed_i == STAR) && (db_fixed_i == STAR)) { // (*,*)
		  dc_set[j].diff |= (j << i);
		  dc_set[j].fixed |= (FIXED << i);
		} 
		if((da_fixed_i == FIXED) && (db_fixed_i == STAR) && !(is_eq(da_diff_prev_i, db_diff_prev_i, dc_diff_prev_i))) { // (-,*)
		  dc_set[j].diff |= (da_diff_i << i);
		  if(i == 0) {
			 dc_set[j].fixed |= (FIXED << i);
		  } else {
			 dc_set[j].fixed |= (STAR << i);
		  }
		} 
		if((da_fixed_i == STAR) && (db_fixed_i == FIXED) && !(is_eq(da_diff_prev_i, db_diff_prev_i, dc_diff_prev_i))) { // (*,-)
		  dc_set[j].diff |= (db_diff_i << i);
		  if(i == 0) {
			 dc_set[j].fixed |= (FIXED << i);
		  } else {
			 dc_set[j].fixed |= (STAR << i);
		  }
		} 
		// previous bits all FIXED and equal => output bit is determined,
		// regardless of the symbols of the inputs at bit i (the restriction
		// to (-,*) / (*,-) is commented out)
		if((i > 0) &&
			//		  (((da_fixed_i == FIXED) && (db_fixed_i == STAR)) ||
			//		  ((da_fixed_i == STAR) && (db_fixed_i == FIXED))) &&
		  ((da_fixed_prev_i == FIXED) && (db_fixed_prev_i == FIXED) && (dc_fixed_prev_i == FIXED)) &&
		  (is_eq(da_diff_prev_i, db_diff_prev_i, dc_diff_prev_i))) { // previous bits fixed and equal

		  uint32_t dc_i = da_diff_i ^ db_diff_i ^ da_diff_prev_i;
		  dc_set[j].diff |= (dc_i << i);
		  dc_set[j].fixed |= (FIXED << i);
		} else {
		  if((da_fixed_i == FIXED) && (db_fixed_i == FIXED)) { // (-,-)

			 if(i == 0) {				  // LSB
				dc_set[j].diff |= ((da_diff_i ^ db_diff_i) << i);
				dc_set[j].fixed |= (FIXED << i);
			 } else {
				if(da_diff_i == db_diff_i) {
				  // equal input bits: the output bit copies them
				  dc_set[j].diff |= (da_diff_i << i);
				  dc_set[j].fixed |= (FIXED << i);
				} else {
				  // different input bits: the output bit is left open
				  dc_set[j].diff |= (j << i);
				  dc_set[j].fixed |= (STAR << i);
				}
			 }

		  }
		} 
	 }
  } // j
}


/* --- */

			 // Orphaned fragment (no enclosing function in this dump): for each
			 // of the two input difference pairs (da[j], db[j]) it builds an
			 // output set dc_set[j], evaluates xdp_add_dset once for the single
			 // pair (keeping the maximum in pp) and once for the full input
			 // sets (stored in p[j]).
			 // NOTE(review): xdp_add_dset is called here with four arguments,
			 // whereas other code in this file passes an additional word-size
			 // argument; da, db, dc_set, da_set, db_set and AA are not declared
			 // in this fragment.
			 double p[2] = {0.0, 0.0};
			 double pp = 0.0;
			 for(uint32_t j = 0; j < 2; j++) {
				xdp_add_input_diff_to_output_dset(da[j], db[j], &dc_set[j]);
#if 1									  // DEBUG
				printf("\ndc%d = ", j);
				xdp_add_dset_print_set(dc_set[j]);
				printf("\n");
#endif
				// single-difference input sets: fixed word 0
				diff_set_t da_set_temp = {da[j],0};
				diff_set_t db_set_temp = {db[j],0};
				double pp_temp = xdp_add_dset(AA, da_set_temp, db_set_temp, dc_set[j]);
				printf("[%s:%d]pp[%d] = %f\n", __FILE__, __LINE__, j, pp_temp);
				if(pp_temp > pp) {
				  pp = pp_temp;
				}

				p[j] = xdp_add_dset(AA, da_set, db_set, dc_set[j]);
#if 1								  // DEBUG
				printf("[%s:%d] p[%d] = %f\n", __FILE__, __LINE__, j, p[j]);
#endif
				//				p[j] = pp;
			 }


/* --- */
/**
 * Exhaustive experiment over all pairs of input difference sets
 * (da_set, db_set), i.e. over all (diff, fixed) words below ALL_WORDS.
 *
 * For each pair of sets:
 *  1. two representative input difference pairs (da[0], db[0]) and
 *     (da[1], db[1]) are derived with xdp_add_input_dsets_to_diffs();
 *  2. for each of them an output set is generated and the XDP-ADD
 *     probabilities of all its members are summed into p[j] (every member
 *     is asserted to have non-zero probability);
 *  3. p_max = max(p[0], p[1]); when the Hamming weights of da[j] ^ db[j]
 *     differ, the pair with the lower weight is asserted to have the
 *     strictly larger sum;
 *  4. the same sum is computed for every concrete pair (da_i, db_i) drawn
 *     from the two input sets, and a line is printed whenever it exceeds
 *     p_max (this is reported only, not asserted).
 */
void test_xdp_add_dc_set_is_max()
{
  gsl_matrix* A[2][2][2];
  xdp_add_alloc_matrices(A);
  xdp_add_sf(A);
  xdp_add_normalize_matrices(A);

  diff_set_t da_set = {0,0};
  diff_set_t db_set = {0,0};
  uint32_t da[2] = {0,0};
  uint32_t db[2] = {0,0};

  for(uint32_t d1 = 0; d1 < ALL_WORDS; d1++) {
	 for(uint32_t f1 = 0; f1 < ALL_WORDS; f1++) {
		for(uint32_t d2 = 0; d2 < ALL_WORDS; d2++) {
		  for(uint32_t f2 = 0; f2 < ALL_WORDS; f2++) {

			 da_set.diff = d1;
			 da_set.fixed = f1;
			 db_set.diff = d2;
			 db_set.fixed = f2;

			 // step 1: representative input differences
			 xdp_add_input_dsets_to_diffs(da_set, db_set, da, db);
			 printf("[%s:%d] Input sets: da (%8X,%8X), db (%8X,%8X)\n", 
					  __FILE__, __LINE__, da_set.diff, da_set.fixed, db_set.diff, db_set.fixed);
			 printf("[%s:%d] Input diffs: 0:(%8X,%8X), 1:(%8X,%8X)\n",
					  __FILE__, __LINE__, da[0], db[0], da[1], db[1]);

			 diff_set_t dc_set[2] = {{0,0}};
			 uint32_t dc_set_len[2] = {0};
			 double p[2] = {0.0, 0.0};

			 // step 2: total probability of the output set of each representative
			 for(uint32_t j = 0; j < 2; j++) {
				xdp_add_gen_output_dset(da[j], db[j], &dc_set[j]);

				std::vector<uint32_t> dc_set_all;
				xdp_add_diff_set_to_diff_all(dc_set[j], &dc_set_all);
				dc_set_len[j] = dc_set_all.size();

				for(const uint32_t dc_i : dc_set_all) {
				  const double p_i = xdp_add(A, da[j], db[j], dc_i);
				  assert(p_i != 0.0);
				  p[j] += p_i;
				}
				printf("[%s:%d] p[%d] = %f\n", __FILE__, __LINE__, j, p[j]);
			 }

			 // step 3: the bound, and the output set it belongs to
			 const double p_max = std::max(p[0],p[1]);

			 diff_set_t dc_set_out = {0,0};
			 const uint32_t hw0 = hw32(da[0] ^ db[0]);
			 const uint32_t hw1 = hw32(da[1] ^ db[1]);
			 if(hw0 > hw1) {
				dc_set_out = {dc_set[1].diff, dc_set[1].fixed};
				assert(p[0] < p[1]);
			 } else if(hw0 < hw1) {
				dc_set_out = {dc_set[0].diff, dc_set[0].fixed};
				assert(p[0] > p[1]);
			 } else {
				// equal weights: prefer the larger set, set 0 on ties
				const uint32_t j_out = (dc_set_len[0] >= dc_set_len[1]) ? 0 : 1;
				dc_set_out = {dc_set[j_out].diff, dc_set[j_out].fixed};
			 }

			 // step 4: compare against every concrete input pair of the sets
			 std::vector<uint32_t> da_set_all;
			 xdp_add_diff_set_to_diff_all(da_set, &da_set_all);

			 std::vector<uint32_t> db_set_all;
			 xdp_add_diff_set_to_diff_all(db_set, &db_set_all);

			 for(const uint32_t da_i : da_set_all) {
				for(const uint32_t db_i : db_set_all) {

				  diff_set_t dc_set_i = {0,0};
				  xdp_add_gen_output_dset(da_i, db_i, &dc_set_i);
				  std::vector<uint32_t> dc_set_all_i;
				  xdp_add_diff_set_to_diff_all(dc_set_i, &dc_set_all_i);

				  double p_sum = 0.0;
				  for(const uint32_t dc_i : dc_set_all_i) {
					 const double p_i = xdp_add(A, da_i, db_i, dc_i);
					 assert(p_i != 0.0);
					 p_sum += p_i;
				  }
				  if(p_sum > p_max) {
					 printf("[%s:%d] p_max %f, p %f ", __FILE__, __LINE__, p_max, p_sum);
					 printf("%8X %8X -> {%8X,%8X} vs. {%8X,%8X}\n", da_i, db_i, dc_set_i.diff, dc_set_i.fixed, dc_set_out.diff, dc_set_out.fixed);
				  }
				  //				  assert(p_sum <= p_max);
				}
			 }

		  }
		}
	 }
  }

  xdp_add_free_matrices(A);
}

/* --- */

// Orphaned, disabled (#if 0) fragment: for each of da_set, db_set and
// dc_set, if bit (WORD_SIZE - 1) of the fixed word equals FIXED, that bit
// of the fixed word is toggled.
#if 0
				  uint32_t da_msb_star = (da_set.fixed >> (WORD_SIZE - 1)) & 1;
				  if(da_msb_star == FIXED) {
					 da_set.fixed ^= 1 << (WORD_SIZE - 1); 
				  } 
				  uint32_t db_msb_star = (db_set.fixed >> (WORD_SIZE - 1)) & 1;
				  if(db_msb_star == FIXED) {
					 db_set.fixed ^= 1 << (WORD_SIZE - 1); 
				  } 
				  uint32_t dc_msb_star = (dc_set.fixed >> (WORD_SIZE - 1)) & 1;
				  if(dc_msb_star == FIXED) {
					 dc_set.fixed ^= 1 << (WORD_SIZE - 1); 
				  } 
#endif


/* --- */

		  // Orphaned fragment (no enclosing function in this dump): sets entry
		  // idx = (dc_0 << 2) | (db_0 << 1) | da_0 of the vector C to 1.0. At
		  // pos == 0 this is done only when da_0 ^ db_0 ^ dc_0 == 0, which
		  // restricts idx to 0, 3, 5 or 6.
		  // NOTE(review): pos, da_0, db_0, dc_0 and C are not declared here, and
		  // the final closing brace belongs to a scope opened outside the
		  // fragment.
		  if(pos == 0) {			  // LSB
			 bool b_is_valid = ((da_0 ^ db_0 ^ dc_0) == 0);
			 if(b_is_valid) {
				uint32_t idx = (dc_0 << 2) | (db_0 << 1) | da_0;
				assert((idx == 0)||(idx == 3)||(idx == 5)||(idx == 6));
				double val = 1.0;
				gsl_vector_set(C, idx, val);
			 }
		  } else {
			 uint32_t idx = (dc_0 << 2) | (db_0 << 1) | da_0;
			 double val = 1.0;
			 gsl_vector_set(C, idx, val);
		  }
		}

/* --- */

/**
 * Fills entries of the matrices A[da][db][dc].
 *
 * States are numbered by three bits: bit 0 = da, bit 1 = db, bit 2 = dc.
 * For every input state i (column) and output state j (row), the entry
 * (j, i) of the matrix selected by the bits of the INPUT state is set to:
 *
 *   0.0  if the input bits are all equal and
 *        da_out ^ db_out ^ dc_out ^ db_in != 0   (impossible transition),
 *   1.0  if the input bits are all equal and the transition is possible,
 *   0.5  if the input bits are not all equal.
 *
 * Since the rule is only active when da_in == db_in == dc_in, XOR-ing with
 * db_in is the same as XOR-ing with da_in.
 *
 * \param A matrices to be filled; only column i of A[da_in][db_in][dc_in]
 *        is written for each input state i.
 */
void xdp_add_dset_gen_matrices_msb(gsl_matrix* A[2][2][2])
{
  for(int state_in = 0; state_in < XDP_ADD_DSET_MSIZE; state_in++) {
	 // decode the input state
	 const int da_in = (state_in >> 0) & 1;
	 const int db_in = (state_in >> 1) & 1;
	 const int dc_in = (state_in >> 2) & 1;

	 for(int state_out = 0; state_out < XDP_ADD_DSET_MSIZE; state_out++) {
		// decode the output state
		const int da_out = (state_out >> 0) & 1;
		const int db_out = (state_out >> 1) & 1;
		const int dc_out = (state_out >> 2) & 1;

		// 
		// An xdp-add differential is possible if:
		// da[i] = db[i] = dc[i] => da[i+1] ^ db[i+1] ^ dc[i+1] ^ da[i] = 0
		// 
		const int parity = (da_out ^ db_out ^ dc_out ^ db_in);
		const bool b_is_possible = ((is_eq(da_in, db_in, dc_in) & parity) == 0);

		double e = 0.0;
		if(b_is_possible) {
		  e = is_eq(da_in, db_in, dc_in) ? 1.0 : 0.5;
		}

		const uint32_t col = state_in;
		const uint32_t row = state_out;
		gsl_matrix_set(A[da_in][db_in][dc_in], row, col, e);
	 }
  }
}


/* --- */

  // Orphaned fragment (no enclosing function in this dump): when da is not
  // fixed at the most significant bit, a zeroed vector V is allocated,
  // entry 0 is set to 0.5 twice, and V is freed again without being used.
  // NOTE(review): b_da_is_fixed and i are not declared in this fragment.
  if((!b_da_is_fixed) && (i == (WORD_SIZE - 1))) {
	 gsl_vector* V = gsl_vector_calloc(XDP_ADD_DSET_MSIZE);
	 double e = 0.5;
	 gsl_vector_set(V, 0, e);
	 gsl_vector_set(V, 0, e);
	 gsl_vector_free(V);
  } 

/* --- */
	 // Orphaned fragment (no enclosing function in this dump): at the most
	 // significant bit, entries 1..6 of the vector R are doubled.
	 // NOTE(review): i and R are not declared in this fragment.
	 if(i == (WORD_SIZE - 1)) {	  // MSB
		for(uint32_t j = 1; j < 7; j++) {
		  double e = gsl_vector_get(R, j);
		  e *= 2.0;
		  gsl_vector_set(R, j, e);
		}
	 }

/* --- */
	 // Orphaned fragment (no enclosing function in this dump): extracts bit i
	 // of the diff words of da_set, db_set, dc_set and three "is fixed" flags.
	 // NOTE(review): the flags test bit 0 of the fixed words (`fixed & 1`),
	 // not bit i -- confirm whether `(fixed >> i) & 1` was intended.
	 uint32_t da_i = (da_set.diff >> i) & 1;
	 uint32_t db_i = (db_set.diff >> i) & 1;
	 uint32_t dc_i = (dc_set.diff >> i) & 1;

	 bool b_da_is_fixed = ((da_set.fixed & 1) == FIXED);
	 bool b_db_is_fixed = ((db_set.fixed & 1) == FIXED);
	 bool b_dc_is_fixed = ((dc_set.fixed & 1) == FIXED);

/* --- */

void xdp_add_dset_print_matrices_sage(gsl_matrix* A[2][2][2])
{
  printf("# [%s:%d] Matrices for XDP-ADD generated with %s() \n", __FILE__, __LINE__, __FUNCTION__);

  printf("#--- Normalization factor --- \n");
  printf("f = %f\n", XDP_ADD_DSET_NORM);

  // print L
  gsl_vector* L = gsl_vector_calloc(XDP_ADD_DSET_MSIZE);
  gsl_vector_set_all(L, 1.0);
  printf("#--- Vector L --- \n");
  printf("L = vector(QQ,[ ");
  for(int col = 0; col < XDP_ADD_DSET_MSIZE; col++){
	 double e = gsl_vector_get(L, col);
	 printf("%4.3f", e);
	 if(col == XDP_ADD_DSET_MSIZE - 1) {
		printf(" ");
	 } else {
		printf(", ");
	 }
  }
  printf("])\n\n");

  // print C
  gsl_vector* C = gsl_vector_calloc(XDP_ADD_DSET_MSIZE);
  gsl_vector_set_zero(C);
  gsl_vector_set(C, XDP_ADD_DSET_ISTATE, 1.0);
  printf("#--- Vector C --- \n");
  printf("C = vector(QQ,[ ");
  for(int col = 0; col < XDP_ADD_DSET_MSIZE; col++){
	 double e = gsl_vector_get(C, col);
	 printf("%4.3f", e);
	 if(col == XDP_ADD_DSET_MSIZE - 1) {
		printf(" ");
	 } else {
		printf(", ");
	 }
  }
  printf("])\n\n");

  // print A
  for(int i = 0; i < XDP_ADD_DSET_NMATRIX; i++){
	 int a = (i >> 0) & 1;
	 int b = (i >> 1) & 1;
	 int c = (i >> 2) & 1;
	 printf("#---AA%d%d%d--- \n", c, b, a);
	 printf("AA%d%d%d = matrix(QQ,%d,%d,[\n", c, b, a, XDP_ADD_DSET_MSIZE, XDP_ADD_DSET_MSIZE);
	 for(int row = 0; row < XDP_ADD_DSET_MSIZE; row++){
		for(int col = 0; col < XDP_ADD_DSET_MSIZE; col++){
		  double e = gsl_matrix_get(A[a][b][c], row, col);
		  printf("%3.2f", e);
		  if((row == XDP_ADD_DSET_MSIZE - 1) && (col == XDP_ADD_DSET_MSIZE - 1)) {
			 printf(" ");
		  } else {
			 printf(", ");
		  }
		}
		printf("\n");
	 }
	 printf("])\n\n");
	 //	 printf("\n");
  }
  for(int i = 0; i < XDP_ADD_DSET_NMATRIX; i++){
	 int a = (i >> 0) & 1;
	 int b = (i >> 1) & 1;
	 int c = (i >> 2) & 1;
	 printf("A%d%d%d = f * AA%d%d%d\n", c, b, a, c, b, a);
  }
  printf("\n");
  printf("A = [A000, A001, A010, A011, A100, A101, A110, A111]\n");
  printf("\n");
  printf("AA = [AA000, AA001, AA010, AA011, AA100, AA101, AA110, AA111]\n");
}

/* --- */
		// Orphaned, unfinished fragment: sets fixed_this to 1 when the bits
		// da_this, db_this, dc_this are all equal and a second condition holds.
		// NOTE(review): the second operand of && is an empty "()", so this
		// statement does not parse as written.
		if(is_eq(da_this, db_this, dc_this) && ()) {
		  fixed_this = 1;
		}


/* --- */

/**
 * Generates a set of output differences for the input differences
 * (da, db), following the bit rule of \ref max_adp_add_lm .
 *
 * The set is returned as a pair of words:
 *  - dc_set->diff  : the values of the determined bits (open bits are 0),
 *  - dc_set->fixed : bit i is 1 if dc[i] is open (can be 0 or 1) and
 *                    0 if dc[i] is determined.
 *
 * Bit 0 is always determined: dc[0] = da[0] ^ db[0]. For i >= 1, dc[i] is
 * determined as da[i] ^ db[i] ^ da[i-1] when da[i-1], db[i-1] and dc[i-1]
 * are all equal; otherwise it is left open.
 *
 * \param da first input difference.
 * \param db second input difference.
 * \param dc_set output: the generated set.
 */
void xdp_add_dc_set(uint32_t da, uint32_t db, diff_set_t* dc_set)
{
  const uint32_t nbits = WORD_SIZE;

  // LSB: dc[0] = da[0] ^ db[0]
  uint32_t diff_word = (da ^ db) & 1;
  // bit i set <=> dc[i] is open
  uint32_t open_mask = 0;

  for(uint32_t pos = 1; pos < nbits; pos++) {
	 const uint32_t below = pos - 1;
	 const uint32_t da_below = (da >> below) & 1;
	 const uint32_t db_below = (db >> below) & 1;
	 const uint32_t dc_below = (diff_word >> below) & 1;

	 if(is_eq(da_below, db_below, dc_below)) {
		// determined bit
		const uint32_t da_here = (da >> pos) & 1;
		const uint32_t db_here = (db >> pos) & 1;
		diff_word |= ((da_here ^ db_here ^ da_below) << pos);
	 } else {
		// open bit: value stays 0 in diff_word
		open_mask |= ((uint32_t)1 << pos);
	 }
  }

  dc_set->diff = diff_word;
  dc_set->fixed = open_mask;
}

/* --- */

		// Orphaned fragment (no enclosing function in this dump): for every
		// bit i that is set in dc_set.fixed, the difference dc_set.diff with
		// bit i set is evaluated with xdp_add_lm, asserted to have non-zero
		// probability and added to p.
		// NOTE(review): da, db, dc_set and p are not declared in this fragment.
		for(uint32_t i = 0; i < WORD_SIZE; i++) {
		  uint32_t t = (dc_set.fixed >> i) & 1;
		  if(t == 0)
			 continue;

		  uint32_t dc_new = dc_set.diff | (1 << i);
		  double pp = xdp_add_lm(da, db, dc_new);
#if 0									  // DEBUG
		  printf("[%s:%d] %8X %8X (%8X %8X) %f\n", __FILE__, __LINE__, da, db, dc_new, dc_set.fixed, pp);
#endif
		  assert(pp != 0.0);

		  p += pp;

		}


/* --- */
		  // Orphaned fragment (no enclosing function in this dump): debug print
		  // of i_pos, logN and N when i_pos has reached logN.
		  // NOTE(review): i_pos, logN and N are not declared in this fragment.
		  if((double)i_pos >= logN) {
			 printf("[%s:%d] %d %f %d\n", __FILE__, __LINE__, i_pos, logN, N);
		  }

/* --- */

  // Orphaned fragment (no enclosing function in this dump): prints a[1] and
  // its bitwise complement in binary.
  // NOTE(review): a is not declared in this fragment.
  printf(" a[1] = ");
  print_binary(a[1]);
  printf("\n");
  printf("~a[1] = ");
  print_binary(~a[1]);

/* --- */
			 // Orphaned fragment (no enclosing function in this dump): at bit 1
			 // the output bit copies da_this and is marked fixed; both branches of
			 // the chi_prev test currently do the same thing. For other bits the
			 // output bit is da_this ^ db_this ^ dc_prev.
			 // NOTE(review): i, chi_prev, g, dc, da_this, db_this and dc_prev are
			 // not declared in this fragment.
			 if(i == 1) {
				if(chi_prev == 1) {
				  //				  g |= (star << i);
				  g |= (0 << i);			  // fixed
				  dc |= (da_this << i);			  // dc[i] = da[i] = db[i]
				} else {
				  g |= (0 << i);			  // fixed
				  dc |= (da_this << i);			  // dc[i] = da[i] = db[i]
				}
			 } else {
				dc |= (da_this ^ db_this ^ dc_prev) << i;
			 }

/* --- */

/**
 * Experiment: constructing a set of output differences for xdp-add.
 *
 * For every pair of input differences (da, db) below ALL_WORDS a base
 * output difference dc and a mask g of open ('*') bits are built:
 *  - dc[0] = da[0] ^ db[0];
 *  - for i >= 1, if bit i-1 is not open (or i == 1):
 *      dc[i] = da[i] ^ db[i] ^ dc[i-1];
 *    otherwise bit i becomes open when da[i] != db[i], and is fixed to
 *    da[i] when da[i] == db[i].
 *
 * The probability of dc, and of dc with each single open bit flipped, is
 * asserted to be non-zero. The sum of those probabilities is compared with
 * max_xdp_add(); a line is printed when the sum is larger.
 */
void test_xdp_add_gamma_set()
{
  gsl_matrix* A[2][2][2];
  xdp_add_alloc_matrices(A);
  xdp_add_sf(A);
  xdp_add_normalize_matrices(A);

  const uint32_t star = 1;		  // g[i] == star <=> dc[i] can be anything

  for(uint32_t da = 0; da < ALL_WORDS; da++) {
	 for(uint32_t db = 0; db < ALL_WORDS; db++) {

		const uint32_t chi = (da ^ db);
		uint32_t g = 0;
		uint32_t dc = (da & 1) ^ (db & 1); // dc[0] = da[0] ^ db[0]

		// build dc and g from bit 1 upwards
		for(uint32_t i = 1; i < WORD_SIZE; i++) {
		  const uint32_t dc_prev = (dc >> (i - 1)) & 1;
		  const uint32_t g_prev = (g >> (i - 1)) & 1;
		  const uint32_t chi_this = (chi >> i) & 1;
		  const uint32_t da_this = (da >> i) & 1;
		  const uint32_t db_this = (db >> i) & 1;

		  const bool b_prev_is_open = ((g_prev == star) && (i != 1));
		  if(!b_prev_is_open) {
			 // dc[i] = da[i] ^ db[i] ^ dc[i-1]
			 dc |= (da_this ^ db_this ^ dc_prev) << i;
		  } else if(chi_this == 1) {
			 g |= (star << i);			  // dc[i] = *
		  } else {
			 dc |= (da_this << i);		  // dc[i] = da[i] = db[i], fixed
		  }
		}

		// probability of the base difference
		double p = xdp_add(A, da, db, dc);
		printf("[%s:%d] %8X %8X (%8X %8X) %f\n", __FILE__, __LINE__, da, db, dc, g, p);
		assert(p != 0.0);

		// add the probabilities of dc with one open bit flipped at a time
		for(uint32_t i = 0; i < WORD_SIZE; i++) {
		  const bool b_is_open = (((g >> i) & 1) != 0);
		  if(b_is_open) {
			 uint32_t dc_new = dc ^ (1 << i);
			 double pp = xdp_add(A, da, db, dc_new);
			 printf("[%s:%d] %8X %8X (%8X %8X) %f\n", __FILE__, __LINE__, da, db, dc_new, g, pp);
			 assert(pp != 0.0);
			 p += pp;
		  }
		}

		// compare against the maximum single-difference probability
		// (max_xdp_add overwrites dc with the maximizing difference)
		double p_max = max_xdp_add(A, da, db, &dc);
		if(p > p_max) {
		  printf("%f %f\n", p, p_max);
		}
		//		printf("Total: %f, max (%f %8X)\n", p, p_max, dc);
	 }
  }

  xdp_add_free_matrices(A);
}


/* --- */

// Orphaned fragment: the code inside #if 0 is disabled. It would mark bit
// (i - 1) of g as open ('*') at i == 1 when da_prev != db_prev. The line
// after #endif is a commented-out statement that would clear the LSB of g.
#if 0
		  if(i == 1) {
			 if(da_prev != db_prev) {
				g_prev = star;				  // *
				g |= (star << (i - 1));		  // *
			 }
		  }
#endif
//		g = g & 0xE;						  // set the LSB to 0


/* --- */

/**
 * Test if the condition for a non-zero probability ADP-XOR differential
 * (cf. Theorem 2, Wallen) is valid for XDP-ADD
 *
 * For every bit position q and every non-zero bit triple (x, y, z) placed
 * at position q of (da, db, dc), with all lower bits zero, the test
 * enumerates all values of the bits above q and counts the differentials
 * with non-zero XDP-ADD probability. For every (da, db) it prints the
 * number of such output differences next to 2^(WORD_SIZE-q-1), and for
 * every (q, x, y, z) the total next to 2^(3*(WORD_SIZE-q-1)).
 *
 * The test only prints counts; it does not assert anything.
 *
 * Implementation notes:
 *  - when q is the top bit there are no bits above it; the shift by q + 1
 *    is then skipped, because shifting a 32-bit value by 32 is undefined;
 *  - the reference totals are kept as doubles and the running total as a
 *    64-bit integer, so they do not wrap around for larger word sizes.
 */
void test_xdp_add_nonzero_all()
{
  printf("[%s:%d] Running test %s() ...\n", __FILE__, __LINE__, __FUNCTION__);

  gsl_matrix* A[2][2][2];
  xdp_add_alloc_matrices(A);
  xdp_add_sf(A);
  xdp_add_normalize_matrices(A);

  for(uint32_t q = 0; q < WORD_SIZE; q++) {

	 const uint32_t n_hi = WORD_SIZE - q - 1;	// number of bits above q
	 const uint32_t hi_pos = q + 1;				// position of the first bit above q
	 const bool b_has_hi = (hi_pos < WORD_SIZE); // false only when q is the top bit

	 for(uint32_t r = 1; r < 8; r++) { // skip x = y = z = 0

		const uint32_t x = (r >> 0) & 1;
		const uint32_t y = (r >> 1) & 1;
		const uint32_t z = (r >> 2) & 1;

		printf("\n[%s:%d] --- q = %2d | %d %d %d ---\n", __FILE__, __LINE__, q, x, y, z);
		uint64_t cnt_all = 0;

		const uint64_t N = (1ULL << n_hi); // bits da[n-1:q+1]
		for(uint32_t i = 0; i < N; i++) {
		  for(uint32_t j = 0; j < N; j++) {
			 uint32_t cnt_o = 0;	  // output diffs with non-zero probability
			 for(uint32_t k = 0; k < N; k++) {

				// bit q holds (x, y, z); bits below q are 0
				uint32_t da = (x << q);
				uint32_t db = (y << q);
				uint32_t dc = (z << q);

				// bits above q hold (i, j, k); when there are none, i = j = k = 0
				if(b_has_hi) {
				  da |= (i << hi_pos);	  // da[n-1:q+1]
				  db |= (j << hi_pos);	  // db[n-1:q+1]
				  dc |= (k << hi_pos);	  // dc[n-1:q+1]
				}

				double p = xdp_add(A, da, db, dc);
				if(p != 0.0) {
				  cnt_all++;
				  cnt_o++;
				}
			 }
			 const double tot_o_th = std::pow(2.0, (double)n_hi);
			 printf("[%s:%d]Total out: %u %.0f (2^%f)\n", __FILE__, __LINE__, cnt_o, tot_o_th, log2((double)cnt_o));
		  }
		}
		const double tot_th = std::pow(2.0, (double)(3 * n_hi));
		printf("[%s:%d]Total: %llu %.0f (2^%f)\n", __FILE__, __LINE__, (unsigned long long)cnt_all, tot_th, log2((double)cnt_all));
	 }

  }
  xdp_add_free_matrices(A);
  printf("[%s:%d] WORD_SIZE = %d. Test %s() OK.\n", __FILE__, __LINE__, WORD_SIZE, __FUNCTION__);
}


/* --- */

/**
 * Test if the condition for a non-zero probability ADP-XOR differential
 * (cf. Theorem 2, Wallen) is valid for XDP-ADD.
 *
 * For every bit position \p q and every pair (da[q], db[q]) other than
 * (0, 0), the output bit is fixed to dc[q] = da[q] ^ db[q], bits [q-1:0] of
 * all three differences are fixed to zero, and bits [n-1:q+1] are enumerated
 * exhaustively. The test asserts that xdp_add() is non-zero for every
 * enumerated differential and that the counts match the size of the
 * enumerated space.
 *
 * \note The enumeration visits \f$2^{3(n-q-1)}\f$ differentials for every
 *       \p q, so the test is only practical for small WORD_SIZE.
 */
void test_xdp_add_nonzero_all()
{
  printf("[%s:%d] Running test %s() ...\n", __FILE__, __LINE__, __FUNCTION__);

  gsl_matrix* A[2][2][2];
  xdp_add_alloc_matrices(A);
  xdp_add_sf(A);
  xdp_add_normalize_matrices(A);

  for(uint32_t q = 0; q < WORD_SIZE; q++) {

    // number of enumerated bits above position q: da[n-1:q+1]
    const uint32_t nfree = WORD_SIZE - q - 1;
    const uint64_t N = (1ULL << nfree);

    // sizes of the enumerated spaces, kept in 64 bits; the total saturates
    // instead of overflowing when 3 * nfree does not fit
    const uint64_t tot_o_th = N;
    const uint64_t tot_th = ((3 * nfree) < 64) ? (1ULL << (3 * nfree)) : ~0ULL;

    for(uint32_t r = 1; r < 4; r++) { // skip x = y = 0

      const uint32_t x = r & 1;
      const uint32_t y = (r >> 1) & 1;
      const uint32_t z = x ^ y;

      printf("\n[%s:%d] --- q = %2d | %d %d %d ---\n", __FILE__, __LINE__, q, x, y, z);
      uint64_t cnt_all = 0;

      for(uint64_t i = 0; i < N; i++) {
        for(uint64_t j = 0; j < N; j++) {
          uint64_t cnt_o = 0;     // output diffs
          for(uint64_t k = 0; k < N; k++) {

            // bit q holds (x, y, x ^ y), bits [q-1:0] stay zero; the high
            // part is shifted in 64 bits because q + 1 may equal the width
            // of uint32_t
            const uint32_t da = (x << q) | static_cast<uint32_t>(i << (q + 1));
            const uint32_t db = (y << q) | static_cast<uint32_t>(j << (q + 1));
            const uint32_t dc = (z << q) | static_cast<uint32_t>(k << (q + 1));

#if 0
            // the NAF of dc is only needed for this debug dump
            bsd_t dc_naf = naf(dc);
            uint32_t dc_unaf = dc_naf.val;
            printf("%10llu ", (unsigned long long)cnt_all);
            print_binary(da);
            print_binary(db);
            print_binary(dc);
            print_binary(dc_unaf);
            printf("\n");
#endif
            const double p = xdp_add(A, da, db, dc);
            assert(p != 0.0);
            cnt_all++;
            cnt_o++;
          }
          printf("[%s:%d]Total out: %llu %llu (2^%f)\n", __FILE__, __LINE__,
                 (unsigned long long)cnt_o, (unsigned long long)tot_o_th,
                 log2(static_cast<double>(cnt_o)));
          assert(tot_o_th == cnt_o);
        }
      }
      printf("[%s:%d]Total: %llu %llu (2^%f)\n", __FILE__, __LINE__,
             (unsigned long long)cnt_all, (unsigned long long)tot_th,
             log2(static_cast<double>(cnt_all)));
      assert(tot_th == cnt_all);
    }

  }
  xdp_add_free_matrices(A);
  printf("[%s:%d] WORD_SIZE = %d. Test %s() OK.\n", __FILE__, __LINE__, WORD_SIZE, __FUNCTION__);
}


/* --- */
				printf("[%s:%d] q=%d |  %d %d %d\n", __FILE__, __LINE__, q, x, y, z);

				printf("[%s:%d] %d %d %d\n", __FILE__, __LINE__, da, db, dc);


/* --- */
/**
 * Randomized consistency check of max_adp_arx() against adp_arx().
 *
 * For each of \p N trials a random rotation constant and three random input
 * differences are drawn. The probability returned by the maximum search must
 * equal the probability that adp_arx() computes for the output difference
 * reported by the search.
 *
 * \param N number of random trials.
 */
void test_max_adp_arx(uint32_t N)
{
  printf("[%s:%d] Running test %s() ...\n", __FILE__, __LINE__, __FUNCTION__);

  gsl_matrix* A[2][2][2][2];
  adp_arx_alloc_matrices(A);
  adp_arx_sf(A);
  adp_arx_normalize_matrices(A);

  uint32_t trial = 0;
  while(trial < N) {

    // draw the inputs in a fixed order: rotation first, then da, db, dd
    const uint32_t rot = random32() % WORD_SIZE;
    const uint32_t diff_a = random32() & MASK;
    const uint32_t diff_b = random32() & MASK;
    const uint32_t diff_d = random32() & MASK;

    uint32_t best_de = 0;
    const double p_search = max_adp_arx(A, rot, diff_a, diff_b, diff_d, &best_de);
    const double p_exact = adp_arx(A, rot, diff_a, diff_b, diff_d, best_de);
    assert((p_exact >= 0.0) && (p_exact <= 1.0));

#if 1
    printf("[%s:%d] ADP_ARX_MAX[(%2d|%8X,%8X,%8X)->%8X] = %6.5f (2^%f)\n", 
           __FILE__, __LINE__, rot, diff_a, diff_b, diff_d, best_de, p_search, log2(p_search));
    printf("[%s:%d] ADP_ARX_THE[(%2d|%8X,%8X,%8X)->%8X] = %6.5f (2^%f)\n", 
           __FILE__, __LINE__, rot, diff_a, diff_b, diff_d, best_de, p_exact, log2(p_exact));
#else
    printf("\r[%s:%d] %2d / %2d | %2d %f %f", __FILE__, __LINE__, rot, WORD_SIZE, rot, p_search, p_exact);
    fflush(stdout);
#endif
    // the searched maximum and the recomputed probability must agree exactly
    assert(p_search == p_exact);

    trial++;
  }

  adp_arx_free_matrices(A);
  printf("\n");
  printf("[%s:%d] WORD_SIZE = %d. Test %s() OK.\n", __FILE__, __LINE__, WORD_SIZE, __FUNCTION__);
}


/* --- */

/**
 * Compute an \em upper \em bound on the maximum probability 
 * of the differential \f$(dc[n-1:k], dd[n-1:k] \rightarrow de[n-1:k])\f$,
 * where \f$dc = da + db\f$ and \f$da, db\f$ are the inputs to \ref ADD in \ref ARX, 
 * starting from the S-function state vectors \p C, i.e.
 * \f$\mathrm{dp}(dc[n-1:k],dd[n-1:k] \rightarrow de[n-1:k]) = 
 * L A_{n-1} A_{n-2} \ldots A_{k} C_{k-1}\f$,
 * given the upper bounds \p B on the probabilities of the differentials
 * \f$(dc[n-1:j], dd[n-1:j] \rightarrow de[n-1:j])\f$ for \f$j = k+1, k+2, \ldots, n-1\f$.
 * One state vector and one array of bounds is kept for each of the
 * \ref ADP_ARX_NISTATES initial states; the probabilities obtained for the
 * individual initial states are summed.
 *
 * \note Note that \f$dc = da + db\f$, where \f$da, db\f$ are the inputs to
 *       \ref ADD in the \ref ARX operation so that the DP of \ref ARX is:
 *       \f$\mathrm{adp}^{\mathrm{ARX}}(da,db,dd \rightarrow de)\f$.
 *       
 * \param k current bit position: \f$ n \ge k \ge 0\f$; the recursion
 *        terminates when \p k equals \p n.
 * \param n word size.
 * \param lrot_const left-rotation constant.
 * \param p_is estimated probability at bit position \p k for each
 *        initial state (\ref ADP_ARX_NISTATES entries).
 * \param p the estimated probability at bit position \p k; in the recursive
 *        calls this is the sum of the entries of \p p_is.
 * \param de output difference constructed so far.
 * \param A transition probability matrices.
 * \param B array of \ref ADP_ARX_NISTATES rows by (WORD_SIZE + 1) columns of
 *        vectors of bounds; the vector at column \p k + 1 is combined with the
 *        state vector obtained at bit \p k.
 * \param C array of \ref ADP_ARX_NISTATES state vectors, one per initial state.
 * \param dc first input difference.
 * \param dd second input difference.
 * \param de_max maximum probability output difference.
 * \param p_max_is the per-initial-state probabilities belonging to \p p_max.
 * \param p_max the maximum probability.
 *
 * \b Algorithm \b Outline:
 *
 * Recursively assign values to the bits of the output difference \p de starting 
 * at bit position \f$j = k\f$ and terminating at bit position \p n. The recursion 
 * proceeds to bit position \f$j + 1\f$ only if the  probability \f$p_j\f$ of the 
 * partially constructed differential \f$(dc[j:k], dd[j:k] \rightarrow de[j:k])\f$ 
 * multiplied by the bound of the probability until the end \f$B[j+1]\f$ is bigger than 
 * the best probability found so far i.e. if:
 * \f$\sum_{s} B[s][j+1] A_{j} A_{j-1} \ldots A_{k} C^{s}_{k-1} > p_{\mathrm{max}}\f$.
 * When \f$j = n\f$ update the max.: 
 * \f$p_{\mathrm{max}} \leftarrow p_{n-1} = 
 * \mathrm{dp}(dc[n-1:k],dd[n-1:k] \rightarrow de[n-1:k])\f$.
 *
 * \note Note that since \f$dc = da + db\f$, where \f$da, db\f$ are the inputs to
 *       \ref ADD in the \ref ARX operation so that the DP of \ref ARX is:
 *       \f$\mathrm{max}_{de}~\mathrm{dp}(dc[n-1:k],dd[n-1:k] \rightarrow de[n-1:k]) = 
 *       \mathrm{max}_{de}~\mathrm{adp}^{\mathrm{ARX}}
 *       (da[n-1:k],db[n-1:k],dd[n-1:k] \rightarrow de[n-1:k])\f$.
 */
void max_adp_arx_i(const uint32_t k, const uint32_t n, 
						 const uint32_t lrot_const, double p_is[ADP_ARX_NISTATES], double* p, uint32_t* de, 
						 gsl_matrix* A[2][2][2][2], gsl_vector* B[ADP_ARX_NISTATES][WORD_SIZE + 1], gsl_vector* C[ADP_ARX_NISTATES],  
						 const uint32_t dc, const uint32_t dd, uint32_t* de_max, 
						 double p_max_is[ADP_ARX_NISTATES], double* p_max)
{
  // All bits assigned: this branch was entered only because its probability
  // exceeded the current maximum, so record it as the new best.
  if(k == n) {
	 assert(*p > *p_max);
#if 0									  // DEBUG
	 printf("[%s:%d] B[%2d] updcte 2^%f -> 2^%f\n", __FILE__, __LINE__, i, log2(*p_max), log2(*p));
#endif
	 for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
		p_max_is[is] = p_is[is];
	 }
	 *p_max = *p;
	 *de_max = *de;
	 return;
  } 

  // spos selects the set of matrices used at the one bit position whose
  // rotated index wraps around to 0
  uint32_t spos = 0;			  // special position
  uint32_t k_rot = ((k + lrot_const) % WORD_SIZE); // (i+r) mod n
  if(k_rot == 0) {
	 spos = 1;
  }

  // get the k-th bit of dc and the bit of dd at the rotated position
  uint32_t x = (dc >> k) & 1;
  uint32_t y = (dd >> k_rot) & 1;


  // cycle over the possible values of the k-th bits of *de
  for(uint32_t t = 0; t < 2; t++) { 

	 double new_p = 0.0;

	 // temporary state vectors and probabilities, one per initial state
	 gsl_vector* R[ADP_ARX_NISTATES];
	 double new_p_is[ADP_ARX_NISTATES];
	 for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
		R[is] = gsl_vector_calloc(ADP_ARX_MSIZE);
		new_p_is[is] = 0.0;
	 }

	 for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) { // initial states

		// R = A C advances the state vector by one bit; B[k+1] . R is the
		// estimate of the probability of completing the differential
		gsl_blas_dgemv(CblasNoTrans, 1.0, A[spos][x][y][t], C[is], 0.0, R[is]);
		gsl_blas_ddot(B[is][k + 1], R[is], &new_p_is[is]);

		new_p += new_p_is[is];

	 }	// is

    // continue only if the probability so far is still bigger than the threshold 
	 if(new_p > *p_max) {
		uint32_t new_de = *de | (t << k);
		// the advanced state vectors R become the C of the next bit position
		max_adp_arx_i(k+1, n, lrot_const, new_p_is, &new_p, &new_de, A, B, R, dc, dd, de_max, p_max_is, p_max);
	 }

	 for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
		gsl_vector_free(R[is]);
	 }

  } // t

  //  gsl_vector_free(L);
  return;
}

/**
 * Compute an array of bounds that can be used in the computation
 * of the maximum differential probability.
 *
 * For every bit position \p k from WORD_SIZE - 1 down to 1, every initial
 * state \p is and every S-function state \p i, a search with
 * max_adp_arx_i() is started at bit \p k from the unit state vector with 1 at
 * position \p i, and the probability found for initial state \p is is stored
 * in \p B[is][k] at index \p i.
 *
 * \param A transition probability matrices.
 * \param B array of \ref ADP_ARX_NISTATES rows by (WORD_SIZE + 1) columns of
 *        vectors of bounds. Column WORD_SIZE must be initialized by the
 *        caller; columns WORD_SIZE - 1 down to 1 are filled in here.
 * \param lrot_const left-rotation constant.
 * \param dc first input difference (the input to the rotation).
 * \param dd second input difference.
 * \param de_max maximum probability output difference; overwritten by the
 *        intermediate searches.
 *
 * \see max_adp_xor_bounds
 */
void max_adp_arx_bounds(gsl_matrix* A[2][2][2][2], gsl_vector* B[ADP_ARX_NISTATES][WORD_SIZE + 1],
                        const uint32_t lrot_const, const uint32_t dc,
                        const uint32_t dd, uint32_t* de_max)
{
  // scratch state vectors, one per initial state
  gsl_vector* C[ADP_ARX_NISTATES];
  for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
    C[is] = gsl_vector_calloc(ADP_ARX_MSIZE);
  }

  const uint32_t n = WORD_SIZE;

  for(uint32_t k = (WORD_SIZE - 1); k > 0; k--) {

    for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {

      for(uint32_t i = 0; i < ADP_ARX_MSIZE; i++) {

        // start from the unit vector with 1 at state i
        // NOTE(review): only C[is] is reset here; the other C[] entries keep
        // whatever the previous iterations left in them and max_adp_arx_i()
        // sums over all initial states - confirm that this is intended.
        gsl_vector_set_all(C[is], 0.0);
        gsl_vector_set(C[is], i, 1.0);

        uint32_t de_init = 0;
        double p_init = 0.0;
        double p_is_init[ADP_ARX_NISTATES] = {0.0};
        double p_max = 0.0;
        double p_max_is[ADP_ARX_NISTATES] = {0.0};
        max_adp_arx_i(k, n, lrot_const, p_is_init, &p_init, &de_init, A, B, C, dc, dd, de_max, p_max_is, &p_max);

        // keep the contribution of this initial state as the bound
        gsl_vector_set(B[is][k], i, p_max_is[is]);
      } // i
    } // is
  } // k

  for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
    gsl_vector_free(C[is]);
  }
}

/**
 * Compute the maximum differential probability of \ref ARX over all output
 * differences:
 * \f$\mathrm{max}_{de}~\mathrm{adp}^{\mathrm{ARX}}(da,db,dd \rightarrow de)\f$.
 * \b Complexity c: \f$O(n) \le c \le O(2^n)\f$.
 * 
 * \param A transition probability matrices.
 * \param lrot_const left-rotation constant.
 * \param da first input difference to \ref ADD.
 * \param db second input difference to \ref ADD.
 * \param dd input difference to the XOR.
 * \param de_max maximum probability output difference.
 * \return the maximum probability found by the search.
 *
 * \see max_adp_arx_bounds
 */
double max_adp_arx(gsl_matrix* A[2][2][2][2], const uint32_t lrot_const, 
                   const uint32_t da, const uint32_t db, const uint32_t dd, uint32_t* de_max)
{
  // the rotation acts on the sum of the two ADD input differences
  const uint32_t dc = ADD(da, db);

  gsl_vector* C[ADP_ARX_NISTATES];                 // one state vector per initial state
  gsl_vector* B[ADP_ARX_NISTATES][WORD_SIZE + 1];  // one array of bounds per initial state

  for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
    // state vector: 1 at the index of the s-th initial state
    C[s] = gsl_vector_calloc(ADP_ARX_MSIZE);
    gsl_vector_set(C[s], ADP_ARX_ISTATES[s], 1.0);

    // bounds: all zero, except B[s][n] which marks the final states
    for(uint32_t bit = 0; bit < WORD_SIZE + 1; bit++) {
      B[s][bit] = gsl_vector_calloc(ADP_ARX_MSIZE);
    }
    for(uint32_t f = 0; f < ADP_ARX_NFSTATES; f++) {
      gsl_vector_set(B[s][WORD_SIZE], ADP_ARX_FSTATES[s][f], 1.0);
    }
  }

  //  max_adp_arx_bounds(A, B, C, lrot_const, dc, dd, de_max);
  max_adp_arx_bounds(A, B, lrot_const, dc, dd, de_max);

  // put the state vectors back to the initial states before the final search
  for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
    gsl_vector_set_all(C[s], 0.0);
    gsl_vector_set(C[s], ADP_ARX_ISTATES[s], 1.0);
  }

  // full-length search starting at bit 0
  uint32_t de_start = 0;
  double p_start = 0.0;
  double p_start_is[ADP_ARX_NISTATES] = {0.0};
  double p_best = 0.0;
  double p_best_is[ADP_ARX_NISTATES] = {0.0};
  max_adp_arx_i(0, WORD_SIZE, lrot_const, p_start_is, &p_start, &de_start, A, B, C, dc, dd, de_max, p_best_is, &p_best);

  // rotate back
  // *de_max = RROT(*de_max, lrot_const);
  *de_max = LROT(*de_max, lrot_const);

#if 1									  // DEBUG
  // cross-check the search result against the direct computation
  double p_check = adp_arx(A, lrot_const, da, db, dd, *de_max);
#if 0
  printf("[%s:%d] ADP_ARX_MAX[(%2d|%8X,%8X,%8X)->%8X] = %6.5f\n", 
         __FILE__, __LINE__, lrot_const, da, db, dd, *de_max, p_best);
  printf("[%s:%d] ADP_ARX_THE[(%2d|%8X,%8X,%8X)->%8X] = %6.5f\n", 
         __FILE__, __LINE__, lrot_const, da, db, dd, *de_max, p_check);
#endif
  assert(p_best == p_check);
#endif

  // release the state vectors and the bounds
  for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
    gsl_vector_free(C[s]);
  }
  for(uint32_t bit = 0; bit < WORD_SIZE + 1; bit++) {
    for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
      gsl_vector_free(B[s][bit]);
    }
  }

  return p_best;
}


/* --- */

/**
 * One step of the recursive search for the output difference \p de of
 * maximum probability: tries both values of bit \p k of \p de and descends
 * to bit \p k + 1 only while the estimated probability exceeds \p *p_max.
 * When \p k reaches \p n the current candidate becomes the new maximum.
 */
void max_adp_arx_i(const uint32_t k, const uint32_t n, 
                   const uint32_t lrot_const, double p_is[ADP_ARX_NISTATES], double* p, uint32_t* de, 
                   gsl_matrix* A[2][2][2][2], gsl_vector* B[ADP_ARX_NISTATES][WORD_SIZE + 1], gsl_vector* C[ADP_ARX_NISTATES],  
                   const uint32_t dc, const uint32_t dd, uint32_t* de_max, 
                   double p_max_is[ADP_ARX_NISTATES], double* p_max)
{
  // every bit has been assigned: store the candidate as the new best
  if(k == n) {
    assert(*p > *p_max);
#if 0									  // DEBUG
    printf("[%s:%d] B[%2d] updcte 2^%f -> 2^%f\n", __FILE__, __LINE__, i, log2(*p_max), log2(*p));
#endif
    for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
      p_max_is[s] = p_is[s];
    }
    *p_max = *p;
    *de_max = *de;
    return;
  }

  // rotated bit index (k + r) mod n; index 0 is the special position
  const uint32_t rot_idx = (k + lrot_const) % WORD_SIZE;
  const uint32_t special = (rot_idx == 0) ? 1 : 0;

  // bit k of dc and the rotated bit of dd
  const uint32_t bit_c = (dc >> k) & 1;
  const uint32_t bit_d = (dd >> rot_idx) & 1;

  // try both values of the k-th bit of *de
  for(uint32_t bit_e = 0; bit_e < 2; bit_e++) {

    gsl_vector* next_state[ADP_ARX_NISTATES];
    double est_is[ADP_ARX_NISTATES];
    double est = 0.0;

    for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
      next_state[s] = gsl_vector_calloc(ADP_ARX_MSIZE);
      est_is[s] = 0.0;

      // advance the state by one bit, then weigh it with the bound for bit k+1
      gsl_blas_dgemv(CblasNoTrans, 1.0, A[special][bit_c][bit_d][bit_e], C[s], 0.0, next_state[s]);
      gsl_blas_ddot(B[s][k + 1], next_state[s], &est_is[s]);

      est += est_is[s];
    }

    // descend only while the estimate still beats the best probability so far
    if(est > *p_max) {
      uint32_t de_next = *de | (bit_e << k);
      max_adp_arx_i(k+1, n, lrot_const, est_is, &est, &de_next, A, B, next_state, dc, dd, de_max, p_max_is, p_max);
    }

    for(uint32_t s = 0; s < ADP_ARX_NISTATES; s++) {
      gsl_vector_free(next_state[s]);
    }
  }
}

/* --- */
	 uint32_t spos = 0;			  // special position
	 uint32_t rot_pos = ((k + lrot_const) % WORD_SIZE); // (i+r) mod n
	 if(rot_pos == 0) {
		spos = 1;
	 }



/* --- */

  // Orphaned fragment (not inside any function in this file).
  // alloc the composite array of bounds B that will be
  // computed as the sum of the four arrays B[is]
  gsl_vector* B_sum[WORD_SIZE + 1];
  for(uint32_t i = 0; i < WORD_SIZE + 1; i++) {
	 B_sum[i] = gsl_vector_calloc(ADP_XOR3_MSIZE);
  }


  // init the composite array of bounds B as the sum of the four arrays B[is];
  // only bit positions 0 .. WORD_SIZE-1 are filled in by this loop
  for(int k = 0; k < WORD_SIZE; k++) { // bit pos
	 for(int i = 0; i < ADP_XOR3_MSIZE; i++) { // state index
		double p_sum_i = 0.0;
		for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
		  double p_is_i = gsl_vector_get(B[is][k], i);
		  p_sum_i += p_is_i;
		}
		gsl_vector_set(B_sum[k], i, p_sum_i);
	 }
  }
  // NOTE(review): B_max is not declared anywhere in this fragment; presumably
  // B_sum[WORD_SIZE] was meant - confirm before reusing this code.
  gsl_vector_set_all(B_max[WORD_SIZE], 1.0); // ?

  //  max_adp_arx_bounds(A, B, lrot_const, da, db, dd_max, ADP_ARX_MSIZE);



/* --- */

/**
 * Compute an array of bounds that can be used in the computation
 * of the maximum differential probability.
 *
 * For every bit position from WORD_SIZE - 1 down to 1 and every S-function
 * state \p i, a search with max_adp_arx_i() is started from the unit state
 * vector with 1 at position \p i and the resulting maximum replaces entry
 * \p i of the bound vector of that bit position.
 *
 * \param A transition probability matrices.
 * \param B array of (WORD_SIZE + 1) vectors of bounds, one per bit position.
 * \param lrot_const left-rotation constant.
 * \param da first input difference to \ref ADD.
 * \param db second input difference to \ref ADD.
 * \param dd input difference to the XOR.
 * \param de_max maximum probability output difference.
 *
 * \see max_adp_xor_bounds
 */
void max_adp_arx_bounds(gsl_matrix* A[2][2][2][2], gsl_vector* B[WORD_SIZE + 1],
                        const uint32_t lrot_const, const uint32_t da, const uint32_t db,
                        const uint32_t dd, uint32_t* de_max)
{
  // the rotation acts on the sum of the two ADD input differences
  const uint32_t dc = ADD(da, db);
  const uint32_t n = WORD_SIZE;

  for(uint32_t bit = (WORD_SIZE - 1); bit != 0; bit--) {

    for(uint32_t state = 0; state < ADP_ARX_MSIZE; state++) {

      // unit vector selecting the starting state
      gsl_vector* unit = gsl_vector_calloc(ADP_ARX_MSIZE);
      gsl_vector_set(unit, state, 1.0);

      uint32_t de_start = 0;
      double p_start = gsl_vector_get(B[bit], state);
      double p_best = 0.0;
      max_adp_arx_i(state, bit, n, lrot_const, &p_start, &de_start, A, B, unit, dc, dd, de_max, &p_best);

      // the maximum found from this state is its bound at this bit position
      gsl_vector_set(B[bit], state, p_best);

      gsl_vector_free(unit);
    }
  }
}



/* --- */

/**
 * Compute an array of bounds that can be used in the computation
 * of the maximum differential probability.
 *
 * For every bit position \p k from WORD_SIZE - 1 down to 1, every initial
 * state \p is and every S-function state \p i, a search with
 * max_adp_arx_i() is started from the unit state vector with 1 at position
 * \p i, and the resulting maximum replaces entry \p i of \p B[is][k].
 *
 * \param A transition probability matrices.
 * \param B array of \ref ADP_ARX_NISTATES rows by (WORD_SIZE + 1) columns of
 *        vectors of bounds; column WORD_SIZE is (re)initialized here to mark
 *        the final states of each initial state.
 * \param lrot_const left-rotation constant.
 * \param da first input difference to \ref ADD.
 * \param db second input difference to \ref ADD.
 * \param dd input difference to the XOR.
 * \param de_max maximum probability output difference.
 *
 * \see max_adp_xor_bounds
 */
void max_adp_arx_bounds(gsl_matrix* A[2][2][2][2], gsl_vector* B[ADP_ARX_NISTATES][WORD_SIZE + 1],
                        const uint32_t lrot_const, const uint32_t da, const uint32_t db,
                        const uint32_t dd, uint32_t* de_max)
{
  // dc is the input to the rotation
  uint32_t dc = ADD(da, db);

  // scratch state vectors, allocated once and reused for every bit position
  gsl_vector* C[ADP_ARX_NISTATES];
  for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
    C[is] = gsl_vector_calloc(ADP_ARX_MSIZE);
  }

  for(uint32_t k = (WORD_SIZE - 1); k > 0; k--) {

    // every bit position starts from all-zero state vectors
    for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
      gsl_vector_set_all(C[is], 0.0);
    }

    for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) { // 4 initial states

      gsl_vector_set_all(B[is][WORD_SIZE], 0.0); // clear the final state B[n] = L
      for(uint32_t fs = 0; fs < ADP_ARX_NFSTATES; fs++) {
        uint32_t fstate = ADP_ARX_FSTATES[is][fs];
        gsl_vector_set(B[is][WORD_SIZE], fstate, 1.0); // init B[n] to the final states
      }

      for(uint32_t i = 0; i < ADP_ARX_MSIZE; i++) {

        // unit vector selecting the starting state i
        gsl_vector_set_all(C[is], 0.0);
        gsl_vector_set(C[is], i, 1.0);

        uint32_t n = WORD_SIZE;
        uint32_t de_init = 0;
        double p_init = gsl_vector_get(B[is][k], i);
        double p_max_i = 0.0;
        max_adp_arx_i(i, k, n, lrot_const, &p_init, &de_init, A, B, C, dc, dd, de_max, &p_max_i);
        gsl_vector_set(B[is][k], i, p_max_i);
      } // i

    }   // is

  } // k

  for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) {
    gsl_vector_free(C[is]);
  }
}


/* --- */
	 // Orphaned fragment (not inside any function in this file).
	 // compute the final bound at bit k as the sum of the bounds
	 // BB[0], BB[1], BB[2], BB[3] for each of the four initial  states 'is'
	 for(uint32_t i = 0; i < A_size; i++) {
		// B[k][i] = \sum_{is} BB[is][k][i]
		double pk_sum_i = 0.0;										 // sum at bit k of the i-th state for each BB
		for(uint32_t is = 0; is < ADP_ARX_NISTATES; is++) { // initial states
		  pk_sum_i += gsl_vector_get(BB[is][k], i);			 // get the i-th state of each max BB
		}
		gsl_vector_set(B[k], i, pk_sum_i);
	 }


/* --- */
/**
 * Compute an \em upper \em bound \f$B[k][i]\f$ on the maximum probability 
 * of the differential \f$(dc[n-1:k], dd[n-1+r:k+r] \rightarrow de[n-1+r:k+r])\f$,
 * where \f$r\f$ is the rotation constant of \ref ARX, \f$dc = da + db\f$, 
 * where \f$da, db\f$ are the inputs to \ref ADD in \ref ARX, 
 * starting from initial state \p i of the S-function i.e.
 * \f$\mathrm{dp}(dc[n-1:k],dd[n-1+r:k+r] \rightarrow de[n-1+r:k+r]) = 
 * L A_{n-1} A_{n-2} \ldots A_{k} C^{i}_{k-1}\f$,
 * given the upper bounds \f$B[k][i]\f$ on the probabilities of the differentials
 * \f$(dc[n-1:j], dd[n-1+r:j+r] \rightarrow de[n-1+r:j+r])\f$ for \f$j = k+1, k+2, \ldots, n-1\f$,
 * where \f$L = [1~1~\ldots~1]\f$ is a row vector of size \p A_size and \f$C^{i}_{k-1}\f$ 
 * is a unit column vector of size \p A_size with 1 at position \f$i\f$
 * and \f$C^{i}_{-1} = C\f$.
 *
 * \note Note that \f$dc = da + db\f$, where \f$da, db\f$ are the inputs to
 *       \ref ADD in the \ref ARX operation so that the DP of \ref ARX is:
 *       \f$\mathrm{adp}^{\mathrm{ARX}}(da,db,dd \rightarrow de)\f$.
 *       
 * \param i index of the state of the S-function: \p A_size \f$> i \ge 0\f$
 *        (only forwarded to the recursive calls and used by the debug print).
 * \param k current bit position: \f$ n \ge k \ge 0\f$; the recursion
 *        terminates when \p k equals \p n.
 * \param n word size.
 * \param p the estimated probability at bit position \p k.
 * \param de output difference constructed so far.
 * \param A transition probability matrices.
 * \param B array of (WORD_SIZE + 1) vectors of size \p A_size containing upper bounds on the 
 *        maximum probabilities of all \p j bit differentials \f$n \ge j \ge 1\f$
 *        beginning from any state \p i: \p A_size \f$> i \ge 0\f$.
 * \param C column vector of size \p A_size, initialized with 1 at state index \p i.
 * \param dc first input difference.
 * \param dd second input difference.
 * \param de_max maximum probability output difference.
 * \param p_max the maximum probability.
 * \param A_size size of the square transition probability matrices
 *        (equivalently, the number of states of the S-function).
 *
 * \b Algorithm \b Outline:
 *
 * Recursively assign values to the bits of the output difference \p de starting 
 * at bit position \f$j = k\f$ and terminating at bit position \p n. The recursion 
 * proceeds to bit position \f$j + 1\f$ only if the  probability \f$p_j\f$ of the 
 * partially constructed differential \f$(dc[j:k], dd[j+r:k+r] \rightarrow de[j+r:k+r])\f$ 
 * multiplied by the bound of the probability until the end \f$B[j+1]\f$ is bigger than 
 * the best probability found so far i.e. if:
 * \f$B[j+1] A_{j} A_{j-1} \ldots A_{k} C^{i}_{k-1} > p_{\mathrm{max}}\f$.
 * When \f$j = n\f$ update the max.: 
 * \f$p_{\mathrm{max}} \leftarrow p_{n-1} = 
 * \mathrm{dp}(dc[n-1:k],dd[n-1+r:k+r] \rightarrow de[n-1+r:k+r])\f$.
 *
 * \note Note that since \f$dc = da + db\f$, where \f$da, db\f$ are the inputs to
 *       \ref ADD in the \ref ARX operation so that the DP of \ref ARX is:
 *       \f$\mathrm{max}_{de}~\mathrm{dp}(dc[n-1:k],dd[n-1+r:k+r] \rightarrow de[n-1+r:k+r]) = 
 *       \mathrm{max}_{de}~\mathrm{adp}^{\mathrm{ARX}}
 *       (da[n-1:k],db[n-1:k],dd[n-1+r:k+r] \rightarrow de[n-1+r:k+r])\f$.
 *
 * \see max_adp_xor_i
 */
void max_adp_arx_i(const int i, const uint32_t k, const uint32_t n, double* p, uint32_t* de,
                   gsl_matrix* A[2][2][2][2], gsl_vector* B[WORD_SIZE + 1], gsl_vector* C,  
                   const uint32_t dc, const uint32_t dd, uint32_t* de_max, 
                   double* p_max, uint32_t A_size)
{
  // all bits assigned: the candidate reached here only because it beats the
  // current maximum, so record it
  if(k == n) {
    assert(*p > *p_max);
#if 0									  // DEBUG
    printf("[%s:%d] B[%2d] update 2^%f -> 2^%f\n", __FILE__, __LINE__, i, log2(*p_max), log2(*p));
#endif
    *p_max = *p;
    *de_max = *de;
    return;
  } 

  // get the k-th bit of dc and dd
  uint32_t x = (dc >> k) & 1;
  uint32_t y = (dd >> k) & 1;

  // cycle over the possible values of the k-th bits of *de
  for(uint32_t t = 0; t < 2; t++) { 

    // temporary state vector for the next bit position
    gsl_vector* R = gsl_vector_calloc(A_size);
    double new_p = 0.0;

    // R = A C, new_p = B[k+1] . R
    // NOTE(review): A is declared with four index dimensions but only three
    // are supplied here; other revisions of this routine in the file index
    // A[spos][x][y][t] - confirm the intended leading index.
    gsl_blas_dgemv(CblasNoTrans, 1.0, A[x][y][t], C, 0.0, R);
    gsl_blas_ddot(B[k + 1], R, &new_p);

    // continue only if the probability so far is still bigger than the threshold 
    if(new_p > *p_max) {
      uint32_t new_de = *de | (t << k);
      max_adp_arx_i(i, k+1, n, &new_p, &new_de, A, B, R, dc, dd, de_max, p_max, A_size);
    }
    gsl_vector_free(R);

  }
  return;
}


/* --- */

/*
 * \note Note that since \f$dc = da + db\f$, where \f$da, db\f$ are the inputs to
 *       \ref ADD in the \ref ARX operation so that the DP of \ref ARX is:
 *       \f$\mathrm{max}_{de}~\mathrm{dp}(dc[n-1:k],dd[n-1:k] \rightarrow de[n-1:k]) = 
 *          \mathrm{max}_{de}~\mathrm{adp}^{\mathrm{ARX}}(da,db,dd \rightarrow de)\f$.
 *
 */

/* --- */

/**
 * Compute an upper bound \f$B[k][i]\f$ on the maximum probability 
 * of the differential \f$(da[n-1:k], db[n-1:k], dd[n-1:k] \rightarrow de[n-1:k])\f$
 * starting from initial state \p i of the S-function 
 * given the upper bounds \f$B[k][i]\f$ on the probabilities of the differentials
 * \f$(da[n-1:j], db[n-1:j], dd[n-1:j] \rightarrow de[n-1:j])\f$ 
 * for \f$j = k+1, k+2, \ldots, n-1\f$.
 * 
 * \param i index of the state of the S-function: \p A_size \f$> i \ge 0\f$
 *        (only forwarded to the recursive calls and used by the debug print).
 * \param k current bit position: \f$ n \ge k \ge 0\f$; the recursion
 *        terminates when \p k equals \p n.
 * \param n word size.
 * \param p the estimated probability at bit position \p k.
 * \param de output difference constructed so far.
 * \param A transition probability matrices.
 * \param B array of (WORD_SIZE + 1) vectors containing upper bounds on the 
 *        maximum probabilities of all \p j bit differentials \f$n \ge j \ge 1\f$
 *        beginning from any state \p i: \p A_size \f$> i \ge 0\f$.
 * \param C column vector of states, initialized with 1 at state index \p i.
 * \param da first input difference.
 * \param db second input difference.
 * \param dd third input difference.
 * \param de_max maximum probability output difference.
 * \param p_max the maximum probability.
 *
 * \see max_adp_xor3_i
 */
void max_adp_arx_i(const int i, const uint32_t k, const uint32_t n, double* p, uint32_t* de,
                   gsl_matrix* A[2][2][2][2], gsl_vector* B[WORD_SIZE + 1], gsl_vector* C,  
                   const uint32_t da, const uint32_t db, const uint32_t dd, uint32_t* de_max, 
                   double* p_max)
{
  // all bits assigned: the candidate reached here only because it beats the
  // current maximum, so record it
  if(k == n) {
    assert(*p > *p_max);
#if 0									  // DEBUG
    printf("[%s:%d] B[%2d] update 2^%f -> 2^%f\n", __FILE__, __LINE__, i, log2(*p_max), log2(*p));
#endif
    *p_max = *p;
    *de_max = *de;
    return;
  } 

  // get the k-th bit of da, db, dd
  uint32_t x = (da >> k) & 1;
  uint32_t y = (db >> k) & 1;
  uint32_t z = (dd >> k) & 1;

  // cycle over the possible values of the k-th bits of *de
  for(uint32_t t = 0; t < 2; t++) { 

    // temporary state vector for the next bit position
    gsl_vector* R = gsl_vector_calloc(ADP_XOR3_MSIZE);
    double new_p = 0.0;

    // R = A C, new_p = B[k+1] . R
    gsl_blas_dgemv(CblasNoTrans, 1.0, A[x][y][z][t], C, 0.0, R);
    gsl_blas_ddot(B[k + 1], R, &new_p);

    // continue only if the probability so far is still bigger than the threshold 
    if(new_p > *p_max) {
      uint32_t new_de = *de | (t << k);
      max_adp_arx_i(i, k+1, n, &new_p, &new_de, A, B, R, da, db, dd, de_max, p_max);
    }
    gsl_vector_free(R);

  }
  return;
}

/* --- */
/**
 *
 * Compute an upper bound \f$B[k][i]\f$ on the maximum probability 
 * of the differential  
 * \f$(da[n-1:k],db[n-1:k],dd[n-1:k]\rightarrow de[n-1:k])\f$,
 * summed over a set of \ref ADP_XOR3_SET_SIZE input differences \p dd,
 * given the upper bounds \f$B[k][i]\f$ on the probabilities of the differentials
 * \f$(da[n-1:j],db[n-1:j],dd[n-1:j]\rightarrow de[n-1:j])\f$,
 * for \f$j = k+1, k+2, \ldots, n-1\f$.
 * 
 * \param i index of the state of the S-function: \p A_size \f$> i \ge 0\f$
 *        (only forwarded to the recursive calls).
 * \param k current bit position: \f$ n \ge k \ge 0\f$; the recursion
 *        terminates when \p k equals \p n.
 * \param n word size.
 * \param p the estimated probability at bit position \p k.
 * \param de output difference constructed so far.
 * \param A transition probability matrices.
 * \param B array of (WORD_SIZE + 1) vectors containing upper bounds on the 
 *        maximum probabilities of all \p j bit differentials \f$n \ge j \ge 1\f$
 *        beginning from any state \p i: \p A_size \f$> i \ge 0\f$.
 * \param C array of \ref ADP_XOR3_SET_SIZE state vectors, one per element of \p dd.
 * \param da first input difference.
 * \param db second input difference.
 * \param dd set of input differences.
 * \param de_max maximum probability output difference.
 * \param p_max the maximum probability.
 *
 * \b Algorithm \b Outline:
 *
 * For each value of bit \p k of \p de the state vector of every element of
 * the set is advanced by one bit and weighed with the bound \f$B[k+1]\f$; the
 * resulting estimates are summed over the set and the recursion continues to
 * bit \p k + 1 only if the sum exceeds \p p_max.
 *
 * \see max_adp_xor3_set_i
 */
void max_adp_arx_i(const int i, const uint32_t k, const uint32_t n, double* p, uint32_t* de,
                   gsl_matrix* A[2][2][2][2], gsl_vector* B[WORD_SIZE + 1], gsl_vector* C[ADP_XOR3_SET_SIZE],  
                   const uint32_t da, const uint32_t db, const uint32_t dd[ADP_XOR3_SET_SIZE], uint32_t* de_max, 
                   double* p_max)
{
  // all bits assigned: record the candidate as the new maximum
  if(k == n) {
    assert(*p >= *p_max);
    *p_max = *p;
    *de_max = *de;
    return;
  } 

  // get the k-th bit of da, db
  uint32_t x = (da >> k) & 1;
  uint32_t y = (db >> k) & 1;

  // cycle over the possible values of the k-th bits of *de
  for(uint32_t t = 0; t < 2; t++) { // choose the k-th bit of de

    double new_p = 0.0;

    // per-element state vectors and estimates; named p_set so that the
    // parameter p is not shadowed
    gsl_vector* R[ADP_XOR3_SET_SIZE];
    double p_set[ADP_XOR3_SET_SIZE];
    for(uint32_t j = 0; j < ADP_XOR3_SET_SIZE; j++) {
      R[j] = gsl_vector_calloc(ADP_XOR3_MSIZE);
      p_set[j] = 0.0;
    }

    for(uint32_t j = 0; j < ADP_XOR3_SET_SIZE; j++) { 
      uint32_t z = (dd[j] >> k) & 1;
      // R = A C, p_set[j] = B[k+1] . R
      gsl_blas_dgemv(CblasNoTrans, 1.0, A[x][y][z][t], C[j], 0.0, R[j]);
      gsl_blas_ddot(B[k + 1], R[j], &p_set[j]);

      new_p += p_set[j];
    }

    // continue only if the probability so far is still bigger than the threshold 
    if(new_p > *p_max) {
      uint32_t new_de = *de | (t << k);
      max_adp_arx_i(i, k+1, n, &new_p, &new_de, A, B, R, da, db, dd, de_max, p_max);
    }

    for(uint32_t j = 0; j < ADP_XOR3_SET_SIZE; j++) {
      gsl_vector_free(R[j]);
    }
  }
  return;
}

/* --- */
#if 0										  // DEBUG
	 printf("[%s:%d] istate = %d: ", __FILE__, __LINE__, istate);
#endif
#if 0										  // DEBUG
		printf("%d ", fstate);
#endif
#if 0										  // DEBUG
	 printf("\n");
#endif


/* --- */

  for(uint32_t r = 0; r < WORD_SIZE; r++) {

	 }

/* --- */
/**
 * The ARX operation: the sum of \p a and \p b is rotated by \p k positions
 * and the result is XOR-ed with \p d, i.e. XOR(d, ROT(ADD(a, b), k)).
 */
uint32_t arx(uint32_t a, uint32_t b, uint32_t d, uint32_t k)
{
  return XOR(d, ROT(ADD(a, b), k));
}

/* --- */
/**
 * Initial states for the \f$\mathrm{adp}^{\mathrm{ARX}}\f$ S-function.
 *
 * NOTE(review): the guard below tests ADP_ARX_ISTATE, which is not one of the
 * macros it protects (ADP_ARX_ISTATE_1 .. ADP_ARX_ISTATE_4) - confirm that
 * this is the intended guard name.
 */
#ifndef ADP_ARX_ISTATE
#define ADP_ARX_ISTATE_1 0 /**< First initial state for the \f$\mathrm{adp}^{\mathrm{ARX}}\f$ S-function. */
#define ADP_ARX_ISTATE_2 2 /**< Second initial state for the \f$\mathrm{adp}^{\mathrm{ARX}}\f$ S-function. */
#define ADP_ARX_ISTATE_3 4 /**< Third initial state for the \f$\mathrm{adp}^{\mathrm{ARX}}\f$ S-function. */
#define ADP_ARX_ISTATE_4 6 /**< Fourth initial state for the \f$\mathrm{adp}^{\mathrm{ARX}}\f$ S-function. */
#endif

/* --- */

/* 

 * <a href="https://en.wikipedia.org/wiki/MD4">MD4</a>
 * <a href="https://en.wikipedia.org/wiki/MD5">MD5</a>
 * <a href="https://en.wikipedia.org/wiki/BLAKE_%28hash_function%29">BLAKE</a>
 * <a href="https://en.wikipedia.org/wiki/Skein_%28hash_function%29">Skein</a>
 * <a href="https://131002.net/siphash/">SipHash</a>
 * <a href="https://en.wikipedia.org/wiki/RC5">RC5</a>
 * <a href="https://en.wikipedia.org/wiki/FEAL">FEAL</a>
 * <a href="https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm">TEA</a>
 * <a href="https://en.wikipedia.org/wiki/XTEA">XTEA</a>
 * <a href="https://en.wikipedia.org/wiki/Salsa20">Salsa20</a>

 */

/* --- */

/*
 * <tr>
 * <td></td>
 * <td></td>
 * <td>Automatic search for ADD differential trails in block cipher TEA.</td>
 * <td></td>
 * </tr>
 *
 * <tr>
 * <td></td>
 * <td></td>
 * <td>Automatic search for ADD differential trails in block cipher XTEA.</td>
 * <td></td>
 * </tr>
 * <tr>
 * <td></td>
 * <td></td>
 * <td>Automatic search for XOR differential trails in block cipher XTEA.</td>
 * <td></td>
 * </tr>
 *
 * <tr>
 * <td></td>
 * <td></td>
 * <td>Computing an ADD partial difference distribution table (pDDT) for the F-function of block cipher XTEA.</td>
 * <td></td>
 * </tr>
 *
 * <tr>
 * <td></td>
 * <td></td>
 * <td>Computing an XOR partial difference distribution table (pDDT) for the F-function of block cipher XTEA.</td>
 * <td></td>
 * </tr>
 *
 * <tr>
 * <td></td>
 * <td></td>
 * <td>Computing an ADD partial difference distribution table (pDDT) for the F-function of block cipher TEA.</td>
 * <td></td>
 * </tr>
 */

/* ---- */

/*
 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\mathrm{adp}^{\ll}\f$</td>
 * <td>The ADD differential probability of left shift (LSH).</td>
 * <td><center>\f$O(1)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\mathrm{adp}^{\gg}\f$</td>
 * <td>The ADD differential probability of right shift (RSH).</td>
 * <td><center>\f$O(1)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\mathrm{adp}^{\gg\oplus}\f$</td>
 * <td>The ADD differential probability of RSH followed by XOR.</td>
 * <td><center>\f$O(n)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td><center>\f$O()\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td><center>\f$O()\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\mathrm{adp}^{3\oplus}(da,db,dc \rightarrow dd)\f$</td>
 * <td>The ADD differential probability of XOR with three inputs.</td>
 * <td><center>\f$O(n)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\mathrm{adp}^{\oplus}(da,db \rightarrow dc)\f$</td>
 * <td>The ADD differential probability of XOR.</td>
 * <td><center>\f$O(n)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\mathrm{adp}^{\oplus}_{\mathrm{FI}}(a,db \rightarrow db)\f$</td>
 * <td>The ADD differential probability of XOR with one fixed input.</td>
 * <td><center>\f$O(n)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td><center>\f$O()\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td><center>\f$O()\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td><center>\f$O()\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td><center>\f$O()\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\mathrm{xdp}^{+}(da,db \rightarrow dc)\f$</td>
 * <td>The XOR differential probability of ADD.</td>
 * <td><center>\f$O(n)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\max_{dc}~\mathrm{adp}^{\oplus}(da, db \rightarrow dc)\f$</td>
 * <td>The maximum ADD differential probability of XOR.</td>
 * <td><center>\f$O(n) \le c \le O(2^n)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>\f$\max_{dc}~\mathrm{xdp}^{+}(da, db \rightarrow dc)\f$</td>
 * <td>The maximum XOR differential probability of ADD.</td>
 * <td><center>\f$O(n) \le c \le O(2^n)\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td></td>
 * <td><center>\f$O()\f$</center></td>
 * </tr>

 * <tr>
 * <td></td>
 * <td></td>
 * <td>row 2, cell 1</td>
 * <td>row 2, cell 2</td>
 * <td>row 2, cell 3</td>
 * </tr>
 */

/* --- */
Indeed, providing a generic implementation for 

it is not a trivial task to come up with an implementation that can handle any ARX design, since the degrees of freedom are too much to assess. For example, 

it can be applied as it is to perform the required computation. 

 and only then apply it to the problem at hand.



/*
 * All YAARX files:
 * 
 * yaarx/include/adp-rsh-xor.hh
 * yaarx/include/adp-shift.hh
 * yaarx/include/adp-tea-f-fk-ddt.hh
 * yaarx/include/adp-tea-f-fk-noshift.hh
 * yaarx/include/adp-tea-f-fk.hh
 * yaarx/include/adp-xor-fi.hh
 * yaarx/include/adp-xor-pddt.hh
 * yaarx/include/adp-xor.hh
 * yaarx/include/adp-xor3.hh
 * yaarx/include/adp-xtea-f-fk.hh
 * yaarx/include/common.hh
 * yaarx/include/eadp-tea-f.hh
 * yaarx/include/max-adp-xor-fi.hh
 * yaarx/include/max-adp-xor.hh
 * yaarx/include/max-adp-xor3-set.hh
 * yaarx/include/max-adp-xor3.hh
 * yaarx/include/max-xdp-add.hh
 * yaarx/include/tea-add-ddt-search.hh
 * yaarx/include/tea-add-threshold-search.hh
 * yaarx/include/tea-f-add-pddt.hh
 * yaarx/include/tea.hh
 * yaarx/include/xdp-add-pddt.hh
 * yaarx/include/xdp-add.hh
 * yaarx/include/xdp-tea-f-fk.hh
 * yaarx/include/xdp-xtea-f-fk.hh
 * yaarx/include/xtea-add-threshold-search.hh
 * yaarx/include/xtea-f-add-pddt.hh
 * yaarx/include/xtea-f-xor-pddt.hh
 * yaarx/include/xtea-xor-threshold-search.hh
 * yaarx/include/xtea.hh
 *
 * yaarx/src/adp-lsh-program.cc
 * yaarx/src/adp-rsh-program.cc
 * yaarx/src/adp-rsh-xor.cc
 * yaarx/src/adp-shift.cc
 * yaarx/src/adp-tea-f-fk-ddt.cc
 * yaarx/src/adp-tea-f-fk-noshift.cc
 * yaarx/src/adp-tea-f-fk.cc
 * yaarx/src/adp-xor-fi-program.cc
 * yaarx/src/adp-xor-fi.cc
 * yaarx/src/adp-xor-pddt.cc
 * yaarx/src/adp-xor-program.cc
 * yaarx/src/adp-xor.cc
 * yaarx/src/adp-xor3-program.cc
 * yaarx/src/adp-xor3.cc
 * yaarx/src/adp-xtea-f-fk.cc
 * yaarx/src/common.cc
 * yaarx/src/eadp-tea-f-program.cc
 * yaarx/src/eadp-tea-f.cc
 * yaarx/src/max-adp-xor-fi-program.cc
 * yaarx/src/max-adp-xor-fi.cc
 * yaarx/src/max-adp-xor-program.cc
 * yaarx/src/max-adp-xor.cc
 * yaarx/src/max-adp-xor3-program.cc
 * yaarx/src/max-adp-xor3-set.cc
 * yaarx/src/max-adp-xor3.cc
 * yaarx/src/max-eadp-tea-f-program.cc
 * yaarx/src/max-xdp-add-program.cc
 * yaarx/src/max-xdp-add.cc
 * yaarx/src/tea-add-ddt-search.cc
 * yaarx/src/tea-add-threshold-search.cc
 * yaarx/src/tea-f-add-pddt.cc
 * yaarx/src/tea.cc
 * yaarx/src/xdp-add-pddt.cc
 * yaarx/src/xdp-add-program.cc
 * yaarx/src/xdp-add.cc
 * yaarx/src/xdp-tea-f-fk.cc
 * yaarx/src/xdp-xtea-f-fk.cc
 * yaarx/src/xtea-add-threshold-search.cc
 * yaarx/src/xtea-f-add-pddt.cc
 * yaarx/src/xtea-f-xor-pddt.cc
 * yaarx/src/xtea-xor-threshold-search.cc
 * yaarx/src/xtea.cc
 *
 * yaarx/tests/adp-rsh-xor-tests.cc
 * yaarx/tests/adp-shift-tests.cc
 * yaarx/tests/adp-tea-f-fk-ddt-tests.cc
 * yaarx/tests/adp-tea-f-fk-noshift-tests.cc
 * yaarx/tests/adp-tea-f-fk-tests.cc
 * yaarx/tests/adp-xor-fi-tests.cc
 * yaarx/tests/adp-xor-pddt-tests.cc
 * yaarx/tests/adp-xor-tests.cc
 * yaarx/tests/adp-xor3-tests.cc
 * yaarx/tests/adp-xtea-f-fk-tests.cc
 * yaarx/tests/eadp-tea-f-tests.cc
 * yaarx/tests/max-adp-xor-fi-tests.cc
 * yaarx/tests/max-adp-xor-tests.cc
 * yaarx/tests/max-adp-xor3-set-tests.cc
 * yaarx/tests/max-adp-xor3-tests.cc
 * yaarx/tests/max-xdp-add-tests.cc
 * yaarx/tests/tea-add-ddt-search-tests.cc
 * yaarx/tests/tea-add-threshold-search-tests.cc
 * yaarx/tests/tea-f-add-pddt-tests.cc
 * yaarx/tests/xdp-add-pddt-tests.cc
 * yaarx/tests/xdp-add-tests.cc
 * yaarx/tests/xdp-tea-f-fk-tests.cc
 * yaarx/tests/xdp-xtea-f-fk-tests.cc
 * yaarx/tests/xtea-add-threshold-search-tests.cc
 * yaarx/tests/xtea-xor-threshold-search-tests.cc
 *
 * Committed revision 3648.
 */

/* --- */

// 
// Automatic search for n-round differentials for XTEA using XOR differences,
// based on Matsui search strategy. Uses a threshold to cut-off low probability differentials.
// Not guaranteed to find *the best* trail.
// 
// n - number of current round
// nrounds - total number of rounds
// A - matrices used to compute ADP-XOR
// B - array with the best differential probabilities for i rounds: 0 <= i < n
// Bn - the best probability on n rounds. It is updated recursively.
// diff - array of differentials
// trail - the actual differential trail for n-rounds
// diff_vec_p  - vector of differentials (dx,dy,p) sorted by probability p
// diff_vec_xy - vector of differentials containing the same elements as diff_vec_p,
//               but sorted by index k = (dx 2^{n} + dy)
// 
// the final prob. is the product of the probabilities of the F-function (F) and the second addition (ADD2):
// 
// F-function: y = F(x) = x + ((x << 4) ^ (x >> 5))
// ADD2:       yy = xx + (y ^ (delta + key))
// 
// Thus xdp-f2 ~= xdp-add2 * xdp-f
// 
// Every entry in the trail[] and diff[] arrays contain dx, dyy and xdp-f2:
// 
// trail[i].dx = dx
// trail[i].dy = dyy
// trail[i].p = xdp-f2~ = xdp-add2(dxx, dy -> dyy) * xdp-f(dx, dx_lxr -> dy)
// 
// where dxx = trail[i-1].dx, if i > 0 and dxx = 0 if i = 0
// 
// Note: All diff sets contain differences and probabilities for the XTEA F-function
// i.e. they do NOT include the second ADD operation!!!
// 
// See also: tea_add_threshold_search()
// 

/* --- */

// 
//
// Recursively compute all differentials (dx -> dy) 
// that have probability larger than a threshold P_THRES
// for the F-function (the first ADD operation) of XTEA:
// 
// y = x + ((x << 4) ^ (x >> 5))
// 
// Logic sketch:
// 
// 1) Treat the input a = ((x << 4) ^ (x >> 5)) as independent from the input x
// 2) Compute a list of differentials (da, dx, dy) for the ADD operation
// 3) Store in a vector only the differentials for which da == ((dx << 4) ^ (dx >> 5))
// 
// See also: tea_f_add_pddt_i
// 
//void mmult_xdp_xtea_f_v2(const uint32_t k, const uint32_t n, 


/* --- */
// 
// Construct a list of differentials for the XTEA F-function
// that have probability above certain threshold p_thres
// An updated version of adp_xtea_f_diff_vector() using STL sets
// 
// See also: adp-xtea-f.cc:adp_xtea_f_diff_vector(), xtea-search-xor-threshold-v2.cc:xtea_xor_pddt()
// 

/* --- */

// 
// This procedure recursively computes a list of differentials (da -> dd)  
// for the F function of XTEA that have probability ADP_F_LXR(da -> dd) 
// that is bigger than a pre-defined threshold p_thres.
// 
// da: input to the F-function
// db, dc: inputs to the first XOR
// dd: output from the first XOR
// dk = 0, dz = (da + dd): inputs to the second XOR
// dy: output from the second XOR and from the F-function
// 
// db = da << 4
// dc \in {(da >> 5), (da >> 5) + 1, (da >> 5) - 2^{n-5}, (da >> 5) - 2^{n-5} + 1}
// dd: (db, dc -> dd) through xor
//  A[2][2][2] - matrices for computation of adp_xor
// AA[2][2][2] - matrices for computation of adp_xor with one fixed input
//         key - secret key for the round
//       delta - pre-defined round constant
// diff_set_dx_dy - set of differentials with probability >= p_thres (a pDDT)
// 


/* --- */
// 
// Automatic search for n-round differentials for XTEA. Based on Matsui search strategy.
// Uses a threshold to cut-off low probability differentials.
// Does not find *the best* trail.
// 
// n - number of current round
// nrounds - total number of rounds
// A - matrices used to compute ADP-XOR
// AA - matrices used to compute ADP-XOR with one input -- a fixed value and not a difference
// B - array with the best differential probabilities for i rounds: 0 <= i < n
// Bn - the best probability on n rounds. It is updated recursively.
// diff - array of differentials
// trail - the actual differential trail for n-rounds
// diff_vec_p  - vector of differentials (dx,dy,p) sorted by probability p
// diff_vec_xy - vector of differentials containing the same elements as diff_vec_p,
//               but sorted by index k = (dx 2^{n} + dy)
// 
// See also: tea-search-threshold.cc:round_thres()
// 

/* --- */

//
// An updated version of adp-tea-f.cc:mmult_f. The changes are
// 
// - Uses STL set instead of vector
// - Computes expected probabilities averaged over all keys and delta-s rather than fixed-key probabilities
// 
// The main logic is the same as adp-tea-f.cc:mmult_f():
// 
// This procedure recursively lists all differentials for XOR3: (da, db, dc -> dd) 
// that satisfy the following properties:
// 
// (1) adp-xor3(da, db, dc -> dd) > p_thres
// (2) db = da << 4
// (3) dc \in {(da >> 5), (da >> 5) + 1, (da >> 5) - 2^{n-5}, (da >> 5) - 2^{n-5} + 1} = {dx[0], dx[1], dx[2], dx[3]}
//     where da >> 5 = dx[i], 0 <= i <= 3
// 
// Only the entries for which EADP_F(da -> dd) > p_thres are stored
//
// See also: adp-tea-f.cc:mmult_f() and adp-tea-f.cc:mmult()
// 
// old name: mmult_f__v2
// 

/* -- */
/**
 * 
 * Given are two differences \f$da\f$ and \f$dc\f$, respectively input and output of the RSH operation,
 * that are only partially constructed up to bit k (counting from the LSB).
 * 
 * This function performs checks on da and dc and outputs if dc is such that dc = RSH(da, R).
 * The idea is to be able to discard pairs of differences (da, dc) before they have been 
 * fully constructed. This allows to more efficiently construct a list of valid differentials for 
 * TEA-F recursively. We use these conditions in mmult_f() to discard invalid entries early.
 * Note: the function is NOT optimal. It is overly restrictive, i.e. all differences (da,dc)
 * which pass the conditions are valid, but there also exist valid differences that
 * do not pass the checks. The reason is that it is hard to detect all valid pairs
 * before they have been constructed.
 * 
 * \param k bit position: \ref WORD_SIZE \f$> k \ge 0\f$.
 * \param new_da input difference to RSH partially constructed up to bit \f$k\f$.
 * \param new_dc output difference from RSH partially constructed up to bit \f$k\f$.
 *
 * We use the following relations:
 * 
 * dc = RSH(da, R) iff dc \in {dc_0, dc_1, dc_2, dc_3} where:
 * 
 * dc_0 = (da >> R)                      (1)
 * dc_2 = (da >> R) - 2^{n-R}            (2)
 * dc_1 = (da >> R) + 1                  (3)
 * dc_3 = (da >> R) - 2^{n-R} + 1        (4)
 * 
 * Based on the above we perform the following checks:
 * 
 * Check-1: (k >= R) check (k-R) LSBits
 * 
 * if (k >= R) we check if the first (k-R) LSB bits of (da>>R) are equal to the first (k-R) bits 
 * of dc_0 plus the additional factors from the above equations. So we check which of the following 
 * four equations hold:
 * 
 * (da >> R)[0:(k - R)] = (dc_0)[0:(k - R)]                   (1a)
 * (da >> R)[0:(k - R)] = (dc_0 + 2^{n-R})[0:(k - R)]         (2a)
 * (da >> R)[0:(k - R)] = (dc_0 - 1)[0:(k - R)]               (3a)
 * (da >> R)[0:(k - R)] = (dc_0 + 2^{n-R} - 1)[0:(k - R)]     (4a)
 * 
 * Check-2: check that R LSBits of da are not zero: da[(r-1):0] != 0 (why??)
 * 
 * Check-3: (k >= R) AND (k > (n - R)) check ((n-R) MSBits)
 * 
 * When (k > (n-R)), (da >> R)[k] = 0 and we check the top (n-R) MSBits of ds. We check if 
 * the initial four equations hold for the (n-R) MSBits of the operands:
 * 
 * dc_0[(n-1):(n-R+1)] = (da >> R)[(n-1):(n-R+1)]
 * dc_1[(n-1):(n-R+1)] = ((da >> R) + 1)[(n-1):(n-R+1)]
 * dc_2[(n-1):(n-R+1)] = ((da >> R) - 2^{n-R})[(n-1):(n-R+1)]
 * dc_3[(n-1):(n-R+1)] = ((da >> R) - 2^{n-R} + 1)[(n-1):(n-R+1)]
 * 
 *
 */

/* --- */
// 
// Compute the max for the i-th state
// 
// Recursively computes dd_max = MAX_{dd} ADP-XOR3(da, db, dc -> dd)
// by starting at bit position k = 0 and proceeding up to k = 32 
// only if the probability so far (p) is still above
// the maximum that was found up to now (p_max)
// 
// Note: The maximum p_max is obtained using a pre-computed array of bounds B[WORD_SIZE+1][NSTATES]
// For every bit position j the array B[j] contains the maximum probabilities p_max_i 
// for each of the NSTATES number of states (p_max_i: 0 <= i < NSTATES)
// By using bound on every single state p_max_i we obtain a tighter bound p_max
// on the max probability. As a result the search is more efficient as compared
// to adp-xor3.cc:mmult_maxt_rec() (adp-xor3.cc:max_adp_xor3_rec()).
// 
// Note: The array of bound B is computed by running the same function max_adp_xor3_i()
// for every bit position k and every state i (see adp_xor3.cc:max_adp_xor3_bounds())
// 
// See also: max_adp_xor_i()
// 

/* --- */
/**
 * 
 * Recursively compute the maximum differential probability over all output differences
 * of the partial \f$(n-k)\f$-bit differential
 * \f$\mathrm{max}_{dd}~\mathrm{adp}^{3\oplus}(da[n-1:k],db[n-1:k],dc[n-1:k] \rightarrow dd[n-1:k])\f$.
 * 
 * \param k current bit position: \f$ n > k \ge 0\f$.
 * \param n word size.
 * \param p the transition probability of state \p i at bit position \p k.
 * \param dd output difference.
 * \param A transition probability matrices.
 * \param C unit row vector initialized with 1 at the initial state.
 * \param da first input difference.
 * \param db second input difference.
 * \param dc third input difference.
 * \param dd_max maximum probability output difference.
 * \param p_max the maximum probability.
 *
 * \b Algorithm \b Sketch:
 *
 * The function works recursively over the bits of the output difference
 * starting at the LS bit position \f$k = 0\f$ and proceeding to \f$k+1\f$ 
 * only if the probability so far is still above
 * the maximum that was found up to now. The initial value for the maximum 
 * probability \p p_max is 0 and is updated dynamically during the process
 * every time a higher probability is encountered. The recursion
 * stops at the MSB \f$k = n\f$.
 *
 * \note This function is more efficient than exhaustive search over all 
 *       output differences \ref max_adp_xor3_exper, but is less efficient
 *       than the function \ref max_adp_xor3 that uses bounds.
 *       The reason is that this function \ref max_adp_xor3_rec_i, 
 *       at every bit position implicitly assumes that the remaining probability until 
 *       the end (i.e. until the MSB) is 1, while the bounds computed by \ref max_adp_xor3
 *       are tighter than that and thus more branches of the recursion are cut
 *       earlier in the computation.
 * 
 * See also: max_adp_xor_i()
 * 
 */

/* --- */
// 
// Compute the max for the i-th state (for definition of "state" see S-functions)
// 
// Recursively computes dd_max = MAX_{dd} ADP-XOR3(da, db -> dd)
// by starting at bit position k = 0 and proceeding up to k = 32 
// only if the probability so far (p) is still above
// the maximum that was found up to now (p_max)
// 
// Note: The maximum p_max is obtained using a pre-computed array of bounds B[WORD_SIZE+1][NSTATES]
// For every bit position j the array B[j] contains the maximum probabilities p_max_i 
// for each of the NSTATES number of states (p_max_i: 0 <= i < NSTATES)
// By using bound on every single state p_max_i we obtain a tighter bound p_max
// on the max probability. As a result the search is more efficient as compared
// to a direct recursive search.
// 
// Note: The array of bounds B is computed by running the same function mmult_max_i()
// for every bit position k and every state i (see max_adp_xor_bounds()).
// 
// See also: adp-xor3.cc:mmult_max_i()
// 

/* --- */
/**
 *
 * \param i index of the state of the S-function: \p A_size \f$> i \ge 0\f$.
 * \param k current bit position: \f$ n > k \ge 0\f$.
 * \param n word size.
 * \param p the transition probability of state \p i at bit position \p k.
 * \param dd output difference.
 * \param A transition probability matrices.
 * \param B array of size \p A_size rows by (\p n + 1) columns containing upper bounds on the 
 *        maximum transition probabilities of every state \p i at every bit position \p k
 * \param C unit vector, initialized with 1 at state index \p i.
 * \param da first input difference.
 * \param db second input difference.
 * \param dd_max maximum probability output difference.
 * \param p_max the maximum probability.
 * \param A_size size of the square transition probability matrices
 *        (equivalently, the number of states of the S-function).
 * 
 * Meaning of the array of bounds \p B[\p n][\p A_size].
 * 
 * Let \f$ B[k][i] = \bar{p}\f$ for some i: \p A_size \f$> i \ge 0\f$ and some k: \f$ n > k \ge 0\f$. 
 * The probability \f$\bar{p}\f$ is a \em bound in the following sense.
 * 
 * For any output difference dc
 * 
 * Let \f$dc[k-1:0]\f$ be a partially constructed \f$k\f$-bit output difference
 * and let \f$H[k-1] = A_{k-1} A_{k-2} \ldots A_{0}\f$. 
 *
 * Let \f$dc[n-1:k]\f$ be any assignment of the remaining \f$(n-k)\f$ MS bits of dc
 * and let \f$G[k] = L A_{n-1} A_{n-2} \ldots A_{k}\f$ be the multiplication of the 
 * corresponding transition probability matrices. Then 
 * \f$\mathrm{dp}(da,db \rightarrow dc) = G[k] H[k-1] \le B[k] H[k-1]\f$ 
 * for \em any choice of \f$dc[n-1:k]\f$.
 *
 * In other words, for any choice of \f$dc[n-1:k]\f$ the actual probabilities
 * \f$G[k][i]\f$ will always be less than the bound probabilities \f$B[k][i]\f$ for all \p i.
 *
 * \f$B[k][i]\f$ is an \em upper \em bound on the probability of state \p i at bit position \p k
 * because clearly for any choice \f$dc[n-1:k]\f$ of the \f$(n-k)\f$ MS bits of dc, the probability
 * \f$ L A_{n-1} A_{n-2} \ldots A_{k} C^i_{k-1}\f$ will never be bigger than \f$B[k][i]\f$.
 * Consequently, for any transition probability vector at bit position \p k:
 * \f$H[k-1] = A_{k-1} A_{k-2} \ldots A_{0}\f$, the total probability of any differential satisfies
 * \f[\mathrm{dp}(da,db \rightarrow dc) = G[k] H[k-1] \le B[k] H[k-1].\f]
 *
 */

/* --- */

/*
 * ADP_XTEA_F(da -> dd) ~= ADP_XOR(db, dc -> dy) x ADP_XOR_FIXED_INPUT((key + delta), (dy + da) -> dd) > 0.0
 * 
 * where
 * 
 * db = da << 4
 * dc[i] \in {(da >> 5), (da >> 5) + 1, (da >> 5) - 2^{n-5}, (da >> 5) - 2^{n-5} + 1}
 * 
 * Algorithm sketch: 
 * 
 *   -# Compute dy: dx_{dc[i]} ADP_XOR(db, dc[i] -> dy) 
 *   -# Compute dt = dy + da
 *   -# Compute dd: max_{dd} ADP_XOR((key + delta), dt -> dd)
 *   -# For da and dd compute the exact probability: p = ADP_XTEA_F(da -> dd)
 *   -# return p, dd
 */

/* --- */
// 
// Assigns the i-th bit of x, dx, and the key (k0 and k1)
// 
// x_cnt[k0][k1][dx] - stores the number of right pairs for a given key k0, k1 and input difference dx
// 


/* --- */

// Assigns the i-th bit of x and dx
// This function is used to compute the maximum probability input difference dx 
// for a given output difference dy (max_dx_adp_f_fk())
// 

/* --- */
// 
// For the TEA F-function, for fixed keys k0 and k1, fixed constant delta,
// and fixed input and output differences (resp. dx, dy),
// count the number of values x for which the following equation holds:
// 
// y2 - y1 = dy ,  where y1 = tea_f(x), y2 = tea_f(x + dx):
// 
// y1 = ((x << 4) + k0) ^ (x + delta) ^ ((x >> 5) + k1) ,
// y2 = (((x + dx) << 4) + k0) ^ ((x + dx) + delta) ^ (((x + dx) >> 5) + k1) .
// 
// Return the probability based on this count i.e.
// basically computes ADP_F for fixed key and delta.
// 
/*  */

/*  *  \f$y = F'(k_0, k_1, \delta | x) = ((x \ll 4) + k_0) \oplus (x + \delta) \oplus ((x \gg 5) + k_1)\f$.
 */

/* --- */
/*  * \brief The ADD differential probability of the TEA F-function for a fixed key and
 *        round constants (\f$\mathrm{adp}^{F}(k_0, k_1, \delta | da \rightarrow dd)\f$) 
 *        computed using full DDT. Complexity \f$O(2^n)\f$.
 */


// ---

# --- ADP-XOR ---
#
# Build rules for the adp-xor program and the adp-xor-tests binary.

# Object files linked into the adp-xor program.
ADP_XOR_OBJ = $(OBJ_PATH)adp-xor.o $(OBJ_PATH)adp-xor-program.o
# Object files linked into the adp-xor-tests binary (additionally pulls in common.o).
ADP_XOR_TESTS_OBJ = $(OBJ_PATH)common.o $(OBJ_PATH)adp-xor.o $(OBJ_PATH)adp-xor-tests.o

# Link the adp-xor program into $(BIN_PATH).
adp-xor: adp-xor.o adp-xor-program.o
	$(CC) $(LFLAGS) $(ADP_XOR_OBJ) -o $(BIN_PATH)adp-xor $(GSL_LIB)

# Link the adp-xor-tests binary into $(BIN_PATH).
adp-xor-tests: common.o adp-xor.o adp-xor-tests.o
	$(CC) $(LFLAGS) $(ADP_XOR_TESTS_OBJ) -o $(BIN_PATH)adp-xor-tests $(GSL_LIB)

# The object rules below list no prerequisites; each recipe writes its output
# into $(OBJ_PATH). Presumably $(CFLAGS) carries the compile-only flag -- confirm.
adp-xor.o:
	$(CC) $(CFLAGS) -I$(INCLUDES) $(SOURCE_PATH)adp-xor.cc -o $(OBJ_PATH)adp-xor.o

adp-xor-program.o: 
	$(CC) $(CFLAGS) -I$(INCLUDES) $(SOURCE_PATH)adp-xor-program.cc -o $(OBJ_PATH)adp-xor-program.o

adp-xor-tests.o: 
	$(CC) $(CFLAGS) -I$(INCLUDES) $(TESTS_PATH)adp-xor-tests.cc -o $(OBJ_PATH)adp-xor-tests.o

// ---

# Object files linked into the xdp-tea-f-fk-tests binary.
XDP_TEA_F_FK_TESTS_OBJ = $(OBJ_PATH)common.o $(OBJ_PATH)xdp-add.o $(OBJ_PATH)max-xdp-add.o $(OBJ_PATH)tea.o $(OBJ_PATH)xdp-tea-f-fk.o $(OBJ_PATH)xdp-tea-f-fk-tests.o

# Link the xdp-tea-f-fk-tests binary into $(BIN_PATH). The rules for common.o,
# xdp-add.o, max-xdp-add.o and tea.o are not part of this snippet.
xdp-tea-f-fk-tests: common.o xdp-add.o max-xdp-add.o tea.o xdp-tea-f-fk.o xdp-tea-f-fk-tests.o
	$(CC) $(LFLAGS) $(XDP_TEA_F_FK_TESTS_OBJ) -o $(BIN_PATH)xdp-tea-f-fk-tests $(GSL_LIB)

# Object rules: no prerequisites are listed; output goes to $(OBJ_PATH).
xdp-tea-f-fk.o: 
	$(CC) $(CFLAGS) -I$(INCLUDES) $(SOURCE_PATH)xdp-tea-f-fk.cc -o $(OBJ_PATH)xdp-tea-f-fk.o

xdp-tea-f-fk-tests.o: 
	$(CC) $(CFLAGS) -I$(INCLUDES) $(TESTS_PATH)xdp-tea-f-fk-tests.cc -o $(OBJ_PATH)xdp-tea-f-fk-tests.o

// ---

/**
 * One labelled point of the EADP_F vs. ADP_F_FK plot.
 */
struct coord_t {
  /** Ox: the differential (dx,dy). */
  uint64_t diff;
  /** Oy: the probability p(dx -> dy). */
  double p;
  /** Label: number of (k0,k1,delta) for which (dx -> dy) holds with probability p. */
  uint64_t nparams;
};

/**
 * Fixed parameters to the F-function of TEA, together with a probability
 * associated with them.
 */
typedef struct {
  uint32_t key_0;  /**< First key word. */
  uint32_t key_1;  /**< Second key word. */
  uint32_t delta;  /**< Round constant. */
  double p;        /**< Probability stored for (key_0, key_1, delta). */
} fparams_t;

/**
 * Orders fparams_t records by decreasing probability.
 *
 * The comparison is strict (\p x.p > \p y.p): an element never compares
 * "less than" itself or an element of equal probability. This makes the
 * operator a strict weak ordering, which the standard sorting algorithms
 * and ordered containers require; the former non-strict ">=" violated
 * irreflexivity.
 *
 * \param x left operand.
 * \param y right operand.
 * \return true iff \p x has strictly higher probability than \p y.
 */
bool operator<(fparams_t x, fparams_t y)
{
  return x.p > y.p;
}

// 
// Investigate EADP_F (expected ADP_F) vs ADP_F_FK (ADP_F for fixed key and delta)
// 
// - fix dx, dy
// - compute EADP_F(dx -> dy)
// - for every key and delta compute ADP_F_FK(key, delta | dx -> dy)
// - Show that EADP_F is the average ADP_F over all keys and delta-s
//   (enforced by the final assert of every (dx, dy) iteration)
// - Show that even when EADP_F != 0.0, there exist keys and deltas s.t. ADP_F_FK = 0.0
// - Compute the biggest deviation of ADP_F_FK from EADP_F
// 
// plot_vec - output vector of plot coordinates. NOTE: it is only written to in the
//            disabled (#else) branch below; with the active "#if 1" branch the
//            per-parameter probabilities are printed instead and plot_vec is untouched.
// 
// The search is exhaustive over all (dx, dy), all (k0, k1, delta) and all inputs x1,
// so it is only feasible for small word sizes.
// 
void test_eadp_f_vs_adp_f_fixed_key(std::vector<coord_t>* plot_vec)
{
  uint32_t lsh_const = TEA_LSH_CONST; 
  uint32_t rsh_const = TEA_RSH_CONST;

  // init matrices
  gsl_matrix* A[2][2][2][2];	  // matrices to compute ADP
  adp_xor3_alloc_matrices(A);
  adp_xor3_sf(A);
  adp_xor3_normalize_matrices(A);

  // the index i enumerates all pairs (dx, dy): dx is the low word, dy the next one
  uint64_t all_diffs = ALL_WORDS * ALL_WORDS;
  for(uint64_t i = 0; i < all_diffs; i++) { // fix dx, dy
	 uint64_t temp = i;
	 uint32_t dx = temp & MASK;
	 temp /= ALL_WORDS; 
	 uint32_t dy = temp & MASK;
	 temp /= ALL_WORDS; 

	 // expected probability of (dx -> dy); the value is taken from the return value
	 // while the address of the same variable is also passed as an argument
	 double p_eadp = eadp_tea_f(A, dx, dy, &p_eadp, lsh_const, rsh_const);
#if 0
	 printf("[%s:%d] %2d %2d | EADP_F(%8X -> %8X) = %6.5f\n", __FILE__, __LINE__, lsh_const, rsh_const, dx, dy, p_eadp);
#endif
	 uint64_t cnt_rpairs_all = 0;  // count the right pairs for all keys and delta
	 // all_inputs = (number of (k0, k1, delta) triples) x (number of inputs x1)
	 uint64_t all_inputs = ALL_WORDS * ALL_WORDS * ALL_WORDS * ALL_WORDS;
	 uint64_t all_keysndeltas = ALL_WORDS * ALL_WORDS * ALL_WORDS;

	 // one entry per (k0, k1, delta) holding its fixed-key probability
	 std::vector<fparams_t> fparams_vector;

	 for(uint64_t j = 0; j < all_keysndeltas; j++) { // for all k0, k1, delta
		// decode j into (delta, k1, k0), delta being the low word;
		// this temp shadows the outer temp used to decode i
		uint64_t temp = j;
		uint32_t delta = temp & MASK;
		temp /= ALL_WORDS; 
		uint32_t k1 = temp & MASK;
		temp /= ALL_WORDS; 
		uint32_t k0 = temp & MASK;
		temp /= ALL_WORDS; 

		// count the inputs x1 for which tea_f(x1 + dx) - tea_f(x1) == dy
		uint32_t cnt_rpairs = 0;
		uint64_t all_pairs = ALL_WORDS;
		for(uint32_t x1 = 0; x1 < ALL_WORDS; x1++) {
		  uint32_t x2 = ADD(x1, dx);
		  uint32_t y1 = tea_f(x1, k0, k1, delta, lsh_const, rsh_const);
		  uint32_t y2 = tea_f(x2, k0, k1, delta, lsh_const, rsh_const);
		  uint32_t y_sub = SUB(y2, y1);
		  if(y_sub == dy) {
			 cnt_rpairs++;
		  }
		}
		cnt_rpairs_all += cnt_rpairs;
		// fixed-key probability for this (k0, k1, delta)
		double p_adp_fk = (double)cnt_rpairs / (double)(all_pairs);
#if 0
		printf("[%s:%d] %2d %2d | ADP_F_FK(%8X %8X %8X | %8X -> %8X) = %6.5f\n", __FILE__, __LINE__, lsh_const, rsh_const, k0, k1, delta, dx, dy, p_adp_fk);
#endif
		fparams_t fparams;
		fparams.key_0 = k0;
		fparams.key_1 = k1;
		fparams.delta = delta;
		fparams.p = p_adp_fk;
		fparams_vector.push_back(fparams);
	 }	// k0, k1, delta

	 assert(fparams_vector.size() == all_keysndeltas);
	 std::vector<fparams_t>::iterator vec_iter;
#if 0
	 uint32_t cnt_el = 0;
	 double p_prev = fparams_vector.begin()->p;
#endif
	 for(vec_iter = fparams_vector.begin(); vec_iter != fparams_vector.end(); vec_iter++) {
#if 1
		// active branch: print the probability of every (k0, k1, delta)
		fparams_t fparams = *vec_iter;
		uint32_t k0 = fparams.key_0;
		uint32_t k1 = fparams.key_1;
		uint32_t delta = fparams.delta;
		double p = fparams.p;
		printf("[%s:%d] %2d %2d | ADP_F_FK(%8X %8X %8X | %8X -> %8X) = %6.5f\n", __FILE__, __LINE__, lsh_const, rsh_const, k0, k1, delta, dx, dy, p);
#else
		// disabled branch: count runs of consecutive equal probabilities and store
		// one coordinate per run in plot_vec.
		// NOTE(review): cnt_el and p_prev are declared under "#if 0" above, so this
		// branch does not compile as is; it also presumably expects fparams_vector
		// to be sorted by p, which is not done in this function -- confirm before enabling.
		double p = vec_iter->p;
		if(p == p_prev) {
		  cnt_el++;
		} else {
#if 0
		  double percent = ((double)cnt_el / (double)all_keysndeltas) * (100.0);
		  printf("[%s:%d] ADP_F_FK(%8X -> %8X) = %f | %5d %3.0f%%\n", __FILE__, __LINE__, dx, dy, p_prev, cnt_el, percent);
#endif
		  coord_t coord;
		  coord.diff = i;
		  coord.nparams = cnt_el;
		  coord.p = p_prev;
		  plot_vec->push_back(coord);
		  p_prev = p;
		  cnt_el = 1;
		}
		// print the last one
		std::vector<fparams_t>::iterator next = vec_iter;
		next++;
		if(next == fparams_vector.end()) {
#if 0
		  double percent = ((double)cnt_el / (double)all_keysndeltas) * (100.0);
		  printf("[%s:%d] ADP_F_FK(%8X -> %8X) = %f | %5d %3.0f%%\n", __FILE__, __LINE__, dx, dy, p_prev, cnt_el, percent);
#endif
		  coord_t coord;
		  coord.diff = i;
		  coord.nparams = cnt_el;
		  coord.p = p_prev;
		  plot_vec->push_back(coord);
		}
#endif
	 }
	 // average of the fixed-key probabilities over all keys and deltas
	 double p_adp_avrg = (double)cnt_rpairs_all / (double)all_inputs;
#if 0									  // plot average
	 printf("%5lld %f\n", i, p_adp_avrg);
#endif
#if 1
	 printf("[%s:%d] %2d %2d |     EADP_F(%8X -> %8X) = %6.5f\n", __FILE__, __LINE__, lsh_const, rsh_const, dx, dy, p_eadp);
	 printf("[%s:%d] %2d %2d | ADP_F_AVRG(%8X -> %8X) = %6.5f\n", __FILE__, __LINE__, lsh_const, rsh_const, dx, dy, p_adp_avrg);
	 printf("\n");
#endif
	 // exact floating-point equality between the expected and the averaged probability
	 assert(p_eadp == p_adp_avrg);
  } // dx, dy
  adp_xor3_free_matrices(A);
}

// ---

/**
 * One labelled point of the EADP_F vs. ADP_F_FK plot.
 */
struct coord_t {
  /** Ox: the differential (dx,dy). */
  uint64_t diff;
  /** Oy: the probability p(dx -> dy). */
  double p;
  /** Label: number of (k0,k1,delta) for which (dx -> dy) holds with probability p. */
  uint64_t nparams;
};

/**
 * Fixed parameters to the F-function of TEA, together with a probability
 * associated with them.
 */
typedef struct {
  uint32_t key_0;  /**< First key word. */
  uint32_t key_1;  /**< Second key word. */
  uint32_t delta;  /**< Round constant. */
  double p;        /**< Probability stored for (key_0, key_1, delta). */
} fparams_t;

/**
 * Orders fparams_t records by decreasing probability.
 *
 * The comparison is strict (\p x.p > \p y.p): an element never compares
 * "less than" itself or an element of equal probability. This makes the
 * operator a strict weak ordering, which the standard sorting algorithms
 * and ordered containers require; the former non-strict ">=" violated
 * irreflexivity.
 *
 * \param x left operand.
 * \param y right operand.
 * \return true iff \p x has strictly higher probability than \p y.
 */
bool operator<(fparams_t x, fparams_t y)
{
  return x.p > y.p;
}


// ---

/**
 * Smoke test for max_dx_adp_f_fk().
 *
 * Draws a random key (k0, k1) and random differences, calls
 * max_dx_adp_f_fk() once with the initial round constant and the TEA shift
 * constants, and optionally prints the result when
 * DEBUG_ADP_TEA_F_FK_TESTS is enabled. No expected value is checked; the
 * only assertion on the result is that it compares equal to itself.
 */
void test_max_dx_adp_f_fk()
{
  printf("[%s:%d] Running test %s() ...\n", __FILE__, __LINE__, __FUNCTION__);
  assert(TEA_LSH_CONST < TEA_RSH_CONST);

  const uint32_t nbits = WORD_SIZE;
  uint32_t shift_left = TEA_LSH_CONST;
  uint32_t shift_right = TEA_RSH_CONST;
  uint32_t round_const = DELTA_INIT & MASK; 

  // The four random draws are made in this exact order: k0, k1, dx, dy.
  uint32_t key_lo = random32() & MASK; 
  uint32_t key_hi = random32() & MASK; 
  uint32_t diff_in = random32() & MASK; 
  uint32_t diff_out = random32() & MASK; 

  // diff_in is passed by address, so the callee may overwrite it.
  double p_best = max_dx_adp_f_fk(nbits, &diff_in, diff_out, key_lo, key_hi, round_const, shift_left, shift_right);

#if DEBUG_ADP_TEA_F_FK_TESTS
  printf("[%s:%d] n %d, key %8X %8X, delta %8X, L %d, R %d\n", __FILE__, __LINE__, WORD_SIZE, key_lo, key_hi, round_const, shift_left, shift_right);
  printf("[%s:%d] ADP_F_FK(%d %d | %8X %8X %8X | %8X -> %8X) = %f 2^%f\n", __FILE__, __LINE__, shift_left, shift_right, round_const, key_lo, key_hi, diff_in, diff_out, p_best, log2(p_best));
#endif  // #if DEBUG_ADP_TEA_F_FK_TESTS

  // Keeps p_best "used" when the debug output is compiled out.
  assert(p_best == p_best);
  printf("[%s:%d] WORD_SIZE = %d. Test %s() OK.\n", __FILE__, __LINE__, WORD_SIZE, __FUNCTION__);
}

// ---

  if(n < (rsh_const * 2)) {
	 printf("[%s:%d] n = %d, rsh_const = %d\n", __FILE__, __LINE__, n, rsh_const);
  }
  if(n < (rsh_const * 2)) {
	 printf("[%s:%d] n = %d, rsh_const = %d\n", __FILE__, __LINE__, n, rsh_const);
  }

// # --- ADP-TEA-F-FK ---

// # Object files that make up the adp-tea-f-fk-tests binary.
ADP_TEA_F_FK_TESTS_OBJ = $(OBJ_PATH)common.o $(OBJ_PATH)adp-tea-f-fk.o

// # Link the tests from ADP_TEA_F_FK_TESTS_OBJ, i.e. from the same objects
// # that are listed as prerequisites of the rule.
adp-tea-f-fk-tests: common.o adp-tea-f-fk.o
	$(CC) $(LFLAGS) $(ADP_TEA_F_FK_TESTS_OBJ) -o $(BIN_PATH)adp-tea-f-fk-tests $(GSL_LIB)


// ---
//#define MASK (uint32_t)(~((uint64_t)0xffffffff << WORD_SIZE)) /**< A mask for the WORD_SIZE LS bits of a word. */


// ---
/**
 * Exhaustive cross-check of max_eadp_tea_f() against eadp_tea_f().
 *
 * For every pair of shift constants in [0, WORD_SIZE) whose sum does not
 * exceed WORD_SIZE, and for every input difference dx below ALL_WORDS:
 * max_eadp_tea_f() is asked for an output difference and its probability,
 * then eadp_tea_f() is evaluated on that same (dx -> dy) pair. Both values
 * are printed and must be exactly equal, otherwise the test aborts.
 */
void test_max_eadp_tea_f_all()
{
  // Matrices used by the ADP computations; released at the end of the test.
  gsl_matrix* A[2][2][2][2];
  adp_xor3_alloc_matrices(A);
  adp_xor3_sf(A);
  adp_xor3_normalize_matrices(A);

  for(uint32_t lsh = 0; lsh < WORD_SIZE; lsh++) {
	 for(uint32_t rsh = 0; rsh < WORD_SIZE; rsh++) {
		// Shift pairs whose sum exceeds the word size are not tested.
		if((lsh + rsh) <= WORD_SIZE) {
		  for(uint32_t dx = 0; dx < ALL_WORDS; dx++) {
			 uint32_t dy = 0;	  // output difference, filled in by max_eadp_tea_f()

			 // The probability is both returned and written through the pointer
			 // argument, hence the variable appears in its own initializer.
			 double p_max = max_eadp_tea_f(A, dx, &dy, &p_max, lsh, rsh);
			 const double log2_p_max = log2(p_max);
			 printf("[%s:%d] %d %d | MAX_EADP_TEA_F_TH3(%8X -> %8X) = %31.30f = 2^%f\n", __FILE__, __LINE__, lsh, rsh, dx, dy, p_max, log2_p_max);

			 double p_eadp = eadp_tea_f(A, dx, dy, &p_eadp, lsh, rsh);
			 const double log2_p_eadp = log2(p_eadp);
			 printf("[%s:%d] %d %d | MAX_EADP_TEA_F_TH2(%8X -> %8X) = %31.30f = 2^%f\n", __FILE__, __LINE__, lsh, rsh, dx, dy, p_eadp, log2_p_eadp);

			 // Report the mismatch before the assertion terminates the run.
			 const bool b_equal = (p_max == p_eadp);
			 if(!b_equal) {
				printf("[%s:%d] WARNING:     p_adp = 2^%f !=  p_max_adp = 2^%f\n", __FILE__, __LINE__, log2_p_max, log2_p_eadp);
			 }
			 assert(b_equal);
			 //			 assert(float_equals(p_max, p_eadp));
		  }
		}
	 }
  }

  adp_xor3_free_matrices(A);
}

// ---

// # Object files of the EADP-TEA-F program. Every line but the last ends in a
// # backslash so that make reads the whole list as a single variable value.
EADP_TEA_F_OBJ = \
$(OBJ_PATH)common.o \
$(OBJ_PATH)adp-xor3.o \
$(OBJ_PATH)max-adp-xor3.o \
$(OBJ_PATH)max-adp-xor3-set.o \
$(OBJ_PATH)adp-shift.o \
$(OBJ_PATH)tea.o \
$(OBJ_PATH)eadp-tea-f.o \
$(OBJ_PATH)eadp-tea-f-program.o

// ---

//double adp_rsh_xor_approx(uint32_t da, uint32_t dx, uint32_t db, int r)
double adp_rsh_xor_approx(uint32_t da, uint32_t db, int r)
{
  gsl_matrix* A[2][2][2];
  double p_tot = 0.0;

  // allocate memory
  adp_xor_alloc_matrices(A);
  adp_xor_sf(A);
  adp_xor_normalize_matrices(A);

  // compute
  uint32_t dx[4] = {0, 0, 0, 0};

  adp_rsh_odiffs(dx, da, r);

  for(int i = 0; i < 4; i++) {

	 double p1 = adp_rsh(da, dx[i], r);
	 double p2 = adp_xor(A, da, dx[i], db);
#if DEBUG_ADP_RSH_XOR
	 printf("[%s:%d] ADP_RSH[(%d -%d-> %d)] = %6.5f\n", 
			  __FILE__, __LINE__, da, r, dx[i], p1);
	 printf("[%s:%d] ADP_XOR[(%d, %d -> %d)] = %6.5f\n", 
			  __FILE__, __LINE__, da, dx[i], db, p2);
#endif
	 p_tot += (p1 * p2);
  }
  printf("p_tot = %f\n", p_tot);

  // free memory
  adp_xor_free_matrices(A);

  return p_tot;
}

// ---

// Disabled snippet (compiled out by #if 0). For a set of exactly two
// elements it draws p_dc[0] from {0.00, 0.01, ..., 1.00} and assigns the
// complement to p_dc[1], so that the two probabilities sum to 1.
#if 0									  // generate non 1.0 probabilities
			 if(ADP_XOR3_SET_SIZE == 2) {
				// random() % 101 lies in [0, 100]; dividing by 100 maps it into [0, 1]
				p_dc[0] = (double)(random() % 101) / (double)100;
				assert((p_dc[0] >= 0) && (p_dc[0] <= 1.0));
				p_dc[1] = 1.0 - p_dc[0];
				assert((p_dc[1] >= 0) && (p_dc[1] <= 1.0));
			 }
#endif

// ---
				// Detached snippet: assign entry j a random probability in steps of
				// 0.01 (random() % 101 lies in [0, 100]) and check it is within [0, 1].
				p_dc[j] = (double)(random() % 101) / (double)100;
				assert((p_dc[j] >= 0) && (p_dc[j] <= 1.0));
/* ---- */
	  {0x11010001, 0x44040004, 0, 0.003906}, //(2^-8.000000)
	  {0x40000000, 0x11010000, 0, 0.250000}, //(2^-2.000000)
	  {0x11010000,  0x4040000, 0, 0.015625}, //(2^-6.000000)
	  { 0x4040000,  0x1110000, 0, 0.062500}, //(2^-4.000000)
	  { 0x1110000,   0x400000, 0, 0.015625}, //(2^-6.000000)
	  {  0x400000,   0x110000, 0, 0.250000}, //(2^-2.000000)
	  {  0x110000,    0x40000, 0, 0.062500}, //(2^-4.000000)
	  {   0x40000,    0x10000, 0, 0.250000}, //(2^-2.000000)
	  {   0x10000,        0x0, 0, 0.250000}, //(2^-2.000000)
	  {       0x0,    0x10000, 0, 1.000000}, //(2^0.000000)
	  {   0x10000,    0x40000, 0, 0.250000}, //(2^-2.000000)
	  {   0x40000,   0x110000, 0, 0.250000}, //(2^-2.000000)
	  {  0x110000,   0x400000, 0, 0.062500}, //(2^-4.000000)
	  {  0x400000,  0x1110000, 0, 0.250000}, //(2^-2.000000)
	  { 0x1110000,  0x4040000, 0, 0.015625}, //(2^-6.000000)
	  { 0x4040000, 0x11010000, 0, 0.062500}, //(2^-4.000000)
	  {0x11010000, 0x40000000, 0, 0.015625}, //(2^-6.000000)
	  {0x40000000, 0x11010001, 0, 0.250000}, //(2^-2.000000)
	  {0x11010001,  0x4040004, 0, 0.003906}, //(2^-8.000000)
	  { 0x4040004,  0x1110011, 0, 0.015625} //(2^-6.000000)
back to top