diff --git a/drama/src/rev-mc.c b/drama/src/rev-mc.c index dc94777..4c765a9 100644 --- a/drama/src/rev-mc.c +++ b/drama/src/rev-mc.c @@ -172,7 +172,7 @@ reduce_masks(std::vector masks) //---------------------------------------------------------- // from https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation -uint64_t +inline uint64_t next_bit_permutation(uint64_t v) { uint64_t t = v | (v - 1); @@ -300,8 +300,9 @@ find_row_mask(std::vector &sets, row_mask <<= CL_SHIFT;// skip the lowest 6 bits since they're used for CL addressing while (row_mask < last_mask) { + uint64_t next_row_mask = next_bit_permutation(row_mask); if (row_mask & LS_BITMASK(CL_SHIFT)) { - row_mask = next_bit_permutation(row_mask); + row_mask = next_row_mask; continue; } @@ -316,7 +317,7 @@ find_row_mask(std::vector &sets, break; next_mask: - row_mask = next_bit_permutation(row_mask); + row_mask = next_row_mask; } // super hackish way to recover the real row mask @@ -330,6 +331,9 @@ find_row_mask(std::vector &sets, } //---------------------------------------------------------- +#pragma push_options +#pragma optimize("O3") + void rev_mc(size_t sets_cnt, size_t threshold, size_t rounds, size_t m_size, char *o_file, uint64_t flags) { @@ -413,6 +417,8 @@ rev_mc(size_t sets_cnt, size_t threshold, size_t rounds, size_t m_size, char *o_ free_buffer(&mem); } +#pragma pop_options + // Fin. //----------------------------------------------------------