diff --git a/drama/src/rev-mc.c b/drama/src/rev-mc.c index 4bfc05e..2b1de40 100644 --- a/drama/src/rev-mc.c +++ b/drama/src/rev-mc.c @@ -304,8 +304,9 @@ find_row_mask(std::vector &sets, uint64_t row_mask = LS_BITMASK(16);// use 16 bits for the row const uint64_t last_mask = (row_mask << (40 - 16)); row_mask <<= CL_SHIFT;// skip the lowest 6 bits since they're used for CL addressing + bool need_update_row_mask = false; - auto resolve = [=](uint64_t row_mask) -> bool { + auto resolve = [=](uint64_t row_mask) -> bool { if (row_mask & LS_BITMASK(CL_SHIFT)) { return false; } for (auto addr_pool : same_row_sets) { addr_tuple base_addr = addr_pool[0]; @@ -329,8 +330,7 @@ find_row_mask(std::vector &sets, if (thread_num > 2) { thread_num -= 2; } fprintf(stderr, "thread_num: %ld\n", thread_num); - uint64_t step = 1 << 20; - bool lazy_advance_step = false; + uint64_t step = 1 << 20; // for (uint64_t i = row_mask; i < last_mask; ++step) { i = next_bit_permutation(i); } // fprintf(stderr, "total_step: %ld\n", step); // step /= thread_num; @@ -338,49 +338,28 @@ find_row_mask(std::vector &sets, fprintf(stderr, "worker_step: %ld\n", step); for (int i = 0; i < thread_num; ++i) { - workers.emplace_back([=, &row_mask, &found, &lock, &lazy_advance_step] { + workers.emplace_back([&] { while (!found) { uint64_t cur_mask = last_mask; { std::lock_guard _(lock); - if (row_mask >= last_mask) { break; } - if (lazy_advance_step) { + if (row_mask >= last_mask || found) { break; } + if (need_update_row_mask) { for (int i = 0; i < step && row_mask < last_mask; ++i) { row_mask = next_bit_permutation(row_mask); } } - cur_mask = row_mask; - lazy_advance_step = true; + cur_mask = row_mask; + need_update_row_mask = true; } for (int i = 0; i < step && cur_mask < last_mask; ++i) { - if (found.load(std::memory_order_relaxed)) { break; } - - if (cur_mask & LS_BITMASK(CL_SHIFT)) { - cur_mask = next_bit_permutation(row_mask); - continue; - } - bool mismatch = false; - for (auto addr_pool : same_row_sets) { - addr_tuple base_addr = addr_pool[0]; - for (int i = 1; i < addr_pool.size(); i++) { - addr_tuple tmp = addr_pool[i]; - if ((tmp.p_addr & row_mask) != (base_addr.p_addr & cur_mask)) { - mismatch = true; - break; - } + if (!found.load(std::memory_order_relaxed) && resolve(cur_mask)) { + std::lock_guard _(lock); + if (!found.exchange(true)) { + row_mask = cur_mask; + break; } - if (mismatch) { break; } - } - if (mismatch) { - cur_mask = next_bit_permutation(row_mask); - continue; - } - - std::lock_guard _(lock); - if (!found.exchange(true)) { - row_mask = cur_mask; - break; } } }