preamble
Investigating
https://www.geeksforgeeks.org/write-a-c-program-that-given-a-set-a-of-n-numbers-and-another-number-x-determines-whether-or-not-there-exist-two-elements-in-s-whose-sum-is-exactly-x/
I was wondering if cache locality trumps using a hashed set, on a modern CPU.
There are 3 implementations of the search.
Investigation
For small sets of data say 100 elements, the random walk pattern looks more successful, as the hashing/inserting in the third solution is taking way too much time.
[js]
————————————————–
Benchmark Time CPU Iterations
————————————————–
BM_METHOD1 14700 ns 14676 ns 43151
BM_METHOD2 1540 ns 1539 ns 447113
BM_METHOD3 23578 ns 23554 ns 29603
[/js]
For larger sets of data say 1000000, the sorting of the random walk pattern appears to dominate and then the hashing solution is definitely a clear winner.
[js]
————————————————–
Benchmark Time CPU Iterations
————————————————–
BM_METHOD1 108818475 ns 108747000 ns 6
BM_METHOD2 66346574 ns 66334000 ns 10
BM_METHOD3 1583479 ns 1583115 ns 436
[/js]
Conclusion
You need to benchmark if in any doubt. Algorithm always trumps caching on large scale, but on small scale the expense of the algorithm becomes important.
Full code
[js]
#include
#include
#include
#include
#include
#include
using namespace std;
vector
GenVec(size_t size_, int valueRange_)
{
vector
// create source of randomness, and initialize it with non-deterministic seed
std::random_device r;
std::seed_seq seed{r(), r(), r(), r(), r(), r(), r(), r()};
std::mt19937 eng{seed};
// a distribution that takes randomness and produces values in specified range
std::uniform_int_distribution<> dist(-valueRange_, valueRange_);
for(int i = 0; i < size_; ++i)
{
vec.emplace_back(dist(eng));
}
return vec;
}
static vector
static int target = 1600000;
using RetT = pair
//Naive approach – O(n^2)
RetT method1(const vector
{
auto it = src_.begin();
auto itEnd = src_.end();
for(; it != itEnd; ++it)
{
auto it2 = it;
++it2;
for(;it2 != itEnd; ++it2)
{
int sum = *it + *it2;
if (sum == target_)
{
assert(sum == target);
return RetT{*it, *it2};
}
}
}
return RetT{0,0};
}
//Random walk on sorted vector – sort dominates task – O(n log n)
RetT method2(const vector
{
vector
sort(src.begin(), src.end());
auto itS = src.begin();
auto itE = src.end();
while(itS < itE)
{
auto sum = *itS + *itE;
if (sum == target_)
{
assert(sum == target);
return RetT{*itS, *itE};
}
else if(sum < target_)
{
++itS;
}
else
{
--itE;
}
}
return RetT{0,0};
}
//Hashing dominates - O(n)
RetT method3(const vector
{
unordered_set
for (auto i: src_)
{
int temp = target_ – i;
if (temp >= 0 && s.find(temp) != s.end())
{
return RetT{i, temp};
}
s.insert(i);
}
}
static void BM_METHOD1(benchmark::State& st_)
{
for (auto _ : st_)
{
method1(vec, target);
}
}
BENCHMARK(BM_METHOD1);
static void BM_METHOD2(benchmark::State& st_)
{
for (auto _ : st_)
{
method2(vec, target);
}
}
BENCHMARK(BM_METHOD2);
static void BM_METHOD3(benchmark::State& st_)
{
for (auto _ : st_)
{
method3(vec, target);
}
}
BENCHMARK(BM_METHOD3);
BENCHMARK_MAIN();
[/js]