71         << 
"BitMagic Dictionary Search Sample (c) 2018" << std::endl
    72         << 
"-idict  file-name            -- input set file to parse" << std::endl
    73         << 
"-svout  spase vector output  -- sparse vector name to save" << std::endl
    74         << 
"-svin   sparse vector input  -- sparse vector file name to load " << std::endl
    75         << 
"-diag                        -- run diagnostics"                  << std::endl
    76         << 
"-bench                       -- run benchmarks"                   << std::endl
    77         << 
"-timing                      -- collect timings"                  << std::endl
    95     for (
int i = 1; i < argc; ++i)
    97         std::string arg = argv[i];
    98         if ((arg == 
"-h") || (arg == 
"--help"))
   104         if (arg == 
"-svout" || arg == 
"--svout")
   112                 std::cerr << 
"Error: -svout requires file name" << std::endl;
   118         if (arg == 
"-svin" || arg == 
"--svin")
   126                 std::cerr << 
"Error: -svin requires file name" << std::endl;
   132         if (arg == 
"-idict" || arg == 
"--idict" )
   140                 std::cerr << 
"Error: -idict requires file name" << std::endl;
   146         if (arg == 
"-diag" || arg == 
"--diag" || arg == 
"-d" || arg == 
"--d")
   151         if (arg == 
"-timing" || arg == 
"--timing" || arg == 
"-t" || arg == 
"--t")
   156         if (arg == 
"-bench" || arg == 
"--bench" || arg == 
"-b" || arg == 
"--b")
   162         std::cerr << 
"Error: unknown argument: " << arg << std::endl;
   187     std::ifstream fin(fname.c_str(), std::ios::in);
   192     std::regex reg(
"[|]");
   193     std::sregex_token_iterator it_end;
   195     string trim_chars(
"\" ");
   197     for (
unsigned i = 0; std::getline(fin, line); ++i)
   199         if (line.empty() || !isdigit(line.front()))
   203         std::sregex_token_iterator it(line.begin(), line.end(), reg, -1);
   204         std::vector<std::string> line_vec(it, it_end);
   205         if (line_vec.empty())
   210             string& col13 = line_vec.at(13);
   211             col13.erase(0, col13.find_first_not_of(trim_chars));
   212             col13.erase(col13.find_last_not_of(trim_chars) + 1);
   215                 str_vec.emplace_back(col13);
   217         catch (std::exception&)
   223             cout << 
"\rReading input file: " << i << flush;
   235     if (str_vec.size() != str_sv.
size())
   236         throw runtime_error(
"Error. size() comparison failed!");
   241         const string& s_control = str_vec[i];
   243             throw runtime_error(
"Error. element comparison failed!");
   245     std::cout << 
"Check ok. Dictionary size = " << str_sv.
size() << std:: endl;
   256     std::uniform_int_distribution<unsigned> 
rand_dis(0, 
unsigned(str_vec.size()-1)); 
   269             if (idx < str_vec.size())
   270                 bench_vec.push_back(str_vec[idx]);
   277             string str_nf = str_vec[idx];
   278             string::reverse_iterator rit = str_nf.rbegin();
   279             string::reverse_iterator rit_end = str_nf.rend();
   280             for (; rit != rit_end; ++rit)
   283                 int a = rand() % 26 + int(
'A'); 
   286                 auto it = std::lower_bound(str_vec.begin(), str_vec.end(), str_nf);
   287                 if (it == str_vec.end() || *it != str_nf) 
   289                     bench_vec_not_found.push_back(str_nf);
   309     cout << 
"Picked " << bench_vec.
size() << 
" / "    310          << bench_vec_not_found.size() << 
" samples. Running benchmarks."    313     unsigned bench_size = unsigned(bench_vec.size());
   317             for (
const string& term : bench_vec)
   319                 auto it = std::lower_bound(str_vec.begin(), str_vec.end(), term);
   320                 if (it != str_vec.end())
   322                     string_vector::size_type idx =
   323                         string_vector::size_type(std::distance(str_vec.begin(), it));
   324                     bv1.
set(
unsigned(idx));
   330             for (
const string& term : bench_vec_not_found)
   332                 std::lower_bound(str_vec.begin(), str_vec.end(), term);
   339         std::map<string, unsigned> str_map;
   340         for (string_vector::size_type i = 0; i < str_vec.size(); ++i)
   342             const string& s = str_vec[i];
   343             str_map[s] = unsigned(i);
   347             for (
const string& term : bench_vec)
   349                 auto it = str_map.find(term);
   350                 if (it != str_map.end())
   352                     bv2.
set(
unsigned(it->second));
   358             for (
const string& term : bench_vec_not_found)
   360                 auto it = str_map.find(term);
   361                 if (it != str_map.end())
   363                     cerr << 
"empty search returned value..." << endl;
   373             for (
const string& term : bench_vec)
   376                 bool found = scanner.
find_eq_str(str_sv, term.c_str(), pos);
   385             for (
const string& term : bench_vec_not_found)
   388                 bool found = scanner.
find_eq_str(str_sv, term.c_str(), pos);
   391                     cerr << 
"scanner empty search returned value..." << endl;
   399         scanner.
bind(str_sv, 
true); 
   403             for (
const string& term : bench_vec)
   415             for (
const string& term : bench_vec_not_found)
   421                     cerr << 
"scanner empty search returned value..." << endl;
   431         throw runtime_error(
"Error. RB-search mismatch!");
   434         throw runtime_error(
"Error. scanner mismatch!");
   438         throw runtime_error(
"Error. binary scanner mismatch!");
   440     if (bv1.
count() != bench_size)
   441         throw runtime_error(
"Error. Search result missing elements!");
   447 int main(
int argc, 
char *argv[])
   472             cout << 
"Loaded " << str_vec.size() << 
" dictionary names." << endl;
   474             std::sort(str_vec.begin(), str_vec.end());
   481             for (
const string& term : str_vec)
   489                 str_sv.
swap(str_sv_remap);
   515                     str_vec.emplace_back(std::move(s));
   525                 print_svector_stat(str_sv, 
true);
   530                 size_t total_size = 0;
   531                 for (
const string& term : str_vec)
   533                     total_size += term.size();
   535                 cout << 
"String dictionary size: "   536                      << total_size / 1024 << 
"KB (" << total_size / (1024*1024) << 
"MB)"   540             if (str_sv.
size() && str_vec.size())
   542                 cout << 
"Run full comparison check..." << endl;
   544                 cout << 
"Ok" << endl;
   555             std::cout << std::endl << 
"Performance:" << std::endl;
   559     catch (std::exception& ex)
   561         std::cerr << 
"Error:" << ex.what() << std::endl;
 
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators. 
bm::chrono_taker::duration_map_type timing_map
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes 
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches 
size_type size() const
return current size of the vector (bits) 
Timing utilities for benchmarking (internal) 
algorithms for sparse_vector scan/search 
bm::str_sparse_vector< char, bm::bvector<>, 64 > str_sparse_vect
string sparse vector based on bit-transposed matrix 
static int load_dict_report(const std::string &fname, string_vector &str_vec)
Parse the input file and extract dictionary values. 
int compare(const bvector< Alloc > &bvect) const
Lexicographical comparison with a bitvector. 
#define BM_DECLARE_TEMP_BLOCK(x)
Algorithms for bvector<> (main include) 
std::uniform_int_distribution rand_dis(1, int(vector_max))
void push_back(const StrType &str)
push back a string 
bvector< Alloc > & set(size_type n, bool val=true)
Sets bit n if val is true, clears bit n if val is false. 
static void run_benchmark(const str_sparse_vect &str_sv, const string_vector &str_vec)
std::map< std::string, statistics > duration_map_type
test name to duration map 
sparse vector for strings with compression using bit transposition method 
static void print_duration_map(const duration_map_type &dmap, format fmt=ct_time)
void remap_from(const str_sparse_vector &str_sv)
Build remapping profile and load content from another sparse vector. 
size_type count() const
population count (count of ON bits) 
bool find_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
find first sparse vector element (string) 
Serialization for sparse_vector<> 
static void check_sparse(const str_sparse_vect &str_sv, const string_vector &str_vec)
Compare STL vector with bit-transposed container to check correctness. 
Utility class to collect performance measurements and statistics. 
static void pick_benchmark_set(string_vector &bench_vec, string_vector &bench_vec_not_found, const string_vector &str_vec)
Sample a few random terms out of collection. 
Generation of random subset. 
static int parse_args(int argc, char *argv[])
size_type size() const
return size of the vector 
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs...
bool test(size_type n) const
returns true if bit n is set and false if bit n is 0. 
void resize(size_type new_size)
Change size of the bvector. 
bool empty() const
return true if vector is empty 
bvector_type::size_type size_type
void swap(str_sparse_vector &str_sv) BMNOEXEPT
int main(int argc, char *argv[])
const unsigned benchmark_max
std::mt19937 gen(rand_dev())
std::random_device rand_dev
static void show_help()
Print help. 
size_type get(size_type idx, value_type *str, size_type buf_size) const
get specified element 
vector< string > string_vector
bool bfind_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
binary find first sparse vector element (string). Sparse vector must be sorted. 
Algorithms for sparse_vector<>