1#ifndef KAORI_VARIABLE_LIBRARY_HPP
2#define KAORI_VARIABLE_LIBRARY_HPP
9#include <unordered_map>
25template<
typename Trie_>
26inline void fill_library(
const std::vector<const char*>& options, std::unordered_map<std::string, BarcodeIndex>& exact, Trie_& trie,
bool reverse) {
27 std::size_t len = trie.length();
28 auto nopt = options.size();
30 for (
decltype(nopt) i = 0; i < nopt; ++i) {
31 auto ptr = options[i];
35 current = std::string(ptr, ptr + len);
38 for (
size_t j = 0; j < len; ++j) {
39 current += complement_base<true, true>(ptr[len - j - 1]);
45 auto status = trie.add(current.c_str());
47 if (!status.has_ambiguous) {
48 if (!status.is_duplicate || status.duplicate_replaced) {
50 }
else if (status.duplicate_cleared) {
104 my_trie(barcode_pool.length(), options.duplicates),
105 my_max_mm(options.max_mismatches)
107 fill_library(barcode_pool.
pool(), my_exact, my_trie, options.
reverse);
113 std::unordered_map<std::string, BarcodeIndex> my_exact;
116 CacheEntry() =
default;
117 CacheEntry(
BarcodeIndex index,
int mismatches) : index(index), mismatches(mismatches) {}
121 std::unordered_map<std::string, CacheEntry> my_cache;
148 std::unordered_map<std::string, CacheEntry> cache;
171 my_cache.merge(state.cache);
186 search(search_seq, state, my_max_mm);
202 void search(
const std::string& search_seq,
State& state,
int allowed_mismatches)
const {
203 auto it = my_exact.find(search_seq);
204 if (it != my_exact.end()) {
205 state.
index = it->second;
210 auto set_from_cache = [&](
const CacheEntry& cached) ->
void {
211 if (cached.mismatches > allowed_mismatches) {
216 state.
index = cached.index;
221 auto cIt = my_cache.find(search_seq);
222 if (cIt != my_cache.end()) {
223 set_from_cache(cIt->second);
227 auto lIt = state.cache.find(search_seq);
228 if (lIt != state.cache.end()) {
229 set_from_cache(lIt->second);
233 auto missed = my_trie.
search(search_seq.c_str(), allowed_mismatches);
236 state.
index = missed.index;
238 state.cache[search_seq] = CacheEntry(missed.index, missed.mismatches);
254 state.cache[search_seq] = CacheEntry(missed.index, missed.mismatches);
257 state.
index = missed.index;
271template<
int num_segments_>
321 std::array<SeqLength, num_segments_> segments,
327 std::reverse(segments.begin(), segments.end());
335 auto copy = options.max_mismatches;
336 if (options.reverse) {
337 std::reverse(copy.begin(), copy.end());
343 if (barcode_pool.length() != my_trie.length()) {
344 throw std::runtime_error(
"variable sequences should have the same length as the sum of segment lengths");
346 fill_library(barcode_pool.pool(), my_exact, my_trie, options.reverse);
350 SegmentedMismatches<num_segments_> my_trie;
351 std::array<int, num_segments_> my_max_mm;
352 std::unordered_map<std::string, BarcodeIndex> my_exact;
355 CacheEntry() =
default;
356 CacheEntry(
BarcodeIndex index,
int mismatches, std::array<int, num_segments_> per_segment) :
357 index(index), mismatches(mismatches), per_segment(per_segment) {}
360 std::array<int, num_segments_> per_segment;
362 std::unordered_map<std::string, CacheEntry> my_cache;
398 std::unordered_map<std::string, CacheEntry> cache;
421 my_cache.merge(state.cache);
436 search(search_seq, state, my_max_mm);
452 void search(
const std::string& search_seq,
State& state, std::array<int, num_segments_> allowed_mismatches)
const {
453 auto it = my_exact.find(search_seq);
454 if (it != my_exact.end()) {
455 state.
index = it->second;
457 std::fill_n(state.
per_segment.begin(), num_segments_, 0);
461 auto set_from_cache = [&](
const CacheEntry& cached) ->
void {
464 for (
int s = 0; s < num_segments_; ++s) {
465 if (cached.per_segment[s] > allowed_mismatches[s]) {
472 state.
index = cached.index;
475 auto cIt = my_cache.find(search_seq);
476 if (cIt != my_cache.end()) {
477 set_from_cache(cIt->second);
481 auto lIt = state.cache.find(search_seq);
482 if (lIt != state.cache.end()) {
483 set_from_cache(lIt->second);
487 auto missed = my_trie.search(search_seq.c_str(), allowed_mismatches);
490 state.
index = missed.index;
493 state.cache[search_seq] = CacheEntry(missed.index, missed.mismatches, missed.per_segment);
509 state.cache[search_seq] = CacheEntry(missed.index, missed.mismatches, missed.per_segment);
512 state.
index = missed.index;
Defines the BarcodePool class.
Defines trie-based classes for mismatch-tolerant sequence matching.
Search for barcodes with mismatches anywhere.
Definition MismatchTrie.hpp:383
Result search(const char *search_seq, int max_mismatches) const
Definition MismatchTrie.hpp:470
Pool of barcode sequences.
Definition BarcodePool.hpp:24
const std::vector< const char * > & pool() const
Definition BarcodePool.hpp:63
Search against known barcode sequences with segmented mismatches.
Definition BarcodeSearch.hpp:272
State initialize() const
Definition BarcodeSearch.hpp:409
SegmentedBarcodeSearch(const BarcodePool &barcode_pool, std::array< SeqLength, num_segments_ > segments, const Options &options)
Definition BarcodeSearch.hpp:319
void reduce(State &state)
Definition BarcodeSearch.hpp:420
void search(const std::string &search_seq, State &state) const
Definition BarcodeSearch.hpp:435
SegmentedBarcodeSearch()=default
void search(const std::string &search_seq, State &state, std::array< int, num_segments_ > allowed_mismatches) const
Definition BarcodeSearch.hpp:452
Search against known barcodes.
Definition BarcodeSearch.hpp:70
SimpleBarcodeSearch(const BarcodePool &barcode_pool, const Options &options)
Definition BarcodeSearch.hpp:103
void search(const std::string &search_seq, State &state, int allowed_mismatches) const
Definition BarcodeSearch.hpp:202
void search(const std::string &search_seq, State &state) const
Definition BarcodeSearch.hpp:185
SimpleBarcodeSearch()=default
State initialize() const
Definition BarcodeSearch.hpp:159
void reduce(State &state)
Definition BarcodeSearch.hpp:170
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:16
constexpr BarcodeIndex STATUS_AMBIGUOUS
Definition utils.hpp:53
constexpr BarcodeIndex STATUS_UNMATCHED
Definition utils.hpp:48
DuplicateAction
Definition utils.hpp:26
bool is_barcode_index_ok(BarcodeIndex index)
Definition utils.hpp:60
std::vector< const char * >::size_type BarcodeIndex
Definition utils.hpp:43
Optional parameters for a SegmentedBarcodeSearch.
Definition BarcodeSearch.hpp:277
std::array< int, num_segments_ > max_mismatches
Definition BarcodeSearch.hpp:291
bool reverse
Definition BarcodeSearch.hpp:298
DuplicateAction duplicates
Definition BarcodeSearch.hpp:303
Options(int max_mismatch_per_segment=0)
Definition BarcodeSearch.hpp:282
State of the search.
Definition BarcodeSearch.hpp:371
std::array< int, num_segments_ > per_segment
Definition BarcodeSearch.hpp:391
int mismatches
Definition BarcodeSearch.hpp:384
BarcodeIndex index
Definition BarcodeSearch.hpp:377
Optional parameters for SimpleBarcodeSearch.
Definition BarcodeSearch.hpp:75
bool reverse
Definition BarcodeSearch.hpp:84
int max_mismatches
Definition BarcodeSearch.hpp:79
DuplicateAction duplicates
Definition BarcodeSearch.hpp:89
State of the search.
Definition BarcodeSearch.hpp:130
BarcodeIndex index
Definition BarcodeSearch.hpp:136
int mismatches
Definition BarcodeSearch.hpp:143
Utilities for sequence matching.