1#ifndef KAORI_VARIABLE_LIBRARY_HPP
2#define KAORI_VARIABLE_LIBRARY_HPP
7#include <unordered_map>
25 const std::vector<const char*>& options,
26 std::unordered_map<std::string, int>& exact,
30 size_t len = trie.get_length();
32 for (
size_t i = 0; i < options.size(); ++i) {
33 auto ptr = options[i];
37 current = std::string(ptr, ptr + len);
40 for (
size_t j = 0; j < len; ++j) {
41 current += complement_base<true, true>(ptr[len - j - 1]);
47 auto status = trie.add(current.c_str());
49 if (!status.has_ambiguous) {
50 if (!status.is_duplicate || status.duplicate_replaced) {
52 }
else if (status.duplicate_cleared) {
62template<
class Methods,
class Cache,
class Trie,
class Result,
class Mismatch>
63void matcher_in_the_rye(
const std::string& x,
const Cache& cache,
const Trie& trie, Result& res,
const Mismatch& mismatches,
const Mismatch& max_mismatches) {
65 auto cit = cache.find(x);
66 if (cit == cache.end()) {
67 auto lit = res.cache.find(x);
68 if (lit != res.cache.end()) {
69 Methods::update(res, lit->second, mismatches);
72 auto missed = trie.search(x.c_str(), mismatches);
81 if (Methods::index(missed) >= 0 || mismatches == max_mismatches) {
82 res.cache[x] = missed;
87 Methods::update(res, missed);
90 Methods::update(res, cit->second, mismatches);
139 trie(barcode_pool.length, options.duplicates),
140 max_mm(options.max_mismatches)
142 fill_library(barcode_pool.
pool, exact, trie, options.
reverse);
169 std::unordered_map<std::string, std::pair<int, int> > cache;
193 cache.merge(state.cache);
199 static int index(
const std::pair<int, int>& val) {
203 static void update(State& state,
const std::pair<int, int>& val) {
204 state.
index = val.first;
205 state.mismatches = val.second;
209 static void update(State& state,
const std::pair<int, int>& val,
int mismatches) {
210 state.index = (val.second > mismatches ? -1 : val.first);
211 state.mismatches = val.second;
227 search(search_seq, state, max_mm);
243 void search(
const std::string& search_seq,
State& state,
int allowed_mismatches)
const {
244 auto it = exact.find(search_seq);
245 if (it != exact.end()) {
246 state.
index = it->second;
249 matcher_in_the_rye<Methods>(search_seq, cache, trie, state, allowed_mismatches, max_mm);
254 std::unordered_map<std::string, int> exact;
256 std::unordered_map<std::string, std::pair<int, int> > cache;
265template<
size_t total,
size_t position>
267 static bool check(
const std::array<int, total>& left,
const std::array<int, total>& right) {
268 return (HasMore<total, position + 1>::check(left, right) || left[position] > right[position]);
272template<
size_t total>
273struct HasMore<total, total> {
274 static bool check(
const std::array<int, total>&,
const std::array<int, total>&) {
return false; }
289template<
size_t num_segments>
337 std::array<int, num_segments> segments,
344 auto copy = segments;
345 std::reverse(copy.begin(), copy.end());
353 options.max_mismatches :
355 auto copy = options.max_mismatches;
356 std::reverse(copy.begin(), copy.end());
362 if (barcode_pool.length != trie.get_length()) {
363 throw std::runtime_error(
"variable sequences should have the same length as the sum of segment lengths");
365 fill_library(barcode_pool.pool, exact, trie, options.reverse);
400 std::unordered_map<std::string, typename SegmentedMismatches<num_segments>::Result> cache;
424 cache.merge(state.cache);
432 static int index(
const SegmentedResult& val) {
436 static void update(State& state,
const SegmentedResult& val) {
437 state.index = val.index;
438 state.mismatches = val.total;
439 state.per_segment = val.per_segment;
443 static void update(State& state,
const SegmentedResult& val,
const std::array<int, num_segments>& mismatches) {
444 state.index = (HasMore<num_segments, 0>::check(val.per_segment, mismatches) ? -1 : val.index);
445 state.mismatches = val.total;
446 state.per_segment = val.per_segment;
462 search(search_seq, state, max_mm);
478 void search(
const std::string& search_seq,
State& state, std::array<int, num_segments> allowed_mismatches)
const {
479 auto it = exact.find(search_seq);
480 if (it != exact.end()) {
481 state.
index = it->second;
483 std::fill_n(state.
per_segment.begin(), num_segments, 0);
485 matcher_in_the_rye<Methods>(search_seq, cache, trie, state, allowed_mismatches, max_mm);
490 std::unordered_map<std::string, int> exact;
492 std::unordered_map<std::string, SegmentedResult> cache;
493 std::array<int, num_segments> max_mm;
Defines the BarcodePool class.
Defines the MismatchTrie class and its subclasses.
Search for barcodes with mismatches anywhere.
Definition MismatchTrie.hpp:419
Search for known barcode sequences with segmented mismatches.
Definition BarcodeSearch.hpp:290
void reduce(State &state)
Definition BarcodeSearch.hpp:423
SegmentedBarcodeSearch()
Definition BarcodeSearch.hpp:327
void search(const std::string &search_seq, State &state) const
Definition BarcodeSearch.hpp:461
State initialize() const
Definition BarcodeSearch.hpp:411
SegmentedBarcodeSearch(const BarcodePool &barcode_pool, std::array< int, num_segments > segments, const Options &options)
Definition BarcodeSearch.hpp:335
void search(const std::string &search_seq, State &state, std::array< int, num_segments > allowed_mismatches) const
Definition BarcodeSearch.hpp:478
Search for barcodes with segmented mismatches.
Definition MismatchTrie.hpp:514
Search for known barcode sequences.
Definition BarcodeSearch.hpp:105
SimpleBarcodeSearch(const BarcodePool &barcode_pool, const Options &options)
Definition BarcodeSearch.hpp:138
void search(const std::string &search_seq, State &state, int allowed_mismatches) const
Definition BarcodeSearch.hpp:243
void search(const std::string &search_seq, State &state) const
Definition BarcodeSearch.hpp:226
SimpleBarcodeSearch()
Definition BarcodeSearch.hpp:132
State initialize() const
Definition BarcodeSearch.hpp:180
void reduce(State &state)
Definition BarcodeSearch.hpp:192
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:13
Pool of barcode sequences for a variable region.
Definition BarcodePool.hpp:21
std::vector< const char * > pool
Definition BarcodePool.hpp:54
Optional parameters for a SegmentedBarcodeSearch.
Definition BarcodeSearch.hpp:295
std::array< int, num_segments > max_mismatches
Definition BarcodeSearch.hpp:309
Options(int max_mismatch_per_segment=0)
Definition BarcodeSearch.hpp:300
bool reverse
Definition BarcodeSearch.hpp:315
DuplicateAction duplicates
Definition BarcodeSearch.hpp:320
State of the search.
Definition BarcodeSearch.hpp:376
std::array< int, num_segments > per_segment
Definition BarcodeSearch.hpp:393
int index
Definition BarcodeSearch.hpp:381
int mismatches
Definition BarcodeSearch.hpp:387
Result of the segmented search.
Definition MismatchTrie.hpp:540
int index
Definition MismatchTrie.hpp:553
Optional parameters for SimpleBarcodeSearch.
Definition BarcodeSearch.hpp:110
bool reverse
Definition BarcodeSearch.hpp:119
int max_mismatches
Definition BarcodeSearch.hpp:114
DuplicateAction duplicates
Definition BarcodeSearch.hpp:124
State of the search.
Definition BarcodeSearch.hpp:153
int index
Definition BarcodeSearch.hpp:158
int mismatches
Definition BarcodeSearch.hpp:164