kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
SimpleSingleMatch.hpp
Go to the documentation of this file.
1#ifndef KAORI_SIMPLE_SINGLE_MATCH_HPP
2#define KAORI_SIMPLE_SINGLE_MATCH_HPP
3
4#include "ScanTemplate.hpp"
5#include "BarcodePool.hpp"
6#include "BarcodeSearch.hpp"
7#include "utils.hpp"
8
9#include <string>
10#include <unordered_map>
11#include <vector>
12
19namespace kaori {
20
30template<SeqLength max_size_>
32public:
36 struct Options {
41
45 DuplicateAction duplicates = DuplicateAction::ERROR;
46
50 SearchStrand strand = SearchStrand::FORWARD;
51 };
52
53public:
62 const char* template_seq,
63 SeqLength template_length,
64 const BarcodePool& barcode_pool,
65 const Options& options
66 ) :
67 my_forward(search_forward(options.strand)),
68 my_reverse(search_reverse(options.strand)),
69 my_max_mm(options.max_mismatches),
70 my_constant(template_seq, template_length, options.strand)
71 {
72 // Exact strandedness doesn't matter here, just need the number and length.
73 const auto& regions = my_constant.forward_variable_regions();
74 if (regions.size() != 1) {
75 throw std::runtime_error("expected one variable region in the constant template");
76 }
77
78 SeqLength var_length = regions[0].second - regions[0].first;
79 if (var_length != barcode_pool.length()) {
80 throw std::runtime_error("length of barcode_pool sequences (" + std::to_string(barcode_pool.length()) +
81 ") should be the same as the barcode_pool region (" + std::to_string(var_length) + ")");
82 }
83
85 bs_opt.duplicates = options.duplicates;
86 bs_opt.max_mismatches = my_max_mm;
87
88 if (my_forward) {
89 bs_opt.reverse = false;
90 my_forward_lib = SimpleBarcodeSearch(barcode_pool, bs_opt);
91 }
92 if (my_reverse) {
93 bs_opt.reverse = true;
94 my_reverse_lib = SimpleBarcodeSearch(barcode_pool, bs_opt);
95 }
96 }
97
98private:
99 bool my_forward, my_reverse;
100 int my_max_mm;
101 ScanTemplate<max_size_> my_constant;
102 SimpleBarcodeSearch my_forward_lib, my_reverse_lib;
103
104public:
108 struct State {
114
121
127 int mismatches = 0;
128
134
139 bool reverse = false;
140
144 std::string buffer;
145 typename SimpleBarcodeSearch::State forward_details, reverse_details;
149 };
150
157 return State();
158 }
159
167 void reduce(State& state) {
168 if (my_forward) {
169 my_forward_lib.reduce(state.forward_details);
170 }
171 if (my_reverse) {
172 my_reverse_lib.reduce(state.reverse_details);
173 }
174 }
175
176private:
177 void forward_match(const char* seq, const typename ScanTemplate<max_size_>::State& details, State& state) const {
178 auto start = seq + details.position;
179 const auto& range = my_constant.forward_variable_regions()[0];
180 state.buffer.clear();
181 state.buffer.insert(state.buffer.end(), start + range.first, start + range.second);
182 my_forward_lib.search(state.buffer, state.forward_details, my_max_mm - details.forward_mismatches);
183 }
184
185 void reverse_match(const char* seq, const typename ScanTemplate<max_size_>::State& details, State& state) const {
186 auto start = seq + details.position;
187 const auto& range = my_constant.reverse_variable_regions()[0];
188 state.buffer.clear();
189 state.buffer.insert(state.buffer.end(), start + range.first, start + range.second);
190 my_reverse_lib.search(state.buffer, state.reverse_details, my_max_mm - details.reverse_mismatches);
191 }
192
193public:
206 bool search_first(const char* read_seq, SeqLength read_length, State& state) const {
207 auto deets = my_constant.initialize(read_seq, read_length);
208 bool found = false;
209 state.index = STATUS_UNMATCHED;
210 state.mismatches = 0;
211 state.variable_mismatches = 0;
212
213 auto update = [&](bool rev, int const_mismatches, const typename SimpleBarcodeSearch::State& x) -> bool {
214 if (!is_barcode_index_ok(x.index)) {
215 return false;
216 }
217
218 int total = const_mismatches + x.mismatches;
219 if (total > my_max_mm) {
220 return false;
221 }
222
223 found = true;
224 state.position = deets.position;
225 state.mismatches = total;
226 state.reverse = rev;
227 state.index = x.index;
228 state.variable_mismatches = x.mismatches;
229 return true;
230 };
231
232 while (!deets.finished) {
233 my_constant.next(deets);
234
235 if (my_forward && deets.forward_mismatches <= my_max_mm) {
236 forward_match(read_seq, deets, state);
237 if (update(false, deets.forward_mismatches, state.forward_details)) {
238 break;
239 }
240 }
241
242 if (my_reverse && deets.reverse_mismatches <= my_max_mm) {
243 reverse_match(read_seq, deets, state);
244 if (update(true, deets.reverse_mismatches, state.reverse_details)) {
245 break;
246 }
247 }
248 }
249
250 return found;
251 }
252
265 bool search_best(const char* read_seq, SeqLength read_length, State& state) const {
266 auto deets = my_constant.initialize(read_seq, read_length);
267 state.index = STATUS_UNMATCHED;
268 bool found = false;
269 int best = my_max_mm + 1;
270
271 auto update = [&](bool rev, int const_mismatches, const typename SimpleBarcodeSearch::State& x) -> void {
272 if (!is_barcode_index_ok(x.index)) {
273 return;
274 }
275
276 auto total = x.mismatches + const_mismatches;
277 if (total == best) {
278 if (state.index != x.index) { // ambiguous, setting back to a mismatch.
279 found = false;
280 state.index = STATUS_AMBIGUOUS;
281 }
282 } else if (total < best) {
283 found = true;
284 best = total;
285 // A further optimization at this point would be to narrow
286 // max_mm to the current 'best'. But this probably
287 // isn't worth it.
288
289 state.index = x.index;
290 state.mismatches = total;
291 state.variable_mismatches = x.mismatches;
292 state.position = deets.position;
293 state.reverse = rev;
294 }
295 };
296
297 while (!deets.finished) {
298 my_constant.next(deets);
299
300 if (my_forward && deets.forward_mismatches <= my_max_mm) {
301 forward_match(read_seq, deets, state);
302 update(false, deets.forward_mismatches, state.forward_details);
303 }
304
305 if (my_reverse && deets.reverse_mismatches <= my_max_mm) {
306 reverse_match(read_seq, deets, state);
307 update(true, deets.reverse_mismatches, state.reverse_details);
308 }
309 }
310
311 return found;
312 }
313};
314
315}
316
317#endif
Defines the BarcodePool class.
Search for barcode sequences.
Defines the ScanTemplate class.
Pool of barcode sequences.
Definition BarcodePool.hpp:24
SeqLength length() const
Definition BarcodePool.hpp:70
Scan a read sequence for the template sequence.
Definition ScanTemplate.hpp:38
State initialize(const char *read_seq, SeqLength read_length) const
Definition ScanTemplate.hpp:159
const std::vector< std::pair< SeqLength, SeqLength > > & forward_variable_regions() const
Definition ScanTemplate.hpp:291
void next(State &state) const
Definition ScanTemplate.hpp:201
const std::vector< std::pair< SeqLength, SeqLength > > & reverse_variable_regions() const
Definition ScanTemplate.hpp:300
Search against known barcodes.
Definition BarcodeSearch.hpp:70
void search(const std::string &search_seq, State &state) const
Definition BarcodeSearch.hpp:185
void reduce(State &state)
Definition BarcodeSearch.hpp:170
Search for a template with a single variable region.
Definition SimpleSingleMatch.hpp:31
void reduce(State &state)
Definition SimpleSingleMatch.hpp:167
bool search_best(const char *read_seq, SeqLength read_length, State &state) const
Definition SimpleSingleMatch.hpp:265
bool search_first(const char *read_seq, SeqLength read_length, State &state) const
Definition SimpleSingleMatch.hpp:206
SimpleSingleMatch(const char *template_seq, SeqLength template_length, const BarcodePool &barcode_pool, const Options &options)
Definition SimpleSingleMatch.hpp:61
State initialize() const
Definition SimpleSingleMatch.hpp:156
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:16
std::size_t SeqLength
Definition utils.hpp:37
constexpr BarcodeIndex STATUS_AMBIGUOUS
Definition utils.hpp:53
SearchStrand
Definition utils.hpp:31
constexpr BarcodeIndex STATUS_UNMATCHED
Definition utils.hpp:48
DuplicateAction
Definition utils.hpp:26
bool is_barcode_index_ok(BarcodeIndex index)
Definition utils.hpp:60
std::vector< constchar * >::size_type BarcodeIndex
Definition utils.hpp:43
Details on the current match to the read sequence.
Definition ScanTemplate.hpp:109
int forward_mismatches
Definition ScanTemplate.hpp:120
SeqLength position
Definition ScanTemplate.hpp:114
Optional parameters for SimpleBarcodeSearch.
Definition BarcodeSearch.hpp:75
DuplicateAction duplicates
Definition BarcodeSearch.hpp:89
State of the search.
Definition BarcodeSearch.hpp:130
Optional parameters for SimpleSingleMatch.
Definition SimpleSingleMatch.hpp:36
DuplicateAction duplicates
Definition SimpleSingleMatch.hpp:45
int max_mismatches
Definition SimpleSingleMatch.hpp:40
SearchStrand strand
Definition SimpleSingleMatch.hpp:50
State of search().
Definition SimpleSingleMatch.hpp:108
SeqLength position
Definition SimpleSingleMatch.hpp:120
int mismatches
Definition SimpleSingleMatch.hpp:127
bool reverse
Definition SimpleSingleMatch.hpp:139
int variable_mismatches
Definition SimpleSingleMatch.hpp:133
BarcodeIndex index
Definition SimpleSingleMatch.hpp:113
Utilites for sequence matching.