kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
SimpleSingleMatch.hpp
Go to the documentation of this file.
1#ifndef KAORI_SIMPLE_SINGLE_MATCH_HPP
2#define KAORI_SIMPLE_SINGLE_MATCH_HPP
3
4#include "ScanTemplate.hpp"
5#include "BarcodePool.hpp"
6#include "BarcodeSearch.hpp"
7#include "utils.hpp"
8
9#include <string>
10#include <unordered_map>
11#include <vector>
12
19namespace kaori {
20
30template<size_t max_size>
32public:
36 struct Options {
41
45 DuplicateAction duplicates = DuplicateAction::ERROR;
46
50 SearchStrand strand = SearchStrand::FORWARD;
51 };
52
53public:
62 const char* template_seq,
63 size_t template_length,
64 const BarcodePool& barcode_pool,
65 const Options& options
66 ) :
67 num_options(barcode_pool.pool.size()),
68 forward(search_forward(options.strand)),
69 reverse(search_reverse(options.strand)),
70 max_mm(options.max_mismatches),
71 constant(template_seq, template_length, options.strand)
72 {
73 // Exact strandedness doesn't matter here, just need the number and length.
74 const auto& regions = constant.variable_regions();
75 if (regions.size() != 1) {
76 throw std::runtime_error("expected one variable region in the constant template");
77 }
78
79 size_t var_length = regions[0].second - regions[0].first;
80 if (var_length != barcode_pool.length) {
81 throw std::runtime_error("length of barcode_pool sequences (" + std::to_string(barcode_pool.length) +
82 ") should be the same as the barcode_pool region (" + std::to_string(var_length) + ")");
83 }
84
86 bs_opt.duplicates = options.duplicates;
87 bs_opt.max_mismatches = max_mm;
88
89 if (forward) {
90 bs_opt.reverse = false;
91 forward_lib = SimpleBarcodeSearch(barcode_pool, bs_opt);
92 }
93 if (reverse) {
94 bs_opt.reverse = true;
95 reverse_lib = SimpleBarcodeSearch(barcode_pool, bs_opt);
96 }
97 }
98
99public:
103 struct State {
108 int index = 0;
109
114 size_t position = 0;
115
120 int mismatches = 0;
121
127
131 bool reverse = false;
132
136 typename SimpleBarcodeSearch::State forward_details, reverse_details;
140 };
141
148 return State();
149 }
150
159 void reduce(State& state) {
160 if (forward) {
161 forward_lib.reduce(state.forward_details);
162 }
163 if (reverse) {
164 reverse_lib.reduce(state.reverse_details);
165 }
166 }
167
168private:
169 bool has_match(int obs_mismatches) const {
170 return (obs_mismatches >= 0 && obs_mismatches <= max_mm);
171 }
172
173 void forward_match(const char* seq, const typename ScanTemplate<max_size>::State& details, State& state) const {
174 auto start = seq + details.position;
175 const auto& range = constant.variable_regions()[0];
176 std::string curseq(start + range.first, start + range.second);
177 forward_lib.search(curseq, state.forward_details, max_mm - details.forward_mismatches);
178 }
179
180 void reverse_match(const char* seq, const typename ScanTemplate<max_size>::State& details, State& state) const {
181 auto start = seq + details.position;
182 const auto& range = constant.template variable_regions<true>()[0];
183 std::string curseq(start + range.first, start + range.second);
184 reverse_lib.search(curseq, state.reverse_details, max_mm - details.reverse_mismatches);
185 }
186
187public:
200 bool search_first(const char* read_seq, size_t read_length, State& state) const {
201 auto deets = constant.initialize(read_seq, read_length);
202 bool found = false;
203 state.index = -1;
204 state.mismatches = 0;
205 state.variable_mismatches = 0;
206
207 auto update = [&](bool rev, int const_mismatches, const typename SimpleBarcodeSearch::State& x) -> bool {
208 if (x.index < 0) {
209 return false;
210 }
211
212 int total = const_mismatches + x.mismatches;
213 if (total > max_mm) {
214 return false;
215 }
216
217 found = true;
218 state.position = deets.position;
219 state.mismatches = total;
220 state.reverse = rev;
221 state.index = x.index;
222 state.variable_mismatches = x.mismatches;
223 return true;
224 };
225
226 while (!deets.finished) {
227 constant.next(deets);
228
229 if (forward && has_match(deets.forward_mismatches)) {
230 forward_match(read_seq, deets, state);
231 if (update(false, deets.forward_mismatches, state.forward_details)) {
232 break;
233 }
234 }
235
236 if (reverse && has_match(deets.reverse_mismatches)) {
237 reverse_match(read_seq, deets, state);
238 if (update(true, deets.reverse_mismatches, state.reverse_details)) {
239 break;
240 }
241 }
242 }
243
244 return found;
245 }
246
259 bool search_best(const char* read_seq, size_t read_length, State& state) const {
260 auto deets = constant.initialize(read_seq, read_length);
261 state.index = -1;
262 bool found = false;
263 int best = max_mm + 1;
264
265 auto update = [&](bool rev, int const_mismatches, const typename SimpleBarcodeSearch::State& x) -> void {
266 if (x.index < 0) {
267 return;
268 }
269
270 auto total = x.mismatches + const_mismatches;
271 if (total == best) {
272 if (state.index != x.index) { // ambiguous, setting back to a mismatch.
273 found = false;
274 state.index = -1;
275 }
276 } else if (total < best) {
277 found = true;
278 best = total;
279 // A further optimization at this point would be to narrow
280 // max_mm to the current 'best'. But this probably
281 // isn't worth it.
282
283 state.index = x.index;
284 state.mismatches = total;
285 state.variable_mismatches = x.mismatches;
286 state.position = deets.position;
287 state.reverse = rev;
288 }
289 };
290
291 while (!deets.finished) {
292 constant.next(deets);
293
294 if (forward && has_match(deets.forward_mismatches)) {
295 forward_match(read_seq, deets, state);
296 update(false, deets.forward_mismatches, state.forward_details);
297 }
298
299 if (reverse && has_match(deets.reverse_mismatches)) {
300 reverse_match(read_seq, deets, state);
301 update(true, deets.reverse_mismatches, state.reverse_details);
302 }
303 }
304
305 return found;
306 }
307
308private:
309 size_t num_options;
310 bool forward, reverse;
311 int max_mm;
312
313 ScanTemplate<max_size> constant;
314 SimpleBarcodeSearch forward_lib, reverse_lib;
315};
316
317}
318
319#endif
Defines the BarcodePool class.
Search for barcode sequences.
Defines the ScanTemplate class.
Scan a read sequence for the template sequence.
Definition ScanTemplate.hpp:37
Search for known barcode sequences.
Definition BarcodeSearch.hpp:105
void search(const std::string &search_seq, State &state) const
Definition BarcodeSearch.hpp:226
void reduce(State &state)
Definition BarcodeSearch.hpp:192
Search for a template with a single variable region.
Definition SimpleSingleMatch.hpp:31
bool search_best(const char *read_seq, size_t read_length, State &state) const
Definition SimpleSingleMatch.hpp:259
State initialize() const
Definition SimpleSingleMatch.hpp:147
void reduce(State &state)
Definition SimpleSingleMatch.hpp:159
bool search_first(const char *read_seq, size_t read_length, State &state) const
Definition SimpleSingleMatch.hpp:200
SimpleSingleMatch(const char *template_seq, size_t template_length, const BarcodePool &barcode_pool, const Options &options)
Definition SimpleSingleMatch.hpp:61
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:13
Pool of barcode sequences for a variable region.
Definition BarcodePool.hpp:21
size_t length
Definition BarcodePool.hpp:59
Optional parameters for SimpleBarcodeSearch.
Definition BarcodeSearch.hpp:110
bool reverse
Definition BarcodeSearch.hpp:119
int max_mismatches
Definition BarcodeSearch.hpp:114
DuplicateAction duplicates
Definition BarcodeSearch.hpp:124
State of the search.
Definition BarcodeSearch.hpp:153
Optional parameters for SimpleSingleMatch.
Definition SimpleSingleMatch.hpp:36
SearchStrand strand
Definition SimpleSingleMatch.hpp:50
int max_mismatches
Definition SimpleSingleMatch.hpp:40
DuplicateAction duplicates
Definition SimpleSingleMatch.hpp:45
State of the search on a read sequence.
Definition SimpleSingleMatch.hpp:103
int variable_mismatches
Definition SimpleSingleMatch.hpp:126
int index
Definition SimpleSingleMatch.hpp:108
bool reverse
Definition SimpleSingleMatch.hpp:131
size_t position
Definition SimpleSingleMatch.hpp:114
int mismatches
Definition SimpleSingleMatch.hpp:120