kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
DualBarcodesSingleEnd.hpp
Go to the documentation of this file.
1#ifndef KAORI_DUAL_BARCODES_SINGLE_END_HPP
2#define KAORI_DUAL_BARCODES_SINGLE_END_HPP
3
4#include "../ScanTemplate.hpp"
5#include "../BarcodeSearch.hpp"
6#include "../utils.hpp"
7
8#include <array>
9#include <vector>
10
17namespace kaori {
18
30template<size_t max_size>
32public:
36 struct Options {
41
45 bool use_first = true;
46
50 SearchStrand strand = SearchStrand::FORWARD;
51
55 DuplicateAction duplicates = DuplicateAction::ERROR;
56 };
57
58public:
67 DualBarcodesSingleEnd(const char* template_seq, size_t template_length, const std::vector<BarcodePool>& barcode_pools, const Options& options) :
68 forward(search_forward(options.strand)),
69 reverse(search_reverse(options.strand)),
70 max_mm(options.max_mismatches),
71 use_first(options.use_first),
72 constant_matcher(template_seq, template_length, options.strand)
73 {
74 const auto& regions = constant_matcher.variable_regions();
75 num_variable = regions.size();
76 if (barcode_pools.size() != num_variable) {
77 throw std::runtime_error("length of 'barcode_pools' should equal the number of variable regions");
78 }
79
80 for (size_t i = 0; i < num_variable; ++i) {
81 size_t rlen = regions[i].second - regions[i].first;
82 size_t vlen = barcode_pools[i].length;
83 if (vlen != rlen) {
84 throw std::runtime_error("length of variable region " + std::to_string(i + 1) + " (" + std::to_string(rlen) +
85 ") should be the same as its sequences (" + std::to_string(vlen) + ")");
86 }
87 }
88
89 size_t num_choices = 0;
90 if (num_variable) {
91 num_choices = barcode_pools[0].size();
92 for (size_t i = 1; i < num_variable; ++i) {
93 if (num_choices != barcode_pools[i].size()) {
94 throw std::runtime_error("all entries of 'barcode_pools' should have the same length");
95 }
96 }
97 }
98 counts.resize(num_choices);
99
100 // Constructing the combined varlib.
101 std::vector<std::string> combined(num_choices);
102 for (size_t v = 0; v < num_variable; ++v) {
103 const auto& curpool = barcode_pools[v];
104 size_t n = curpool.length;
105 for (size_t c = 0; c < num_choices; ++c) {
106 auto ptr = curpool[c];
107 combined[c].insert(combined[c].end(), ptr, ptr + n);
108 }
109 }
110
112 bopt.max_mismatches = options.max_mismatches;
113 bopt.duplicates = options.duplicates;
114
115 if (forward) {
116 bopt.reverse = false;
117 forward_lib = SimpleBarcodeSearch(combined, bopt);
118 }
119
120 if (reverse) {
121 bopt.reverse = true;
122 reverse_lib = SimpleBarcodeSearch(combined, bopt);
123 }
124 }
125
126public:
130 struct State {
131 std::vector<int> counts;
132 int total = 0;
133
134 std::string buffer;
135
136 // Default constructors should be called in this case, so it should be fine.
137 typename SimpleBarcodeSearch::State forward_details, reverse_details;
138 };
143private:
144 template<bool reverse>
145 std::pair<int, int> find_match(
146 const char* seq,
147 size_t position,
148 int obs_mismatches,
149 const SimpleBarcodeSearch& lib,
150 typename SimpleBarcodeSearch::State state,
151 std::string& buffer
152 ) const {
153 const auto& regions = constant_matcher.template variable_regions<reverse>();
154 buffer.clear();
155
156 for (size_t r = 0; r < num_variable; ++r) {
157 auto start = seq + position;
158 buffer.insert(buffer.end(), start + regions[r].first, start + regions[r].second);
159 }
160
161 lib.search(buffer, state, max_mm - obs_mismatches);
162 return std::make_pair(state.index, obs_mismatches + state.mismatches);
163 }
164
165 std::pair<int, int> forward_match(const char* seq, const typename ScanTemplate<max_size>::State& deets, State& state) const {
166 return find_match<false>(seq, deets.position, deets.forward_mismatches, forward_lib, state.forward_details, state.buffer);
167 }
168
169 std::pair<int, int> reverse_match(const char* seq, const typename ScanTemplate<max_size>::State& deets, State& state) const {
170 return find_match<true>(seq, deets.position, deets.reverse_mismatches, reverse_lib, state.reverse_details, state.buffer);
171 }
172
173private:
174 bool process_first(State& state, const std::pair<const char*, const char*>& x) const {
175 auto deets = constant_matcher.initialize(x.first, x.second - x.first);
176
177 while (!deets.finished) {
178 constant_matcher.next(deets);
179
180 if (forward && deets.forward_mismatches <= max_mm) {
181 auto id = forward_match(x.first, deets, state).first;
182 if (id >= 0) {
183 ++state.counts[id];
184 return true;
185 }
186 }
187
188 if (reverse && deets.reverse_mismatches <= max_mm) {
189 auto id = reverse_match(x.first, deets, state).first;
190 if (id >= 0) {
191 ++state.counts[id];
192 return true;
193 }
194 }
195 }
196 return false;
197 }
198
199 bool process_best(State& state, const std::pair<const char*, const char*>& x) const {
200 auto deets = constant_matcher.initialize(x.first, x.second - x.first);
201 bool found = false;
202 int best_mismatches = max_mm + 1;
203 int best_id = -1;
204
205 auto update = [&](std::pair<int, int> match) -> void {
206 if (match.first < 0){
207 return;
208 }
209 if (match.second == best_mismatches) {
210 if (best_id != match.first) { // ambiguous.
211 found = false;
212 }
213 } else if (match.second < best_mismatches) {
214 // A further optimization at this point would be to narrow
215 // max_mm to the current 'best_mismatches'. But
216 // this probably isn't worth it.
217
218 found = true;
219 best_mismatches = match.second;
220 best_id = match.first;
221 }
222 };
223
224 while (!deets.finished) {
225 constant_matcher.next(deets);
226
227 if (forward && deets.forward_mismatches <= max_mm) {
228 update(forward_match(x.first, deets, state));
229 }
230
231 if (reverse && deets.reverse_mismatches <= max_mm) {
232 update(reverse_match(x.first, deets, state));
233 }
234 }
235
236 if (found) {
237 ++state.counts[best_id];
238 }
239 return found;
240 }
241
242public:
246 State initialize() const {
247 State output;
248 output.counts.resize(counts.size());
249 return output;
250 }
251
252 void reduce(State& s) {
253 if (forward) {
254 forward_lib.reduce(s.forward_details);
255 }
256 if (reverse) {
257 reverse_lib.reduce(s.reverse_details);
258 }
259
260 for (size_t i = 0, end = counts.size(); i < end; ++i) {
261 counts[i] += s.counts[i];
262 }
263 total += s.total;
264 return;
265 }
266
267 bool process(State& state, const std::pair<const char*, const char*>& x) const {
268 ++state.total;
269 if (use_first) {
270 return process_first(state, x);
271 } else {
272 return process_best(state, x);
273 }
274 }
275
276 static constexpr bool use_names = false;
281public:
285 const std::vector<int>& get_counts() const {
286 return counts;
287 }
288
292 int get_total() const {
293 return total;
294 }
295private:
296 bool forward;
297 bool reverse;
298 int max_mm;
299 bool use_first;
300
301 ScanTemplate<max_size> constant_matcher;
302 size_t num_variable;
303
304 SimpleBarcodeSearch forward_lib, reverse_lib;
305 std::vector<int> counts;
306 int total = 0;
307};
308
309}
310
311#endif
Handler for single-end dual barcodes.
Definition DualBarcodesSingleEnd.hpp:31
const std::vector< int > & get_counts() const
Definition DualBarcodesSingleEnd.hpp:285
DualBarcodesSingleEnd(const char *template_seq, size_t template_length, const std::vector< BarcodePool > &barcode_pools, const Options &options)
Definition DualBarcodesSingleEnd.hpp:67
int get_total() const
Definition DualBarcodesSingleEnd.hpp:292
Scan a read sequence for the template sequence.
Definition ScanTemplate.hpp:37
Search for known barcode sequences.
Definition BarcodeSearch.hpp:105
void reduce(State &state)
Definition BarcodeSearch.hpp:192
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:13
Optional parameters for DualBarcodeSingleEnd.
Definition DualBarcodesSingleEnd.hpp:36
DuplicateAction duplicates
Definition DualBarcodesSingleEnd.hpp:55
SearchStrand strand
Definition DualBarcodesSingleEnd.hpp:50
bool use_first
Definition DualBarcodesSingleEnd.hpp:45
int max_mismatches
Definition DualBarcodesSingleEnd.hpp:40
Optional parameters for SimpleBarcodeSearch.
Definition BarcodeSearch.hpp:110
bool reverse
Definition BarcodeSearch.hpp:119
int max_mismatches
Definition BarcodeSearch.hpp:114
DuplicateAction duplicates
Definition BarcodeSearch.hpp:124
State of the search.
Definition BarcodeSearch.hpp:153