kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
CombinatorialBarcodesPairedEnd.hpp
Go to the documentation of this file.
1#ifndef KAORI_COMBINATORIAL_BARCODES_PAIRED_END_HPP
2#define KAORI_COMBINATORIAL_BARCODES_PAIRED_END_HPP
3
4#include "../SimpleSingleMatch.hpp"
5#include "../utils.hpp"
6
7#include <array>
8#include <vector>
9
16namespace kaori {
17
28template<size_t max_size>
30public:
31 struct Options {
36 bool use_first = true;
37
41 int max_mismatches1 = 0;
42
46 SearchStrand strand1 = SearchStrand::FORWARD;
47
51 int max_mismatches2 = 0;
52
56 SearchStrand strand2 = SearchStrand::FORWARD;
57
61 DuplicateAction duplicates = DuplicateAction::ERROR;
62
68 bool random = false;
69 };
70
71public:
86 const char* template_seq1, size_t template_length1, const BarcodePool& barcode_pool1,
87 const char* template_seq2, size_t template_length2, const BarcodePool& barcode_pool2,
88 const Options& options
89 ) :
90 matcher1(
91 template_seq1,
92 template_length1,
93 barcode_pool1,
94 [&]{
96 opt.strand = options.strand1;
97 opt.max_mismatches = options.max_mismatches1;
98 opt.duplicates = options.duplicates;
99 return opt;
100 }()
101 ),
102 matcher2(
103 template_seq2,
104 template_length2,
105 barcode_pool2,
106 [&]{
108 opt.strand = options.strand2;
109 opt.max_mismatches = options.max_mismatches2;
110 opt.duplicates = options.duplicates;
111 return opt;
112 }()
113 ),
114 randomized(options.random),
115 use_first(options.use_first)
116 {
117 num_options[0] = barcode_pool1.size();
118 num_options[1] = barcode_pool2.size();
119 }
120
121public:
125 struct State {
126 State() {}
127
128 State(typename SimpleSingleMatch<max_size>::State s1, typename SimpleSingleMatch<max_size>::State s2) : search1(std::move(s1)), search2(std::move(s2)) {}
129
130 std::vector<std::array<int, 2> >collected;
131 int barcode1_only = 0;
132 int barcode2_only = 0;
133 int total = 0;
134
138 typename SimpleSingleMatch<max_size>::State search1;
139 typename SimpleSingleMatch<max_size>::State search2;
143 };
144
145 State initialize() const {
146 return State(matcher1.initialize(), matcher2.initialize());
147 }
148
149 void reduce(State& s) {
150 matcher1.reduce(s.search1);
151 matcher2.reduce(s.search2);
152 combinations.insert(combinations.end(), s.collected.begin(), s.collected.end());
153 total += s.total;
154 barcode1_only += s.barcode1_only;
155 barcode2_only += s.barcode2_only;
156 }
157
/**
 * Compile-time flag, fixed to `false` for this handler.
 * NOTE(review): presumably tells the calling framework that `process()` does not need read names;
 * confirm against the code that consumes this flag.
 */
static constexpr bool use_names = false;
163public:
167 void process(State& state, const std::pair<const char*, const char*>& r1, const std::pair<const char*, const char*>& r2) const {
168 if (use_first) {
169 bool m1 = matcher1.search_first(r1.first, r1.second - r1.first, state.search1);
170 bool m2 = matcher2.search_first(r2.first, r2.second - r2.first, state.search2);
171
172 if (m1 && m2) {
173 state.collected.emplace_back(std::array<int, 2>{ state.search1.index, state.search2.index });
174 } else if (randomized) {
175 bool n1 = matcher1.search_first(r2.first, r2.second - r2.first, state.search1);
176 bool n2 = matcher2.search_first(r1.first, r1.second - r1.first, state.search2);
177 if (n1 && n2) {
178 state.collected.emplace_back(std::array<int, 2>{ state.search1.index, state.search2.index });
179 } else {
180 if (m1 || n1) {
181 ++state.barcode1_only;
182 } else if (m2 || n2) {
183 ++state.barcode2_only;
184 }
185 }
186 } else {
187 if (m1) {
188 ++state.barcode1_only;
189 } else if (m2) {
190 ++state.barcode2_only;
191 }
192 }
193
194 } else {
195 bool m1 = matcher1.search_best(r1.first, r1.second - r1.first, state.search1);
196 bool m2 = matcher2.search_best(r2.first, r2.second - r2.first, state.search2);
197
198 if (!randomized) {
199 if (m1 && m2) {
200 state.collected.emplace_back(std::array<int, 2>{ state.search1.index, state.search2.index });
201 } else if (m1) {
202 ++state.barcode1_only;
203 } else if (m2) {
204 ++state.barcode2_only;
205 }
206 } else if (m1 && m2) {
207 std::array<int, 2> candidate{ state.search1.index, state.search2.index };
208 int mismatches = state.search1.mismatches + state.search2.mismatches;
209
210 bool n1 = matcher1.search_best(r2.first, r2.second - r2.first, state.search1);
211 bool n2 = matcher2.search_best(r1.first, r1.second - r1.first, state.search2);
212
213 if (n1 && n2) {
214 int rmismatches = state.search1.mismatches + state.search2.mismatches;
215 if (mismatches > rmismatches) {
216 state.collected.emplace_back(std::array<int, 2>{ state.search1.index, state.search2.index });
217 } else if (mismatches < rmismatches) {
218 state.collected.emplace_back(candidate);
219 } else if (candidate[0] == state.search1.index && candidate[1] == state.search2.index) {
220 // If the mismatches are the same, it may not be ambiguous
221 // if the indices would be the same anyway.
222 state.collected.emplace_back(candidate);
223 }
224 } else {
225 state.collected.emplace_back(candidate);
226 }
227 } else {
228 bool n1 = matcher1.search_best(r2.first, r2.second - r2.first, state.search1);
229 bool n2 = matcher2.search_best(r1.first, r1.second - r1.first, state.search2);
230
231 if (n1 && n2) {
232 state.collected.emplace_back(std::array<int, 2>{ state.search1.index, state.search2.index });
233 } else if (m1 || n1) {
234 ++state.barcode1_only;
235 } else if (m2 || n2) {
236 ++state.barcode2_only;
237 }
238 }
239 }
240
241 ++state.total;
242 }
247public:
252 void sort() {
253 sort_combinations(combinations, num_options);
254 }
255
260 const std::vector<std::array<int, 2> >& get_combinations() const {
261 return combinations;
262 }
263
267 int get_total() const {
268 return total;
269 }
270
274 int get_barcode1_only() const {
275 return barcode1_only;
276 }
277
281 int get_barcode2_only() const {
282 return barcode2_only;
283 }
284private:
285 SimpleSingleMatch<max_size> matcher1, matcher2;
286 std::array<size_t, 2> num_options;
287
288 bool randomized;
289 bool use_first = true;
290
291 std::vector<std::array<int, 2> > combinations;
292 int total = 0;
293 int barcode1_only = 0;
294 int barcode2_only = 0;
295};
296
297}
298
299#endif
Handler for paired-end combinatorial barcodes.
Definition CombinatorialBarcodesPairedEnd.hpp:29
CombinatorialBarcodesPairedEnd(const char *template_seq1, size_t template_length1, const BarcodePool &barcode_pool1, const char *template_seq2, size_t template_length2, const BarcodePool &barcode_pool2, const Options &options)
Definition CombinatorialBarcodesPairedEnd.hpp:85
int get_barcode1_only() const
Definition CombinatorialBarcodesPairedEnd.hpp:274
int get_total() const
Definition CombinatorialBarcodesPairedEnd.hpp:267
const std::vector< std::array< int, 2 > > & get_combinations() const
Definition CombinatorialBarcodesPairedEnd.hpp:260
int get_barcode2_only() const
Definition CombinatorialBarcodesPairedEnd.hpp:281
void sort()
Definition CombinatorialBarcodesPairedEnd.hpp:252
Search for a template with a single variable region.
Definition SimpleSingleMatch.hpp:31
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:13
Pool of barcode sequences for a variable region.
Definition BarcodePool.hpp:21
Optional parameters for SimpleSingleMatch.
Definition SimpleSingleMatch.hpp:36
SearchStrand strand
Definition SimpleSingleMatch.hpp:50
int max_mismatches
Definition SimpleSingleMatch.hpp:40
DuplicateAction duplicates
Definition SimpleSingleMatch.hpp:45