kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
CombinatorialBarcodesPairedEnd.hpp
Go to the documentation of this file.
1#ifndef KAORI_COMBINATORIAL_BARCODES_PAIRED_END_HPP
2#define KAORI_COMBINATORIAL_BARCODES_PAIRED_END_HPP
3
5#include "../utils.hpp"
6
7#include <array>
8#include <vector>
9#include <unordered_map>
10
17namespace kaori {
18
29template<SeqLength max_size_>
31public:
32 struct Options {
37 bool use_first = true;
38
42 int max_mismatches1 = 0;
43
47 SearchStrand strand1 = SearchStrand::FORWARD;
48
52 int max_mismatches2 = 0;
53
57 SearchStrand strand2 = SearchStrand::FORWARD;
58
62 DuplicateAction duplicates = DuplicateAction::ERROR;
63
69 bool random = false;
70 };
71
72public:
87 const char* template_seq1, SeqLength template_length1, const BarcodePool& barcode_pool1,
88 const char* template_seq2, SeqLength template_length2, const BarcodePool& barcode_pool2,
89 const Options& options
90 ) :
91 my_matcher1(
92 template_seq1,
93 template_length1,
94 barcode_pool1,
95 [&]{
97 opt.strand = options.strand1;
98 opt.max_mismatches = options.max_mismatches1;
99 opt.duplicates = options.duplicates;
100 return opt;
101 }()
102 ),
103 my_matcher2(
104 template_seq2,
105 template_length2,
106 barcode_pool2,
107 [&]{
109 opt.strand = options.strand2;
110 opt.max_mismatches = options.max_mismatches2;
111 opt.duplicates = options.duplicates;
112 return opt;
113 }()
114 ),
115 my_randomized(options.random),
116 my_use_first(options.use_first)
117 {
118 my_pool_size[0] = barcode_pool1.size();
119 my_pool_size[1] = barcode_pool2.size();
120 }
121
122private:
123 SimpleSingleMatch<max_size_> my_matcher1, my_matcher2;
124 std::array<BarcodeIndex, 2> my_pool_size;
125
126 bool my_randomized;
127 bool my_use_first = true;
128
129 std::unordered_map<std::array<BarcodeIndex, 2>, Count, CombinationHash<2> > my_combinations;
130 Count my_total = 0;
131 Count my_barcode1_only = 0;
132 Count my_barcode2_only = 0;
133
134public:
138 struct State {
139 State() = default;
140 State(typename SimpleSingleMatch<max_size_>::State s1, typename SimpleSingleMatch<max_size_>::State s2) : search1(std::move(s1)), search2(std::move(s2)) {}
141
142 std::unordered_map<std::array<BarcodeIndex, 2>, Count, CombinationHash<2> >collected;
143 Count barcode1_only = 0;
144 Count barcode2_only = 0;
145 Count total = 0;
146
150 typename SimpleSingleMatch<max_size_>::State search1;
151 typename SimpleSingleMatch<max_size_>::State search2;
155 };
156
157 State initialize() const {
158 return State(my_matcher1.initialize(), my_matcher2.initialize());
159 }
160
161 void reduce(State& s) {
162 my_matcher1.reduce(s.search1);
163 my_matcher2.reduce(s.search2);
164 for (const auto& col : s.collected) {
165 my_combinations[col.first] += col.second;
166 }
167 my_total += s.total;
168 my_barcode1_only += s.barcode1_only;
169 my_barcode2_only += s.barcode2_only;
170 }
171
// Compile-time flag, fixed to false for this handler.
// NOTE(review): presumably consulted by the code that drives process() to decide
// whether read names must be supplied; process() here only takes the two
// sequence ranges. Confirm against the caller.
172 constexpr static bool use_names = false;
177public:
181 void process(State& state, const std::pair<const char*, const char*>& r1, const std::pair<const char*, const char*>& r2) const {
182 if (my_use_first) {
183 bool m1 = my_matcher1.search_first(r1.first, r1.second - r1.first, state.search1);
184 bool m2 = my_matcher2.search_first(r2.first, r2.second - r2.first, state.search2);
185
186 if (m1 && m2) {
187 std::array<BarcodeIndex, 2> key{ state.search1.index, state.search2.index };
188 ++state.collected[std::move(key)];
189 } else if (my_randomized) {
190 bool n1 = my_matcher1.search_first(r2.first, r2.second - r2.first, state.search1);
191 bool n2 = my_matcher2.search_first(r1.first, r1.second - r1.first, state.search2);
192 if (n1 && n2) {
193 std::array<BarcodeIndex, 2> key{ state.search1.index, state.search2.index };
194 ++state.collected[std::move(key)];
195 } else {
196 if (m1 || n1) {
197 ++state.barcode1_only;
198 } else if (m2 || n2) {
199 ++state.barcode2_only;
200 }
201 }
202 } else {
203 if (m1) {
204 ++state.barcode1_only;
205 } else if (m2) {
206 ++state.barcode2_only;
207 }
208 }
209
210 } else {
211 bool m1 = my_matcher1.search_best(r1.first, r1.second - r1.first, state.search1);
212 bool m2 = my_matcher2.search_best(r2.first, r2.second - r2.first, state.search2);
213
214 if (!my_randomized) {
215 if (m1 && m2) {
216 std::array<BarcodeIndex, 2> key{ state.search1.index, state.search2.index };
217 ++state.collected[std::move(key)];
218 } else if (m1) {
219 ++state.barcode1_only;
220 } else if (m2) {
221 ++state.barcode2_only;
222 }
223 } else if (m1 && m2) {
224 std::array<BarcodeIndex, 2> candidate{ state.search1.index, state.search2.index };
225 int mismatches = state.search1.mismatches + state.search2.mismatches;
226
227 bool n1 = my_matcher1.search_best(r2.first, r2.second - r2.first, state.search1);
228 bool n2 = my_matcher2.search_best(r1.first, r1.second - r1.first, state.search2);
229
230 if (n1 && n2) {
231 int rmismatches = state.search1.mismatches + state.search2.mismatches;
232 if (mismatches > rmismatches) {
233 std::array<BarcodeIndex, 2> key{ state.search1.index, state.search2.index };
234 ++state.collected[std::move(key)];
235 } else if (mismatches < rmismatches) {
236 ++state.collected[candidate];
237 } else if (candidate[0] == state.search1.index && candidate[1] == state.search2.index) {
238 // If the mismatches are the same, it may not be ambiguous
239 // if the indices would be the same anyway.
240 ++state.collected[candidate];
241 }
242 } else {
243 ++state.collected[candidate];
244 }
245 } else {
246 bool n1 = my_matcher1.search_best(r2.first, r2.second - r2.first, state.search1);
247 bool n2 = my_matcher2.search_best(r1.first, r1.second - r1.first, state.search2);
248
249 if (n1 && n2) {
250 std::array<BarcodeIndex, 2> key{ state.search1.index, state.search2.index };
251 ++state.collected[std::move(key)];
252 } else if (m1 || n1) {
253 ++state.barcode1_only;
254 } else if (m2 || n2) {
255 ++state.barcode2_only;
256 }
257 }
258 }
259
260 ++state.total;
261 }
266public:
271 const std::unordered_map<std::array<BarcodeIndex, 2>, Count, CombinationHash<2> >& get_combinations() const {
272 return my_combinations;
273 }
274
279 return my_total;
280 }
281
286 return my_barcode1_only;
287 }
288
293 return my_barcode2_only;
294 }
295};
296
297}
298
299#endif
Defines the SimpleSingleMatch class.
Pool of barcode sequences.
Definition BarcodePool.hpp:24
Hash a combination of barcode indices.
Definition utils.hpp:286
Handler for paired-end combinatorial barcodes.
Definition CombinatorialBarcodesPairedEnd.hpp:30
const std::unordered_map< std::array< BarcodeIndex, 2 >, Count, CombinationHash< 2 > > & get_combinations() const
Definition CombinatorialBarcodesPairedEnd.hpp:271
BarcodeIndex get_barcode2_only() const
Definition CombinatorialBarcodesPairedEnd.hpp:292
CombinatorialBarcodesPairedEnd(const char *template_seq1, SeqLength template_length1, const BarcodePool &barcode_pool1, const char *template_seq2, SeqLength template_length2, const BarcodePool &barcode_pool2, const Options &options)
Definition CombinatorialBarcodesPairedEnd.hpp:86
BarcodeIndex get_total() const
Definition CombinatorialBarcodesPairedEnd.hpp:278
BarcodeIndex get_barcode1_only() const
Definition CombinatorialBarcodesPairedEnd.hpp:285
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:16
std::size_t SeqLength
Definition utils.hpp:37
SearchStrand
Definition utils.hpp:31
DuplicateAction
Definition utils.hpp:26
std::vector< const char * >::size_type BarcodeIndex
Definition utils.hpp:43
unsigned long long Count
Definition utils.hpp:67
Optional parameters for SimpleSingleMatch.
Definition SimpleSingleMatch.hpp:36
DuplicateAction duplicates
Definition SimpleSingleMatch.hpp:45
int max_mismatches
Definition SimpleSingleMatch.hpp:40
SearchStrand strand
Definition SimpleSingleMatch.hpp:50
Utilities for sequence matching.