kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
RandomBarcodeSingleEnd.hpp
Go to the documentation of this file.
1#ifndef KAORI_RANDOM_BARCODE_SINGLE_END_HPP
2#define KAORI_RANDOM_BARCODE_SINGLE_END_HPP
3
4#include "../ScanTemplate.hpp"
5#include <vector>
6#include <string>
7#include <unordered_map>
8
15namespace kaori {
16
27template<size_t max_size>
29public:
33 struct Options {
38
43 bool use_first = true;
44
48 SearchStrand strand = SearchStrand::FORWARD;
49 };
50
51public:
59 RandomBarcodeSingleEnd(const char* template_seq, size_t template_length, const Options& options) :
60 forward(search_forward(options.strand)),
61 reverse(search_reverse(options.strand)),
62 constant(template_seq, template_length, options.strand),
63 max_mm(options.max_mismatches),
64 use_first(options.use_first)
65 {}
66
67private:
68 std::unordered_map<std::string, int> counts;
69 int total = 0;
70
71 bool forward, reverse;
73 int max_mm;
74 bool use_first;
75
76 bool has_match(int obs_mismatches) const {
77 return (obs_mismatches >= 0 && obs_mismatches <= max_mm);
78 }
79
80public:
/**
 * @brief Mutable state that process() updates and reduce() merges into the handler.
 */
struct State {
    // An empty state: no counts, zero-length buffer, zero reads.
    State() = default;

    // Pre-size the scratch buffer to `varsize` characters (filled with spaces)
    // so that extracted barcodes can be written into it in place.
    State(size_t varsize) : buffer(varsize, ' ') {}

    // Number of times each extracted barcode sequence was seen.
    std::unordered_map<std::string, int> counts;

    // Scratch space holding the most recently extracted barcode.
    std::string buffer;

    // Number of reads processed with this state.
    int total = 0;
};
92
93 void forward_match(const char* seq, size_t position, State& state) const {
94 auto start = seq + position;
95 const auto& range = constant.variable_regions()[0];
96 std::copy(start + range.first, start + range.second, state.buffer.data());
97
98 auto it = state.counts.find(state.buffer);
99 if (it != state.counts.end()) {
100 ++(it->second);
101 } else {
102 state.counts[state.buffer] = 1;
103 }
104 }
105
106 void reverse_match(const char* seq, size_t position, State& state) const {
107 const auto& range = constant.variable_regions()[0];
108 auto start = seq + position + range.first;
109 size_t len = state.buffer.size();
110 for (size_t j = 0; j < len; ++j) {
111 state.buffer[j] = complement_base<true>(start[len - j - 1]);
112 }
113
114 auto it = state.counts.find(state.buffer);
115 if (it != state.counts.end()) {
116 ++(it->second);
117 } else {
118 state.counts[state.buffer] = 1;
119 }
120 }
121
122 void process(State& state, const std::pair<const char*, const char*>& x) const {
123 auto read_seq = x.first;
124 auto deets = constant.initialize(read_seq, x.second - x.first);
125
126 if (use_first) {
127 while (!deets.finished) {
128 constant.next(deets);
129 if (forward && has_match(deets.forward_mismatches)) {
130 forward_match(read_seq, deets.position, state);
131 break;
132 }
133 if (reverse && has_match(deets.reverse_mismatches)) {
134 reverse_match(read_seq, deets.position, state);
135 break;
136 }
137 }
138
139 } else {
140 int best = max_mm + 1;
141 bool best_forward = true;
142 size_t best_position = 0;
143 bool best_tied = false;
144
145 while (!deets.finished) {
146 constant.next(deets);
147
148 if (forward && has_match(deets.forward_mismatches)) {
149 if (deets.forward_mismatches < best) {
150 best = deets.forward_mismatches;
151 best_position = deets.position;
152 best_forward = true;
153 best_tied = false;
154 } else if (deets.forward_mismatches == best) {
155 best_tied = true;
156 }
157 }
158
159 if (reverse && has_match(deets.reverse_mismatches)) {
160 if (deets.reverse_mismatches < best) {
161 best = deets.reverse_mismatches;
162 best_position = deets.position;
163 best_forward = false;
164 best_tied = false;
165 } else if (deets.reverse_mismatches == best) {
166 best_tied = true;
167 }
168 }
169 }
170
171 if (!best_tied && best <= max_mm) {
172 if (best_forward) {
173 forward_match(read_seq, best_position, state);
174 } else {
175 reverse_match(read_seq, best_position, state);
176 }
177 }
178 }
179
180 ++state.total;
181 }
182
183 static constexpr bool use_names = false;
188public:
192 State initialize() const {
193 const auto& range = constant.variable_regions()[0];
194 return State(range.second - range.first);
195 }
196
197 void reduce(State& s) {
198 for (const auto& pair : s.counts) {
199 auto it = counts.find(pair.first);
200 if (it != counts.end()) {
201 it->second += pair.second;
202 } else {
203 counts[pair.first] = pair.second;
204 }
205 }
206 total += s.total;
207 }
212public:
216 const std::unordered_map<std::string, int>& get_counts() const {
217 return counts;
218 }
219
223 int get_total() const {
224 return total;
225 }
226};
227
228}
229
230#endif
Handler for single-end random barcodes.
Definition RandomBarcodeSingleEnd.hpp:28
int get_total() const
Definition RandomBarcodeSingleEnd.hpp:223
RandomBarcodeSingleEnd(const char *template_seq, size_t template_length, const Options &options)
Definition RandomBarcodeSingleEnd.hpp:59
const std::unordered_map< std::string, int > & get_counts() const
Definition RandomBarcodeSingleEnd.hpp:216
Scan a read sequence for the template sequence.
Definition ScanTemplate.hpp:37
void next(State &state) const
Definition ScanTemplate.hpp:198
const std::vector< std::pair< int, int > > & variable_regions() const
Definition ScanTemplate.hpp:295
State initialize(const char *read_seq, size_t read_length) const
Definition ScanTemplate.hpp:156
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:13
Optional parameters for RandomBarcodeSingleEnd.
Definition RandomBarcodeSingleEnd.hpp:33
bool use_first
Definition RandomBarcodeSingleEnd.hpp:43
SearchStrand strand
Definition RandomBarcodeSingleEnd.hpp:48
int max_mismatches
Definition RandomBarcodeSingleEnd.hpp:37