kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
RandomBarcodeSingleEnd.hpp
Go to the documentation of this file.
1#ifndef KAORI_RANDOM_BARCODE_SINGLE_END_HPP
2#define KAORI_RANDOM_BARCODE_SINGLE_END_HPP
3
4#include "../ScanTemplate.hpp"
5#include <vector>
6#include <string>
7#include <unordered_map>
8
15namespace kaori {
16
27template<SeqLength max_size_>
29public:
33 struct Options {
38
43 bool use_first = true;
44
48 SearchStrand strand = SearchStrand::FORWARD;
49 };
50
51public:
59 RandomBarcodeSingleEnd(const char* template_seq, SeqLength template_length, const Options& options) :
60 my_forward(search_forward(options.strand)),
61 my_reverse(search_reverse(options.strand)),
62 my_constant(template_seq, template_length, options.strand),
63 my_max_mm(options.max_mismatches),
64 my_use_first(options.use_first)
65 {}
66
67private:
68 std::unordered_map<std::string, Count> my_counts;
69 Count my_total = 0;
70
71 bool my_forward, my_reverse;
72 ScanTemplate<max_size_> my_constant;
73 int my_max_mm;
74 bool my_use_first;
75
76public:
80 struct State {
81 State() {}
82 State(SeqLength varsize) : buffer(varsize, ' ') {}
83
84 std::unordered_map<std::string, Count> counts;
85 std::string buffer;
86 Count total = 0;
87 };
88
89 void forward_match(const char* seq, SeqLength position, State& state) const {
90 auto start = seq + position;
91 const auto& range = my_constant.forward_variable_regions()[0];
92 std::copy(start + range.first, start + range.second, state.buffer.data());
93
94 auto it = state.counts.find(state.buffer);
95 if (it != state.counts.end()) {
96 ++(it->second);
97 } else {
98 state.counts[state.buffer] = 1;
99 }
100 }
101
102 void reverse_match(const char* seq, SeqLength position, State& state) const {
103 const auto& range = my_constant.forward_variable_regions()[0];
104 auto start = seq + position + range.first;
105 SeqLength len = state.buffer.size();
106 for (SeqLength j = 0; j < len; ++j) {
107 state.buffer[j] = complement_base<true>(start[len - j - 1]);
108 }
109
110 auto it = state.counts.find(state.buffer);
111 if (it != state.counts.end()) {
112 ++(it->second);
113 } else {
114 state.counts[state.buffer] = 1;
115 }
116 }
117
118 void process(State& state, const std::pair<const char*, const char*>& x) const {
119 auto read_seq = x.first;
120 auto deets = my_constant.initialize(read_seq, x.second - x.first);
121
122 if (my_use_first) {
123 while (!deets.finished) {
124 my_constant.next(deets);
125 if (my_forward && deets.forward_mismatches <= my_max_mm) {
126 forward_match(read_seq, deets.position, state);
127 break;
128 }
129 if (my_reverse && deets.reverse_mismatches <= my_max_mm) {
130 reverse_match(read_seq, deets.position, state);
131 break;
132 }
133 }
134
135 } else {
136 int best = my_max_mm + 1;
137 bool best_forward = true;
138 SeqLength best_position = 0;
139 bool best_tied = false;
140
141 while (!deets.finished) {
142 my_constant.next(deets);
143
144 if (my_forward && deets.forward_mismatches <= my_max_mm) {
145 if (deets.forward_mismatches < best) {
146 best = deets.forward_mismatches;
147 best_position = deets.position;
148 best_forward = true;
149 best_tied = false;
150 } else if (deets.forward_mismatches == best) {
151 best_tied = true;
152 }
153 }
154
155 if (my_reverse && deets.reverse_mismatches <= my_max_mm) {
156 if (deets.reverse_mismatches < best) {
157 best = deets.reverse_mismatches;
158 best_position = deets.position;
159 best_forward = false;
160 best_tied = false;
161 } else if (deets.reverse_mismatches == best) {
162 best_tied = true;
163 }
164 }
165 }
166
167 if (!best_tied && best <= my_max_mm) {
168 if (best_forward) {
169 forward_match(read_seq, best_position, state);
170 } else {
171 reverse_match(read_seq, best_position, state);
172 }
173 }
174 }
175
176 ++state.total;
177 }
178
179 static constexpr bool use_names = false;
184public:
188 State initialize() const {
189 const auto& range = my_constant.forward_variable_regions()[0];
190 return State(range.second - range.first);
191 }
192
193 void reduce(State& s) {
194 for (const auto& pair : s.counts) {
195 auto it = my_counts.find(pair.first);
196 if (it != my_counts.end()) {
197 it->second += pair.second;
198 } else {
199 my_counts[pair.first] = pair.second;
200 }
201 }
202 my_total += s.total;
203 }
208public:
212 const std::unordered_map<std::string, Count>& get_counts() const {
213 return my_counts;
214 }
215
219 Count get_total() const {
220 return my_total;
221 }
222};
223
224}
225
226#endif
Defines the ScanTemplate class.
Handler for single-end random barcodes.
Definition RandomBarcodeSingleEnd.hpp:28
RandomBarcodeSingleEnd(const char *template_seq, SeqLength template_length, const Options &options)
Definition RandomBarcodeSingleEnd.hpp:59
Count get_total() const
Definition RandomBarcodeSingleEnd.hpp:219
const std::unordered_map< std::string, Count > & get_counts() const
Definition RandomBarcodeSingleEnd.hpp:212
Scan a read sequence for the template sequence.
Definition ScanTemplate.hpp:38
State initialize(const char *read_seq, SeqLength read_length) const
Definition ScanTemplate.hpp:159
const std::vector< std::pair< SeqLength, SeqLength > > & forward_variable_regions() const
Definition ScanTemplate.hpp:291
void next(State &state) const
Definition ScanTemplate.hpp:201
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:16
std::size_t SeqLength
Definition utils.hpp:37
SearchStrand
Definition utils.hpp:31
unsigned long long Count
Definition utils.hpp:67
Optional parameters for RandomBarcodeSingleEnd.
Definition RandomBarcodeSingleEnd.hpp:33
int max_mismatches
Definition RandomBarcodeSingleEnd.hpp:37
SearchStrand strand
Definition RandomBarcodeSingleEnd.hpp:48
bool use_first
Definition RandomBarcodeSingleEnd.hpp:43