kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
utils.hpp
Go to the documentation of this file.
1#ifndef KAORI_UTILS_HPP
2#define KAORI_UTILS_HPP
3
#include <array>
#include <bitset>
#include <cstddef>
#include <functional>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
10
16namespace kaori {
17
/**
 * Choice of action for duplicates.
 * NOTE(review): the exact meaning of each enumerator is not visible in this header; confirm against the callers.
 */
enum class DuplicateAction : char { FIRST, LAST, NONE, ERROR };

/**
 * Strand(s) to search.
 * `search_forward()` accepts `FORWARD` and `BOTH`; `search_reverse()` accepts `REVERSE` and `BOTH`.
 */
enum class SearchStrand : char { FORWARD, REVERSE, BOTH };

/**
 * Alias for `std::size_t`.
 * NOTE(review): presumably used for sequence lengths and positions, going by the name.
 */
using SeqLength = std::size_t;

/**
 * Integer type for barcode indices.
 * We use the `size_type` from the BarcodePool's internal vector of barcodes.
 */
using BarcodeIndex = std::vector<const char*>::size_type;

/**
 * Sentinel equal to the largest value representable by `BarcodeIndex`, i.e., `static_cast<BarcodeIndex>(-1)`.
 */
inline constexpr BarcodeIndex STATUS_UNMATCHED = std::numeric_limits<BarcodeIndex>::max();

/**
 * Sentinel equal to the second-largest value representable by `BarcodeIndex`, i.e., `static_cast<BarcodeIndex>(-2)`.
 * It is the smaller of the two sentinels, so every index below it is distinct from both.
 */
inline constexpr BarcodeIndex STATUS_AMBIGUOUS = std::numeric_limits<BarcodeIndex>::max() - 1;
54
61 return index < STATUS_AMBIGUOUS;
62}
63
/**
 * Alias for `unsigned long long`.
 * NOTE(review): presumably used for occurrence counts, going by the name.
 */
using Count = unsigned long long;
68
72inline bool search_forward(SearchStrand x) {
73 return x == SearchStrand::FORWARD || x == SearchStrand::BOTH;
74}
75
76inline bool search_reverse(SearchStrand x) {
77 return x == SearchStrand::REVERSE || x == SearchStrand::BOTH;
78}
79
/**
 * Complement a single nucleotide base.
 *
 * @tparam allow_n_ Whether `N`/`n` is accepted as input, in which case it is complemented to `N`.
 * @tparam allow_iupac_ Whether the IUPAC ambiguity codes (R, Y, S, W, K, M, B, D, H, V) are accepted.
 * Setting this to `true` also accepts `N`/`n`, regardless of `allow_n_`.
 * @param b Base to complement, in upper or lower case.
 * @return The complement of `b`, always in upper case.
 * @throws std::runtime_error if `b` is not a base that is accepted under the chosen template flags.
 */
template<bool allow_n_ = false, bool allow_iupac_ = false>
char complement_base(char b) {
    // The four standard bases are always supported.
    switch (b) {
        case 'A': case 'a': return 'T';
        case 'C': case 'c': return 'G';
        case 'G': case 'g': return 'C';
        case 'T': case 't': return 'A';
        default: break;
    }

    // Each optional group is handled in its own compile-time branch, so an unsupported
    // code simply drops through to the throw at the end without any case-label fallthrough.
    if constexpr(allow_n_ || allow_iupac_) {
        if (b == 'N' || b == 'n') {
            return 'N';
        }
    }

    if constexpr(allow_iupac_) {
        switch (b) {
            case 'R': case 'r': return 'Y'; // R = A/G, so complement is T/C ==> Y.
            case 'Y': case 'y': return 'R'; // Y = C/T, so complement is G/A ==> R.
            case 'S': case 's': return 'S'; // S = C/G, so complement is just itself.
            case 'W': case 'w': return 'W'; // W = A/T, so complement is just itself.
            case 'K': case 'k': return 'M'; // K = G/T, so complement is C/A ==> M.
            case 'M': case 'm': return 'K'; // M = A/C, so complement is T/G ==> K.
            case 'B': case 'b': return 'V'; // B can't be A, so complement can't be T ==> V.
            case 'D': case 'd': return 'H'; // D can't be C, so complement can't be G ==> H.
            case 'H': case 'h': return 'D'; // H can't be G, so complement can't be C ==> D.
            case 'V': case 'v': return 'B'; // V can't be T, so complement can't be A ==> B.
            default: break;
        }
    }

    throw std::runtime_error("cannot complement unknown base '" + std::string(1, b) + "'");
}
160
/**
 * @param b Character to check.
 * @return `true` if `b` is one of A, C, G or T in either case, `false` otherwise.
 */
inline bool is_standard_base(char b) {
    switch (b) {
        case 'A': case 'a':
        case 'C': case 'c':
        case 'G': case 'g':
        case 'T': case 't':
            return true;
        default:
            return false;
    }
}
173
/**
 * Make room for one more base in the hash by shifting it left by 4 bits.
 * Each base occupies 4 bits; bits shifted past position `N - 1` are discarded.
 *
 * @tparam N Number of bits in the hash.
 * @param x Hash to be shifted in place.
 */
template<std::size_t N>
void shift_hash(std::bitset<N>& x) {
    x <<= 4;
}

/**
 * Append a standard base to the hash.
 * The hash is shifted left by 4 bits and exactly one of the lowest 4 bits is set:
 * bit 0 for A, bit 1 for C, bit 2 for G and bit 3 for T (case-insensitive).
 *
 * @tparam N Number of bits in the hash.
 * @param x Hash to be updated in place.
 * @param b Base to append.
 * @throws std::runtime_error if `b` is not one of A, C, G or T.
 * In that case `x` is left unmodified.
 */
template<std::size_t N>
void add_base_to_hash(std::bitset<N>& x, char b) {
    // Validate the base before touching the hash, so that a failure does not
    // leave the caller with a half-updated (shifted but unset) hash.
    std::size_t position = 0;
    switch (b) {
        case 'A': case 'a':
            position = 0;
            break;
        case 'C': case 'c':
            position = 1;
            break;
        case 'G': case 'g':
            position = 2;
            break;
        case 'T': case 't':
            position = 3;
            break;
        default:
            throw std::runtime_error("unknown base '" + std::string(1, b) + "'");
    }

    shift_hash(x);
    x.set(position);
}

/**
 * Append a non-standard entry to the hash.
 * The hash is shifted left by 4 bits and all of the lowest 4 bits are set,
 * which cannot collide with the single-bit encoding of any standard base.
 *
 * @tparam N Number of bits in the hash.
 * @param x Hash to be updated in place.
 */
template<std::size_t N>
void add_other_to_hash(std::bitset<N>& x) {
    shift_hash(x);
    for (std::size_t position = 0; position < 4; ++position) {
        x.set(position);
    }
}
211
/**
 * NOTE(review): presumably the number of standard bases (A, C, G and T) - confirm against the users of this constant.
 */
inline constexpr int NUM_BASES{4};
213
// Adapted from https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x/78509978#78509978, which was in turn derived from Boost.ContainerHash:
// https://github.com/boostorg/container_hash/blob/ee5285bfa64843a11e29700298c83a37e3132fcd/include/boost/container_hash/hash.hpp#L471
/**
 * Fold another hash value into a running seed.
 * The mixing function is chosen at compile time from the number of value bits in `std::size_t`.
 *
 * @param seed Current value of the combined hash.
 * @param other Hash value to be mixed into `seed`.
 * @return The updated combined hash.
 */
inline std::size_t hash_combine(std::size_t seed, std::size_t other) {
    constexpr auto width = std::numeric_limits<std::size_t>::digits;

    if constexpr(width == 64) {
        // https://github.com/boostorg/container_hash/blob/ee5285bfa64843a11e29700298c83a37e3132fcd/include/boost/container_hash/detail/hash_mix.hpp#L67
        const std::size_t multiplier = 0xe9846af9b1a615d;
        std::size_t mixed = seed + 0x9e3779b9 + other;
        mixed = (mixed ^ (mixed >> 32)) * multiplier;
        mixed = (mixed ^ (mixed >> 32)) * multiplier;
        return mixed ^ (mixed >> 28);

    } else if constexpr(width == 32) {
        // https://github.com/boostorg/container_hash/blob/ee5285bfa64843a11e29700298c83a37e3132fcd/include/boost/container_hash/detail/hash_mix.hpp#L88
        const std::size_t first_multiplier = 0x21f0aaad;
        const std::size_t second_multiplier = 0x735a2d97;
        std::size_t mixed = seed + 0x9e3779b9 + other;
        mixed = (mixed ^ (mixed >> 16)) * first_multiplier;
        mixed = (mixed ^ (mixed >> 15)) * second_multiplier;
        return mixed ^ (mixed >> 15);

    } else {
        // Uses Boost's old hash_combine function (pre-1.81).
        // Whatever, just get it to compile on weird machines until a better solution comes up.
        const std::size_t scrambled = 0x9e3779b9 + other + (seed << 6) + (seed >> 2);
        return seed ^ scrambled;
    }
}
285template<int num_variable_>
287public:
292 std::size_t operator()(const std::array<BarcodeIndex, num_variable_>& key) const {
293 std::size_t seed = 0;
294 for (int v = 0; v < num_variable_; ++v) {
295 seed = hash_combine(seed, key[v]); // don't bother pretending that std::hash<int> might be something other than the identity function.
296 }
297 return seed;
298 }
299};
300
301}
302
303#endif
Hash a combination of barcode indices.
Definition utils.hpp:286
std::size_t operator()(const std::array< BarcodeIndex, num_variable_ > &key) const
Definition utils.hpp:292
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:16
std::size_t SeqLength
Definition utils.hpp:37
constexpr BarcodeIndex STATUS_AMBIGUOUS
Definition utils.hpp:53
SearchStrand
Definition utils.hpp:31
constexpr BarcodeIndex STATUS_UNMATCHED
Definition utils.hpp:48
DuplicateAction
Definition utils.hpp:26
bool is_barcode_index_ok(BarcodeIndex index)
Definition utils.hpp:60
std::vector< const char * >::size_type BarcodeIndex
Definition utils.hpp:43
unsigned long long Count
Definition utils.hpp:67