kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
utils.hpp
Go to the documentation of this file.
1#ifndef KAORI_UTILS_HPP
2#define KAORI_UTILS_HPP
3
#include <array>
#include <bitset>
#include <cstddef>
#include <functional>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
10
16namespace kaori {
17
/**
 * Policy for dealing with duplicated entries.
 *
 * NOTE(review): the precise effect of each choice is decided by the code that consumes this enum, which is not visible here.
 * The names suggest keeping the first occurrence, keeping the last, keeping none, or reporting an error - confirm against the callers.
 */
enum class DuplicateAction : char {
    FIRST,
    LAST,
    NONE,
    ERROR
};
27
/**
 * Strand(s) on which a search should be performed:
 * the forward strand only, the reverse strand only, or both of them.
 */
enum class SearchStrand : char {
    FORWARD,
    REVERSE,
    BOTH
};
32
/**
 * Integer type used for sequence lengths; an alias for `std::size_t`.
 */
using SeqLength = std::size_t;
38
/**
 * Integer type used to index barcodes.
 */
using BarcodeIndex = std::vector<const char*>::size_type; // we use the size_type from the BarcodePool's internal vector of barcodes.

/**
 * Reserved "unmatched" status code.
 * This is the largest value representable by `BarcodeIndex`, so it cannot clash with a small, real index.
 */
inline constexpr BarcodeIndex STATUS_UNMATCHED = std::numeric_limits<BarcodeIndex>::max();

/**
 * Reserved "ambiguous" status code.
 * This is the value immediately below `STATUS_UNMATCHED`, i.e., the second-largest value of `BarcodeIndex`.
 */
inline constexpr BarcodeIndex STATUS_AMBIGUOUS = STATUS_UNMATCHED - 1;
54
60inline bool is_barcode_index_ok(BarcodeIndex index) {
61 return index < STATUS_AMBIGUOUS;
62}
63
/**
 * Integer type used for counts; an alias for `unsigned long long`.
 */
using Count = unsigned long long;
68
72inline bool search_forward(SearchStrand x) {
73 return x == SearchStrand::FORWARD || x == SearchStrand::BOTH;
74}
75
76inline bool search_reverse(SearchStrand x) {
77 return x == SearchStrand::REVERSE || x == SearchStrand::BOTH;
78}
79
/**
 * Complement a single nucleotide base.
 *
 * @tparam allow_n_ Whether `N`/`n` is accepted; it is complemented to `N`.
 * @tparam allow_iupac_ Whether the IUPAC ambiguity codes are accepted.
 * Setting this to `true` also enables `N`/`n`, regardless of `allow_n_`.
 *
 * @param b Base to complement, in upper or lower case.
 * @return The complement of `b`, always in upper case.
 * @throws std::runtime_error if `b` is not a recognised base under the chosen template arguments.
 */
template<bool allow_n_ = false, bool allow_iupac_ = false>
char complement_base(char b) {
    // The four standard bases are always supported.
    switch (b) {
        case 'A': case 'a': return 'T';
        case 'C': case 'c': return 'G';
        case 'G': case 'g': return 'C';
        case 'T': case 't': return 'A';
        default: break;
    }

    if constexpr(allow_n_ || allow_iupac_) {
        if (b == 'N' || b == 'n') {
            return 'N';
        }
    }

    if constexpr(allow_iupac_) {
        switch (b) {
            // Two-base codes.
            case 'R': case 'r': return 'Y'; // R = A/G, so complement is T/C ==> Y.
            case 'Y': case 'y': return 'R'; // Y = C/T, so complement is G/A ==> R.
            case 'S': case 's': return 'S'; // S = C/G, so complement is just itself.
            case 'W': case 'w': return 'W'; // W = A/T, so complement is just itself.
            case 'K': case 'k': return 'M'; // K = G/T, so complement is C/A ==> M.
            case 'M': case 'm': return 'K'; // M = A/C, so complement is T/G ==> K.

            // Three-base codes.
            case 'B': case 'b': return 'V'; // B can't be A, so complement can't be T ==> V.
            case 'D': case 'd': return 'H'; // D can't be C, so complement can't be G ==> H.
            case 'H': case 'h': return 'D'; // H can't be G, so complement can't be C ==> D.
            case 'V': case 'v': return 'B'; // V can't be T, so complement can't be A ==> B.

            default: break;
        }
    }

    // Anything that was not handled above is either unknown or disabled by the template arguments.
    throw std::runtime_error("cannot complement unknown base '" + std::string(1, b) + "'");
}
160
/**
 * Check whether a character is one of the four standard bases.
 *
 * @param b Character to check.
 * @return `true` if `b` is `A`, `C`, `G` or `T` in either case, `false` otherwise.
 */
inline bool is_standard_base(char b) {
    switch (b) {
        case 'A': case 'a':
        case 'C': case 'c':
        case 'G': case 'g':
        case 'T': case 't':
            return true;
        default:
            return false;
    }
}
173
/**
 * Shift a bitset hash to the left by 4 bits, in place.
 * The 4 highest bits are discarded and the 4 lowest bits are left cleared.
 *
 * @tparam N Number of bits in the hash.
 * @param x Hash to shift.
 */
template<std::size_t N>
void shift_hash(std::bitset<N>& x) {
    constexpr std::size_t shift_width = 4;
    x <<= shift_width;
}
178
/**
 * Append a standard base to a bitset hash.
 * The hash is shifted with `shift_hash()` and then one bit is set, depending on the base:
 * bit 0 for `A`, bit 1 for `C`, bit 2 for `G` and bit 3 for `T` (either case).
 *
 * @tparam N Number of bits in the hash.
 * @param x Hash to modify in place.
 * @param b Base to add.
 * @throws std::runtime_error if `b` is not one of the four standard bases.
 * Note that `x` has already been shifted by the time the exception is thrown.
 */
template<std::size_t N>
void add_base_to_hash(std::bitset<N>& x, char b) {
    shift_hash(x);

    std::size_t position = 0;
    switch (b) {
        case 'A': case 'a':
            position = 0;
            break;
        case 'C': case 'c':
            position = 1;
            break;
        case 'G': case 'g':
            position = 2;
            break;
        case 'T': case 't':
            position = 3;
            break;
        default:
            throw std::runtime_error("unknown base '" + std::string(1, b) + "'");
    }

    x.set(position);
}
201
/**
 * Append a non-standard position to a bitset hash.
 * The hash is shifted with `shift_hash()` and then bits 0 to 3 are all set,
 * which differs from the single-bit patterns written by `add_base_to_hash()`.
 *
 * @tparam N Number of bits in the hash.
 * @param x Hash to modify in place.
 */
template<std::size_t N>
void add_other_to_hash(std::bitset<N>& x) {
    shift_hash(x);
    for (std::size_t position = 0; position < 4; ++position) {
        x.set(position);
    }
}
211
/**
 * Number of standard bases (A, C, G and T).
 */
inline constexpr int NUM_BASES{4};
213
222template<int num_variable_>
223class CombinationHash {
224public:
229 std::size_t operator()(const std::array<BarcodeIndex, num_variable_>& key) const {
230 unsigned long long seed = 0;
231
232 for (int v = 0; v < num_variable_; ++v) {
233 seed = [&](){
234 // Adapted from https://stackoverflow.com/questions/2590677/how-do-i-combine-hash-values-in-c0x/78509978#78509978, which was in turn derived from Boost.ContainerHash:
235 // https://github.com/boostorg/container_hash/blob/ee5285bfa64843a11e29700298c83a37e3132fcd/include/boost/container_hash/detail/hash_mix.hpp#L67
236 // We hard-code it to use the 64-bit implementation for simplicity, given that a ULL is guaranteed to have at least 64 bits.
237 unsigned long long x = seed + 0x9e3779b9 + key[v];
238 constexpr unsigned long long m = 0xe9846af9b1a615d;
239 x ^= x >> 32;
240 x *= m;
241 x ^= x >> 32;
242 x *= m;
243 x ^= x >> 28;
244 return x;
245 }();
246 }
247
248 return seed;
249 }
250};
251
252}
253
254#endif
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:16