kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
FastqReader.hpp
Go to the documentation of this file.
1#ifndef KAORI_FASTQ_READER_HPP
2#define KAORI_FASTQ_READER_HPP
3
4#include <cctype>
5#include <vector>
6#include <stdexcept>
7
8#include "byteme/PerByte.hpp"
9
10#include "utils.hpp"
11
18namespace kaori {
19
31template<typename Pointer_>
33public:
37 FastqReader(Pointer_ p) : my_pb(p) {
38 my_sequence.reserve(200);
39 my_name.reserve(200);
40 my_okay = my_pb.valid();
41 }
42
50 bool operator()() {
51 // Quitting early if the buffer is already empty.
52 if (!my_okay) {
53 return false;
54 }
55
56 auto init_line = my_line_count;
57
58 // Processing the name. This should be on a single line, hopefully.
59 my_name.clear();
60 char val = my_pb.get();
61 if (val != '@') {
62 throw std::runtime_error("read name should start with '@' (starting line " + std::to_string(init_line + 1) + ")");
63 }
64
65 val = advance_and_check();
66 while (!std::isspace(val)) {
67 my_name.push_back(val);
68 val = advance_and_check();
69 }
70
71 while (val != '\n') {
72 val = advance_and_check();
73 }
74 ++my_line_count;
75
76 // Processing the sequence itself until we get to a '+'.
77 my_sequence.clear();
78 val = advance_and_check();
79 while (1) {
80 if (val == '\n') {
81 val = advance_and_check();
82 if (val == '+') {
83 break;
84 }
85 }
86 my_sequence.push_back(val);
87 val = advance_and_check();
88 }
89 ++my_line_count;
90
91 // Line 3 should be a single line; starting with '+' is implicit from above.
92 val = advance_and_check();
93 while (val != '\n') {
94 val = advance_and_check();
95 }
96 ++my_line_count;
97
98 // Processing the qualities. Extraction is allowed to fail if we're at
99 // the end of the file. Note that we can't check for '@' as a
100 // delimitor, as this can be a valid score, so instead we check at each
101 // newline whether we've reached the specified length, and quit if so.
102 SeqLength seq_length = my_sequence.size(), qual_length = 0;
103 my_okay = false;
104
105 while (my_pb.advance()) {
106 val = my_pb.get();
107 if (val != '\n') {
108 ++qual_length;
109 } else if (qual_length >= seq_length) {
110 my_okay = my_pb.advance(); // sneak past the newline.
111 break;
112 }
113 }
114
115 if (qual_length != seq_length) {
116 // Technically qual_length could overflow as the length of the quality string is unbounded.
117 // This would cause this check to not be triggered (unlike the other overflow cases where we should get a bad_alloc).
118 // In practice, who cares, and besides, the quality strings aren't even used for anything here.
119 throw std::runtime_error("non-equal lengths for quality and sequence strings (starting line " + std::to_string(init_line + 1) + ")");
120 }
121
122 ++my_line_count;
123 return true;
124 }
125
126private:
128
129 char advance_and_check() {
130 if (!my_pb.advance()) {
131 throw std::runtime_error("premature end of the file at line " + std::to_string(my_line_count + 1));
132 }
133 return my_pb.get();
134 }
135
136private:
// Sequence of the most recently parsed record; cleared and refilled on every operator() call.
137 std::vector<char> my_sequence;
// Name of the most recently parsed record, i.e., the header text between '@' and the first whitespace.
138 std::vector<char> my_name;
// Whether there is still input left to parse; once false, operator() returns false without reading.
139 bool my_okay;
// Number of lines counted so far; only used to build error messages.
140 unsigned long long my_line_count = 0; // guarantee at least 64 bits for the line counter.
141
142public:
147 const std::vector<char>& get_sequence() const {
148 return my_sequence;
149 }
150
156 const std::vector<char>& get_name() const {
157 return my_name;
158 }
159};
160
161}
162
163#endif
Stream reads from a FASTQ file.
Definition FastqReader.hpp:32
const std::vector< char > & get_sequence() const
Definition FastqReader.hpp:147
bool operator()()
Definition FastqReader.hpp:50
const std::vector< char > & get_name() const
Definition FastqReader.hpp:156
FastqReader(Pointer_ p)
Definition FastqReader.hpp:37
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:16
std::size_t SeqLength
Definition utils.hpp:37
Utilities for sequence matching.