kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
FastqReader.hpp
Go to the documentation of this file.
1#ifndef KAORI_FASTQ_READER_HPP
2#define KAORI_FASTQ_READER_HPP
3
4#include <cctype>
5#include <vector>
6#include <stdexcept>
7
8#include "byteme/byteme.hpp"
9
10#include "utils.hpp"
11
18namespace kaori {
19
31template<typename Pointer_>
33public:
40 Pointer_ p,
41 std::size_t buffer_size = 65536 /* default for back-compatibility */
42 ) :
43 my_pb(p, buffer_size)
44 {
45 my_sequence.reserve(200);
46 my_name.reserve(200);
47 my_okay = my_pb.valid();
48 }
49
57 bool operator()() {
58 // Quitting early if the buffer is already empty.
59 if (!my_okay) {
60 return false;
61 }
62
63 auto init_line = my_line_count;
64
65 // Processing the name. This should be on a single line, hopefully.
66 my_name.clear();
67 char val = my_pb.get();
68 if (val != '@') {
69 throw std::runtime_error("read name should start with '@' (starting line " + std::to_string(init_line + 1) + ")");
70 }
71
72 val = advance_and_check();
73 while (!std::isspace(val)) {
74 my_name.push_back(val);
75 val = advance_and_check();
76 }
77
78 while (val != '\n') {
79 val = advance_and_check();
80 }
81 ++my_line_count;
82
83 // Processing the sequence itself until we get to a '+'.
84 my_sequence.clear();
85 val = advance_and_check();
86 while (1) {
87 if (val == '\n') {
88 val = advance_and_check();
89 if (val == '+') {
90 break;
91 }
92 }
93 my_sequence.push_back(val);
94 val = advance_and_check();
95 }
96 ++my_line_count;
97
98 // Line 3 should be a single line; starting with '+' is implicit from above.
99 val = advance_and_check();
100 while (val != '\n') {
101 val = advance_and_check();
102 }
103 ++my_line_count;
104
105 // Processing the qualities. Extraction is allowed to fail if we're at
106 // the end of the file. Note that we can't check for '@' as a
107 // delimitor, as this can be a valid score, so instead we check at each
108 // newline whether we've reached the specified length, and quit if so.
109 SeqLength seq_length = my_sequence.size(), qual_length = 0;
110 my_okay = false;
111
112 while (my_pb.advance()) {
113 val = my_pb.get();
114 if (val != '\n') {
115 ++qual_length;
116 } else if (qual_length >= seq_length) {
117 my_okay = my_pb.advance(); // sneak past the newline.
118 break;
119 }
120 }
121
122 if (qual_length != seq_length) {
123 // Technically qual_length could overflow as the length of the quality string is unbounded.
124 // This would cause this check to not be triggered (unlike the other overflow cases where we should get a bad_alloc).
125 // In practice, who cares, and besides, the quality strings aren't even used for anything here.
126 throw std::runtime_error("non-equal lengths for quality and sequence strings (starting line " + std::to_string(init_line + 1) + ")");
127 }
128
129 ++my_line_count;
130 return true;
131 }
132
133private:
135
136 char advance_and_check() {
137 if (!my_pb.advance()) {
138 throw std::runtime_error("premature end of the file at line " + std::to_string(my_line_count + 1));
139 }
140 return my_pb.get();
141 }
142
143private:
144 std::vector<char> my_sequence;
145 std::vector<char> my_name;
146 bool my_okay;
147 unsigned long long my_line_count = 0; // guarantee at least 64 bits for the line counter.
148
149public:
154 const std::vector<char>& get_sequence() const {
155 return my_sequence;
156 }
157
163 const std::vector<char>& get_name() const {
164 return my_name;
165 }
166};
167
168}
169
170#endif
Stream reads from a FASTQ file.
Definition FastqReader.hpp:32
const std::vector< char > & get_sequence() const
Definition FastqReader.hpp:154
FastqReader(Pointer_ p, std::size_t buffer_size=65536)
Definition FastqReader.hpp:39
bool operator()()
Definition FastqReader.hpp:57
const std::vector< char > & get_name() const
Definition FastqReader.hpp:163
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:16
Utilities for sequence matching.