kaori
A C++ library for barcode extraction and matching
Loading...
Searching...
No Matches
FastqReader.hpp
Go to the documentation of this file.
1#ifndef KAORI_FASTQ_READER_HPP
2#define KAORI_FASTQ_READER_HPP
3
4#include "byteme/PerByte.hpp"
5#include <cctype>
6#include <vector>
7#include <stdexcept>
8
15namespace kaori {
16
25public:
29 FastqReader(byteme::Reader* p) : pb(p) {
30 sequence.reserve(200);
31 name.reserve(200);
32 okay = pb.valid();
33 }
34
42 bool operator()() {
43 // Quitting early if the buffer is already empty.
44 if (!okay) {
45 return false;
46 }
47
48 int init_line = line_count;
49
50 // Processing the name. This should be on a single line, hopefully.
51 name.clear();
52 char val = pb.get();
53 if (val != '@') {
54 throw std::runtime_error("read name should start with '@' (starting line " + std::to_string(init_line + 1) + ")");
55 }
56
57 val = advance_and_check();
58 while (!std::isspace(val)) {
59 name.push_back(val);
60 val = advance_and_check();
61 }
62
63 while (val != '\n') {
64 val = advance_and_check();
65 }
66 ++line_count;
67
68 // Processing the sequence itself until we get to a '+'.
69 sequence.clear();
70 val = advance_and_check();
71 while (val != '+') {
72 if (val != '\n') {
73 sequence.push_back(val);
74 }
75 val = advance_and_check();
76 }
77 ++line_count;
78
79 // Line 3 should be a single line; starting with '+' is implicit from above.
80 val = advance_and_check();
81 while (val != '\n') {
82 val = advance_and_check();
83 }
84 ++line_count;
85
86 // Processing the qualities. Extraction is allowed to fail if we're at
87 // the end of the file. Note that we can't check for '@' as a
88 // delimitor, as this can be a valid score, so instead we check at each
89 // newline whether we've reached the specified length, and quit if so.
90 size_t qual_length = 0, seq_length = sequence.size();
91 okay = false;
92
93 while (pb.advance()) {
94 val = pb.get();
95 if (val != '\n') {
96 ++qual_length;
97 } else if (qual_length >= seq_length) {
98 okay = pb.advance(); // sneak past the newline.
99 break;
100 }
101 }
102
103 if (qual_length != seq_length) {
104 throw std::runtime_error("non-equal lengths for quality and sequence strings (starting line " + std::to_string(init_line + 1) + ")");
105 }
106
107 ++line_count;
108
109 return true;
110 }
111
112private:
113 byteme::PerByte<> pb;
114
115 char advance_and_check() {
116 if (!pb.advance()) {
117 throw std::runtime_error("premature end of the file at line " + std::to_string(line_count + 1));
118 }
119 return pb.get();
120 }
121
122private:
123 std::vector<char> sequence;
124 std::vector<char> name;
125 bool okay;
126 int line_count = 0;
127
128public:
132 const std::vector<char>& get_sequence() const {
133 return sequence;
134 }
135
140 const std::vector<char>& get_name() const {
141 return name;
142 }
143};
144
145}
146
147#endif
Stream reads from a FASTQ file.
Definition FastqReader.hpp:24
bool operator()()
Definition FastqReader.hpp:42
const std::vector< char > & get_sequence() const
Definition FastqReader.hpp:132
const std::vector< char > & get_name() const
Definition FastqReader.hpp:140
FastqReader(byteme::Reader *p)
Definition FastqReader.hpp:29
Namespace for the kaori barcode-matching library.
Definition BarcodePool.hpp:13