singlepp_loaders
Load pre-processed reference datasets for SingleR
Loading...
Searching...
No Matches
markers.hpp
Go to the documentation of this file.
1#ifndef SINGLEPP_LOADERS_MARKERS_HPP
2#define SINGLEPP_LOADERS_MARKERS_HPP
3
4#include "byteme/PerByte.hpp"
5#include "byteme/RawFileReader.hpp"
6#include "byteme/GzipFileReader.hpp"
7#include "byteme/ZlibBufferReader.hpp"
8
9#include "tatami/tatami.hpp"
10#include "singlepp/singlepp.hpp"
11
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>
17
23namespace singlepp_loaders {
24
/**
 * @brief Options for loading markers.
 */
struct LoadMarkersOptions {
    /**
     * Whether the input should be consumed through the parallel per-byte reader
     * (`byteme::PerByteParallel`) rather than the serial one (`byteme::PerByteSerial`).
     */
    bool parallel = false;

    /**
     * Value forwarded to the `buffer_size` field of the underlying **byteme** reader options.
     * Presumably a size in bytes - confirm against the **byteme** documentation.
     */
    size_t buffer_size = 65536;
};
39
43namespace internal {
44
45template<typename Index_>
46singlepp::Markers<Index_> load_markers(byteme::Reader& reader, bool parallel) {
48
49 std::unique_ptr<byteme::PerByteInterface<char> > pbptr;
50 if (parallel) {
51 pbptr.reset(new byteme::PerByteParallel<char, byteme::Reader*>(&reader));
52 } else {
53 pbptr.reset(new byteme::PerByteSerial<char, byteme::Reader*>(&reader));
54 }
55 auto& pb = *pbptr;
56
57 bool okay = pb.valid();
58 while (okay) {
59 // Processing the label IDs.
60 size_t first = 0, second = 0;
61 for (int l = 0; l < 2; ++l) {
62 auto& current = (l == 0 ? first : second);
63 bool non_empty = false;
64
65 do {
66 char x = pb.get();
67 okay = pb.advance();
68
69 if (x == '\t') {
70 if (!non_empty) {
71 throw std::runtime_error("empty field detected in the label indices");
72 }
73 break;
74 } else if (x == '\n') {
75 okay = false; // hit the error below.
76 break;
77 } else if (!std::isdigit(x)) {
78 throw std::runtime_error("label indices should be integers");
79 }
80
81 non_empty = true;
82 current *= 10;
83 current += (x - '0');
84 } while (okay);
85
86 if (!okay) {
87 throw std::runtime_error("expected at least three tab-separated fields on each line");
88 }
89 }
90
91 if (first >= markers.size()) {
92 markers.resize(first + 1);
93 }
94 auto& fmarkers = markers[first];
95 if (second >= fmarkers.size()) {
96 fmarkers.resize(second + 1);
97 }
98 auto& values = fmarkers[second];
99 if (!values.empty()) {
100 throw std::runtime_error("multiple marker sets listed for a single pairwise comparison");
101 }
102
103 // Processing the actual gene indices.
104 bool non_empty = false;
105 Index_ current = 0;
106 while (okay) {
107 char x = pb.get();
108 okay = pb.advance();
109
110 if (std::isdigit(x)) {
111 non_empty = true;
112 current *= 10;
113 current += (x - '0');
114
115 } else if (x == '\t') {
116 if (!non_empty) {
117 throw std::runtime_error("gene index fields should not be empty");
118 }
119 values.push_back(current);
120 current = 0;
121 non_empty = false;
122
123 } else if (x == '\n') {
124 break;
125
126 } else {
127 throw std::runtime_error("gene index fields should be integers");
128 }
129 }
130
131 // Adding the last element. We don't do this inside the newline check,
132 // as we need to account for cases where the file is not newline-terminated.
133 if (!non_empty) {
134 throw std::runtime_error("gene index fields should not be empty");
135 }
136 values.push_back(current);
137 }
138
139 // Now, making sure that every label has the same number of elements.
140 size_t expected_nlabels = markers.size();
141 for (const auto& m : markers) {
142 expected_nlabels = std::max(expected_nlabels, m.size());
143 }
144 markers.resize(expected_nlabels);
145 for (auto& m : markers) {
146 m.resize(expected_nlabels);
147 }
148
149 return markers;
150}
151
152}
171template<typename Index_ = singlepp::DefaultIndex>
173 byteme::RawFileReaderOptions read_opt;
174 read_opt.buffer_size = options.buffer_size;
175 byteme::RawFileReader reader(path, read_opt);
176 return internal::load_markers<Index_>(reader, options.parallel);
177}
178
189template<typename Index_ = singlepp::DefaultIndex>
191 byteme::GzipFileReaderOptions read_opt;
192 read_opt.buffer_size = options.buffer_size;
193 byteme::GzipFileReader reader(path, read_opt);
194 return internal::load_markers<Index_>(reader, options.parallel);
195}
196
208template<typename Index_ = singlepp::DefaultIndex>
209singlepp::Markers<Index_> load_markers_from_zlib_buffer(const unsigned char* buffer, size_t len, const LoadMarkersOptions& options) {
210 byteme::ZlibBufferReaderOptions read_opt;
211 read_opt.mode = 3;
212 read_opt.buffer_size = options.buffer_size;
213 byteme::ZlibBufferReader reader(buffer, len, read_opt);
214 return internal::load_markers<Index_>(reader, options.parallel);
215}
216
217}
218
219#endif
Load pre-processed single reference datasets.
Definition labels.hpp:23
singlepp::Markers< Index_ > load_markers_from_gzip_file(const char *path, const LoadMarkersOptions &options)
Definition markers.hpp:190
singlepp::Markers< Index_ > load_markers_from_zlib_buffer(const unsigned char *buffer, size_t len, const LoadMarkersOptions &options)
Definition markers.hpp:209
singlepp::Markers< Index_ > load_markers_from_text_file(const char *path, const LoadMarkersOptions &options)
Definition markers.hpp:172
std::vector< std::vector< std::vector< Index_ > > > Markers
Options for loading markers.
Definition markers.hpp:28
bool parallel
Definition markers.hpp:32
size_t buffer_size
Definition markers.hpp:37