singlepp_loaders
Load pre-processed reference datasets for SingleR
Loading...
Searching...
No Matches
markers.hpp
Go to the documentation of this file.
1#ifndef SINGLEPP_LOADERS_MARKERS_HPP
2#define SINGLEPP_LOADERS_MARKERS_HPP
3
4#include "byteme/PerByte.hpp"
5#include "byteme/RawFileReader.hpp"
6#include "byteme/GzipFileReader.hpp"
7#include "byteme/ZlibBufferReader.hpp"
8
9#include "tatami/tatami.hpp"
10#include "singlepp/singlepp.hpp"
11
12#include <string>
13#include <vector>
14#include <cctype>
15#include <type_traits>
16#include <stdexcept>
17
23namespace singlepp_loaders {
24
/**
 * @brief Options for loading markers.
 */
struct LoadMarkersOptions {
    /**
     * Whether bytes should be pulled from the reader via `byteme::PerByteParallel`
     * rather than `byteme::PerByte` while parsing.
     */
    bool parallel = false;

    /**
     * Buffer size handed to the underlying `byteme` reader when it is constructed.
     * Presumably expressed in bytes - confirm against the `byteme` documentation.
     */
    size_t buffer_size = 65536;
};
39
43namespace internal {
44
45template<typename Index_, bool parallel_>
46singlepp::Markers<Index_> load_markers(byteme::Reader& reader) {
48 typename std::conditional<parallel_, byteme::PerByte<char>, byteme::PerByteParallel<char> >::type pb(&reader);
49
50 bool okay = pb.valid();
51 while (okay) {
52 // Processing the label IDs.
53 size_t first = 0, second = 0;
54 for (int l = 0; l < 2; ++l) {
55 auto& current = (l == 0 ? first : second);
56 bool non_empty = false;
57
58 do {
59 char x = pb.get();
60 okay = pb.advance();
61
62 if (x == '\t') {
63 if (!non_empty) {
64 throw std::runtime_error("empty field detected in the label indices");
65 }
66 break;
67 } else if (x == '\n') {
68 okay = false; // hit the error below.
69 break;
70 } else if (!std::isdigit(x)) {
71 throw std::runtime_error("label indices should be integers");
72 }
73
74 non_empty = true;
75 current *= 10;
76 current += (x - '0');
77 } while (okay);
78
79 if (!okay) {
80 throw std::runtime_error("expected at least three tab-separated fields on each line");
81 }
82 }
83
84 if (first >= markers.size()) {
85 markers.resize(first + 1);
86 }
87 auto& fmarkers = markers[first];
88 if (second >= fmarkers.size()) {
89 fmarkers.resize(second + 1);
90 }
91 auto& values = fmarkers[second];
92 if (!values.empty()) {
93 throw std::runtime_error("multiple marker sets listed for a single pairwise comparison");
94 }
95
96 // Processing the actual gene indices.
97 bool non_empty = false;
98 Index_ current = 0;
99 while (okay) {
100 char x = pb.get();
101 okay = pb.advance();
102
103 if (std::isdigit(x)) {
104 non_empty = true;
105 current *= 10;
106 current += (x - '0');
107
108 } else if (x == '\t') {
109 if (!non_empty) {
110 throw std::runtime_error("gene index fields should not be empty");
111 }
112 values.push_back(current);
113 current = 0;
114 non_empty = false;
115
116 } else if (x == '\n') {
117 break;
118
119 } else {
120 throw std::runtime_error("gene index fields should be integers");
121 }
122 }
123
124 // Adding the last element. We don't do this inside the newline check,
125 // as we need to account for cases where the file is not newline-terminated.
126 if (!non_empty) {
127 throw std::runtime_error("gene index fields should not be empty");
128 }
129 values.push_back(current);
130 }
131
132 // Now, making sure that every label has the same number of elements.
133 size_t expected_nlabels = markers.size();
134 for (const auto& m : markers) {
135 expected_nlabels = std::max(expected_nlabels, m.size());
136 }
137 markers.resize(expected_nlabels);
138 for (auto& m : markers) {
139 m.resize(expected_nlabels);
140 }
141
142 return markers;
143}
144
145template<typename Index_>
146singlepp::Markers<Index_> load_markers(byteme::Reader& reader, bool parallel) {
147 if (parallel) {
148 return load_markers<Index_, true>(reader);
149 } else {
150 return load_markers<Index_, false>(reader);
151 }
152}
153
154}
173template<typename Index_ = singlepp::DefaultIndex>
175 byteme::RawFileReader reader(path, options.buffer_size);
176 return internal::load_markers<Index_>(reader, options.parallel);
177}
178
189template<typename Index_ = singlepp::DefaultIndex>
191 byteme::GzipFileReader reader(path, options.buffer_size);
192 return internal::load_markers<Index_>(reader, options.parallel);
193}
194
206template<typename Index_ = singlepp::DefaultIndex>
207singlepp::Markers<Index_> load_markers_from_zlib_buffer(const unsigned char* buffer, size_t len, const LoadMarkersOptions& options) {
208 byteme::ZlibBufferReader reader(buffer, len, 3, options.buffer_size);
209 return internal::load_markers<Index_>(reader, options.parallel);
210}
211
212}
213
214#endif
Load pre-processed single reference datasets.
Definition labels.hpp:23
singlepp::Markers< Index_ > load_markers_from_gzip_file(const char *path, const LoadMarkersOptions &options)
Definition markers.hpp:190
singlepp::Markers< Index_ > load_markers_from_zlib_buffer(const unsigned char *buffer, size_t len, const LoadMarkersOptions &options)
Definition markers.hpp:207
singlepp::Markers< Index_ > load_markers_from_text_file(const char *path, const LoadMarkersOptions &options)
Definition markers.hpp:174
std::vector< std::vector< std::vector< Index_ > > > Markers
Options for loading markers.
Definition markers.hpp:28
bool parallel
Definition markers.hpp:32
size_t buffer_size
Definition markers.hpp:37