singlepp_loaders
Load pre-processed reference datasets for SingleR
Loading...
Searching...
No Matches
rankings.hpp
Go to the documentation of this file.
1#ifndef SINGLEPP_LOADERS_RANKINGS_HPP
2#define SINGLEPP_LOADERS_RANKINGS_HPP
3
4#include "byteme/PerByte.hpp"
5#include "byteme/RawFileReader.hpp"
6#include "byteme/GzipFileReader.hpp"
7#include "byteme/ZlibBufferReader.hpp"
8
9#include "tatami/tatami.hpp"
10#include "singlepp/singlepp.hpp"
11
#include <cctype>
#include <cstddef>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
17
23namespace singlepp_loaders {
24
34template<typename Value_ = double, typename Index_ = int, typename Rank_ = int>
36
44 bool parallel = false;
45
49 size_t buffer_size = 65536;
50};
51
52
56namespace internal {
57
58template<typename Value_, typename Index_>
59RankMatrix<Value_, Index_> load_rankings(byteme::Reader& reader, bool parallel) {
60 size_t nfeatures = 0;
61 size_t line = 0;
62 std::vector<int> values;
63
64 size_t field = 0;
65 bool non_empty = false;
66 int current = 0;
67
68 bool has_nfeatures = false;
69 auto check_nfeatures = [&]() -> void {
70 if (!has_nfeatures) {
71 has_nfeatures = true;
72 nfeatures = field + 1;
73 } else if (field + 1 != nfeatures) {
74 throw std::runtime_error("number of fields on each line should be equal to the number of features");
75 }
76 };
77
78 std::unique_ptr<byteme::PerByteInterface<char> > pbptr;
79 if (parallel) {
80 pbptr.reset(new byteme::PerByteParallel<char, byteme::Reader*>(&reader));
81 } else {
82 pbptr.reset(new byteme::PerByteSerial<char, byteme::Reader*>(&reader));
83 }
84 auto& pb = *pbptr;
85
86 bool okay = pb.valid();
87 while (okay) {
88 char x = pb.get();
89 okay = pb.advance();
90
91 if (x == '\n') {
92 check_nfeatures();
93 if (!non_empty) {
94 throw std::runtime_error("fields should not be empty");
95 }
96 values.push_back(current);
97 current = 0;
98 field = 0;
99 non_empty = false;
100 ++line;
101
102 } else if (x == ',') {
103 if (!non_empty) {
104 throw std::runtime_error("fields should not be empty");
105 }
106 values.push_back(current);
107 current = 0;
108 ++field;
109 non_empty = false;
110
111 } else if (std::isdigit(x)) {
112 non_empty = true;
113 current *= 10;
114 current += (x - '0');
115
116 } else {
117 throw std::runtime_error("fields should only contain integer ranks");
118 }
119 }
120
121 if (field || non_empty) { // aka no terminating newline.
122 check_nfeatures();
123 if (!non_empty) {
124 throw std::runtime_error("fields should not be empty");
125 }
126 values.push_back(current);
127 ++line;
128 }
129
130 return RankMatrix<Value_, Index_>(nfeatures, line, std::move(values), false);
131}
132
133}
153template<typename Value_ = singlepp::DefaultValue, typename Index_ = singlepp::DefaultIndex>
155 byteme::RawFileReaderOptions read_opt;
156 read_opt.buffer_size = options.buffer_size;
157 byteme::RawFileReader reader(path, read_opt);
158 return internal::load_rankings<Value_, Index_>(reader, options.parallel);
159}
160
173template<typename Value_ = singlepp::DefaultValue, typename Index_ = singlepp::DefaultIndex>
175 byteme::GzipFileReaderOptions read_opt;
176 read_opt.buffer_size = options.buffer_size;
177 byteme::GzipFileReader reader(path, read_opt);
178 return internal::load_rankings<Value_, Index_>(reader, options.parallel);
179}
180
194template<typename Value_ = singlepp::DefaultValue, typename Index_ = singlepp::DefaultIndex>
195RankMatrix<Value_, Index_> load_rankings_from_zlib_buffer(const unsigned char* buffer, size_t len, const LoadRankingsOptions& options) {
196 byteme::ZlibBufferReaderOptions read_opt;
197 read_opt.mode = 3;
198 read_opt.buffer_size = options.buffer_size;
199 byteme::ZlibBufferReader reader(buffer, len, read_opt);
200 return internal::load_rankings<Value_, Index_>(reader, options.parallel);
201}
202
203}
204
205#endif
Load pre-processed single reference datasets.
Definition labels.hpp:23
RankMatrix< Value_, Index_ > load_rankings_from_zlib_buffer(const unsigned char *buffer, size_t len, const LoadRankingsOptions &options)
Definition rankings.hpp:195
RankMatrix< Value_, Index_ > load_rankings_from_text_file(const char *path, const LoadRankingsOptions &options)
Definition rankings.hpp:154
RankMatrix< Value_, Index_ > load_rankings_from_gzip_file(const char *path, const LoadRankingsOptions &options)
Definition rankings.hpp:174
Options for loading rankings.
Definition rankings.hpp:40
bool parallel
Definition rankings.hpp:44
size_t buffer_size
Definition rankings.hpp:49