singlepp_loaders
Load pre-processed reference datasets for SingleR
Loading...
Searching...
No Matches
rankings.hpp
Go to the documentation of this file.
1#ifndef SINGLEPP_LOADERS_RANKINGS_HPP
2#define SINGLEPP_LOADERS_RANKINGS_HPP
3
#include "byteme/PerByte.hpp"
#include "byteme/RawFileReader.hpp"
#include "byteme/GzipFileReader.hpp"
#include "byteme/ZlibBufferReader.hpp"

#include "tatami/tatami.hpp"
#include "singlepp/singlepp.hpp"

#include <string>
#include <vector>
#include <cctype>
#include <limits>
#include <type_traits>
#include <stdexcept>
17
23namespace singlepp_loaders {
24
34template<typename Value_ = double, typename Index_ = int, typename Rank_ = int>
36
/**
 * @brief Options for loading rankings.
 */
struct LoadRankingsOptions {
    /**
     * Forwarded to the internal parser, where it selects between the
     * parallel and serial byte-by-byte readers.
     */
    bool parallel = false;

    /**
     * Buffer size handed to the `byteme` reader that is constructed for the
     * input (presumably in bytes; confirm against the byteme documentation).
     */
    size_t buffer_size = 65536;
};
51
52
56namespace internal {
57
58template<typename Value_, typename Index_, bool parallel_>
59RankMatrix<Value_, Index_> load_rankings(byteme::Reader& reader) {
60 size_t nfeatures = 0;
61 size_t line = 0;
62 std::vector<int> values;
63
64 size_t field = 0;
65 bool non_empty = false;
66 int current = 0;
67
68 bool has_nfeatures = false;
69 auto check_nfeatures = [&]() -> void {
70 if (!has_nfeatures) {
71 has_nfeatures = true;
72 nfeatures = field + 1;
73 } else if (field + 1 != nfeatures) {
74 throw std::runtime_error("number of fields on each line should be equal to the number of features");
75 }
76 };
77
78 typename std::conditional<parallel_, byteme::PerByte<char>, byteme::PerByteParallel<char> >::type pb(&reader);
79 bool okay = pb.valid();
80 while (okay) {
81 char x = pb.get();
82 okay = pb.advance();
83
84 if (x == '\n') {
85 check_nfeatures();
86 if (!non_empty) {
87 throw std::runtime_error("fields should not be empty");
88 }
89 values.push_back(current);
90 current = 0;
91 field = 0;
92 non_empty = false;
93 ++line;
94
95 } else if (x == ',') {
96 if (!non_empty) {
97 throw std::runtime_error("fields should not be empty");
98 }
99 values.push_back(current);
100 current = 0;
101 ++field;
102 non_empty = false;
103
104 } else if (std::isdigit(x)) {
105 non_empty = true;
106 current *= 10;
107 current += (x - '0');
108
109 } else {
110 throw std::runtime_error("fields should only contain integer ranks");
111 }
112 }
113
114 if (field || non_empty) { // aka no terminating newline.
115 check_nfeatures();
116 if (!non_empty) {
117 throw std::runtime_error("fields should not be empty");
118 }
119 values.push_back(current);
120 ++line;
121 }
122
123 return RankMatrix<Value_, Index_>(nfeatures, line, std::move(values), false);
124}
125
126template<typename Value_, typename Index_>
127RankMatrix<Value_, Index_> load_rankings(byteme::Reader& reader, bool parallel) {
128 if (parallel) {
130 } else {
132 }
133}
134
135}
155template<typename Value_ = singlepp::DefaultValue, typename Index_ = singlepp::DefaultIndex>
157 byteme::RawFileReader reader(path, options.buffer_size);
158 return internal::load_rankings<Value_, Index_>(reader, options.parallel);
159}
160
173template<typename Value_ = singlepp::DefaultValue, typename Index_ = singlepp::DefaultIndex>
175 byteme::GzipFileReader reader(path, options.buffer_size);
176 return internal::load_rankings<Value_, Index_>(reader, options.parallel);
177}
178
192template<typename Value_ = singlepp::DefaultValue, typename Index_ = singlepp::DefaultIndex>
194 byteme::ZlibBufferReader reader(buffer, len, 3, options.buffer_size);
195 return internal::load_rankings<Value_, Index_>(reader, options.parallel);
196}
197
198}
199
200#endif
Load pre-processed single reference datasets.
Definition labels.hpp:23
tatami::DenseMatrix< Value_, Index_, std::vector< Rank_ > > RankMatrix
Definition rankings.hpp:35
RankMatrix< Value_, Index_ > load_rankings_from_zlib_buffer(const unsigned char *buffer, size_t len, const LoadRankingsOptions &options)
Definition rankings.hpp:193
RankMatrix< Value_, Index_ > load_rankings_from_text_file(const char *path, const LoadRankingsOptions &options)
Definition rankings.hpp:156
RankMatrix< Value_, Index_ > load_rankings_from_gzip_file(const char *path, const LoadRankingsOptions &options)
Definition rankings.hpp:174
Options for loading rankings.
Definition rankings.hpp:40
bool parallel
Definition rankings.hpp:44
size_t buffer_size
Definition rankings.hpp:49