singlepp
A C++ library for cell type classification
Loading...
Searching...
No Matches
choose_classic_markers.hpp
Go to the documentation of this file.
1#ifndef SINGLEPP_CHOOSE_CLASSIC_MARKERS_HPP
2#define SINGLEPP_CHOOSE_CLASSIC_MARKERS_HPP
3
4#include "defs.hpp"
5
6#include "tatami/tatami.hpp"
7
8#include "Markers.hpp"
9
10#include <vector>
11#include <cmath>
12#include <stdexcept>
13#include <algorithm>
14#include <set>
15#include <cstddef>
16
22namespace singlepp {
23
34inline std::size_t number_of_classic_markers(std::size_t num_labels) {
35 return std::round(500.0 * std::pow(2.0/3.0, std::log(static_cast<double>(num_labels)) / std::log(2.0)));
36}
37
46 int number = -1;
47
52 int num_threads = 1;
53};
54
79template<typename Value_, typename Index_, typename Label_>
81 const std::vector<const tatami::Matrix<Value_, Index_>*>& representatives,
82 const std::vector<const Label_*>& labels,
83 const ChooseClassicMarkersOptions& options)
84{
85 auto nrefs = representatives.size();
86 if (nrefs != labels.size()) {
87 throw std::runtime_error("'representatives' and 'labels' should have the same length");
88 }
89 if (nrefs == 0) {
90 throw std::runtime_error("'representatives' should contain at least one entry");
91 }
92 auto ngenes = representatives.front()->nrow();
93
94 std::size_t nlabels = 0;
95 for (decltype(nrefs) r = 0; r < nrefs; ++r) {
96 const auto& current = *representatives[r];
97
98 auto nrows = current.nrow();
99 if (nrows != ngenes) {
100 throw std::runtime_error("all entries of 'representatives' should have the same number of rows");
101 }
102
103 auto ncols = current.ncol();
104 if (ncols) {
105 auto curlab = labels[r];
106 nlabels = std::max(nlabels, static_cast<std::size_t>(*std::max_element(curlab, curlab + ncols)) + 1);
107 }
108 }
109
110 // Generating mappings.
111 std::vector<std::vector<std::pair<bool, Index_> > > labels_to_index(nrefs);
112 for (decltype(nrefs) r = 0; r < nrefs; ++r) {
113 auto& current = labels_to_index[r];
114 current.resize(nlabels);
115 auto ncols = representatives[r]->ncol();
116 auto curlab = labels[r];
117
118 for (decltype(ncols) c = 0; c < ncols; ++c) {
119 auto& info = current[curlab[c]];
120 if (info.first) {
121 throw std::runtime_error("each label should correspond to no more than one column in each reference");
122 }
123 info.first = 1;
124 info.second = c;
125 }
126 }
127
128 Markers<Index_> output(nlabels);
129 for (auto& x : output) {
130 x.resize(nlabels);
131 }
132
133 int actual_number = options.number;
134 if (actual_number < 0) {
135 actual_number = number_of_classic_markers(nlabels);
136 }
137 if (actual_number > static_cast<int>(ngenes)) {
138 actual_number = ngenes;
139 }
140
141 // Generating pairs for compute; this sacrifices some memory for convenience.
142 std::vector<std::pair<Label_, Label_> > pairs;
143 {
144 std::set<std::pair<Label_, Label_> > pairs0;
145 for (decltype(nrefs) r = 0; r < nrefs; ++r) {
146 auto ncols = representatives[r]->ncol();
147 auto curlab = labels[r];
148 for (decltype(ncols) c1 = 0; c1 < ncols; ++c1) {
149 for (decltype(c1) c2 = 0; c2 < c1; ++c2) {
150 pairs0.emplace(curlab[c1], curlab[c2]);
151 }
152 }
153 }
154 pairs.insert(pairs.end(), pairs0.begin(), pairs0.end()); // already sorted by the std::set.
155 }
156
157 auto npairs = pairs.size();
158 tatami::parallelize([&](int, decltype(npairs) start, decltype(npairs) len) {
159 std::vector<std::pair<Value_, Index_> > sorter(ngenes);
160 std::vector<Value_> rbuffer(ngenes), lbuffer(ngenes);
161 std::vector<std::shared_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > > rextractors(nrefs), lextractors(nrefs);
162
163 for (decltype(npairs) p = start, end = start + len; p < end; ++p) {
164 auto curleft = pairs[p].first;
165 auto curright = pairs[p].second;
166
167 for (decltype(ngenes) g = 0; g < ngenes; ++g) {
168 sorter[g].first = 0;
169 sorter[g].second = g;
170 }
171
172 for (decltype(nrefs) i = 0; i < nrefs; ++i) {
173 const auto& curavail = labels_to_index[i];
174 auto lcol = curavail[curleft];
175 auto rcol = curavail[curright];
176 if (!lcol.first || !rcol.first) {
177 continue;
178 }
179
180 // Initialize extractors as needed.
181 auto& lext = lextractors[i];
182 if (!lext) {
183 lext = representatives[i]->dense_column();
184 }
185 auto lptr = lext->fetch(lcol.second, lbuffer.data());
186
187 auto& rext = rextractors[i];
188 if (!rext) {
189 rext = representatives[i]->dense_column();
190 }
191 auto rptr = rext->fetch(rcol.second, rbuffer.data());
192
193 for (decltype(ngenes) g = 0; g < ngenes; ++g) {
194 sorter[g].first += lptr[g] - rptr[g];
195 }
196 }
197
198 // At flip = 0, we're looking for genes upregulated in right over left,
199 // as we're sorting on left - right in increasing order. At flip = 1,
200 // we reverse the signs to we sort on right - left.
201 for (int flip = 0; flip < 2; ++flip) {
202 if (flip) {
203 for (auto& s : sorter) {
204 s.first *= -1;
205 }
206 }
207
208 // partial sort is guaranteed to be stable due to the second index resolving ties.
209 std::partial_sort(sorter.begin(), sorter.begin() + actual_number, sorter.end());
210
211 std::vector<Index_> stuff;
212 stuff.reserve(actual_number);
213 for (int g = 0; g < actual_number && sorter[g].first < 0; ++g) { // only negative values (i.e., positive log-fold changes for our comparison).
214 stuff.push_back(sorter[g].second);
215 }
216
217 if (flip) {
218 output[curleft][curright] = std::move(stuff);
219 } else {
220 output[curright][curleft] = std::move(stuff);
221 }
222 }
223 }
224 }, npairs, options.num_threads);
225
226 return output;
227}
228
232// Overload for the non-const case.
233template<typename Value_, typename Index_, typename Label_>
235 const std::vector<tatami::Matrix<Value_, Index_>*>& representatives,
236 const std::vector<const Label_*>& labels,
237 const ChooseClassicMarkersOptions& options)
238{
239 std::vector<const tatami::Matrix<Value_, Index_>*> rep2(representatives.begin(), representatives.end());
240 return choose_classic_markers(rep2, labels, options);
241}
267template<typename Value_, typename Index_, typename Label_>
268Markers<Index_> choose_classic_markers(const tatami::Matrix<Value_, Index_>& representative, const Label_* labels, const ChooseClassicMarkersOptions& options) {
269 return choose_classic_markers(std::vector<const tatami::Matrix<Value_, Index_>*>{ &representative }, std::vector<const Label_*>{ labels }, options);
270}
271
272}
273
274#endif
Define the Markers typedef.
Common definitions for singlepp.
Cell type classification using the SingleR algorithm in C++.
Definition classify_single.hpp:20
std::size_t number_of_classic_markers(std::size_t num_labels)
Definition choose_classic_markers.hpp:34
std::vector< std::vector< std::vector< Index_ > > > Markers
Definition Markers.hpp:40
Markers< Index_ > choose_classic_markers(const std::vector< const tatami::Matrix< Value_, Index_ > * > &representatives, const std::vector< const Label_ * > &labels, const ChooseClassicMarkersOptions &options)
Definition choose_classic_markers.hpp:80
Options for choose_classic_markers().
Definition choose_classic_markers.hpp:41
int num_threads
Definition choose_classic_markers.hpp:52
int number
Definition choose_classic_markers.hpp:46