singlepp
A C++ library for cell type classification
Loading...
Searching...
No Matches
choose_classic_markers.hpp
Go to the documentation of this file.
1#ifndef SINGLEPP_CHOOSE_CLASSIC_MARKERS_HPP
2#define SINGLEPP_CHOOSE_CLASSIC_MARKERS_HPP
3
4#include "defs.hpp"
5
6#include "tatami/tatami.hpp"
7
8#include "Markers.hpp"
9
10#include <vector>
11#include <cmath>
12#include <stdexcept>
13#include <algorithm>
14#include <set>
15
21namespace singlepp {
22
33inline size_t number_of_classic_markers(size_t num_labels) {
34 return std::round(500.0 * std::pow(2.0/3.0, std::log(static_cast<double>(num_labels)) / std::log(2.0)));
35}
36
45 int number = -1;
46
51 int num_threads = 1;
52};
53
78template<typename Value_, typename Index_, typename Label_>
80 const std::vector<const tatami::Matrix<Value_, Index_>*>& representatives,
81 const std::vector<const Label_*>& labels,
82 const ChooseClassicMarkersOptions& options)
83{
84 size_t nrefs = representatives.size();
85 if (nrefs != labels.size()) {
86 throw std::runtime_error("'representatives' and 'labels' should have the same length");
87 }
88 if (nrefs == 0) {
89 throw std::runtime_error("'representatives' should contain at least one entry");
90 }
91 size_t ngenes = representatives.front()->nrow();
92
93 size_t nlabels = 0;
94 for (size_t r = 0; r < nrefs; ++r) {
95 const auto& current = *representatives[r];
96
97 size_t nrows = current.nrow();
98 if (nrows != ngenes) {
99 throw std::runtime_error("all entries of 'representatives' should have the same number of rows");
100 }
101
102 size_t ncols = current.ncol();
103 if (ncols) {
104 auto curlab = labels[r];
105 nlabels = std::max(nlabels, static_cast<size_t>(*std::max_element(curlab, curlab + ncols)) + 1);
106 }
107 }
108
109 // Generating mappings.
110 std::vector<std::vector<std::pair<bool, Index_> > > labels_to_index(nrefs);
111 for (size_t r = 0; r < nrefs; ++r) {
112 auto& current = labels_to_index[r];
113 current.resize(nlabels);
114 size_t ncols = representatives[r]->ncol();
115 auto curlab = labels[r];
116
117 for (size_t c = 0; c < ncols; ++c) {
118 auto& info = current[curlab[c]];
119 if (info.first) {
120 throw std::runtime_error("each label should correspond to no more than one column in each reference");
121 }
122 info.first = 1;
123 info.second = c;
124 }
125 }
126
127 Markers<Index_> output(nlabels);
128 for (auto& x : output) {
129 x.resize(nlabels);
130 }
131
132 int actual_number = options.number;
133 if (actual_number < 0) {
134 actual_number = number_of_classic_markers(nlabels);
135 }
136 if (actual_number > static_cast<int>(ngenes)) {
137 actual_number = ngenes;
138 }
139
140 // Generating pairs for compute; this sacrifices some memory for convenience.
141 std::vector<std::pair<Label_, Label_> > pairs;
142 {
143 std::set<std::pair<Label_, Label_> > pairs0;
144 for (size_t r = 0; r < nrefs; ++r) {
145 size_t ncols = representatives[r]->ncol();
146 auto curlab = labels[r];
147 for (size_t c1 = 0; c1 < ncols; ++c1) {
148 for (size_t c2 = 0; c2 < c1; ++c2) {
149 pairs0.emplace(curlab[c1], curlab[c2]);
150 }
151 }
152 }
153 pairs.insert(pairs.end(), pairs0.begin(), pairs0.end()); // already sorted by the std::set.
154 }
155
156 tatami::parallelize([&](int, size_t start, size_t len) {
157 std::vector<std::pair<Value_, Index_> > sorter(ngenes);
158 std::vector<Value_> rbuffer(ngenes), lbuffer(ngenes);
159 std::vector<std::shared_ptr<tatami::MyopicDenseExtractor<Value_, Index_> > > rextractors(nrefs), lextractors(nrefs);
160
161 for (size_t p = start, end = start + len; p < end; ++p) {
162 auto curleft = pairs[p].first;
163 auto curright = pairs[p].second;
164
165 for (size_t g = 0; g < ngenes; ++g) {
166 sorter[g].first = 0;
167 sorter[g].second = g;
168 }
169
170 for (size_t i = 0; i < nrefs; ++i) {
171 const auto& curavail = labels_to_index[i];
172 auto lcol = curavail[curleft];
173 auto rcol = curavail[curright];
174 if (!lcol.first || !rcol.first) {
175 continue;
176 }
177
178 // Initialize extractors as needed.
179 auto& lext = lextractors[i];
180 if (!lext) {
181 lext = representatives[i]->dense_column();
182 }
183 auto lptr = lext->fetch(lcol.second, lbuffer.data());
184
185 auto& rext = rextractors[i];
186 if (!rext) {
187 rext = representatives[i]->dense_column();
188 }
189 auto rptr = rext->fetch(rcol.second, rbuffer.data());
190
191 for (size_t g = 0; g < ngenes; ++g) {
192 sorter[g].first += lptr[g] - rptr[g];
193 }
194 }
195
196 // At flip = 0, we're looking for genes upregulated in right over left,
197 // as we're sorting on left - right in increasing order. At flip = 1,
198 // we reverse the signs to we sort on right - left.
199 for (int flip = 0; flip < 2; ++flip) {
200 if (flip) {
201 for (auto& s : sorter) {
202 s.first *= -1;
203 }
204 }
205
206 // partial sort is guaranteed to be stable due to the second index resolving ties.
207 std::partial_sort(sorter.begin(), sorter.begin() + actual_number, sorter.end());
208
209 std::vector<Index_> stuff;
210 stuff.reserve(actual_number);
211 for (int g = 0; g < actual_number && sorter[g].first < 0; ++g) { // only negative values (i.e., positive log-fold changes for our comparison).
212 stuff.push_back(sorter[g].second);
213 }
214
215 if (flip) {
216 output[curleft][curright] = std::move(stuff);
217 } else {
218 output[curright][curleft] = std::move(stuff);
219 }
220 }
221 }
222 }, pairs.size(), options.num_threads);
223
224 return output;
225}
226
230// Overload for the non-const case.
231template<typename Value_, typename Index_, typename Label_>
233 const std::vector<tatami::Matrix<Value_, Index_>*>& representatives,
234 const std::vector<const Label_*>& labels,
235 const ChooseClassicMarkersOptions& options)
236{
237 std::vector<const tatami::Matrix<Value_, Index_>*> rep2(representatives.begin(), representatives.end());
239}
265template<typename Value_, typename Index_, typename Label_>
267 return choose_classic_markers(std::vector<const tatami::Matrix<Value_, Index_>*>{ &representative }, std::vector<const Label_*>{ labels }, options);
268}
269
270}
271
272#endif
Define the Markers typedef.
Common definitions for singlepp.
Cell type classification using the SingleR algorithm in C++.
Definition classify_single.hpp:19
std::vector< std::vector< std::vector< Index_ > > > Markers
Definition Markers.hpp:40
size_t number_of_classic_markers(size_t num_labels)
Definition choose_classic_markers.hpp:33
Markers< Index_ > choose_classic_markers(const std::vector< const tatami::Matrix< Value_, Index_ > * > &representatives, const std::vector< const Label_ * > &labels, const ChooseClassicMarkersOptions &options)
Definition choose_classic_markers.hpp:79
Options for choose_classic_markers().
Definition choose_classic_markers.hpp:40
int num_threads
Definition choose_classic_markers.hpp:51
int number
Definition choose_classic_markers.hpp:45