singlepp
A C++ library for cell type classification
Loading...
Searching...
No Matches
train_single.hpp
Go to the documentation of this file.
1#ifndef SINGLEPP_TRAIN_SINGLE_HPP
2#define SINGLEPP_TRAIN_SINGLE_HPP
3
4#include "defs.hpp"
5
6#include "tatami/tatami.hpp"
7
8#include "build_reference.hpp"
9#include "subset_to_markers.hpp"
10#include "utils.hpp"
11
12#include <vector>
13#include <memory>
14#include <cstddef>
15#include <cassert>
16
22namespace singlepp {
23
32 int num_threads = 1;
33};
34
38template<typename Index_, typename Float_>
39std::size_t get_num_labels_from_built(const BuiltReference<Index_, Float_>& built) {
40 if (built.sparse.has_value()) {
41 return built.sparse->size();
42 } else {
43 return built.dense->size();
44 }
45}
46
47template<typename Index_, typename Float_>
48std::size_t get_num_profiles_from_built(const BuiltReference<Index_, Float_>& built) {
49 std::size_t n = 0;
50 if (built.sparse.has_value()) {
51 for (const auto& ref : *(built.sparse)) {
52 n += get_num_samples(ref);
53 }
54 } else {
55 for (const auto& ref : *(built->dense)) {
56 n += get_num_samples(ref);
57 }
58 }
59 return n;
60}
74template<typename Index_, typename Float_>
76public:
81 Index_ test_nrow,
83 std::vector<Index_> subset,
84 BuiltReference<Index_, Float_> built
85 ) :
86 my_test_nrow(test_nrow),
87 my_markers(std::move(markers)),
88 my_subset(std::move(subset)),
89 my_built(std::move(built))
90 {
91 assert(is_sorted_unique(subset.size(), subset.data()));
92
93 const auto nlabels = my_built.dense.has_value() ? my_built.dense->size() : my_built.sparse->size();
94 if (!sanisizer::is_equal(my_markers.size(), nlabels)) {
95 throw std::runtime_error("'markers' length should be equal to the number of unique labels");
96 }
97 for (const auto& mm : my_markers) {
98 if (!sanisizer::is_equal(mm.size(), nlabels)) {
99 throw std::runtime_error("length of each entry of 'markers' should be equal to the number of unique labels");
100 }
101 }
102 }
107private:
108 Index_ my_test_nrow;
109 PairwiseMarkers<Index_> my_markers;
110 std::vector<Index_> my_subset;
111 BuiltReference<Index_, Float_> my_built;
112
113public:
117 Index_ test_nrow() const {
118 return my_test_nrow;
119 }
120
128 return my_markers;
129 }
130
136 const std::vector<Index_>& subset() const {
137 return my_subset;
138 }
139
143 std::size_t num_labels() const {
144 return get_num_labels_from_built(my_built);
145 }
146
150 std::size_t num_profiles() const {
151 return get_num_profiles_from_built(my_built);
152 }
153
157 const auto& built() const {
158 return my_built;
159 }
163};
164
191template<typename Float_ = double, typename Value_, typename Index_, typename Label_>
193 const tatami::Matrix<Value_, Index_>& ref,
194 const Label_* labels,
196 const TrainSingleOptions& options
197) {
198 auto subset = subset_to_markers(ref.nrow(), markers);
199 auto subref = build_reference<Float_>(ref, labels, subset, options.num_threads);
200 const Index_ test_nrow = ref.nrow(); // remember, test and ref are assumed to have the same features.
201 return TrainedSingle<Index_, Float_>(test_nrow, std::move(markers), std::move(subset), std::move(subref));
202}
203
237template<typename Float_ = double, typename Index_, typename Value_, typename Label_>
239 Index_ test_nrow,
240 const Intersection<Index_>& intersection,
241 const tatami::Matrix<Value_, Index_>& ref,
242 const Label_* labels,
244 std::vector<Index_>* ref_subset,
245 const TrainSingleOptions& options
246) {
247 auto pairs = subset_to_markers(test_nrow, intersection, ref.nrow(), markers);
248 auto subref = build_reference<Float_>(ref, labels, pairs.second, options.num_threads);
249 if (ref_subset) {
250 *ref_subset = std::move(pairs.second);
251 }
252 return TrainedSingle<Index_, Float_>(test_nrow, std::move(markers), std::move(pairs.first), std::move(subref));
253}
254
290template<typename Float_ = double, typename Index_, typename Id_, typename Value_, typename Label_>
292 Index_ test_nrow,
293 const Id_* test_id,
294 const tatami::Matrix<Value_, Index_>& ref,
295 const Id_* ref_id,
296 const Label_* labels,
298 std::vector<Index_>* ref_subset,
299 const TrainSingleOptions& options
300) {
301 auto intersection = intersect_genes(test_nrow, test_id, ref.nrow(), ref_id);
302 return train_single(test_nrow, intersection, ref, labels, std::move(markers), ref_subset, options);
303}
304
305}
306
307#endif
Classifier trained from a single reference.
Definition train_single.hpp:75
const std::vector< Index_ > & subset() const
Definition train_single.hpp:136
Index_ test_nrow() const
Definition train_single.hpp:117
const PairwiseMarkers< Index_ > & markers() const
Definition train_single.hpp:127
std::size_t num_labels() const
Definition train_single.hpp:143
std::size_t num_profiles() const
Definition train_single.hpp:150
Common definitions for singlepp.
Cell type classification using the SingleR algorithm in C++.
Definition classify_single.hpp:20
std::vector< std::vector< std::vector< Index_ > > > PairwiseMarkers
Definition Markers.hpp:39
TrainedSingle< Index_, Float_ > train_single(const tatami::Matrix< Value_, Index_ > &ref, const Label_ *labels, PairwiseMarkers< Index_ > markers, const TrainSingleOptions &options)
Definition train_single.hpp:192
Intersection< Index_ > intersect_genes(Index_ test_nrow, const Id_ *test_id, Index_ ref_nrow, const Id_ *ref_id)
Definition Intersection.hpp:54
std::vector< std::pair< Index_, Index_ > > Intersection
Definition Intersection.hpp:35
Options for train_single() and friends.
Definition train_single.hpp:27
int num_threads
Definition train_single.hpp:32