IsoSpec 2.2.3
Loading...
Searching...
No Matches
fixedEnvelopes.cpp
1/*
2 * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3 *
4 * This file is part of IsoSpec.
5 *
6 * IsoSpec is free software: you can redistribute it and/or modify
7 * it under the terms of the Simplified ("2-clause") BSD licence.
8 *
9 * IsoSpec is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * You should have received a copy of the Simplified BSD Licence
14 * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15 */
16
17#include "fixedEnvelopes.h"
18#include <limits>
19#include "isoMath.h"
20
21namespace IsoSpec
22{
23
24FixedEnvelope::FixedEnvelope(const FixedEnvelope& other) :
25_masses(array_copy<double>(other._masses, other._confs_no)),
26_probs(array_copy<double>(other._probs, other._confs_no)),
27_confs(array_copy_nptr<int>(other._confs, other._confs_no*other.allDim)),
28_confs_no(other._confs_no),
29allDim(other.allDim),
30sorted_by_mass(other.sorted_by_mass),
31sorted_by_prob(other.sorted_by_prob),
32total_prob(other.total_prob)
33{}
34
35FixedEnvelope::FixedEnvelope(FixedEnvelope&& other) :
36_masses(other._masses),
37_probs(other._probs),
38_confs(other._confs),
39_confs_no(other._confs_no),
40allDim(other.allDim),
41sorted_by_mass(other.sorted_by_mass),
42sorted_by_prob(other.sorted_by_prob),
43total_prob(other.total_prob)
44{
45other._masses = nullptr;
46other._probs = nullptr;
47other._confs = nullptr;
48other._confs_no = 0;
49other.total_prob = 0.0;
50}
51
52FixedEnvelope::FixedEnvelope(double* in_masses, double* in_probs, size_t in_confs_no, bool masses_sorted, bool probs_sorted, double _total_prob) :
53_masses(in_masses),
54_probs(in_probs),
55_confs(nullptr),
56_confs_no(in_confs_no),
57allDim(0),
58sorted_by_mass(masses_sorted),
59sorted_by_prob(probs_sorted),
60total_prob(_total_prob)
61{}
62
63FixedEnvelope FixedEnvelope::operator+(const FixedEnvelope& other) const
64{
65 double* nprobs = reinterpret_cast<double*>(malloc(sizeof(double) * (_confs_no+other._confs_no)));
66 if(nprobs == nullptr)
67 throw std::bad_alloc();
68 double* nmasses = reinterpret_cast<double*>(malloc(sizeof(double) * (_confs_no+other._confs_no)));
69 if(nmasses == nullptr)
70 {
71 free(nprobs);
72 throw std::bad_alloc();
73 }
74
75 memcpy(nprobs, _probs, sizeof(double) * _confs_no);
76 memcpy(nmasses, _masses, sizeof(double) * _confs_no);
77
78 memcpy(nprobs+_confs_no, other._probs, sizeof(double) * other._confs_no);
79 memcpy(nmasses+_confs_no, other._masses, sizeof(double) * other._confs_no);
80
81 return FixedEnvelope(nmasses, nprobs, _confs_no + other._confs_no);
82}
83
84FixedEnvelope FixedEnvelope::operator*(const FixedEnvelope& other) const
85{
86 double* nprobs = reinterpret_cast<double*>(malloc(sizeof(double) * _confs_no * other._confs_no));
87 if(nprobs == nullptr)
88 throw std::bad_alloc();
89 // deepcode ignore CMemoryLeak: It's not a memleak: the memory is passed to FixedEnvelope which
90 // deepcode ignore CMemoryLeak: takes ownership of it, and will properly free() it in destructor.
91 double* nmasses = reinterpret_cast<double*>(malloc(sizeof(double) * _confs_no * other._confs_no));
92 if(nmasses == nullptr)
93 {
94 free(nprobs);
95 throw std::bad_alloc();
96 }
97
98 size_t tgt_idx = 0;
99
100 for(size_t ii = 0; ii < _confs_no; ii++)
101 for(size_t jj = 0; jj < other._confs_no; jj++)
102 {
103 nprobs[tgt_idx] = _probs[ii] * other._probs[jj];
104 nmasses[tgt_idx] = _masses[ii] + other._masses[jj];
105 tgt_idx++;
106 }
107
108 return FixedEnvelope(nmasses, nprobs, tgt_idx);
109}
110
111void FixedEnvelope::sort_by_mass()
112{
113 if(sorted_by_mass)
114 return;
115
116 sort_by(_masses);
117
118 sorted_by_mass = true;
119 sorted_by_prob = false;
120}
121
122
123void FixedEnvelope::sort_by_prob()
124{
125 if(sorted_by_prob)
126 return;
127
128 sort_by(_probs);
129
130 sorted_by_prob = true;
131 sorted_by_mass = false;
132}
133
134template<typename T> void reorder_array(T* arr, size_t* order, size_t size, bool can_destroy = false)
135{
136 if(!can_destroy)
137 {
138 size_t* order_c = new size_t[size];
139 memcpy(order_c, order, sizeof(size_t)*size);
140 order = order_c;
141 }
142
143 for(size_t ii = 0; ii < size; ii++)
144 while(order[ii] != ii)
145 {
146 std::swap(arr[ii], arr[order[ii]]);
147 std::swap(order[order[ii]], order[ii]);
148 }
149
150 if(!can_destroy)
151 delete[] order;
152}
153
154void FixedEnvelope::sort_by(double* order)
155{
156 if(_confs_no <= 1)
157 return;
158
159 size_t* indices = new size_t[_confs_no];
160
161 for(size_t ii = 0; ii < _confs_no; ii++)
162 indices[ii] = ii;
163
164 std::sort<size_t*>(indices, indices + _confs_no, TableOrder<double>(order));
165
166 size_t* inverse = new size_t[_confs_no];
167
168 for(size_t ii = 0; ii < _confs_no; ii++)
169 inverse[indices[ii]] = ii;
170
171 delete[] indices;
172
173 reorder_array(_masses, inverse, _confs_no);
174 reorder_array(_probs, inverse, _confs_no, _confs == nullptr);
175 if(_confs != nullptr)
176 {
177 int* swapspace = new int[allDim];
178 for(size_t ii = 0; ii < _confs_no; ii++)
179 while(inverse[ii] != ii)
180 {
181 memcpy(swapspace, &_confs[ii*allDim], allDimSizeofInt);
182 memcpy(&_confs[ii*allDim], &_confs[inverse[ii]*allDim], allDimSizeofInt);
183 memcpy(&_confs[inverse[ii]*allDim], swapspace, allDimSizeofInt);
184 std::swap(inverse[inverse[ii]], inverse[ii]);
185 }
186 delete[] swapspace;
187 }
188 delete[] inverse;
189}
190
191
192double FixedEnvelope::get_total_prob()
193{
194 if(std::isnan(total_prob))
195 {
196 total_prob = 0.0;
197 for(size_t ii = 0; ii < _confs_no; ii++)
198 total_prob += _probs[ii];
199 }
200 return total_prob;
201}
202
203void FixedEnvelope::scale(double factor)
204{
205 for(size_t ii = 0; ii < _confs_no; ii++)
206 _probs[ii] *= factor;
207 total_prob *= factor;
208}
209
210void FixedEnvelope::normalize()
211{
212 double tp = get_total_prob();
213 if(tp != 1.0)
214 {
215 scale(1.0/tp);
216 total_prob = 1.0;
217 }
218}
219
220void FixedEnvelope::shift_mass(double value)
221{
222 for(size_t ii = 0; ii < _confs_no; ii++)
223 _masses[ii] += value;
224}
225
226void FixedEnvelope::resample(size_t samples, double beta_bias)
227{
228 if(_confs_no == 0)
229 throw std::logic_error("Resample called on an empty spectrum");
230
231 double pprob = 0.0;
232 double cprob = 0.0;
233 size_t pidx = -1; // Overflows - but it doesn't matter.
234
235 _probs[_confs_no-1] = (std::numeric_limits<double>::max)();
236
237 while(samples > 0)
238 {
239 pprob += _probs[++pidx];
240 _probs[pidx] = 0.0;
241 double covered_part = (pprob - cprob) / (1.0 - cprob);
242 while(samples * covered_part < beta_bias && samples > 0)
243 {
244 cprob += rdvariate_beta_1_b(samples) * (1.0 - cprob);
245 while(pprob < cprob)
246 {
247 pprob += _probs[++pidx];
248 _probs[pidx] = 0.0;
249 }
250 _probs[pidx] += 1.0;
251 samples--;
252 covered_part = (pprob - cprob) / (1.0 - cprob);
253 }
254 if(samples <= 0)
255 break;
256 size_t nrtaken = rdvariate_binom(samples, covered_part);
257 _probs[pidx] += static_cast<double>(nrtaken);
258 samples -= nrtaken;
259 cprob = pprob;
260 }
261
262 pidx++;
263 memset(_probs + pidx, 0, sizeof(double)*(_confs_no - pidx));
264}
265
266FixedEnvelope FixedEnvelope::LinearCombination(const std::vector<const FixedEnvelope*>& spectra, const std::vector<double>& intensities)
267{
268 return LinearCombination(spectra.data(), intensities.data(), spectra.size());
269}
270
271FixedEnvelope FixedEnvelope::LinearCombination(const FixedEnvelope* const * spectra, const double* intensities, size_t size)
272{
273 size_t ret_size = 0;
274 for(size_t ii = 0; ii < size; ii++)
275 ret_size += spectra[ii]->_confs_no;
276
277 double* newprobs = reinterpret_cast<double*>(malloc(sizeof(double)*ret_size));
278 if(newprobs == nullptr)
279 throw std::bad_alloc();
280 double* newmasses = reinterpret_cast<double*>(malloc(sizeof(double)*ret_size));
281 if(newmasses == nullptr)
282 {
283 free(newprobs);
284 throw std::bad_alloc();
285 }
286
287 size_t cntr = 0;
288 for(size_t ii = 0; ii < size; ii++)
289 {
290 double mul = intensities[ii];
291 for(size_t jj = 0; jj < spectra[ii]->_confs_no; jj++)
292 newprobs[jj+cntr] = spectra[ii]->_probs[jj] * mul;
293 memcpy(newmasses + cntr, spectra[ii]->_masses, sizeof(double) * spectra[ii]->_confs_no);
294 cntr += spectra[ii]->_confs_no;
295 }
296 return FixedEnvelope(newmasses, newprobs, cntr);
297}
298
299double FixedEnvelope::WassersteinDistance(FixedEnvelope& other)
300{
301 double ret = 0.0;
302 if((get_total_prob()*0.999 > other.get_total_prob()) || (other.get_total_prob() > get_total_prob()*1.001))
303 throw std::logic_error("Spectra must be normalized before computing Wasserstein Distance");
304
305 if(_confs_no == 0 || other._confs_no == 0)
306 return 0.0;
307
308 sort_by_mass();
309 other.sort_by_mass();
310
311 size_t idx_this = 0;
312 size_t idx_other = 0;
313
314 double acc_prob = 0.0;
315 double last_point = 0.0;
316
317
318 while(idx_this < _confs_no && idx_other < other._confs_no)
319 {
320 if(_masses[idx_this] < other._masses[idx_other])
321 {
322 ret += (_masses[idx_this] - last_point) * std::abs(acc_prob);
323 acc_prob += _probs[idx_this];
324 last_point = _masses[idx_this];
325 idx_this++;
326 }
327 else
328 {
329 ret += (other._masses[idx_other] - last_point) * std::abs(acc_prob);
330 acc_prob -= other._probs[idx_other];
331 last_point = other._masses[idx_other];
332 idx_other++;
333 }
334 }
335
336 acc_prob = std::abs(acc_prob);
337
338 while(idx_this < _confs_no)
339 {
340 ret += (_masses[idx_this] - last_point) * acc_prob;
341 acc_prob -= _probs[idx_this];
342 last_point = _masses[idx_this];
343 idx_this++;
344 }
345
346 while(idx_other < other._confs_no)
347 {
348 ret += (other._masses[idx_other] - last_point) * acc_prob;
349 acc_prob -= other._probs[idx_other];
350 last_point = other._masses[idx_other];
351 idx_other++;
352 }
353
354 return ret;
355}
356
357
358double FixedEnvelope::OrientedWassersteinDistance(FixedEnvelope& other)
359{
360 double ret = 0.0;
361 if((get_total_prob()*0.999 > other.get_total_prob()) || (other.get_total_prob() > get_total_prob()*1.001))
362 throw std::logic_error("Spectra must be normalized before computing Wasserstein Distance");
363
364 if(_confs_no == 0 || other._confs_no == 0)
365 return 0.0;
366
367 sort_by_mass();
368 other.sort_by_mass();
369
370 size_t idx_this = 0;
371 size_t idx_other = 0;
372
373 double acc_prob = 0.0;
374 double last_point = 0.0;
375
376
377 while(idx_this < _confs_no && idx_other < other._confs_no)
378 {
379 if(_masses[idx_this] < other._masses[idx_other])
380 {
381 ret += (_masses[idx_this] - last_point) * acc_prob;
382 acc_prob += _probs[idx_this];
383 last_point = _masses[idx_this];
384 idx_this++;
385 }
386 else
387 {
388 ret += (other._masses[idx_other] - last_point) * acc_prob;
389 acc_prob -= other._probs[idx_other];
390 last_point = other._masses[idx_other];
391 idx_other++;
392 }
393 }
394
395 while(idx_this < _confs_no)
396 {
397 ret += (_masses[idx_this] - last_point) * acc_prob;
398 acc_prob -= _probs[idx_this];
399 last_point = _masses[idx_this];
400 idx_this++;
401 }
402
403 while(idx_other < other._confs_no)
404 {
405 ret += (other._masses[idx_other] - last_point) * acc_prob;
406 acc_prob -= other._probs[idx_other];
407 last_point = other._masses[idx_other];
408 idx_other++;
409 }
410
411 return ret;
412}
413
414double FixedEnvelope::AbyssalWassersteinDistance(FixedEnvelope& other, double abyss_depth, double other_scale)
415{
416 sort_by_mass();
417 other.sort_by_mass();
418
419 std::vector<std::pair<double, double>> carried;
420
421 size_t idx_this = 0;
422 size_t idx_other = 0;
423
424 //std::cout << "AAA" << std::endl;
425
426 auto finished = [&]() -> bool { return idx_this >= _confs_no && idx_other >= other._confs_no; };
427 auto next = [&]() -> std::pair<double, double> {
428 if(idx_this >= _confs_no || (idx_other < other._confs_no && _masses[idx_this] > other._masses[idx_other]))
429 {
430 std::pair<double, double> res = std::pair<double, double>(other._masses[idx_other], other._probs[idx_other]*other_scale);
431 idx_other++;
432 return res;
433 }
434 else
435 {
436 std::pair<double, double> res = std::pair<double, double>(_masses[idx_this], -_probs[idx_this]);
437 idx_this++;
438 return res;
439 }
440 };
441 double accd = 0.0;
442 double condemned = 0.0;
443
444 while(!finished())
445 {
446 auto pair = next();
447 double m = pair.first;
448 double p = pair.second;
449 if(!carried.empty() && carried[0].second * p > 0.0)
450 {
451 carried.emplace_back(m, p);
452 continue;
453 }
454
455 while(!carried.empty())
456 {
457 double cm = carried.back().first;
458 double cp = carried.back().second;
459 if(m - cm >= abyss_depth)
460 {
461 for(auto it = carried.cbegin(); it != carried.cend(); it++)
462 condemned += fabs(it->second);
463 carried.clear();
464 break;
465 }
466 if((cp+p)*p > 0.0)
467 {
468 accd += fabs((m-cm)*cp);
469 p += cp;
470 carried.pop_back();
471 }
472 else
473 {
474 accd += fabs((m-cm)*p);
475 cp += p;
476 p = 0.0;
477 break;
478 }
479 }
480 if(p != 0.0)
481 carried.emplace_back(m, p);
482 //std::cout << m << " " << p << std::endl;
483 }
484
485 for(auto it = carried.cbegin(); it != carried.cend(); it++)
486 condemned += fabs(it->second);
487
488 return accd + condemned * abyss_depth * 0.5;
489}
490
491#if 0
492double FixedEnvelope::ScaledAbyssalWassersteinDistance(FixedEnvelope * const * others, double abyss_depth, const double* other_scales, const size_t N)
493{
494 sort_by_mass();
495
496 std::priority_queue<std::pair<double, size_t>> PQ;
497 std::unique_ptr<size_t[]> indices = std::make_unique<size_t[]>(N);
498 memset(indices.get(), 0, sizeof(size_t)*N);
499
500 for(size_t ii=0; ii<N; ii++)
501 {
502 others[ii]->sort_by_mass();
503 if(others[ii]->_confs_no > 0)
504 PQ.emplace_back({others._probs[0], ii});
505 }
506
507
508
509
510 std::vector<std::pair<double, double>> carried;
511
512 size_t idx_this = 0;
513 size_t idx_other = 0;
514
515 //std::cout << "AAA" << std::endl;
516
517 auto finished = [&]() -> bool { return idx_this >= _confs_no && PQ.empty(); };
518 auto next = [&]() -> std::tuple<double, double, size_t> {
519 if(idx_this >= _confs_no || (idx_other < other._confs_no && _masses[idx_this] > other._masses[idx_other]))
520 {
521 std::pair<double, double> res = std::pair<double, double>(other._masses[idx_other], other._probs[idx_other]*other_scale);
522 idx_other++;
523 return res;
524 }
525 else
526 {
527 std::pair<double, double> res = std::pair<double, double>(_masses[idx_this], -_probs[idx_this]);
528 idx_this++;
529 return res;
530 }
531 };
532 double accd = 0.0;
533 double condemned = 0.0;
534
535 while(!finished())
536 {
537 auto [m, p] = next();
538 if(!carried.empty() && carried[0].second * p > 0.0)
539 {
540 carried.emplace_back(m, p);
541 continue;
542 }
543
544 while(!carried.empty())
545 {
546 auto& [cm, cp] = carried.back();
547 if(m - cm >= abyss_depth)
548 {
549 for(auto it = carried.cbegin(); it != carried.cend(); it++)
550 condemned += fabs(it->second);
551 carried.clear();
552 break;
553 }
554 if((cp+p)*p > 0.0)
555 {
556 accd += fabs((m-cm)*cp);
557 p += cp;
558 carried.pop_back();
559 }
560 else
561 {
562 accd += fabs((m-cm)*p);
563 cp += p;
564 p = 0.0;
565 break;
566 }
567 }
568 if(p != 0.0)
569 carried.emplace_back(m, p);
570 //std::cout << m << " " << p << std::endl;
571 }
572
573 for(auto it = carried.cbegin(); it != carried.cend(); it++)
574 condemned += fabs(it->second);
575
576 return accd + condemned * abyss_depth * 0.5;
577}
578
579#endif
580
581#if 0
582double AbyssalWassersteinDistanceGrad(FixedEnvelope* const* envelopes, const double* scales, double* ret_gradient, size_t N, double abyss_depth_exp, double abyss_depth_the)
583{
584return 0.0;
585 std::unique_ptr<size_t[]> env_idx = std::make_unique<size_t[]>(N+1);
586 memset(env_idx.get(), 0, (N+1)*sizeof(size_t));
587 memset(ret_gradient, 0, (N+1)*sizeof(double));
588
589 auto pq_cmp = [](std::pair<double, size_t>& p1, std::pair<double, size_t>& p2) { return p1.first > p2.first; };
590 std::priority_queue<std::pair<double, size_t>, std::vector<std::pair<double, size_t>>, decltype(pq_cmp)> PQ(pq_cmp);
591
592 for(size_t ii=0; ii<=N; ii++)
593 {
594 envelopes[ii]->sort_by_mass();
595 if(envelopes[ii]->_confs_no > 0)
596 {
597 std::cout << "Initial push: " << envelopes[ii]->_masses[0] << " " << ii << '\n';
598 PQ.push({envelopes[ii]->_masses[0], ii});
599 }
600 }
601
602 std::vector<std::tuple<double, double, size_t>> carried;
603
604 auto next = [&]() -> std::tuple<double, double, size_t> {
605 auto [mass, eidx] = PQ.top();
606 double prob = envelopes[eidx]->_probs[env_idx[eidx]];
607 PQ.pop();
608 if(eidx == 0)
609 prob = -prob;
610 else
611 prob = prob * scales[eidx];
612 std::cout << "Next: " << mass << " " << prob << " " << eidx << '\n';
613 env_idx[eidx]++;
614 if(env_idx[eidx] < envelopes[eidx]->_confs_no)
615 PQ.push({envelopes[eidx]->_masses[env_idx[eidx]], eidx});
616
617 return {mass, prob, eidx};
618 };
619 double accd = 0.0;
620 double condemned = 0.0;
621 //double flow;
622 const double max_flow_dist = abyss_depth_exp + abyss_depth_the;
623 max_flow_dist *= 2.0;
624
625 while(!PQ.empty())
626 {
627 auto [m, p, eidx] = next();
628 if(!carried.empty() && std::get<1>(carried[0]) * p > 0.0)
629 {
630 carried.emplace_back(m, p, eidx);
631 continue;
632 }
633
634 while(!carried.empty())
635 {
636 auto& [cm, cp, ceidx] = carried.back();
637 if(m - cm >= max_flow_dist)
638 {
639 for(auto it = carried.cbegin(); it != carried.cend(); it++)
640 condemned += fabs(std::get<1>(*it));
641 carried.clear();
642 break;
643 }
644 std::cout << "accing\n";
645 if((cp+p)*p > 0.0)
646 {
647 accd += fabs((m-cm)*cp);
648 p += cp;
649 carried.pop_back();
650 std::cout << "cprob:" << cp << '\n';
651 }
652 else
653 {
654 accd += fabs((m-cm)*p);
655 cp += p;
656 std::cout << "prob:" << p << '\n';
657 p = 0.0;
658 break;
659 }
660 }
661 if(p != 0.0)
662 carried.emplace_back(m, p, eidx);
663 //std::cout << m << " " << p << std::endl;
664 }
665
666 for(auto it = carried.cbegin(); it != carried.cend(); it++)
667 condemned += fabs(std::get<1>(*it));
668
669 std::cout << "ISO:" << accd << " " << condemned << '\n';
670 return accd + condemned * max_flow_dist * 0.5;
671 while(!PQ.empty())
672 {
673 auto [m, p, eidx] = next();
674 if(!carried.empty() && (std::get<1>(carried[0]) * p > 0.0))
675 {
676 carried.emplace_back(m, p, eidx);
677 continue;
678 }
679
680 while(!carried.empty())
681 {
682 auto& [cm, cp, ceidx] = carried.back();
683 if(m - cm >= max_flow_dist)
684 {
685 for(auto it = carried.cbegin(); it != carried.cend(); it++)
686 {
687 flow = fabs(std::get<1>(*it));
688 const size_t target_idx = std::get<2>(*it);
689 flow *= target_idx == 0 ? abyss_depth_exp : abyss_depth_the;
690 ret_gradient[target_idx] += flow;
691 condemned += flow;
692 std::cout << "condemnin': " << m << " " << p << " " << eidx << " | " << cm << " " << cp << " " << ceidx << '\n';
693 }
694 carried.clear();
695 break;
696 }
697 if((cp+p)*p > 0.0)
698 {
699 flow = fabs((m-cm)*cp);
700 accd += flow;
701 p += cp;
702 ret_gradient[ceidx] += flow;
703 carried.pop_back();
704 }
705 else
706 {
707 flow = fabs((m-cm)*p);
708 accd += flow;
709 cp += p;
710 ret_gradient[eidx] += flow;
711 p = 0.0;
712 break;
713 }
714 }
715 if(p != 0.0)
716 carried.emplace_back(m, p, eidx);
717 //std::cout << m << " " << p << std::endl;
718 }
719
720 for(auto it = carried.cbegin(); it != carried.cend(); it++)
721 condemned += fabs(std::get<1>(*it));
722
723
724 return accd + condemned * (abyss_depth_exp + abyss_depth_the) * 0.5;
725}
726#endif
727
728
729std::tuple<double, double, double> FixedEnvelope::WassersteinMatch(FixedEnvelope& other, double flow_distance, double other_scale)
730{
731 if(_confs_no == 0)
732 return {0.0, other.get_total_prob() * other_scale, 0.0};
733
734 double unmatched1 = 0.0;
735 double unmatched2 = 0.0;
736 double massflow = 0.0;
737
738 sort_by_mass();
739 other.sort_by_mass();
740
741 size_t idx_this = 0;
742 size_t idx_other = 0;
743 double used_prob_this = 0.0;
744 double used_prob_other = 0.0;
745
746 while(idx_this < _confs_no && idx_other < other._confs_no)
747 {
748 bool moved = true;
749 while(moved && idx_this < _confs_no && idx_other < other._confs_no)
750 {
751 moved = false;
752 if(_masses[idx_this] < other._masses[idx_other] - flow_distance)
753 {
754 unmatched1 += _probs[idx_this] - used_prob_this;
755 used_prob_this = 0.0;
756 idx_this++;
757 moved = true;
758 }
759 if(other._masses[idx_other] < _masses[idx_this] - flow_distance)
760 {
761 unmatched2 += other._probs[idx_other]*other_scale - used_prob_other;
762 used_prob_other = 0.0;
763 idx_other++;
764 moved = true;
765 }
766 }
767 if(idx_this < _confs_no && idx_other < other._confs_no)
768 {
769 assert(_probs[idx_this] - used_prob_this >= 0.0);
770 assert(other._probs[idx_other]*other_scale - used_prob_other >= 0.0);
771
772 if(_probs[idx_this] - used_prob_this < other._probs[idx_other]*other_scale - used_prob_other)
773 {
774 massflow += _probs[idx_this] - used_prob_this;
775 used_prob_other += _probs[idx_this] - used_prob_this;
776 assert(used_prob_other >= 0.0);
777 used_prob_this = 0.0;
778 idx_this++;
779 }
780 else
781 {
782 massflow += other._probs[idx_other]*other_scale - used_prob_other;
783 used_prob_this += other._probs[idx_other]*other_scale - used_prob_other;
784 assert(used_prob_this >= 0.0);
785 used_prob_other = 0.0;
786 idx_other++;
787 }
788 }
789 }
790
791 unmatched1 -= used_prob_this;
792 unmatched2 -= used_prob_other;
793
794 for(; idx_this < _confs_no; idx_this++)
795 unmatched1 += _probs[idx_this];
796 for(; idx_other < other._confs_no; idx_other++)
797 unmatched2 += other._probs[idx_other]*other_scale;
798
799 return {unmatched1, unmatched2, massflow};
800}
801
802FixedEnvelope FixedEnvelope::bin(double bin_width, double middle)
803{
804 sort_by_mass();
805
806 FixedEnvelope ret;
807
808 if(_confs_no == 0)
809 return ret;
810
811 ret.reallocate_memory<false>(ISOSPEC_INIT_TABLE_SIZE);
812
813 if(bin_width == 0)
814 {
815 double curr_mass = _masses[0];
816 double accd_prob = _probs[0];
817 for(size_t ii = 1; ii<_confs_no; ii++)
818 {
819 if(curr_mass != _masses[ii])
820 {
821 ret.store_conf(curr_mass, accd_prob);
822 curr_mass = _masses[ii];
823 accd_prob = _probs[ii];
824 }
825 else
826 accd_prob += _probs[ii];
827 }
828 ret.store_conf(curr_mass, accd_prob);
829 return ret;
830 }
831
832 size_t ii = 0;
833
834 double half_width = 0.5*bin_width;
835 double hwmm = half_width-middle;
836
837 while(ii < _confs_no)
838 {
839 double current_bin_middle = floor((_masses[ii]+hwmm)/bin_width)*bin_width + middle;
840 double current_bin_end = current_bin_middle + half_width;
841 double bin_prob = 0.0;
842
843 while(ii < _confs_no && _masses[ii] <= current_bin_end)
844 {
845 bin_prob += _probs[ii];
846 ii++;
847 }
848 ret.store_conf(current_bin_middle, bin_prob);
849 }
850
851 return ret;
852}
853
854template<bool tgetConfs> void FixedEnvelope::reallocate_memory(size_t new_size)
855{
856 current_size = new_size;
857 // FIXME: Handle overflow gracefully here. It definitely could happen for people still stuck on 32 bits...
858 _masses = reinterpret_cast<double*>(realloc(_masses, new_size * sizeof(double)));
859 if(_masses == nullptr)
860 throw std::bad_alloc();
861 tmasses = _masses + _confs_no;
862
863 _probs = reinterpret_cast<double*>(realloc(_probs, new_size * sizeof(double)));
864 if(_probs == nullptr)
865 throw std::bad_alloc();
866 tprobs = _probs + _confs_no;
867
868 constexpr_if(tgetConfs)
869 {
870 _confs = reinterpret_cast<int*>(realloc(_confs, new_size * allDimSizeofInt));
871 if(_confs == nullptr)
872 throw std::bad_alloc();
873 tconfs = _confs + (allDim * _confs_no);
874 }
875}
876
877void FixedEnvelope::slow_reallocate_memory(size_t new_size)
878{
879 current_size = new_size;
880 // FIXME: Handle overflow gracefully here. It definitely could happen for people still stuck on 32 bits...
881 _masses = reinterpret_cast<double*>(realloc(_masses, new_size * sizeof(double)));
882 if(_masses == nullptr)
883 throw std::bad_alloc();
884 tmasses = _masses + _confs_no;
885
886 _probs = reinterpret_cast<double*>(realloc(_probs, new_size * sizeof(double)));
887 if(_probs == nullptr)
888 throw std::bad_alloc();
889 tprobs = _probs + _confs_no;
890
891 if(_confs != nullptr)
892 {
893 _confs = reinterpret_cast<int*>(realloc(_confs, new_size * allDimSizeofInt));
894 if(_confs == nullptr)
895 throw std::bad_alloc();
896 tconfs = _confs + (allDim * _confs_no);
897 }
898}
899
900template<bool tgetConfs> void FixedEnvelope::threshold_init(Iso&& iso, double threshold, bool absolute)
901{
902 IsoThresholdGenerator generator(std::move(iso), threshold, absolute);
903
904 size_t tab_size = generator.count_confs();
905 this->allDim = generator.getAllDim();
906 this->allDimSizeofInt = this->allDim * sizeof(int);
907
908 this->reallocate_memory<tgetConfs>(tab_size);
909
910 double* ttmasses = this->_masses;
911 double* ttprobs = this->_probs;
912 ISOSPEC_MAYBE_UNUSED int* ttconfs;
913 constexpr_if(tgetConfs)
914 ttconfs = _confs;
915
916 while(generator.advanceToNextConfiguration())
917 {
918 *ttmasses = generator.mass(); ttmasses++;
919 *ttprobs = generator.prob(); ttprobs++;
920 constexpr_if(tgetConfs) { generator.get_conf_signature(ttconfs); ttconfs += allDim; }
921 }
922
923 this->_confs_no = tab_size;
924}
925
926template void FixedEnvelope::threshold_init<true>(Iso&& iso, double threshold, bool absolute);
927template void FixedEnvelope::threshold_init<false>(Iso&& iso, double threshold, bool absolute);
928
929
930template<bool tgetConfs> void FixedEnvelope::total_prob_init(Iso&& iso, double target_total_prob, bool optimize)
931{
932 if(target_total_prob <= 0.0)
933 return;
934
935 if(target_total_prob >= 1.0)
936 {
937 threshold_init<tgetConfs>(std::move(iso), 0.0, true);
938 return;
939 }
940
941 IsoLayeredGenerator generator(std::move(iso), 1000, 1000, true, std::min<double>(target_total_prob, 0.9999));
942
943 this->allDim = generator.getAllDim();
944 this->allDimSizeofInt = this->allDim*sizeof(int);
945
946
947 this->reallocate_memory<tgetConfs>(ISOSPEC_INIT_TABLE_SIZE);
948
949 size_t last_switch = 0;
950 double prob_at_last_switch = 0.0;
951 double prob_so_far = 0.0;
952 double layer_delta;
953
954 const double sum_above = log1p(-target_total_prob) - 2.3025850929940455; // log(0.1);
955
956 do
957 { // Store confs until we accumulate more prob than needed - and, if optimizing,
958 // store also the rest of the last layer
959 while(generator.advanceToNextConfigurationWithinLayer())
960 {
961 this->template addConfILG<tgetConfs>(generator);
962 prob_so_far += *(tprobs-1); // The just-stored probability
963 if(prob_so_far >= target_total_prob)
964 {
965 if (optimize)
966 {
967 while(generator.advanceToNextConfigurationWithinLayer())
968 this->template addConfILG<tgetConfs>(generator);
969 break;
970 }
971 else
972 return;
973 }
974 }
975 if(prob_so_far >= target_total_prob)
976 break;
977
978 last_switch = this->_confs_no;
979 prob_at_last_switch = prob_so_far;
980
981 layer_delta = sum_above - log1p(-prob_so_far);
982 layer_delta = (std::max)((std::min)(layer_delta, -0.1), -5.0);
983 } while(generator.nextLayer(layer_delta));
984
985 if(!optimize || prob_so_far <= target_total_prob)
986 return;
987
988 // Right. We have extra configurations and we have been asked to produce an optimal p-set, so
989 // now we shall trim unneeded configurations, using an algorithm dubbed "quicktrim"
990 // - similar to the quickselect algorithm, except that we use the cumulative sum of elements
991 // left of pivot to decide whether to go left or right, instead of the positional index.
992 // We'll be sorting by the prob array, permuting the other ones in parallel.
993
994 int* conf_swapspace = nullptr;
995 constexpr_if(tgetConfs)
996 conf_swapspace = reinterpret_cast<int*>(malloc(this->allDimSizeofInt));
997
998 size_t start = last_switch;
999 size_t end = this->_confs_no;
1000 double sum_to_start = prob_at_last_switch;
1001
1002 while(start < end)
1003 {
1004 // Partition part
1005 size_t len = end - start;
1006#if ISOSPEC_BUILDING_R
1007 size_t pivot = len/2 + start;
1008#else
1009 size_t pivot = random_gen() % len + start; // Using Mersenne twister directly - we don't
1010 // need a very uniform distribution just for pivot
1011 // selection
1012#endif
1013 double pprob = this->_probs[pivot];
1014 swap<tgetConfs>(pivot, end-1, conf_swapspace);
1015
1016 double new_csum = sum_to_start;
1017
1018 size_t loweridx = start;
1019 for(size_t ii = start; ii < end-1; ii++)
1020 if(this->_probs[ii] > pprob)
1021 {
1022 swap<tgetConfs>(ii, loweridx, conf_swapspace);
1023 new_csum += this->_probs[loweridx];
1024 loweridx++;
1025 }
1026
1027 swap<tgetConfs>(end-1, loweridx, conf_swapspace);
1028
1029 // Selection part
1030 if(new_csum < target_total_prob)
1031 {
1032 start = loweridx + 1;
1033 sum_to_start = new_csum + this->_probs[loweridx];
1034 }
1035 else
1036 end = loweridx;
1037 }
1038
1039 constexpr_if(tgetConfs)
1040 free(conf_swapspace);
1041
1042 if(end <= current_size/2)
1043 // Overhead in memory of 2x or more, shrink to fit
1044 this->template reallocate_memory<tgetConfs>(end);
1045
1046 this->_confs_no = end;
1047}
1048
1049template void FixedEnvelope::total_prob_init<true>(Iso&& iso, double target_total_prob, bool optimize);
1050template void FixedEnvelope::total_prob_init<false>(Iso&& iso, double target_total_prob, bool optimize);
1051
1052template<bool tgetConfs> void FixedEnvelope::stochastic_init(Iso&& iso, size_t _no_molecules, double _precision, double _beta_bias)
1053{
1054 IsoStochasticGenerator generator(std::move(iso), _no_molecules, _precision, _beta_bias);
1055
1056 this->allDim = generator.getAllDim();
1057 this->allDimSizeofInt = this->allDim * sizeof(int);
1058
1059 this->reallocate_memory<tgetConfs>(ISOSPEC_INIT_TABLE_SIZE);
1060
1061 while(generator.advanceToNextConfiguration())
1062 addConfILG<tgetConfs, IsoStochasticGenerator>(generator);
1063}
1064
1065template void FixedEnvelope::stochastic_init<true>(Iso&& iso, size_t _no_molecules, double _precision, double _beta_bias);
1066template void FixedEnvelope::stochastic_init<false>(Iso&& iso, size_t _no_molecules, double _precision, double _beta_bias);
1067
1068double FixedEnvelope::empiric_average_mass()
1069{
1070 double ret = 0.0;
1071 for(size_t ii = 0; ii < _confs_no; ii++)
1072 {
1073 ret += _masses[ii] * _probs[ii];
1074 }
1075 return ret / get_total_prob();
1076}
1077
1078double FixedEnvelope::empiric_variance()
1079{
1080 double ret = 0.0;
1081 double avg = empiric_average_mass();
1082 for(size_t ii = 0; ii < _confs_no; ii++)
1083 {
1084 double msq = _masses[ii] - avg;
1085 ret += msq * msq * _probs[ii];
1086 }
1087
1088 return ret / get_total_prob();
1089}
1090
1091FixedEnvelope FixedEnvelope::Binned(Iso&& iso, double target_total_prob, double bin_width, double bin_middle)
1092{
1093 FixedEnvelope ret;
1094
1095 double min_mass = iso.getLightestPeakMass();
1096 double range_len = iso.getHeaviestPeakMass() - min_mass;
1097 size_t no_bins = static_cast<size_t>(range_len / bin_width) + 2;
1098 double half_width = 0.5*bin_width;
1099 double hwmm = half_width-bin_middle;
1100 size_t idx_min = floor((min_mass + hwmm) / bin_width);
1101 size_t idx_max = idx_min + no_bins;
1102
1103 double* acc;
1104# if ISOSPEC_GOT_MMAN
1105 acc = reinterpret_cast<double*>(mmap(nullptr, sizeof(double)*no_bins, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
1106#else
1107 // This will probably crash for large molecules and high resolutions...
1108 acc = reinterpret_cast<double*>(calloc(no_bins, sizeof(double)));
1109#endif
1110 if(acc == NULL)
1111 throw std::bad_alloc();
1112
1113 acc -= idx_min;
1114
1115 IsoLayeredGenerator ITG(std::move(iso));
1116
1117
1118 bool non_empty;
1119 while((non_empty = ITG.advanceToNextConfiguration()) && ITG.prob() == 0.0)
1120 {}
1121
1122 if(non_empty)
1123 {
1124 double accum_prob = ITG.prob();
1125 size_t nonzero_idx = floor((ITG.mass() + hwmm)/bin_width);
1126 acc[nonzero_idx] = accum_prob;
1127
1128 while(ITG.advanceToNextConfiguration() && accum_prob < target_total_prob)
1129 {
1130 double prob = ITG.prob();
1131 accum_prob += prob;
1132 size_t bin_idx = floor((ITG.mass() + hwmm)/bin_width);
1133 acc[bin_idx] += prob;
1134 }
1135
1136 // Making the assumption that there won't be gaps of more than 10 Da in the spectrum. This is true for all
1137 // molecules made of natural elements.
1138 size_t distance_10da = static_cast<size_t>(10.0/bin_width) + 1;
1139
1140 size_t empty_steps = 0;
1141
1142 ret.reallocate_memory<false>(ISOSPEC_INIT_TABLE_SIZE);
1143
1144 for(size_t ii = nonzero_idx; ii >= idx_min && empty_steps < distance_10da; ii--)
1145 {
1146 if(acc[ii] > 0.0)
1147 {
1148 empty_steps = 0;
1149 ret.store_conf(static_cast<double>(ii)*bin_width + bin_middle, acc[ii]);
1150 }
1151 else
1152 empty_steps++;
1153 }
1154
1155 empty_steps = 0;
1156 for(size_t ii = nonzero_idx+1; ii < idx_max && empty_steps < distance_10da; ii++)
1157 {
1158 if(acc[ii] > 0.0)
1159 {
1160 empty_steps = 0;
1161 ret.store_conf(static_cast<double>(ii)*bin_width + bin_middle, acc[ii]);
1162 }
1163 else
1164 empty_steps++;
1165 }
1166 }
1167
1168 acc += idx_min;
1169
1170# if ISOSPEC_GOT_MMAN
1171 munmap(acc, sizeof(double)*no_bins);
1172#else
1173 free(acc);
1174#endif
1175
1176 return ret;
1177}
1178
1179} // namespace IsoSpec