12 #ifndef __RD_SPARSE_INT_VECT_20070921__
13 #define __RD_SPARSE_INT_VECT_20070921__
27 template <
typename IndexType>
39 d_length = other.d_length;
41 d_data.insert(other.d_data.begin(), other.d_data.end());
46 initFromText(pkl.c_str(), pkl.size());
50 initFromText(pkl, len);
57 d_length = other.d_length;
59 d_data.insert(other.d_data.begin(), other.d_data.end());
67 #pragma clang diagnostic push
68 #pragma clang diagnostic ignored "-Wtautological-compare"
69 #elif (defined(__GNUC__) || defined(__GNUG__)) && \
70 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 1))
71 #if (__GNUC__ > 4 || __GNUC_MINOR__ > 5)
72 #pragma GCC diagnostic push
74 #pragma GCC diagnostic ignored "-Wtype-limits"
78 if (idx < 0 || idx >= d_length) {
82 typename StorageType::const_iterator iter = d_data.find(idx);
83 if (iter != d_data.end()) {
90 void setVal(IndexType idx,
int val) {
91 if (idx < 0 || idx >= d_length) {
101 #pragma clang diagnostic pop
102 #elif (defined(__GNUC__) || defined(__GNUG__)) && \
103 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5))
104 #pragma GCC diagnostic pop
116 typename StorageType::const_iterator iter;
117 for (iter = d_data.begin(); iter != d_data.end(); ++iter) {
121 res += abs(iter->second);
135 if (other.d_length != d_length) {
139 typename StorageType::iterator iter = d_data.begin();
140 typename StorageType::const_iterator oIter = other.d_data.begin();
141 while (iter != d_data.end()) {
143 while (oIter != other.d_data.end() && oIter->first < iter->first) {
146 if (oIter != other.d_data.end() && oIter->first == iter->first) {
148 if (oIter->second < iter->second) {
149 iter->second = oIter->second;
156 typename StorageType::iterator tmpIter = iter;
174 if (other.d_length != d_length) {
178 typename StorageType::iterator iter = d_data.begin();
179 typename StorageType::const_iterator oIter = other.d_data.begin();
180 while (iter != d_data.end()) {
182 while (oIter != other.d_data.end() && oIter->first < iter->first) {
183 d_data[oIter->first] = oIter->second;
186 if (oIter != other.d_data.end() && oIter->first == iter->first) {
188 if (oIter->second > iter->second) {
189 iter->second = oIter->second;
196 while (oIter != other.d_data.end()) {
197 d_data[oIter->first] = oIter->second;
209 if (other.d_length != d_length) {
212 typename StorageType::iterator iter = d_data.begin();
213 typename StorageType::const_iterator oIter = other.d_data.begin();
214 while (oIter != other.d_data.end()) {
215 while (iter != d_data.end() && iter->first < oIter->first) {
218 if (iter != d_data.end() && oIter->first == iter->first) {
220 iter->second += oIter->second;
222 typename StorageType::iterator tIter = iter;
230 d_data[oIter->first] = oIter->second;
243 if (other.d_length != d_length) {
246 typename StorageType::iterator iter = d_data.begin();
247 typename StorageType::const_iterator oIter = other.d_data.begin();
248 while (oIter != other.d_data.end()) {
249 while (iter != d_data.end() && iter->first < oIter->first) {
252 if (iter != d_data.end() && oIter->first == iter->first) {
254 iter->second -= oIter->second;
256 typename StorageType::iterator tIter = iter;
264 d_data[oIter->first] = -oIter->second;
276 typename StorageType::iterator iter = d_data.begin();
277 while (iter != d_data.end()) {
288 typename StorageType::iterator iter = d_data.begin();
289 while (iter != d_data.end()) {
300 typename StorageType::iterator iter = d_data.begin();
301 while (iter != d_data.end()) {
312 typename StorageType::iterator iter = d_data.begin();
313 while (iter != d_data.end()) {
325 if (d_length != v2.d_length) {
328 return d_data == v2.d_data;
331 return !(*
this == v2);
336 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
341 tInt =
sizeof(IndexType);
344 IndexType nEntries = d_data.size();
347 typename StorageType::const_iterator iter = d_data.begin();
348 while (iter != d_data.end()) {
350 std::int32_t tInt = iter->second;
358 initFromText(txt.c_str(), txt.length());
365 void initFromText(
const char *pkl,
const unsigned int len) {
367 std::stringstream ss(std::ios_base::binary | std::ios_base::out |
373 if (vers == 0x0001) {
376 if (tInt >
sizeof(IndexType)) {
378 "IndexType cannot accommodate index size in SparseIntVect pickle");
382 readVals<unsigned char>(ss);
384 case sizeof(std::int32_t):
385 readVals<std::uint32_t>(ss);
387 case sizeof(boost::int64_t):
388 readVals<boost::uint64_t>(ss);
397 template <
typename T>
398 void readVals(std::stringstream &ss) {
399 PRECONDITION(
sizeof(T) <=
sizeof(IndexType),
"invalid size");
405 for (T i = 0; i < nEntries; ++i) {
414 template <
typename IndexType,
typename SequenceType>
416 const SequenceType &seq) {
417 typename SequenceType::const_iterator seqIt;
418 for (seqIt = seq.begin(); seqIt != seq.end(); ++seqIt) {
420 IndexType idx = *seqIt;
426 template <
typename IndexType>
427 void calcVectParams(
const SparseIntVect<IndexType> &v1,
428 const SparseIntVect<IndexType> &v2,
double &v1Sum,
429 double &v2Sum,
double &andSum) {
430 if (v1.getLength() != v2.getLength()) {
433 v1Sum = v2Sum = andSum = 0.0;
436 typename SparseIntVect<IndexType>::StorageType::const_iterator iter1, iter2;
437 iter1 = v1.getNonzeroElements().begin();
438 if (iter1 != v1.getNonzeroElements().end()) {
439 v1Sum += abs(iter1->second);
441 iter2 = v2.getNonzeroElements().begin();
442 if (iter2 != v2.getNonzeroElements().end()) {
443 v2Sum += abs(iter2->second);
445 while (iter1 != v1.getNonzeroElements().end()) {
446 while (iter2 != v2.getNonzeroElements().end() &&
447 iter2->first < iter1->first) {
449 if (iter2 != v2.getNonzeroElements().end()) {
450 v2Sum += abs(iter2->second);
453 if (iter2 != v2.getNonzeroElements().end()) {
454 if (iter2->first == iter1->first) {
455 if (abs(iter2->second) < abs(iter1->second)) {
456 andSum += abs(iter2->second);
458 andSum += abs(iter1->second);
461 if (iter2 != v2.getNonzeroElements().end()) {
462 v2Sum += abs(iter2->second);
466 if (iter1 != v1.getNonzeroElements().end()) {
467 v1Sum += abs(iter1->second);
473 if (iter1 != v1.getNonzeroElements().end()) {
475 while (iter1 != v1.getNonzeroElements().end()) {
476 v1Sum += abs(iter1->second);
480 if (iter2 != v2.getNonzeroElements().end()) {
482 while (iter2 != v2.getNonzeroElements().end()) {
483 v2Sum += abs(iter2->second);
490 template <
typename IndexType>
493 bool returnDistance =
false,
double bounds = 0.0) {
499 if (!returnDistance && bounds > 0.0) {
502 double denom = v1Sum + v2Sum;
503 if (fabs(denom) < 1e-6) {
507 double minV = v1Sum < v2Sum ? v1Sum : v2Sum;
508 if (2. * minV / denom < bounds) {
517 calcVectParams(v1, v2, v1Sum, v2Sum, numer);
519 double denom = v1Sum + v2Sum;
521 if (fabs(denom) < 1e-6) {
524 sim = 2. * numer / denom;
526 if (returnDistance) sim = 1. - sim;
531 template <
typename IndexType>
534 bool returnDistance =
false,
double bounds = 0.0) {
543 calcVectParams(v1, v2, v1Sum, v2Sum, andSum);
545 double denom = a * v1Sum + b * v2Sum + (1 - a - b) * andSum;
548 if (fabs(denom) < 1e-6) {
551 sim = andSum / denom;
553 if (returnDistance) sim = 1. - sim;
558 template <
typename IndexType>
561 bool returnDistance =
false,
double bounds = 0.0) {
#define RDUNUSED_PARAM(x)
#define PRECONDITION(expr, mess)
const int ci_SPARSEINTVECT_VERSION
version number to use in pickles
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
a class for efficiently storing sparse vectors of ints
SparseIntVect< IndexType > & operator+=(int v)
SparseIntVect< IndexType > & operator/(int v)
SparseIntVect(IndexType length)
initialize with a particular length
unsigned int size() const
returns the length
const SparseIntVect< IndexType > operator+(const SparseIntVect< IndexType > &other) const
SparseIntVect< IndexType > & operator*(int v)
SparseIntVect< IndexType > & operator+=(const SparseIntVect< IndexType > &other)
bool operator==(const SparseIntVect< IndexType > &v2) const
SparseIntVect(const SparseIntVect< IndexType > &other)
Copy constructor.
~SparseIntVect()=default
destructor (doesn't need to do anything)
SparseIntVect< IndexType > & operator|=(const SparseIntVect< IndexType > &other)
const SparseIntVect< IndexType > operator-(const SparseIntVect< IndexType > &other) const
const SparseIntVect< IndexType > operator|(const SparseIntVect< IndexType > &other) const
SparseIntVect< IndexType > & operator/=(int v)
const SparseIntVect< IndexType > operator&(const SparseIntVect< IndexType > &other) const
SparseIntVect(const char *pkl, const unsigned int len)
constructor from a pickle
int operator[](IndexType idx) const
support indexing using []
void fromString(const std::string &txt)
SparseIntVect< IndexType > & operator*=(int v)
void setVal(IndexType idx, int val)
set the value at an index
SparseIntVect< IndexType > & operator&=(const SparseIntVect< IndexType > &other)
SparseIntVect & operator=(const SparseIntVect< IndexType > &other)
std::string toString() const
returns a binary string representation (pickle)
int getTotalVal(bool doAbs=false) const
SparseIntVect< IndexType > & operator-(int v)
std::map< IndexType, int > StorageType
SparseIntVect< IndexType > & operator-=(const SparseIntVect< IndexType > &other)
bool operator!=(const SparseIntVect< IndexType > &v2) const
SparseIntVect< IndexType > & operator-=(int v)
SparseIntVect(const std::string &pkl)
constructor from a pickle
SparseIntVect< IndexType > & operator+(int v)
IndexType getLength() const
returns the length
int getVal(IndexType idx) const
return the value at an index
const StorageType & getNonzeroElements() const
returns our nonzero elements as a map(IndexType->int)
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
double TverskySimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, double a, double b, bool returnDistance=false, double bounds=0.0)
void updateFromSequence(SparseIntVect< IndexType > &vect, const SequenceType &seq)
double TanimotoSimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, bool returnDistance=false, double bounds=0.0)
double DiceSimilarity(const SparseIntVect< IndexType > &v1, const SparseIntVect< IndexType > &v2, bool returnDistance=false, double bounds=0.0)
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream