RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/export.h>
13 #include <RDGeneral/hanoiSort.h>
14 #include <GraphMol/ROMol.h>
15 #include <GraphMol/RingInfo.h>
17 #include <cstdint>
18 #include <boost/dynamic_bitset.hpp>
20 #include <cstring>
21 #include <iostream>
22 #include <cassert>
23 #include <cstring>
24 #include <vector>
25 
26 // #define VERBOSE_CANON 1
27 
28 namespace RDKit {
29 namespace Canon {
30 
33  unsigned int bondStereo;
34  unsigned int nbrSymClass{0};
35  unsigned int nbrIdx{0};
36  const std::string *p_symbol{
37  nullptr}; // if provided, this is used to order bonds
38  bondholder() : bondStereo(static_cast<unsigned int>(Bond::STEREONONE)) {}
39  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
40  unsigned int nsc)
41  : bondType(bt),
42  bondStereo(static_cast<unsigned int>(bs)),
43  nbrSymClass(nsc),
44  nbrIdx(ni) {}
45  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
46  unsigned int nsc)
47  : bondType(bt), bondStereo(bs), nbrSymClass(nsc), nbrIdx(ni) {}
48  bool operator<(const bondholder &o) const {
49  if (p_symbol && o.p_symbol) {
50  return (*p_symbol) < (*o.p_symbol);
51  }
52  if (bondType != o.bondType) {
53  return bondType < o.bondType;
54  }
55  if (bondStereo != o.bondStereo) {
56  return bondStereo < o.bondStereo;
57  }
58  return nbrSymClass < o.nbrSymClass;
59  }
60  static bool greater(const bondholder &lhs, const bondholder &rhs) {
61  if (lhs.p_symbol && rhs.p_symbol && (*lhs.p_symbol) != (*rhs.p_symbol)) {
62  return (*lhs.p_symbol) > (*rhs.p_symbol);
63  }
64  if (lhs.bondType != rhs.bondType) {
65  return lhs.bondType > rhs.bondType;
66  }
67  if (lhs.bondStereo != rhs.bondStereo) {
68  return lhs.bondStereo > rhs.bondStereo;
69  }
70  return lhs.nbrSymClass > rhs.nbrSymClass;
71  }
72 
73  static int compare(const bondholder &x, const bondholder &y,
74  unsigned int div = 1) {
75  if (x.p_symbol && y.p_symbol) {
76  if ((*x.p_symbol) < (*y.p_symbol)) {
77  return -1;
78  } else if ((*x.p_symbol) > (*y.p_symbol)) {
79  return 1;
80  }
81  }
82  if (x.bondType < y.bondType) {
83  return -1;
84  } else if (x.bondType > y.bondType) {
85  return 1;
86  }
87  if (x.bondStereo < y.bondStereo) {
88  return -1;
89  } else if (x.bondStereo > y.bondStereo) {
90  return 1;
91  }
92  return x.nbrSymClass / div - y.nbrSymClass / div;
93  }
94 };
96  public:
97  const Atom *atom{nullptr};
98  int index{-1};
99  unsigned int degree{0};
100  unsigned int totalNumHs{0};
101  bool hasRingNbr{false};
102  bool isRingStereoAtom{false};
103  int *nbrIds{nullptr};
104  const std::string *p_symbol{
105  nullptr}; // if provided, this is used to order atoms
106  std::vector<int> neighborNum;
107  std::vector<int> revistedNeighbors;
108  std::vector<bondholder> bonds;
109 
111 
112  ~canon_atom() { free(nbrIds); }
113 };
114 
116  canon_atom *atoms, std::vector<bondholder> &nbrs);
117 
119  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
120  std::vector<std::pair<unsigned int, unsigned int>> &result);
121 
122 /*
123  * Different types of atom compare functions:
124  *
125  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
126  *dependent chirality
127  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
128  *highly symmetrical graphs/molecules
129  * - AtomCompareFunctor: Basic atom compare function which also allows to
130  *include neighbors within the ranking
131  */
132 
134  public:
135  Canon::canon_atom *dp_atoms{nullptr};
136  const ROMol *dp_mol{nullptr};
137  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
138  *dp_bondsInPlay{nullptr};
139 
142  Canon::canon_atom *atoms, const ROMol &m,
143  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
144  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
145  : dp_atoms(atoms),
146  dp_mol(&m),
147  dp_atomsInPlay(atomsInPlay),
148  dp_bondsInPlay(bondsInPlay) {}
149  int operator()(int i, int j) const {
150  PRECONDITION(dp_atoms, "no atoms");
151  PRECONDITION(dp_mol, "no molecule");
152  PRECONDITION(i != j, "bad call");
153  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
154  return 0;
155  }
156 
157  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
158  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
159  }
160  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
161  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
162  }
163  for (unsigned int ii = 0;
164  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
165  int cmp =
166  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
167  if (cmp) return cmp;
168  }
169 
170  std::vector<std::pair<unsigned int, unsigned int>> swapsi;
171  std::vector<std::pair<unsigned int, unsigned int>> swapsj;
172  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
173  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
174  }
175  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
176  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
177  }
178  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
179  int cmp = swapsi[ii].second - swapsj[ii].second;
180  if (cmp) return cmp;
181  }
182  return 0;
183  }
184 };
185 
187  public:
188  Canon::canon_atom *dp_atoms{nullptr};
189  const ROMol *dp_mol{nullptr};
190  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
191  *dp_bondsInPlay{nullptr};
192 
195  Canon::canon_atom *atoms, const ROMol &m,
196  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
197  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
198  : dp_atoms(atoms),
199  dp_mol(&m),
200  dp_atomsInPlay(atomsInPlay),
201  dp_bondsInPlay(bondsInPlay) {}
202  int operator()(int i, int j) const {
203  PRECONDITION(dp_atoms, "no atoms");
204  PRECONDITION(dp_mol, "no molecule");
205  PRECONDITION(i != j, "bad call");
206  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
207  return 0;
208  }
209 
210  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
211  return -1;
212  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
213  return 1;
214  }
215 
216  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
217  return -1;
218  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
219  return 1;
220  }
221 
222  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
223  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
224  }
225  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
226  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
227  }
228  for (unsigned int ii = 0;
229  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
230  int cmp =
231  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
232  if (cmp) return cmp;
233  }
234 
235  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
236  return -1;
237  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
238  return 1;
239  }
240  return 0;
241  }
242 };
243 
245  unsigned int getAtomRingNbrCode(unsigned int i) const {
246  if (!dp_atoms[i].hasRingNbr) return 0;
247 
248  int *nbrs = dp_atoms[i].nbrIds;
249  unsigned int code = 0;
250  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
251  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
252  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
253  }
254  }
255  return code;
256  }
257 
258  int basecomp(int i, int j) const {
259  PRECONDITION(dp_atoms, "no atoms");
260  unsigned int ivi, ivj;
261 
262  // always start with the current class:
263  ivi = dp_atoms[i].index;
264  ivj = dp_atoms[j].index;
265  if (ivi < ivj) {
266  return -1;
267  } else if (ivi > ivj) {
268  return 1;
269  }
270  // use the atom-mapping numbers if they were assigned
271  /* boost::any_cast FILED here:
272  std::string molAtomMapNumber_i="";
273  std::string molAtomMapNumber_j="";
274  */
275  int molAtomMapNumber_i = 0;
276  int molAtomMapNumber_j = 0;
277  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
278  molAtomMapNumber_i);
279  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
280  molAtomMapNumber_j);
281  if (molAtomMapNumber_i < molAtomMapNumber_j) {
282  return -1;
283  } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
284  return 1;
285  }
286  // start by comparing degree
287  ivi = dp_atoms[i].degree;
288  ivj = dp_atoms[j].degree;
289  if (ivi < ivj) {
290  return -1;
291  } else if (ivi > ivj) {
292  return 1;
293  }
294  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
295  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
296  return -1;
297  } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
298  return 1;
299  } else {
300  return 0;
301  }
302  }
303 
304  // move onto atomic number
305  ivi = dp_atoms[i].atom->getAtomicNum();
306  ivj = dp_atoms[j].atom->getAtomicNum();
307  if (ivi < ivj) {
308  return -1;
309  } else if (ivi > ivj) {
310  return 1;
311  }
312  // isotopes if we're using them
313  if (df_useIsotopes) {
314  ivi = dp_atoms[i].atom->getIsotope();
315  ivj = dp_atoms[j].atom->getIsotope();
316  if (ivi < ivj) {
317  return -1;
318  } else if (ivi > ivj) {
319  return 1;
320  }
321  }
322 
323  // nHs
324  ivi = dp_atoms[i].totalNumHs;
325  ivj = dp_atoms[j].totalNumHs;
326  if (ivi < ivj) {
327  return -1;
328  } else if (ivi > ivj) {
329  return 1;
330  }
331  // charge
332  ivi = dp_atoms[i].atom->getFormalCharge();
333  ivj = dp_atoms[j].atom->getFormalCharge();
334  if (ivi < ivj) {
335  return -1;
336  } else if (ivi > ivj) {
337  return 1;
338  }
339  // chirality if we're using it
340  if (df_useChirality) {
341  // first atom stereochem:
342  ivi = 0;
343  ivj = 0;
344  std::string cipCode;
345  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
346  cipCode)) {
347  ivi = cipCode == "R" ? 2 : 1;
348  }
349  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
350  cipCode)) {
351  ivj = cipCode == "R" ? 2 : 1;
352  }
353  if (ivi < ivj) {
354  return -1;
355  } else if (ivi > ivj) {
356  return 1;
357  }
358  // can't actually use values here, because they are arbitrary
359  ivi = dp_atoms[i].atom->getChiralTag() != 0;
360  ivj = dp_atoms[j].atom->getChiralTag() != 0;
361  if (ivi < ivj) {
362  return -1;
363  } else if (ivi > ivj) {
364  return 1;
365  }
366  }
367 
368  if (df_useChiralityRings) {
369  // ring stereochemistry
370  ivi = getAtomRingNbrCode(i);
371  ivj = getAtomRingNbrCode(j);
372  if (ivi < ivj) {
373  return -1;
374  } else if (ivi > ivj) {
375  return 1;
376  } // bond stereo is taken care of in the neighborhood comparison
377  }
378  return 0;
379  }
380 
381  public:
382  Canon::canon_atom *dp_atoms{nullptr};
383  const ROMol *dp_mol{nullptr};
384  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
385  *dp_bondsInPlay{nullptr};
386  bool df_useNbrs{false};
387  bool df_useIsotopes{true};
388  bool df_useChirality{true};
389  bool df_useChiralityRings{true};
390 
393  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
394  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
395  : dp_atoms(atoms),
396  dp_mol(&m),
397  dp_atomsInPlay(atomsInPlay),
398  dp_bondsInPlay(bondsInPlay),
399  df_useNbrs(false),
400  df_useIsotopes(true),
401  df_useChirality(true),
402  df_useChiralityRings(true) {}
403  int operator()(int i, int j) const {
404  PRECONDITION(dp_atoms, "no atoms");
405  PRECONDITION(dp_mol, "no molecule");
406  PRECONDITION(i != j, "bad call");
407  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
408  return 0;
409  }
410  int v = basecomp(i, j);
411  if (v) {
412  return v;
413  }
414 
415  if (df_useNbrs) {
416  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
417  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
418  }
419  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
420  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
421  }
422 
423  for (unsigned int ii = 0;
424  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
425  ++ii) {
426  int cmp =
427  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
428  if (cmp) {
429  return cmp;
430  }
431  }
432 
433  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
434  return -1;
435  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
436  return 1;
437  }
438  }
439  return 0;
440  }
441 };
442 
443 /*
444  * A compare function to discriminate chiral atoms, similar to the CIP rules.
445  * This functionality is currently not used.
446  */
447 
448 const unsigned int ATNUM_CLASS_OFFSET = 10000;
450  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
451  for (unsigned j = 0; j < nbrs.size(); ++j) {
452  unsigned int nbrIdx = nbrs[j].nbrIdx;
453  if (nbrIdx == ATNUM_CLASS_OFFSET) {
454  // Ignore the Hs
455  continue;
456  }
457  const Atom *nbr = dp_atoms[nbrIdx].atom;
458  nbrs[j].nbrSymClass =
459  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
460  }
461  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
462  // FIX: don't want to be doing this long-term
463  }
464 
465  int basecomp(int i, int j) const {
466  PRECONDITION(dp_atoms, "no atoms");
467  unsigned int ivi, ivj;
468 
469  // always start with the current class:
470  ivi = dp_atoms[i].index;
471  ivj = dp_atoms[j].index;
472  if (ivi < ivj)
473  return -1;
474  else if (ivi > ivj)
475  return 1;
476 
477  // move onto atomic number
478  ivi = dp_atoms[i].atom->getAtomicNum();
479  ivj = dp_atoms[j].atom->getAtomicNum();
480  if (ivi < ivj)
481  return -1;
482  else if (ivi > ivj)
483  return 1;
484 
485  // isotopes:
486  ivi = dp_atoms[i].atom->getIsotope();
487  ivj = dp_atoms[j].atom->getIsotope();
488  if (ivi < ivj)
489  return -1;
490  else if (ivi > ivj)
491  return 1;
492 
493  // atom stereochem:
494  ivi = 0;
495  ivj = 0;
496  std::string cipCode;
497  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
498  cipCode)) {
499  ivi = cipCode == "R" ? 2 : 1;
500  }
501  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
502  cipCode)) {
503  ivj = cipCode == "R" ? 2 : 1;
504  }
505  if (ivi < ivj)
506  return -1;
507  else if (ivi > ivj)
508  return 1;
509 
510  // bond stereo is taken care of in the neighborhood comparison
511  return 0;
512  }
513 
514  public:
515  Canon::canon_atom *dp_atoms{nullptr};
516  const ROMol *dp_mol{nullptr};
517  bool df_useNbrs{false};
520  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false) {}
521  int operator()(int i, int j) const {
522  PRECONDITION(dp_atoms, "no atoms");
523  PRECONDITION(dp_mol, "no molecule");
524  PRECONDITION(i != j, "bad call");
525  int v = basecomp(i, j);
526  if (v) return v;
527 
528  if (df_useNbrs) {
529  getAtomNeighborhood(dp_atoms[i].bonds);
530  getAtomNeighborhood(dp_atoms[j].bonds);
531 
532  // we do two passes through the neighbor lists. The first just uses the
533  // atomic numbers (by passing the optional 10000 to bondholder::compare),
534  // the second takes the already-computed index into account
535  for (unsigned int ii = 0;
536  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
537  ++ii) {
538  int cmp = bondholder::compare(
539  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
540  if (cmp) return cmp;
541  }
542  for (unsigned int ii = 0;
543  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
544  ++ii) {
545  int cmp =
546  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
547  if (cmp) return cmp;
548  }
549  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
550  return -1;
551  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
552  return 1;
553  }
554  }
555  return 0;
556  }
557 };
558 
559 /*
560  * Basic canonicalization function to organize the partitions which will be
561  * sorted next.
562  * */
563 
564 template <typename CompareFunc>
565 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
566  int mode, int *order, int *count, int &activeset,
567  int *next, int *changed, char *touchedPartitions) {
568  unsigned int nAtoms = mol.getNumAtoms();
569  int partition;
570  int symclass = 0;
571  int *start;
572  int offset;
573  int index;
574  int len;
575  int i;
576  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
577 
578  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
579  while (activeset != -1) {
580  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
581  // std::cerr<<" next: ";
582  // for(unsigned int ii=0;ii<nAtoms;++ii){
583  // std::cerr<<ii<<":"<<next[ii]<<" ";
584  // }
585  // std::cerr<<std::endl;
586  // for(unsigned int ii=0;ii<nAtoms;++ii){
587  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
588  // "<<atoms[order[ii]].index<<std::endl;
589  // }
590 
591  partition = activeset;
592  activeset = next[partition];
593  next[partition] = -2;
594 
595  len = count[partition];
596  offset = atoms[partition].index;
597  start = order + offset;
598  // std::cerr<<"\n\n**************************************************************"<<std::endl;
599  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
600  // for(unsigned int ii=0;ii<len;++ii){
601  // std::cerr<<" "<<order[offset+ii]+1;
602  // }
603  // std::cerr<<std::endl;
604  // for(unsigned int ii=0;ii<nAtoms;++ii){
605  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
606  // "<<atoms[order[ii]].index<<std::endl;
607  // }
608  hanoisort(start, len, count, changed, compar);
609  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
610  // std::cerr<<" result:";
611  // for(unsigned int ii=0;ii<nAtoms;++ii){
612  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
613  // "<<atoms[order[ii]].index<<std::endl;
614  // }
615  for (int k = 0; k < len; ++k) {
616  changed[start[k]] = 0;
617  }
618 
619  index = start[0];
620  // std::cerr<<" len:"<<len<<" index:"<<index<<"
621  // count:"<<count[index]<<std::endl;
622  for (i = count[index]; i < len; i++) {
623  index = start[i];
624  if (count[index]) symclass = offset + i;
625  atoms[index].index = symclass;
626  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
627  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
628  // activeset=index;
629  //}
630  for (unsigned j = 0; j < atoms[index].degree; ++j) {
631  changed[atoms[index].nbrIds[j]] = 1;
632  }
633  }
634  // std::cerr<<std::endl;
635 
636  if (mode) {
637  index = start[0];
638  for (i = count[index]; i < len; i++) {
639  index = start[i];
640  for (unsigned j = 0; j < atoms[index].degree; ++j) {
641  unsigned int nbor = atoms[index].nbrIds[j];
642  touchedPartitions[atoms[nbor].index] = 1;
643  }
644  }
645  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
646  if (touchedPartitions[ii]) {
647  partition = order[ii];
648  if ((count[partition] > 1) && (next[partition] == -2)) {
649  next[partition] = activeset;
650  activeset = partition;
651  }
652  touchedPartitions[ii] = 0;
653  }
654  }
655  }
656  }
657 } // end of RefinePartitions()
658 
659 template <typename CompareFunc>
660 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
661  int mode, int *order, int *count, int &activeset, int *next,
662  int *changed, char *touchedPartitions) {
663  unsigned int nAtoms = mol.getNumAtoms();
664  int partition;
665  int offset;
666  int index;
667  int len;
668  int oldPart = 0;
669 
670  for (unsigned int i = 0; i < nAtoms; i++) {
671  partition = order[i];
672  oldPart = atoms[partition].index;
673  while (count[partition] > 1) {
674  len = count[partition];
675  offset = atoms[partition].index + len - 1;
676  index = order[offset];
677  atoms[index].index = offset;
678  count[partition] = len - 1;
679  count[index] = 1;
680 
681  // test for ions, water molecules with no
682  if (atoms[index].degree < 1) {
683  continue;
684  }
685  for (unsigned j = 0; j < atoms[index].degree; ++j) {
686  unsigned int nbor = atoms[index].nbrIds[j];
687  touchedPartitions[atoms[nbor].index] = 1;
688  changed[nbor] = 1;
689  }
690 
691  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
692  if (touchedPartitions[ii]) {
693  int npart = order[ii];
694  if ((count[npart] > 1) && (next[npart] == -2)) {
695  next[npart] = activeset;
696  activeset = npart;
697  }
698  touchedPartitions[ii] = 0;
699  }
700  }
701  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
702  changed, touchedPartitions);
703  }
704  // not sure if this works each time
705  if (atoms[partition].index != oldPart) {
706  i -= 1;
707  }
708  }
709 } // end of BreakTies()
710 
712  int *order, int *count,
713  canon_atom *atoms);
714 
715 RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
716  int *count, int &activeset,
717  int *next, int *changed);
718 
720  std::vector<unsigned int> &res,
721  bool breakTies = true,
722  bool includeChirality = true,
723  bool includeIsotopes = true);
724 
726  const ROMol &mol, std::vector<unsigned int> &res,
727  const boost::dynamic_bitset<> &atomsInPlay,
728  const boost::dynamic_bitset<> &bondsInPlay,
729  const std::vector<std::string> *atomSymbols,
730  const std::vector<std::string> *bondSymbols, bool breakTies,
731  bool includeChirality, bool includeIsotope);
732 
733 inline void rankFragmentAtoms(
734  const ROMol &mol, std::vector<unsigned int> &res,
735  const boost::dynamic_bitset<> &atomsInPlay,
736  const boost::dynamic_bitset<> &bondsInPlay,
737  const std::vector<std::string> *atomSymbols = nullptr,
738  bool breakTies = true, bool includeChirality = true,
739  bool includeIsotopes = true) {
740  rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
741  breakTies, includeChirality, includeIsotopes);
742 };
743 
745  std::vector<unsigned int> &res);
746 
748  std::vector<Canon::canon_atom> &atoms,
749  bool includeChirality = true);
750 
751 } // namespace Canon
752 } // namespace RDKit
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
The class for representing atoms.
Definition: Atom.h:68
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:115
class for representing a bond
Definition: Bond.h:46
BondType
the type of Bond
Definition: Bond.h:55
@ UNSPECIFIED
Definition: Bond.h:56
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition: Bond.h:94
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:392
int operator()(int i, int j) const
Definition: new_canon.h:403
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:519
int operator()(int i, int j) const
Definition: new_canon.h:521
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:141
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:194
std::vector< bondholder > bonds
Definition: new_canon.h:108
unsigned int degree
Definition: new_canon.h:99
std::vector< int > revistedNeighbors
Definition: new_canon.h:107
std::vector< int > neighborNum
Definition: new_canon.h:106
unsigned int getNumAtoms() const
returns our number of atoms
Definition: ROMol.h:332
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:209
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int >> &result)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:448
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:660
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:565
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
Std stuff.
Definition: Abbreviations.h:18
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:145
const std::string * p_symbol
Definition: new_canon.h:36
Bond::BondType bondType
Definition: new_canon.h:32
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:60
bool operator<(const bondholder &o) const
Definition: new_canon.h:48
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:39
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:45
unsigned int bondStereo
Definition: new_canon.h:33
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:73
unsigned int nbrSymClass
Definition: new_canon.h:34