RDKit
Open-source cheminformatics and machine learning.
new_canon.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Greg Landrum
3 // Adapted from pseudo-code from Roger Sayle
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 
12 #include <RDGeneral/export.h>
13 #include <RDGeneral/hanoiSort.h>
14 #include <GraphMol/ROMol.h>
15 #include <GraphMol/RingInfo.h>
17 #include <cstdint>
18 #include <boost/foreach.hpp>
19 #include <boost/dynamic_bitset.hpp>
21 #include <cstring>
22 #include <iostream>
23 #include <cassert>
24 #include <cstring>
25 #include <vector>
26 
27 // #define VERBOSE_CANON 1
28 
29 namespace RDKit {
30 namespace Canon {
31 
34  unsigned int bondStereo;
35  unsigned int nbrSymClass{0};
36  unsigned int nbrIdx{0};
37  const std::string *p_symbol{
38  nullptr}; // if provided, this is used to order bonds
39  bondholder() : bondStereo(static_cast<unsigned int>(Bond::STEREONONE)){};
40  bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni,
41  unsigned int nsc)
42  : bondType(bt),
43  bondStereo(static_cast<unsigned int>(bs)),
44  nbrSymClass(nsc),
45  nbrIdx(ni){};
46  bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni,
47  unsigned int nsc)
48  : bondType(bt), bondStereo(bs), nbrSymClass(nsc), nbrIdx(ni){};
49  bool operator<(const bondholder &o) const {
50  if (p_symbol && o.p_symbol) {
51  return (*p_symbol) < (*o.p_symbol);
52  }
53  if (bondType != o.bondType) {
54  return bondType < o.bondType;
55  }
56  if (bondStereo != o.bondStereo) {
57  return bondStereo < o.bondStereo;
58  }
59  return nbrSymClass < o.nbrSymClass;
60  }
61  static bool greater(const bondholder &lhs, const bondholder &rhs) {
62  if (lhs.p_symbol && rhs.p_symbol && (*lhs.p_symbol) != (*rhs.p_symbol)) {
63  return (*lhs.p_symbol) > (*rhs.p_symbol);
64  }
65  if (lhs.bondType != rhs.bondType) {
66  return lhs.bondType > rhs.bondType;
67  }
68  if (lhs.bondStereo != rhs.bondStereo) {
69  return lhs.bondStereo > rhs.bondStereo;
70  }
71  return lhs.nbrSymClass > rhs.nbrSymClass;
72  }
73 
74  static int compare(const bondholder &x, const bondholder &y,
75  unsigned int div = 1) {
76  if (x.p_symbol && y.p_symbol) {
77  if ((*x.p_symbol) < (*y.p_symbol)) {
78  return -1;
79  } else if ((*x.p_symbol) > (*y.p_symbol)) {
80  return 1;
81  }
82  }
83  if (x.bondType < y.bondType) {
84  return -1;
85  } else if (x.bondType > y.bondType) {
86  return 1;
87  }
88  if (x.bondStereo < y.bondStereo) {
89  return -1;
90  } else if (x.bondStereo > y.bondStereo) {
91  return 1;
92  }
93  return x.nbrSymClass / div - y.nbrSymClass / div;
94  }
95 };
97  public:
98  const Atom *atom{nullptr};
99  int index{-1};
100  unsigned int degree{0};
101  unsigned int totalNumHs{0};
102  bool hasRingNbr{false};
103  bool isRingStereoAtom{false};
104  int *nbrIds{nullptr};
105  const std::string *p_symbol{
106  nullptr}; // if provided, this is used to order atoms
107  std::vector<int> neighborNum;
108  std::vector<int> revistedNeighbors;
109  std::vector<bondholder> bonds;
110 
112 
113  ~canon_atom() { free(nbrIds); }
114 };
115 
117  canon_atom *atoms, std::vector<bondholder> &nbrs);
118 
120  canon_atom *atoms, std::vector<bondholder> &nbrs, unsigned int atomIdx,
121  std::vector<std::pair<unsigned int, unsigned int>> &result);
122 
123 /*
124  * Different types of atom compare functions:
125  *
126  * - SpecialChiralityAtomCompareFunctor: Allows canonizing molecules exhibiting
127  *dependent chirality
128  * - SpecialSymmetryAtomCompareFunctor: Very specialized, allows canonizing
129  *highly symmetrical graphs/molecules
130  * - AtomCompareFunctor: Basic atom compare function which also allows to
131  *include neighbors within the ranking
132  */
133 
135  public:
136  Canon::canon_atom *dp_atoms{nullptr};
137  const ROMol *dp_mol{nullptr};
138  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
139  *dp_bondsInPlay{nullptr};
140 
142 
143  {};
145  Canon::canon_atom *atoms, const ROMol &m,
146  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
147  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
148  : dp_atoms(atoms),
149  dp_mol(&m),
150  dp_atomsInPlay(atomsInPlay),
151  dp_bondsInPlay(bondsInPlay){};
152  int operator()(int i, int j) const {
153  PRECONDITION(dp_atoms, "no atoms");
154  PRECONDITION(dp_mol, "no molecule");
155  PRECONDITION(i != j, "bad call");
156  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
157  return 0;
158  }
159 
160  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
161  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
162  }
163  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
164  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
165  }
166  for (unsigned int ii = 0;
167  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
168  int cmp =
169  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
170  if (cmp) return cmp;
171  }
172 
173  std::vector<std::pair<unsigned int, unsigned int>> swapsi;
174  std::vector<std::pair<unsigned int, unsigned int>> swapsj;
175  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
176  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[i].bonds, i, swapsi);
177  }
178  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
179  updateAtomNeighborNumSwaps(dp_atoms, dp_atoms[j].bonds, j, swapsj);
180  }
181  for (unsigned int ii = 0; ii < swapsi.size() && ii < swapsj.size(); ++ii) {
182  int cmp = swapsi[ii].second - swapsj[ii].second;
183  if (cmp) return cmp;
184  }
185  return 0;
186  }
187 };
188 
190  public:
191  Canon::canon_atom *dp_atoms{nullptr};
192  const ROMol *dp_mol{nullptr};
193  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
194  *dp_bondsInPlay{nullptr};
195 
197 
198  {};
200  Canon::canon_atom *atoms, const ROMol &m,
201  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
202  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
203  : dp_atoms(atoms),
204  dp_mol(&m),
205  dp_atomsInPlay(atomsInPlay),
206  dp_bondsInPlay(bondsInPlay){};
207  int operator()(int i, int j) const {
208  PRECONDITION(dp_atoms, "no atoms");
209  PRECONDITION(dp_mol, "no molecule");
210  PRECONDITION(i != j, "bad call");
211  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
212  return 0;
213  }
214 
215  if (dp_atoms[i].neighborNum < dp_atoms[j].neighborNum) {
216  return -1;
217  } else if (dp_atoms[i].neighborNum > dp_atoms[j].neighborNum) {
218  return 1;
219  }
220 
221  if (dp_atoms[i].revistedNeighbors < dp_atoms[j].revistedNeighbors) {
222  return -1;
223  } else if (dp_atoms[i].revistedNeighbors > dp_atoms[j].revistedNeighbors) {
224  return 1;
225  }
226 
227  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
228  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
229  }
230  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
231  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
232  }
233  for (unsigned int ii = 0;
234  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size(); ++ii) {
235  int cmp =
236  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
237  if (cmp) return cmp;
238  }
239 
240  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
241  return -1;
242  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
243  return 1;
244  }
245  return 0;
246  }
247 };
248 
250  unsigned int getAtomRingNbrCode(unsigned int i) const {
251  if (!dp_atoms[i].hasRingNbr) return 0;
252 
253  int *nbrs = dp_atoms[i].nbrIds;
254  unsigned int code = 0;
255  for (unsigned j = 0; j < dp_atoms[i].degree; ++j) {
256  if (dp_atoms[nbrs[j]].isRingStereoAtom) {
257  code += dp_atoms[nbrs[j]].index * 10000 + 1; // j;
258  }
259  }
260  return code;
261  }
262 
263  int basecomp(int i, int j) const {
264  PRECONDITION(dp_atoms, "no atoms");
265  unsigned int ivi, ivj;
266 
267  // always start with the current class:
268  ivi = dp_atoms[i].index;
269  ivj = dp_atoms[j].index;
270  if (ivi < ivj) {
271  return -1;
272  } else if (ivi > ivj) {
273  return 1;
274  }
275  // use the atom-mapping numbers if they were assigned
276  /* boost::any_cast FILED here:
277  std::string molAtomMapNumber_i="";
278  std::string molAtomMapNumber_j="";
279  */
280  int molAtomMapNumber_i = 0;
281  int molAtomMapNumber_j = 0;
282  dp_atoms[i].atom->getPropIfPresent(common_properties::molAtomMapNumber,
283  molAtomMapNumber_i);
284  dp_atoms[j].atom->getPropIfPresent(common_properties::molAtomMapNumber,
285  molAtomMapNumber_j);
286  if (molAtomMapNumber_i < molAtomMapNumber_j) {
287  return -1;
288  } else if (molAtomMapNumber_i > molAtomMapNumber_j) {
289  return 1;
290  }
291  // start by comparing degree
292  ivi = dp_atoms[i].degree;
293  ivj = dp_atoms[j].degree;
294  if (ivi < ivj) {
295  return -1;
296  } else if (ivi > ivj) {
297  return 1;
298  }
299  if (dp_atoms[i].p_symbol && dp_atoms[j].p_symbol) {
300  if (*(dp_atoms[i].p_symbol) < *(dp_atoms[j].p_symbol)) {
301  return -1;
302  } else if (*(dp_atoms[i].p_symbol) > *(dp_atoms[j].p_symbol)) {
303  return 1;
304  } else {
305  return 0;
306  }
307  }
308 
309  // move onto atomic number
310  ivi = dp_atoms[i].atom->getAtomicNum();
311  ivj = dp_atoms[j].atom->getAtomicNum();
312  if (ivi < ivj) {
313  return -1;
314  } else if (ivi > ivj) {
315  return 1;
316  }
317  // isotopes if we're using them
318  if (df_useIsotopes) {
319  ivi = dp_atoms[i].atom->getIsotope();
320  ivj = dp_atoms[j].atom->getIsotope();
321  if (ivi < ivj) {
322  return -1;
323  } else if (ivi > ivj) {
324  return 1;
325  }
326  }
327 
328  // nHs
329  ivi = dp_atoms[i].totalNumHs;
330  ivj = dp_atoms[j].totalNumHs;
331  if (ivi < ivj) {
332  return -1;
333  } else if (ivi > ivj) {
334  return 1;
335  }
336  // charge
337  ivi = dp_atoms[i].atom->getFormalCharge();
338  ivj = dp_atoms[j].atom->getFormalCharge();
339  if (ivi < ivj) {
340  return -1;
341  } else if (ivi > ivj) {
342  return 1;
343  }
344  // chirality if we're using it
345  if (df_useChirality) {
346  // first atom stereochem:
347  ivi = 0;
348  ivj = 0;
349  std::string cipCode;
350  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
351  cipCode)) {
352  ivi = cipCode == "R" ? 2 : 1;
353  }
354  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
355  cipCode)) {
356  ivj = cipCode == "R" ? 2 : 1;
357  }
358  if (ivi < ivj) {
359  return -1;
360  } else if (ivi > ivj) {
361  return 1;
362  }
363  // can't actually use values here, because they are arbitrary
364  ivi = dp_atoms[i].atom->getChiralTag() != 0;
365  ivj = dp_atoms[j].atom->getChiralTag() != 0;
366  if (ivi < ivj) {
367  return -1;
368  } else if (ivi > ivj) {
369  return 1;
370  }
371  }
372 
373  if (df_useChiralityRings) {
374  // ring stereochemistry
375  ivi = getAtomRingNbrCode(i);
376  ivj = getAtomRingNbrCode(j);
377  if (ivi < ivj) {
378  return -1;
379  } else if (ivi > ivj) {
380  return 1;
381  } // bond stereo is taken care of in the neighborhood comparison
382  }
383  return 0;
384  }
385 
386  public:
387  Canon::canon_atom *dp_atoms{nullptr};
388  const ROMol *dp_mol{nullptr};
389  const boost::dynamic_bitset<> *dp_atomsInPlay{nullptr},
390  *dp_bondsInPlay{nullptr};
391  bool df_useNbrs{false};
392  bool df_useIsotopes{true};
393  bool df_useChirality{true};
394  bool df_useChiralityRings{true};
395 
397 
398  {};
400  const boost::dynamic_bitset<> *atomsInPlay = nullptr,
401  const boost::dynamic_bitset<> *bondsInPlay = nullptr)
402  : dp_atoms(atoms),
403  dp_mol(&m),
404  dp_atomsInPlay(atomsInPlay),
405  dp_bondsInPlay(bondsInPlay),
406  df_useNbrs(false),
407  df_useIsotopes(true),
408  df_useChirality(true),
409  df_useChiralityRings(true){};
410  int operator()(int i, int j) const {
411  PRECONDITION(dp_atoms, "no atoms");
412  PRECONDITION(dp_mol, "no molecule");
413  PRECONDITION(i != j, "bad call");
414  if (dp_atomsInPlay && !((*dp_atomsInPlay)[i] || (*dp_atomsInPlay)[j])) {
415  return 0;
416  }
417  int v = basecomp(i, j);
418  if (v) {
419  return v;
420  }
421 
422  if (df_useNbrs) {
423  if (!dp_atomsInPlay || (*dp_atomsInPlay)[i]) {
424  updateAtomNeighborIndex(dp_atoms, dp_atoms[i].bonds);
425  }
426  if (!dp_atomsInPlay || (*dp_atomsInPlay)[j]) {
427  updateAtomNeighborIndex(dp_atoms, dp_atoms[j].bonds);
428  }
429 
430  for (unsigned int ii = 0;
431  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
432  ++ii) {
433  int cmp =
434  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
435  if (cmp) {
436  return cmp;
437  }
438  }
439 
440  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
441  return -1;
442  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
443  return 1;
444  }
445  }
446  return 0;
447  }
448 };
449 
450 /*
451  * A compare function to discriminate chiral atoms, similar to the CIP rules.
452  * This functionality is currently not used.
453  */
454 
455 const unsigned int ATNUM_CLASS_OFFSET = 10000;
457  void getAtomNeighborhood(std::vector<bondholder> &nbrs) const {
458  for (unsigned j = 0; j < nbrs.size(); ++j) {
459  unsigned int nbrIdx = nbrs[j].nbrIdx;
460  if (nbrIdx == ATNUM_CLASS_OFFSET) {
461  // Ignore the Hs
462  continue;
463  }
464  const Atom *nbr = dp_atoms[nbrIdx].atom;
465  nbrs[j].nbrSymClass =
466  nbr->getAtomicNum() * ATNUM_CLASS_OFFSET + dp_atoms[nbrIdx].index + 1;
467  }
468  std::sort(nbrs.begin(), nbrs.end(), bondholder::greater);
469  // FIX: don't want to be doing this long-term
470  }
471 
472  int basecomp(int i, int j) const {
473  PRECONDITION(dp_atoms, "no atoms");
474  unsigned int ivi, ivj;
475 
476  // always start with the current class:
477  ivi = dp_atoms[i].index;
478  ivj = dp_atoms[j].index;
479  if (ivi < ivj)
480  return -1;
481  else if (ivi > ivj)
482  return 1;
483 
484  // move onto atomic number
485  ivi = dp_atoms[i].atom->getAtomicNum();
486  ivj = dp_atoms[j].atom->getAtomicNum();
487  if (ivi < ivj)
488  return -1;
489  else if (ivi > ivj)
490  return 1;
491 
492  // isotopes:
493  ivi = dp_atoms[i].atom->getIsotope();
494  ivj = dp_atoms[j].atom->getIsotope();
495  if (ivi < ivj)
496  return -1;
497  else if (ivi > ivj)
498  return 1;
499 
500  // atom stereochem:
501  ivi = 0;
502  ivj = 0;
503  std::string cipCode;
504  if (dp_atoms[i].atom->getPropIfPresent(common_properties::_CIPCode,
505  cipCode)) {
506  ivi = cipCode == "R" ? 2 : 1;
507  }
508  if (dp_atoms[j].atom->getPropIfPresent(common_properties::_CIPCode,
509  cipCode)) {
510  ivj = cipCode == "R" ? 2 : 1;
511  }
512  if (ivi < ivj)
513  return -1;
514  else if (ivi > ivj)
515  return 1;
516 
517  // bond stereo is taken care of in the neighborhood comparison
518  return 0;
519  }
520 
521  public:
522  Canon::canon_atom *dp_atoms{nullptr};
523  const ROMol *dp_mol{nullptr};
524  bool df_useNbrs{false};
527  : dp_atoms(atoms), dp_mol(&m), df_useNbrs(false){};
528  int operator()(int i, int j) const {
529  PRECONDITION(dp_atoms, "no atoms");
530  PRECONDITION(dp_mol, "no molecule");
531  PRECONDITION(i != j, "bad call");
532  int v = basecomp(i, j);
533  if (v) return v;
534 
535  if (df_useNbrs) {
536  getAtomNeighborhood(dp_atoms[i].bonds);
537  getAtomNeighborhood(dp_atoms[j].bonds);
538 
539  // we do two passes through the neighbor lists. The first just uses the
540  // atomic numbers (by passing the optional 10000 to bondholder::compare),
541  // the second takes the already-computed index into account
542  for (unsigned int ii = 0;
543  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
544  ++ii) {
545  int cmp = bondholder::compare(
546  dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii], ATNUM_CLASS_OFFSET);
547  if (cmp) return cmp;
548  }
549  for (unsigned int ii = 0;
550  ii < dp_atoms[i].bonds.size() && ii < dp_atoms[j].bonds.size();
551  ++ii) {
552  int cmp =
553  bondholder::compare(dp_atoms[i].bonds[ii], dp_atoms[j].bonds[ii]);
554  if (cmp) return cmp;
555  }
556  if (dp_atoms[i].bonds.size() < dp_atoms[j].bonds.size()) {
557  return -1;
558  } else if (dp_atoms[i].bonds.size() > dp_atoms[j].bonds.size()) {
559  return 1;
560  }
561  }
562  return 0;
563  }
564 };
565 
566 /*
567  * Basic canonicalization function to organize the partitions which will be
568  * sorted next.
569  * */
570 
571 template <typename CompareFunc>
572 void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
573  int mode, int *order, int *count, int &activeset,
574  int *next, int *changed, char *touchedPartitions) {
575  unsigned int nAtoms = mol.getNumAtoms();
576  int partition;
577  int symclass = 0;
578  int *start;
579  int offset;
580  int index;
581  int len;
582  int i;
583  // std::vector<char> touchedPartitions(mol.getNumAtoms(),0);
584 
585  // std::cerr<<"&&&&&&&&&&&&&&&& RP"<<std::endl;
586  while (activeset != -1) {
587  // std::cerr<<"ITER: "<<activeset<<" next: "<<next[activeset]<<std::endl;
588  // std::cerr<<" next: ";
589  // for(unsigned int ii=0;ii<nAtoms;++ii){
590  // std::cerr<<ii<<":"<<next[ii]<<" ";
591  // }
592  // std::cerr<<std::endl;
593  // for(unsigned int ii=0;ii<nAtoms;++ii){
594  // std::cerr<<order[ii]<<" count: "<<count[order[ii]]<<" index:
595  // "<<atoms[order[ii]].index<<std::endl;
596  // }
597 
598  partition = activeset;
599  activeset = next[partition];
600  next[partition] = -2;
601 
602  len = count[partition];
603  offset = atoms[partition].index;
604  start = order + offset;
605  // std::cerr<<"\n\n**************************************************************"<<std::endl;
606  // std::cerr<<" sort - class:"<<atoms[partition].index<<" len: "<<len<<":";
607  // for(unsigned int ii=0;ii<len;++ii){
608  // std::cerr<<" "<<order[offset+ii]+1;
609  // }
610  // std::cerr<<std::endl;
611  // for(unsigned int ii=0;ii<nAtoms;++ii){
612  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
613  // "<<atoms[order[ii]].index<<std::endl;
614  // }
615  hanoisort(start, len, count, changed, compar);
616  // std::cerr<<"*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*"<<std::endl;
617  // std::cerr<<" result:";
618  // for(unsigned int ii=0;ii<nAtoms;++ii){
619  // std::cerr<<order[ii]+1<<" count: "<<count[order[ii]]<<" index:
620  // "<<atoms[order[ii]].index<<std::endl;
621  // }
622  for (int k = 0; k < len; ++k) {
623  changed[start[k]] = 0;
624  }
625 
626  index = start[0];
627  // std::cerr<<" len:"<<len<<" index:"<<index<<"
628  // count:"<<count[index]<<std::endl;
629  for (i = count[index]; i < len; i++) {
630  index = start[i];
631  if (count[index]) symclass = offset + i;
632  atoms[index].index = symclass;
633  // std::cerr<<" "<<index+1<<"("<<symclass<<")";
634  // if(mode && (activeset<0 || count[index]>count[activeset]) ){
635  // activeset=index;
636  //}
637  for (unsigned j = 0; j < atoms[index].degree; ++j) {
638  changed[atoms[index].nbrIds[j]] = 1;
639  }
640  }
641  // std::cerr<<std::endl;
642 
643  if (mode) {
644  index = start[0];
645  for (i = count[index]; i < len; i++) {
646  index = start[i];
647  for (unsigned j = 0; j < atoms[index].degree; ++j) {
648  unsigned int nbor = atoms[index].nbrIds[j];
649  touchedPartitions[atoms[nbor].index] = 1;
650  }
651  }
652  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
653  if (touchedPartitions[ii]) {
654  partition = order[ii];
655  if ((count[partition] > 1) && (next[partition] == -2)) {
656  next[partition] = activeset;
657  activeset = partition;
658  }
659  touchedPartitions[ii] = 0;
660  }
661  }
662  }
663  }
664 } // end of RefinePartitions()
665 
666 template <typename CompareFunc>
667 void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar,
668  int mode, int *order, int *count, int &activeset, int *next,
669  int *changed, char *touchedPartitions) {
670  unsigned int nAtoms = mol.getNumAtoms();
671  int partition;
672  int offset;
673  int index;
674  int len;
675  int oldPart = 0;
676 
677  for (unsigned int i = 0; i < nAtoms; i++) {
678  partition = order[i];
679  oldPart = atoms[partition].index;
680  while (count[partition] > 1) {
681  len = count[partition];
682  offset = atoms[partition].index + len - 1;
683  index = order[offset];
684  atoms[index].index = offset;
685  count[partition] = len - 1;
686  count[index] = 1;
687 
688  // test for ions, water molecules with no
689  if (atoms[index].degree < 1) {
690  continue;
691  }
692  for (unsigned j = 0; j < atoms[index].degree; ++j) {
693  unsigned int nbor = atoms[index].nbrIds[j];
694  touchedPartitions[atoms[nbor].index] = 1;
695  changed[nbor] = 1;
696  }
697 
698  for (unsigned int ii = 0; ii < nAtoms; ++ii) {
699  if (touchedPartitions[ii]) {
700  int npart = order[ii];
701  if ((count[npart] > 1) && (next[npart] == -2)) {
702  next[npart] = activeset;
703  activeset = npart;
704  }
705  touchedPartitions[ii] = 0;
706  }
707  }
708  RefinePartitions(mol, atoms, compar, mode, order, count, activeset, next,
709  changed, touchedPartitions);
710  }
711  // not sure if this works each time
712  if (atoms[partition].index != oldPart) {
713  i -= 1;
714  }
715  }
716 } // end of BreakTies()
717 
719  int *order, int *count,
720  canon_atom *atoms);
721 
722 RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order,
723  int *count, int &activeset,
724  int *next, int *changed);
725 
727  std::vector<unsigned int> &res,
728  bool breakTies = true,
729  bool includeChirality = true,
730  bool includeIsotopes = true);
731 
733  const ROMol &mol, std::vector<unsigned int> &res,
734  const boost::dynamic_bitset<> &atomsInPlay,
735  const boost::dynamic_bitset<> &bondsInPlay,
736  const std::vector<std::string> *atomSymbols,
737  const std::vector<std::string> *bondSymbols, bool breakTies,
738  bool includeChirality, bool includeIsotope);
739 
740 inline void rankFragmentAtoms(
741  const ROMol &mol, std::vector<unsigned int> &res,
742  const boost::dynamic_bitset<> &atomsInPlay,
743  const boost::dynamic_bitset<> &bondsInPlay,
744  const std::vector<std::string> *atomSymbols = nullptr,
745  bool breakTies = true, bool includeChirality = true,
746  bool includeIsotopes = true) {
747  rankFragmentAtoms(mol, res, atomsInPlay, bondsInPlay, atomSymbols, nullptr,
748  breakTies, includeChirality, includeIsotopes);
749 };
750 
752  std::vector<unsigned int> &res);
753 
755  std::vector<Canon::canon_atom> &atoms,
756  bool includeChirality = true);
757 
758 } // namespace Canon
759 } // namespace RDKit
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
Defines the primary molecule class ROMol as well as associated typedefs.
The class for representing atoms.
Definition: Atom.h:69
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:116
class for representing a bond
Definition: Bond.h:47
BondType
the type of Bond
Definition: Bond.h:56
@ UNSPECIFIED
Definition: Bond.h:57
BondStereo
the nature of the bond's stereochem (for cis/trans)
Definition: Bond.h:95
AtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:399
int operator()(int i, int j) const
Definition: new_canon.h:410
ChiralAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m)
Definition: new_canon.h:526
int operator()(int i, int j) const
Definition: new_canon.h:528
SpecialChiralityAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:144
SpecialSymmetryAtomCompareFunctor(Canon::canon_atom *atoms, const ROMol &m, const boost::dynamic_bitset<> *atomsInPlay=nullptr, const boost::dynamic_bitset<> *bondsInPlay=nullptr)
Definition: new_canon.h:199
std::vector< bondholder > bonds
Definition: new_canon.h:109
std::vector< int > revistedNeighbors
Definition: new_canon.h:108
std::vector< int > neighborNum
Definition: new_canon.h:107
unsigned int getNumAtoms() const
returns our number of atoms
Definition: ROMol.h:299
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:346
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborNumSwaps(canon_atom *atoms, std::vector< bondholder > &nbrs, unsigned int atomIdx, std::vector< std::pair< unsigned int, unsigned int >> &result)
RDKIT_GRAPHMOL_EXPORT void CreateSinglePartition(unsigned int nAtoms, int *order, int *count, canon_atom *atoms)
RDKIT_GRAPHMOL_EXPORT void initCanonAtoms(const ROMol &mol, std::vector< Canon::canon_atom > &atoms, bool includeChirality=true)
RDKIT_GRAPHMOL_EXPORT void ActivatePartitions(unsigned int nAtoms, int *order, int *count, int &activeset, int *next, int *changed)
const unsigned int ATNUM_CLASS_OFFSET
Definition: new_canon.h:455
void BreakTies(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:667
void RefinePartitions(const ROMol &mol, canon_atom *atoms, CompareFunc compar, int mode, int *order, int *count, int &activeset, int *next, int *changed, char *touchedPartitions)
Definition: new_canon.h:572
RDKIT_GRAPHMOL_EXPORT void rankFragmentAtoms(const ROMol &mol, std::vector< unsigned int > &res, const boost::dynamic_bitset<> &atomsInPlay, const boost::dynamic_bitset<> &bondsInPlay, const std::vector< std::string > *atomSymbols, const std::vector< std::string > *bondSymbols, bool breakTies, bool includeChirality, bool includeIsotope)
RDKIT_GRAPHMOL_EXPORT void chiralRankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res)
RDKIT_GRAPHMOL_EXPORT void updateAtomNeighborIndex(canon_atom *atoms, std::vector< bondholder > &nbrs)
RDKIT_GRAPHMOL_EXPORT void rankMolAtoms(const ROMol &mol, std::vector< unsigned int > &res, bool breakTies=true, bool includeChirality=true, bool includeIsotopes=true)
RDKIT_RDGENERAL_EXPORT const std::string _CIPCode
RDKIT_RDGENERAL_EXPORT const std::string molAtomMapNumber
Std stuff.
Definition: Abbreviations.h:17
void hanoisort(int *base, int nel, int *count, int *changed, CompareFunc compar)
Definition: hanoiSort.h:145
const std::string * p_symbol
Definition: new_canon.h:37
Bond::BondType bondType
Definition: new_canon.h:33
static bool greater(const bondholder &lhs, const bondholder &rhs)
Definition: new_canon.h:61
bool operator<(const bondholder &o) const
Definition: new_canon.h:49
bondholder(Bond::BondType bt, Bond::BondStereo bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:40
bondholder(Bond::BondType bt, unsigned int bs, unsigned int ni, unsigned int nsc)
Definition: new_canon.h:46
unsigned int bondStereo
Definition: new_canon.h:34
static int compare(const bondholder &x, const bondholder &y, unsigned int div=1)
Definition: new_canon.h:74
unsigned int nbrSymClass
Definition: new_canon.h:35