10 #ifndef RGROUP_DECOMP_DATA
11 #define RGROUP_DECOMP_DATA
29 std::vector<std::vector<RGroupMatch>>
matches;
40 :
params(std::move(inputParams)) {
47 :
params(std::move(inputParams)) {
48 for (
size_t i = 0; i < inputCores.size(); ++i) {
56 for (
auto &core :
cores) {
57 RWMol *alignCore = core.first ?
cores[0].core.get() :
nullptr;
59 "Could not prepare at least one core");
61 core.second.findIndicesWithRLabel();
63 core.second.labelledCore.reset(
new RWMol(*core.second.core));
85 for (
size_t mol_idx = 0; mol_idx <
permutation.size(); ++mol_idx) {
86 std::vector<RGroupMatch> keepVector;
98 std::vector<RGroupMatch> results;
101 "Best Permutation mol idx out of range");
103 "Selected match at permutation out of range");
107 if (removeAllHydrogenRGroups) {
114 std::map<int, std::set<int>> labelCores;
115 for (
auto &position : results) {
116 int core_idx = position.core_idx;
117 if (labelCores.find(core_idx) == labelCores.end()) {
118 auto core =
cores.find(core_idx);
119 if (core !=
cores.end()) {
120 for (
auto rlabels :
getRlabels(*core->second.core)) {
121 int rlabel = rlabels.first;
122 labelCores[rlabel].insert(core_idx);
128 for (
int label :
labels) {
130 for (
auto &position : results) {
131 R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
132 bool labelHasCore = labelCores[label].find(position.core_idx) !=
133 labelCores[label].end();
134 if (labelHasCore && (rgroup == position.rgroups.end() ||
135 !rgroup->second->is_hydrogen)) {
142 for (
auto &position : results) {
143 position.rgroups.erase(label);
173 UsedLabels &used_labels,
const std::set<int> &indexLabels,
174 std::map<
int, std::vector<int>> extraAtomRLabels) {
182 std::map<int, Atom *> atoms =
getRlabels(core);
190 std::map<int, std::vector<int>> bondsToCore;
191 std::vector<std::pair<Atom *, Atom *>> atomsToAdd;
194 for (
auto rlabels : atoms) {
195 int userLabel = rlabels.first;
199 Atom *atom = rlabels.second;
200 mappings[userLabel] = userLabel;
201 used_labels.
add(userLabel);
206 auto *newAt =
new Atom(0);
208 atomsToAdd.emplace_back(atom, newAt);
213 for (
auto newLabel : indexLabels) {
214 auto atm = atoms.find(newLabel);
215 if (atm == atoms.end()) {
219 Atom *atom = atm->second;
222 auto mapping = mappings.find(newLabel);
223 if (mapping == mappings.end()) {
224 rlabel = used_labels.
next();
225 mappings[newLabel] = rlabel;
227 rlabel = mapping->second;
233 auto *newAt =
new Atom(0);
235 atomsToAdd.emplace_back(atom, newAt);
240 for (
auto &extraAtomRLabel : extraAtomRLabels) {
241 auto atm = atoms.find(extraAtomRLabel.first);
242 if (atm == atoms.end()) {
245 Atom *atom = atm->second;
247 for (
int &i : extraAtomRLabel.second) {
248 int rlabel = used_labels.
next();
253 "Multiple attachments to a dummy (or hydrogen) is weird.");
254 auto *newAt =
new Atom(0);
256 atomsToAdd.emplace_back(atom, newAt);
260 for (
auto &i : atomsToAdd) {
261 core.
addAtom(i.second,
false,
true);
277 std::vector<std::pair<Atom *, Atom *>> atomsToAdd;
284 const std::vector<int> &rlabels =
288 for (
int rlabel : rlabels) {
289 auto label = mappings.find(rlabel);
295 auto *newAt =
new Atom(0);
297 atomsToAdd.emplace_back(atom, newAt);
303 for (
auto &i : atomsToAdd) {
304 mol.
addAtom(i.second,
false,
true);
310 bool implicitOnly =
false;
311 bool updateExplicitCount =
false;
312 bool sanitize =
false;
327 std::set<int> indexLabels;
336 std::map<int, std::vector<int>> extraAtomRLabels;
338 for (
auto &it : best) {
339 for (
auto &rgroup : it.rgroups) {
340 if (rgroup.first >= 0) {
343 if (rgroup.first < 0) {
344 indexLabels.insert(rgroup.first);
347 std::map<int, int> rlabelsUsedInRGroup =
348 rgroup.second->getNumBondsToRlabels();
349 for (
auto &numBondsUsed : rlabelsUsedInRGroup) {
351 if (numBondsUsed.second > 1) {
352 extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
364 for (
auto &core :
cores) {
365 core.second.labelledCore.reset(
new RWMol(*core.second.core));
367 indexLabels, extraAtomRLabels);
370 for (
auto &it : best) {
371 for (
auto &rgroup : it.rgroups) {
380 const std::vector<size_t> &tied_permutation,
381 std::vector<int> &heavy_counts) {
383 size_t num_added_rgroups = 0;
384 heavy_counts.resize(
labels.size(), 0);
385 for (
int label :
labels) {
386 int incr = (label > 0) ? -1 : 1;
387 bool incremented =
false;
388 for (
size_t m = 0; m < tied_permutation.size();
390 auto rg =
matches[m][tied_permutation[m]].rgroups.find(label);
391 if (rg !=
matches[m][tied_permutation[m]].rgroups.end() &&
392 !rg->second->is_hydrogen) {
393 if (label <= 0 && !incremented) {
397 heavy_counts[i] += incr;
402 return num_added_rgroups;
405 bool process(
bool pruneMatches,
bool finalize =
false) {
409 auto t0 = std::chrono::steady_clock::now();
413 std::vector<size_t> permutations;
416 std::back_inserter(permutations),
417 [](
const std::vector<RGroupMatch> &m) { return m.size(); });
418 permutation = std::vector<size_t>(permutations.size(), 0);
422 double best_score = 0;
423 std::vector<size_t> best_permutation =
permutation;
424 std::vector<std::vector<size_t>> ties;
428 std::cerr <<
"Processing" << std::endl;
434 while (iterator.
next()) {
439 std::cerr <<
"**************************************************"
444 if (fabs(newscore - best_score) <
447 }
else if (newscore > best_score) {
449 std::cerr <<
" ===> current best:" << newscore <<
">" << best_score
454 best_score = newscore;
460 if (ties.size() > 1) {
461 size_t min_perm_value = 0;
462 size_t smallest_added_rgroups =
labels.size();
463 for (
const auto &tied_permutation : ties) {
464 std::vector<int> heavy_counts;
465 std::vector<int> largest_heavy_counts(
labels.size(), 0);
466 size_t num_added_rgroups =
468 if (num_added_rgroups < smallest_added_rgroups) {
469 smallest_added_rgroups = num_added_rgroups;
470 best_permutation = tied_permutation;
471 }
else if (num_added_rgroups == smallest_added_rgroups) {
472 if (heavy_counts < largest_heavy_counts) {
473 largest_heavy_counts = heavy_counts;
474 best_permutation = tied_permutation;
475 }
else if (heavy_counts == largest_heavy_counts) {
476 size_t perm_value = iterator.
value();
477 if (perm_value < min_perm_value) {
478 min_perm_value = perm_value;
479 best_permutation = tied_permutation;
487 if (pruneMatches || finalize) {
#define CHECK_INVARIANT(expr, mess)
#define PRECONDITION(expr, mess)
The class for representing atoms.
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
std::set< int > labels_used
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
static std::string to_string(const Descriptor &desc)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabels,atom mapping for the current molecule.
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
Set the atom's MDL integer RLabel.
double score(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
const std::string SIDECHAIN_RLABELS
const unsigned int EMPTY_CORE_LABEL
iterate through all possible permutations of the rgroups
std::vector< size_t > permutation
RCore is the core common to a series of molecules.
A single rgroup attached to a given core.
boost::shared_ptr< RWMol > combinedMol
size_t compute_num_added_rgroups(const std::vector< size_t > &tied_permutation, std::vector< int > &heavy_counts)
std::vector< std::vector< RGroupMatch > > matches
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, std::map< int, std::vector< int >> extraAtomRLabels)
std::vector< int > processedRlabels
std::map< int, int > finalRlabelMapping
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
bool process(bool pruneMatches, bool finalize=false)
bool removeAllHydrogenRGroups
double timeout
timeout in seconds. <=0 indicates no timeout
bool removeHydrogensPostMatch
unsigned int rgroupLabelling
bool prepareCore(RWMol &, const RWMol *alignCore)