Source code for mdfptools.utils

import sys
from rdkit import Chem
from rdkit.Chem import SaltRemover, AllChem, Draw

def get_data_filename(relative_path):
    """Resolve the on-disk path of a data file shipped with ``mdfptools``.

    The file is looked up inside the ``data`` directory of the ``mdfptools``
    package through ``pkg_resources.resource_filename``.

    Parameters
    ----------
    relative_path : str
        Path of the wanted file, relative to the package's ``data`` directory.

    Returns
    -------
    fn : str
        Path of the requested file as resolved by ``pkg_resources``.

    Raises
    ------
    ValueError
        If the resolved path does not exist on disk.
    """
    import os
    from pkg_resources import resource_filename

    resource = os.path.join('data', relative_path)
    fn = resource_filename('mdfptools', resource)
    if os.path.exists(fn):
        return fn
    raise ValueError("Sorry! %s does not exist. If you just added it, you'll have to re-install" % fn)
def canonical_smiles_from_smiles(smiles, sanitize = True):
    """Canonicalise a SMILES string with rdkit.

    Parameters
    ----------
    smiles : str
        Input SMILES string.
    sanitize : bool
        Whether to apply rdkit sanitisation while parsing, default True.

    Returns
    -------
    canonical_smiles : str or None
        Canonical, isomeric SMILES written with explicit hydrogens and
        explicit bonds. ``None`` is returned when the input cannot be parsed
        or when any rdkit step raises an exception.
    """
    try:
        mol = Chem.MolFromSmiles(smiles, sanitize=sanitize)
        if mol is None:
            # Parsing failed; report it explicitly rather than relying on an
            # AttributeError from the calls below.
            return None
        mol.UpdatePropertyCache()
        # NOTE(review): presumably needed so ring information is available
        # when sanitize=False -- confirm against rdkit behaviour.
        Chem.GetSSSR(mol)
        return Chem.MolToSmiles(
            mol,
            canonical=True,
            allHsExplicit=True,
            kekuleSmiles=False,
            allBondsExplicit=True,
            isomericSmiles=True,
        )
    except Exception:
        # Best-effort contract: any failure yields None. ``Exception`` (not a
        # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
        return None
def hashing(smiles):
    """Return the MD5 hex digest (32 characters) of a canonicalised SMILES.

    The input is first passed through ``canonical_smiles_from_smiles`` and the
    result is UTF-8 encoded before hashing. In mdfptools the digest is used as
    a filename when storing data to disk.

    Parameters
    ----------
    smiles : str
        Input SMILES string.

    Returns
    -------
    hex_str : str
        Hexadecimal representation of the MD5 digest.

    Raises
    ------
    AttributeError
        If canonicalisation fails and returns ``None``, since ``None`` has no
        ``encode`` method.
    """
    import hashlib

    canonical = canonical_smiles_from_smiles(smiles)
    encoded = canonical.encode("utf-8")
    return hashlib.md5(encoded).hexdigest()
def screen_organic(smiles):
    """Heuristically decide whether a SMILES describes a purely organic molecule.

    The input is accepted only when all of the following hold: it parses with
    rdkit, the default ``SaltRemover`` strips no atoms from it, it consists of
    a single fragment, and every atom is one of
    H, B, C, N, O, F, P, S, Cl, Br, I.

    Parameters
    ----------
    smiles : str
        Input SMILES string; ``None`` is rejected.

    Returns
    -------
    is_organic : bool
        True if the molecule passes every check, False otherwise.
    """
    if smiles is None:
        return False

    salt_stripper = SaltRemover.SaltRemover()
    # SMARTS matching any atom that is NOT one of the allowed elements:
    # H, B, C, N, O, F, P, S, Cl, Br, I
    non_organic_atom = Chem.MolFromSmarts('[!$([#1,#5,#6,#7,#8,#9,#15,#16,#17,#35,#53])]')

    mol = Chem.MolFromSmiles(smiles, sanitize=True)
    if mol is None:
        return False

    # Reject (rather than clean) inputs from which the salt remover strips atoms.
    stripped = salt_stripper.StripMol(mol)
    if stripped is not None and stripped.GetNumAtoms() < mol.GetNumAtoms():
        return False

    # Reject inputs made of more than one disconnected fragment.
    fragments = AllChem.GetMolFrags(mol, asMols=True)
    if len(fragments) > 1:
        return False

    # Organic only if no atom falls outside the allowed element set.
    return not mol.HasSubstructMatch(non_organic_atom)