Source code for mdfptools.Composer

from rdkit import Chem
from rdkit.Chem import AllChem
# from Per_Frame_Property_Extractor import *
from .Extractor import *

from numpy import mean, std, median
import functools

class MDFP():
    """
    Container for the set of features (the fingerprint) of one molecule,
    obtained from a simulation or a set of simulations.

    .. todo::
        - method to give back the keys
        - store some metadata?
    """

    def __init__(self, mdfp_dict):
        """
        Parameters
        ----------
        mdfp_dict : dict
            Keys are the feature types a given `Extractor` obtains, e.g.
            "2d_counts" are the 2D topological features obtained from the
            molecule SMILES, "intra_lj" are the intra-molecular LJ energies
            obtained from simulation. Values are the corresponding sets of
            numerics, stored as lists.
        """
        self.mdfp = mdfp_dict

    def get_mdfp(self):
        """
        Concatenate every stored feature set, in dictionary order.

        Returns
        ----------
        a list of floating values, i.e. the mdfp feature vector. An empty
        list is returned when no features are stored.
        """
        feature_sets = list(self.mdfp.values())
        # ``functools.reduce`` without an initial value raises TypeError on an
        # empty sequence, so the "no features" case is handled explicitly.
        if not feature_sets:
            return []
        return functools.reduce(lambda a, b: a + b, feature_sets)

    def __str__(self):
        """Return the string form of the underlying feature dictionary."""
        return str(self.mdfp)
class BaseComposer():
    """
    The BaseComposer class containing functions that can be used by different
    composers for different types of simulations.

    All methods are classmethods; intermediate state (``smiles``, ``fp``,
    ``statistical_moments``) is stored as attributes on the class itself.
    """

    @classmethod
    def run(cls, smiles):
        """
        Build the fingerprint for one molecule.

        Parameters
        ----------
        smiles : str
            SMILES string of the solute molecule

        Returns
        ----------
        MDFP
            Object wrapping the feature dictionary assembled by
            `_get_relevant_properties`.
        """
        cls.smiles = smiles
        cls.fp = {}
        cls._get_relevant_properties()
        return MDFP(cls.fp)

    @classmethod
    def _get_relevant_properties(cls):
        """
        Where the set of features to be included in the final MDFP are defined.

        The base implementation only adds the 2D descriptors.
        """
        cls.fp = {**cls.fp, **cls._get_2d_descriptors()}

    @classmethod
    def _get_2d_descriptors(cls):
        """
        Obtain those 2D topological features as described in the original
        publication.

        Returns
        ----------
        dict
            ``{"2d_counts": [...]}`` where the list holds, in order: the number
            of heavy atoms, the number of rotatable bonds, and the number of
            N, O, F, P, S, Cl, Br and I atoms.

        Raises
        ----------
        ValueError
            If ``cls.smiles`` cannot be parsed even without sanitization.
        """
        m = Chem.MolFromSmiles(cls.smiles, sanitize=True)
        if m is None:
            # Sanitization failed: retry without it and prepare by hand what
            # the descriptor calls below rely on.
            m = Chem.MolFromSmiles(cls.smiles, sanitize=False)
            if m is None:
                # Previously this fell through to an AttributeError on None.
                raise ValueError(
                    "Could not parse SMILES string: {!r}".format(cls.smiles)
                )
            m.UpdatePropertyCache(strict=False)
            Chem.GetSSSR(m)

        fp = [m.GetNumHeavyAtoms(), AllChem.CalcNumRotatableBonds(m)]
        # Atomic numbers of N, O, F, P, S, Cl, Br, I; the order defines the
        # layout of the feature vector and must not change.
        for atomic_number in (7, 8, 9, 15, 16, 17, 35, 53):
            pattern = Chem.MolFromSmarts('[#{}]'.format(atomic_number))
            fp.append(len(m.GetSubstructMatches(pattern)))
        return {"2d_counts": fp}

    @classmethod
    def _get_statistical_moments(cls, property_extractor, statistical_moments=(mean, std, median), **kwargs):
        """
        Performs statistical weighting of the numerical properties (e.g. LJ
        and electrostatics energies) obtained from each frame of simulation.

        Parameters
        ----------
        property_extractor : callable
            The particular `mdfptools.Extractor` method used to obtain the
            various properties from simulation. It is called as
            ``property_extractor(**kwargs)``; the result is iterated for its
            keys and indexed by them.
        statistical_moments : iterable of callables
            The statistical weightings to be applied to each property over
            all the frames. Defaults to the mean, standard deviation and
            median. The names of these functions are recorded in
            ``cls.statistical_moments``.
        **kwargs
            Forwarded unchanged to ``property_extractor``.

        Returns
        ----------
        dict
            Maps each property name to the list of its statistical moments,
            in the order given by ``statistical_moments``.
        """
        # Materialise once so a one-shot iterable is not exhausted by the
        # name collection below before it is applied to the properties.
        moments = tuple(statistical_moments)
        cls.statistical_moments = [func.__name__ for func in moments]

        prop = property_extractor(**kwargs)
        return {name: [func(prop[name]) for func in moments] for name in prop}
""" class TrialSolutionComposer(BaseComposer): def __init__(cls, smiles, mdtraj_obj, parmed_obj, **kwargs): cls.kwargs = {"mdtraj_obj" : mdtraj_obj , "parmed_obj" : parmed_obj} cls.kwargs = {**cls.kwargs , **kwargs} super(TrialSolutionComposer, cls).__init__(smiles) def _get_relevant_properties(cls): cls.fp = {**cls.fp, **cls._get_2d_descriptors()} cls.fp = {**cls.fp, **cls._get_statistical_moments(TrialSolutionExtractor.extract_energies, **cls.kwargs)} cls.fp = {**cls.fp, **cls._get_statistical_moments(WaterExtractor.extract_rgyr, **cls.kwargs)} cls.fp = {**cls.fp, **cls._get_statistical_moments(WaterExtractor.extract_sasa, **cls.kwargs)} del cls.kwargs """ # class MDFPComposer(BaseComposer):
class SolutionComposer(BaseComposer):
    """
    Composer for solution simulations, i.e. a single copy of the solute in
    water solvent. The resulting fingerprint is the one most akin to that of
    the original publication.
    """

    @classmethod
    def run(cls, mdtraj_obj, parmed_obj, smiles = None, **kwargs):
        """
        Parameters
        -----------
        mdtraj_obj : mdtraj.trajectory
            The simulated trajectory
        parmed_obj : parmed.structure
            Parmed object of the fully parameterised simulated system.
        smiles : str
            SMILES string of the solute. When omitted, ``parmed_obj.title`` is
            used instead; it is expected to hold the SMILES, e.g. when
            mdfptools.Parameteriser was used during parameterisation.

        Raises
        -----------
        ValueError
            If ``smiles`` is not given and ``parmed_obj.title`` is empty.
        """
        # Extra keyword arguments are passed on to every extractor call.
        cls.kwargs = {"mdtraj_obj" : mdtraj_obj, "parmed_obj" : parmed_obj, **kwargs}

        if smiles is None:
            # Fall back on the SMILES stored in the ParMed object.
            if parmed_obj.title == '':
                raise ValueError("Input ParMed Object does not contain SMILES string, add SMILES as an additional variable")
            smiles = parmed_obj.title

        return super().run(smiles)

    @classmethod
    def _get_relevant_properties(cls):
        """
        Where the set of features to be included in the final MDFP are defined:
        the 2D descriptors followed by the statistical moments of the
        energies, radius of gyration and SASA from `WaterExtractor`.
        """
        cls.fp = {**cls.fp, **cls._get_2d_descriptors()}

        per_frame_extractors = (
            WaterExtractor.extract_energies,
            WaterExtractor.extract_rgyr,
            WaterExtractor.extract_sasa,
        )
        for extractor in per_frame_extractors:
            moments = cls._get_statistical_moments(extractor, **cls.kwargs)
            cls.fp = {**cls.fp, **moments}

        # Drop the stored extractor arguments once the fingerprint is built.
        del cls.kwargs
class LiquidComposer(BaseComposer):
    """
    Composer for liquid simulations, i.e. a box filled with replicates of the
    same molecule.
    """

    @classmethod
    def run(cls, mdtraj_obj, parmed_obj, smiles = None, **kwargs):
        """
        Parameters
        -----------
        mdtraj_obj : mdtraj.trajectory
            The simulated trajectory
        parmed_obj : parmed.structure
            Parmed object of the fully parameterised simulated system.
        smiles : str
            SMILES string of one copy of the solute. When omitted,
            ``parmed_obj.title`` is used instead; it is expected to hold the
            SMILES, e.g. when mdfptools.Parameteriser was used during
            parameterisation.

        Raises
        -----------
        ValueError
            If ``smiles`` is not given and ``parmed_obj.title`` is empty.
        """
        extractor_arguments = {"mdtraj_obj" : mdtraj_obj, "parmed_obj" : parmed_obj}
        extractor_arguments.update(kwargs)
        cls.kwargs = extractor_arguments

        if smiles is not None:
            return super().run(smiles)

        # No SMILES supplied: try to obtain it from `parmed_obj`.
        if parmed_obj.title != '':
            return super().run(parmed_obj.title)
        raise ValueError("Input ParMed Object does not contain SMILES string, add SMILES as an additional variable")

    @classmethod
    def _get_relevant_properties(cls):
        """
        Where the set of features to be included in the final MDFP are defined:
        the 2D descriptors followed by the statistical moments of the
        energies, radius of gyration, SASA and dipole magnitude from
        `LiquidExtractor`.
        """
        cls.fp = {**cls.fp, **cls._get_2d_descriptors()}

        per_frame_extractors = (
            LiquidExtractor.extract_energies,
            LiquidExtractor.extract_rgyr,
            LiquidExtractor.extract_sasa,
            LiquidExtractor.extract_dipole_magnitude,
        )
        for extractor in per_frame_extractors:
            moments = cls._get_statistical_moments(extractor, **cls.kwargs)
            cls.fp = {**cls.fp, **moments}

        # Drop the stored extractor arguments once the fingerprint is built.
        del cls.kwargs
class SolutionLiquidComposer(BaseComposer):
    """
    Composer used to extract features from pairs of solution and liquid
    simulations.
    """

    @classmethod
    def run(cls, solv_mdtraj_obj, solv_parmed_obj, liq_mdtraj_obj, liq_parmed_obj, smiles = None, **kwargs):
        """
        Parameters
        -----------
        solv_mdtraj_obj : mdtraj.trajectory
            The simulated solution trajectory
        solv_parmed_obj : parmed.structure
            Parmed object of the fully parameterised simulated solution system.
        liq_mdtraj_obj : mdtraj.trajectory
            The simulated liquid trajectory
        liq_parmed_obj : parmed.structure
            Parmed object of the fully parameterised simulated liquid system.
        smiles : str
            SMILES string of one copy of the solute. When omitted, the title
            of ``solv_parmed_obj`` is used, falling back on the title of
            ``liq_parmed_obj``; the title is expected to hold the SMILES, e.g.
            when mdfptools.Parameteriser was used during parameterisation.
        **kwargs
            Extra keyword arguments forwarded to every extractor call, for
            both the solution and the liquid system.

        Raises
        -----------
        ValueError
            If ``smiles`` is not given and neither ParMed object has a
            non-empty title.
        """
        cls.kwargs_solv = {"mdtraj_obj" : solv_mdtraj_obj, "parmed_obj" : solv_parmed_obj, **kwargs}
        cls.kwargs_liq = {"mdtraj_obj" : liq_mdtraj_obj, "parmed_obj" : liq_parmed_obj, **kwargs}

        if smiles is None:
            # This method has no plain `parmed_obj` argument (referencing one
            # raised NameError), so both systems' ParMed objects are checked.
            for parmed_obj in (solv_parmed_obj, liq_parmed_obj):
                if parmed_obj.title != '':
                    smiles = parmed_obj.title
                    break
            else:
                raise ValueError("Input ParMed Object does not contain SMILES string, add SMILES as an additional variable")

        return super().run(smiles)

    # Backward compatibility: this entry point used to be declared as a
    # classmethod named ``__init__``. Instantiating the class never worked
    # (``__init__`` must return None), but explicit calls of
    # ``SolutionLiquidComposer.__init__(...)`` did, so the name is kept as an
    # alias. New code should call ``run`` like for the other composers.
    __init__ = run

    @classmethod
    def _get_relevant_properties(cls):
        """
        Where the set of features to be included in the final MDFP are defined:
        the 2D descriptors, then the statistical moments of the solution
        properties from `WaterExtractor`, then those of the liquid properties
        from `LiquidExtractor`.
        """
        cls.fp = {**cls.fp, **cls._get_2d_descriptors()}

        # (extractor, arguments) pairs; the order defines the feature layout.
        extraction_plan = (
            (WaterExtractor.extract_energies, cls.kwargs_solv),
            (WaterExtractor.extract_rgyr, cls.kwargs_solv),
            (WaterExtractor.extract_sasa, cls.kwargs_solv),
            (LiquidExtractor.extract_energies, cls.kwargs_liq),
            (LiquidExtractor.extract_rgyr, cls.kwargs_liq),
            (LiquidExtractor.extract_sasa, cls.kwargs_liq),
            (LiquidExtractor.extract_dipole_magnitude, cls.kwargs_liq),
        )
        for extractor, extractor_kwargs in extraction_plan:
            moments = cls._get_statistical_moments(extractor, **extractor_kwargs)
            cls.fp = {**cls.fp, **moments}

        # Drop the stored extractor arguments once the fingerprint is built.
        del cls.kwargs_liq, cls.kwargs_solv
""" parm_path = '/home/shuwang/Documents/Modelling/MDFP/Codes/vapour_pressure/crc_handbook/corrupted/RU18.1_8645.pickle' parm = pickle.load(open(parm_path,"rb")) traj = md.load('/home/shuwang/Documents/Modelling/MDFP/Codes/vapour_pressure/crc_handbook/corrupted/RU18.1_8645.h5')[:10] # print(Liquid_Extractor.extract_dipole_magnitude(traj, parm)) x = MDFPComposer("Cl-C1:C:C:C:C2:C:C:C:C:C:1:2", traj, parm) # print(x._get_statistical_moments(Base_Extractor.extract_rgyr, **{"mdtraj_obj" : traj})) # print(x._get_statistical_moments(Liquid_Extractor.extract_dipole_magnitude, **{"mdtraj_obj" : traj, "parmed_obj" : parm})) # print(x._get_statistical_moments(Base_Extractor.extract_sasa, **{"mdtraj_obj" : traj, "parmed_obj" : parm})) # print(x._get_statistical_moments(Liquid_Extractor.extract_energies, **{"mdtraj_obj" : traj, "parmed_obj" : parm , "platform" : "OpenCL"})) print(x.fp) print(x.__dict__) print(x.get_mdfp()) pickle.dump(x, open("/home/shuwang/tmp.pickle", "wb")) """