import tempfile
# import contextlib
from simtk import unit #Unit handling for OpenMM
from simtk.openmm import *
from simtk.openmm.app import *
from simtk.openmm.app import PDBFile
# from openforcefield.utils.toolkits import RDKitToolkitWrapper, ToolkitRegistry
from openforcefield.topology import Molecule, Topology
from openforcefield.typing.engines.smirnoff import ForceField
import parmed
# from rdkit import Chem
import pickle
import shutil
import os
import numpy as np
# import mdfptools
# from mdfptools.utils import get_data_filename
# from .utils import get_data_filename
# from utils import get_data_filename
from .utils import get_data_filename
"""
TODOs:
- proper handling of tip3p water loading
- SMILES string is stored in the `title` field of parmed object
"""
##############################################################
[docs]class BaseParameteriser():
"""
.. warning :: The base class should not be used directly
"""
system_pmd = None
[docs] @classmethod
def via_openeye(cls):
"""
Abstract method
"""
raise NotImplementedError
[docs] @classmethod
def via_rdkit(cls):
"""
Abstract method
"""
raise NotImplementedError
[docs] @classmethod
def pmd_generator(cls):
"""
Abstract method
"""
raise NotImplementedError
[docs] @classmethod
def _rdkit_setter(cls, smiles, **kwargs):
"""
Prepares an rdkit molecule with 3D coordinates.
Parameters
------------
smiles : str
SMILES string of the solute moleulce
Returns
---------
mol : rdkit.Chem.Mol
"""
from rdkit import Chem
from rdkit.Chem import AllChem
cls.smiles = smiles
mol = Chem.MolFromSmiles(smiles, sanitize = False)
# mol = Chem.MolFromSmiles(smiles, sanitize = True)
mol.SetProp("_Name", cls.smiles)
mol.UpdatePropertyCache(strict = False)
mol = Chem.AddHs(mol)
Chem.GetSSSR(mol)
# params = AllChem.ETKDG()
# params.enforceChirality = True
AllChem.EmbedMolecule(mol, enforceChirality = True)
### TESTING
# ref = Chem.MolFromSmiles('C-C-[C@H](-C)-[C@@H]1-N-C(=O)-[C@H](-C(-C)-C)-N(-C)-C(=O)-[C@H](-C-c2:c:[nH]:c3:c:c:c:c:c:2:3)-N-C(=O)-C-N(-C)-C(=O)-[C@H](-[C@@H](-C)-C-C)-N(-C)-C(=O)-[C@H](-C(-C)-C)-N-C(=O)-C-N(-C)-C(=O)-[C@H](-[C@@H](-C)-C-C)-N(-C)-C(=O)-[C@H](-C(-C)-C)-N(-C)-C(=O)-C-N(-C)-C(=O)-[C@H](-C(-C)-C)-N(-C)-C(=O)-[C@H](-C(-C)-C)-N(-C)-C-1=O')
# mol = Chem.MolFromPDBFile('/home/shuwang/Documents/Modelling/CP/Datum/conformer_generator/OmphalotinA_MD/0-emin/emin_50.cnf.pdb', removeHs = False, sanitize = False)
# mol.SetProp("_Name", cls.smiles)
# mol = AllChem.AssignBondOrdersFromTemplate(ref, mol)
return mol
"""
@classmethod
def _rdkit_charger(cls, mol):
if not hasattr(cls, "charge_engine"):
raise ValueError("No Useable charge engine Exist")
# cls.mol = cls.charge_engine(mol)
# return cls.mol
return cls.charge_engine(mol)
"""
[docs] @classmethod
def _rdkit_parameteriser(cls, mol, **kwargs):
from rdkit import Chem
from openforcefield.utils.toolkits import RDKitToolkitWrapper, ToolkitRegistry
"""
Creates a parameterised system from rdkit molecule
Parameters
----------
mol : rdkit.Chem.Mol
"""
try:
forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
molecule = Molecule.from_rdkit(mol, allow_undefined_stereo = cls.allow_undefined_stereo)
if hasattr(cls, "_ddec_charger"):
molecule.partial_charges = unit.Quantity(np.array(cls._ddec_charger(mol, cls.rf)), unit.elementary_charge)
else:
from openforcefield.utils.toolkits import AmberToolsToolkitWrapper
molecule.compute_partial_charges_am1bcc(toolkit_registry = AmberToolsToolkitWrapper())
topology = Topology.from_molecules(molecule)
openmm_system = forcefield.create_openmm_system(topology, charge_from_molecules= [molecule])
ligand_pmd = parmed.openmm.topsystem.load_topology(topology.to_openmm(), openmm_system, molecule._conformers[0])
except Exception as e:
raise ValueError("Parameterisation Failed : {}".format(e)) #TODO
ligand_pmd.title = cls.smiles
for i in ligand_pmd.residues:
i.name = 'LIG'
tmp_dir = tempfile.mkdtemp()
# We need all molecules as both pdb files (as packmol input)
# and mdtraj.Trajectory for restoring bonds later.
pdb_filename = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
Chem.MolToPDBFile(mol, pdb_filename)
cls.pdb_filename = pdb_filename
cls.ligand_pmd = ligand_pmd
[docs] @classmethod
def load_ddec_models(cls, epsilon = 4, **kwargs):
"""
Charging molecule using machine learned charge instead of the default AM1-BCC method.
Requires first installing the mlddec(https://github.com/rinikerlab/mlddec) package. Parameters are availible for elements : {H,C,N,O,Cl,Br,F}.
Parameters
------------
epsilon : int
Dielectric constant to be used, polarity of the resulting molecule varies, possible values are {4,78}.
"""
try:
import mlddec
except ImportError:
raise ImportError('mlddec not properly installed')
cls.rf = mlddec.load_models(epsilon)
# cls.charge_engine = cls._ddec_charger
cls._ddec_charger = mlddec.get_charges
# @classmethod
# def _ddec_charger(cls, mol):
# try:
# import mlddec
# except ImportError:
# raise ImportError('mlddec not properly installed')
#
# return mlddec.get_charges(mol, cls.rf)
[docs] @classmethod
def unload_ddec_models(cls, **kwargs):
"""
Unload the machine-learned charge model, which takes over 1 GB of memory.
"""
del cls.rf
[docs] @classmethod
def _openeye_setter(cls, smiles, **kwargs):
"""
Prepares an openeye molecule with 3D coordinates.
Parameters
------------
smiles : str
SMILES string of the solute moleulce
Returns
---------
mol : oechem.OEMol
"""
from openeye import oechem # OpenEye Python toolkits
from openeye import oeomega # Omega toolkit
# from openeye import oequacpac #Charge toolkit
cls.smiles = smiles
cls.omega = oeomega.OEOmega()
# modified in accordance to recommendation on omega example script: https://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
# reduced the default total number of confs from 800 to 100 to save execution time
eWindow = 15.0
cls.omega.SetEnergyWindow(eWindow)
if "openeye_maxconf" in kwargs and type(kwargs["openeye_maxconf"]) is int and kwargs["openeye_maxconf"] > 0 :
cls.omega.SetMaxConfs(kwargs["openeye_maxconf"])
else:
cls.omega.SetMaxConfs(100)
cls.omega.SetRMSThreshold(1.0)
cls.omega.SetIncludeInput(False)
cls.omega.SetStrictStereo(False) #Refuse to generate conformers if stereochemistry not provided
# cls.charge_engine = oequacpac.OEAM1BCCCharges()
mol = oechem.OEMol()
oechem.OESmilesToMol(mol, smiles)
oechem.OEAddExplicitHydrogens(mol)
oechem.OETriposAtomNames(mol)
# Generate conformers with Omega; keep only best conformer
status = cls.omega(mol)
if not status:
raise ValueError("Error generating conformers for %s." % (cls.smiles)) #TODO
return mol
# @classmethod
# def _openeye_charger(cls, mol):
# # Set to use a simple neutral pH model
# #oequacpac.OESetNeutralpHModel(mol) #FIXME input smiles should ensure neutral molecule
#
# # Generate conformers with Omega; keep only best conformer
# status = cls.omega(mol)
# if not status:
# raise ValueError("Error generating conformers for %s." % (cls.smiles)) #TODO
#
# # Assign AM1-BCC charges
# oequacpac.OEAssignCharges(mol, cls.charge_engine)
# return mol
[docs] @classmethod
def _openeye_parameteriser(cls, mol, **kwargs):
"""
Creates a parameterised system from openeye molecule
Parameters
----------
mol : oechem.OEMol
"""
try:
forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
molecule = Molecule.from_openeye(mol, allow_undefined_stereo = cls.allow_undefined_stereo)
from openforcefield.utils.toolkits import OpenEyeToolkitWrapper
molecule.compute_partial_charges_am1bcc(toolkit_registry = OpenEyeToolkitWrapper())
topology = Topology.from_molecules(molecule)
openmm_system = forcefield.create_openmm_system(topology, charge_from_molecules= [molecule])
ligand_pmd = parmed.openmm.topsystem.load_topology(topology.to_openmm(), openmm_system, molecule._conformers[0])
except Exception as e:
raise ValueError("Parameterisation Failed : {}".format(e)) #TODO
ligand_pmd.title = cls.smiles
for i in ligand_pmd.residues:
i.name = 'LIG'
tmp_dir = tempfile.mkdtemp()
# We need all molecules as both pdb files (as packmol input)
# and mdtraj.Trajectory for restoring bonds later.
pdb_filename = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
from openeye import oechem # OpenEye Python toolkits
oechem.OEWriteMolecule( oechem.oemolostream( pdb_filename ), mol)
cls.pdb_filename = pdb_filename
cls.ligand_pmd = ligand_pmd
[docs] @classmethod
def save(cls, file_name, file_path = "./", **kwargs):
"""
Save to file the parameterised system.
Parameters
------------
file_name : str
No file type postfix is necessary
file_path : str
Default to current directory
Returns
--------
path : str
The absolute path where the trajectory is written to.
"""
path = '{}/{}.pickle'.format(file_path, file_name)
pickle_out = open(path, "wb")
pickle.dump(cls.system_pmd , pickle_out)
pickle_out.close()
return os.path.abspath(path)
run = via_openeye
[docs]class LiquidParameteriser(BaseParameteriser):
"""
Parameterisation of liquid box, i.e. multiple replicates of the same molecule
"""
[docs] @classmethod
def via_openeye(cls, smiles, density, allow_undefined_stereo = False, num_lig = 100, **kwargs):
"""
Parameterisation perfromed via openeye toolkit.
Parameters
----------------
smiles : str
SMILES string of the molecule to be parametersied
density : simtk.unit
Density of liquid box
allow_undefined_stereo : bool
Flag passed to OpenForceField `Molecule` object during parameterisation. When set to False an error is returned if SMILES have no/ambiguous steroechemistry. Default to False here as a sanity check for user.
num_lig : int
Number of replicates of the molecule
Returns
------------------
system_pmd : parmed.structure
The parameterised system as parmed object
"""
cls.allow_undefined_stereo = allow_undefined_stereo
mol = cls._openeye_setter(smiles, **kwargs)
# mol = cls._openeye_charger(mol)
cls._openeye_parameteriser(mol, **kwargs)
return cls._via_helper(density, num_lig, **kwargs)
[docs] @classmethod
def via_rdkit(cls, smiles, density, allow_undefined_stereo = False, num_lig = 100, **kwargs):
#TODO !!!!!!!!!!!! approximating volue by density if not possible via rdkit at the moment.
"""
Parameterisation perfromed via rdkit.
Parameters
----------------
smiles : str
SMILES string of the molecule to be parametersied
density : simtk.unit
Density of liquid box
allow_undefined_stereo : bool
Flag passed to OpenForceField `Molecule` object during parameterisation. When set to False an error is returned if SMILES have no/ambiguous steroechemistry. Default to False here as a sanity check for user.
num_lig : int
Number of replicates of the molecule
Returns
------------------
system_pmd : parmed.structure
The parameterised system as parmed object
"""
cls.allow_undefined_stereo = allow_undefined_stereo
mol = cls._rdkit_setter(smiles, **kwargs)
# mol = cls._openeye_charger(mol)
cls._rdkit_parameteriser(mol, **kwargs)
return cls._via_helper(density, num_lig, **kwargs)
[docs] @classmethod
def _via_helper(cls, density, num_lig, **kwargs):
#TODO !!!!!!!!!!!! approximating volue by density if not possible via rdkit at the moment.
"""
Helper function for via_rdkit or via_openeye
Parameters
----------------
density : simtk.unit
Density of liquid box
num_lig : int
Number of replicates of the molecule
Returns
------------------
system_pmd : parmed.structure
The parameterised system as parmed object
"""
import mdtraj as md
from openmoltools import packmol
density = density.value_in_unit(unit.gram / unit.milliliter)
ligand_mdtraj = md.load(cls.pdb_filename)[0]
#box_size = packmol.approximate_volume_by_density([smiles], [num_lig], density=density, box_scaleup_factor=1.1, box_buffer=2.0)
box_size = packmol.approximate_volume_by_density([smiles], [num_lig], density=density, box_scaleup_factor=1.5, box_buffer=2.0)
packmol_out = packmol.pack_box([ligand_mdtraj], [num_lig], box_size = box_size)
cls.system_pmd = cls.ligand_pmd * num_lig
cls.system_pmd.positions = packmol_out.openmm_positions(0)
cls.system_pmd.box_vectors = packmol_out.openmm_boxes(0)
try:
shutil.rmtree(cls.pdb_filename)
del cls.ligand_pmd, cls.pdb_filename
except Exception as e:
print("Error due to : {}".format(e))
cls.system_pmd.title = cls.smiles
return cls.system_pmd
run = via_openeye
[docs]class SolutionParameteriser(BaseParameteriser):
"""
Parameterisation of solution box, i.e. one copy of solute molecule surronded by water.
Parameters
--------------
solvent_pmd : parmed.structure
Parameterised tip3p water as parmed object
"""
# forcefield = ForceField('tip3p.offxml')
# molecule = Molecule.from_file(get_data_filename("water.sdf"))
# print(molecule.partial_charges)
# topology = Topology.from_molecules(molecule)
# openmm_system = forcefield.create_openmm_system(topology, charge_from_molecules= [molecule])
# solvent_pmd = parmed.openmm.topsystem.load_topology(topology.to_openmm(), openmm_system, [[1,0,0],[0,1,0],[0,0,1]])
try:
solvent_pmd = parmed.load_file(get_data_filename("tip3p.prmtop")) #FIXME #TODO
except ValueError:
print("Water file cannot be located")
# default_padding = 1.25 #nm
[docs] @classmethod
def via_openeye(cls, smiles, allow_undefined_stereo = False, default_padding = 1.25*unit.nanometer, **kwargs):
"""
Parameterisation perfromed via openeye.
Parameters
--------------------
smiles : str
SMILES string of the solute molecule
allow_undefined_stereo : bool
Flag passed to OpenForceField `Molecule` object during parameterisation. When set to False an error is returned if SMILES have no/ambiguous steroechemistry. Default to False here as a sanity check for user.
default_padding : simtk.unit
Dictates amount of water surronding the solute. Default is 1.25 nanometers
Returns
------------------
system_pmd : parmed.structure
The parameterised system as parmed object
"""
#TODO currently only supports one solute molecule
cls.allow_undefined_stereo = allow_undefined_stereo
mol = cls._openeye_setter(smiles, **kwargs)
# mol = cls._openeye_charger(mol)
cls._openeye_parameteriser(mol, **kwargs)
cls.default_padding = default_padding.value_in_unit(unit.nanometer)
return cls._via_helper(**kwargs)
[docs] @classmethod
def via_rdkit(cls, smiles, allow_undefined_stereo = False, default_padding = 1.25*unit.nanometer, **kwargs):
"""
Parameterisation perfromed via openeye.
Parameters
--------------------
smiles : str
SMILES string of the solute molecule
allow_undefined_stereo : bool
Flag passed to OpenForceField `Molecule` object during parameterisation. When set to False an error is returned if SMILES have no/ambiguous steroechemistry. Default to False here as a sanity check for user.
default_padding : simtk.unit
Dictates amount of water surronding the solute. Default is 1.25 nanometers
Returns
------------------
system_pmd : parmed.structure
The parameterised system as parmed object
"""
cls.allow_undefined_stereo = allow_undefined_stereo
mol = cls._rdkit_setter(smiles, **kwargs)
# mol = cls._rdkit_charger(mol)
cls._rdkit_parameteriser(mol, **kwargs)
cls.default_padding = default_padding.value_in_unit(unit.nanometer)
return cls._via_helper(**kwargs)
[docs] @classmethod
def _via_helper(cls, **kwargs):
"""
Helper function for via_rdkit or via_openeye
Returns
------------------
system_pmd : parmed.structure
The parameterised system as parmed object
"""
from pdbfixer import PDBFixer # for solvating
fixer = PDBFixer(cls.pdb_filename)
if "padding" not in kwargs:
fixer.addSolvent(padding = cls.default_padding)
else:
fixer.addSolvent(padding = float(kwargs["padding"]))
tmp_dir = tempfile.mkdtemp()
cls.pdb_filename = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
with open(cls.pdb_filename, "w") as f:
PDBFile.writeFile(fixer.topology, fixer.positions, f)
complex = parmed.load_file(cls.pdb_filename)
solvent = complex["(:HOH)"]
num_solvent = len(solvent.residues)
solvent_pmd = cls.solvent_pmd * num_solvent
solvent_pmd.positions = solvent.positions
cls.system_pmd = cls.ligand_pmd + solvent_pmd
cls.system_pmd.box_vectors = complex.box_vectors
try:
shutil.rmtree(cls.pdb_filename)
del cls.ligand_pmd, cls.pdb_filename
except:
pass
cls.system_pmd.title = cls.smiles
return cls.system_pmd
run = via_openeye
[docs]class VaccumParameteriser(BaseParameteriser):
[docs] @classmethod
def via_openeye(cls, smiles, allow_undefined_stereo = False, **kwargs):
"""
Parameterisation perfromed via openeye toolkit.
Parameters
----------------
smiles : str
SMILES string of the molecule to be parametersied
allow_undefined_stereo : bool
Flag passed to OpenForceField `Molecule` object during parameterisation. When set to False an error is returned if SMILES have no/ambiguous steroechemistry. Default to False here as a sanity check for user.
Returns
------------------
system_pmd : parmed.structure
The parameterised system as parmed object
"""
cls.allow_undefined_stereo = allow_undefined_stereo
mol = cls._openeye_setter(smiles, **kwargs)
# mol = cls._openeye_charger(mol)
cls._openeye_parameteriser(mol, **kwargs)
cls.system_pmd = cls.ligand_pmd
cls.system_pmd.title = cls.smiles
return cls.system_pmd
[docs] @classmethod
def via_rdkit(cls, smiles, allow_undefined_stereo = False, **kwargs):
"""
Parameterisation perfromed via rdkit toolkit.
Parameters
----------------
smiles : str
SMILES string of the molecule to be parametersied
allow_undefined_stereo : bool
Flag passed to OpenForceField `Molecule` object during parameterisation. When set to False an error is returned if SMILES have no/ambiguous steroechemistry. Default to False here as a sanity check for user.
Returns
------------------
system_pmd : parmed.structure
The parameterised system as parmed object
"""
cls.allow_undefined_stereo = allow_undefined_stereo
mol = cls._rdkit_setter(smiles, **kwargs)
# mol = cls._rdkit_charger(mol)
cls._rdkit_parameteriser(mol, **kwargs)
cls.system_pmd = cls.ligand_pmd
cls.system_pmd.title = cls.smiles
return cls.system_pmd
run = via_openeye
"""
from mdfptools.Parameteriser import *
print(SolutionParameteriser().run("CC"))
SolutionParameteriser.load_ddec_models()
print(SolutionParameteriser().via_rdkit("CC"))
SolutionParameteriser.unload_ddec_models()
print(LiquidParameteriser().run("CC", density = 12 * unit.gram / unit.liter))
"""