版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u012325865/article/details/82082317
基于RDKit的骨架分析
代码实例:
# In[1]:
#!/usr/bin/env python3
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Scaffolds import MurckoScaffold
# In[2]:
# Open the DrugBank SD file and keep only the truthy entries -- presumably to
# drop records RDKit could not parse (its suppliers yield None for those).
drugbank_input = Chem.SDMolSupplier('drugbank.sdf')
drugbank = list(filter(None, drugbank_input))
# In[3]:
# Worked example on one entry (index 222 of the parsed list): the molecule,
# its Murcko scaffold, and the generic form of that scaffold.
basic_structure = drugbank[222]
atomic_scaffold = MurckoScaffold.GetScaffoldForMol(basic_structure)
# 2D coordinates are computed on the scaffold before it is passed on below.
# NOTE(review): presumably the generic scaffold inherits them -- confirm.
atomic_scaffold.Compute2DCoords()
graph_scaffold = MurckoScaffold.MakeScaffoldGeneric(atomic_scaffold)
# The result is not assigned; presumably this relies on notebook cell display
# (the file carries `# In[n]` cell markers).
Draw.MolsToGridImage([basic_structure, atomic_scaffold, graph_scaffold])
# In[4]:
# Murcko scaffold for every parsed molecule, in the same order as `drugbank`.
drugbank_atomic_scaffolds = [MurckoScaffold.GetScaffoldForMol(mol) for mol in drugbank]
# Compute 2D coordinates on each scaffold (the call's return value is unused).
for scaffold_mol in drugbank_atomic_scaffolds:
    scaffold_mol.Compute2DCoords()
# In[5]:
def genericize_scaffold(s):
    """Return the generic form of scaffold *s*, or None if conversion fails.

    Thin wrapper around ``MurckoScaffold.MakeScaffoldGeneric``: a
    ``ValueError`` raised for one scaffold is swallowed and reported as
    ``None`` so that a bulk conversion over many scaffolds can continue.
    Any other exception propagates to the caller.
    """
    try:
        return MurckoScaffold.MakeScaffoldGeneric(s)
    except ValueError:
        return None
# Generic scaffold for each atomic scaffold; an entry is None where the
# conversion raised ValueError. NOTE: "grafh" is a typo for "graph", kept
# because later cells refer to this exact name.
drugbank_grafh_scaffolds = [genericize_scaffold(s) for s in drugbank_atomic_scaffolds]
# In[6]:
# Sizes of the three lists, followed by the number of failed (None) entries.
(
    len(drugbank),
    len(drugbank_atomic_scaffolds),
    len(drugbank_grafh_scaffolds),
    sum(x is None for x in drugbank_grafh_scaffolds),
)
# In[7]:
# Entry 111 from each list in turn: molecule, atomic scaffold, generic scaffold.
Draw.MolsToGridImage(
    [mols[111] for mols in (drugbank, drugbank_atomic_scaffolds, drugbank_grafh_scaffolds)]
)
# In[8]:
# SMILES string for every generic scaffold that was produced; None entries
# (failed conversions) are skipped, so indices no longer line up with
# `drugbank`.
scaffold_smiles = [
    Chem.MolToSmiles(scaffold)
    for scaffold in drugbank_grafh_scaffolds
    if scaffold is not None
]
# In[9]:
# Number of SMILES strings and the one at index 111.
len(scaffold_smiles), scaffold_smiles[111]
# In[10]:
import collections
# Tally how many times each generic-scaffold SMILES string occurs.
counter = collections.Counter()
counter.update(scaffold_smiles)
# In[11]:
print(counter)
# In[12]:
# The two SMILES strings are hard-coded here, not derived from `counter`;
# presumably they were read off the printed tally -- verify against your data.
most_freq, second_freq = (
    Chem.MolFromSmiles(smiles)
    for smiles in ('C1CCCCC1', 'C1CCC(CC2CCCCC2)CC1')
)
# In[13]:
Draw.MolsToGridImage([most_freq, second_freq])
如需获取实例中使用的数据集(drugbank.sdf),请联系博主。