Tuesday, December 16, 2008

OBDotNet Enumerator Workaround

As some of you may have noticed, in the initial release of OBDotNet the enumerator types are not mapped correctly. The proxy classes are generated but the parameter typemaps have a problem and no operators are overloaded. As a temporary fix, this gist contains some extension methods for enumerating atoms and bonds.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using OpenBabel;
namespace OBDotNetExamples
{
public static class OBDotNetExtensions
{
/// <summary>
/// Enumerates the atoms that share a bond with <paramref name="atom"/>.
/// </summary>
/// <param name="atom">The atom whose bonded partners are requested.</param>
/// <returns>
/// A lazily evaluated sequence holding, for every bond produced by
/// <c>Bonds(atom)</c>, the result of <c>bond.GetNbrAtom(atom)</c>.
/// </returns>
public static IEnumerable<OBAtom> Neighbors(this OBAtom atom)
{
    IEnumerable<OBBond> attached = atom.Bonds();
    return attached.Select(b => b.GetNbrAtom(atom));
}
/// <summary>
/// Tests whether <paramref name="atom"/> is one of the two end points of <paramref name="bond"/>.
/// </summary>
/// <param name="bond">The bond to inspect.</param>
/// <param name="atom">The atom to look for.</param>
/// <returns>
/// <c>true</c> when the atom's index equals the bond's begin-atom index or
/// its end-atom index; otherwise <c>false</c>.
/// </returns>
public static bool Contains(this OBBond bond, OBAtom atom)
{
    uint target = atom.GetIdx();
    return bond.GetBeginAtomIdx() == target || bond.GetEndAtomIdx() == target;
}
/// <summary>
/// Enumerates the bonds of the parent molecule that involve <paramref name="atom"/>.
/// </summary>
/// <param name="atom">The atom whose bonds are requested.</param>
/// <returns>
/// A lazily evaluated sequence of the parent molecule's bonds, in bond-index
/// order starting at 0, for which <c>Contains(atom)</c> is true.
/// </returns>
public static IEnumerable<OBBond> Bonds(this OBAtom atom)
{
    int matched = 0;
    int index = 0;
    while (index < atom.GetParent().NumBonds())
    {
        OBBond candidate = atom.GetParent().GetBond(index);
        index++;

        if (candidate.Contains(atom))
        {
            matched++;
            yield return candidate;
            continue;
        }

        // Shortcut: on a non-matching bond, stop scanning once the number of
        // bonds already returned equals what atom.GetValence() reports.
        if (matched == atom.GetValence())
        {
            yield break;
        }
    }
}
/// <summary>
/// Enumerates every atom of <paramref name="mol"/> in index order.
/// </summary>
/// <param name="mol">The molecule to walk.</param>
/// <returns>
/// A lazily evaluated sequence of <c>mol.GetAtom(i)</c> for i = 1 .. NumAtoms().
/// The sequence is empty when the molecule has no atoms; it never contains
/// a placeholder <c>null</c> for that case.
/// </returns>
public static IEnumerable<OBAtom> Atoms(this OBMol mol)
{
    // The atom count is read once, when enumeration starts.
    uint numAtoms = mol.NumAtoms();

    // Atom indices start at 1, so an empty molecule simply skips the loop.
    for (int i = 1; i <= numAtoms; i++)
    {
        yield return mol.GetAtom(i);
    }
}
/// <summary>
/// Runs the stack-based traversal of <paramref name="mol"/> starting from atom index 1.
/// </summary>
/// <param name="mol">The molecule to traverse.</param>
/// <returns>The sequence produced by <c>AtomsDFS(mol, 1)</c>.</returns>
public static IEnumerable<OBAtom> AtomsDFS(this OBMol mol)
{
    const int firstAtomIndex = 1;
    return mol.AtomsDFS(firstAtomIndex);
}
/// <summary>
/// Enumerates the atoms reachable from a starting atom using a stack-based
/// (depth-first style) traversal over bonded neighbors.
/// </summary>
/// <param name="mol">The molecule to traverse.</param>
/// <param name="startingIndex">1-based index of the atom the traversal starts from.</param>
/// <returns>
/// A lazily evaluated sequence that begins with the starting atom and yields
/// every atom connected to it exactly once.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="startingIndex"/> is less than 1.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startingIndex"/> is greater than the number of atoms in <paramref name="mol"/>.
/// </exception>
/// <remarks>
/// Arguments are validated when the method is called rather than when the
/// result is first enumerated, so a bad index is reported at the call site.
/// </remarks>
public static IEnumerable<OBAtom> AtomsDFS(this OBMol mol, int startingIndex)
{
    if (startingIndex < 1)
    {
        throw new ArgumentException("Atom indices must be greater than 0", "startingIndex");
    }

    uint numAtoms = mol.NumAtoms();
    if (startingIndex > numAtoms)
    {
        // ArgumentOutOfRangeException takes (paramName, message); the
        // single-string overload would treat the text as the parameter name.
        throw new ArgumentOutOfRangeException(
            "startingIndex",
            "Index " + startingIndex + " is invalid. Structure contains " + numAtoms + " atoms");
    }

    return AtomsDFSIterator(mol, startingIndex);
}

// Iterator half of AtomsDFS: performs the traversal once the arguments are known to be valid.
private static IEnumerable<OBAtom> AtomsDFSIterator(OBMol mol, int startingIndex)
{
    // visited[k] tracks the atom with index k + 1.
    bool[] visited = new bool[mol.NumAtoms()];
    visited[startingIndex - 1] = true;

    Stack<OBAtom> pending = new Stack<OBAtom>();
    pending.Push(mol.GetAtom(startingIndex));

    while (pending.Count != 0)
    {
        OBAtom current = pending.Pop();
        yield return current;

        foreach (OBAtom neighbor in current.Neighbors())
        {
            int slot = (int)neighbor.GetIdx() - 1;
            if (visited[slot])
            {
                continue;
            }

            // Mark on push so an atom can never sit on the stack twice.
            visited[slot] = true;
            pending.Push(neighbor);
        }
    }
}
/// <summary>
/// Enumerates the atoms reachable from a starting atom in breadth-first
/// order, pairing each atom with its depth in the traversal.
/// </summary>
/// <param name="mol">The molecule to traverse.</param>
/// <param name="startingIndex">1-based index of the atom the traversal starts from.</param>
/// <returns>
/// A lazily evaluated sequence of <see cref="AtomDepthPair"/> values. The
/// starting atom has depth 1, its neighbors depth 2, and so on. Every
/// connected atom is yielded exactly once, at the depth at which it was
/// first reached.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="startingIndex"/> is less than 1.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startingIndex"/> is greater than the number of atoms in <paramref name="mol"/>.
/// </exception>
/// <remarks>
/// Arguments are validated when the method is called rather than when the
/// result is first enumerated, so a bad index is reported at the call site.
/// </remarks>
public static IEnumerable<AtomDepthPair> AtomsBFSWDepth(this OBMol mol, int startingIndex)
{
    if (startingIndex < 1)
    {
        throw new ArgumentException("Atom indices must be greater than 0", "startingIndex");
    }

    uint numAtoms = mol.NumAtoms();
    if (startingIndex > numAtoms)
    {
        // ArgumentOutOfRangeException takes (paramName, message); the
        // single-string overload would treat the text as the parameter name.
        throw new ArgumentOutOfRangeException(
            "startingIndex",
            "Index " + startingIndex + " is invalid. Structure contains " + numAtoms + " atoms");
    }

    return AtomsBFSWDepthIterator(mol, startingIndex);
}

// Iterator half of AtomsBFSWDepth: performs the traversal once the arguments are known to be valid.
private static IEnumerable<AtomDepthPair> AtomsBFSWDepthIterator(OBMol mol, int startingIndex)
{
    // visited[k] tracks the atom with index k + 1.
    bool[] visited = new bool[mol.NumAtoms()];
    visited[startingIndex - 1] = true;

    Queue<AtomDepthPair> queue = new Queue<AtomDepthPair>();
    queue.Enqueue(new AtomDepthPair(mol.GetAtom(startingIndex), 1));

    while (queue.Count != 0)
    {
        AtomDepthPair current = queue.Dequeue();
        yield return current;

        foreach (OBAtom neighbor in current.Atom.Neighbors())
        {
            int slot = (int)neighbor.GetIdx() - 1;
            if (visited[slot])
            {
                continue;
            }

            // Mark when enqueuing, not when dequeuing: in a ring the same
            // atom is reachable from two queued atoms and would otherwise
            // be enqueued, and later yielded, more than once.
            visited[slot] = true;
            queue.Enqueue(new AtomDepthPair(neighbor, current.Depth + 1));
        }
    }
}
/// <summary>
/// Enumerates atoms in the order produced by <c>AtomsBFSWDepth(mol, 1)</c>,
/// discarding the depth information.
/// </summary>
/// <param name="mol">The molecule to traverse.</param>
/// <returns>A lazily evaluated sequence of the <c>Atom</c> member of each pair.</returns>
public static IEnumerable<OBAtom> AtomsBFS(this OBMol mol)
{
    IEnumerable<AtomDepthPair> levels = mol.AtomsBFSWDepth(1);
    foreach (AtomDepthPair entry in levels)
    {
        yield return entry.Atom;
    }
}
/// <summary>
/// Enumerates every bond of <paramref name="mol"/> in index order.
/// </summary>
/// <param name="mol">The molecule whose bonds are requested.</param>
/// <returns>
/// A lazily evaluated sequence of <c>mol.GetBond(i)</c> for i = 0 up to, but
/// not including, <c>mol.NumBonds()</c>. The bond count is re-read before
/// each step.
/// </returns>
public static IEnumerable<OBBond> Bonds(this OBMol mol)
{
    int index = 0;
    while (index < mol.NumBonds())
    {
        yield return mol.GetBond(index);
        index++;
    }
}
}
/// <summary>
/// Couples an atom with the depth at which a traversal reached it.
/// AtomsBFSWDepth produces these, giving the starting atom depth 1.
/// </summary>
public struct AtomDepthPair
{
    /// <summary>Traversal depth recorded for <see cref="Atom"/>.</summary>
    public int Depth;

    /// <summary>The atom this entry refers to.</summary>
    public OBAtom Atom;

    /// <summary>
    /// Creates a pair from an atom and its depth.
    /// </summary>
    /// <param name="atom">The atom to store.</param>
    /// <param name="depth">The depth to store alongside it.</param>
    public AtomDepthPair(OBAtom atom, int depth)
    {
        this.Depth = depth;
        this.Atom = atom;
    }
}
}


I threw this together kind of quickly and only did a little testing, so please email me if you see any bugs.

To demonstrate these enumerators, here is a simple C# version of Noel's Python script that calculates a circular fingerprint using OBMolAtomBFSIter:

using System;
using System.Text;
using System.Collections.Generic;
using System.Linq;
using OpenBabel;
namespace OBDotNetExamples
{
/// <summary>
/// Demo: builds a simple circular fingerprint for one SMILES string by
/// running a depth-limited breadth-first walk from every atom.
/// </summary>
class Program1
{
    /// <summary>
    /// Reads the molecule, collects a sorted "depth-type" block per atom and
    /// prints all blocks on one line, separated by tabs.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        //Set up a translator to Sybyl atom types
        OBTypeTable typeTable = new OBTypeTable();
        typeTable.SetFromType("INT");
        typeTable.SetToType("SYB");

        OBConversion conversion = new OBConversion();
        conversion.SetInFormat("smi");
        OBMol mol = new OBMol();
        conversion.ReadString(mol, "c1(N)ccccc1c(=O)[O-]");

        int maxDepth = 3;
        List<string> fingerprint = new List<string>();

        for (int center = 1; center <= mol.NumAtoms(); center++)
        {
            // The walk yields atoms in non-decreasing depth order, so it can
            // be cut off at the first pair that lies beyond maxDepth.
            List<string> environment = mol.AtomsBFSWDepth(center)
                .TakeWhile(pair => pair.Depth <= maxDepth)
                .Select(pair => string.Format("{0}-{1}", pair.Depth - 1, typeTable.Translate(pair.Atom.GetAtomType())))
                .ToList();

            // Sorting makes the block independent of traversal order.
            environment.Sort();
            fingerprint.Add(string.Join(";", environment.ToArray()));
        }

        Console.WriteLine(string.Join("\t", fingerprint.ToArray()));
    }
/* output:
0-C.ar;1-C.ar;1-C.ar;1-N.pl3;2-C.2;2-C.ar;2-C.ar 0-N.pl3;1-C.ar;2-C.ar;2-C.ar 0-
C.ar;1-C.ar;1-C.ar;2-C.ar;2-C.ar;2-N.pl3 0-C.ar;1-C.ar;1-C.ar;2-C.ar;2-C.ar 0-
C.ar;1-C.ar;1-C.ar;2-C.ar;2-C.ar 0-C.ar;1-C.ar;1-C.ar;2-C.2;2-C.ar;2-C.ar 0-
C.ar;1-C.2;1-C.ar;1-C.ar;2-C.ar;2-C.ar;2-N.pl3;2-O.co2;2-O.co2 0-C.2;1-C.ar;1-O.co2;1-O.c
o2;2-C.ar;2-C.ar 0-O.co2;1-C.2;2-C.ar;2-O.co2 0-O.co2;1-C.2;2-C.ar;2-O.co2
*/
}
}




Addendum: If your application targets v2.0 of the .NET Framework, this MSDN article contains instructions on how to add support for extension methods. The relevant section is about 90% of the way down, under the heading:

Extension Methods in .NET Framework 2.0 Apps

2 comments:

Noel O'Boyle said...

Could you take the same approach as I did with Python to create wrapper for the various Iterators?

mesprague said...

I plan to. This is just a quick fix for people who want enumerators right now. That and I've gone extension method crazy since C# 3.5 came out.