## Saturday, June 4, 2011

### Python wrapper for running GraphLab GaBP linear solver

I got from Daniel Zerbino, a postdoc in UCSC, who is working on smoothing genomic sequencing measurements with constraints, a Python script for converting data to GraphLab GaBP format. Here it is:

```#!/usr/bin/env python

#############################################
# Python wrapper for GaBP application in GraphLab
# By Daniel Zerbino, based on Matlab code by Danny Bickson
#############################################

import sys
import os
import tempfile
import struct
import numpy as np
import subprocess

#############################################
#############################################

def writeDouble(F, x):
F.write(struct.pack('<d', x))

def writeVector(F, vector):
for X in vector:
writeDouble(F, X)

def writeInt(F, x):
F.write(struct.pack('<i', x))

return [readDouble(F) for i in range(n)]

#############################################
# Convenience MatLab like functions
#############################################

def enumerate(M):
for i in range(np.shape(M)[0]):
for j in range(np.shape(M)[1]):
# Beware of Matlab numbering!!
yield i+1, j+1, M[i,j]

def find(M):
return filter(lambda X: X[2] != 0, enumerate(M))

#############################################
# script for exporting system of linear equations of the type Ax = y to
# graphlab format.
# Written by Danny Bickson, CMU
# Input: fn - output file name
# A - A mxn matrix (if A is square than m=n)
# y - mx1 observation vector
# x - nx1 known solution (optional, if not given will write a vector of zeros)
# sigma - a vector m+nx1 of noise levels (optional, for non-square matrices only)
# Conversion to Python by Daniel Zerbino, UCSC
#############################################

def save_c_gl(fn, A, y, x=None, sigma_y=None, sigma_x=None, square=False):
m, n = np.shape(A)
if len(y) != m:
sys.exit('y vector should be of len as the number of A rows (%i and %i resp.)' % (len(y), m))

if x is not None and len(x) != n:
sys.exit('x vector length should be as the matrix A columns')

if sigma_y is None and sigma_x is not None:
sys.exit("sigma_y should be provided when entering sigma_x")

if sigma_x is None and sigma_y is not None:
sys.exit("sigma_x should be provided when entering sigma_y")

if sigma_x is not None:
if square:
sys.exit('sigma noise level input is allowed only for non-square matrices')
else:
if len(sigma_x) != n:
sys.exit('sigma_x length should be number of cols of A')
if len(sigma_y) != m:
sys.exit('sigma_y length should be number of rows of A')

if square:
# matrix is square, edges are non digonal entries
vals = find((A - np.diag(np.diag(A))))
print "Saving a square matrix A"
else:
# matrix is not square, edges are non zero values
vals = find(A)
print "Saving a non-square matrix A"

F = open(fn, 'wb')

#write matrix size
writeInt(F, m)
writeInt(F, n)

# write y (the observation), x (the solution, if known), diag(A) the
# variance (if known, else default variance of 1)
writeVector(F, y)
if x is not None:
writeVector(F, x)
else:
writeVector(F, (0 for x in range(n)))

if square:
writeVector(F, diag(A))
else:
if sigma_y is not None:
writeVector(F, sigma_y)
writeVector(F, sigma_x)
else:
writeVector(F, (1 for x in range(m+n)))

#write number of edges
assert len(vals) > 0
writeInt(F, len(vals))
# pad with zeros for 64 bit offset
writeInt(F, 0)

if not square:
offset = m
else:
offset = 0

#write all edges
for val in vals:
writeInt(F, val[0])
writeInt(F, val[1] + offset)
writeDouble(F, val[2])

F.close()

F = open(fn,'rb')
assert x == m
F.close()

print 'Wrote succesfully into file: %s' % fn

########################################################
#script for reading the output of the GaBP GraphLab program into matlab
# returns x = inv(A)*b as computed by GaBP
# returns diag = diag(inv(A)) - an approximation to the main diagonal of
# the inverse matrix of A.
# Written by Danny Bickson, CMU
# Conversion to Python by Daniel Zerbino, UCSC
########################################################

F = open(filename, 'rb')

F.close()
os.remove(filename)
return x, diag

########################################################
## Wrapper Utility to be used from outside
########################################################

def runGaBP(convergence, A, y, sigma_y=None, x=None, sigma_x=None, square=False):
file, input = tempfile.mkstemp(dir='.')

save_c_gl(input, A, y, x=x, sigma_y=sigma_y, sigma_x=sigma_x, square=square)

args = ['gabp', '--data', input, '--threshold', str(convergence), '--algorithm', '0', '--scheduler=round_robin', '--square']
if not square:
args.append('false')
else:
args.append('true')
print "Running " + " ".join(args)
ret = subprocess.Popen(args, stdout=sys.stdout, stderr=subprocess.STDOUT).wait()

if ret != 0:
sys.exit("GaBP did not complete")

os.remove(input)
x2, diag = load_c_gl(input + ".out", len(x))
return x2, diag

#########################################################
## Unit test
#########################################################
def main():
A = np.array([[0.2785, 0.9649],[0.5469, 0.1576],[0.9575, 0.9706]])
y = np.array([1.2434, 0.7045, 1.9281])
sigma_y= np.array([1e-10, 1e-10, 1e-10])
x = np.array([0, 0])
sigma_x = np.array([1, 1])
convergence = 1e-10
x2, diag = runGaBP(convergence, A, y, sigma_y=sigma_y, x=x, sigma_x=sigma_x)

print 'A'
print A
print 'y'
print y
print 'Initial X'
print x
print 'Initial Error'
print A.dot(x) - y
print 'Final X'
print x2
print 'Final Error'
print A.dot(x2) - y
print 'diag'
print diag

if __name__=='__main__':
main()

```