#!/usr/bin/env python ############################################# # Python wrapper for GaBP application in GraphLab # By Daniel Zerbino, based on Matlab code by Danny Bickson ############################################# import sys import os import tempfile import struct import numpy as np import subprocess ############################################# # Convenience binary writer/reader functions: ############################################# def writeDouble(F, x): F.write(struct.pack('<d', x)) def writeVector(F, vector): for X in vector: writeDouble(F, X) def writeInt(F, x): F.write(struct.pack('<i', x)) def readInt(F): return struct.unpack('<i', F.read(struct.calcsize('i')))[0] def readDouble(F): return struct.unpack('<d', F.read(struct.calcsize('d')))[0] def readVector(F, n): return [readDouble(F) for i in range(n)] ############################################# # Convenience MatLab like functions ############################################# def enumerate(M): for i in range(np.shape(M)[0]): for j in range(np.shape(M)[1]): # Beware of Matlab numbering!! yield i+1, j+1, M[i,j] def find(M): return filter(lambda X: X[2] != 0, enumerate(M)) ############################################# # script for exporting system of linear equations of the type Ax = y to # graphlab format. # Written by Danny Bickson, CMU # Input: fn - output file name # A - A mxn matrix (if A is square than m=n) # y - mx1 observation vector # x - nx1 known solution (optional, if not given will write a vector of zeros) # sigma - a vector m+nx1 of noise levels (optional, for non-square matrices only) # Conversion to Python by Daniel Zerbino, UCSC ############################################# def save_c_gl(fn, A, y, x=None, sigma_y=None, sigma_x=None, square=False): m, n = np.shape(A) if len(y) != m: sys.exit('y vector should be of len as the number of A rows (%i and %i resp.)' % (len(y), m)) if x is not None and len(x) != n: sys.exit('x vector length should be as the matrix A columns') if sigma_y is None and sigma_x is not None: sys.exit("sigma_y should be provided when entering sigma_x") if sigma_x is None and sigma_y is not None: sys.exit("sigma_x should be provided when entering sigma_y") if sigma_x is not None: if square: sys.exit('sigma noise level input is allowed only for non-square matrices') else: if len(sigma_x) != n: sys.exit('sigma_x length should be number of cols of A') if len(sigma_y) != m: sys.exit('sigma_y length should be number of rows of A') if square: # matrix is square, edges are non digonal entries vals = find((A - np.diag(np.diag(A)))) print "Saving a square matrix A" else: # matrix is not square, edges are non zero values vals = find(A) print "Saving a non-square matrix A" F = open(fn, 'wb') #write matrix size writeInt(F, m) writeInt(F, n) # write y (the observation), x (the solution, if known), diag(A) the # variance (if known, else default variance of 1) writeVector(F, y) if x is not None: writeVector(F, x) else: writeVector(F, (0 for x in range(n))) if square: writeVector(F, diag(A)) else: if sigma_y is not None: writeVector(F, sigma_y) writeVector(F, sigma_x) else: writeVector(F, (1 for x in range(m+n))) #write number of edges assert len(vals) > 0 writeInt(F, len(vals)) # pad with zeros for 64 bit offset writeInt(F, 0) if not square: offset = m else: offset = 0 #write all edges for val in vals: writeInt(F, val[0]) writeInt(F, val[1] + offset) writeDouble(F, val[2]) F.close() #verify written file header F = open(fn,'rb') x = readInt(F) assert x == m F.close() print 'Wrote succesfully into file: %s' % fn ######################################################## #script for reading the output of the GaBP GraphLab program into matlab # returns x = inv(A)*b as computed by GaBP # returns diag = diag(inv(A)) - an approximation to the main diagonal of # the inverse matrix of A. # Written by Danny Bickson, CMU # Conversion to Python by Daniel Zerbino, UCSC ######################################################## def load_c_gl(filename, columns): F = open(filename, 'rb') x = readVector(F, columns) diag = readVector(F, columns) F.close() os.remove(filename) return x, diag ######################################################## ## Wrapper Utility to be used from outside ######################################################## def runGaBP(convergence, A, y, sigma_y=None, x=None, sigma_x=None, square=False): file, input = tempfile.mkstemp(dir='.') save_c_gl(input, A, y, x=x, sigma_y=sigma_y, sigma_x=sigma_x, square=square) args = ['gabp', '--data', input, '--threshold', str(convergence), '--algorithm', '0', '--scheduler=round_robin', '--square'] if not square: args.append('false') else: args.append('true') print "Running " + " ".join(args) ret = subprocess.Popen(args, stdout=sys.stdout, stderr=subprocess.STDOUT).wait() if ret != 0: sys.exit("GaBP did not complete") os.remove(input) x2, diag = load_c_gl(input + ".out", len(x)) return x2, diag ######################################################### ## Unit test ######################################################### def main(): A = np.array([[0.2785, 0.9649],[0.5469, 0.1576],[0.9575, 0.9706]]) y = np.array([1.2434, 0.7045, 1.9281]) sigma_y= np.array([1e-10, 1e-10, 1e-10]) x = np.array([0, 0]) sigma_x = np.array([1, 1]) convergence = 1e-10 x2, diag = runGaBP(convergence, A, y, sigma_y=sigma_y, x=x, sigma_x=sigma_x) print 'A' print A print 'y' print y print 'Initial X' print x print 'Initial Error' print A.dot(x) - y print 'Final X' print x2 print 'Final Error' print A.dot(x2) - y print 'diag' print diag if __name__=='__main__': main()
Saturday, June 4, 2011
Python wrapper for running GraphLab GaBP linear solver
I got from Daniel Zerbino, a postdoc in UCSC, who is working on smoothing genomic sequencing measurements with constraints, a Python script for converting data to GraphLab GaBP format. Here it is:
No comments:
Post a Comment