{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Problem Set 11 Code" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%pylab inline\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# The Framingham Risk Score Revisited" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part a" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Importing medical data\n", "import scipy.io\n", "\n", "# LOADS IN THE MEDICAL DATA IN THE FORM OF A PYTHON DICTIONARY.\n", "# Data credit: CDC http://www.cdc.gov/nchs/nhanes.htm\n", "data = scipy.io.loadmat('CVDdata.mat')\n", "\n", "#UNPACKING DATA INTO COLUMN VECTORS\n", "AGE = data['AGE']\n", "TC = data['TC']\n", "HDL = data['HDL']\n", "SBP = data['SBP']\n", "DIA = data['DIABETIC']\n", "SMK = data['SMOKER']\n", "p = data['pNoisy']\n", "\n", "# Write expressions for b, A1, A2, A3, A4, A5, A6\n", "# It will help to use the identity log_n(z) = log(z)/log(n)\n", "\n", "b = \n", "\n", "A1 = \n", "A2 = \n", "A3 = \n", "A4 = \n", "A5 = \n", "A6 = " ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Part b" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Write expressions for b and A\n", "# the function np.hstack will be helpful for constructing A\n", "b = \n", "A = " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part c" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "# Write an expression for xhat\n", "\n", "xhat = \n", "\n", "print(\"The estimated values for x are xhat=\" + str(xhat.T))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part d" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# the model estimate bhat, and the squared error e2\n", "bhat = \n", "e2 = \n", "\n", "print(\"The model's prediction of b is \" + str(bhat))\n", "print(\"The sum of squared errors is \" + str(e2))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part e" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Linear plots, pick an index below (0,1,2,etc). This code will plot b and bhat vs Ai\n", "i = 3\n", "plt.plot(A[:,i],b,'ob')\n", "plt.plot(A[:,i],bhat,'or')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part f" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Here are the values for the test plot\n", "age_test = 55\n", "tc_test_vector = np.linspace(100,350,(350-100+1))\n", "hdl_test = 25\n", "sbp_test = 220\n", "dia_test = 1\n", "smk_test = 1\n", "\n", "A2_test = np.zeros(tc_test_vector.size)\n", "b_test = np.zeros(tc_test_vector.size)\n", "\n", "for ind in range(tc_test_vector.size):\n", " tc_test = tc_test_vector[ind];\n", " # Use the values for age_test, tc_test, hdl_test, sbp_test, dia_test\n", " # and smk_test to calculate the next value for b_test (y axis value)\n", " # and A2_test (x_axis value)\n", " b_test[ind] = \n", " A2_test[ind] =\n", "\n", "plt.plot(A2_test,b_test,'-b')" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Part g" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Perturb xhat from the solution above, store into x_perturbed and replot.\n", "\n", "# Use the following example expression with different pertrubations.\n", "x_perturbed = xhat+np.matrix([.1, 0.2, 0.2, -0.3, 0.1, 0.32]).T\n", "\n", "# What are the new estimated b values in terms of x_perturbed?\n", "b_perturbed = \n", "\n", "# Plot again\n", "plt.plot(A[:,i],b,'ob')\n", "plt.plot(A[:,i],b_perturbed,'or')\n", "\n", "# What is the new sum of squared errors (after perturbing)?\n", "e2_perturbed = \n", "\n", "print(\"The sum of squared errors after perturbing is \" + str(e2_perturbed))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Part h" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Nonlinear plots, pick an index below (0,1,2,etc). This code will plot b and bhat vs Ai\n", "i = 3\n", "\n", "# Write an expression for estimated p values here\n", "p_estimated = \n", "\n", "plt.plot(np.exp(A[:,i]),p,'ob')\n", "plt.plot(np.exp(A[:,i]),p_estimated,'or')" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Part i" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "# transform b_test into p_test\n", "p_test =\n", "\n", "plt.plot(tc_test_vector,p_test,'-b')" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Finding Signals in Noise" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Run this first\n", "%matplotlib inline \n", "import numpy as np\n", "import scipy as sp\n", "import scipy.linalg as la\n", "import pylab as plt\n", "import numpy.random\n", "\n", "N = 1000\n", "\n", "def rand_vector(n): # returns a random {+1, -1} vector of length n\n", " return np.random.randint(2, size=n)*2 - 1.0\n", "\n", "def rand_normed_vector(n): # returns a random normalized vector of length n\n", " x = rand_vector(n)\n", " return x / la.norm(x)\n", "\n", "def cross_corr(f, g):\n", " # returns the cross-correlation (a vector of all the inner-products of 'g' with shifted versions of 'f')\n", " C = la.circulant(f)\n", " corr = C.T.dot(g)\n", " return corr" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# (a)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# generate a random normalized vector for s1\n", "# (running this cell again will generate a new random vector)\n", "s1 = rand_normed_vector(N)\n", "\n", "# compute all the inner-products of s1 with shifted versions of s1\n", "# (ie, the cross-correlation of s1 with s1)\n", "corr = cross_corr(s1, s1)\n", "\n", "# The inner-prouct is:\n", "print(corr[1])\n", "\n", "# np.roll circularly shifts the signal\n", "# so the above inner-product could be computed as:\n", "print(np.dot(s1, np.roll(s1,1)))\n", "\n", "# Plot the autocorrelation:\n", "plt.title(\"Autocorrelation s1\")\n", "plt.plot(corr)\n", "\n", "x1,x2,y1,y2 = plt.axis()\n", "plt.axis([x1-50,x2+50,y1,y2])\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (b)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "y = np.roll(s1, 10) # Received y = s1 shifted by 10\n", "\n", "# Compute the cross-correlation (all the inner-products of y with shifted versions of s1)\n", "corr = cross_corr(s1, y)\n", "\n", "# Plot\n", "plt.title(\"cross-correlation s1, y\")\n", "plt.plot(corr)\n", "\n", "x1,x2,y1,y2 = plt.axis()\n", "plt.axis([x1-50,x2+50,y1,y2])\n", "plt.show()\n", "\n", "# Find the index of maximum correlation (inner-product)\n", "print(np.argmax(corr))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (c)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# generate a random normalized vector for s1,\n", "# and a random normalized vector for n\n", "# (running this cell again will generate new random vectors)\n", "s1 = rand_normed_vector(N)\n", "n = rand_normed_vector(N)\n", "\n", "print(np.abs(np.dot(s1, n)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (d)\n", "This is the code from part (b), but with the received signal y additionally corrupted by noise" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "s1 = rand_normed_vector(N)\n", "n = rand_normed_vector(N)\n", "y = np.roll(s1, 10) + 0.1*n\n", "\n", "corr = cross_corr(s1, y)\n", "\n", "plt.title(\"cross-correlation s1, y\")\n", "plt.plot(corr)\n", "\n", "x1,x2,y1,y2 = plt.axis()\n", "plt.axis([x1-50,x2+50,y1,y2])\n", "plt.show()\n", "\n", "# Find the index of maximum correlation (inner-product)\n", "np.argmax(corr)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (e)\n", "Copy the code provided for part (d), but modify appropriately so the noise is higher.\n", "You should generate two cross-correlation plots, one for each noise level in the question.\n", "(For example, you can just copy the code from part (d) twice.)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## CODE HERE" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (f)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "s1 = rand_normed_vector(N)\n", "s2 = rand_normed_vector(N)\n", "\n", "y = np.roll(s1, 10) + np.roll(s2, 100)\n", "\n", "# Compute cross-correlations:\n", "corr_s1_y = cross_corr(s1, y)\n", "corr_s2_y = cross_corr(s2, y)\n", "\n", "# Plot cross-correlations:\n", "plt.title(\"cross-correlation s1, y\")\n", "plt.plot(cross_corr(s1, y))\n", "x1,x2,y1,y2 = plt.axis()\n", "plt.axis([x1-50,x2+50,y1,y2])\n", "plt.show()\n", "\n", "plt.title(\"cross-correlation s2, y\")\n", "plt.plot(cross_corr(s2, y))\n", "x1,x2,y1,y2 = plt.axis()\n", "plt.axis([x1-50,x2+50,y1,y2])\n", "plt.show()\n", "\n", "j = np.argmax(corr_s1_y) # find the first signal delay (max index of correlation)\n", "k = np.argmax(corr_s2_y) # find the second signal delay\n", "print(j,k)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (g)\n", "This is the same code as part (f), but with slight modification to how the received signal y generated.\n", "Run the below cell a few times, to test for different choices of random signals." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "s1 = rand_normed_vector(N)\n", "s2 = rand_normed_vector(N)\n", "\n", "y = np.roll(s1, 10) + 0.1*np.roll(s2, 100)\n", "\n", "# Compute cross-correlations:\n", "corr_s1_y = cross_corr(s1, y)\n", "corr_s2_y = cross_corr(s2, y)\n", "\n", "# Plot cross-correlations:\n", "plt.title(\"cross-correlation s1, y\")\n", "plt.plot(cross_corr(s1, y))\n", "x1,x2,y1,y2 = plt.axis()\n", "plt.axis([x1-50,x2+50,y1,y2])\n", "plt.show()\n", "\n", "plt.title(\"cross-correlation s2, y\")\n", "plt.plot(cross_corr(s2, y))\n", "x1,x2,y1,y2 = plt.axis()\n", "plt.axis([x1-50,x2+50,y1,y2])\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (h)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "corr_s1_y = cross_corr(s1, y)\n", "j = np.argmax(corr_s1_y) # find the first signal delay\n", "print(j)\n", "\n", "# subtract out the contribution of the first signal\n", "y_prime = y - np.roll(s1, j)\n", "\n", "# correlate the residual against the second signal\n", "corr_s2_y = cross_corr(s2, y_prime)\n", "\n", "# Plot \n", "plt.title(\"cross-correlation s2, y'\")\n", "plt.plot(corr_s2_y)\n", "x1,x2,y1,y2 = plt.axis()\n", "plt.axis([x1-50,x2+50,y1,y2])\n", "plt.show()\n", "\n", "k = np.argmax(corr_s2_y) # find the second signal delay by looking at the index of max correlation\n", "print(k)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (i)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "s1 = rand_normed_vector(N)\n", "s2 = rand_normed_vector(N)\n", "\n", "y = 0.7*np.roll(s1, 10) + 0.5*np.roll(s2, 100)\n", "\n", "corr_s1_y = cross_corr(s1, y)\n", "j = np.argmax(corr_s1_y) # find the first signal delay\n", "\n", "corr_s2_y = cross_corr(s2, y)\n", "k = np.argmax(corr_s2_y) # find the second signal delay\n", "\n", "print(j, k)\n", "\n", "# Once we have found the shifts, estimate the coefficients as inner-products:\n", "a1 = np.dot(y, np.roll(s1, j))\n", "a2 = np.dot(y, np.roll(s2, k))\n", "\n", "print(a1, a2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# (j)\n", "This is the same code as part (i), but with noise added to the received signal y." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "s1 = rand_normed_vector(N)\n", "s2 = rand_normed_vector(N)\n", "n = rand_normed_vector(N)\n", "\n", "y = 0.7*np.roll(s1, 10) + 0.5*np.roll(s2, 100) + 0.1*n\n", "\n", "corr_s1_y = cross_corr(s1, y)\n", "j = np.argmax(corr_s1_y) # find the first signal delay\n", "\n", "corr_s2_y = cross_corr(s2, y)\n", "k = np.argmax(corr_s2_y) # find the second signal delay\n", "\n", "print(j, k)\n", "\n", "# Once we have found the shifts, estimate the coefficients as inner-products:\n", "a1 = np.dot(y, np.roll(s1, j))\n", "a2 = np.dot(y, np.roll(s2, k))\n", "\n", "print(a1, a2)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# (k)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Given the shifts j, k, setup the matrix A and vector b.\n", "\n", "# Hint: use np.roll(...) to circularly shift vectors.\n", "# For example, \"np.roll(s1, j)\" shifts the vector s1 by j indices.\n", "# A has columns c1, c2 which you should FILL IN BELOW.\n", "c1 = # FILL IN\n", "c2 = # FILL IN\n", "A = np.array([c1, c2]).T\n", "\n", "b = # FILL IN\n", "\n", "# Solve to find the linear least-square solution of Ax ~ b (minimizing error ||Ax - b||)\n", "xhat = la.inv(A.T.dot(A)).dot(A.T).dot(b)\n", "print(xhat)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# (l)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Load the signal vectors from file.\n", "npzfile = np.load(\"signals.npz\")\n", "y, s1, s2, s3 = [npzfile[f] for f in ['y','s1','s2','s3']]" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "Try to find the delays and coefficients of the three signals s1, s2, s2, from your received signal y.\n", "Hint: Make use of the provided code in the previous parts. This should be possible by mostly copy/pasting code.\n", "In particular, remember:\n", "\n", "- \"np.roll(s1, 123)\" circularly shifts vector s1 by 123\n", "- \"np.argmax(corr)\" finds the index of the maximum entry in vector \"corr\".\n", "\n", "Once you have found candidate delays j, k, l, try running the following function. You should recognize the output." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Test your j,k,l by running this function:\n", "def test(j,k,l):\n", " return [chr(int(i)) for i in (np.array([j,k,l])/20 + 60)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## TRY TO FIND THE SIGNALS HERE." ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Sparse imaging with OMP\n", "\n", "This example generates a sparse signal and tries to recover it using the orthogonal matching pursuit algorithm" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# imports\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from scipy import misc\n", "from IPython import display\n", "from simulator import *\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "measurements,A = simulate()\n", "\n", "# THE SETTINGS FOR THE IMAGE - PLEASE DO NOT CHANGE\n", "height = 91\n", "width = 120\n", "sparsity = 476\n", "numPixels = len(A[0])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# CHOOSE DIFFERENT MASKS TO PLOT\n", "chosenMaskToDisplay = 0\n", "\n", "M0 = A[chosenMaskToDisplay].reshape((height,width))\n", "plt.title('mask %d'%chosenMaskToDisplay)\n", "plt.imshow(M0, cmap=plt.cm.gray, interpolation='nearest');" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# measurements\n", "plt.title('measurements')\n", "plt.plot(measurements)\n", "plt.xlabel('measurement index')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# OMP algorithm\n", "# THERE ARE MISSING LINES THAT YOU NEED TO FILL\n", "def OMP(imDims, sparsity, measurements, A):\n", " r = measurements.copy()\n", " indices = []\n", " \n", " # threshold to check error. if error is below this value stop\n", " THRESHOLD = 0.1\n", " \n", " # for iterating to recover all signal\n", " i = 0\n", " \n", " while i < sparsity and np.linalg.norm(r) > THRESHOLD: \n", " # calculate the correlations\n", " print('%d - '%i,end=\"\",flush=True)\n", " corrs = A.T.dot(r)\n", "\n", " # Choose highest-correlated pixel location and add to collection\n", " # COMPLETE THE LINE BELOW\n", " best_index = np.argmax(np.abs(COMPLETE_HERE))\n", " indices.append(best_index)\n", "\n", " # Build the matrix made up of selected indices so far\n", " # COMPLETE THE LINE BELOW\n", " Atrunc = A[:,COMPLETE_HERE]\n", "\n", " # Find orthogonal projection of measurements to subspace\n", " # spanned by recovered codewords\n", " b = measurements\n", " # COMPLETE THE LINE BELOW\n", " xhat = np.linalg.lstsq(COMPLETE_HERE,COMPLETE_HERE)[0] \n", "\n", " # Find component orthogonal to subspace to use for next measurement\n", " # COMPLETE THE LINE BELOW\n", " r = b - Atrunc.dot(COMPLETE_HERE)\n", "\n", " # this is for viewing the recovery process\n", " if i%10 == 0 or i == sparsity-1 or np.linalg.norm(r) <= THRESHOLD:\n", " recovered_signal = np.zeros(numPixels)\n", " for j, x in zip(indices, xhat):\n", " recovered_signal[j] = x\n", " Ihat = recovered_signal.reshape(imDims)\n", " plt.title('estimated image')\n", " plt.imshow(Ihat, cmap=plt.cm.gray, interpolation='nearest') \n", " display.clear_output(wait=True)\n", " display.display(plt.gcf())\n", " \n", " i = i + 1\n", " \n", " display.clear_output(wait=True)\n", "\n", " # fill in the recovered signal\n", " recovered_signal = np.zeros(numPixels)\n", " for i, x in zip(indices, xhat):\n", " recovered_signal[i] = x\n", " \n", " return recovered_signal" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "rec = OMP((height,width),sparsity,measurements,A)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# OMP bonus part" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# the setting\n", "\n", "# file name for the sparse image\n", "fname = 'smiley.png'\n", "# number of measurements to be taken from the sparse image\n", "numMeasurements = 6500\n", "# the sparsity of the image\n", "sparsity = 400\n", "\n", "# read the image in black and white\n", "I = misc.imread(fname, flatten=1)\n", "# normalize the image to be between 0 and 1\n", "I = I/np.max(I)\n", "\n", "# shape of the image\n", "imageShape = I.shape\n", "# number of pixels in the image\n", "numPixels = I.size\n", "\n", "plt.title('input image')\n", "plt.imshow(I, cmap=plt.cm.gray, interpolation='nearest');" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# generate your image masks and the underlying measurement matrix\n", "Mask, A = randMasks(numMeasurements,numPixels)\n", "# vectorize your image\n", "full_signal = I.reshape((numPixels,1))\n", "# get the measurements\n", "measurements = np.dot(Mask,full_signal)\n", "# remove the mean from your measurements\n", "measurements = measurements - np.mean(measurements)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# measurements\n", "plt.title('measurements')\n", "plt.plot(measurements)\n", "plt.xlabel('measurement index')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "rec = OMP(imageShape,sparsity,measurements,A)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }