Vectorized Code

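Prefer NumPy's built-in functions and array operators to explicit Python for loops. They operate on whole arrays at once in compiled code, so they are usually much faster and also shorter to write.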

In [1]:
% matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

theta0 = 1
theta1 = 2

n = 20

x = np.random.rand(n)
y = theta0 + theta1*x + 0.1*np.random.randn(n)

plt.scatter(x,y)
Out[1]:
<matplotlib.collections.PathCollection at 0x10c6e1c90>
In [2]:
# Compute the cost: the sum of squared residuals between the observations y
# and the predictions of the line theta0 + theta1*x.
pred = theta0 + theta1*x

residual = y - pred

# Two mathematically equivalent vectorized forms: square elementwise and sum,
# or take the dot product of the residual vector with itself.
cost = np.sum(residual**2)

cost2 = np.dot(residual, residual)

# print(...) with a single argument works on both Python 2 and Python 3.
print(cost)
print(cost2)
0.261029809146
0.261029809146
In [3]:
# Same sum of squared residuals as above, without intermediate variables.
print(np.sum( (theta0 + theta1*x - y)**2 )) # one liner
0.261029809146

Takeaway: avoid for loops whenever possible

  • There are many functions / operators available for elementwise and summary operations on arrays. Find them and use them!

  • Sometimes you can use linear algebra to compute what you want!

In [4]:
# Example: np.sum with no axis argument adds up every entry of the array.
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

print(A)
print(np.sum(A))
[[1 2 3]
 [4 5 6]
 [7 8 9]]
45
In [5]:
# axis=0 sums down the rows, giving one total per column.
print(A)
print(np.sum(A, axis=0))
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[12 15 18]
In [6]:
# axis=1 sums across the columns, giving one total per row.
print(A)
print(np.sum(A, axis=1))
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[ 6 15 24]

Array slicing

In [7]:
# 3x3 integer array used by the slicing examples that follow.
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(A)
[[1 2 3]
 [4 5 6]
 [7 8 9]]
In [8]:
# Get first row of A (row index 0, every column)
print(A[0, :])
[1 2 3]
In [9]:
# Get second column of A (every row, column index 1)
print(A[:, 1])
[2 5 8]
In [10]:
# Assign to third row of A. This modifies A in place, so later cells see
# the new row.
A[2,:] = [10, 11, 12]
print(A)
[[ 1  2  3]
 [ 4  5  6]
 [10 11 12]]
In [11]:
# Get lower right block of A: rows 1-2 and columns 1-2 (the slice end
# index 3 is exclusive).
print(A[1:3, 1:3])
[[ 5  6]
 [11 12]]

Logical indexing

In [ ]:
# Note: this rebinds x, replacing the random data from the first cell.
x = np.array([1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1])

# Elementwise comparison: i is a boolean array, True where the entry of x
# is at least 4.
i = x >= 4

print(i)
In [ ]:
# Show the data, the boolean mask, and the entries the mask selects.
print(x)
print(i)
print(x[i]) # select entries of x for which i is true
In [ ]:
# Build the mask inline instead of storing it in a variable first.
print(x[x>=4]) # one-liner
In [ ]:
# Exercise: a >= b compares the two arrays elementwise, and the resulting
# boolean array is used to index a.
a = np.array([1, 2, 3, 4, 5])
b = np.array([5, 4, 3, 2, 1])
print(a[ a >= b ]) # What does this print?