
Linear Regression using PyTorch

In [16]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
In [17]:
N = 10   # number of data points
m = 0.9  # true slope
c = 1    # true intercept
x = np.linspace(0, 2*np.pi, N)
y = m*x + c + np.random.normal(0, 0.3, x.shape)  # noisy line: Gaussian noise, std 0.3
plt.figure()
plt.plot(x,y,'o')
plt.xlabel('x')
plt.ylabel('y')
plt.title('2D data (#data = %d)' % N)
plt.show()
In [18]:
import torch

Dataset

In [19]:
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        # Prepend a constant 1 to each feature so the weight on it acts as an
        # intercept term. An explicit float32 dtype avoids a float64/float32
        # mismatch with nn.Linear's default parameter dtype.
        sample = {
            'feature': torch.tensor([1, self.x[idx]], dtype=torch.float32),
            'label': torch.tensor([self.y[idx]], dtype=torch.float32)}
        return sample
In [20]:
dataset = MyDataset(x, y)
for i in range(len(dataset)):
    sample = dataset[i]
    print(i, sample['feature'], sample['label'])
0 tensor([1., 0.]) tensor([1.0971])
1 tensor([1.0000, 0.6981]) tensor([1.4373])
2 tensor([1.0000, 1.3963]) tensor([2.6160])
3 tensor([1.0000, 2.0944]) tensor([2.6413])
4 tensor([1.0000, 2.7925]) tensor([3.4891])
5 tensor([1.0000, 3.4907]) tensor([4.2880])
6 tensor([1.0000, 4.1888]) tensor([4.4478])
7 tensor([1.0000, 4.8869]) tensor([5.5624])
8 tensor([1.0000, 5.5851]) tensor([6.1863])
9 tensor([1.0000, 6.2832]) tensor([6.1383])
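As an aside, the same feature/label pairing can be built without a custom class using torch's built-in TensorDataset; a minimal sketch, assuming the x and y arrays above (note it yields (feature, label) tuples rather than dicts):

from torch.utils.data import TensorDataset

features = torch.tensor(np.vstack([np.ones_like(x), x]).T, dtype=torch.float32)  # rows of [1, x]
labels = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # column vector of targets
tensor_dataset = TensorDataset(features, labels)
print(tensor_dataset[0])  # (tensor([1., 0.]), tensor([1.0971])) for the run above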

Dataloader

In [21]:
from torch.utils.data import DataLoader

dataset = MyDataset(x, y)
batch_size = 4    # 10 samples -> two batches of 4 and one of 2
shuffle = True    # reshuffle the samples every epoch
num_workers = 4   # loader subprocesses; 0 would be fine for data this small
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
In [22]:
import pprint as pp
for i_batch, samples in enumerate(dataloader):
    print('\nbatch# = %s' % i_batch)
    print('samples: ')
    pp.pprint(samples)
batch# = 0
samples: 
{'feature': tensor([[1.0000, 2.7925],
        [1.0000, 2.0944],
        [1.0000, 4.8869],
        [1.0000, 6.2832]]),
 'label': tensor([[3.4891],
        [2.6413],
        [5.5624],
        [6.1383]])}

batch# = 1
samples: 
{'feature': tensor([[1.0000, 4.1888],
        [1.0000, 5.5851],
        [1.0000, 0.6981],
        [1.0000, 3.4907]]),
 'label': tensor([[4.4478],
        [6.1863],
        [1.4373],
        [4.2880]])}

batch# = 2
samples: 
{'feature': tensor([[1.0000, 1.3963],
        [1.0000, 0.0000]]),
 'label': tensor([[2.6160],
        [1.0971]])}
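The batching above is handled by the DataLoader's default collate function, which stacks same-keyed fields from the per-sample dicts into one tensor per key. Roughly equivalent by hand, for the first four samples:

batch = [dataset[i] for i in range(4)]
collated = {
    'feature': torch.stack([s['feature'] for s in batch]),  # shape (4, 2)
    'label': torch.stack([s['label'] for s in batch]),      # shape (4, 1)
}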

Model

In [23]:
import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MyModel, self).__init__()
        # A single linear layer y = xW^T + b is all linear regression needs.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return out

Setting up the model for our problem

In [24]:
input_dim = 2   # features are [1, x]
output_dim = 1  # a single predicted value per sample

model = MyModel(input_dim, output_dim)
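The nn.Linear layer starts from randomly initialised parameters: a 1×2 weight acting on our [1, x] features, plus the layer's own scalar bias. They can be inspected before training (the values differ from run to run):

for name, param in model.named_parameters():
    print(name, param.shape, param.data)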

Cost function

Often called the loss or error function

In [25]:
cost = nn.MSELoss()
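MSELoss computes the mean of the squared differences between predictions and targets. A quick sanity check on two hand-picked tensors:

pred = torch.tensor([[1.0], [2.0]])
target = torch.tensor([[0.0], [4.0]])
print(cost(pred, target))             # tensor(2.5000)
print(((pred - target) ** 2).mean())  # the same value, computed by hand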

Minimizing the cost function

In other words, training (learning from the data)

In [26]:
num_epochs = 10  # number of passes over the entire training set
l_rate = 0.01
optimiser = torch.optim.SGD(model.parameters(), lr=l_rate)

dataset = MyDataset(x, y)
batch_size = 4
shuffle = True
num_workers = 4
training_sample_generator = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

for epoch in range(num_epochs):
    print('Epoch = %s' % epoch)
    for batch_i, samples in enumerate(training_sample_generator):
        predictions = model(samples['feature'])
        error = cost(predictions, samples['label'])
        print('\tBatch = %s, Error = %s' % (batch_i, error.item()))
        
        # Before the backward pass, use the optimiser to zero all of the
        # gradients of the tensors it will update (the learnable weights of
        # the model). By default, gradients are accumulated in buffers (i.e.
        # not overwritten) whenever .backward() is called. Check out the docs
        # of torch.autograd.backward for more details.
        optimiser.zero_grad()
        
        # Backward pass: compute gradient of the loss with respect to model
        # parameters
        error.backward()
        
        # Calling the step function on an Optimizer makes an update to its
        # parameters
        optimiser.step()
Epoch = 0
	Batch = 0, Error = 3.3245582580566406
	Batch = 1, Error = 1.638617753982544
	Batch = 2, Error = 0.3832966983318329
Epoch = 1
	Batch = 0, Error = 0.7121678590774536
	Batch = 1, Error = 0.21079406142234802
	Batch = 2, Error = 0.5607050061225891
Epoch = 2
	Batch = 0, Error = 0.5053210258483887
	Batch = 1, Error = 0.03993864730000496
	Batch = 2, Error = 0.30065447092056274
Epoch = 3
	Batch = 0, Error = 0.2350146770477295
	Batch = 1, Error = 0.30180448293685913
	Batch = 2, Error = 0.16363243758678436
Epoch = 4
	Batch = 0, Error = 0.07132617384195328
	Batch = 1, Error = 0.3243466913700104
	Batch = 2, Error = 0.3382103145122528
Epoch = 5
	Batch = 0, Error = 0.39112815260887146
	Batch = 1, Error = 0.1373337060213089
	Batch = 2, Error = 0.04603620246052742
Epoch = 6
	Batch = 0, Error = 0.2974059581756592
	Batch = 1, Error = 0.19180424511432648
	Batch = 2, Error = 0.04105750471353531
Epoch = 7
	Batch = 0, Error = 0.24729019403457642
	Batch = 1, Error = 0.06893004477024078
	Batch = 2, Error = 0.35701268911361694
Epoch = 8
	Batch = 0, Error = 0.16357268393039703
	Batch = 1, Error = 0.30955955386161804
	Batch = 2, Error = 0.04322194680571556
Epoch = 9
	Batch = 0, Error = 0.1650623083114624
	Batch = 1, Error = 0.12940038740634918
	Batch = 2, Error = 0.32625168561935425
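Because the features already carry an explicit constant 1, the effective intercept is the weight on that constant plus nn.Linear's own bias term. A quick comparison against the ground truth m = 0.9, c = 1 (the values are only approximate after 10 epochs and vary from run to run):

w = model.linear.weight.data.numpy().ravel()  # weights for the [1, x] features
b = model.linear.bias.data.numpy()
print('slope     ~ %.3f (true m = 0.9)' % w[1])
print('intercept ~ %.3f (true c = 1.0)' % (w[0] + b[0]))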

Let's see how well the model has learned the data

In [27]:
x_for_plotting = np.linspace(0, 2*np.pi, 1000)
design_matrix = torch.tensor(np.vstack([np.ones(x_for_plotting.shape), x_for_plotting]).T, dtype=torch.float32)
print('Design matrix shape:', design_matrix.shape)

y_for_plotting = model(design_matrix)  # call the module directly rather than .forward()
print('y_for_plotting shape:', y_for_plotting.shape)
Design matrix shape: torch.Size([1000, 2])
y_for_plotting shape: torch.Size([1000, 1])
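Since we only need predictions here, the forward pass could also be wrapped in torch.no_grad() so that no autograd graph is built; the result can then be converted with .numpy() directly instead of going through .data:

with torch.no_grad():
    y_for_plotting = model(design_matrix)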
In [28]:
plt.figure()
plt.plot(x,y,'o')
plt.plot(x_for_plotting, y_for_plotting.data.numpy(), 'r-')
plt.xlabel('x')
plt.ylabel('y')
plt.title('2D data (#data = %d)' % N)
plt.show()