This is a solution for the hands-on assignment "Text Generation using Recurrent Neural Networks":
```python
## load data

import re
import pandas

data = pandas.read_csv('ner_dataset.csv')
words = list(data['Word'])
tags = list(data['Tag'])

# Collect person names: a 'B-per' tag starts a name,
# an 'I-per' tag continues the previous one.
names = list()
for word, tt in zip(words, tags):
    if tt == 'O': continue
    if not tt.endswith('-per'): continue
    if tt == 'B-per':
        names.append(word)
    else:
        names[-1] = names[-1] + ' ' + word

# Only alphabets, hyphens and spaces allowed.
# Minimum 2 words. (Filters out names like "Bush")
names = [name for name in names if re.match(r'^[A-Za-z\- ]*$', name) and ' ' in name]
names = list(set(names))  # unique
print('Number of names:', len(names))

train_data = names[:3600]
test_data = names[3600:4000]
print(len(train_data), train_data[:10])

all_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ- $'  # $ denotes BOS / EOS
n_letters = len(all_letters)

################################################################################
################################################################################
# Creating the Network

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

class RNN(nn.Module):

    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        ## declare the layers
        # rnn layer 1. input = input + hidden values from previous time step
        self.rnn1 = nn.Linear(input_size + hidden_size, hidden_size)
        # rnn layer 2. input = hidden values from rnn1 + hidden values from previous time step
        self.rnn2 = nn.Linear(hidden_size + hidden_size, hidden_size)
        # final layer. output from rnn2 to final output
        self.output = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        hidden1 = torch.sigmoid(self.rnn1(torch.cat((input, hidden[0]), 1)))
        hidden1 = F.dropout(hidden1, p=0.2, training=self.training)
        hidden2 = torch.sigmoid(self.rnn2(torch.cat((hidden1, hidden[1]), 1)))
        hidden2 = F.dropout(hidden2, p=0.2, training=self.training)
        output = self.softmax(self.output(hidden2))
        return output, (hidden1, hidden2)

    def init_hidden(self):
        # initialize hidden layers to 0 at the beginning
        return (Variable(torch.zeros(1, self.hidden_size)),
                Variable(torch.zeros(1, self.hidden_size)))

rnn = RNN(n_letters, 128, n_letters)
criterion = nn.NLLLoss()  # negative log likelihood

################################################################################
# Preparing Data for Training

# One-hot matrix of first to last letters (+BOS) for input
def input_tensor(name, bos=True):
    input = ('$' + name) if bos else name
    tensor = torch.zeros(len(input), 1, n_letters)
    for idx, letter in enumerate(input):
        tensor[idx][0][all_letters.find(letter)] = 1
    return tensor

# Index of first letter to last letter (+EOS) for target
def target_tensor(name):
    target = name + '$'
    letter_indexes = [all_letters.find(letter) for letter in target]
    return torch.LongTensor(letter_indexes)

print(input_tensor('Keshav Dhandhania'))
print(target_tensor('Keshav Dhandhania'))
```
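To make the shapes concrete: `input_tensor` prepends the BOS marker `$` and returns a one-hot tensor of shape `(len(name)+1, 1, n_letters)`, while `target_tensor` appends the EOS marker and returns `len(name)+1` class indices. At every time step the network is fed character `i` of `$name` and trained to predict character `i` of `name$`, i.e. the next character. Here is a quick sanity check of that offset-by-one alignment (a sketch; it assumes `input_tensor`, `target_tensor` and `n_letters` from above are in scope, and `'Ada Lovelace'` is just an illustrative name):

```python
# Sanity check: input and target are the same sequence, offset by one step.
name = 'Ada Lovelace'
inp, tgt = input_tensor(name), target_tensor(name)
assert inp.shape == (len(name) + 1, 1, n_letters)  # '$' + name, one-hot
assert tgt.shape == (len(name) + 1,)               # name + '$', indices
# The character predicted at step idx-1 is the character fed at step idx:
for idx in range(1, len(name) + 1):
    assert inp[idx][0][tgt[idx - 1]] == 1
```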
```python
################################################################################
################################################################################
# Training

import sys

log_interval = 100
learning_rate = 0.01
previous_loss = 100.0

def train():
    global learning_rate, previous_loss
    print('Using learning rate: %.5f' % learning_rate)
    rnn.train()
    running_loss = 0.0
    for ii, name in enumerate(train_data):
        if ii % log_interval == 0:
            print('.', end='')
            sys.stdout.flush()
        input, target = Variable(input_tensor(name)), Variable(target_tensor(name))
        rnn.zero_grad()
        hidden = rnn.init_hidden()
        loss = 0
        for idx in range(len(name)+1):
            output, hidden = rnn(input[idx], hidden)
            loss += criterion(output, target[idx].unsqueeze(0))
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(rnn.parameters(), 0.25)
        # manual SGD step: p <- p - learning_rate * grad
        for p in rnn.parameters():
            p.data.add_(p.grad.data, alpha=-learning_rate)
        running_loss += loss.item()
    print('')
    avg_loss = running_loss / sum(len(name) for name in train_data)
    # decay the learning rate whenever the loss stops improving
    if previous_loss < avg_loss: learning_rate *= 0.8
    previous_loss = avg_loss
    print('Training loss: %.3f' % avg_loss)

################################################################################
# Testing

def test():
    rnn.eval()
    running_loss = 0.0
    for name in test_data:
        input, target = Variable(input_tensor(name)), Variable(target_tensor(name))
        hidden = rnn.init_hidden()
        loss = 0
        for idx in range(len(name)+1):
            output, hidden = rnn(input[idx], hidden)
            loss += criterion(output, target[idx].unsqueeze(0))
        running_loss += loss.item()
    avg_loss = running_loss / sum(len(name) for name in test_data)
    print('Testing loss: %.3f' % avg_loss)

################################################################################
################################################################################
# Sampling the network

from numpy.random import choice

max_length = 50

# Sample given a starting prefix
def sample(prefix=''):
    rnn.eval()
    output_string = prefix
    hidden = rnn.init_hidden()  # get initial hidden vector
    for idx in range(max_length):
        input = Variable(input_tensor(output_string))
        output, hidden = rnn(input[idx], hidden)  # execute one time step of rnn
        if idx < len(prefix): continue  # still 'feeding' in the prefix, no need to change output_string
        probabilities = torch.exp(output).squeeze().data.numpy()  # probabilities from log-probabilities
        probabilities /= probabilities.sum()  # renormalize to guard against float rounding
        sample_idx = choice(n_letters, 1, p=probabilities)[0]  # sample an index in [0, n_letters)
        if sample_idx == n_letters - 1: break  # EOS
        letter = all_letters[sample_idx]
        output_string += letter
    print('Prefix = "%s".' % prefix, 'Generated string =', output_string)
    return output_string

# Get a few unconditioned samples, plus one sample per starting letter
def samples():
    for i in range(5):
        sample()
    for start_letter in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ':
        sample(start_letter)

################################################################################
################################################################################
# run

import datetime

start = datetime.datetime.now()

def time_since(since):
    delta = datetime.datetime.now() - since
    return '%dm %ds' % (delta.total_seconds() / 60.0, delta.total_seconds() % 60.0)

nepochs = 100
for iepoch in range(1, nepochs+1):
    print('='*100)
    print('Starting epoch', iepoch)
    train()
    test()
    samples()
    print('Time since start:', time_since(start))

################################################################################
```
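A natural extension of `sample()` (not part of the assignment solution above, just a sketch) is temperature-scaled sampling: divide the log-probabilities by a temperature before exponentiating. Temperatures below 1 sharpen the distribution and produce more conservative, more name-like strings; temperatures above 1 flatten it and produce more diverse ones. The `sample_step` helper and its `temperature` parameter below are hypothetical names introduced for illustration:

```python
import torch
from numpy.random import choice

def sample_step(output, temperature=0.8):
    # `output` is one time step of log-softmax scores, shape (1, n_letters).
    # temperature < 1 sharpens the distribution, > 1 flattens it;
    # the solution above effectively samples at temperature 1.
    p = torch.exp(output / temperature).squeeze().data.numpy()
    p /= p.sum()  # renormalize after temperature scaling
    return choice(len(p), 1, p=p)[0]
```

Inside `sample()`, this would replace the two lines that compute `probabilities` and `sample_idx`.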