Sentiment Classification with TextCNN — Source Code
Date: 2023-05-23
Writer: 9tailwolf : doryeon514@gm.gist.ac.kr
Example: TextCNN implemented with PyTorch

Library
import torch
import torch.nn as nn
import torch.optim as optim
import time

TextCNN Model
class TextCNN(nn.Module):
    """Single-kernel TextCNN for sentence classification.

    Args:
        input_layer: vocabulary size (number of embedding rows).
        hidden_layer: embedding dimension.
        classes: number of output classes (also the conv out_channels).
        kernal: convolution window size along the token axis.
        input_length: fixed (padded/truncated) sentence length.

    Input shape:  (batch, 1, input_length) of token indices.
    Output shape: (batch, classes) of class probabilities.
    """

    def __init__(self, input_layer, hidden_layer, classes, kernal, input_length):
        super().__init__()
        # Length of the conv output along the token axis ("valid" convolution).
        self.layer_length = input_length - kernal + 1
        self.classes = classes
        self.E = nn.Embedding(input_layer, hidden_layer)
        # The kernel spans the full embedding dimension, so the conv reduces
        # the last axis to size 1 and acts as a 1-D convolution over tokens.
        self.CNN = nn.Conv2d(in_channels=1, out_channels=classes,
                             kernel_size=(kernal, hidden_layer))
        # Global max-pool over the remaining token positions.
        self.M = nn.MaxPool1d(kernel_size=self.layer_length)
        self.relu = nn.ReLU()
        # BUG FIX: nn.Softmax() without `dim` is deprecated and relies on
        # implicit dimension inference; normalize explicitly over the class
        # axis. NOTE(review): training pairs this softmax output with
        # nn.CrossEntropyLoss, which applies log-softmax itself — returning
        # raw logits would be cleaner, but the output contract is kept.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, X):
        # (batch, 1, L) -> (batch, 1, L, hidden)
        X = self.E(X)
        # conv -> (batch, classes, L-k+1, 1); drop the trailing singleton.
        cnn = self.relu(self.CNN(X)).view(-1, self.classes, self.layer_length)
        # max over token positions -> (batch, classes)
        m = self.M(cnn).view(-1, self.classes)
        return self.softmax(m)

# Tokenizer Function
def tokenizer(sentence):
    """Build vocabulary maps from a list of tokenized sentences.

    Index 0 is reserved for the padding token '.'.

    Args:
        sentence: iterable of token lists (one list per sentence).

    Returns:
        (dict_number, dict_word): word -> index and index -> word maps.
    """
    # BUG FIX: removed a stale duplicate signature
    # `def tokenizer(sentence, num, lim_word=15)` that shadowed this one;
    # the caller invokes tokenizer(data) with a single argument.
    dict_number = {'.': 0}
    dict_word = {0: '.'}
    for s in sentence:
        for word in s:
            if word not in dict_number:
                dict_number[word] = len(dict_number)
                dict_word[len(dict_number) - 1] = word
    return dict_number, dict_word

# Making Data Function
def make_data(sentence, dict_number, length):
    """Convert tokenized sentences into fixed-length index rows.

    Each sentence becomes a [[...]] row (extra axis = conv in_channel) of
    token indices, truncated to `length` or right-padded with index 0
    (the '.' padding token).

    Args:
        sentence: iterable of token lists.
        dict_number: word -> index map (every token must be present).
        length: fixed output row length.

    Returns:
        List of [[int, ...]] rows, one per sentence.
    """
    encoded = []
    for tokens in sentence:
        # Truncation and padding collapse into one path: slice first,
        # then pad whatever is missing (zero padding when already full).
        row = [dict_number[t] for t in tokens[:length]]
        row += [0] * (length - len(row))
        encoded.append([row])
    return encoded

# Main Function : Data
device = "cuda" if torch.cuda.is_available() else "cpu"

import pandas as pd  # NOTE(review): consider moving next to the top-level imports

# SST-2 TSV: column 0 is the sentence, column 1 is the 0/1 label.
dataset = pd.read_csv("/Users/9tailwolf/Downloads/SST-2/traindata.tsv", delimiter='\t', header=None).transpose()
# Whitespace-tokenize each sentence. NOTE(review): `w not in ',.0123456789'`
# is a substring test on that string — it drops single-character punctuation
# and digit tokens (and any token that happens to be a substring, e.g. '12');
# confirm this filter is intended.
data = [[w for w in dataset[i][0].split() if w not in ',.0123456789'] for i in range(1, dataset.shape[1])]
dict_number, dict_word = tokenizer(data)
X = make_data(data, dict_number, 15)
# One-hot targets: [1, 0] for label 0, [0, 1] for label 1.
Y = [[1 - int(dataset[i][1]), int(dataset[i][1])] for i in range(1, dataset.shape[1])]
X = torch.LongTensor(X).to(device)
Y = torch.Tensor(Y).to(device)

# Main Function
# BUG FIX: the model must live on the same device as X/Y, otherwise the
# forward pass crashes whenever CUDA is available.
model = TextCNN(len(dict_number), 50, 2, 5, 15).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.03)

epochs = 1000
for epoch in range(epochs):
    Y_pred = model(X)
    # NOTE(review): the model already applies softmax while
    # CrossEntropyLoss applies log-softmax internally — training still
    # runs, but gradients are damped; feeding raw logits would be cleaner.
    loss = loss_fn(Y_pred, Y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        # .item() logs the scalar loss instead of a tensor repr.
        print(epoch + 1, loss.item())

# Test
def test_model(model, dict_number, sentence, length):
    """Classify one whitespace-tokenized sentence and print 'Good' or 'Bad'.

    Pads with index 0 (or truncates) to `length`, mirroring make_data.
    NOTE(review): words absent from dict_number raise KeyError — the
    vocabulary has no out-of-vocabulary index.

    Args:
        model: trained TextCNN returning (batch, classes) scores.
        dict_number: word -> index map built by tokenizer().
        sentence: raw sentence string.
        length: fixed input length expected by the model.
    """
    # Hoist the repeated sentence.split() calls into one tokenization.
    words = sentence.split()
    if len(words) < length:
        x = torch.LongTensor([[[dict_number[w] for w in words] + [0 for _ in range(length - len(words))]]])
    else:
        x = torch.LongTensor([[[dict_number[w] for w in words[:length]]]])
    x = x.to(device)
    predict = model(x).data.max(1, keepdim=True)[1]
    # Idiom fix: plain if/else instead of an expression-statement ternary.
    if predict.squeeze().item() == 1:
        print('Good')
    else:
        print('Bad')
# Sanity-check the trained model on one example sentence.
example_sentence = 'i was happy to play with you'
test_model(model, dict_number, example_sentence, 15)
# >>> Good