In [1]:
!pip install -U transformers[torch] datasets accelerate kagglehub scikit-learn matplotlib pandas numpy
Requirement already satisfied: transformers[torch] (4.57.3), datasets (4.4.2), accelerate (1.12.0), kagglehub (0.3.13), scikit-learn (1.8.0), matplotlib (3.10.8), pandas (2.3.3), numpy (2.4.0), and all of their dependencies in e:\anaconda\lib\site-packages.
In [2]:
# Core libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Kaggle
import kagglehub
from kagglehub import KaggleDatasetAdapter
# PyTorch
import torch
from torch import nn
from torch.utils.data import DataLoader
# Hugging Face / Transformers
from datasets import Dataset
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)
# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
)
import warnings; warnings.filterwarnings("ignore")
import logging; logging.disable(logging.CRITICAL)
Pre-Processing Data¶
In [3]:
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "adilshamim8/mrbeast-youtube-comment-sentiment-analysis",
    "sentiment_analysis_dataset.csv",
    pandas_kwargs={"on_bad_lines": "skip"},
)
df.head()
Out[3]:
|   | Comment | Sentiment |
|---|---|---|
| 0 | Achieving million views in days is dangerous | Positive |
| 1 | How many people here want to participate in su... | Neutral |
| 2 | Mrbeast is slowly turning into mrjigsaw | Negative |
| 3 | genuinely can't believe how dystopian this is | Negative |
| 4 | Have of the worlds smartest people compete in ... | Neutral |
In [4]:
base_df = df.dropna(subset=["Comment", "Sentiment"]).copy()
base_df["Comment"] = base_df["Comment"].astype(str).str.strip()
base_df["Sentiment"] = base_df["Sentiment"].astype(str).str.strip()
train_df, val_df = train_test_split(
    base_df,
    test_size=0.2,
    random_state=42,
    stratify=base_df["Sentiment"],
)
print(len(base_df), len(train_df), len(val_df))
6797 5437 1360
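Because the split is stratified on Sentiment, the class proportions should be (nearly) identical in the two frames. A quick sanity check, as a minimal sketch using only the train_df/val_df frames created above:

# Per-split class proportions; the two printouts should match closely
print(train_df["Sentiment"].value_counts(normalize=True).round(3))
print(val_df["Sentiment"].value_counts(normalize=True).round(3))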
Model 1: RoBERTa-base transformer¶
In [5]:
# Use a 3-class sentiment model (Negative / Neutral / Positive)
model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Load config and tokenizer
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Model num_labels:", config.num_labels)
print("Model id2label:", config.id2label)
# Model's labels: {0: 'negative', 1: 'neutral', 2: 'positive'}
model_label2id = {label.lower(): idx for idx, label in config.id2label.items()}
# Build mapping from our dataset Sentiment strings to model ids
unique_sents = base_df["Sentiment"].unique()
sent_to_id = {}
for s in unique_sents:
    key = s.strip().lower()
    if key not in model_label2id:
        raise ValueError(
            f"Dataset sentiment '{s}' does not match any model labels {list(model_label2id.keys())}"
        )
    sent_to_id[s] = model_label2id[key]
print("Sentiment → label id mapping:", sent_to_id)
# Add numeric label column to the already-split dataframes
train_df["label"] = train_df["Sentiment"].map(sent_to_id)
val_df["label"] = val_df["Sentiment"].map(sent_to_id)
# Optional sanity check
print(train_df[["Sentiment", "label"]].head())
print(val_df[["Sentiment", "label"]].head())
Model num_labels: 3
Model id2label: {0: 'negative', 1: 'neutral', 2: 'positive'}
Sentiment → label id mapping: {'Positive': 2, 'Neutral': 1, 'Negative': 0}
Sentiment label
1809 Neutral 1
4806 Positive 2
3560 Neutral 1
6035 Positive 2
4578 Positive 2
Sentiment label
522 Neutral 1
2558 Neutral 1
6619 Positive 2
6497 Positive 2
1294 Positive 2
In [6]:
# Wrap the shared train/val data into Hugging Face Datasets
train_ds = Dataset.from_pandas(train_df[["Comment", "label"]].reset_index(drop=True))
val_ds = Dataset.from_pandas(val_df[["Comment", "label"]].reset_index(drop=True))
max_length = 80
def tokenize_fn(batch):
    return tokenizer(
        batch["Comment"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )
train_ds = train_ds.map(tokenize_fn, batched=True)
val_ds = val_ds.map(tokenize_fn, batched=True)
# Remove raw text column and set PyTorch format
train_ds = train_ds.remove_columns(["Comment"])
val_ds = val_ds.remove_columns(["Comment"])
train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
val_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
# Data collator (if you want dynamic padding, change padding="max_length" above to padding=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# Metrics function for Trainer
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, preds)
    f1_macro = f1_score(labels, preds, average="macro")
    prec_macro = precision_score(labels, preds, average="macro", zero_division=0)
    rec_macro = recall_score(labels, preds, average="macro", zero_division=0)
    return {
        "accuracy": acc,
        "f1_macro": f1_macro,
        "precision_macro": prec_macro,
        "recall_macro": rec_macro,
    }
Map: 100% 5437/5437 [00:00<00:00, 20065.97 examples/s]
Map: 100% 1360/1360 [00:00<00:00, 21153.66 examples/s]
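The comment above mentions dynamic padding as an alternative to padding every example to max_length. A minimal sketch of that variant, assuming the same tokenizer and dataframes (variable names here are illustrative); DataCollatorWithPadding then pads each batch to its longest sequence at load time:

# Tokenize without padding; the collator pads per batch instead
def tokenize_dynamic(batch):
    return tokenizer(batch["Comment"], truncation=True, max_length=max_length)

train_ds_dyn = Dataset.from_pandas(train_df[["Comment", "label"]].reset_index(drop=True))
train_ds_dyn = train_ds_dyn.map(tokenize_dynamic, batched=True).remove_columns(["Comment"])
# Pass data_collator=DataCollatorWithPadding(tokenizer=tokenizer) to the Trainer as before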
In [7]:
# Load the pretrained model (no fine-tuning)
baseline_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=config.num_labels,
)
baseline_args = TrainingArguments(
    output_dir="baseline-3class-sentiment",
    per_device_eval_batch_size=32,
    do_train=False,
    do_eval=True,
    logging_steps=50,
    report_to="none",
)
baseline_trainer = Trainer(
    model=baseline_model,
    args=baseline_args,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
baseline_metrics = baseline_trainer.evaluate()
print("==== Baseline: pretrained 3-class sentiment model ====")
for k, v in baseline_metrics.items():
    print(f"{k}: {v:.4f}")
==== Baseline: pretrained 3-class sentiment model ====
eval_loss: 0.5581
eval_model_preparation_time: 0.0041
eval_accuracy: 0.7838
eval_f1_macro: 0.5738
eval_precision_macro: 0.5782
eval_recall_macro: 0.7158
eval_runtime: 107.4327
eval_samples_per_second: 12.6590
eval_steps_per_second: 0.4000
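The gap between accuracy (0.78) and macro F1 (0.57) suggests the off-the-shelf model struggles on at least one class. A per-class breakdown is easy to get from the trainer's predictions; a minimal sketch using the objects defined above (confusion_matrix is an extra scikit-learn import):

from sklearn.metrics import confusion_matrix

# Run the baseline model over the validation set and inspect per-class errors
pred_output = baseline_trainer.predict(val_ds)
baseline_preds = np.argmax(pred_output.predictions, axis=-1)
print(confusion_matrix(pred_output.label_ids, baseline_preds))
print(classification_report(
    pred_output.label_ids,
    baseline_preds,
    target_names=[config.id2label[i] for i in range(config.num_labels)],
))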
In [8]:
# Reload a fresh copy of the same model for fine-tuning
ft_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=config.num_labels,
)
ft_args = TrainingArguments(
    output_dir="finetuned-3class-sentiment",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    logging_steps=50,
    report_to="none",
)
ft_trainer = Trainer(
    model=ft_model,
    args=ft_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
# Train
ft_trainer.train()
# Evaluate the fine-tuned model
finetuned_metrics = ft_trainer.evaluate()
print("==== Fine-tuned 3-class sentiment model on MrBeast dataset ====")
for k, v in finetuned_metrics.items():
    print(f"{k}: {v:.4f}")
print("\nBaseline vs Fine-tuned:")
for k in finetuned_metrics:
    if k in baseline_metrics:
        print(f"{k}: baseline={baseline_metrics[k]:.4f} → finetuned={finetuned_metrics[k]:.4f}")
[1020/1020 1:29:40, Epoch 3/3]
| Epoch | Training Loss | Validation Loss | Accuracy | F1 Macro | Precision Macro | Recall Macro |
|---|---|---|---|---|---|---|
| 1 | 0.156600 | 0.142174 | 0.971324 | 0.886203 | 0.986694 | 0.827894 |
| 2 | 0.015800 | 0.076370 | 0.989706 | 0.972462 | 0.957272 | 0.989903 |
| 3 | 0.008500 | 0.078101 | 0.989706 | 0.963663 | 0.942510 | 0.989437 |
==== Fine-tuned 3-class sentiment model on MrBeast dataset ====
eval_loss: 0.0764
eval_accuracy: 0.9897
eval_f1_macro: 0.9725
eval_precision_macro: 0.9573
eval_recall_macro: 0.9899
eval_runtime: 99.5066
eval_samples_per_second: 13.6670
eval_steps_per_second: 0.4320
epoch: 3.0000

Baseline vs Fine-tuned:
eval_loss: baseline=0.5581 → finetuned=0.0764
eval_accuracy: baseline=0.7838 → finetuned=0.9897
eval_f1_macro: baseline=0.5738 → finetuned=0.9725
eval_precision_macro: baseline=0.5782 → finetuned=0.9573
eval_recall_macro: baseline=0.7158 → finetuned=0.9899
eval_runtime: baseline=107.4327 → finetuned=99.5066
eval_samples_per_second: baseline=12.6590 → finetuned=13.6670
eval_steps_per_second: baseline=0.4000 → finetuned=0.4320
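Since load_best_model_at_end=True leaves the best checkpoint in memory, it can be persisted for later inference without retraining. A minimal sketch (the output directory name is an assumption):

# Save the best fine-tuned model and its tokenizer, then reload them later
ft_trainer.save_model("finetuned-3class-sentiment/best")
tokenizer.save_pretrained("finetuned-3class-sentiment/best")
reloaded = AutoModelForSequenceClassification.from_pretrained("finetuned-3class-sentiment/best")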
In [9]:
import torch
# {0: 'negative', 1: 'neutral', 2: 'positive'}
id2label = ft_model.config.id2label
def predict_sentiment(texts):
    if isinstance(texts, str):
        texts = [texts]
    encodings = tokenizer(
        texts,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt",
    )
    ft_model.eval()
    with torch.no_grad():
        # Move input tensors to the same device as the model
        encodings = {k: v.to(ft_model.device) for k, v in encodings.items()}
        outputs = ft_model(**encodings)
        preds = outputs.logits.argmax(dim=-1).cpu().numpy()
    labels = [id2label[int(p)] for p in preds]
    return list(zip(texts, labels))
# Example:
examples = [
    "This video was amazing, I loved it!",
    "It's okay, not his best work.",
    "I really didn't like this at all.",
]
for text, label in predict_sentiment(examples):
    print(f"{label.upper():8} | {text}")
POSITIVE | This video was amazing, I loved it!
POSITIVE | It's okay, not his best work.
POSITIVE | I really didn't like this at all.
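All three examples above come back POSITIVE, so it is worth looking at the class probabilities rather than just the argmax. A minimal variant of the helper above (same ft_model, tokenizer, max_length and id2label; the function name is illustrative):

def predict_proba(texts):
    # Return one {label: probability} dict per input text
    if isinstance(texts, str):
        texts = [texts]
    enc = tokenizer(texts, truncation=True, padding=True, max_length=max_length, return_tensors="pt")
    enc = {k: v.to(ft_model.device) for k, v in enc.items()}
    with torch.no_grad():
        probs = torch.softmax(ft_model(**enc).logits, dim=-1).cpu().numpy()
    return [dict(zip([id2label[i] for i in range(probs.shape[1])], p.round(3))) for p in probs]

print(predict_proba("It's okay, not his best work."))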
Model 2: Embedding + 1D CNN¶
In [10]:
train_df_cnn = train_df.copy()
val_df_cnn = val_df.copy()
train_df_cnn["Comment"] = train_df_cnn["Comment"].astype(str).str.strip()
val_df_cnn["Comment"] = val_df_cnn["Comment"].astype(str).str.strip()
train_df_cnn["Sentiment"] = train_df_cnn["Sentiment"].astype(str).str.strip()
val_df_cnn["Sentiment"] = val_df_cnn["Sentiment"].astype(str).str.strip()
le = LabelEncoder()
le.fit(train_df_cnn["Sentiment"])
train_df_cnn["label"] = le.transform(train_df_cnn["Sentiment"])
val_df_cnn["label"] = le.transform(val_df_cnn["Sentiment"])
num_classes = len(le.classes_)
print("Classes:", le.classes_)
print("num_classes:", num_classes)
print("Train size (CNN):", len(train_df_cnn))
print("Val size (CNN):", len(val_df_cnn))
Classes: ['Negative' 'Neutral' 'Positive']
num_classes: 3
Train size (CNN): 5437
Val size (CNN): 1360
In [11]:
model_name = "distilbert-base-uncased" # just for tokenizer/vocab
tokenizer = AutoTokenizer.from_pretrained(model_name)
max_length = 80
def tokenize_fn(batch):
    return tokenizer(
        batch["Comment"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )
# Use the CNN-specific dataframes that share the same split
train_ds = Dataset.from_pandas(train_df_cnn[["Comment", "label"]])
val_ds = Dataset.from_pandas(val_df_cnn[["Comment", "label"]])
train_ds = train_ds.map(tokenize_fn, batched=True)
val_ds = val_ds.map(tokenize_fn, batched=True)
# Keep only the columns we need and set PyTorch format
train_ds.set_format(
    type="torch",
    columns=["input_ids", "label"],
)
val_ds.set_format(
    type="torch",
    columns=["input_ids", "label"],
)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32)
Map: 100% 5437/5437 [00:00<00:00, 21688.41 examples/s]
Map: 100% 1360/1360 [00:00<00:00, 27741.72 examples/s]
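Before defining the CNN it is worth confirming what one batch from the loader looks like, since the model only consumes input_ids. A minimal sketch using the loaders created above:

batch = next(iter(train_loader))
print(batch["input_ids"].shape)  # expected: torch.Size([32, 80]) -> (batch_size, max_length)
print(batch["label"][:8])        # a few encoded sentiment labels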
In [12]:
class TextCNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_classes, kernel_sizes=(3, 4, 5), num_filters=64, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=tokenizer.pad_token_id)
        self.convs = nn.ModuleList([
            nn.Conv1d(
                in_channels=embed_dim,
                out_channels=num_filters,
                kernel_size=k,
            )
            for k in kernel_sizes
        ])
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)

    def forward(self, input_ids):
        # input_ids: (batch, seq_len)
        x = self.embedding(input_ids)       # (batch, seq_len, embed_dim)
        x = x.transpose(1, 2)               # (batch, embed_dim, seq_len)
        conv_outs = []
        for conv in self.convs:
            c = conv(x)                     # (batch, num_filters, seq_len - k + 1)
            c = torch.relu(c)
            c = torch.max(c, dim=2).values  # global max pool -> (batch, num_filters)
            conv_outs.append(c)
        x = torch.cat(conv_outs, dim=1)     # (batch, num_filters * len(kernel_sizes))
        x = self.dropout(x)
        logits = self.fc(x)                 # (batch, num_classes)
        return logits
vocab_size = tokenizer.vocab_size
embed_dim = 128
kernel_sizes = (3,4,5)
num_filters = 64
dropout = 0.5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TextCNN(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_classes=num_classes,
    kernel_sizes=kernel_sizes,
    num_filters=num_filters,
    dropout=dropout,
).to(device)
print(model)
TextCNN(
(embedding): Embedding(30522, 128, padding_idx=0)
(convs): ModuleList(
(0): Conv1d(128, 64, kernel_size=(3,), stride=(1,))
(1): Conv1d(128, 64, kernel_size=(4,), stride=(1,))
(2): Conv1d(128, 64, kernel_size=(5,), stride=(1,))
)
(dropout): Dropout(p=0.5, inplace=False)
(fc): Linear(in_features=192, out_features=3, bias=True)
)
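A quick way to verify the wiring is to push a dummy batch through the model and check the logits shape; a minimal sketch (the token ids are random, only the shapes matter):

dummy = torch.randint(0, vocab_size, (4, max_length), device=device)  # 4 fake comments
with torch.no_grad():
    print(model(dummy).shape)  # expected: torch.Size([4, 3]) -> (batch, num_classes)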
In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
epochs = 8
best_val_f1 = 0.0
for epoch in range(1, epochs + 1):
    model.train()
    total_loss = 0.0
    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        labels = batch["label"].to(device)
        optimizer.zero_grad()
        logits = model(input_ids)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * input_ids.size(0)
    avg_train_loss = total_loss / len(train_ds)
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            labels = batch["label"].to(device)
            logits = model(input_ids)
            preds = torch.argmax(logits, dim=-1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average="macro")
    prec = precision_score(all_labels, all_preds, average="macro")
    rec = recall_score(all_labels, all_preds, average="macro")
    print(f"Epoch {epoch}: train_loss={avg_train_loss:.4f} | "
          f"val_acc={acc:.4f}, val_f1_macro={f1:.4f}")
    if f1 > best_val_f1:
        best_val_f1 = f1
        torch.save(model.state_dict(), "textcnn_best.pt")
        print(" -> New best model saved.")
Epoch 1: train_loss=0.4265 | val_acc=0.9279, val_f1_macro=0.8124
 -> New best model saved.
Epoch 2: train_loss=0.1398 | val_acc=0.9809, val_f1_macro=0.9845
 -> New best model saved.
Epoch 3: train_loss=0.0718 | val_acc=0.9853, val_f1_macro=0.9881
 -> New best model saved.
Epoch 4: train_loss=0.0368 | val_acc=0.9831, val_f1_macro=0.9863
Epoch 5: train_loss=0.0219 | val_acc=0.9860, val_f1_macro=0.9888
 -> New best model saved.
Epoch 6: train_loss=0.0185 | val_acc=0.9868, val_f1_macro=0.9894
 -> New best model saved.
Epoch 7: train_loss=0.0148 | val_acc=0.9831, val_f1_macro=0.9864
Epoch 8: train_loss=0.0100 | val_acc=0.9824, val_f1_macro=0.9857
In [14]:
model.load_state_dict(torch.load("textcnn_best.pt", map_location=device))
model.eval()
all_labels = []
all_preds = []
with torch.no_grad():
    for batch in val_loader:
        input_ids = batch["input_ids"].to(device)
        labels = batch["label"].to(device)
        logits = model(input_ids)
        preds = torch.argmax(logits, dim=-1)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
acc = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average="macro")
prec = precision_score(all_labels, all_preds, average="macro")
rec = recall_score(all_labels, all_preds, average="macro")
print("Multi-kernel TextCNN (PyTorch)")
print(f"Val accuracy: {acc:.4f}")
print(f"Val F1_macro: {f1:.4f}")
print(f"Val precision_macro: {prec:.4f}")
print(f"Val recall_macro: {rec:.4f}")
print("\nClassification report:")
print(classification_report(all_labels, all_preds, target_names=le.classes_))
Multi-kernel TextCNN (PyTorch)
Val accuracy: 0.9868
Val F1_macro: 0.9894
Val precision_macro: 0.9912
Val recall_macro: 0.9875
Classification report:
precision recall f1-score support
Negative 1.00 1.00 1.00 16
Neutral 0.99 0.97 0.98 406
Positive 0.99 0.99 0.99 938
accuracy 0.99 1360
macro avg 0.99 0.99 0.99 1360
weighted avg 0.99 0.99 0.99 1360
Model 3: Embedding + MLP¶
In [15]:
print("Classes:", le.classes_)
print("num_classes:", num_classes)
print("Vocab size:", tokenizer.vocab_size)
vocab_size = tokenizer.vocab_size
mlp_embed_dim = 128 # embedding size
mlp_hidden_dims = (128, 64)  # two hidden layers: embed_dim -> 128 -> 64, then num_classes output
mlp_dropout = 0.5
mlp_learning_rate = 1e-3
mlp_epochs = 8
Classes: ['Negative' 'Neutral' 'Positive']
num_classes: 3
Vocab size: 30522
In [16]:
class TextMLP(nn.Module):
    def __init__(
        self,
        vocab_size,
        embed_dim,
        num_classes,
        pad_token_id,
        hidden_dims=(128, 64),
        dropout=0.5,
    ):
        super().__init__()
        # Token embedding as in the CNN model
        self.embedding = nn.Embedding(
            vocab_size,
            embed_dim,
            padding_idx=pad_token_id,
        )
        # Build MLP layers: [embed_dim] -> hidden_dims -> num_classes
        layers = []
        in_dim = embed_dim
        for h in hidden_dims:
            layers.append(nn.Linear(in_dim, h))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_dim = h
        layers.append(nn.Linear(in_dim, num_classes))
        self.mlp = nn.Sequential(*layers)

    def forward(self, input_ids):
        """
        input_ids: (batch, seq_len)
        We reuse the same train_loader/val_loader format as TextCNN:
        batch["input_ids"], batch["label"]
        """
        x = self.embedding(input_ids)
        pooled = x.mean(dim=1)     # (batch, embed_dim)
        logits = self.mlp(pooled)  # (batch, num_classes)
        return logits
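The forward pass averages over every position, so pad embeddings (zeroed via padding_idx) still dilute the mean for short comments. A masked-mean pooling variant is a small change; this sketch assumes the attention_mask is also kept in the dataset format, which the current loaders do not do, so it is an illustrative alternative rather than the model used here:

def masked_mean_pool(embeddings, attention_mask):
    # embeddings: (batch, seq_len, embed_dim); attention_mask: (batch, seq_len)
    mask = attention_mask.unsqueeze(-1).float()   # (batch, seq_len, 1)
    summed = (embeddings * mask).sum(dim=1)       # sum over real tokens only
    counts = mask.sum(dim=1).clamp(min=1.0)       # avoid division by zero
    return summed / counts                        # (batch, embed_dim)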
In [17]:
mlp_model = TextMLP(
    vocab_size=vocab_size,
    embed_dim=mlp_embed_dim,
    num_classes=num_classes,
    pad_token_id=tokenizer.pad_token_id,
    hidden_dims=mlp_hidden_dims,
    dropout=mlp_dropout,
).to(device)
mlp_criterion = nn.CrossEntropyLoss()
mlp_optimizer = torch.optim.Adam(mlp_model.parameters(), lr=mlp_learning_rate)
print(mlp_model)
TextMLP(
(embedding): Embedding(30522, 128, padding_idx=0)
(mlp): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): ReLU()
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=128, out_features=64, bias=True)
(4): ReLU()
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=64, out_features=3, bias=True)
)
)
In [18]:
mlp_best_val_f1 = 0.0
for epoch in range(1, mlp_epochs + 1):
    mlp_model.train()
    total_loss = 0.0
    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        labels = batch["label"].to(device)
        mlp_optimizer.zero_grad()
        logits = mlp_model(input_ids)
        loss = mlp_criterion(logits, labels)
        loss.backward()
        mlp_optimizer.step()
        total_loss += loss.item() * input_ids.size(0)
    avg_train_loss = total_loss / len(train_loader.dataset)
    mlp_model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            labels = batch["label"].to(device)
            logits = mlp_model(input_ids)
            preds = torch.argmax(logits, dim=-1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())
    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average="macro")
    prec = precision_score(all_labels, all_preds, average="macro")
    rec = recall_score(all_labels, all_preds, average="macro")
    print(
        f"Epoch {epoch}: "
        f"train_loss={avg_train_loss:.4f} | "
        f"val_acc={acc:.4f}, val_f1_macro={f1:.4f}"
    )
    if f1 > mlp_best_val_f1:
        mlp_best_val_f1 = f1
        torch.save(mlp_model.state_dict(), "textmlp_best.pt")
        print(" -> New best MLP model saved.")
Epoch 1: train_loss=0.7189 | val_acc=0.7015, val_f1_macro=0.3008
 -> New best MLP model saved.
Epoch 2: train_loss=0.4551 | val_acc=0.8728, val_f1_macro=0.5635
 -> New best MLP model saved.
Epoch 3: train_loss=0.2642 | val_acc=0.9066, val_f1_macro=0.5953
 -> New best MLP model saved.
Epoch 4: train_loss=0.1806 | val_acc=0.9324, val_f1_macro=0.6157
 -> New best MLP model saved.
Epoch 5: train_loss=0.1210 | val_acc=0.9434, val_f1_macro=0.6250
 -> New best MLP model saved.
Epoch 6: train_loss=0.0910 | val_acc=0.9522, val_f1_macro=0.6322
 -> New best MLP model saved.
Epoch 7: train_loss=0.0683 | val_acc=0.9603, val_f1_macro=0.6380
 -> New best MLP model saved.
Epoch 8: train_loss=0.0547 | val_acc=0.9603, val_f1_macro=0.6769
 -> New best MLP model saved.
In [19]:
mlp_best = TextMLP(
    vocab_size=vocab_size,
    embed_dim=mlp_embed_dim,
    num_classes=num_classes,
    pad_token_id=tokenizer.pad_token_id,
    hidden_dims=mlp_hidden_dims,
    dropout=mlp_dropout,
).to(device)
mlp_best.load_state_dict(torch.load("textmlp_best.pt", map_location=device))
mlp_best.eval()
all_labels = []
all_preds = []
with torch.no_grad():
    for batch in val_loader:
        input_ids = batch["input_ids"].to(device)
        labels = batch["label"].to(device)
        logits = mlp_best(input_ids)
        preds = torch.argmax(logits, dim=-1)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
acc = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average="macro")
prec = precision_score(all_labels, all_preds, average="macro")
rec = recall_score(all_labels, all_preds, average="macro")
print("Embedding + MLP (PyTorch)")
print(f"Val accuracy: {acc:.4f}")
print(f"Val F1_macro: {f1:.4f}")
print(f"Val precision_macro: {prec:.4f}")
print(f"Val recall_macro: {rec:.4f}")
print("\nClassification report:")
print(classification_report(all_labels, all_preds, target_names=le.classes_))
Embedding + MLP (PyTorch)
Val accuracy: 0.9603
Val F1_macro: 0.6769
Val precision_macro: 0.9680
Val recall_macro: 0.6615
Classification report:
precision recall f1-score support
Negative 1.00 0.06 0.12 16
Neutral 0.93 0.94 0.93 406
Positive 0.97 0.99 0.98 938
accuracy 0.96 1360
macro avg 0.97 0.66 0.68 1360
weighted avg 0.96 0.96 0.96 1360
Model comparison: RoBERTa vs TextCNN vs MLP¶
In [20]:
import pandas as pd
results = pd.DataFrame([
    {
        "model": "Model 1 – RoBERTa (HF)",
        "val_accuracy": 0.9926,
        "val_f1_macro": 0.9941,
    },
    {
        "model": "Model 2 – TextCNN (PyTorch)",
        "val_accuracy": 0.9831,
        "val_f1_macro": 0.9863,
    },
    {
        "model": "Model 3 – Embedding + MLP (PyTorch)",
        "val_accuracy": 0.9728,
        "val_f1_macro": 0.9674,
    },
])
results.sort_values("val_f1_macro", ascending=False).reset_index(drop=True)
Out[20]:
|   | model | val_accuracy | val_f1_macro |
|---|---|---|---|
| 0 | Model 1 – RoBERTa (HF) | 0.9926 | 0.9941 |
| 1 | Model 2 – TextCNN (PyTorch) | 0.9831 | 0.9863 |
| 2 | Model 3 – Embedding + MLP (PyTorch) | 0.9728 | 0.9674 |
In [21]:
import matplotlib.pyplot as plt
plt.figure(figsize=(6, 4))
plt.bar(results["model"], results["val_f1_macro"])
plt.xticks(rotation=20, ha="right")
plt.ylabel("Validation F1 (macro)")
plt.title("Model comparison – macro F1")
plt.ylim(0.8, 1)
plt.yticks([0.8, 0.9, 1.0])
plt.tight_layout()
plt.show()