In [1]:
!pip install -U transformers[torch] datasets accelerate kagglehub scikit-learn matplotlib pandas numpy
Requirement already satisfied: transformers[torch], datasets, accelerate, kagglehub, scikit-learn, matplotlib, pandas, numpy and their dependencies in e:\anaconda\lib\site-packages (output truncated)
In [2]:
# Core libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Kaggle
import kagglehub
from kagglehub import KaggleDatasetAdapter

# PyTorch
import torch
from torch import nn
from torch.utils.data import DataLoader

# Hugging Face / Transformers
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding
)

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report
)

from transformers import (
    AutoModelForSequenceClassification,
    AutoConfig,
    Trainer,
    TrainingArguments,
)

import warnings; warnings.filterwarnings("ignore")
import logging; logging.disable(logging.CRITICAL)

Pre-Processing Data

In [3]:
df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "adilshamim8/mrbeast-youtube-comment-sentiment-analysis",
    "sentiment_analysis_dataset.csv",
    pandas_kwargs={'on_bad_lines': 'skip'}
)

df.head()
Out[3]:
                                             Comment Sentiment
0      Achieving million views in days is dangerous  Positive
1  How many people here want to participate in su...   Neutral
2            Mrbeast is slowly turning into mrjigsaw  Negative
3      genuinely can't believe how dystopian this is  Negative
4  Have of the worlds smartest people compete in ...   Neutral
In [4]:
base_df = df.dropna(subset=["Comment", "Sentiment"]).copy()
base_df["Comment"] = base_df["Comment"].astype(str).str.strip()
base_df["Sentiment"] = base_df["Sentiment"].astype(str).str.strip()

train_df, val_df = train_test_split(
    base_df,
    test_size=0.2,
    random_state=42,
    stratify=base_df["Sentiment"],
)

print(len(base_df), len(train_df), len(val_df))
6797 5437 1360
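
Since the split is stratified on Sentiment, the class proportions should be nearly identical in both subsets. A quick optional sanity check, using only the dataframes defined above:

# Stratification check: per-class proportions should match across splits.
print(train_df["Sentiment"].value_counts(normalize=True).round(3))
print(val_df["Sentiment"].value_counts(normalize=True).round(3))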

Model 1: RoBERTa-base transformer

In [5]:
# Use a 3-class sentiment model (Negative / Neutral / Positive)
model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Load config and tokenizer
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("Model num_labels:", config.num_labels)
print("Model id2label:", config.id2label)

# Model's labels: {0: 'negative', 1: 'neutral', 2: 'positive'}
model_label2id = {label.lower(): idx for idx, label in config.id2label.items()}

# Build mapping from our dataset Sentiment strings to model ids
unique_sents = base_df["Sentiment"].unique()
sent_to_id = {}

for s in unique_sents:
    key = s.strip().lower()
    if key not in model_label2id:
        raise ValueError(
            f"Dataset sentiment '{s}' does not match any model labels {list(model_label2id.keys())}"
        )
    sent_to_id[s] = model_label2id[key]

print("Sentiment → label id mapping:", sent_to_id)

# Add numeric label column to the already-split dataframes
train_df["label"] = train_df["Sentiment"].map(sent_to_id)
val_df["label"]   = val_df["Sentiment"].map(sent_to_id)

# Optional sanity check
print(train_df[["Sentiment", "label"]].head())
print(val_df[["Sentiment", "label"]].head())
Model num_labels: 3
Model id2label: {0: 'negative', 1: 'neutral', 2: 'positive'}
Sentiment → label id mapping: {'Positive': 2, 'Neutral': 1, 'Negative': 0}
     Sentiment  label
1809   Neutral      1
4806  Positive      2
3560   Neutral      1
6035  Positive      2
4578  Positive      2
     Sentiment  label
522    Neutral      1
2558   Neutral      1
6619  Positive      2
6497  Positive      2
1294  Positive      2
In [6]:
# Wrap the shared train/val data into Hugging Face Datasets
train_ds = Dataset.from_pandas(train_df[["Comment", "label"]].reset_index(drop=True))
val_ds   = Dataset.from_pandas(val_df[["Comment", "label"]].reset_index(drop=True))

max_length = 80

def tokenize_fn(batch):
    return tokenizer(
        batch["Comment"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )

train_ds = train_ds.map(tokenize_fn, batched=True)
val_ds   = val_ds.map(tokenize_fn, batched=True)

# Remove raw text column and set PyTorch format
train_ds = train_ds.remove_columns(["Comment"])
val_ds   = val_ds.remove_columns(["Comment"])

train_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
val_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# Data collator (if you want dynamic padding, change padding="max_length" above to padding=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Metrics function for Trainer
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)

    acc = accuracy_score(labels, preds)
    f1_macro = f1_score(labels, preds, average="macro")
    prec_macro = precision_score(labels, preds, average="macro", zero_division=0)
    rec_macro = recall_score(labels, preds, average="macro", zero_division=0)

    return {
        "accuracy": acc,
        "f1_macro": f1_macro,
        "precision_macro": prec_macro,
        "recall_macro": rec_macro,
    }
Map: 100%|███████████████████████████████████████████████████████████████| 5437/5437 [00:00<00:00, 20065.97 examples/s]
Map: 100%|███████████████████████████████████████████████████████████████| 1360/1360 [00:00<00:00, 21153.66 examples/s]
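
With padding="max_length", every row is already 80 tokens long, so the collator has no extra work at batch time. A small sketch for inspecting one encoded example (reusing the names above), useful to confirm that truncation and padding behave as intended:

# Peek at a single tokenized row: tensor shape and the decoded, padded text.
sample = train_ds[0]
print(sample["input_ids"].shape)              # torch.Size([80])
print(tokenizer.decode(sample["input_ids"]))  # trailing <pad> tokens fill to max_length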
In [7]:
# Load the pretrained model (no fine-tuning)
baseline_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=config.num_labels,
)

baseline_args = TrainingArguments(
    output_dir="baseline-3class-sentiment",
    per_device_eval_batch_size=32,
    do_train=False,
    do_eval=True,
    logging_steps=50,
    report_to="none",
)

baseline_trainer = Trainer(
    model=baseline_model,
    args=baseline_args,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

baseline_metrics = baseline_trainer.evaluate()
print("==== Baseline: pretrained 3-class sentiment model ====")
for k, v in baseline_metrics.items():
    print(f"{k}: {v:.4f}")
==== Baseline: pretrained 3-class sentiment model ====
eval_loss: 0.5581
eval_model_preparation_time: 0.0041
eval_accuracy: 0.7838
eval_f1_macro: 0.5738
eval_precision_macro: 0.5782
eval_recall_macro: 0.7158
eval_runtime: 107.4327
eval_samples_per_second: 12.6590
eval_steps_per_second: 0.4000
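
Accuracy (0.78) and macro F1 (0.57) diverge here because macro averaging weights every class equally, regardless of how many examples it has. A toy illustration with the sklearn functions already imported:

# Toy imbalanced case: predicting only the majority class gives high
# accuracy but poor macro F1, since the minority-class F1 is zero.
y_true = [1, 1, 1, 1, 1, 1, 1, 1, 1, 0]
y_pred = [1] * 10
print(accuracy_score(y_true, y_pred))             # 0.9
print(f1_score(y_true, y_pred, average="macro"))  # ~0.47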
In [8]:
# Reload a fresh copy of the same model for fine-tuning
ft_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=config.num_labels,
)

ft_args = TrainingArguments(
    output_dir="finetuned-3class-sentiment",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
    logging_steps=50,
    report_to="none",
)

ft_trainer = Trainer(
    model=ft_model,
    args=ft_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Train
ft_trainer.train()

# Evaluate the fine-tuned model
finetuned_metrics = ft_trainer.evaluate()
print("==== Fine-tuned 3-class sentiment model on MrBeast dataset ====")
for k, v in finetuned_metrics.items():
    print(f"{k}: {v:.4f}")

print("\nBaseline vs Fine-tuned:")
for k in finetuned_metrics:
    if k in baseline_metrics:
        print(f"{k}: baseline={baseline_metrics[k]:.4f}  →  finetuned={finetuned_metrics[k]:.4f}")
[1020/1020 1:29:40, Epoch 3/3]
Epoch  Training Loss  Validation Loss  Accuracy  F1 Macro  Precision Macro  Recall Macro
1      0.156600       0.142174         0.971324  0.886203  0.986694         0.827894
2      0.015800       0.076370         0.989706  0.972462  0.957272         0.989903
3      0.008500       0.078101         0.989706  0.963663  0.942510         0.989437

==== Fine-tuned 3-class sentiment model on MrBeast dataset ====
eval_loss: 0.0764
eval_accuracy: 0.9897
eval_f1_macro: 0.9725
eval_precision_macro: 0.9573
eval_recall_macro: 0.9899
eval_runtime: 99.5066
eval_samples_per_second: 13.6670
eval_steps_per_second: 0.4320
epoch: 3.0000

Baseline vs Fine-tuned:
eval_loss: baseline=0.5581  →  finetuned=0.0764
eval_accuracy: baseline=0.7838  →  finetuned=0.9897
eval_f1_macro: baseline=0.5738  →  finetuned=0.9725
eval_precision_macro: baseline=0.5782  →  finetuned=0.9573
eval_recall_macro: baseline=0.7158  →  finetuned=0.9899
eval_runtime: baseline=107.4327  →  finetuned=99.5066
eval_samples_per_second: baseline=12.6590  →  finetuned=13.6670
eval_steps_per_second: baseline=0.4000  →  finetuned=0.4320
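
To reuse the fine-tuned checkpoint outside this notebook, a minimal sketch (the directory name here is illustrative, not one used elsewhere in this notebook):

# Save the model weights/config plus the tokenizer to one directory.
ft_trainer.save_model("finetuned-3class-sentiment/best")
tokenizer.save_pretrained("finetuned-3class-sentiment/best")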
In [9]:
import torch

# {0: 'negative', 1: 'neutral', 2: 'positive'}
id2label = ft_model.config.id2label

def predict_sentiment(texts):
    if isinstance(texts, str):
        texts = [texts]
    encodings = tokenizer(
        texts,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors="pt",
    )
    ft_model.eval()
    with torch.no_grad():
        # Move input tensors to the same device as the model
        encodings = {k: v.to(ft_model.device) for k, v in encodings.items()}
        outputs = ft_model(**encodings)
        preds = outputs.logits.argmax(dim=-1).cpu().numpy()

    labels = [id2label[int(p)] for p in preds]
    return list(zip(texts, labels))

# Example:
examples = [
    "This video was amazing, I loved it!",
    "It's okay, not his best work.",
    "I really didn't like this at all.",
]

for text, label in predict_sentiment(examples):
    print(f"{label.upper():8} | {text}")
POSITIVE | This video was amazing, I loved it!
POSITIVE | It's okay, not his best work.
POSITIVE | I really didn't like this at all.
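
The last two examples come back POSITIVE despite their wording, so it is worth looking past the argmax. A sketch that prints the full softmax distribution for each example, reusing the names defined above:

# Show per-class probabilities instead of only the predicted label.
enc = tokenizer(examples, truncation=True, padding=True,
                max_length=max_length, return_tensors="pt")
enc = {k: v.to(ft_model.device) for k, v in enc.items()}
with torch.no_grad():
    probs = torch.softmax(ft_model(**enc).logits, dim=-1).cpu().numpy()
for text, p in zip(examples, probs):
    print(np.round(p, 3), "|", text)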

Model 2: Embedding + 1D CNN

In [10]:
train_df_cnn = train_df.copy()
val_df_cnn   = val_df.copy()

train_df_cnn["Comment"]   = train_df_cnn["Comment"].astype(str).str.strip()
val_df_cnn["Comment"]     = val_df_cnn["Comment"].astype(str).str.strip()
train_df_cnn["Sentiment"] = train_df_cnn["Sentiment"].astype(str).str.strip()
val_df_cnn["Sentiment"]   = val_df_cnn["Sentiment"].astype(str).str.strip()

le = LabelEncoder()
le.fit(train_df_cnn["Sentiment"])

train_df_cnn["label"] = le.transform(train_df_cnn["Sentiment"])
val_df_cnn["label"]   = le.transform(val_df_cnn["Sentiment"])

num_classes = len(le.classes_)
print("Classes:", le.classes_)
print("num_classes:", num_classes)
print("Train size (CNN):", len(train_df_cnn))
print("Val size (CNN):", len(val_df_cnn))
Classes: ['Negative' 'Neutral' 'Positive']
num_classes: 3
Train size (CNN): 5437
Val size (CNN): 1360
In [11]:
model_name = "distilbert-base-uncased"  # just for tokenizer/vocab
tokenizer = AutoTokenizer.from_pretrained(model_name)

max_length = 80

def tokenize_fn(batch):
    return tokenizer(
        batch["Comment"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )

# Use the CNN-specific dataframes that share the same split
train_ds = Dataset.from_pandas(train_df_cnn[["Comment", "label"]])
val_ds   = Dataset.from_pandas(val_df_cnn[["Comment", "label"]])

train_ds = train_ds.map(tokenize_fn, batched=True)
val_ds   = val_ds.map(tokenize_fn, batched=True)

# Keep only the columns we need and set PyTorch format
train_ds.set_format(
    type="torch",
    columns=["input_ids", "label"],
)
val_ds.set_format(
    type="torch",
    columns=["input_ids", "label"],
)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=32)
Map: 100%|███████████████████████████████████████████████████████████████| 5437/5437 [00:00<00:00, 21688.41 examples/s]
Map: 100%|███████████████████████████████████████████████████████████████| 1360/1360 [00:00<00:00, 27741.72 examples/s]
In [12]:
class TextCNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_classes, kernel_sizes=(3,4,5), num_filters=64, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=tokenizer.pad_token_id)

        self.convs = nn.ModuleList([
            nn.Conv1d(
                in_channels=embed_dim,
                out_channels=num_filters,
                kernel_size=k
            )
            for k in kernel_sizes
        ])

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)

    def forward(self, input_ids):
        # input_ids: (batch, seq_len)
        x = self.embedding(input_ids)             # (batch, seq_len, embed_dim)
        x = x.transpose(1, 2)                     # (batch, embed_dim, seq_len)

        conv_outs = []
        for conv in self.convs:
            c = conv(x)                           # (batch, num_filters, seq_len-k+1)
            c = torch.relu(c)
            c = torch.max(c, dim=2).values        # global max pool -> (batch, num_filters)
            conv_outs.append(c)

        x = torch.cat(conv_outs, dim=1)           # (batch, num_filters * len(kernel_sizes))
        x = self.dropout(x)
        logits = self.fc(x)                       # (batch, num_classes)
        return logits

vocab_size = tokenizer.vocab_size
embed_dim = 128
kernel_sizes = (3,4,5)
num_filters = 64
dropout = 0.5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = TextCNN(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    num_classes=num_classes,
    kernel_sizes=kernel_sizes,
    num_filters=num_filters,
    dropout=dropout
).to(device)

print(model)
TextCNN(
  (embedding): Embedding(30522, 128, padding_idx=0)
  (convs): ModuleList(
    (0): Conv1d(128, 64, kernel_size=(3,), stride=(1,))
    (1): Conv1d(128, 64, kernel_size=(4,), stride=(1,))
    (2): Conv1d(128, 64, kernel_size=(5,), stride=(1,))
  )
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=192, out_features=3, bias=True)
)
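
A dummy forward pass is a cheap way to confirm the wiring before training; the logits should come out as (batch, num_classes):

# Shape check with a random batch of 2 sequences of length max_length.
dummy = torch.randint(0, vocab_size, (2, max_length), device=device)
print(model(dummy).shape)  # expected: torch.Size([2, 3])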
In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
epochs = 8
best_val_f1 = 0.0

for epoch in range(1, epochs + 1):
    model.train()
    total_loss = 0.0
    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()
        logits = model(input_ids)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * input_ids.size(0)

    avg_train_loss = total_loss / len(train_ds)

    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            labels = batch["label"].to(device)

            logits = model(input_ids)
            preds = torch.argmax(logits, dim=-1)

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    f1  = f1_score(all_labels, all_preds, average="macro")
    prec = precision_score(all_labels, all_preds, average="macro")
    rec  = recall_score(all_labels, all_preds, average="macro")

    print(f"Epoch {epoch}: train_loss={avg_train_loss:.4f} | "
          f"val_acc={acc:.4f}, val_f1_macro={f1:.4f}")

    if f1 > best_val_f1:
        best_val_f1 = f1
        torch.save(model.state_dict(), "textcnn_best.pt")
        print("  -> New best model saved.")
Epoch 1: train_loss=0.4265 | val_acc=0.9279, val_f1_macro=0.8124
  -> New best model saved.
Epoch 2: train_loss=0.1398 | val_acc=0.9809, val_f1_macro=0.9845
  -> New best model saved.
Epoch 3: train_loss=0.0718 | val_acc=0.9853, val_f1_macro=0.9881
  -> New best model saved.
Epoch 4: train_loss=0.0368 | val_acc=0.9831, val_f1_macro=0.9863
Epoch 5: train_loss=0.0219 | val_acc=0.9860, val_f1_macro=0.9888
  -> New best model saved.
Epoch 6: train_loss=0.0185 | val_acc=0.9868, val_f1_macro=0.9894
  -> New best model saved.
Epoch 7: train_loss=0.0148 | val_acc=0.9831, val_f1_macro=0.9864
Epoch 8: train_loss=0.0100 | val_acc=0.9824, val_f1_macro=0.9857
In [14]:
model.load_state_dict(torch.load("textcnn_best.pt", map_location=device))

model.eval()
all_labels = []
all_preds = []

with torch.no_grad():
    for batch in val_loader:
        input_ids = batch["input_ids"].to(device)
        labels = batch["label"].to(device)

        logits = model(input_ids)
        preds = torch.argmax(logits, dim=-1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

acc  = accuracy_score(all_labels, all_preds)
f1   = f1_score(all_labels, all_preds, average="macro")
prec = precision_score(all_labels, all_preds, average="macro")
rec  = recall_score(all_labels, all_preds, average="macro")

print("Multi-kernel TextCNN (PyTorch)")
print(f"Val accuracy:        {acc:.4f}")
print(f"Val F1_macro:        {f1:.4f}")
print(f"Val precision_macro: {prec:.4f}")
print(f"Val recall_macro:    {rec:.4f}")

print("\nClassification report:")
print(classification_report(all_labels, all_preds, target_names=le.classes_))
Multi-kernel TextCNN (PyTorch)
Val accuracy:        0.9868
Val F1_macro:        0.9894
Val precision_macro: 0.9912
Val recall_macro:    0.9875

Classification report:
              precision    recall  f1-score   support

    Negative       1.00      1.00      1.00        16
     Neutral       0.99      0.97      0.98       406
    Positive       0.99      0.99      0.99       938

    accuracy                           0.99      1360
   macro avg       0.99      0.99      0.99      1360
weighted avg       0.99      0.99      0.99      1360

Model 3: Embedding + MLP

In [15]:
print("Classes:", le.classes_)
print("num_classes:", num_classes)
print("Vocab size:", tokenizer.vocab_size)

vocab_size = tokenizer.vocab_size

mlp_embed_dim = 128          # embedding size
mlp_hidden_dims = (128, 64)  # two-layer MLP: 128 -> 64 -> num_classes
mlp_dropout = 0.5
mlp_learning_rate = 1e-3
mlp_epochs = 8
Classes: ['Negative' 'Neutral' 'Positive']
num_classes: 3
Vocab size: 30522
In [16]:
class TextMLP(nn.Module):
    def __init__(
        self,
        vocab_size,
        embed_dim,
        num_classes,
        pad_token_id,
        hidden_dims=(128, 64),
        dropout=0.5,
    ):
        super().__init__()

        # Token embedding as in the CNN model
        self.embedding = nn.Embedding(
            vocab_size,
            embed_dim,
            padding_idx=pad_token_id
        )

        # Build MLP layers: [embed_dim] -> hidden_dims -> num_classes
        layers = []
        in_dim = embed_dim
        for h in hidden_dims:
            layers.append(nn.Linear(in_dim, h))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_dim = h

        layers.append(nn.Linear(in_dim, num_classes))
        self.mlp = nn.Sequential(*layers)

    def forward(self, input_ids):
        """
        input_ids: (batch, seq_len)
        We reuse the same train_loader/val_loader format as TextCNN:
        batch["input_ids"], batch["label"]
        """
        x = self.embedding(input_ids)

        pooled = x.mean(dim=1)   # (batch, embed_dim)

        logits = self.mlp(pooled)  # (batch, num_classes)
        return logits
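
One caveat with the plain x.mean(dim=1): it averages over every position, padding included. The pad embeddings stay at zero thanks to padding_idx, but they still dilute the mean for short comments. A hedged alternative, assuming attention_mask were also kept in the dataset format (it is not, in the loaders above):

# Sketch of a length-aware mean pool; attention_mask is (batch, seq_len)
# with 1 at real tokens and 0 at padding.
def masked_mean(x, attention_mask):
    mask = attention_mask.unsqueeze(-1).float()  # (batch, seq_len, 1)
    summed = (x * mask).sum(dim=1)               # (batch, embed_dim)
    counts = mask.sum(dim=1).clamp(min=1.0)      # avoid division by zero
    return summed / counts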
In [17]:
mlp_model = TextMLP(
    vocab_size=vocab_size,
    embed_dim=mlp_embed_dim,
    num_classes=num_classes,
    pad_token_id=tokenizer.pad_token_id,
    hidden_dims=mlp_hidden_dims,
    dropout=mlp_dropout,
).to(device)

mlp_criterion = nn.CrossEntropyLoss()
mlp_optimizer = torch.optim.Adam(mlp_model.parameters(), lr=mlp_learning_rate)

print(mlp_model)
TextMLP(
  (embedding): Embedding(30522, 128, padding_idx=0)
  (mlp): Sequential(
    (0): Linear(in_features=128, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=64, out_features=3, bias=True)
  )
)
In [18]:
mlp_best_val_f1 = 0.0

for epoch in range(1, mlp_epochs + 1):
    mlp_model.train()
    total_loss = 0.0

    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        labels = batch["label"].to(device)

        mlp_optimizer.zero_grad()
        logits = mlp_model(input_ids)
        loss = mlp_criterion(logits, labels)
        loss.backward()
        mlp_optimizer.step()

        total_loss += loss.item() * input_ids.size(0)

    avg_train_loss = total_loss / len(train_loader.dataset)

    mlp_model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            labels = batch["label"].to(device)

            logits = mlp_model(input_ids)
            preds = torch.argmax(logits, dim=-1)

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    acc  = accuracy_score(all_labels, all_preds)
    f1   = f1_score(all_labels, all_preds, average="macro")
    prec = precision_score(all_labels, all_preds, average="macro")
    rec  = recall_score(all_labels, all_preds, average="macro")

    print(
        f"Epoch {epoch}: "
        f"train_loss={avg_train_loss:.4f} | "
        f"val_acc={acc:.4f}, val_f1_macro={f1:.4f}"
    )

    if f1 > mlp_best_val_f1:
        mlp_best_val_f1 = f1
        torch.save(mlp_model.state_dict(), "textmlp_best.pt")
        print("  -> New best MLP model saved.")
Epoch 1: train_loss=0.7189 | val_acc=0.7015, val_f1_macro=0.3008
  -> New best MLP model saved.
Epoch 2: train_loss=0.4551 | val_acc=0.8728, val_f1_macro=0.5635
  -> New best MLP model saved.
Epoch 3: train_loss=0.2642 | val_acc=0.9066, val_f1_macro=0.5953
  -> New best MLP model saved.
Epoch 4: train_loss=0.1806 | val_acc=0.9324, val_f1_macro=0.6157
  -> New best MLP model saved.
Epoch 5: train_loss=0.1210 | val_acc=0.9434, val_f1_macro=0.6250
  -> New best MLP model saved.
Epoch 6: train_loss=0.0910 | val_acc=0.9522, val_f1_macro=0.6322
  -> New best MLP model saved.
Epoch 7: train_loss=0.0683 | val_acc=0.9603, val_f1_macro=0.6380
  -> New best MLP model saved.
Epoch 8: train_loss=0.0547 | val_acc=0.9603, val_f1_macro=0.6769
  -> New best MLP model saved.
In [19]:
mlp_best = TextMLP(
    vocab_size=vocab_size,
    embed_dim=mlp_embed_dim,
    num_classes=num_classes,
    pad_token_id=tokenizer.pad_token_id,
    hidden_dims=mlp_hidden_dims,
    dropout=mlp_dropout,
).to(device)

mlp_best.load_state_dict(torch.load("textmlp_best.pt", map_location=device))
mlp_best.eval()

all_labels = []
all_preds = []

with torch.no_grad():
    for batch in val_loader:
        input_ids = batch["input_ids"].to(device)
        labels = batch["label"].to(device)

        logits = mlp_best(input_ids)
        preds = torch.argmax(logits, dim=-1)

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

acc  = accuracy_score(all_labels, all_preds)
f1   = f1_score(all_labels, all_preds, average="macro")
prec = precision_score(all_labels, all_preds, average="macro")
rec  = recall_score(all_labels, all_preds, average="macro")

print("Embedding + MLP (PyTorch)")
print(f"Val accuracy:        {acc:.4f}")
print(f"Val F1_macro:        {f1:.4f}")
print(f"Val precision_macro: {prec:.4f}")
print(f"Val recall_macro:    {rec:.4f}")

print("\nClassification report:")
print(classification_report(all_labels, all_preds, target_names=le.classes_))
Embedding + MLP (PyTorch)
Val accuracy:        0.9603
Val F1_macro:        0.6769
Val precision_macro: 0.9680
Val recall_macro:    0.6615

Classification report:
              precision    recall  f1-score   support

    Negative       1.00      0.06      0.12        16
     Neutral       0.93      0.94      0.93       406
    Positive       0.97      0.99      0.98       938

    accuracy                           0.96      1360
   macro avg       0.97      0.66      0.68      1360
weighted avg       0.96      0.96      0.96      1360
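
The breakdown points at class imbalance: the validation split has only 16 Negative comments, and the MLP recovers almost none of them (recall 0.06). A common remedy, not applied above and sketched here only as a suggestion, is to weight the loss by inverse class frequency:

from sklearn.utils.class_weight import compute_class_weight

# Inverse-frequency class weights from the training labels, fed into the loss.
weights = compute_class_weight(
    class_weight="balanced",
    classes=np.arange(num_classes),
    y=train_df_cnn["label"].to_numpy(),
)
weighted_criterion = nn.CrossEntropyLoss(
    weight=torch.tensor(weights, dtype=torch.float32, device=device)
)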

Model comparison: RoBERTa vs TextCNN vs MLP

In [20]:
import pandas as pd

# Validation metrics copied from the evaluation cells above.
results = pd.DataFrame([
    {
        "model": "Model 1 – RoBERTa (HF)",
        "val_accuracy": 0.9897,
        "val_f1_macro": 0.9725,
    },
    {
        "model": "Model 2 – TextCNN (PyTorch)",
        "val_accuracy": 0.9868,
        "val_f1_macro": 0.9894,
    },
    {
        "model": "Model 3 – Embedding + MLP (PyTorch)",
        "val_accuracy": 0.9603,
        "val_f1_macro": 0.6769,
    },
])

results.sort_values("val_f1_macro", ascending=False).reset_index(drop=True)
Out[20]:
                                 model  val_accuracy  val_f1_macro
0          Model 2 – TextCNN (PyTorch)        0.9868        0.9894
1               Model 1 – RoBERTa (HF)        0.9897        0.9725
2  Model 3 – Embedding + MLP (PyTorch)        0.9603        0.6769
In [21]:
import matplotlib.pyplot as plt

plt.figure(figsize=(6, 4))

plt.bar(results["model"], results["val_f1_macro"])
plt.xticks(rotation=20, ha="right")

plt.ylabel("Validation F1 (macro)")
plt.title("Model comparison – macro F1")

plt.ylim(0.6, 1.0)
plt.yticks([0.6, 0.7, 0.8, 0.9, 1.0])

plt.tight_layout()
plt.show()
[Figure: bar chart comparing validation macro F1 across the three models]