In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
import nltk
from pypdf import PdfReader
from wordcloud import WordCloud
import matplotlib.pyplot as plt
In [2]:
# Read the course evaluation surveys.
# Each page's text is terminated with a newline and all pages are joined in a
# single pass, which avoids re-copying the growing string on every page.
# orientations=(0,) restricts extraction to upright (unrotated) text.
survey = PdfReader("survey.pdf")
text = "".join(
    page.extract_text(orientations=(0,)) + "\n" for page in survey.pages
)
print(text[0:200])
What is the instructor doing that is helpful for your learning? What are they doing to address the 
challenges of the online learning environment?  
(Instructor Text/Memo)  
 
Young, Wu Instructor  
T
In [3]:
# Baseline: split the raw survey text into tokens and rank them by count.
# The top 10 is dominated by punctuation and function words, so this
# ranking is not useful yet.
tokens = nltk.tokenize.word_tokenize(text)
freq = nltk.probability.FreqDist(tokens)
freq.most_common(10)
Out[3]:
[('.', 201),
 ('the', 154),
 ('to', 134),
 ('I', 92),
 ('and', 88),
 ('is', 75),
 (',', 75),
 ('lecture', 67),
 ('of', 60),
 ('in', 55)]
In [4]:
# Remove punctuation: keep only purely alphabetic tokens, and lowercase them
# so that differently-cased forms (e.g. "The" / "the") are counted together.
tokens_punc = [word.lower() for word in filter(str.isalpha, tokens)]
freq = nltk.probability.FreqDist(tokens_punc)
freq.most_common(10)
Out[4]:
[('the', 164),
 ('to', 134),
 ('i', 93),
 ('and', 89),
 ('is', 75),
 ('lecture', 67),
 ('of', 60),
 ('in', 58),
 ('he', 55),
 ('for', 46)]
In [5]:
# Remove stopwords: drop every token found in NLTK's English stopword list.
# The list is turned into a set so each membership check is a hash lookup.
stop = set(nltk.corpus.stopwords.words("english"))
tokens_stop = [word for word in tokens_punc if word not in stop]
freq = nltk.probability.FreqDist(tokens_stop)
freq.most_common(10)
Out[5]:
[('lecture', 67),
 ('notes', 42),
 ('code', 35),
 ('lectures', 29),
 ('like', 28),
 ('helpful', 27),
 ('class', 27),
 ('yet', 22),
 ('good', 21),
 ('questions', 20)]
In [6]:
# Combine words that share a base form, e.g. "lecture" and "lectures",
# by mapping every token through the WordNet lemmatizer before counting.
lemma = nltk.stem.WordNetLemmatizer()
tokens_lemma = list(map(lemma.lemmatize, tokens_stop))
freq = nltk.probability.FreqDist(tokens_lemma)
freq.most_common(10)
Out[6]:
[('lecture', 96),
 ('note', 43),
 ('code', 38),
 ('class', 31),
 ('like', 28),
 ('helpful', 27),
 ('question', 24),
 ('yet', 22),
 ('good', 21),
 ('material', 21)]
In [7]:
# Rebuild a single space-separated string from the cleaned tokens and
# preview its first 200 characters.
new_text = " ".join(tokens_lemma)
print(new_text[:200])
instructor helpful learning address challenge online learning environment instructor young wu instructor instructor seems good explaining complicated concept idk interact much yet quick respond feel l
In [8]:
# Plot the frequent words in a word cloud.
# A fixed random_state makes the word layout and colours reproducible
# across runs.
cloud = WordCloud(background_color="white", random_state=0).generate(new_text)

fig, ax = plt.subplots()
ax.imshow(cloud)
# Hide ticks and frame. Unlike plt.axis("off") as the cell's last expression,
# this does not leave the axis limits behind as a stray cell output.
ax.set_axis_off()
plt.show()
No description has been provided for this image