In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import matplotlib.pyplot as plt
import time
import pandas
In [2]:
# Start Chrome and define a helper that prints a page's text or takes a screenshot of it
# Point Selenium at the chromedriver binary kept next to the notebook,
# then open the Chrome session that the rest of the notebook drives.
chromedriver_path = "chromedriver-win64/chromedriver.exe"
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service)
def visit(url, file=None):
    """Load ``url`` in the shared ``driver`` and show the page's <body>.

    Parameters
    ----------
    url : str
        Address passed to ``driver.get``.
    file : str, optional
        Screenshot name without the extension; ".png" is appended.
        When omitted (``None``), the text of the <body> element is printed
        instead of taking a screenshot.

    Returns
    -------
    None
        The result is either printed text or a ``<file>.png`` screenshot
        that is read back and drawn with matplotlib.
    """
    driver.get(url)
    body = driver.find_element("tag name", "body")

    # Identity check: only a real None means "no screenshot requested";
    # `==` would defer to whatever __eq__ the argument happens to define.
    if file is None:
        print(body.text)
        return

    # Build the path once so the file written and the file read back match.
    png_path = file + ".png"
    body.screenshot(png_path)
    plt.imshow(plt.imread(png_path))
# Progress marker: printed only if the preceding statements in this cell ran without raising.
print("DONE")
DONE
In [3]:
# Visit and scrape the pages
home = "http://127.0.0.1:5000"

# Two back-to-back requests first, then one more after each pause,
# so the printed responses show how the server reacts to the time between visits.
visit(home)
visit(home)
for wait_seconds in (1, 2):
    time.sleep(wait_seconds)
    visit(home)
Hello, [127.0.0.1] Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Please come back in 2.855536937713623 seconds. Please come back in 1.7896039485931396 seconds. Welcome back, [127.0.0.1] Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36
In [4]:
# Remember to quit the driver once all the visits are finished;
# `visit` uses this same `driver` object.
driver.quit()
In [5]:
# Scrape the table too
# read_html returns a list of tables; display the first one from the /visitors page.
visitors_url = home + "/visitors"
visitor_tables = pandas.read_html(visitors_url)
visitor_tables[0]
Out[5]:
Unnamed: 0 | IP | User Agent | |
---|---|---|---|
0 | 0 | 127.0.0.1 | Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... |
1 | 1 | 127.0.0.1 | Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... |
2 | 2 | 127.0.0.1 | Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... |
3 | 3 | 127.0.0.1 | Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl... |