In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import matplotlib.pyplot as plt
import pandas
In [2]:
# Take a screenshot of the page
# Start one Chrome session that the rest of the notebook shares.
# The chromedriver path is relative, so it is resolved against the
# directory the kernel is running in.
chromedriver_path = "chromedriver-win64/chromedriver.exe"
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service)
def visit(url, file=None):
    """Load ``url`` in the shared ``driver`` and show the page's <body>.

    Parameters
    ----------
    url : str
        Address passed to ``driver.get``.
    file : str, optional
        Screenshot name *without* extension. When left as ``None`` the
        text of the body element is printed. Otherwise the body element is
        saved to ``file + ".png"`` and that image is drawn with
        ``plt.imshow``.

    Returns
    -------
    None
    """
    driver.get(url)
    body = driver.find_element("tag name", "body")
    # Identity check is the idiomatic way to detect "no argument given".
    if file is None:
        print(body.text)
    else:
        # Build the file name once so the save and the reload cannot diverge.
        png_path = file + ".png"
        body.screenshot(png_path)
        plt.imshow(plt.imread(png_path))


# Top-level marker: printed once when this cell finishes defining visit().
print("DONE")
DONE
In [3]:
# Visit and scrape the pages
home = "http://127.0.0.1:5000"
# Each suffix is appended to `home` and requested in the order listed.
routes = (
    "",
    "/visit",
    "/visit/peter1",
    "/visit/peter2",
    "/visit/peter3",
    "/visitors",
)
for route in routes:
    visit(home + route)
Hello, visitor 1
Hello, visitor 2
Hello, peter1
Hello, peter2
Hello, peter3
# Name IP
0 1 - 127.0.0.1
1 2 - 127.0.0.1
2 3 peter1 127.0.0.1
3 4 peter2 127.0.0.1
4 5 peter3 127.0.0.1
In [4]:
# Remember to quit when it's done: visit() drives this same `driver` object,
# so run this only after the last visit(...) call has finished.
driver.quit()
In [5]:
# Scrape the table too
visitors_url = home + "/visitors"
# read_html returns a list of DataFrames, one per <table> it finds;
# the bare last expression displays the first one.
visitor_tables = pandas.read_html(visitors_url)
visitor_tables[0]
Out[5]:
| | Unnamed: 0 | # | Name | IP |
|---|---|---|---|---|
| 0 | 0 | 1 | - | 127.0.0.1 |
| 1 | 1 | 2 | - | 127.0.0.1 |
| 2 | 2 | 3 | peter1 | 127.0.0.1 |
| 3 | 3 | 4 | peter2 | 127.0.0.1 |
| 4 | 4 | 5 | peter3 | 127.0.0.1 |