In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import matplotlib.pyplot as plt
import pandas
In [2]:
# Start a Chrome browser driven by Selenium, using the chromedriver binary at
# the relative path below (a Windows build, judging by the "win64" folder and
# ".exe" suffix). `driver` is a module-level object that the other cells of
# this notebook use to load pages and, at the end, to quit the browser.
service = Service(executable_path="chromedriver-win64/chromedriver.exe")
driver = webdriver.Chrome(service=service)

def visit(url, file=None):
    """Load ``url`` in the shared browser and show the page's ``<body>``.

    Parameters
    ----------
    url : str
        Address to open with the module-level ``driver``.
    file : str, optional
        Screenshot name *without* extension. When omitted (``None``) the
        text of the body element is printed. Otherwise the body element is
        saved to ``file + ".png"`` and that image is drawn with matplotlib.

    Returns
    -------
    None
        The function is used only for its side effects (printing/plotting).
    """
    driver.get(url)
    body = driver.find_element("tag name", "body")

    # Identity check: `== None` can be answered by a custom `__eq__`, whereas
    # `is None` only matches the real "no file requested" default.
    if file is None:
        print(body.text)
        return

    # Build the path once so the file written and the file read cannot drift.
    image_path = file + ".png"
    body.screenshot(image_path)
    plt.imshow(plt.imread(image_path))

# Progress marker: reached only after the browser has been started and
# visit() has been defined without errors.
print("DONE")
DONE
In [3]:
# Request every route of the local site in turn. No `file` argument is
# given, so visit() prints each page's body text instead of a screenshot.
home = "http://127.0.0.1:5000"
for route in (
    "",
    "/visit",
    "/visit/peter1",
    "/visit/peter2",
    "/visit/peter3",
    "/visitors",
):
    visit(home + route)
Hello, visitor 1
Hello, visitor 2
Hello, peter1
Hello, peter2
Hello, peter3
# Name IP
0 1 - 127.0.0.1
1 2 - 127.0.0.1
2 3 peter1 127.0.0.1
3 4 peter2 127.0.0.1
4 5 peter3 127.0.0.1
In [4]:
# Always quit the driver once the visits are finished so the browser that
# Selenium started is not left running in the background.
driver.quit()
In [5]:
# The visitors page is an HTML table, so pandas can fetch and parse it
# straight from the URL without the browser; the first parsed table is shown.
# NOTE(review): the recorded output has an extra "Unnamed: 0" column,
# presumably the page's own row-index column; index_col=0 may be wanted. Confirm.
pandas.read_html(f"{home}/visitors")[0]
Out[5]:
Unnamed: 0 # Name IP
0 0 1 - 127.0.0.1
1 1 2 - 127.0.0.1
2 2 3 peter1 127.0.0.1
3 3 4 peter2 127.0.0.1
4 4 5 peter3 127.0.0.1