In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import matplotlib.pyplot as plt
import pandas
import random
import time
In [2]:
# Visit the link based on click through rate
# One Chrome session is started here and kept in the module-level `driver`,
# which the code below uses for every page load.
CHROMEDRIVER_PATH = "chromedriver-win64/chromedriver.exe"  # relative to the working directory
service = Service(executable_path=CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service)

def visit(url, crt = [0.2, 0.6]):
    """Load `url` and follow its first link with a version-dependent probability.

    The page is opened with the module-level `driver`. The first `<a>` element
    on it decides which version was served: version 0 when its href contains
    "cat", version 1 otherwise. The link is then followed with probability
    `crt[version]`.

    Parameters
    ----------
    url : str
        Address of the page to load.
    crt : sequence of two floats
        Click probability for version 0 and version 1, in that order.
        Only indexed, never mutated, so the shared default list is safe.

    Returns
    -------
    None
    """
    driver.get(url)
    link = driver.find_element("tag name", "a")
    href = link.get_attribute("href")
    version = 0 if "cat" in href else 1
    # random.random() draws from [0.0, 1.0), so a strict `<` clicks with
    # probability exactly crt[version]; `<=` would still click on a draw of
    # 0.0 when the rate is 0.
    if random.random() < crt[version]:
        driver.get(href)

# Marker output: reaching this line means every statement above it in this
# cell ran without raising.
print("DONE")
DONE
In [3]:
# Visit and scrape the pages
home = "http://127.0.0.1:5000"
try:
    # 25 simulated visitors; click rates are 0.2 for version 0 and 0.6 for version 1.
    for i in range(25):
        visit(home, [0.2, 0.6])
finally:
    # Close the browser even if a visit raises, so a failed run does not
    # leave a Chrome/chromedriver process behind.
    driver.quit()
# Fetch the server's /visit page and show the first HTML table on it.
# Kept as the last expression so the notebook renders the DataFrame.
pandas.read_html(home + "/visit")[0]
Out[3]:
Unnamed: 0 CRT UCB
0 cat 0.222222 1.046835
1 dog 0.272727 1.000000