In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import pandas
import random
In [2]:
# Visit the link based on click through rate
# Start one Chrome session for the whole notebook. The chromedriver location
# is a relative path, so it is resolved against the kernel's working directory.
chromedriver_path = "chromedriver-win64/chromedriver.exe"
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service)

def visit(url, crt=(0.2, 0.6), browser=None):
    """Load ``url`` and, with some probability, follow the first link on it.

    The page version is inferred from the first ``<a>`` element: version 0 if
    its href contains "cat", version 1 otherwise. The link is then followed
    with probability ``crt[version]``.

    Args:
        url: Address of the page to load.
        crt: Click-through rates indexed by version (``crt[0]`` for the "cat"
            version, ``crt[1]`` for the other). Any indexable sequence works.
        browser: WebDriver-like object exposing ``get`` and ``find_element``.
            Defaults to the notebook-level ``driver``.

    Returns:
        None.

    Raises:
        Whatever ``browser.find_element`` raises when the page has no ``<a>``
        element, and ``IndexError`` if ``crt`` has fewer than two entries and
        the missing version is selected.
    """
    if browser is None:
        browser = driver

    browser.get(url)
    link = browser.find_element("tag name", "a")
    href = link.get_attribute("href")
    if href is None:
        # An anchor without an href has nothing to follow.
        return

    version = 0 if "cat" in href else 1
    # random.random() is drawn from [0.0, 1.0), so a strict "<" clicks with
    # probability exactly crt[version]; in particular a rate of 0 never clicks.
    if random.random() < crt[version]:
        browser.get(href)

# Marker that the cell ran to its end (browser started, visit() defined).
print("DONE")
DONE
In [3]:
# Visit the home page repeatedly, then scrape the click counts the server reports.
home = "http://127.0.0.1:5000"
n_visits = 25  # number of simulated page loads

try:
    for i in range(n_visits):
        visit(home, [0.2, 0.6])
finally:
    # Always shut the browser down, even if a visit raises, so no Chrome /
    # chromedriver process is left behind. The driver is unusable afterwards:
    # re-run the cell that creates it before running this cell again.
    driver.quit()

# NOTE(review): the table is whatever the server has tallied so far, which
# presumably includes visits from earlier runs -- confirm before comparing
# the totals with n_visits.
pandas.read_html(home + "/visit")[0]
Out[3]:
Unnamed: 0 click no click
0 cat 4 12
1 dog 5 8