In [1]:
# Code attribution: Yiyin Shen, Tyler Caraza-Harter
# Imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import pandas
import random
In [2]:
# Launch the browser and define visit(), which follows a page's link with a probability given by its click-through rate (CTR)
# Location of the ChromeDriver binary (a relative path).
CHROMEDRIVER_PATH = "chromedriver-win64/chromedriver.exe"

# Start a single Chrome session; it is stored in the module-level `driver`.
service = Service(executable_path=CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service)
def visit(url, crt=(0.2, 0.6)):
    """Open ``url`` and randomly follow the first link found on the page.

    The first ``<a>`` element on the page is located and its ``href`` is
    inspected: a target containing ``"cat"`` is treated as version 0, any
    other target as version 1. The link is then followed with probability
    ``crt[version]``.

    Uses the module-level ``driver`` for all browser interaction.

    Args:
        url: Address of the page to load.
        crt: Pair of click probabilities indexed by version
            (index 0 for a "cat" link, index 1 otherwise). Any indexable
            pair (list or tuple) is accepted.

    Returns:
        None.
    """
    driver.get(url)
    link = driver.find_element("tag name", "a")
    href = link.get_attribute("href")
    version = 0 if "cat" in href else 1
    # random.random() is drawn from [0.0, 1.0), so a strict "<" gives a click
    # probability of exactly crt[version]; "<=" would let a rate of 0 click
    # whenever the draw happens to be 0.0.
    if random.random() < crt[version]:
        driver.get(href)
# Printed once the statements above in this cell have run.
print("DONE")
DONE
In [3]:
# Visit and scrape the pages
# Base URL of the site under test.
home = "http://127.0.0.1:5000"
# Number of simulated page visits in this run.
N_VISITS = 25

try:
    for _ in range(N_VISITS):
        visit(home, [0.2, 0.6])
finally:
    # Always shut the browser down, even if a visit raises; otherwise the
    # Chrome window and chromedriver process are left running.
    driver.quit()

# Read the first HTML table served at /visit and display it as the cell output.
pandas.read_html(home + "/visit")[0]
Out[3]:
Unnamed: 0 | click | no click | |
---|---|---|---|
0 | cat | 4 | 12 |
1 | dog | 5 | 8 |