from bs4 import BeautifulSoup as bs
import sys
import json

# Set the interpreter recursion limit to 1500 frames.
# NOTE(review): presumably needed for deeply nested HTML trees -- confirm.
sys.setrecursionlimit(1500)

# Skeleton of the JSON document this script emits: a table description
# (name, description, id attribute, attribute schema) plus a "tuples" list
# that starts empty and receives one dict per scraped book.
table = {
    "table": {
        "name": "Barnes&noble",
        "description": "Books from Barnes and Noble",
        "idAttrib": {"name": "isbn-13", "type": "INT"},
        # Attribute schema, expanded from (name, type) pairs in listed order.
        "attributes": [
            {"name": column, "type": kind}
            for column, kind in (
                ("audiobook bn price", "TEXT"),
                ("authors", "TEXT"),
                ("edition", "INT"),
                ("hardcover bn price", "TEXT"),
                ("isbn-13", "INT"),
                ("pages", "INT"),
                ("paperback bn price", "TEXT"),
                ("product dimensions", "TEXT"),
                ("publication date", "TEXT"),
                ("publisher", "TEXT"),
                ("related categories", "TEXT"),
                ("series", "TEXT"),
                ("title", "TEXT"),
            )
        ],
        "tuples": [],
    }
}

# Labels that appear before the ":" in the product-details list, mapped to
# the tuple key each one populates.
_DETAIL_FIELDS = {
    "Publication date": "publication date",
    "ISBN-13": "isbn-13",
    "Publisher": "publisher",
    "Pages": "pages",
    "Product dimensions": "product dimensions",
    "Edition Number": "edition",
    "Series": "series",
}


def _squeeze(text):
    """Collapse runs of spaces to a single space, then delete every newline.

    The order is deliberate and kept from the original script: spaces are
    collapsed first, so two spaces that only become adjacent once a newline
    between them is removed are left untouched.
    """
    while '  ' in text:
        text = text.replace('  ', ' ')
    return text.replace('\n', '')


def _joined_list_items(soup, ul_class, clean=False, skip_text=None):
    """Return the "; "-separated <li> texts of every <ul> with *ul_class*.

    Args:
        soup: parsed page to search.
        ul_class: value passed as ``class_`` when looking up the <ul> tags.
        clean: when true, each item's text goes through ``_squeeze`` first.
        skip_text: items whose raw text equals this string are ignored.

    Returns:
        None when the page has no matching <ul>; otherwise the joined string
        (possibly empty). Each item is prepended to the accumulator, so items
        come out in reverse document order -- existing output relies on that
        order, so it is preserved here.
    """
    lists = soup.find_all('ul', class_=ul_class)
    if not lists:
        return None
    joined = ""
    for ultag in lists:
        for litag in ultag.find_all('li'):
            text = litag.text
            if skip_text is not None and text == skip_text:
                continue
            if clean:
                text = _squeeze(text)
            joined = text + "; " + joined
        # Trim after every list, exactly as before: drop the surrounding
        # whitespace and the trailing separator left by the last prepend.
        joined = joined.strip().rstrip(';')
    return joined


def _bn_price(soup, book_format):
    """Return the B&N price text for *book_format*, or "na" if none is found.

    Looks inside every <li> tagged with ``data-bntrack=book_format`` for
    elements tagged ``data-bntrack="ParentBNPrice"``; the last match wins.
    """
    price = "na"
    # NOTE(review): the class string keeps its trailing space exactly as in
    # the original selector; verify it still matches with the installed bs4.
    for li_types in soup.find_all('li', class_="format ", attrs={"data-bntrack": book_format}):
        for node in li_types.find_all(attrs={"data-bntrack": "ParentBNPrice"}):
            price = node.text
    return price


def _parse_book(soup):
    """Build one tuple (a dict of field name -> text) from a parsed page.

    Raises whatever the lookups raise when the og:title <meta> tag or the
    product-details <div> is absent; those two are treated as mandatory.
    """
    book = {}
    book['title'] = soup.find('meta', {'property': 'og:title'})['content']

    details = soup.find('div', {'class': 'product-details box'})
    for litag in details.find_all('li'):
        label, _, value = _squeeze(litag.text).partition(":")
        # Strip the label so a stray leading/trailing space in the markup
        # does not make the field silently disappear.
        field = _DETAIL_FIELDS.get(label.strip())
        if field is not None:
            book[field] = value.strip()

    # Optional details fall back to the "na" placeholder.
    for field in ('edition', 'series', 'pages', 'product dimensions'):
        book.setdefault(field, "na")

    # A page without the list gets "na", like every other optional field,
    # instead of a tuple with the key missing.
    authors = _joined_list_items(soup, "contributors", clean=True, skip_text='by')
    book['authors'] = "na" if authors is None else authors

    categories = _joined_list_items(soup, "related-categories box")
    book['related categories'] = "na" if categories is None else categories

    book['audiobook bn price'] = _bn_price(soup, "Audiobook")
    book['hardcover bn price'] = _bn_price(soup, "Hardcover")
    book['paperback bn price'] = _bn_price(soup, "Paperback")
    return book


num_results = 3600
for j in range(1, num_results + 1):
    print(j)  # progress indicator
    filename = "RESULTS/results" + str(j) + ".html"
    # The context manager closes the page even if parsing raises.
    with open(filename, "r", encoding='utf-8') as f:
        # NOTE(review): no parser is named, so bs4 picks whichever installed
        # parser it ranks highest; pin one once the parser used for the
        # existing data is confirmed.
        soup = bs(f)
    table["table"]["tuples"].append(_parse_book(soup))

# Open the output only after every page parsed, so a failure part-way through
# does not leave a truncated barnesandnoble.json behind.
with open("barnesandnoble.json", "w") as outfile:
    outfile.write(json.dumps(table, indent=4, sort_keys=True, separators=(',', ':')))