# Blocking step: build candidate (amazon row, BnN row) pairs for record matching.
#
# Both inputs are '~'-delimited text files whose first line is a header.  Rows
# are identified by their 0-based line index in the file, so the first data row
# has id 1.  Two rows become a candidate pair when the first five characters of
# their publisher fields are equal and their publication years do not
# contradict each other (a missing or unparseable year never rules a pair out).

FIELD_SEPARATOR = "~"
PUBLISHER_PREFIX_LENGTH = 5

# Column positions, as used by this script for each table.
AMAZON_ISBN, AMAZON_DATE, AMAZON_PUBLISHER = 1, 5, 6
BN_ISBN, BN_DATE, BN_PUBLISHER = 1, 6, 7

# One column name per value written in each data row (five in total).
OUTPUT_HEADER = (
    "pairId:INTEGER,amazon.id:TEXT,BnN.id:TEXT,amazon.isbn:TEXT,BnN.isbn:TEXT"
)


def _read_rows(path):
    """Return ``{line_index: raw_line}`` for the data lines of *path*.

    The header (line index 0) and blank lines are left out.  Skipped lines
    still consume their index, so ids always equal the line position in the
    file.
    """
    rows = {}
    with open(path, encoding="utf8") as handle:
        for line_index, line in enumerate(handle):
            if line_index == 0 or not line.strip():
                continue
            rows[line_index] = line
    return rows


def _publisher_key(publisher):
    """Return the blocking key: the stripped publisher cut to a short prefix."""
    return publisher.strip()[:PUBLISHER_PREFIX_LENGTH]


def _year_of(date):
    """Return the year component of *date*, or ``None`` when it is unknown.

    The year is taken to be the third '/'-separated component (presumably the
    dates are month/day/year -- confirm against the data).  Anything with
    fewer than three components, or an empty third component, counts as
    unknown instead of raising.
    """
    parts = date.strip().split("/")
    if len(parts) < 3:
        return None
    return parts[2] or None


def _build_publisher_index(bn_rows):
    """Group BnN rows by publisher key.

    Returns ``{publisher_key: [(bn_id, isbn, year_or_None), ...]}`` with the
    entries of each key in file order.
    """
    index = {}
    for bn_id, line in bn_rows.items():
        fields = line.split(FIELD_SEPARATOR)
        entry = (
            bn_id,
            fields[BN_ISBN].strip(),
            _year_of(fields[BN_DATE]),
        )
        index.setdefault(_publisher_key(fields[BN_PUBLISHER]), []).append(entry)
    return index


def _candidate_pairs(amazon_rows, publisher_index):
    """Yield ``(amazon_id, bn_id, amazon_isbn, bn_isbn)`` for every candidate.

    A pair is rejected only when both years are known and differ.
    """
    for amazon_id, line in amazon_rows.items():
        fields = line.split(FIELD_SEPARATOR)
        amazon_isbn = fields[AMAZON_ISBN].strip()
        amazon_year = _year_of(fields[AMAZON_DATE])
        key = _publisher_key(fields[AMAZON_PUBLISHER])
        for bn_id, bn_isbn, bn_year in publisher_index.get(key, ()):
            years_conflict = (
                amazon_year is not None
                and bn_year is not None
                and amazon_year != bn_year
            )
            if not years_conflict:
                yield amazon_id, bn_id, amazon_isbn, bn_isbn


def main(amazon_path="amazon.csv", bn_path="bn.csv", output_path="C.csv"):
    """Write the candidate pairs of the two tables to *output_path*.

    Args:
        amazon_path: '~'-delimited amazon table (header on the first line).
        bn_path: '~'-delimited BnN table (header on the first line).
        output_path: comma-separated result file; it is overwritten.

    Raises:
        OSError: if an input cannot be read or the output cannot be written.
        IndexError: if a non-blank data row has fewer fields than the column
            positions used above.
    """
    # Both inputs are loaded (and closed again) before the output file is
    # created, so a missing input never leaves an empty result file behind.
    amazon_rows = _read_rows(amazon_path)
    publisher_index = _build_publisher_index(_read_rows(bn_path))

    with open(output_path, "w", encoding="utf8") as output:
        output.write(OUTPUT_HEADER + "\n")
        pairs = _candidate_pairs(amazon_rows, publisher_index)
        # Pair ids are a running counter starting at 1.
        for pair_id, pair in enumerate(pairs, start=1):
            output.write(",".join(map(str, (pair_id,) + pair)) + "\n")


if __name__ == "__main__":
    main()