"""List subtags of AO3's "Final Fantasy Series" tag by filtered work count.

The script fetches the series tag page, logs in, runs a filtered works
search for every direct subtag, and prints the subtags split into
"eligible" (fewer than ``ELIGIBILITY_THRESHOLD`` matching works) and
"ineligible" (everything else), each sorted by count, largest first.
"""

import os
import re
import time

import requests
from bs4 import BeautifulSoup

SERIES_URL = "https://archiveofourown.org/tags/Final%20Fantasy%20Series"
LOGIN_URL = "https://archiveofourown.org/users/login"

# Works search with the filters pre-applied (sorted by revised_at, complete
# works only, at least 1000 words, language "en"). The tag to search is
# appended after the trailing "tag_id=".
FILTER_URL = (
    "https://archiveofourown.org/works"
    "?work_search%5Bsort_column%5D=revised_at"
    "&work_search%5Bother_tag_names%5D="
    "&work_search%5Bexcluded_tag_names%5D="
    "&work_search%5Bcrossover%5D="
    "&work_search%5Bcomplete%5D=T"
    "&work_search%5Bwords_from%5D=1000"
    "&work_search%5Bwords_to%5D="
    "&work_search%5Bdate_from%5D="
    "&work_search%5Bdate_to%5D="
    "&work_search%5Bquery%5D="
    "&work_search%5Blanguage_id%5D=en"
    "&commit=Sort+and+Filter"
    "&tag_id="
)

# A subtag is "eligible" while its filtered work count is below this value.
ELIGIBILITY_THRESHOLD = 1000
# Pause before each search request.
# NOTE(review): presumably there to avoid hammering the site - confirm.
REQUEST_DELAY_SECONDS = 3
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

# SECURITY: credentials should not live in source control. Set AO3_USERNAME
# and AO3_PASSWORD in the environment; the literals below are only kept as
# fallbacks so existing invocations keep working.
USERNAME = os.environ.get("AO3_USERNAME", "translinkni")
PASSWORD = os.environ.get("AO3_PASSWORD", "fakeapi")


def parse_work_count(heading):
    """Return the work count contained in a listing-page heading.

    Newlines are removed, then everything from the first " Work" onwards,
    then everything up to and including the last " of ", then commas.

    >>> parse_work_count("1 - 20 of 1,234 Works in Some Tag")
    1234
    >>> parse_work_count("15 Works in Some Tag")
    15

    Raises:
        ValueError: if what remains is not an integer.
    """
    text = heading.replace("\n", "")
    text = re.sub(r" Work.*", "", text)
    text = re.sub(r".* of ", "", text)
    return int(text.replace(",", ""))


def log_in(session):
    """Log ``session`` in via the login form at ``LOGIN_URL``.

    The login page is fetched first because the form carries an
    ``authenticity_token`` that has to be posted back with the credentials.

    Raises:
        requests.HTTPError: if either request returns an error status.
    """
    login_page = session.get(LOGIN_URL, timeout=REQUEST_TIMEOUT)
    login_page.raise_for_status()
    login_soup = BeautifulSoup(login_page.content, "html.parser")
    token = (
        login_soup.find("form", class_="new_user")
        .find("input", attrs={"name": "authenticity_token"})
        .get("value")
    )
    payload = {
        "user[login]": USERNAME,
        "user[password]": PASSWORD,
        "user[remember_me]": "1",
        "authenticity_token": token,
    }
    response = session.post(LOGIN_URL, data=payload, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()


def fetch_subtags():
    """Return the direct ``<li>`` children of the series page's subtag tree.

    Raises:
        requests.HTTPError: if the series page returns an error status.
    """
    series_page = requests.get(SERIES_URL, timeout=REQUEST_TIMEOUT)
    series_page.raise_for_status()
    series_soup = BeautifulSoup(series_page.content, "html.parser")
    tree = series_soup.find(class_="sub").find("ul", {"class": "tree"})
    # recursive=False keeps only the top-level entries of the tree.
    return tree.find_all("li", recursive=False)


def fetch_work_count(session, tag_url, tag_name):
    """Run the filtered search for one tag and return its work count.

    Raises:
        requests.HTTPError: if the search page returns an error status.
        RuntimeError: if the page has no ``<h2 class="heading">`` element.
        ValueError: if the heading does not contain a parseable count.
    """
    tag_page = session.get(FILTER_URL + tag_url, timeout=REQUEST_TIMEOUT)
    tag_page.raise_for_status()
    tag_soup = BeautifulSoup(tag_page.content, "html.parser")
    heading = tag_soup.find("h2", class_="heading")
    if heading is None:
        raise RuntimeError("No work-count heading found for " + tag_name)
    return parse_work_count(heading.text)


def print_fandoms(title, fandoms):
    """Print ``title`` followed by one ``name (count)`` line per fandom."""
    print(title)
    for fandom in fandoms:
        print(f"{fandom['name']} ({fandom['count']})")


def main():
    """Check every subtag and print the eligible / ineligible lists."""
    fandoms = fetch_subtags()
    total = len(fandoms)

    eligible = []
    ineligible = []

    with requests.Session() as session:
        log_in(session)

        for position, fandom in enumerate(fandoms, start=1):
            time.sleep(REQUEST_DELAY_SECONDS)
            # Drop the first six characters of the link target.
            # NOTE(review): presumably the leading "/tags/" - confirm.
            tag_url = str(fandom.find("a")["href"])[6:]
            tag_name = str(fandom.find(class_="tag").text)
            print(f"Checking {tag_name} ({position} of {total})")

            work_count = fetch_work_count(session, tag_url, tag_name)
            entry = {"name": tag_name, "count": work_count}
            if work_count < ELIGIBILITY_THRESHOLD:
                eligible.append(entry)
            else:
                ineligible.append(entry)

    # Largest counts first in both lists.
    eligible.sort(key=lambda d: d["count"], reverse=True)
    ineligible.sort(key=lambda d: d["count"], reverse=True)

    print_fandoms("\nCurrently eligible:\n", eligible)
    print_fandoms("\nIneligible:\n", ineligible)


if __name__ == "__main__":
    main()