You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
63 lines
2.7 KiB
Python
63 lines
2.7 KiB
Python
7 months ago
|
import re, requests, time
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
# Grab the sub-tags (the individual game fandoms) nested under the
# "Final Fantasy Series" tag on AO3.
series = "https://archiveofourown.org/tags/Final%20Fantasy%20Series"
# timeout so a stalled connection can't hang the script forever
seriespage = requests.get(series, timeout=30)
seriessoup = BeautifulSoup(seriespage.content, "html.parser")
# The sub-tag tree is the <ul class="tree"> inside the element with class
# "sub"; each direct <li> child is one fandom (recursive=False skips the
# deeper-nested grandchildren).
fandoms = seriessoup.find(class_="sub").find("ul", {"class": "tree"}).findAll("li", recursive=False)
|
||
|
|
||
|
# Work-search URL with the contest filters pre-applied: sorted by date
# updated, complete works only, at least 1000 words, English language.
# A tag id gets appended to the end to search within a single fandom.
filterpage = (
    "https://archiveofourown.org/works"
    "?work_search%5Bsort_column%5D=revised_at"
    "&work_search%5Bother_tag_names%5D="
    "&work_search%5Bexcluded_tag_names%5D="
    "&work_search%5Bcrossover%5D="
    "&work_search%5Bcomplete%5D=T"
    "&work_search%5Bwords_from%5D=1000"
    "&work_search%5Bwords_to%5D="
    "&work_search%5Bdate_from%5D="
    "&work_search%5Bdate_to%5D="
    "&work_search%5Bquery%5D="
    "&work_search%5Blanguage_id%5D=en"
    "&commit=Sort+and+Filter"
    "&tag_id="
)

# Buckets for fandoms under (eligible) or at/over (ineligible) the
# 1000-work threshold.
eligible, ineligible = [], []
|
||
|
def _work_count(heading):
    """Parse the total work count out of an AO3 listing heading.

    Headings look like "1 - 20 of 1,234 Works in ..." (or just
    "5 Works in ..." when everything fits on one page); returns the
    total as an int.
    """
    text = re.sub(r"\n", "", heading)
    text = re.sub(r" Work.*", "", text)  # drop everything from " Work(s)" onward
    text = re.sub(r".* of ", "", text)   # keep only the total after "of", when present
    return int(text.replace(",", ""))    # strip thousands separators


# Log in so the counts include works hidden from logged-out users.
with requests.Session() as s:
    loginpage = s.get("https://archiveofourown.org/users/login", timeout=30)
    loginsoup = BeautifulSoup(loginpage.content, "html.parser")
    # The login form carries a CSRF token that must be posted back with it.
    token = (loginsoup.find('form', class_='new_user')
             .find('input', attrs={'name': 'authenticity_token'})
             .get('value'))
    # SECURITY: credentials are hard-coded in source; move them to
    # environment variables or a config file kept out of version control.
    payload = {'user[login]': "translinkni",
               'user[password]': "fakeapi",
               'user[remember_me]': '1',
               'authenticity_token': token
               }
    post = s.post("https://archiveofourown.org/users/login", data=payload, timeout=30)

    # enumerate() replaces the original fandoms.index(fandom) lookup, which
    # rescanned the whole list on every iteration (O(n^2) overall).
    for position, fandom in enumerate(fandoms, start=1):
        time.sleep(3)  # throttle requests to be polite to AO3
        tagurl = str(fandom.find("a")["href"])[6:]  # strip the leading "/tags/"
        tagname = str(fandom.find(class_="tag").text)
        print("Checking " + tagname + " (" + str(position) + " of " + str(len(fandoms)) + ")")
        tagpage = s.get(filterpage + tagurl, timeout=30)
        tagsoup = BeautifulSoup(tagpage.content, "html.parser")

        # Pull the filtered work count out of the results-page heading.
        heading = tagsoup.find("h2", class_="heading").text
        workcount = _work_count(heading)

        fandomdict = {"name": tagname, "count": workcount}

        # Fewer than 1000 matching works -> the fandom is eligible.
        if workcount < 1000:
            eligible.append(fandomdict)
        else:
            ineligible.append(fandomdict)
|
||
|
|
||
|
# Sort each bucket by work count, highest first, then print the report.
eligible = sorted(eligible, key=lambda entry: entry["count"], reverse=True)
ineligible = sorted(ineligible, key=lambda entry: entry["count"], reverse=True)

for label, bucket in (("Currently eligible", eligible), ("Ineligible", ineligible)):
    print("\n" + label + ":\n")
    for entry in bucket:
        print(f"{entry['name']} ({entry['count']})")
|