Handle exceptions properly in prompt scraper

master
trémeur 11 months ago
parent bd85f72186
commit 31f4a80ddc

@ -18,6 +18,7 @@ today = int(date.today().strftime("%d"))
month = str(date.today().strftime("%B")) month = str(date.today().strftime("%B"))
monthstring = ".*" + month + ".*" monthstring = ".*" + month + ".*"
try:
cent = "https://100words.dreamwidth.org/tag/!prompt?style=light&tag=%21prompt" cent = "https://100words.dreamwidth.org/tag/!prompt?style=light&tag=%21prompt"
centpage = requests.get(cent) centpage = requests.get(cent)
centsoup = BeautifulSoup(centpage.content, "html.parser") centsoup = BeautifulSoup(centpage.content, "html.parser")
@ -32,6 +33,8 @@ centprompttext = centpromptsoup.find(class_="entry-content")
centtheprompt = centprompttext.find("strong") centtheprompt = centprompttext.find("strong")
print("100words (100 words): \033[1m" + centtheprompt.text.lower() + "\033[0m (" + centprompt + ")\n") print("100words (100 words): \033[1m" + centtheprompt.text.lower() + "\033[0m (" + centprompt + ")\n")
thefile.write("- [[" + centprompt + "][100words]] (100 words): *" + centtheprompt.text.lower() + "*\n") thefile.write("- [[" + centprompt + "][100words]] (100 words): *" + centtheprompt.text.lower() + "*\n")
except:
pass
# for this one we need to extract the right entry from a list, which may be an <ol> but may not be. also, need to use the right month, as next month's prompts are posted in advance # for this one we need to extract the right entry from a list, which may be an <ol> but may not be. also, need to use the right month, as next month's prompts are posted in advance
# now defunct?? # now defunct??
@ -65,6 +68,7 @@ thefile.write("- [[" + centprompt + "][100words]] (100 words): *" + centthepromp
# thefile.write("- [[" + thirtyoneprompt + "][31-days]] (any): *" + thirtyonetheprompt.lower() + "*\n") # thefile.write("- [[" + thirtyoneprompt + "][31-days]] (any): *" + thirtyonetheprompt.lower() + "*\n")
try:
ad = "https://anythingdrabble.dreamwidth.org/tag/mod!+post?style=light&tag=mod%21+post" ad = "https://anythingdrabble.dreamwidth.org/tag/mod!+post?style=light&tag=mod%21+post"
adpage = requests.get(ad) adpage = requests.get(ad)
adsoup = BeautifulSoup(adpage.content, "html.parser") adsoup = BeautifulSoup(adpage.content, "html.parser")
@ -82,7 +86,10 @@ while adstrippable[-1] == " ":
adstrippable = adstrippable[:-1] adstrippable = adstrippable[:-1]
print("anythingdrabble (100, 200, 300, 400, or 500 words): \033[1m" + adstrippable.lower() + "\033[0m (" + adprompt + ")\n") print("anythingdrabble (100, 200, 300, 400, or 500 words): \033[1m" + adstrippable.lower() + "\033[0m (" + adprompt + ")\n")
thefile.write("- [[" + adprompt + "][anythingdrabble]] (100, 200, 300, 400, or 500 words): *" + adstrippable.lower() + "*\n") thefile.write("- [[" + adprompt + "][anythingdrabble]] (100, 200, 300, 400, or 500 words): *" + adstrippable.lower() + "*\n")
except:
pass
try:
dove = "https://dove-drabbles.dreamwidth.org/?style=light" dove = "https://dove-drabbles.dreamwidth.org/?style=light"
dovepage = requests.get(dove) dovepage = requests.get(dove)
dovesoup = BeautifulSoup(dovepage.content, "html.parser") dovesoup = BeautifulSoup(dovepage.content, "html.parser")
@ -97,7 +104,10 @@ doveprompttext = dovepromptsoup.find(class_="entry-content")
dovetheprompt = doveprompttext.find("i") dovetheprompt = doveprompttext.find("i")
print("dove-drabbles (any): \033[1m" + dovetheprompt.text.lower() + "\033[0m (" + doveprompt + ")\n") print("dove-drabbles (any): \033[1m" + dovetheprompt.text.lower() + "\033[0m (" + doveprompt + ")\n")
thefile.write("- [[" + doveprompt + "][dove-drabbles]] (any): *" + dovetheprompt.text.lower() + "*\n") thefile.write("- [[" + doveprompt + "][dove-drabbles]] (any): *" + dovetheprompt.text.lower() + "*\n")
except:
pass
try:
with requests.Session() as s: with requests.Session() as s:
response = s.post(login_url , data) response = s.post(login_url , data)
zone = "https://drabble-zone.dreamwidth.org/tag/mod-post?style=light&tag=mod-post" zone = "https://drabble-zone.dreamwidth.org/tag/mod-post?style=light&tag=mod-post"
@ -128,8 +138,11 @@ with requests.Session() as s:
emotiontheprompt = emotionprompttext.find_all("span")[-1] emotiontheprompt = emotionprompttext.find_all("span")[-1]
print("emotion100 (100 words or a multiple of 100): \033[1m" + emotiontheprompt.text.lower() + "\033[0m (" + emotionprompt + ")\n") print("emotion100 (100 words or a multiple of 100): \033[1m" + emotiontheprompt.text.lower() + "\033[0m (" + emotionprompt + ")\n")
thefile.write("- [[" + emotionprompt + "][emotion100]] (100 words or a multiple of 100): *" + emotiontheprompt.text.lower() + "*\n") thefile.write("- [[" + emotionprompt + "][emotion100]] (100 words or a multiple of 100): *" + emotiontheprompt.text.lower() + "*\n")
except:
pass
# for this one, have to get prompts from comments # for this one, have to get prompts from comments
try:
ffa = "https://fail-fandomanon.dreamwidth.org/?style=light" ffa = "https://fail-fandomanon.dreamwidth.org/?style=light"
ffapage = requests.get(ffa) ffapage = requests.get(ffa)
ffasoup = BeautifulSoup(ffapage.content, "html.parser") ffasoup = BeautifulSoup(ffapage.content, "html.parser")
@ -168,9 +181,12 @@ if ffacent:
ffaformat = "; ".join(ffacentnew) ffaformat = "; ".join(ffacentnew)
print("fail-fandomanon (any): \033[1m" + ffaformat.lower() + "\033[0m (" + ffaprompt + ")\n") print("fail-fandomanon (any): \033[1m" + ffaformat.lower() + "\033[0m (" + ffaprompt + ")\n")
thefile.write("- [[" + ffaprompt + "][fail-fandomanon]] (any): *" + ffaformat.lower() + "*\n") thefile.write("- [[" + ffaprompt + "][fail-fandomanon]] (any): *" + ffaformat.lower() + "*\n")
except:
pass
# for this one, prompts are unavailable on tuesdays and wednesdays # for this one, prompts are unavailable on tuesdays and wednesdays
try:
weekprogress = datetime.now().weekday() weekprogress = datetime.now().weekday()
if not 0 < weekprogress < 3: if not 0 < weekprogress < 3:
fandom = "https://fandomweekly.dreamwidth.org/?style=light&tag=%23challenge" fandom = "https://fandomweekly.dreamwidth.org/?style=light&tag=%23challenge"
@ -187,7 +203,10 @@ if not 0 < weekprogress < 3:
fandomtheprompt = fandomprompttext.find("td") fandomtheprompt = fandomprompttext.find("td")
print("fandomweekly (any, competitive): \033[1m" + fandomtheprompt.text.lower() + "\033[0m (" + fandomprompt + ")\n") print("fandomweekly (any, competitive): \033[1m" + fandomtheprompt.text.lower() + "\033[0m (" + fandomprompt + ")\n")
thefile.write("- [[" + fandomprompt + "][fandomweekly]] (any, competitive): *" + fandomtheprompt.text.lower() + "*\n") thefile.write("- [[" + fandomprompt + "][fandomweekly]] (any, competitive): *" + fandomtheprompt.text.lower() + "*\n")
except:
pass
try:
flash = "https://fan-flashworks.dreamwidth.org/?style=light&tag=admin" flash = "https://fan-flashworks.dreamwidth.org/?style=light&tag=admin"
flashpage = requests.get(flash) flashpage = requests.get(flash)
flashsoup = BeautifulSoup(flashpage.content, "html.parser") flashsoup = BeautifulSoup(flashpage.content, "html.parser")
@ -202,7 +221,10 @@ flashprompttext = flashpromptsoup.find(class_="entry-content")
flashtheprompt = flashprompttext.find("center") flashtheprompt = flashprompttext.find("center")
print("fan-flashworks (any, cant post elsewhere until round is closed): \033[1m" + flashtheprompt.text.lower() + "\033[0m (" + flashprompt + ")\n") print("fan-flashworks (any, cant post elsewhere until round is closed): \033[1m" + flashtheprompt.text.lower() + "\033[0m (" + flashprompt + ")\n")
thefile.write("- [[" + flashprompt + "][fan-flashworks]] (any, cant post elsewhere until round is closed): *" + flashtheprompt.text.lower() + "*\n") thefile.write("- [[" + flashprompt + "][fan-flashworks]] (any, cant post elsewhere until round is closed): *" + flashtheprompt.text.lower() + "*\n")
except:
pass
try:
femslash = "https://femslashficlets.dreamwidth.org/tag/challenges?style=light&tag=challenges" femslash = "https://femslashficlets.dreamwidth.org/tag/challenges?style=light&tag=challenges"
femslashpage = requests.get(femslash) femslashpage = requests.get(femslash)
femslashsoup = BeautifulSoup(femslashpage.content, "html.parser") femslashsoup = BeautifulSoup(femslashpage.content, "html.parser")
@ -218,7 +240,10 @@ femslashtheprompt = femslashprompttext.find("i")
if femslashtheprompt is not None: if femslashtheprompt is not None:
print("femslash-ficlets (1001000 words, F/F): \033[1m" + femslashtheprompt.text.lower() + "\033[0m (" + femslashprompt + ")\n") print("femslash-ficlets (1001000 words, F/F): \033[1m" + femslashtheprompt.text.lower() + "\033[0m (" + femslashprompt + ")\n")
thefile.write("- [[" + femslashprompt + "][femslashficlets]] (100 words or a multiple of 100): *" + femslashtheprompt.text.lower() + "*\n") thefile.write("- [[" + femslashprompt + "][femslashficlets]] (100 words or a multiple of 100): *" + femslashtheprompt.text.lower() + "*\n")
except:
pass
try:
with requests.Session() as s: with requests.Session() as s:
response = s.post(login_url , data) response = s.post(login_url , data)
fffc = "https://fffc.dreamwidth.org/tag/!challenges?style=light&tag=%21challenges" fffc = "https://fffc.dreamwidth.org/tag/!challenges?style=light&tag=%21challenges"
@ -267,7 +292,10 @@ with requests.Session() as s:
fffcregulartheprompt = fffcregularprompttext.find("b") fffcregulartheprompt = fffcregularprompttext.find("b")
print("fffc regular challenge (at least 100 words): \033[1m" + fffcregulartheprompt.text.lower() + "\033[0m (" + fffcregularprompt + ")\n") print("fffc regular challenge (at least 100 words): \033[1m" + fffcregulartheprompt.text.lower() + "\033[0m (" + fffcregularprompt + ")\n")
thefile.write("- [[" + fffcregularprompt + "][fffc regular challenge]] (at least 100 words): *" + fffcregulartheprompt.text.lower() + "*\n") thefile.write("- [[" + fffcregularprompt + "][fffc regular challenge]] (at least 100 words): *" + fffcregulartheprompt.text.lower() + "*\n")
except:
pass
try:
ficlet = "https://ficlet-zone.dreamwidth.org/tag/challenge+post?style=light&tag=challenge+post" ficlet = "https://ficlet-zone.dreamwidth.org/tag/challenge+post?style=light&tag=challenge+post"
ficletpage = requests.get(ficlet) ficletpage = requests.get(ficlet)
ficletsoup = BeautifulSoup(ficletpage.content, "html.parser") ficletsoup = BeautifulSoup(ficletpage.content, "html.parser")
@ -282,8 +310,11 @@ ficletprompttext = ficletpromptsoup.find(class_="entry-content")
ficlettheprompt = ficletprompttext.find("a") ficlettheprompt = ficletprompttext.find("a")
print("ficlet-zone (any): \033[1m" + ficlettheprompt.text.lower() + "\033[0m (" + ficletprompt + ")\n") print("ficlet-zone (any): \033[1m" + ficlettheprompt.text.lower() + "\033[0m (" + ficletprompt + ")\n")
thefile.write("- [[" + ficletprompt + "][ficlet-zone]] (any): *" + ficlettheprompt.text.lower() + "*\n") thefile.write("- [[" + ficletprompt + "][ficlet-zone]] (any): *" + ficlettheprompt.text.lower() + "*\n")
except:
pass
# first calculate the hour of the month … # first calculate the hour of the month …
try:
hourselapsed = (today - 1) * 24 hourselapsed = (today - 1) * 24
hourstoday = int(datetime.now().strftime("%H")) hourstoday = int(datetime.now().strftime("%H"))
currenthour = (hourselapsed + hourstoday) currenthour = (hourselapsed + hourstoday)
@ -305,7 +336,10 @@ with requests.Session() as s:
hourlypromptthishour = str(hourlypromptmedian[0])[5:-5] hourlypromptthishour = str(hourlypromptmedian[0])[5:-5]
print("hourlyprompts (any): \033[1m" + hourlypromptthishour.lower() + "\033[0m (" + hourlyprompt + ")\n") print("hourlyprompts (any): \033[1m" + hourlypromptthishour.lower() + "\033[0m (" + hourlyprompt + ")\n")
thefile.write("- [[" + hourlyprompt + "][hourlyprompts]] (any): *" + hourlypromptthishour.lower() + "*\n") thefile.write("- [[" + hourlyprompt + "][hourlyprompts]] (any): *" + hourlypromptthishour.lower() + "*\n")
except:
pass
try:
if 30 > today > 21: if 30 > today > 21:
ssbingo = "https://sweetandshort.dreamwidth.org/tag/challenge:+bingo?style=light&tag=challenge:+bingo" ssbingo = "https://sweetandshort.dreamwidth.org/tag/challenge:+bingo?style=light&tag=challenge:+bingo"
ssbingopage = requests.get(ssbingo) ssbingopage = requests.get(ssbingo)
@ -326,7 +360,10 @@ if 30 > today > 21:
ssbingofinal = "; ".join(ssbingoclean).lower() ssbingofinal = "; ".join(ssbingoclean).lower()
print("sweet and short bingo (up to 300 words for two prompts, up to 600 words for four prompts): \033[1m" + ssbingofinal + "\033[0m (" + ssbingoprompt + ")\n") print("sweet and short bingo (up to 300 words for two prompts, up to 600 words for four prompts): \033[1m" + ssbingofinal + "\033[0m (" + ssbingoprompt + ")\n")
thefile.write("- [[" + ssbingoprompt + "][sweet and short bingo]] (up to 300 words for two prompts, up to 600 words for four prompts): *" + ssbingofinal + "*\n") thefile.write("- [[" + ssbingoprompt + "][sweet and short bingo]] (up to 300 words for two prompts, up to 600 words for four prompts): *" + ssbingofinal + "*\n")
except:
pass
try:
if 16 > today > 7: if 16 > today > 7:
ssquicky = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+comment+quicky?mode=and&style=light&tag=%21new+challenge,challenge:+comment+quicky" ssquicky = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+comment+quicky?mode=and&style=light&tag=%21new+challenge,challenge:+comment+quicky"
ssquickypage = requests.get(ssquicky) ssquickypage = requests.get(ssquicky)
@ -351,7 +388,10 @@ if 16 > today > 7:
ssquickycprompt = "; ".join(ssquickycomments) ssquickycprompt = "; ".join(ssquickycomments)
print("sweet and short comment quicky (up to 99 words): \033[1m" + ssquickycprompt.lower() + "\033[0m (" + ssquickyprompt + ")\n") print("sweet and short comment quicky (up to 99 words): \033[1m" + ssquickycprompt.lower() + "\033[0m (" + ssquickyprompt + ")\n")
thefile.write("- [[" + ssquickyprompt + "][sweet and short comment quicky]] (up to 99 words): *" + ssquickycprompt.lower() + "*\n") thefile.write("- [[" + ssquickyprompt + "][sweet and short comment quicky]] (up to 99 words): *" + ssquickycprompt.lower() + "*\n")
except:
pass
try:
ssmonthly = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+10+out+of+20?mode=and&style=light&tag=%21new+challenge,challenge:+10+out+of+20" ssmonthly = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+10+out+of+20?mode=and&style=light&tag=%21new+challenge,challenge:+10+out+of+20"
ssmonthlypage = requests.get(ssmonthly) ssmonthlypage = requests.get(ssmonthly)
ssmonthlysoup = BeautifulSoup(ssmonthlypage.content, "html.parser") ssmonthlysoup = BeautifulSoup(ssmonthlypage.content, "html.parser")
@ -370,7 +410,10 @@ ssmonthlypromptstripthree = re.sub("- ","",str(ssmonthlypromptstriptwo))
ssmonthlypromptfinal = str(ssmonthlypromptstripthree)[2:-2] ssmonthlypromptfinal = str(ssmonthlypromptstripthree)[2:-2]
print("sweet and short monthly prompts (up to 300 words [09 prompts], up to 900 words [1019 prompts], any [20 prompts]): \033[1m" + ssmonthlypromptfinal + "\033[0m (" + ssmonthlyprompt + ")\n") print("sweet and short monthly prompts (up to 300 words [09 prompts], up to 900 words [1019 prompts], any [20 prompts]): \033[1m" + ssmonthlypromptfinal + "\033[0m (" + ssmonthlyprompt + ")\n")
thefile.write("- [[" + ssmonthlyprompt + "][sweet and short monthly prompts]] (up to 300 words [09 prompts], up to 900 words [1019 prompts], any [20 prompts]): *" + ssmonthlypromptfinal + "*\n") thefile.write("- [[" + ssmonthlyprompt + "][sweet and short monthly prompts]] (up to 300 words [09 prompts], up to 900 words [1019 prompts], any [20 prompts]): *" + ssmonthlypromptfinal + "*\n")
except:
pass
try:
if today > 14: if today > 14:
sspicture = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+picture+prompt+fun?mode=and&style=light&tag=%21new+challenge,challenge:+picture+prompt+fun" sspicture = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+picture+prompt+fun?mode=and&style=light&tag=%21new+challenge,challenge:+picture+prompt+fun"
sspicturepage = requests.get(sspicture) sspicturepage = requests.get(sspicture)
@ -386,7 +429,10 @@ if today > 14:
sspictureprompttext = sspicturepromptsoup.find("h3") sspictureprompttext = sspicturepromptsoup.find("h3")
print("sweet and short picture prompts (up to 300 words): \033[1m" + sspictureprompttext.text.lower() + "\033[0m (" + sspictureprompt + ")\n") print("sweet and short picture prompts (up to 300 words): \033[1m" + sspictureprompttext.text.lower() + "\033[0m (" + sspictureprompt + ")\n")
thefile.write("- [[" + sspictureprompt + "][sweet and short picture prompts]] (up to 300 words): *" + sspictureprompttext.text.lower() + "*\n") thefile.write("- [[" + sspictureprompt + "][sweet and short picture prompts]] (up to 300 words): *" + sspictureprompttext.text.lower() + "*\n")
except:
pass
try:
vocab = "https://vocab-drabbles.dreamwidth.org/?style=light&tag=challenge" vocab = "https://vocab-drabbles.dreamwidth.org/?style=light&tag=challenge"
vocabpage = requests.get(vocab) vocabpage = requests.get(vocab)
vocabsoup = BeautifulSoup(vocabpage.content, "html.parser") vocabsoup = BeautifulSoup(vocabpage.content, "html.parser")
@ -401,3 +447,5 @@ vocabprompttext = vocabpromptsoup.find(class_="entry-content")
vocabtheprompt = vocabprompttext.find("strong") vocabtheprompt = vocabprompttext.find("strong")
print("vocab-drabbles (50500 words): \033[1m" + vocabtheprompt.text.lower() + "\033[0m (" + vocabprompt + ")\n") print("vocab-drabbles (50500 words): \033[1m" + vocabtheprompt.text.lower() + "\033[0m (" + vocabprompt + ")\n")
thefile.write("- [[" + vocabprompt + "][vocab-drabbles]] (50500 words): *" + vocabtheprompt.text.lower() + "*\n") thefile.write("- [[" + vocabprompt + "][vocab-drabbles]] (50500 words): *" + vocabtheprompt.text.lower() + "*\n")
except:
pass

Loading…
Cancel
Save