Update sweetandshort scraper, some more corrections to prompt scraper

master
trémeur 10 months ago
parent 31f4a80ddc
commit 4915e8db4b

@ -84,6 +84,8 @@ try:
adstrippable = str(adtheprompt.text) adstrippable = str(adtheprompt.text)
while adstrippable[-1] == " ": while adstrippable[-1] == " ":
adstrippable = adstrippable[:-1] adstrippable = adstrippable[:-1]
while adstrippable[0] == " ":
adstrippable = adstrippable[1:]
print("anythingdrabble (100, 200, 300, 400, or 500 words): \033[1m" + adstrippable.lower() + "\033[0m (" + adprompt + ")\n") print("anythingdrabble (100, 200, 300, 400, or 500 words): \033[1m" + adstrippable.lower() + "\033[0m (" + adprompt + ")\n")
thefile.write("- [[" + adprompt + "][anythingdrabble]] (100, 200, 300, 400, or 500 words): *" + adstrippable.lower() + "*\n") thefile.write("- [[" + adprompt + "][anythingdrabble]] (100, 200, 300, 400, or 500 words): *" + adstrippable.lower() + "*\n")
except: except:
@ -124,6 +126,7 @@ try:
zonetheprompt = zoneprompttext.find("strong") zonetheprompt = zoneprompttext.find("strong")
print("drabble-zone (100 or 200 words): \033[1m" + zonetheprompt.text.lower() + "\033[0m (" + zoneprompt + ")\n") print("drabble-zone (100 or 200 words): \033[1m" + zonetheprompt.text.lower() + "\033[0m (" + zoneprompt + ")\n")
thefile.write("- [[" + zoneprompt + "][drabble-zone]] (100 or 200 words): *" + zonetheprompt.text.lower() + "*\n") thefile.write("- [[" + zoneprompt + "][drabble-zone]] (100 or 200 words): *" + zonetheprompt.text.lower() + "*\n")
emotion = "https://emotion100.dreamwidth.org/tag/*modpost?style=light&tag=%2Amodpost" emotion = "https://emotion100.dreamwidth.org/tag/*modpost?style=light&tag=%2Amodpost"
emotionpage = s.get(emotion) emotionpage = s.get(emotion)
emotionsoup = BeautifulSoup(emotionpage.content, "html.parser") emotionsoup = BeautifulSoup(emotionpage.content, "html.parser")
@ -146,7 +149,7 @@ try:
ffa = "https://fail-fandomanon.dreamwidth.org/?style=light" ffa = "https://fail-fandomanon.dreamwidth.org/?style=light"
ffapage = requests.get(ffa) ffapage = requests.get(ffa)
ffasoup = BeautifulSoup(ffapage.content, "html.parser") ffasoup = BeautifulSoup(ffapage.content, "html.parser")
ffaprompts = ffasoup.find_all("h3", string=lambda text: "ffa dw post" in text.lower()) ffaprompts = ffasoup.find_all("h3")
ffapromptstrim = [x for x in ffaprompts if "Placeholder" not in str(x)] ffapromptstrim = [x for x in ffaprompts if "Placeholder" not in str(x)]
ffasubsoup = BeautifulSoup(str(ffapromptstrim[0]), "html.parser") ffasubsoup = BeautifulSoup(str(ffapromptstrim[0]), "html.parser")
ffaurl = ffasubsoup.find("a") ffaurl = ffasubsoup.find("a")
@ -210,7 +213,7 @@ try:
flash = "https://fan-flashworks.dreamwidth.org/?style=light&tag=admin" flash = "https://fan-flashworks.dreamwidth.org/?style=light&tag=admin"
flashpage = requests.get(flash) flashpage = requests.get(flash)
flashsoup = BeautifulSoup(flashpage.content, "html.parser") flashsoup = BeautifulSoup(flashpage.content, "html.parser")
flashprompts = flashsoup.find_all("h3", string=lambda text: "challenge" in text.lower()) flashprompts = flashsoup.find_all(lambda tag: tag.name == "h3" and "Challenge" in tag.text)
flashsubsoup = BeautifulSoup(str(flashprompts[0]), "html.parser") flashsubsoup = BeautifulSoup(str(flashprompts[0]), "html.parser")
flashurl = flashsubsoup.find("a") flashurl = flashsubsoup.find("a")
flashprompt = (flashurl["href"]) flashprompt = (flashurl["href"])
@ -224,24 +227,25 @@ try:
except: except:
pass pass
try: # seems dead
femslash = "https://femslashficlets.dreamwidth.org/tag/challenges?style=light&tag=challenges" # try:
femslashpage = requests.get(femslash) # femslash = "https://femslashficlets.dreamwidth.org/tag/challenges?style=light&tag=challenges"
femslashsoup = BeautifulSoup(femslashpage.content, "html.parser") # femslashpage = requests.get(femslash)
femslashprompts = femslashsoup.find_all("h3", string=lambda text: "challenge" in text.lower()) # femslashsoup = BeautifulSoup(femslashpage.content, "html.parser")
femslashsubsoup = BeautifulSoup(str(femslashprompts[0]), "html.parser") # femslashprompts = femslashsoup.find_all("h3", string=lambda text: "challenge" in text.lower())
femslashurl = femslashsubsoup.find("a") # femslashsubsoup = BeautifulSoup(str(femslashprompts[0]), "html.parser")
femslashprompt = (femslashurl["href"]) # femslashurl = femslashsubsoup.find("a")
femslashpromptnew = (femslashurl["href"] + "?style=light") # femslashprompt = (femslashurl["href"])
femslashpromptpage = requests.get(femslashpromptnew) # femslashpromptnew = (femslashurl["href"] + "?style=light")
femslashpromptsoup = BeautifulSoup(femslashpromptpage.content, "html.parser") # femslashpromptpage = requests.get(femslashpromptnew)
femslashprompttext = femslashpromptsoup.find(class_="entry-content") # femslashpromptsoup = BeautifulSoup(femslashpromptpage.content, "html.parser")
femslashtheprompt = femslashprompttext.find("i") # femslashprompttext = femslashpromptsoup.find(class_="entry-content")
if femslashtheprompt is not None: # femslashtheprompt = femslashprompttext.find("i")
print("femslash-ficlets (1001000 words, F/F): \033[1m" + femslashtheprompt.text.lower() + "\033[0m (" + femslashprompt + ")\n") # if femslashtheprompt is not None:
thefile.write("- [[" + femslashprompt + "][femslashficlets]] (100 words or a multiple of 100): *" + femslashtheprompt.text.lower() + "*\n") # print("femslash-ficlets (1001000 words, F/F): \033[1m" + femslashtheprompt.text.lower() + "\033[0m (" + femslashprompt + ")\n")
except: # thefile.write("- [[" + femslashprompt + "][femslashficlets]] (100 words or a multiple of 100): *" + femslashtheprompt.text.lower() + "*\n")
pass # except:
# pass
try: try:
with requests.Session() as s: with requests.Session() as s:
@ -339,9 +343,26 @@ try:
except: except:
pass pass
# sweet and short: complex and time-dependent rules …
# First work out which of the two alternating monthly challenges we're on.
# `alternate` ends up as either "comment" or "picture" and is read by the
# sweet-and-short scraping code below to decide which tag page to fetch.
themonth = date.today().month
thisyear = date.today().year
# The same set of months maps to opposite challenges depending on the parity
# of the year, so this has to be the remainder (`% 2`), not floor division:
# `thisyear // 2` is non-zero for every real year, which made the `else`
# branch below unreachable.
if thisyear % 2:
    # Odd year.
    if themonth in (1, 3, 6, 9, 11):
        alternate = "comment"
    else:
        alternate = "picture"
else:
    # Even year: the two challenges swap months.
    if themonth in (1, 3, 6, 9, 11):
        alternate = "picture"
    else:
        alternate = "comment"
if themonth != 4 and themonth != 8 and themonth != 12:
try: try:
if 30 > today > 21: if today > 21:
ssbingo = "https://sweetandshort.dreamwidth.org/tag/challenge:+bingo?style=light&tag=challenge:+bingo" ssbingo = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+bingo?style=light&tag=!new+challenge,challenge:+bingo&mode=and"
ssbingopage = requests.get(ssbingo) ssbingopage = requests.get(ssbingo)
ssbingosoup = BeautifulSoup(ssbingopage.content, "html.parser") ssbingosoup = BeautifulSoup(ssbingopage.content, "html.parser")
ssbingoprompts = ssbingosoup.find_all("h3") ssbingoprompts = ssbingosoup.find_all("h3")
@ -358,13 +379,14 @@ try:
newprompt = re.sub("<.*?>","",str(prompt)) newprompt = re.sub("<.*?>","",str(prompt))
ssbingoclean.append(newprompt) ssbingoclean.append(newprompt)
ssbingofinal = "; ".join(ssbingoclean).lower() ssbingofinal = "; ".join(ssbingoclean).lower()
print("sweet and short bingo (up to 300 words for two prompts, up to 600 words for four prompts): \033[1m" + ssbingofinal + "\033[0m (" + ssbingoprompt + ")\n") print("sweet and short bingo (up to 500 words, separate or combined): \033[1m" + ssbingofinal + "\033[0m (" + ssbingoprompt + ")\n")
thefile.write("- [[" + ssbingoprompt + "][sweet and short bingo]] (up to 300 words for two prompts, up to 600 words for four prompts): *" + ssbingofinal + "*\n") thefile.write("- [[" + ssbingoprompt + "][sweet and short bingo]] (up to 500 words, separate or combined): *" + ssbingofinal + "*\n")
except: except:
pass pass
try: try:
if 16 > today > 7: if today > 7:
if alternate == "comment":
ssquicky = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+comment+quicky?mode=and&style=light&tag=%21new+challenge,challenge:+comment+quicky" ssquicky = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+comment+quicky?mode=and&style=light&tag=%21new+challenge,challenge:+comment+quicky"
ssquickypage = requests.get(ssquicky) ssquickypage = requests.get(ssquicky)
ssquickysoup = BeautifulSoup(ssquickypage.content, "html.parser") ssquickysoup = BeautifulSoup(ssquickypage.content, "html.parser")
@ -386,13 +408,28 @@ try:
commentprompt = re.sub("<.*?>","",str(commenttext)) commentprompt = re.sub("<.*?>","",str(commenttext))
ssquickycomments.append(str(commentprompt)[2:-2]) ssquickycomments.append(str(commentprompt)[2:-2])
ssquickycprompt = "; ".join(ssquickycomments) ssquickycprompt = "; ".join(ssquickycomments)
print("sweet and short comment quicky (up to 99 words): \033[1m" + ssquickycprompt.lower() + "\033[0m (" + ssquickyprompt + ")\n") print("sweet and short comment quicky (up to 100 words): \033[1m" + ssquickycprompt.lower() + "\033[0m (" + ssquickyprompt + ")\n")
thefile.write("- [[" + ssquickyprompt + "][sweet and short comment quicky]] (up to 99 words): *" + ssquickycprompt.lower() + "*\n") thefile.write("- [[" + ssquickyprompt + "][sweet and short comment quicky]] (up to 100 words): *" + ssquickycprompt.lower() + "*\n")
elif alternate == "picture":
sspicture = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+picture+prompt+fun?mode=and&style=light&tag=%21new+challenge,challenge:+picture+prompt+fun&mode=and"
sspicturepage = requests.get(sspicture)
sspicturesoup = BeautifulSoup(sspicturepage.content, "html.parser")
monthstring = ".*" + month + ".*"
sspictureprompts = sspicturesoup.find_all("h3", string=re.compile(monthstring))
sspicturesubsoup = BeautifulSoup(str(sspictureprompts[0]), "html.parser")
sspictureurl = sspicturesubsoup.find("a")
sspictureprompt = (sspictureurl["href"])
sspicturepromptnew = (sspictureurl["href"] + "?style=light")
sspicturepromptpage = requests.get(sspicturepromptnew)
sspicturepromptsoup = BeautifulSoup(sspicturepromptpage.content, "html.parser")
sspictureprompttext = sspicturepromptsoup.find("h3")
print("sweet and short picture prompts (up to 300 words): \033[1m" + sspictureprompttext.text.lower() + "\033[0m (" + sspictureprompt + ")\n")
thefile.write("- [[" + sspictureprompt + "][sweet and short picture prompts]] (up to 300 words): *" + sspictureprompttext.text.lower() + "*\n")
except: except:
pass pass
try: try:
ssmonthly = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+10+out+of+20?mode=and&style=light&tag=%21new+challenge,challenge:+10+out+of+20" ssmonthly = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+10+out+of+20?mode=and&style=light&tag=%21new+challenge,challenge:+10+out+of+20&mode=and"
ssmonthlypage = requests.get(ssmonthly) ssmonthlypage = requests.get(ssmonthly)
ssmonthlysoup = BeautifulSoup(ssmonthlypage.content, "html.parser") ssmonthlysoup = BeautifulSoup(ssmonthlypage.content, "html.parser")
ssmonthlyprompts = ssmonthlysoup.find_all("h3") ssmonthlyprompts = ssmonthlysoup.find_all("h3")
@ -408,27 +445,30 @@ try:
ssmonthlypromptstriptwo = re.sub("([a-z])- ","\\1; ",str(ssmonthlypromptstripone)) ssmonthlypromptstriptwo = re.sub("([a-z])- ","\\1; ",str(ssmonthlypromptstripone))
ssmonthlypromptstripthree = re.sub("- ","",str(ssmonthlypromptstriptwo)) ssmonthlypromptstripthree = re.sub("- ","",str(ssmonthlypromptstriptwo))
ssmonthlypromptfinal = str(ssmonthlypromptstripthree)[2:-2] ssmonthlypromptfinal = str(ssmonthlypromptstripthree)[2:-2]
print("sweet and short monthly prompts (up to 300 words [09 prompts], up to 900 words [1019 prompts], any [20 prompts]): \033[1m" + ssmonthlypromptfinal + "\033[0m (" + ssmonthlyprompt + ")\n") print("sweet and short monthly prompts (up to 500 words based on at least 10 prompts): \033[1m" + ssmonthlypromptfinal + "\033[0m (" + ssmonthlyprompt + ")\n")
thefile.write("- [[" + ssmonthlyprompt + "][sweet and short monthly prompts]] (up to 300 words [09 prompts], up to 900 words [1019 prompts], any [20 prompts]): *" + ssmonthlypromptfinal + "*\n") thefile.write("- [[" + ssmonthlyprompt + "][sweet and short monthly prompts]] (up to 500 words based on at least 10 prompts): *" + ssmonthlypromptfinal + "*\n")
except: except:
pass pass
try: try:
if today > 14: if today > 14:
sspicture = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+picture+prompt+fun?mode=and&style=light&tag=%21new+challenge,challenge:+picture+prompt+fun" ssone = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+only+one?mode=and&style=light&tag=%21new+challenge,challenge:+only+one&mode=and"
sspicturepage = requests.get(sspicture) ssonepage = requests.get(ssone)
sspicturesoup = BeautifulSoup(sspicturepage.content, "html.parser") ssonesoup = BeautifulSoup(ssonepage.content, "html.parser")
monthstring = ".*" + month + ".*" ssoneprompts = ssonesoup.find_all("h3")
sspictureprompts = sspicturesoup.find_all("h3", string=re.compile(monthstring)) ssonesubsoup = BeautifulSoup(str(ssoneprompts[0]), "html.parser")
sspicturesubsoup = BeautifulSoup(str(sspictureprompts[0]), "html.parser") ssoneurl = ssonesubsoup.find("a")
sspictureurl = sspicturesubsoup.find("a") ssoneprompt = (ssoneurl["href"])
sspictureprompt = (sspictureurl["href"]) ssonepromptnew = (ssoneurl["href"] + "?style=light")
sspicturepromptnew = (sspictureurl["href"] + "?style=light") ssonepromptpage = requests.get(ssonepromptnew)
sspicturepromptpage = requests.get(sspicturepromptnew) ssonepromptsoup = BeautifulSoup(ssonepromptpage.content, "html.parser")
sspicturepromptsoup = BeautifulSoup(sspicturepromptpage.content, "html.parser") ssoneprompttext = ssonepromptsoup.find("i")
sspictureprompttext = sspicturepromptsoup.find("h3") ssonepromptstripone = re.sub("<.*?>","",str(ssoneprompttext))
print("sweet and short picture prompts (up to 300 words): \033[1m" + sspictureprompttext.text.lower() + "\033[0m (" + sspictureprompt + ")\n") ssonepromptstriptwo = re.sub("1. ","",ssonepromptstripone)
thefile.write("- [[" + sspictureprompt + "][sweet and short picture prompts]] (up to 300 words): *" + sspictureprompttext.text.lower() + "*\n") ssonepromptfinal = re.sub("2. ","; ",ssonepromptstriptwo)
print("sweet and short one sentence (up to 500 words, use one or both lines as the start and/or end): \033[1m" + ssonepromptfinal + "\033[0m (" + ssoneprompt + ")\n")
thefile.write("- [[" + ssoneprompt + "][sweet and short one sentence]] (up to 500 words, use one or both lines as the start and/or end): *" + ssonepromptfinal + "*\n")
except: except:
pass pass

Loading…
Cancel
Save