diff --git a/promptscrape.py b/promptscrape.py index cb4ec4b..8f0c565 100644 --- a/promptscrape.py +++ b/promptscrape.py @@ -14,6 +14,10 @@ if os.path.exists("prompts.org"): thefile = open("prompts.org", "a") +today = int(date.today().strftime("%d")) +month = str(date.today().strftime("%B")) +monthstring = ".*" + month + ".*" + cent = "https://100words.dreamwidth.org/tag/!prompt?style=light&tag=%21prompt" centpage = requests.get(cent) centsoup = BeautifulSoup(centpage.content, "html.parser") @@ -30,36 +34,35 @@ print("100words (100 words): \033[1m" + centtheprompt.text.lower() + "\033[0m (" thefile.write("- [[" + centprompt + "][100words]] (100 words): *" + centtheprompt.text.lower() + "*\n") # for this one we need to extract the right entry from a list, which may be an
    but may not be. also, need to use the right month, as next month’s prompts are posted in advance -today = int(date.today().strftime("%d")) -month = str(date.today().strftime("%B")) -monthstring = ".*" + month + ".*" -thirtyone = "https://31-days.dreamwidth.org/tag/!prompts?style=light&tag=%21prompts" -thirtyonepage = requests.get(thirtyone) -thirtyonesoup = BeautifulSoup(thirtyonepage.content, "html.parser") -thirtyoneprompts = thirtyonesoup.find_all("h3", string = re.compile(monthstring)) -thirtyonesubsoup = BeautifulSoup(str(thirtyoneprompts[0]), "html.parser") -thirtyoneurl = thirtyonesubsoup.find("a") -thirtyoneprompt = (thirtyoneurl["href"]) -thirtyonepromptnew = (thirtyoneurl["href"] + "?style=light") -thirtyonepromptpage = requests.get(thirtyonepromptnew) -thirtyonepromptsoup = BeautifulSoup(thirtyonepromptpage.content, "html.parser") -thirtyoneprompttext = thirtyonepromptsoup.find(class_="entry-content") -if "
      " in str(thirtyoneprompttext): - thirtyonetheprompt = thirtyoneprompttext.select("ol > li")[today - 1].get_text(strip=True) -else: - interprompt = list(thirtyoneprompttext.stripped_strings) - thirtyonelist = [] - for prompt in interprompt: - if len(prompt) < 5: - promptnum = interprompt.index(prompt) - newnum = promptnum + 1 - thirtyonelist.append(prompt + interprompt[newnum]) - else: - thirtyonelist.append(prompt) - intsearch = str(today) + "." - thirtyonetheprompt = str([item for item in thirtyonelist if item.startswith(intsearch)])[2:-2] -print("31-days (any): \033[1m" + thirtyonetheprompt.lower() + "\033[0m (" + thirtyoneprompt + ")\n") -thefile.write("- [[" + thirtyoneprompt + "][31-days]] (any): *" + thirtyonetheprompt.lower() + "*\n") +# now defunct?? +# monthstring = ".*" + month + ".*" +# thirtyone = "https://31-days.dreamwidth.org/tag/!prompts?style=light&tag=%21prompts" +# thirtyonepage = requests.get(thirtyone) +# thirtyonesoup = BeautifulSoup(thirtyonepage.content, "html.parser") +# thirtyoneprompts = thirtyonesoup.find_all("h3", string = re.compile(monthstring)) +# thirtyonesubsoup = BeautifulSoup(str(thirtyoneprompts[0]), "html.parser") +# thirtyoneurl = thirtyonesubsoup.find("a") +# thirtyoneprompt = (thirtyoneurl["href"]) +# thirtyonepromptnew = (thirtyoneurl["href"] + "?style=light") +# thirtyonepromptpage = requests.get(thirtyonepromptnew) +# thirtyonepromptsoup = BeautifulSoup(thirtyonepromptpage.content, "html.parser") +# thirtyoneprompttext = thirtyonepromptsoup.find(class_="entry-content") +# if "
        " in str(thirtyoneprompttext): +# thirtyonetheprompt = thirtyoneprompttext.select("ol > li")[today - 1].get_text(strip=True) +# else: +# interprompt = list(thirtyoneprompttext.stripped_strings) +# thirtyonelist = [] +# for prompt in interprompt: +# if len(prompt) < 5: +# promptnum = interprompt.index(prompt) +# newnum = promptnum + 1 +# thirtyonelist.append(prompt + interprompt[newnum]) +# else: +# thirtyonelist.append(prompt) +# intsearch = str(today) + "." +# thirtyonetheprompt = str([item for item in thirtyonelist if item.startswith(intsearch)])[2:-2] +# print("31-days (any): \033[1m" + thirtyonetheprompt.lower() + "\033[0m (" + thirtyoneprompt + ")\n") +# thefile.write("- [[" + thirtyoneprompt + "][31-days]] (any): *" + thirtyonetheprompt.lower() + "*\n") ad = "https://anythingdrabble.dreamwidth.org/tag/mod!+post?style=light&tag=mod%21+post" @@ -221,6 +224,17 @@ with requests.Session() as s: fffc = "https://fffc.dreamwidth.org/tag/!challenges?style=light&tag=%21challenges" fffcpage = s.get(fffc) fffcsoup = BeautifulSoup(fffcpage.content, "html.parser") + if 18 > today > 9: + fffclittleprompts = fffcsoup.find_all("h3", string=lambda text: "little special" in text.lower()) + fffclittlesubsoup = BeautifulSoup(str(fffclittleprompts[0]), "html.parser") + fffclittleurl = fffclittlesubsoup.find("a") + fffclittleprompt = (fffclittleurl["href"]) + fffclittlepromptnew = (fffclittleurl["href"] + "?style=light") + fffclittlepromptpage = s.get(fffclittlepromptnew) + fffclittlepromptsoup = BeautifulSoup(fffclittlepromptpage.content, "html.parser") + fffclittleprompttext = fffclittlepromptsoup.find("h3") + print("fffc little special (at least 100 words): \033[1m" + fffclittleprompttext.text.lower() + "\033[0m (" + fffclittleprompt + ")\n") + thefile.write("- [[" + fffclittleprompt + "][fffc little special]] (at least 100 words): *" + fffclittleprompttext.text.lower() + "*\n") fffcmadnessprompts = fffcsoup.find_all("h3", string=lambda text: "froday madness" in text.lower()) fffcmadnesssubsoup = BeautifulSoup(str(fffcmadnessprompts[0]), "html.parser") fffcmadnessurl = fffcmadnesssubsoup.find("a") @@ -232,6 +246,16 @@ with requests.Session() as s: fffcmadnesstheprompt = fffcmadnessprompttext.find("b") print("fffc madness (at least 2000 words): \033[1m" + fffcmadnesstheprompt.text.lower() + "\033[0m (" + fffcmadnessprompt + ")\n") thefile.write("- [[" + fffcmadnessprompt + "][fffc madness]] (at least 2000 words): *" + fffcmadnesstheprompt.text.lower() + "*\n") + fffcmonthlyprompts = fffcsoup.find_all("h3", string=re.compile(monthstring)) + fffcmonthlysubsoup = BeautifulSoup(str(fffcmonthlyprompts[0]), "html.parser") + fffcmonthlyurl = fffcmonthlysubsoup.find("a") + fffcmonthlyprompt = (fffcmonthlyurl["href"]) + fffcmonthlypromptnew = (fffcmonthlyurl["href"] + "?style=light") + fffcmonthlypromptpage = s.get(fffcmonthlypromptnew) + fffcmonthlypromptsoup = BeautifulSoup(fffcmonthlypromptpage.content, "html.parser") + fffcmonthlyprompttext = fffcmonthlypromptsoup.find("h3") + print("fffc monthly special (usually at least 500 words): \033[1m" + fffcmonthlyprompttext.text.lower() + "\033[0m (" + fffcmonthlyprompt + ")\n") + thefile.write("- [[" + fffcmonthlyprompt + "][fffc monthly special]] (usually at least 500 words): *" + fffcmonthlyprompttext.text.lower() + "*\n") fffcregularprompts = fffcsoup.find_all("h3", string=lambda text: "regular challenge" in text.lower()) fffcregularsubsoup = BeautifulSoup(str(fffcregularprompts[0]), "html.parser") fffcregularurl = fffcregularsubsoup.find("a") @@ -259,7 +283,110 @@ ficlettheprompt = ficletprompttext.find("a") print("ficlet-zone (any): \033[1m" + ficlettheprompt.text.lower() + "\033[0m (" + ficletprompt + ")\n") thefile.write("- [[" + ficletprompt + "][ficlet-zone]] (any): *" + ficlettheprompt.text.lower() + "*\n") -# grabbing two different instances of the same tag within the post +# first calculate the hour of the month … +hourselapsed = (today - 1) * 24 +hourstoday = int(datetime.now().strftime("%H")) +currenthour = (hourselapsed + hourstoday) +with requests.Session() as s: + response = s.post(login_url , data) + hourly = "https://hourlyprompts.dreamwidth.org/?style=light" + hourlypage = s.get(hourly) + hourlysoup = BeautifulSoup(hourlypage.content, "html.parser") + hourlyprompts = hourlysoup.find_all("h3", string=re.compile(monthstring)) + hourlysubsoup = BeautifulSoup(str(hourlyprompts[0]), "html.parser") + hourlyurl = hourlysubsoup.find("a") + hourlyprompt = (hourlyurl["href"]) + hourlypromptnew = (hourlyurl["href"] + "?style=light") + hourlypromptpage = s.get(hourlypromptnew) + hourlypromptsoup = BeautifulSoup(hourlypromptpage.content, "html.parser") + hourlyprompttext = hourlypromptsoup.find(class_="entry-content") + searchstring = r"
        " + re.escape(str(currenthour)) + r"\. .*?
        " + hourlypromptmedian = re.findall(searchstring, str(hourlyprompttext)) + hourlypromptthishour = str(hourlypromptmedian[0])[5:-5] + print("hourlyprompts (any): \033[1m" + hourlypromptthishour.lower() + "\033[0m (" + hourlyprompt + ")\n") + thefile.write("- [[" + hourlyprompt + "][hourlyprompts]] (any): *" + hourlypromptthishour.lower() + "*\n") + +if 30 > today > 21: + ssbingo = "https://sweetandshort.dreamwidth.org/tag/challenge:+bingo?style=light&tag=challenge:+bingo" + ssbingopage = requests.get(ssbingo) + ssbingosoup = BeautifulSoup(ssbingopage.content, "html.parser") + ssbingoprompts = ssbingosoup.find_all("h3") + ssbingosubsoup = BeautifulSoup(str(ssbingoprompts[0]), "html.parser") + ssbingourl = ssbingosubsoup.find("a") + ssbingoprompt = (ssbingourl["href"]) + ssbingopromptnew = (ssbingourl["href"] + "?style=light") + ssbingopromptpage = requests.get(ssbingopromptnew) + ssbingopromptsoup = BeautifulSoup(ssbingopromptpage.content, "html.parser") + ssbingoprompttext = ssbingopromptsoup.find(class_="entry-content") + ssbingotheprompt = ssbingoprompttext.find_all("td") + ssbingoclean = [] + for prompt in ssbingotheprompt: + newprompt = re.sub("<.*?>","",str(prompt)) + ssbingoclean.append(newprompt) + ssbingofinal = "; ".join(ssbingoclean).lower() + print("sweet and short bingo (up to 300 words for two prompts, up to 600 words for four prompts): \033[1m" + ssbingofinal + "\033[0m (" + ssbingoprompt + ")\n") + thefile.write("- [[" + ssbingoprompt + "][sweet and short bingo]] (up to 300 words for two prompts, up to 600 words for four prompts): *" + ssbingofinal + "*\n") + +if 16 > today > 7: + ssquicky = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+comment+quicky?mode=and&style=light&tag=%21new+challenge,challenge:+comment+quicky" + ssquickypage = requests.get(ssquicky) + ssquickysoup = BeautifulSoup(ssquickypage.content, "html.parser") + ssquickyprompts = ssquickysoup.find_all("h3") + ssquickysubsoup = BeautifulSoup(str(ssquickyprompts[0]), "html.parser") + ssquickyurl = ssquickysubsoup.find("a") + ssquickyprompt = (ssquickyurl["href"]) + # deliberately not using style=light here so we can get at the comment contents + ssquickypromptnew = (ssquickyurl["href"]) + ssquickypromptpage = requests.get(ssquickypromptnew) + ssquickypromptsoup = BeautifulSoup(ssquickypromptpage.content, "html.parser") + promptcatch = ".*New Prompts Here" + # ssquickytheprompt = ssquickypromptsoup.find_all("h4",string = re.compile(promptcatch)) + ssquickytheprompt = ssquickypromptsoup.find_all(class_="comment") + ssquickycomments = [] + for comment in ssquickytheprompt: + if re.search("New Prompts Here",str(comment)): + commenttext = re.findall(r"
        ",str(comment)) + commentprompt = re.sub("<.*?>","",str(commenttext)) + ssquickycomments.append(str(commentprompt)[2:-2]) + ssquickycprompt = "; ".join(ssquickycomments) + print("sweet and short comment quicky (up to 99 words): \033[1m" + ssquickycprompt.lower() + "\033[0m (" + ssquickyprompt + ")\n") + thefile.write("- [[" + ssquickyprompt + "][sweet and short comment quicky]] (up to 99 words): *" + ssquickycprompt.lower() + "*\n") + +ssmonthly = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+10+out+of+20?mode=and&style=light&tag=%21new+challenge,challenge:+10+out+of+20" +ssmonthlypage = requests.get(ssmonthly) +ssmonthlysoup = BeautifulSoup(ssmonthlypage.content, "html.parser") +ssmonthlyprompts = ssmonthlysoup.find_all("h3") +ssmonthlysubsoup = BeautifulSoup(str(ssmonthlyprompts[0]), "html.parser") +ssmonthlyurl = ssmonthlysubsoup.find("a") +ssmonthlyprompt = (ssmonthlyurl["href"]) +ssmonthlypromptnew = (ssmonthlyurl["href"] + "?style=light") +ssmonthlypromptpage = requests.get(ssmonthlypromptnew) +ssmonthlypromptsoup = BeautifulSoup(ssmonthlypromptpage.content, "html.parser") +ssmonthlyprompttext = ssmonthlypromptsoup.find(class_="entry-content") +ssmonthlypromptmedian = re.findall(r".*", str(ssmonthlyprompttext)) +ssmonthlypromptstripone = re.sub("<.*?>","",str(ssmonthlypromptmedian)) +ssmonthlypromptstriptwo = re.sub("([a-z])- ","\\1; ",str(ssmonthlypromptstripone)) +ssmonthlypromptstripthree = re.sub("- ","",str(ssmonthlypromptstriptwo)) +ssmonthlypromptfinal = str(ssmonthlypromptstripthree)[2:-2] +print("sweet and short monthly prompts (up to 300 words [0–9 prompts], up to 900 words [10–19 prompts], any [20 prompts]): \033[1m" + ssmonthlypromptfinal + "\033[0m (" + ssmonthlyprompt + ")\n") +thefile.write("- [[" + ssmonthlyprompt + "][sweet and short monthly prompts]] (up to 300 words [0–9 prompts], up to 900 words [10–19 prompts], any [20 prompts]): *" + ssmonthlypromptfinal + "*\n") + +if today > 14: + sspicture = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+picture+prompt+fun?mode=and&style=light&tag=%21new+challenge,challenge:+picture+prompt+fun" + sspicturepage = requests.get(sspicture) + sspicturesoup = BeautifulSoup(sspicturepage.content, "html.parser") + monthstring = ".*" + month + ".*" + sspictureprompts = sspicturesoup.find_all("h3", string=re.compile(monthstring)) + sspicturesubsoup = BeautifulSoup(str(sspictureprompts[0]), "html.parser") + sspictureurl = sspicturesubsoup.find("a") + sspictureprompt = (sspictureurl["href"]) + sspicturepromptnew = (sspictureurl["href"] + "?style=light") + sspicturepromptpage = requests.get(sspicturepromptnew) + sspicturepromptsoup = BeautifulSoup(sspicturepromptpage.content, "html.parser") + sspictureprompttext = sspicturepromptsoup.find("h3") + print("sweet and short picture prompts (up to 300 words): \033[1m" + sspictureprompttext.text.lower() + "\033[0m (" + sspictureprompt + ")\n") + thefile.write("- [[" + sspictureprompt + "][sweet and short picture prompts]] (up to 300 words): *" + sspictureprompttext.text.lower() + "*\n") + vocab = "https://vocab-drabbles.dreamwidth.org/?style=light&tag=challenge" vocabpage = requests.get(vocab) vocabsoup = BeautifulSoup(vocabpage.content, "html.parser") @@ -270,9 +397,7 @@ vocabprompt = (vocaburl["href"]) vocabpromptnew = (vocaburl["href"] + "?style=light") vocabpromptpage = requests.get(vocabpromptnew) vocabpromptsoup = BeautifulSoup(vocabpromptpage.content, "html.parser") -vocablist = [] -for listitem in vocabpromptsoup.find_all("strong"): - vocablist.append(listitem.text.lower()) -vocabformat = "; ".join(vocablist) -print("vocab-drabbles (50–500 words): \033[1m" + vocabformat + "\033[0m (" + vocabprompt + ")\n") -thefile.write("- [[" + vocabprompt + "][vocab-drabbles]] (50–500 words): *" + vocabformat + "*\n") +vocabprompttext = vocabpromptsoup.find(class_="entry-content") +vocabtheprompt = vocabprompttext.find("strong") +print("vocab-drabbles (50–500 words): \033[1m" + vocabtheprompt.text.lower() + "\033[0m (" + vocabprompt + ")\n") +thefile.write("- [[" + vocabprompt + "][vocab-drabbles]] (50–500 words): *" + vocabtheprompt.text.lower() + "*\n")