Update prompt communities
This commit is contained in:
parent
2c546a38fe
commit
b87e0fbfbc
1 changed file with 162 additions and 37 deletions
199
promptscrape.py
199
promptscrape.py
|
@ -14,6 +14,10 @@ if os.path.exists("prompts.org"):
|
||||||
|
|
||||||
thefile = open("prompts.org", "a")
|
thefile = open("prompts.org", "a")
|
||||||
|
|
||||||
|
today = int(date.today().strftime("%d"))
|
||||||
|
month = str(date.today().strftime("%B"))
|
||||||
|
monthstring = ".*" + month + ".*"
|
||||||
|
|
||||||
cent = "https://100words.dreamwidth.org/tag/!prompt?style=light&tag=%21prompt"
|
cent = "https://100words.dreamwidth.org/tag/!prompt?style=light&tag=%21prompt"
|
||||||
centpage = requests.get(cent)
|
centpage = requests.get(cent)
|
||||||
centsoup = BeautifulSoup(centpage.content, "html.parser")
|
centsoup = BeautifulSoup(centpage.content, "html.parser")
|
||||||
|
@ -30,36 +34,35 @@ print("100words (100 words): \033[1m" + centtheprompt.text.lower() + "\033[0m ("
|
||||||
thefile.write("- [[" + centprompt + "][100words]] (100 words): *" + centtheprompt.text.lower() + "*\n")
|
thefile.write("- [[" + centprompt + "][100words]] (100 words): *" + centtheprompt.text.lower() + "*\n")
|
||||||
|
|
||||||
# for this one we need to extract the right entry from a list, which may or may not be an <ol>. we also need to use the right month, as next month’s prompts are posted in advance
|
# for this one we need to extract the right entry from a list, which may or may not be an <ol>. we also need to use the right month, as next month’s prompts are posted in advance
|
||||||
today = int(date.today().strftime("%d"))
|
# 31-days now defunct?? scraper commented out below until confirmed
|
||||||
month = str(date.today().strftime("%B"))
|
# monthstring = ".*" + month + ".*"
|
||||||
monthstring = ".*" + month + ".*"
|
# thirtyone = "https://31-days.dreamwidth.org/tag/!prompts?style=light&tag=%21prompts"
|
||||||
thirtyone = "https://31-days.dreamwidth.org/tag/!prompts?style=light&tag=%21prompts"
|
# thirtyonepage = requests.get(thirtyone)
|
||||||
thirtyonepage = requests.get(thirtyone)
|
# thirtyonesoup = BeautifulSoup(thirtyonepage.content, "html.parser")
|
||||||
thirtyonesoup = BeautifulSoup(thirtyonepage.content, "html.parser")
|
# thirtyoneprompts = thirtyonesoup.find_all("h3", string = re.compile(monthstring))
|
||||||
thirtyoneprompts = thirtyonesoup.find_all("h3", string = re.compile(monthstring))
|
# thirtyonesubsoup = BeautifulSoup(str(thirtyoneprompts[0]), "html.parser")
|
||||||
thirtyonesubsoup = BeautifulSoup(str(thirtyoneprompts[0]), "html.parser")
|
# thirtyoneurl = thirtyonesubsoup.find("a")
|
||||||
thirtyoneurl = thirtyonesubsoup.find("a")
|
# thirtyoneprompt = (thirtyoneurl["href"])
|
||||||
thirtyoneprompt = (thirtyoneurl["href"])
|
# thirtyonepromptnew = (thirtyoneurl["href"] + "?style=light")
|
||||||
thirtyonepromptnew = (thirtyoneurl["href"] + "?style=light")
|
# thirtyonepromptpage = requests.get(thirtyonepromptnew)
|
||||||
thirtyonepromptpage = requests.get(thirtyonepromptnew)
|
# thirtyonepromptsoup = BeautifulSoup(thirtyonepromptpage.content, "html.parser")
|
||||||
thirtyonepromptsoup = BeautifulSoup(thirtyonepromptpage.content, "html.parser")
|
# thirtyoneprompttext = thirtyonepromptsoup.find(class_="entry-content")
|
||||||
thirtyoneprompttext = thirtyonepromptsoup.find(class_="entry-content")
|
# if "<ol>" in str(thirtyoneprompttext):
|
||||||
if "<ol>" in str(thirtyoneprompttext):
|
# thirtyonetheprompt = thirtyoneprompttext.select("ol > li")[today - 1].get_text(strip=True)
|
||||||
thirtyonetheprompt = thirtyoneprompttext.select("ol > li")[today - 1].get_text(strip=True)
|
# else:
|
||||||
else:
|
# interprompt = list(thirtyoneprompttext.stripped_strings)
|
||||||
interprompt = list(thirtyoneprompttext.stripped_strings)
|
# thirtyonelist = []
|
||||||
thirtyonelist = []
|
# for prompt in interprompt:
|
||||||
for prompt in interprompt:
|
# if len(prompt) < 5:
|
||||||
if len(prompt) < 5:
|
# promptnum = interprompt.index(prompt)
|
||||||
promptnum = interprompt.index(prompt)
|
# newnum = promptnum + 1
|
||||||
newnum = promptnum + 1
|
# thirtyonelist.append(prompt + interprompt[newnum])
|
||||||
thirtyonelist.append(prompt + interprompt[newnum])
|
# else:
|
||||||
else:
|
# thirtyonelist.append(prompt)
|
||||||
thirtyonelist.append(prompt)
|
# intsearch = str(today) + "."
|
||||||
intsearch = str(today) + "."
|
# thirtyonetheprompt = str([item for item in thirtyonelist if item.startswith(intsearch)])[2:-2]
|
||||||
thirtyonetheprompt = str([item for item in thirtyonelist if item.startswith(intsearch)])[2:-2]
|
# print("31-days (any): \033[1m" + thirtyonetheprompt.lower() + "\033[0m (" + thirtyoneprompt + ")\n")
|
||||||
print("31-days (any): \033[1m" + thirtyonetheprompt.lower() + "\033[0m (" + thirtyoneprompt + ")\n")
|
# thefile.write("- [[" + thirtyoneprompt + "][31-days]] (any): *" + thirtyonetheprompt.lower() + "*\n")
|
||||||
thefile.write("- [[" + thirtyoneprompt + "][31-days]] (any): *" + thirtyonetheprompt.lower() + "*\n")
|
|
||||||
|
|
||||||
|
|
||||||
ad = "https://anythingdrabble.dreamwidth.org/tag/mod!+post?style=light&tag=mod%21+post"
|
ad = "https://anythingdrabble.dreamwidth.org/tag/mod!+post?style=light&tag=mod%21+post"
|
||||||
|
@ -221,6 +224,17 @@ with requests.Session() as s:
|
||||||
fffc = "https://fffc.dreamwidth.org/tag/!challenges?style=light&tag=%21challenges"
|
fffc = "https://fffc.dreamwidth.org/tag/!challenges?style=light&tag=%21challenges"
|
||||||
fffcpage = s.get(fffc)
|
fffcpage = s.get(fffc)
|
||||||
fffcsoup = BeautifulSoup(fffcpage.content, "html.parser")
|
fffcsoup = BeautifulSoup(fffcpage.content, "html.parser")
|
||||||
|
if 18 > today > 9:
|
||||||
|
fffclittleprompts = fffcsoup.find_all("h3", string=lambda text: "little special" in text.lower())
|
||||||
|
fffclittlesubsoup = BeautifulSoup(str(fffclittleprompts[0]), "html.parser")
|
||||||
|
fffclittleurl = fffclittlesubsoup.find("a")
|
||||||
|
fffclittleprompt = (fffclittleurl["href"])
|
||||||
|
fffclittlepromptnew = (fffclittleurl["href"] + "?style=light")
|
||||||
|
fffclittlepromptpage = s.get(fffclittlepromptnew)
|
||||||
|
fffclittlepromptsoup = BeautifulSoup(fffclittlepromptpage.content, "html.parser")
|
||||||
|
fffclittleprompttext = fffclittlepromptsoup.find("h3")
|
||||||
|
print("fffc little special (at least 100 words): \033[1m" + fffclittleprompttext.text.lower() + "\033[0m (" + fffclittleprompt + ")\n")
|
||||||
|
thefile.write("- [[" + fffclittleprompt + "][fffc little special]] (at least 100 words): *" + fffclittleprompttext.text.lower() + "*\n")
|
||||||
fffcmadnessprompts = fffcsoup.find_all("h3", string=lambda text: "froday madness" in text.lower())
|
fffcmadnessprompts = fffcsoup.find_all("h3", string=lambda text: "froday madness" in text.lower())
|
||||||
fffcmadnesssubsoup = BeautifulSoup(str(fffcmadnessprompts[0]), "html.parser")
|
fffcmadnesssubsoup = BeautifulSoup(str(fffcmadnessprompts[0]), "html.parser")
|
||||||
fffcmadnessurl = fffcmadnesssubsoup.find("a")
|
fffcmadnessurl = fffcmadnesssubsoup.find("a")
|
||||||
|
@ -232,6 +246,16 @@ with requests.Session() as s:
|
||||||
fffcmadnesstheprompt = fffcmadnessprompttext.find("b")
|
fffcmadnesstheprompt = fffcmadnessprompttext.find("b")
|
||||||
print("fffc madness (at least 2000 words): \033[1m" + fffcmadnesstheprompt.text.lower() + "\033[0m (" + fffcmadnessprompt + ")\n")
|
print("fffc madness (at least 2000 words): \033[1m" + fffcmadnesstheprompt.text.lower() + "\033[0m (" + fffcmadnessprompt + ")\n")
|
||||||
thefile.write("- [[" + fffcmadnessprompt + "][fffc madness]] (at least 2000 words): *" + fffcmadnesstheprompt.text.lower() + "*\n")
|
thefile.write("- [[" + fffcmadnessprompt + "][fffc madness]] (at least 2000 words): *" + fffcmadnesstheprompt.text.lower() + "*\n")
|
||||||
|
fffcmonthlyprompts = fffcsoup.find_all("h3", string=re.compile(monthstring))
|
||||||
|
fffcmonthlysubsoup = BeautifulSoup(str(fffcmonthlyprompts[0]), "html.parser")
|
||||||
|
fffcmonthlyurl = fffcmonthlysubsoup.find("a")
|
||||||
|
fffcmonthlyprompt = (fffcmonthlyurl["href"])
|
||||||
|
fffcmonthlypromptnew = (fffcmonthlyurl["href"] + "?style=light")
|
||||||
|
fffcmonthlypromptpage = s.get(fffcmonthlypromptnew)
|
||||||
|
fffcmonthlypromptsoup = BeautifulSoup(fffcmonthlypromptpage.content, "html.parser")
|
||||||
|
fffcmonthlyprompttext = fffcmonthlypromptsoup.find("h3")
|
||||||
|
print("fffc monthly special (usually at least 500 words): \033[1m" + fffcmonthlyprompttext.text.lower() + "\033[0m (" + fffcmonthlyprompt + ")\n")
|
||||||
|
thefile.write("- [[" + fffcmonthlyprompt + "][fffc monthly special]] (usually at least 500 words): *" + fffcmonthlyprompttext.text.lower() + "*\n")
|
||||||
fffcregularprompts = fffcsoup.find_all("h3", string=lambda text: "regular challenge" in text.lower())
|
fffcregularprompts = fffcsoup.find_all("h3", string=lambda text: "regular challenge" in text.lower())
|
||||||
fffcregularsubsoup = BeautifulSoup(str(fffcregularprompts[0]), "html.parser")
|
fffcregularsubsoup = BeautifulSoup(str(fffcregularprompts[0]), "html.parser")
|
||||||
fffcregularurl = fffcregularsubsoup.find("a")
|
fffcregularurl = fffcregularsubsoup.find("a")
|
||||||
|
@ -259,7 +283,110 @@ ficlettheprompt = ficletprompttext.find("a")
|
||||||
print("ficlet-zone (any): \033[1m" + ficlettheprompt.text.lower() + "\033[0m (" + ficletprompt + ")\n")
|
print("ficlet-zone (any): \033[1m" + ficlettheprompt.text.lower() + "\033[0m (" + ficletprompt + ")\n")
|
||||||
thefile.write("- [[" + ficletprompt + "][ficlet-zone]] (any): *" + ficlettheprompt.text.lower() + "*\n")
|
thefile.write("- [[" + ficletprompt + "][ficlet-zone]] (any): *" + ficlettheprompt.text.lower() + "*\n")
|
||||||
|
|
||||||
# grabbing two different instances of the same tag within the post
|
# first calculate the hour of the month …
|
||||||
|
hourselapsed = (today - 1) * 24
|
||||||
|
hourstoday = int(datetime.now().strftime("%H"))
|
||||||
|
currenthour = (hourselapsed + hourstoday)
|
||||||
|
with requests.Session() as s:
|
||||||
|
response = s.post(login_url , data)
|
||||||
|
hourly = "https://hourlyprompts.dreamwidth.org/?style=light"
|
||||||
|
hourlypage = s.get(hourly)
|
||||||
|
hourlysoup = BeautifulSoup(hourlypage.content, "html.parser")
|
||||||
|
hourlyprompts = hourlysoup.find_all("h3", string=re.compile(monthstring))
|
||||||
|
hourlysubsoup = BeautifulSoup(str(hourlyprompts[0]), "html.parser")
|
||||||
|
hourlyurl = hourlysubsoup.find("a")
|
||||||
|
hourlyprompt = (hourlyurl["href"])
|
||||||
|
hourlypromptnew = (hourlyurl["href"] + "?style=light")
|
||||||
|
hourlypromptpage = s.get(hourlypromptnew)
|
||||||
|
hourlypromptsoup = BeautifulSoup(hourlypromptpage.content, "html.parser")
|
||||||
|
hourlyprompttext = hourlypromptsoup.find(class_="entry-content")
|
||||||
|
searchstring = r"<br/>" + re.escape(str(currenthour)) + r"\. .*?<br/>"
|
||||||
|
hourlypromptmedian = re.findall(searchstring, str(hourlyprompttext))
|
||||||
|
hourlypromptthishour = str(hourlypromptmedian[0])[5:-5]
|
||||||
|
print("hourlyprompts (any): \033[1m" + hourlypromptthishour.lower() + "\033[0m (" + hourlyprompt + ")\n")
|
||||||
|
thefile.write("- [[" + hourlyprompt + "][hourlyprompts]] (any): *" + hourlypromptthishour.lower() + "*\n")
|
||||||
|
|
||||||
|
if 30 > today > 21:
|
||||||
|
ssbingo = "https://sweetandshort.dreamwidth.org/tag/challenge:+bingo?style=light&tag=challenge:+bingo"
|
||||||
|
ssbingopage = requests.get(ssbingo)
|
||||||
|
ssbingosoup = BeautifulSoup(ssbingopage.content, "html.parser")
|
||||||
|
ssbingoprompts = ssbingosoup.find_all("h3")
|
||||||
|
ssbingosubsoup = BeautifulSoup(str(ssbingoprompts[0]), "html.parser")
|
||||||
|
ssbingourl = ssbingosubsoup.find("a")
|
||||||
|
ssbingoprompt = (ssbingourl["href"])
|
||||||
|
ssbingopromptnew = (ssbingourl["href"] + "?style=light")
|
||||||
|
ssbingopromptpage = requests.get(ssbingopromptnew)
|
||||||
|
ssbingopromptsoup = BeautifulSoup(ssbingopromptpage.content, "html.parser")
|
||||||
|
ssbingoprompttext = ssbingopromptsoup.find(class_="entry-content")
|
||||||
|
ssbingotheprompt = ssbingoprompttext.find_all("td")
|
||||||
|
ssbingoclean = []
|
||||||
|
for prompt in ssbingotheprompt:
|
||||||
|
newprompt = re.sub("<.*?>","",str(prompt))
|
||||||
|
ssbingoclean.append(newprompt)
|
||||||
|
ssbingofinal = "; ".join(ssbingoclean).lower()
|
||||||
|
print("sweet and short bingo (up to 300 words for two prompts, up to 600 words for four prompts): \033[1m" + ssbingofinal + "\033[0m (" + ssbingoprompt + ")\n")
|
||||||
|
thefile.write("- [[" + ssbingoprompt + "][sweet and short bingo]] (up to 300 words for two prompts, up to 600 words for four prompts): *" + ssbingofinal + "*\n")
|
||||||
|
|
||||||
|
if 16 > today > 7:
|
||||||
|
ssquicky = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+comment+quicky?mode=and&style=light&tag=%21new+challenge,challenge:+comment+quicky"
|
||||||
|
ssquickypage = requests.get(ssquicky)
|
||||||
|
ssquickysoup = BeautifulSoup(ssquickypage.content, "html.parser")
|
||||||
|
ssquickyprompts = ssquickysoup.find_all("h3")
|
||||||
|
ssquickysubsoup = BeautifulSoup(str(ssquickyprompts[0]), "html.parser")
|
||||||
|
ssquickyurl = ssquickysubsoup.find("a")
|
||||||
|
ssquickyprompt = (ssquickyurl["href"])
|
||||||
|
# deliberately not using style=light here so we can get at the comment contents
|
||||||
|
ssquickypromptnew = (ssquickyurl["href"])
|
||||||
|
ssquickypromptpage = requests.get(ssquickypromptnew)
|
||||||
|
ssquickypromptsoup = BeautifulSoup(ssquickypromptpage.content, "html.parser")
|
||||||
|
promptcatch = ".*New Prompts Here"
|
||||||
|
# ssquickytheprompt = ssquickypromptsoup.find_all("h4",string = re.compile(promptcatch))
|
||||||
|
ssquickytheprompt = ssquickypromptsoup.find_all(class_="comment")
|
||||||
|
ssquickycomments = []
|
||||||
|
for comment in ssquickytheprompt:
|
||||||
|
if re.search("New Prompts Here",str(comment)):
|
||||||
|
commenttext = re.findall(r"<div class=\"comment-content\".*?</div>",str(comment))
|
||||||
|
commentprompt = re.sub("<.*?>","",str(commenttext))
|
||||||
|
ssquickycomments.append(str(commentprompt)[2:-2])
|
||||||
|
ssquickycprompt = "; ".join(ssquickycomments)
|
||||||
|
print("sweet and short comment quicky (up to 99 words): \033[1m" + ssquickycprompt.lower() + "\033[0m (" + ssquickyprompt + ")\n")
|
||||||
|
thefile.write("- [[" + ssquickyprompt + "][sweet and short comment quicky]] (up to 99 words): *" + ssquickycprompt.lower() + "*\n")
|
||||||
|
|
||||||
|
ssmonthly = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+10+out+of+20?mode=and&style=light&tag=%21new+challenge,challenge:+10+out+of+20"
|
||||||
|
ssmonthlypage = requests.get(ssmonthly)
|
||||||
|
ssmonthlysoup = BeautifulSoup(ssmonthlypage.content, "html.parser")
|
||||||
|
ssmonthlyprompts = ssmonthlysoup.find_all("h3")
|
||||||
|
ssmonthlysubsoup = BeautifulSoup(str(ssmonthlyprompts[0]), "html.parser")
|
||||||
|
ssmonthlyurl = ssmonthlysubsoup.find("a")
|
||||||
|
ssmonthlyprompt = (ssmonthlyurl["href"])
|
||||||
|
ssmonthlypromptnew = (ssmonthlyurl["href"] + "?style=light")
|
||||||
|
ssmonthlypromptpage = requests.get(ssmonthlypromptnew)
|
||||||
|
ssmonthlypromptsoup = BeautifulSoup(ssmonthlypromptpage.content, "html.parser")
|
||||||
|
ssmonthlyprompttext = ssmonthlypromptsoup.find(class_="entry-content")
|
||||||
|
ssmonthlypromptmedian = re.findall(r"<a name=\"cutid1\">.*", str(ssmonthlyprompttext))
|
||||||
|
ssmonthlypromptstripone = re.sub("<.*?>","",str(ssmonthlypromptmedian))
|
||||||
|
ssmonthlypromptstriptwo = re.sub("([a-z])- ","\\1; ",str(ssmonthlypromptstripone))
|
||||||
|
ssmonthlypromptstripthree = re.sub("- ","",str(ssmonthlypromptstriptwo))
|
||||||
|
ssmonthlypromptfinal = str(ssmonthlypromptstripthree)[2:-2]
|
||||||
|
print("sweet and short monthly prompts (up to 300 words [0–9 prompts], up to 900 words [10–19 prompts], any [20 prompts]): \033[1m" + ssmonthlypromptfinal + "\033[0m (" + ssmonthlyprompt + ")\n")
|
||||||
|
thefile.write("- [[" + ssmonthlyprompt + "][sweet and short monthly prompts]] (up to 300 words [0–9 prompts], up to 900 words [10–19 prompts], any [20 prompts]): *" + ssmonthlypromptfinal + "*\n")
|
||||||
|
|
||||||
|
if today > 14:
|
||||||
|
sspicture = "https://sweetandshort.dreamwidth.org/tag/!new+challenge,challenge:+picture+prompt+fun?mode=and&style=light&tag=%21new+challenge,challenge:+picture+prompt+fun"
|
||||||
|
sspicturepage = requests.get(sspicture)
|
||||||
|
sspicturesoup = BeautifulSoup(sspicturepage.content, "html.parser")
|
||||||
|
monthstring = ".*" + month + ".*"
|
||||||
|
sspictureprompts = sspicturesoup.find_all("h3", string=re.compile(monthstring))
|
||||||
|
sspicturesubsoup = BeautifulSoup(str(sspictureprompts[0]), "html.parser")
|
||||||
|
sspictureurl = sspicturesubsoup.find("a")
|
||||||
|
sspictureprompt = (sspictureurl["href"])
|
||||||
|
sspicturepromptnew = (sspictureurl["href"] + "?style=light")
|
||||||
|
sspicturepromptpage = requests.get(sspicturepromptnew)
|
||||||
|
sspicturepromptsoup = BeautifulSoup(sspicturepromptpage.content, "html.parser")
|
||||||
|
sspictureprompttext = sspicturepromptsoup.find("h3")
|
||||||
|
print("sweet and short picture prompts (up to 300 words): \033[1m" + sspictureprompttext.text.lower() + "\033[0m (" + sspictureprompt + ")\n")
|
||||||
|
thefile.write("- [[" + sspictureprompt + "][sweet and short picture prompts]] (up to 300 words): *" + sspictureprompttext.text.lower() + "*\n")
|
||||||
|
|
||||||
vocab = "https://vocab-drabbles.dreamwidth.org/?style=light&tag=challenge"
|
vocab = "https://vocab-drabbles.dreamwidth.org/?style=light&tag=challenge"
|
||||||
vocabpage = requests.get(vocab)
|
vocabpage = requests.get(vocab)
|
||||||
vocabsoup = BeautifulSoup(vocabpage.content, "html.parser")
|
vocabsoup = BeautifulSoup(vocabpage.content, "html.parser")
|
||||||
|
@ -270,9 +397,7 @@ vocabprompt = (vocaburl["href"])
|
||||||
vocabpromptnew = (vocaburl["href"] + "?style=light")
|
vocabpromptnew = (vocaburl["href"] + "?style=light")
|
||||||
vocabpromptpage = requests.get(vocabpromptnew)
|
vocabpromptpage = requests.get(vocabpromptnew)
|
||||||
vocabpromptsoup = BeautifulSoup(vocabpromptpage.content, "html.parser")
|
vocabpromptsoup = BeautifulSoup(vocabpromptpage.content, "html.parser")
|
||||||
vocablist = []
|
vocabprompttext = vocabpromptsoup.find(class_="entry-content")
|
||||||
for listitem in vocabpromptsoup.find_all("strong"):
|
vocabtheprompt = vocabprompttext.find("strong")
|
||||||
vocablist.append(listitem.text.lower())
|
print("vocab-drabbles (50–500 words): \033[1m" + vocabtheprompt.text.lower() + "\033[0m (" + vocabprompt + ")\n")
|
||||||
vocabformat = "; ".join(vocablist)
|
thefile.write("- [[" + vocabprompt + "][vocab-drabbles]] (50–500 words): *" + vocabtheprompt.text.lower() + "*\n")
|
||||||
print("vocab-drabbles (50–500 words): \033[1m" + vocabformat + "\033[0m (" + vocabprompt + ")\n")
|
|
||||||
thefile.write("- [[" + vocabprompt + "][vocab-drabbles]] (50–500 words): *" + vocabformat + "*\n")
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue