Add files
This commit is contained in:
commit
6549deda7b
9 changed files with 1092 additions and 0 deletions
259
promptscrape.py
Normal file
259
promptscrape.py
Normal file
|
@ -0,0 +1,259 @@
|
|||
import requests, os
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import date, datetime
|
||||
|
||||
# needed for nsfw content
# The login form fields posted to login_url. The credentials can be overridden
# through the environment so that real account details do not have to be
# committed to the repository; the previous hard-coded values stay as defaults.
login_url = "https://www.dreamwidth.org/login?ret=1"
data = {
    "user": os.environ.get("DREAMWIDTH_USER", "fakeapi"),
    "password": os.environ.get("DREAMWIDTH_PASSWORD", "thisap1isfalse"),
}
|
||||
|
||||
# Start every run with an empty prompts.org. Mode "w" truncates an existing
# file, so no separate exists/remove step (and its race window) is needed.
# The encoding is explicit because scraped prompt text and some of the labels
# written below contain non-ASCII characters, which can fail to encode under a
# non-UTF-8 locale default.
thefile = open("prompts.org", "w", encoding="utf-8")
|
||||
|
||||
# 100words: find the first heading containing "prompt:" on the tag page,
# follow its link, and report the first <strong> text of that entry.
cent = "https://100words.dreamwidth.org/tag/!prompt?style=light&tag=%21prompt"
centpage = requests.get(cent)
centsoup = BeautifulSoup(centpage.content, "html.parser")
centprompts = centsoup.find_all(
    "h3",
    string=lambda heading: "prompt:" in heading.lower(),
)
# Parse the first matching heading on its own to pull out its link.
centsubsoup = BeautifulSoup(str(centprompts[0]), "html.parser")
centurl = centsubsoup.find("a")
centprompt = centurl["href"]
centpromptnew = f"{centprompt}?style=light"
centpromptpage = requests.get(centpromptnew)
centpromptsoup = BeautifulSoup(centpromptpage.content, "html.parser")
centprompttext = centpromptsoup.find(class_="entry-content")
centtheprompt = centprompttext.find("strong")
centlower = centtheprompt.text.lower()
# The console line uses ANSI bold; the file line is a "- [[url][label]]" item.
print(f"100words (100 words): \033[1m{centlower}\033[0m ({centprompt})\n")
thefile.write(f"- [[{centprompt}][100 words]] (100 words): *{centlower}*\n")
|
||||
|
||||
# for this one we need to extract the right entry from a list, which may be an <ol> but may not be
# 31-days: follow the first heading containing "prompts" on the tag page and
# pick the entry for today's day of the month out of that post.
today = date.today().day
thirtyone = "https://31-days.dreamwidth.org/tag/!prompts?style=light&tag=%21prompts"
thirtyonepage = requests.get(thirtyone)
thirtyonesoup = BeautifulSoup(thirtyonepage.content, "html.parser")
thirtyoneprompts = thirtyonesoup.find_all("h3", string=lambda text: "prompts" in text.lower())
thirtyonesubsoup = BeautifulSoup(str(thirtyoneprompts[0]), "html.parser")
thirtyoneurl = thirtyonesubsoup.find("a")
thirtyoneprompt = thirtyoneurl["href"]
thirtyonepromptnew = thirtyoneprompt + "?style=light"
thirtyonepromptpage = requests.get(thirtyonepromptnew)
thirtyonepromptsoup = BeautifulSoup(thirtyonepromptpage.content, "html.parser")
thirtyoneprompttext = thirtyonepromptsoup.find(class_="entry-content")
if "<ol>" in str(thirtyoneprompttext):
    # Ordered list: the n-th <li> is the prompt for day n.
    thirtyonetheprompt = thirtyoneprompttext.select("ol > li")[today - 1].get_text(strip=True)
else:
    # Loose text: a very short string (under 5 characters) is treated as a
    # bare number such as "12." that got split from its prompt, so it is
    # glued to the string that follows it.
    interprompt = list(thirtyoneprompttext.stripped_strings)
    thirtyonelist = []
    for promptnum, prompt in enumerate(interprompt):
        # enumerate gives the true position even when the same short string
        # occurs twice; the bounds check covers a short final string.
        if len(prompt) < 5 and promptnum + 1 < len(interprompt):
            thirtyonelist.append(prompt + interprompt[promptnum + 1])
        else:
            thirtyonelist.append(prompt)
    intsearch = str(today) + "."
    # Take the first entry starting with "<day>." as plain text (empty string
    # if none), rather than slicing the repr of a list, which leaks escape
    # sequences and quote characters into the output.
    thirtyonetheprompt = next(
        (item for item in thirtyonelist if item.startswith(intsearch)),
        "",
    )
print("31-days (any): \033[1m" + thirtyonetheprompt.lower() + "\033[0m (" + thirtyoneprompt + ")\n")
thefile.write("- [[" + thirtyoneprompt + "][31-days]] (any): *" + thirtyonetheprompt.lower() + "*\n")
|
||||
|
||||
|
||||
# anythingdrabble: follow the first heading containing "prompt post" on the
# tag page and report the text of the first <center> element of that entry.
ad = "https://anythingdrabble.dreamwidth.org/tag/mod!+post?style=light&tag=mod%21+post"
adpage = requests.get(ad)
adsoup = BeautifulSoup(adpage.content, "html.parser")
adprompts = adsoup.find_all("h3", string=lambda text: "prompt post" in text.lower())
adsubsoup = BeautifulSoup(str(adprompts[0]), "html.parser")
adurl = adsubsoup.find("a")
adprompt = adurl["href"]
adpromptnew = adprompt + "?style=light"
adpromptpage = requests.get(adpromptnew)
adpromptsoup = BeautifulSoup(adpromptpage.content, "html.parser")
adprompttext = adpromptsoup.find(class_="entry-content")
adtheprompt = adprompttext.find("center")
# Drop trailing spaces only. rstrip(" ") also copes with empty or all-space
# text, where indexing the last character in a loop would raise IndexError.
adstrippable = str(adtheprompt.text).rstrip(" ")
print("anythingdrabble (100, 200, 300, 400, or 500 words): \033[1m" + adstrippable.lower() + "\033[0m (" + adprompt + ")\n")
thefile.write("- [[" + adprompt + "][anythingdrabble]] (100, 200, 300, 400, or 500 words): *" + adstrippable.lower() + "*\n")
|
||||
|
||||
# dove-drabbles: find the first heading containing "prompt post" on the
# journal page, follow its link, and report the first <i> text of that entry.
dove = "https://dove-drabbles.dreamwidth.org/?style=light"
dovepage = requests.get(dove)
dovesoup = BeautifulSoup(dovepage.content, "html.parser")
doveprompts = dovesoup.find_all(
    "h3",
    string=lambda heading: "prompt post" in heading.lower(),
)
# Parse the first matching heading on its own to pull out its link.
dovesubsoup = BeautifulSoup(str(doveprompts[0]), "html.parser")
doveurl = dovesubsoup.find("a")
doveprompt = doveurl["href"]
dovepromptnew = f"{doveprompt}?style=light"
dovepromptpage = requests.get(dovepromptnew)
dovepromptsoup = BeautifulSoup(dovepromptpage.content, "html.parser")
doveprompttext = dovepromptsoup.find(class_="entry-content")
dovetheprompt = doveprompttext.find("i")
dovelower = dovetheprompt.text.lower()
print(f"dove-drabbles (any): \033[1m{dovelower}\033[0m ({doveprompt})\n")
thefile.write(f"- [[{doveprompt}][dove-drabbles]] (any): *{dovelower}*\n")
|
||||
|
||||
# drabble-zone and emotion100 are both fetched through one session, after
# posting the login form so the later requests carry that session's cookies.
with requests.Session() as s:
    response = s.post(login_url, data)

    # drabble-zone: first heading containing "challenge"; the prompt is the
    # first <strong> text of the linked entry.
    zone = "https://drabble-zone.dreamwidth.org/tag/mod-post?style=light&tag=mod-post"
    zonepage = s.get(zone)
    zonesoup = BeautifulSoup(zonepage.content, "html.parser")
    zoneprompts = zonesoup.find_all(
        "h3",
        string=lambda heading: "challenge" in heading.lower(),
    )
    zonesubsoup = BeautifulSoup(str(zoneprompts[0]), "html.parser")
    zoneurl = zonesubsoup.find("a")
    zoneprompt = zoneurl["href"]
    zonepromptnew = f"{zoneprompt}?style=light"
    zonepromptpage = s.get(zonepromptnew)
    zonepromptsoup = BeautifulSoup(zonepromptpage.content, "html.parser")
    zoneprompttext = zonepromptsoup.find(class_="entry-content")
    zonetheprompt = zoneprompttext.find("strong")
    zonelower = zonetheprompt.text.lower()
    print(f"drabble-zone (100 or 200 words): \033[1m{zonelower}\033[0m ({zoneprompt})\n")
    thefile.write(f"- [[{zoneprompt}][drabble-zone]] (100 or 200 words): *{zonelower}*\n")

    # emotion100: first heading containing "prompt"; the prompt is the text
    # of the last <span> in the linked entry.
    emotion = "https://emotion100.dreamwidth.org/tag/*modpost?style=light&tag=%2Amodpost"
    emotionpage = s.get(emotion)
    emotionsoup = BeautifulSoup(emotionpage.content, "html.parser")
    emotionprompts = emotionsoup.find_all(
        "h3",
        string=lambda heading: "prompt" in heading.lower(),
    )
    emotionsubsoup = BeautifulSoup(str(emotionprompts[0]), "html.parser")
    emotionurl = emotionsubsoup.find("a")
    emotionprompt = emotionurl["href"]
    emotionpromptnew = f"{emotionprompt}?style=light"
    emotionpromptpage = s.get(emotionpromptnew)
    emotionpromptsoup = BeautifulSoup(emotionpromptpage.content, "html.parser")
    emotionprompttext = emotionpromptsoup.find(class_="entry-content")
    emotiontheprompt = emotionprompttext.find_all("span")[-1]
    emotionlower = emotiontheprompt.text.lower()
    print(f"emotion100 (100 words or a multiple of 100): \033[1m{emotionlower}\033[0m ({emotionprompt})\n")
    thefile.write(f"- [[{emotionprompt}][emotion100]] (100 words or a multiple of 100): *{emotionlower}*\n")
|
||||
|
||||
# for this one, have to get prompts from comments
# fail-fandomanon: open the first "ffa dw post" heading that is not a
# placeholder, then scan comment pages 1-7 of that post for comment titles
# containing "100 words of".
ffa = "https://fail-fandomanon.dreamwidth.org/?style=light"
ffapage = requests.get(ffa)
ffasoup = BeautifulSoup(ffapage.content, "html.parser")
ffaprompts = ffasoup.find_all("h3", string=lambda text: "ffa dw post" in text.lower())
ffapromptstrim = [x for x in ffaprompts if "Placeholder" not in str(x)]
ffasubsoup = BeautifulSoup(str(ffapromptstrim[0]), "html.parser")
ffaurl = ffasubsoup.find("a")
ffaprompt = ffaurl["href"]
# The first page keeps its original URL form; pages 2-7 add ?page=N.
ffapages = [ffaprompt + "?style=light"]
ffapages += [ffaprompt + "?page=" + str(i) + "&style=light" for i in range(2, 8)]
ffacent = []
for ffapromptnew in ffapages:
    ffapromptpage = requests.get(ffapromptnew)
    ffapromptsoup = BeautifulSoup(ffapromptpage.content, "html.parser")
    ffaprompttext = ffapromptsoup.find(id="comments")
    # Re-parsing str(...) means a page without a comments element simply
    # yields no headings instead of failing on None.
    ffaresoup = BeautifulSoup(str(ffaprompttext), "html.parser")
    # string= is the keyword used by the other find_all calls in this file;
    # text= is its older name.
    ffatheprompt = ffaresoup.find_all("h4", string=True)
    for each in ffatheprompt:
        ffatitle = str(each.get_text())
        # Replies ("Re: ...") are skipped on every page, the first included,
        # so the fixed-width slice below never cuts into a reply title.
        if "100 words of" in ffatitle and "Re:" not in ffatitle:
            ffacent.append(ffatitle)
if ffacent:
    # dict.fromkeys removes duplicate titles while keeping first-seen order.
    ffacent = list(dict.fromkeys(ffacent))
    # 13 == len("100 words of "): keep only what follows that lead-in.
    ffacentnew = [x[13:] for x in ffacent]
    ffaformat = ", ".join(ffacentnew)
    print("fail-fandomanon (any): \033[1m" + ffaformat + "\033[0m (" + ffaprompt + ")\n")
    thefile.write("- [[" + ffaprompt + "][fail-fandomanon]] (any): *" + ffaformat + "*\n")
|
||||
|
||||
|
||||
# for this one, prompts are unavailable on tuesdays and wednesdays
# weekday() numbers Monday as 0, so Tuesday and Wednesday are 1 and 2.
weekprogress = datetime.now().weekday()
if weekprogress not in (1, 2):
    # fandomweekly: first heading containing "challenge post"; the prompt is
    # the text of the first <td> in the linked entry.
    fandom = "https://fandomweekly.dreamwidth.org/?style=light&tag=%23challenge"
    fandompage = requests.get(fandom)
    fandomsoup = BeautifulSoup(fandompage.content, "html.parser")
    fandomprompts = fandomsoup.find_all(
        "h3",
        string=lambda heading: "challenge post" in heading.lower(),
    )
    fandomsubsoup = BeautifulSoup(str(fandomprompts[0]), "html.parser")
    fandomurl = fandomsubsoup.find("a")
    fandomprompt = fandomurl["href"]
    fandompromptnew = f"{fandomprompt}?style=light"
    fandompromptpage = requests.get(fandompromptnew)
    fandompromptsoup = BeautifulSoup(fandompromptpage.content, "html.parser")
    fandomprompttext = fandompromptsoup.find(class_="entry-content")
    fandomtheprompt = fandomprompttext.find("td")
    fandomlower = fandomtheprompt.text.lower()
    print(f"fandomweekly (any, competitive): \033[1m{fandomlower}\033[0m ({fandomprompt})\n")
    thefile.write(f"- [[{fandomprompt}][fandomweekly]] (any, competitive): *{fandomlower}*\n")
|
||||
|
||||
# fan-flashworks: find the first heading containing "challenge" on the admin
# tag page, follow its link, and report the first <center> text of that entry.
flash = "https://fan-flashworks.dreamwidth.org/?style=light&tag=admin"
flashpage = requests.get(flash)
flashsoup = BeautifulSoup(flashpage.content, "html.parser")
flashprompts = flashsoup.find_all(
    "h3",
    string=lambda heading: "challenge" in heading.lower(),
)
# Parse the first matching heading on its own to pull out its link.
flashsubsoup = BeautifulSoup(str(flashprompts[0]), "html.parser")
flashurl = flashsubsoup.find("a")
flashprompt = flashurl["href"]
flashpromptnew = f"{flashprompt}?style=light"
flashpromptpage = requests.get(flashpromptnew)
flashpromptsoup = BeautifulSoup(flashpromptpage.content, "html.parser")
flashprompttext = flashpromptsoup.find(class_="entry-content")
flashtheprompt = flashprompttext.find("center")
flashlower = flashtheprompt.text.lower()
print(f"fan-flashworks (any, can’t post elsewhere until round is closed): \033[1m{flashlower}\033[0m ({flashprompt})\n")
thefile.write(f"- [[{flashprompt}][fan-flashworks]] (any, can’t post elsewhere until round is closed): *{flashlower}*\n")
|
||||
|
||||
# femslashficlets: find the first heading containing "challenge" on the tag
# page, follow its link, and report the first <i> text of that entry.
femslash = "https://femslashficlets.dreamwidth.org/tag/challenges?style=light&tag=challenges"
femslashpage = requests.get(femslash)
femslashsoup = BeautifulSoup(femslashpage.content, "html.parser")
femslashprompts = femslashsoup.find_all("h3", string=lambda text: "challenge" in text.lower())
femslashsubsoup = BeautifulSoup(str(femslashprompts[0]), "html.parser")
femslashurl = femslashsubsoup.find("a")
femslashprompt = femslashurl["href"]
femslashpromptnew = femslashprompt + "?style=light"
femslashpromptpage = requests.get(femslashpromptnew)
femslashpromptsoup = BeautifulSoup(femslashpromptpage.content, "html.parser")
femslashprompttext = femslashpromptsoup.find(class_="entry-content")
femslashtheprompt = femslashprompttext.find("i")
print("femslash-ficlets (100–1000 words, F/F): \033[1m" + femslashtheprompt.text.lower() + "\033[0m (" + femslashprompt + ")\n")
# The file entry now carries the same length/pairing note as the console
# line; it previously repeated emotion100's "100 words or a multiple of 100".
thefile.write("- [[" + femslashprompt + "][femslashficlets]] (100–1000 words, F/F): *" + femslashtheprompt.text.lower() + "*\n")
|
||||
|
||||
# fffc: both challenges are read from one tag page, fetched through a session
# after posting the login form so later requests carry that session's cookies.
with requests.Session() as s:
    response = s.post(login_url, data)
    fffc = "https://fffc.dreamwidth.org/tag/!challenges?style=light&tag=%21challenges"
    fffcpage = s.get(fffc)
    fffcsoup = BeautifulSoup(fffcpage.content, "html.parser")

    # Madness challenge: first heading containing "froday madness"; the
    # prompt is the first <center> text of the linked entry.
    fffcmadnessprompts = fffcsoup.find_all(
        "h3",
        string=lambda heading: "froday madness" in heading.lower(),
    )
    fffcmadnesssubsoup = BeautifulSoup(str(fffcmadnessprompts[0]), "html.parser")
    fffcmadnessurl = fffcmadnesssubsoup.find("a")
    fffcmadnessprompt = fffcmadnessurl["href"]
    fffcmadnesspromptnew = f"{fffcmadnessprompt}?style=light"
    fffcmadnesspromptpage = s.get(fffcmadnesspromptnew)
    fffcmadnesspromptsoup = BeautifulSoup(fffcmadnesspromptpage.content, "html.parser")
    fffcmadnessprompttext = fffcmadnesspromptsoup.find(class_="entry-content")
    fffcmadnesstheprompt = fffcmadnessprompttext.find("center")
    fffcmadnesslower = fffcmadnesstheprompt.text.lower()
    print(f"fffc madness (at least 2000 words): \033[1m{fffcmadnesslower}\033[0m ({fffcmadnessprompt})\n")
    thefile.write(f"- [[{fffcmadnessprompt}][fffc madness]] (at least 2000 words): *{fffcmadnesslower}*\n")

    # Regular challenge: first heading containing "regular challenge" on the
    # same tag page; the prompt is the first <b> text of the linked entry.
    fffcregularprompts = fffcsoup.find_all(
        "h3",
        string=lambda heading: "regular challenge" in heading.lower(),
    )
    fffcregularsubsoup = BeautifulSoup(str(fffcregularprompts[0]), "html.parser")
    fffcregularurl = fffcregularsubsoup.find("a")
    fffcregularprompt = fffcregularurl["href"]
    fffcregularpromptnew = f"{fffcregularprompt}?style=light"
    fffcregularpromptpage = s.get(fffcregularpromptnew)
    fffcregularpromptsoup = BeautifulSoup(fffcregularpromptpage.content, "html.parser")
    fffcregularprompttext = fffcregularpromptsoup.find(class_="entry-content")
    fffcregulartheprompt = fffcregularprompttext.find("b")
    fffcregularlower = fffcregulartheprompt.text.lower()
    print(f"fffc regular challenge (at least 100 words): \033[1m{fffcregularlower}\033[0m ({fffcregularprompt})\n")
    thefile.write(f"- [[{fffcregularprompt}][fffc regular challenge]] (at least 100 words): *{fffcregularlower}*\n")
|
||||
|
||||
# ficlet-zone: find the first heading containing "challenge" on the tag page,
# follow its link, and report the text of the first <a> inside that entry.
ficlet = "https://ficlet-zone.dreamwidth.org/tag/challenge+post?style=light&tag=challenge+post"
ficletpage = requests.get(ficlet)
ficletsoup = BeautifulSoup(ficletpage.content, "html.parser")
ficletprompts = ficletsoup.find_all(
    "h3",
    string=lambda heading: "challenge" in heading.lower(),
)
# Parse the first matching heading on its own to pull out its link.
ficletsubsoup = BeautifulSoup(str(ficletprompts[0]), "html.parser")
ficleturl = ficletsubsoup.find("a")
ficletprompt = ficleturl["href"]
ficletpromptnew = f"{ficletprompt}?style=light"
ficletpromptpage = requests.get(ficletpromptnew)
ficletpromptsoup = BeautifulSoup(ficletpromptpage.content, "html.parser")
ficletprompttext = ficletpromptsoup.find(class_="entry-content")
ficlettheprompt = ficletprompttext.find("a")
ficletlower = ficlettheprompt.text.lower()
print(f"ficlet-zone (any): \033[1m{ficletlower}\033[0m ({ficletprompt})\n")
thefile.write(f"- [[{ficletprompt}][ficlet-zone]] (any): *{ficletlower}*\n")
|
Loading…
Add table
Add a link
Reference in a new issue