Add files

2022-07-13 14:04:53 +01:00 · 2022-07-13 14:04:53 +01:00 · 6549deda7b
commit 6549deda7b
9 changed files with 1092 additions and 0 deletions
--- a/promptscrape.py
+++ b/promptscrape.py
@ -0,0 +1,259 @@
+import requests, os
+from bs4 import BeautifulSoup
+from datetime import date, datetime
+
+# needed for nsfw content
+login_url = "https://www.dreamwidth.org/login?ret=1"
+data = {
+    "user": "fakeapi",
+    "password": "thisap1isfalse"
+}
+
+if os.path.exists("prompts.org"):
+    os.remove("prompts.org")
+
+thefile = open("prompts.org", "a")
+
+cent = "https://100words.dreamwidth.org/tag/!prompt?style=light&tag=%21prompt"
+centpage = requests.get(cent)
+centsoup = BeautifulSoup(centpage.content, "html.parser")
+centprompts = centsoup.find_all("h3", string=lambda text: "prompt:" in text.lower())
+centsubsoup = BeautifulSoup(str(centprompts[0]), "html.parser")
+centurl = centsubsoup.find("a")
+centprompt = (centurl["href"])
+centpromptnew = (centurl["href"] + "?style=light")
+centpromptpage = requests.get(centpromptnew)
+centpromptsoup = BeautifulSoup(centpromptpage.content, "html.parser")
+centprompttext = centpromptsoup.find(class_="entry-content")
+centtheprompt = centprompttext.find("strong")
+print("100words (100 words): \033[1m" + centtheprompt.text.lower() + "\033[0m (" + centprompt + ")\n")
+thefile.write("- [[" + centprompt + "][100 words]] (100 words): *" + centtheprompt.text.lower() + "*\n")
+
+# for this one we need to extract the right entry from a list, which may be an <ol> but may not be
+today = int(date.today().strftime("%d"))
+thirtyone = "https://31-days.dreamwidth.org/tag/!prompts?style=light&tag=%21prompts"
+thirtyonepage = requests.get(thirtyone)
+thirtyonesoup = BeautifulSoup(thirtyonepage.content, "html.parser")
+thirtyoneprompts = thirtyonesoup.find_all("h3", string=lambda text: "prompts" in text.lower())
+thirtyonesubsoup = BeautifulSoup(str(thirtyoneprompts[0]), "html.parser")
+thirtyoneurl = thirtyonesubsoup.find("a")
+thirtyoneprompt = (thirtyoneurl["href"])
+thirtyonepromptnew = (thirtyoneurl["href"] + "?style=light")
+thirtyonepromptpage = requests.get(thirtyonepromptnew)
+thirtyonepromptsoup = BeautifulSoup(thirtyonepromptpage.content, "html.parser")
+thirtyoneprompttext = thirtyonepromptsoup.find(class_="entry-content")
+if "<ol>" in str(thirtyoneprompttext):
+    thirtyonetheprompt = thirtyoneprompttext.select("ol > li")[today - 1].get_text(strip=True)
+else:
+    interprompt = list(thirtyoneprompttext.stripped_strings)
+    thirtyonelist = []
+    for prompt in interprompt:
+        if len(prompt) < 5:
+            promptnum = interprompt.index(prompt)
+            newnum = promptnum + 1
+            thirtyonelist.append(prompt + interprompt[newnum])
+        else:
+            thirtyonelist.append(prompt)
+    intsearch = str(today) + "."
+    thirtyonetheprompt = str([item for item in thirtyonelist if item.startswith(intsearch)])[2:-2]
+print("31-days (any): \033[1m" + thirtyonetheprompt.lower() + "\033[0m (" + thirtyoneprompt + ")\n")
+thefile.write("- [[" + thirtyoneprompt + "][31-days]] (any): *" + thirtyonetheprompt.lower() + "*\n")
+
+
+ad = "https://anythingdrabble.dreamwidth.org/tag/mod!+post?style=light&tag=mod%21+post"
+adpage = requests.get(ad)
+adsoup = BeautifulSoup(adpage.content, "html.parser")
+adprompts = adsoup.find_all("h3", string=lambda text: "prompt post" in text.lower())
+adsubsoup = BeautifulSoup(str(adprompts[0]), "html.parser")
+adurl = adsubsoup.find("a")
+adprompt = (adurl["href"])
+adpromptnew = (adurl["href"] + "?style=light")
+adpromptpage = requests.get(adpromptnew)
+adpromptsoup = BeautifulSoup(adpromptpage.content, "html.parser")
+adprompttext = adpromptsoup.find(class_="entry-content")
+adtheprompt = adprompttext.find("center")
+adstrippable = str(adtheprompt.text)
+while adstrippable[-1] == " ":
+    adstrippable = adstrippable[:-1]
+print("anythingdrabble (100, 200, 300, 400, or 500 words): \033[1m" + adstrippable.lower() + "\033[0m (" + adprompt + ")\n")
+thefile.write("- [[" + adprompt + "][anythingdrabble]] (100, 200, 300, 400, or 500 words): *" + adstrippable.lower() + "*\n")
+
+dove = "https://dove-drabbles.dreamwidth.org/?style=light"
+dovepage = requests.get(dove)
+dovesoup = BeautifulSoup(dovepage.content, "html.parser")
+doveprompts = dovesoup.find_all("h3", string=lambda text: "prompt post" in text.lower())
+dovesubsoup = BeautifulSoup(str(doveprompts[0]), "html.parser")
+doveurl = dovesubsoup.find("a")
+doveprompt = (doveurl["href"])
+dovepromptnew = (doveurl["href"] + "?style=light")
+dovepromptpage = requests.get(dovepromptnew)
+dovepromptsoup = BeautifulSoup(dovepromptpage.content, "html.parser")
+doveprompttext = dovepromptsoup.find(class_="entry-content")
+dovetheprompt = doveprompttext.find("i")
+print("dove-drabbles (any): \033[1m" + dovetheprompt.text.lower() + "\033[0m (" + doveprompt + ")\n")
+thefile.write("- [[" + doveprompt + "][dove-drabbles]] (any): *" + dovetheprompt.text.lower() + "*\n")
+
+with requests.Session() as s:
+    response = s.post(login_url , data)
+    zone = "https://drabble-zone.dreamwidth.org/tag/mod-post?style=light&tag=mod-post"
+    zonepage = s.get(zone)
+    zonesoup = BeautifulSoup(zonepage.content, "html.parser")
+    zoneprompts = zonesoup.find_all("h3", string=lambda text: "challenge" in text.lower())
+    zonesubsoup = BeautifulSoup(str(zoneprompts[0]), "html.parser")
+    zoneurl = zonesubsoup.find("a")
+    zoneprompt = (zoneurl["href"])
+    zonepromptnew = (zoneurl["href"] + "?style=light")
+    zonepromptpage = s.get(zonepromptnew)
+    zonepromptsoup = BeautifulSoup(zonepromptpage.content, "html.parser")
+    zoneprompttext = zonepromptsoup.find(class_="entry-content")
+    zonetheprompt = zoneprompttext.find("strong")
+    print("drabble-zone (100 or 200 words): \033[1m" + zonetheprompt.text.lower() + "\033[0m (" + zoneprompt + ")\n")
+    thefile.write("- [[" + zoneprompt + "][drabble-zone]] (100 or 200 words): *" + zonetheprompt.text.lower() + "*\n")
+    emotion = "https://emotion100.dreamwidth.org/tag/*modpost?style=light&tag=%2Amodpost"
+    emotionpage = s.get(emotion)
+    emotionsoup = BeautifulSoup(emotionpage.content, "html.parser")
+    emotionprompts = emotionsoup.find_all("h3", string=lambda text: "prompt" in text.lower())
+    emotionsubsoup = BeautifulSoup(str(emotionprompts[0]), "html.parser")
+    emotionurl = emotionsubsoup.find("a")
+    emotionprompt = (emotionurl["href"])
+    emotionpromptnew = (emotionurl["href"] + "?style=light")
+    emotionpromptpage = s.get(emotionpromptnew)
+    emotionpromptsoup = BeautifulSoup(emotionpromptpage.content, "html.parser")
+    emotionprompttext = emotionpromptsoup.find(class_="entry-content")
+    emotiontheprompt = emotionprompttext.find_all("span")[-1]
+    print("emotion100 (100 words or a multiple of 100): \033[1m" + emotiontheprompt.text.lower() + "\033[0m (" + emotionprompt + ")\n")
+    thefile.write("- [[" + emotionprompt + "][emotion100]] (100 words or a multiple of 100): *" + emotiontheprompt.text.lower() + "*\n")
+
+# for this one, have to get prompts from comments
+ffa = "https://fail-fandomanon.dreamwidth.org/?style=light"
+ffapage = requests.get(ffa)
+ffasoup = BeautifulSoup(ffapage.content, "html.parser")
+ffaprompts = ffasoup.find_all("h3", string=lambda text: "ffa dw post" in text.lower())
+ffapromptstrim = [x for x in ffaprompts if "Placeholder" not in str(x)]
+ffasubsoup = BeautifulSoup(str(ffapromptstrim[0]), "html.parser")
+ffaurl = ffasubsoup.find("a")
+ffaprompt = (ffaurl["href"])
+ffapromptnew = (ffaprompt + "?style=light")
+ffapromptpage = requests.get(ffapromptnew)
+ffapromptsoup = BeautifulSoup(ffapromptpage.content, "html.parser")
+ffaprompttext = ffapromptsoup.find(id="comments")
+ffaresoup = BeautifulSoup(str(ffaprompttext), "html.parser")
+ffatheprompt = ffaresoup.find_all("h4",text=True)
+ffacent = []
+for each in ffatheprompt:
+    if "100 words of" in (str(each.get_text())):
+        ffacent.append(str(each.get_text()))
+i = 2
+while i < 8:
+    ffapromptnew = (ffaprompt + "?page=" + str(i) + "&style=light")
+    ffapromptpage = requests.get(ffapromptnew)
+    ffapromptsoup = BeautifulSoup(ffapromptpage.content, "html.parser")
+    ffaprompttext = ffapromptsoup.find(id="comments")
+    ffaresoup = BeautifulSoup(str(ffaprompttext), "html.parser")
+    ffatheprompt = ffaresoup.find_all("h4",text=True)
+    for each in ffatheprompt:
+        if "100 words of" in (str(each.get_text())):
+            if "Re:" not in (str(each.get_text())):
+                ffacent.append(str(each.get_text()))
+    i += 1
+if ffacent:
+    ffacent = list(dict.fromkeys(ffacent))
+    ffacentnew = []
+    for x in ffacent:
+        x = x[13:]
+        ffacentnew.append(x)
+        ffaformat = ", ".join(ffacentnew)
+    print("fail-fandomanon (any): \033[1m" + ffaformat + "\033[0m (" + ffaprompt + ")\n")
+    thefile.write("- [[" + ffaprompt + "][fail-fandomanon]] (any): *" + ffaformat + "*\n")
+
+    
+# for this one, prompts are unavailable on tuesdays and wednesdays
+weekprogress = datetime.now().weekday()
+if not 0 < weekprogress < 3:
+    fandom = "https://fandomweekly.dreamwidth.org/?style=light&tag=%23challenge"
+    fandompage = requests.get(fandom)
+    fandomsoup = BeautifulSoup(fandompage.content, "html.parser")
+    fandomprompts = fandomsoup.find_all("h3", string=lambda text: "challenge post" in text.lower())
+    fandomsubsoup = BeautifulSoup(str(fandomprompts[0]), "html.parser")
+    fandomurl = fandomsubsoup.find("a")
+    fandomprompt = (fandomurl["href"])
+    fandompromptnew = (fandomurl["href"] + "?style=light")
+    fandompromptpage = requests.get(fandompromptnew)
+    fandompromptsoup = BeautifulSoup(fandompromptpage.content, "html.parser")
+    fandomprompttext = fandompromptsoup.find(class_="entry-content")
+    fandomtheprompt = fandomprompttext.find("td")
+    print("fandomweekly (any, competitive): \033[1m" + fandomtheprompt.text.lower() + "\033[0m (" + fandomprompt + ")\n")
+    thefile.write("- [[" + fandomprompt + "][fandomweekly]] (any, competitive): *" + fandomtheprompt.text.lower() + "*\n")
+
+flash = "https://fan-flashworks.dreamwidth.org/?style=light&tag=admin"
+flashpage = requests.get(flash)
+flashsoup = BeautifulSoup(flashpage.content, "html.parser")
+flashprompts = flashsoup.find_all("h3", string=lambda text: "challenge" in text.lower())
+flashsubsoup = BeautifulSoup(str(flashprompts[0]), "html.parser")
+flashurl = flashsubsoup.find("a")
+flashprompt = (flashurl["href"])
+flashpromptnew = (flashurl["href"] + "?style=light")
+flashpromptpage = requests.get(flashpromptnew)
+flashpromptsoup = BeautifulSoup(flashpromptpage.content, "html.parser")
+flashprompttext = flashpromptsoup.find(class_="entry-content")
+flashtheprompt = flashprompttext.find("center")
+print("fan-flashworks (any, can’t post elsewhere until round is closed): \033[1m" + flashtheprompt.text.lower() + "\033[0m (" + flashprompt + ")\n")
+thefile.write("- [[" + flashprompt + "][fan-flashworks]] (any, can’t post elsewhere until round is closed): *" + flashtheprompt.text.lower() + "*\n")
+
+femslash = "https://femslashficlets.dreamwidth.org/tag/challenges?style=light&tag=challenges"
+femslashpage = requests.get(femslash)
+femslashsoup = BeautifulSoup(femslashpage.content, "html.parser")
+femslashprompts = femslashsoup.find_all("h3", string=lambda text: "challenge" in text.lower())
+femslashsubsoup = BeautifulSoup(str(femslashprompts[0]), "html.parser")
+femslashurl = femslashsubsoup.find("a")
+femslashprompt = (femslashurl["href"])
+femslashpromptnew = (femslashurl["href"] + "?style=light")
+femslashpromptpage = requests.get(femslashpromptnew)
+femslashpromptsoup = BeautifulSoup(femslashpromptpage.content, "html.parser")
+femslashprompttext = femslashpromptsoup.find(class_="entry-content")
+femslashtheprompt = femslashprompttext.find("i")
+print("femslash-ficlets (100–1000 words, F/F): \033[1m" + femslashtheprompt.text.lower() + "\033[0m (" + femslashprompt + ")\n")
+thefile.write("- [[" + femslashprompt + "][femslashficlets]] (100 words or a multiple of 100): *" + femslashtheprompt.text.lower() + "*\n")
+
+with requests.Session() as s:
+    response = s.post(login_url , data)
+    fffc = "https://fffc.dreamwidth.org/tag/!challenges?style=light&tag=%21challenges"
+    fffcpage = s.get(fffc)
+    fffcsoup = BeautifulSoup(fffcpage.content, "html.parser")
+    fffcmadnessprompts = fffcsoup.find_all("h3", string=lambda text: "froday madness" in text.lower())
+    fffcmadnesssubsoup = BeautifulSoup(str(fffcmadnessprompts[0]), "html.parser")
+    fffcmadnessurl = fffcmadnesssubsoup.find("a")
+    fffcmadnessprompt = (fffcmadnessurl["href"])
+    fffcmadnesspromptnew = (fffcmadnessurl["href"] + "?style=light")
+    fffcmadnesspromptpage = s.get(fffcmadnesspromptnew)
+    fffcmadnesspromptsoup = BeautifulSoup(fffcmadnesspromptpage.content, "html.parser")
+    fffcmadnessprompttext = fffcmadnesspromptsoup.find(class_="entry-content")
+    fffcmadnesstheprompt = fffcmadnessprompttext.find("center")
+    print("fffc madness (at least 2000 words): \033[1m" + fffcmadnesstheprompt.text.lower() + "\033[0m (" + fffcmadnessprompt + ")\n")
+    thefile.write("- [[" + fffcmadnessprompt + "][fffc madness]] (at least 2000 words): *" + fffcmadnesstheprompt.text.lower() + "*\n")
+    fffcregularprompts = fffcsoup.find_all("h3", string=lambda text: "regular challenge" in text.lower())
+    fffcregularsubsoup = BeautifulSoup(str(fffcregularprompts[0]), "html.parser")
+    fffcregularurl = fffcregularsubsoup.find("a")
+    fffcregularprompt = (fffcregularurl["href"])
+    fffcregularpromptnew = (fffcregularurl["href"] + "?style=light")
+    fffcregularpromptpage = s.get(fffcregularpromptnew)
+    fffcregularpromptsoup = BeautifulSoup(fffcregularpromptpage.content, "html.parser")
+    fffcregularprompttext = fffcregularpromptsoup.find(class_="entry-content")
+    fffcregulartheprompt = fffcregularprompttext.find("b")
+    print("fffc regular challenge (at least 100 words): \033[1m" + fffcregulartheprompt.text.lower() + "\033[0m (" + fffcregularprompt + ")\n")
+    thefile.write("- [[" + fffcregularprompt + "][fffc regular challenge]] (at least 100 words): *" + fffcregulartheprompt.text.lower() + "*\n")
+
+ficlet = "https://ficlet-zone.dreamwidth.org/tag/challenge+post?style=light&tag=challenge+post"
+ficletpage = requests.get(ficlet)
+ficletsoup = BeautifulSoup(ficletpage.content, "html.parser")
+ficletprompts = ficletsoup.find_all("h3", string=lambda text: "challenge" in text.lower())
+ficletsubsoup = BeautifulSoup(str(ficletprompts[0]), "html.parser")
+ficleturl = ficletsubsoup.find("a")
+ficletprompt = (ficleturl["href"])
+ficletpromptnew = (ficleturl["href"] + "?style=light")
+ficletpromptpage = requests.get(ficletpromptnew)
+ficletpromptsoup = BeautifulSoup(ficletpromptpage.content, "html.parser")
+ficletprompttext = ficletpromptsoup.find(class_="entry-content")
+ficlettheprompt = ficletprompttext.find("a")
+print("ficlet-zone (any): \033[1m" + ficlettheprompt.text.lower() + "\033[0m (" + ficletprompt + ")\n")
+thefile.write("- [[" + ficletprompt + "][ficlet-zone]] (any): *" + ficlettheprompt.text.lower() + "*\n")