Update scraper for vocab-drabbles

master
trémeur 2 years ago
parent 3fec876617
commit d300f90298

@@ -259,7 +259,7 @@ ficlettheprompt = ficletprompttext.find("a")
 print("ficlet-zone (any): \033[1m" + ficlettheprompt.text.lower() + "\033[0m (" + ficletprompt + ")\n")
 thefile.write("- [[" + ficletprompt + "][ficlet-zone]] (any): *" + ficlettheprompt.text.lower() + "*\n")
-# slightly different format here, taking prompt from the post title
+# grabbing two different instances of the same tag within the post
 vocab = "https://vocab-drabbles.dreamwidth.org/?style=light&tag=challenge"
 vocabpage = requests.get(vocab)
 vocabsoup = BeautifulSoup(vocabpage.content, "html.parser")
@@ -270,6 +270,9 @@ vocabprompt = (vocaburl["href"])
 vocabpromptnew = (vocaburl["href"] + "?style=light")
 vocabpromptpage = requests.get(vocabpromptnew)
 vocabpromptsoup = BeautifulSoup(vocabpromptpage.content, "html.parser")
-vocabtheprompt = vocabpromptsoup.find(class_="entry-title")
-print("vocab-drabbles (50500 words): \033[1m" + vocabtheprompt.text.lower() + "\033[0m (" + vocabprompt + ")\n")
-thefile.write("- [[" + vocabprompt + "][vocab-drabbles]] (50500 words): *" + vocabtheprompt.text.lower() + "*\n")
+vocablist = []
+for listitem in vocabpromptsoup.find_all("strong"):
+    vocablist.append(listitem.text.lower())
+vocabformat = "; ".join(vocablist)
+print("vocab-drabbles (50500 words): \033[1m" + vocabformat + "\033[0m (" + vocabprompt + ")\n")
+thefile.write("- [[" + vocabprompt + "][vocab-drabbles]] (50500 words): *" + vocabformat + "*\n")

Loading…
Cancel
Save