Fix FFA page 1 scraping
This commit is contained in:
parent
55b4492954
commit
3c5945b87e
1 changed file with 2 additions and 5 deletions
|
@@ -140,10 +140,7 @@ ffaprompttext = ffapromptsoup.find(id="comments")
|
||||||
ffaresoup = BeautifulSoup(str(ffaprompttext), "html.parser")
|
ffaresoup = BeautifulSoup(str(ffaprompttext), "html.parser")
|
||||||
ffatheprompt = ffaresoup.find_all("h4",text=True)
|
ffatheprompt = ffaresoup.find_all("h4",text=True)
|
||||||
ffacent = []
|
ffacent = []
|
||||||
for each in ffatheprompt:
|
i = 1
|
||||||
if "100 words of" in (str(each.get_text())):
|
|
||||||
ffacent.append(str(each.get_text()))
|
|
||||||
i = 2
|
|
||||||
while i < 8:
|
while i < 8:
|
||||||
ffapromptnew = (ffaprompt + "?page=" + str(i) + "&style=light")
|
ffapromptnew = (ffaprompt + "?page=" + str(i) + "&style=light")
|
||||||
ffapromptpage = requests.get(ffapromptnew)
|
ffapromptpage = requests.get(ffapromptnew)
|
||||||
|
@@ -153,7 +150,7 @@ while i < 8:
|
||||||
ffatheprompt = ffaresoup.find_all("h4",text=True)
|
ffatheprompt = ffaresoup.find_all("h4",text=True)
|
||||||
for each in ffatheprompt:
|
for each in ffatheprompt:
|
||||||
if "100 words of" in (str(each.get_text())) or "100 Words of" in (str(each.get_text())) or "100 Words Of" in (str(each.get_text())):
|
if "100 words of" in (str(each.get_text())) or "100 Words of" in (str(each.get_text())) or "100 Words Of" in (str(each.get_text())):
|
||||||
if "Re:" not in (str(each.get_text())):
|
if "Re:" not in (str(each.get_text())) and "catch-up" not in (str(each.get_text())):
|
||||||
ffacent.append(str(each.get_text()))
|
ffacent.append(str(each.get_text()))
|
||||||
i += 1
|
i += 1
|
||||||
if ffacent:
|
if ffacent:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue