"""Build ``feed.xml``, the RSS feed of general updates for tre.praze.net.

Running (or importing) this module reads the entries in ``therss.rss`` and
writes one RSS ``<item>`` per entry, in list order.  Items get guids
``tre<N>`` where N counts down from the number of entries to 1.

NOTE(review): this script reached review with every XML/HTML tag stripped
out of its string literals and with all statements collapsed onto a single
line.  The markup below is a reconstruction as plain RSS 2.0, inferred from
the surviving text and from the values written next to it.  Element names,
attributes and indentation should be checked against a known-good copy of
the script or of a previously generated feed.
"""

import re

from bs4 import BeautifulSoup

import therss

FEED_PATH = "feed.xml"

# NOTE(review): the stripped original had two more lines of markup in this
# header (possibly a stylesheet instruction and an <atom:link rel="self">).
# They could not be recovered and are optional in RSS 2.0, so they are omitted.
_CHANNEL_HEADER = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<rss version="2.0">\n'
    "  <channel>\n"
    "    <title>tre.praze.net</title>\n"
    "    <link>https://tre.praze.net</link>\n"
    "    <description>A feed for general updates at tre.praze.net</description>\n"
    "    <language>en-gb</language>\n"
)
_CHANNEL_FOOTER = "  </channel>\n</rss>\n"


def _item_title(entry):
    """Return the title text for *entry*, or ``None`` if it gets no title.

    ``entry["real"]`` is used when it is present and truthy, and
    ``entry["title"]`` is used when the ``"real"`` key is missing.  A
    present-but-falsy ``"real"`` produces no title, matching the original
    control flow.
    """
    try:
        real = entry["real"]
    except KeyError:
        return entry["title"]
    if not real:
        return None
    # A truthy non-string value cannot be concatenated into the element; the
    # original's bare ``except`` fell back to the plain title in that case.
    return real if isinstance(real, str) else entry["title"]


def _item_description(entry):
    """Return the text to place inside the item's ``<description>``.

    A ``desc`` of ``"placeholder"`` means the description is taken from the
    post page ``<date>.html``: the ``h1.p-name`` heading (when present)
    followed by the ``div.e-content`` body, with newlines removed, wrapped
    in a CDATA section.  Any other ``desc`` is returned unchanged.
    """
    if entry["desc"] != "placeholder":
        return entry["desc"]

    # BeautifulSoup consumes the handle while constructing the tree, so the
    # file can be closed as soon as parsing is done.
    with open(entry["date"] + ".html", "r") as page:
        soup = BeautifulSoup(page, features="lxml")

    heading = soup.find("h1", class_="p-name")
    body = soup.find("div", class_="e-content")

    # NOTE(review): the original looked up the heading inside a try/except
    # and had a title-less fallback, so it presumably prepended the heading
    # when one exists.  How exactly it was rendered was lost; confirm.
    # A missing element is skipped rather than emitted as the text "None".
    markup = "".join(str(node) for node in (heading, body) if node is not None)

    # NOTE(review): the original wrapped this in several more re.sub() calls
    # whose patterns were destroyed by the tag stripping; only the newline
    # removal survived.  Restore the others from a good copy if they matter.
    markup = re.sub("\n", "", markup)

    # "]]>" inside the markup would terminate the CDATA section early, so
    # split it across two adjacent sections.
    return "<![CDATA[" + markup.replace("]]>", "]]]]><![CDATA[>") + "]]>"


def _write_item(feed, entry, number):
    """Write one ``<item>`` for *entry* to *feed*, with guid ``tre<number>``."""
    feed.write("    <item>\n")

    title = _item_title(entry)
    if title is not None:
        feed.write("      <title>" + title + "</title>\n")

    feed.write("      <pubDate>" + entry["rssdatetime"] + "</pubDate>\n")
    feed.write("      <link>" + entry["url"] + "</link>\n")
    # The guid is an opaque identifier, not a URL, so it must not be
    # advertised as a permalink.
    feed.write('      <guid isPermaLink="false">tre' + str(number) + "</guid>\n")
    feed.write("      <description>" + _item_description(entry) + "</description>\n")

    # Categories are optional; a missing key or an empty/None value means
    # the item simply has none.
    try:
        categories = entry["categories"]
    except KeyError:
        categories = ()
    for category in categories or ():
        feed.write("      <category>" + category + "</category>\n")

    feed.write("    </item>\n")


def write_feed(path=FEED_PATH):
    """Write the complete feed for ``therss.rss`` to *path*.

    The file is written as UTF-8 to match the encoding named in the XML
    declaration, and is closed even if an entry fails part-way through.
    """
    entries = therss.rss
    with open(path, "w", encoding="utf-8") as feed:
        feed.write(_CHANNEL_HEADER)
        # Guid numbers count down so the first entry gets the highest one.
        for number, entry in zip(range(len(entries), 0, -1), entries):
            _write_item(feed, entry, number)
        feed.write(_CHANNEL_FOOTER)


# The original was a flat script that generated the feed at module level;
# keep that behaviour so existing invocations continue to work.
write_feed()