Generate RSS feed

mez 2025-06-13 21:57:22 +01:00
parent 19824ab16f
commit 6ac889da24
5 changed files with 113 additions and 3 deletions

.gitignore

@@ -1,3 +1,5 @@
build/*
feed.xml
__pycache__/
secrets.py
secrets.py
therss.py

(site build and publish script)

@@ -2,6 +2,8 @@
rm -rf build
rm -f feed.xml
mkdir build
python3 identify.py
@@ -11,6 +13,8 @@ for i in 20*.org; do
emacs --batch -l pre.el $i -l export.el -f org-html-export-to-html --kill
done
python3 rssgen.py
for i in *.html; do
thebase="${i%%.*}"
dirname=${thebase//"-"/}
@@ -28,5 +32,7 @@ mv tempfile readme.org
rclone copy build prazevps:/var/www/tre/public/notes -P
rclone copyto feed.xml prazevps:/var/www/tre/public/feed.xml -P
current_date=$(date +"%Y%m%d")
curl -i -d "source=https://tre.praze.net/notes/$current_date&target=https://fed.brid.gy/" https://fed.brid.gy/webmention
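
A rough Python equivalent of that final curl step, for reference: it sends the same webmention ping telling fed.brid.gy that the day's note exists. This is a sketch only (the commit itself uses curl; urllib is the standard library, not a new dependency):

import urllib.parse
import urllib.request
from datetime import date

# Same POST as the curl line above: source is today's note, target is fed.brid.gy.
# Sketch only -- not part of this commit.
current_date = date.today().strftime("%Y%m%d")
data = urllib.parse.urlencode({
    "source": "https://tre.praze.net/notes/" + current_date,
    "target": "https://fed.brid.gy/",
}).encode()
with urllib.request.urlopen("https://fed.brid.gy/webmention", data=data) as response:
    print(response.status, response.reason)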

File diff suppressed because one or more lines are too long

readme.org

@@ -1,5 +1,4 @@
=secrets.py= should contain ~orgpath = "full path to journal root directory"~.
* to do
- rss feed
- inject comments somehow (if there are any)
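
The readme line above is the whole contract for =secrets.py=, which is gitignored rather than committed. A minimal sketch of that file, with an invented path value:

# secrets.py -- kept out of git (see .gitignore above); the path below is an example only.
orgpath = "/home/mez/org/journal"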

rssgen.py (new file)

@@ -0,0 +1,42 @@
import re

from bs4 import BeautifulSoup

import therss  # therss.rss is a list of entry dicts built elsewhere (the file is gitignored)


def absolutise(post, url):
    # Flatten the exported HTML onto one line, un-self-close tags, point relative
    # hrefs/srcs at the site root, and point href="." at the entry's own URL.
    html = re.sub("\n", "", str(post))
    html = re.sub("\"/>", "\">", html)
    html = re.sub("href=\"/", "href=\"https://tre.praze.net/", html)
    html = re.sub("src=\"/", "src=\"https://tre.praze.net/", html)
    return re.sub("href=\"\\.\"", "href=\"" + url + "\"", html)


writerss = open("feed.xml", "w")
writerss.write(
    "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
    "<?xml-stylesheet href=\"feed.xsl\" type=\"text/xsl\"?>\n"
    "<rss xmlns:atom=\"http://www.w3.org/2005/Atom\" version=\"2.0\">\n"
    " <channel>\n"
    " <atom:link href=\"https://tre.praze.net/feed.xml\" rel=\"self\" type=\"application/rss+xml\"/>\n"
    " <title>tre.praze.net</title>\n"
    " <link>https://tre.praze.net</link>\n"
    " <description>A feed for general updates at tre.praze.net</description>\n"
    " <language>en-gb</language>\n")

# rssno numbers the items for their guids, counting down through the list.
rssno = len(therss.rss)
for entry in therss.rss:
    writerss.write(" <item>\n")
    # Use the human-readable title when the entry has one, else the fallback title.
    try:
        if entry["real"]:
            writerss.write(" <title>" + entry["real"] + "</title>\n")
    except KeyError:
        writerss.write(" <title>" + entry["title"] + "</title>\n")
    writerss.write(" <pubDate>" + entry["rssdatetime"] + "</pubDate>\n"
                   " <link>" + entry["url"] + "</link>\n"
                   " <guid isPermaLink=\"false\">tre" + str(rssno) + "</guid>\n"
                   " <description>")
    if entry["desc"] == "placeholder":
        # No explicit description: lift the post body (and title, if present)
        # out of the exported HTML.
        with open(entry["date"] + ".html", "r") as htmlfile:
            soup = BeautifulSoup(htmlfile, features="lxml")
        thepost = soup.find("div", class_="e-content")
        thetitle = soup.find("h1", class_="p-name")
        if thetitle is not None:
            writerss.write("<![CDATA[" + str(thetitle) + absolutise(thepost, entry["url"]) + "]]>")
        else:
            writerss.write("<![CDATA[" + absolutise(thepost, entry["url"]) + "]]>")
    else:
        writerss.write(entry["desc"])
    writerss.write("</description>\n")
    for category in entry.get("categories", []):  # categories are optional
        writerss.write(" <category>" + category + "</category>\n")
    writerss.write(" </item>\n")
    rssno -= 1

writerss.write(" </channel>\n</rss>")
writerss.close()
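
rssgen.py takes everything from therss.rss, and therss.py itself is gitignored (presumably regenerated each build), so its shape never appears in the repo. A hypothetical single-entry example, inferred purely from the keys the loop above reads; every value is made up:

# therss.py -- a sketch of the generated module that rssgen.py imports.
# Field names come from rssgen.py; the values are illustrative only.
rss = [
    {
        "date": "2025-06-13",          # names the exported HTML file, 2025-06-13.html
        "title": "20250613",           # fallback <title> text
        "real": "Generate RSS feed",   # optional human-readable title
        "rssdatetime": "Fri, 13 Jun 2025 21:57:22 +0100",
        "url": "https://tre.praze.net/notes/20250613/",
        "desc": "placeholder",         # "placeholder" makes rssgen.py pull the body from the HTML
        "categories": ["meta"],        # optional
    },
]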