# the-hacker-news-rss/src/ef/xml-download.py
#
# 102 lines
# 2.7 KiB
# Python

import requests
import xml.etree.ElementTree as ET
# Feed address (FeedBurner, XML format) that downloadRSS() fetches with requests.get.
URL = "https://feeds.feedburner.com/TheHackersNews?format=xml"
# Local file downloadRSS() writes the raw response into; main() hands it to parseXML().
FEED_FILE = "feed.xml"
def downloadRSS(timeout=30):
    """Download the feed at ``URL`` and store the raw bytes in ``FEED_FILE``.

    Args:
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; without it the request
            could block forever on an unresponsive server.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(URL, timeout=timeout)
    # Fail loudly on 4xx/5xx so an error page is never saved as the feed
    # (it would only surface later as a confusing XML parse error).
    response.raise_for_status()
    with open(FEED_FILE, 'wb') as feed:
        feed.write(response.content)
def parseXML(xmlfile):
    """Parse an RSS document into a list of per-item dictionaries.

    Args:
        xmlfile: Path or file object accepted by ``ET.parse``.

    Returns:
        A list with one dict per ``./channel/item`` element. Each child
        element that has text is stored under its tag name with the text
        encoded as UTF-8 ``bytes``. A ``media:content`` child is stored
        under ``'media'`` as the (str) value of its ``url`` attribute.
        Children with no text, and ``media:content`` elements without a
        ``url`` attribute, are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If the document is not
            well-formed XML.
    """
    # ElementTree reports namespaced tags as "{namespace-uri}localname".
    media_tag = '{http://search.yahoo.com/mrss/}content'

    root = ET.parse(xmlfile).getroot()

    newsitems = []
    for item in root.findall('./channel/item'):
        news = {}
        for child in item:
            if child.tag == media_tag:
                # A media element without a url must not abort the whole
                # parse; just leave 'media' out for that item.
                media_url = child.attrib.get('url')
                if media_url is not None:
                    news['media'] = media_url
            elif child.text is not None:
                # Values stay UTF-8 bytes: saveToHTML() decodes them.
                news[child.tag] = child.text.encode('utf8')
        newsitems.append(news)
    return newsitems
def saveToHTML(articles):
    """Write the articles as a scrolling news page to ``news_scroll.html``.

    The page links to ``news_scroll.css`` and ``news_scroll.js`` and holds
    one ``<a>`` block per article with its title and description.

    Args:
        articles: Iterable of dicts with ``"title"``, ``"description"`` and
            ``"link"`` entries. Values may be UTF-8 ``bytes`` (as produced
            by ``parseXML``) or ``str``; a missing or ``None`` entry is
            rendered as an empty string.

    Returns:
        None. The file is created (or overwritten) in the current working
        directory.
    """
    # Function-scope import so this block brings its own dependency.
    from html import escape

    def field(article, key):
        """Return ``article[key]`` as HTML-escaped text ('' when absent)."""
        value = article.get(key)
        if value is None:
            return ''
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        # Feed content is untrusted: escape it (quotes included, since the
        # link ends up inside an attribute) so it cannot inject markup.
        return escape(value, quote=True)

    header = (
        '<!doctype html>\n'
        '<html lang="en">\n'
        '<head>\n'
        '<meta charset="utf-8" />\n'
        '<title>News</title>\n'
        '<base target="_parent" />\n'
        '<link rel="stylesheet" href="news_scroll.css" type="text/css" />\n'
        '<script src="news_scroll.js" type="text/javascript"></script> \n'
        '</head>\n'
        '<body id="news-scroll-body" color-theme="terminal" class="news-scroll" onMouseover="scrollspeed=0" onMouseout="scrollspeed=current" OnLoad="NewsScrollStart();">\n'
        '<!-- START NEWS FEED -->\n'
        '<div id="NewsDiv">\n'
        '<div class="scroll-text-if">\n'
        '<div id="news-container">\n'
    )
    # The href value is quoted so URLs containing spaces or '>' cannot
    # terminate the attribute early.
    article_template = (
        '<a href="{uri}">\n'
        '<div class="news-article">\n'
        '<h2>\n'
        '{title}\n'
        '</h2>\n'
        '<p>{content}...</p>\n'
        '</div>\n'
        '</a>\n'
    )
    footer = (
        '</div>\n'
        '</div>\n'
        '</div>\n'
        '</body>\n'
        '</html>'
    )

    # The page declares charset utf-8, so write it as UTF-8 regardless of
    # the platform's default encoding.
    with open("news_scroll.html", "w", encoding="utf-8") as news:
        news.write(header)
        for article in articles:
            news.write(article_template.format(
                title=field(article, "title"),
                content=field(article, "description"),
                uri=field(article, "link"),
            ))
        news.write(footer)
def main():
    """Run the whole pipeline: download the feed, parse it, write the page.

    Progress is reported on stdout before and after each stage.
    """
    # Stage 1: fetch the feed into FEED_FILE.
    print("Downloading RSS...")
    downloadRSS()
    print("RSS downloaded.")

    # Stage 2: turn the saved feed into a list of article dicts.
    print("Parsing RSS...")
    parsed_items = parseXML(FEED_FILE)
    item_count = len(parsed_items)
    print("RSS parsed into", item_count, "articles.")

    # Stage 3: render the articles as HTML.
    print("Putting into HTML...")
    saveToHTML(parsed_items)
    print("Put in HTML.")


# Only run the pipeline when executed as a script, not when imported.
if __name__ == "__main__":
    main()