-- rsslair/scripts/electrek.lua

-- 46 lines
-- 1.5 KiB
-- Lua

-- Register the "electrek" route under the "/electrek" path.
-- NOTE(review): presumably add_route also creates the global `electrek` table
-- that the route handler is attached to — confirm against the host binding.
add_route("electrek", "/electrek")
--- Build the Electrek feed with scraped article bodies.
--
-- Downloads the upstream feed, takes at most the first 25 entries, scrapes
-- the ones that `db:check` hands back as still to be scraped, stores them,
-- and returns a feed built from the stored entries merged with the freshly
-- scraped ones.
--
-- @param args route arguments passed in by the host; not used by this route
-- @return the value produced by `create_rss_feed`
function electrek.route(args)
  -- TODO: This should not be handled by lua
  -- Golang needs to bind the database to lua

  -- TODO: Potentially remove this limit
  local max_entries = 25

  -- Selectors stripped from the article before the content is extracted.
  -- Order matches the original sequence of remove calls.
  local removed_selectors = {
    "header",
    "script",
    ".ad-disclaimer-container",
    "#after_disclaimer_placement",
    ".adsbygoogle",
    ".google-news-link",
  }

  local rss_feed = rss:get("https://electrek.co/feed")
  local entries = parse_xml_feed(rss_feed)

  -- Collect the leading entries; the feed may hold fewer than max_entries,
  -- so missing slots are skipped explicitly instead of being appended.
  local candidates = {}
  for i = 1, max_entries do
    local entry = entries[i]
    if entry ~= nil then
      candidates[#candidates + 1] = entry
    end
  end

  -- Check if the selected entries have already been scraped.
  -- Both results are kept local: the first one used to leak into the global
  -- environment, where other scripts sharing the Lua state could clobber it.
  local scraped, to_scrape = db:check(candidates)

  local new_entries = {}
  for _, entry in ipairs(to_scrape) do
    local link = entry:link()
    log.debug("Scraping: " .. link)

    local post = html.new(get(link))
    for _, selector in ipairs(removed_selectors) do
      post:remove(selector)
    end

    -- NOTE(review): presumably description(value) acts as a setter that
    -- replaces the feed summary with the scraped body — confirm in the binding.
    entry:description(post:select(".post-content"))
    db:insert(entry)
    new_entries[#new_entries + 1] = entry

    -- Pause between requests. This shells out to an external `sleep`
    -- command, so it depends on the host OS providing one.
    os.execute("sleep 0.25")
  end

  -- Fetch the scraped entries from the database
  local stored_entries = db:getRss(scraped)
  -- Merge the two lists
  local merged = rss:merge(stored_entries, new_entries)

  -- Create a new rss feed from the merged list
  local image = RssImage.new("Electrek", "https://electrek.co/favicon.ico", "https://electrek.co")
  return create_rss_feed("Electrek", "https://electrek.co", image, merged)
end