-- File info: 46 lines, 1.6 KiB, Lua
-- Register the route name and its URL path with the host application.
-- NOTE(review): presumably `add_route` also creates the global
-- `Nineto5google` table that the handler below is attached to -- confirm
-- against the host bindings.
add_route("Nineto5google", "/9to5google")

--- Build the feed served for the "/9to5google" route.
-- Fetches the upstream 9to5google RSS feed, takes at most the first 25
-- entries, scrapes the ones that `db:check` returns as still to be scraped,
-- stores them, and returns a feed built from the stored and the freshly
-- scraped entries.
-- @param args route arguments supplied by the caller (not used here)
-- @return the value produced by `create_rss_feed`
function Nineto5google.route(args)
    -- TODO: This should not be handled by lua
    -- Golang needs to bind the database to lua
    local rssFeed = rss:get("https://9to5google.com/feed/")
    local entries = parse_xml_feed(rssFeed)

    local newEntries = {}
    local to_scrape = {}

    -- TODO: Potentially remove this limit
    local max_entries = 25
    for i = 1, max_entries do
        local candidate = entries[i]
        -- The feed may contain fewer than max_entries items; never append nil.
        if candidate ~= nil then
            to_scrape[#to_scrape + 1] = candidate
        end
    end

    -- Check if the selected entries have already been scraped.
    -- `scraped` is declared local so it does not leak into the global
    -- environment; `to_scrape` is narrowed to what db:check hands back.
    local scraped
    scraped, to_scrape = db:check(to_scrape)

    for _, entry in ipairs(to_scrape) do
        log.debug("Scraping: " .. entry:link())
        local article = get(entry:link())
        local post = html.new(article)

        -- Strip page chrome, scripts and ad containers before selecting the body.
        post:remove("header")
        post:remove("script")
        post:remove(".ad-disclaimer-container")
        post:remove("#after_disclaimer_placement")
        post:remove(".adsbygoogle")
        post:remove(".google-news-link")

        -- Hand the selected ".post-content" to entry:description
        -- (presumably a setter for the entry's description -- TODO confirm).
        local content = post:select(".post-content")
        entry:description(content)

        db:insert(entry)
        newEntries[#newEntries + 1] = entry

        -- Pause between article requests. This shells out to the `sleep`
        -- command, so it depends on that command being available on the host.
        os.execute("sleep 0.25")
    end

    -- Fetch the scraped entries from the database
    local localEntries = db:getRss(scraped)

    -- Merge the two lists
    newEntries = rss:merge(localEntries, newEntries)

    -- Create a new rss feed from the merged list
    local image = RssImage.new("9to5google", "https://9to5google.com/favicon.ico", "https://9to5google.com")
    local feed = create_rss_feed("9to5google", "https://9to5google.com", image, newEntries)

    return feed
end