rsslair/scripts/techcrunch.lua

43 lines
2.1 KiB
Lua

-- TODO: Once the feed is sufficiently long, we could make a bigger feed, since the upstream feed is only 20 articles long.
-- Display name and home page of the source website.
-- NOTE(review): neither constant is referenced in the visible part of this
-- script -- confirm whether they are still needed.
local WEBSITE_NAME = "TechCrunch"
local WEBSITE_HOME = "https://techcrunch.com"
-- Register the "/TechCrunch" path under the name "techCrunch".
-- NOTE(review): presumably the host resolves that name to the global table
-- defined below and invokes its `route` function -- confirm against add_route.
add_route("techCrunch", "/TechCrunch")
-- Table holding this script's route handler. It is deliberately left global
-- (no `local`); presumably the host looks it up by name -- TODO confirm
-- before changing its scope.
techCrunch = {}
--- Build the TechCrunch feed with full article bodies as descriptions.
--
-- Downloads the upstream RSS feed, then for every article either reuses the
-- description returned by the database lookup or downloads the article page
-- and uses the first `.wp-block-post-content` match as the description.
-- When no such element is found, the article keeps the description that came
-- with the feed instead of having it overwritten with nil.
--
-- @param args route arguments supplied by the caller (not used here)
-- @return the result of `feed:render()`
-- @return the feed object itself
function techCrunch.route(args)
    local xml = get("https://techcrunch.com/feed/") -- Raw XML of the upstream feed
    local rss_parser = Feed()
    local feed = rss_parser:new(xml) -- Parse the XML into a feed object
    local articles = feed.channel.articles
    -- TODO: Add api to check if the articles are already in the database
    -- Lookup indexed by guid value; a truthy entry is used as the stored
    -- description for that article.
    local existing_articles = db:check_if_articles_in_feed_exist(feed)
    log:debug("Fetching missing articles from the database")
    for _, article in ipairs(articles) do
        -- <guid> is optional in RSS 2.0, so do not assume it is present.
        local guid = article.guid and article.guid.value
        local stored = nil
        if guid ~= nil then
            stored = existing_articles[guid]
        end

        if stored then
            log:debug("Article already exists in the database: " .. article.title)
            article.description = stored
        else
            log:debug("Getting article: " .. article.title .. " from " .. article.link)
            local article_content = get(article.link) -- Full article page
            local html_parser = HtmlParser()
            html_parser:parse(article_content)
            local elements = html_parser:select_element('.wp-block-post-content')
            local element = elements and elements[1]
            if element then
                -- Replace the feed summary with the entire article body.
                article.description = element
            else
                -- Selector matched nothing (e.g. a different page layout):
                -- keep the feed-supplied description rather than clearing it.
                log:debug("No article content found for: " .. article.title)
            end
            -- Pause after each download so requests are spaced out
            -- (presumably 500 milliseconds -- confirm the unit of sleep()).
            sleep(500)
        end
    end
    return feed:render(), feed
end