43 lines
2.1 KiB
Lua
43 lines
2.1 KiB
Lua
-- TODO: Once the feed is sufficiently long, we could make a bigger feed, since the source feed is only 20 articles long
|
|
-- Display name and home page of the scraped site. Neither constant is
-- referenced by the code visible in this file; they are kept as metadata.
local WEBSITE_NAME, WEBSITE_HOME = "TechCrunch", "https://techcrunch.com"

-- Register this script with the host under the name "techCrunch" and the
-- path "/TechCrunch". NOTE(review): add_route is provided by the host;
-- presumably the name is resolved against the global `techCrunch` table
-- defined below -- confirm against the host API.
add_route("techCrunch", "/TechCrunch")
|
|
|
|
-- Route table for the TechCrunch feed. Deliberately global (not `local`):
-- the original file defines it as a global, and presumably the host looks
-- it up by the name registered with add_route -- TODO confirm.
techCrunch = {}

-- CSS selector of the element that wraps the article body on an article page.
local ARTICLE_BODY_SELECTOR = ".wp-block-post-content"

-- Value passed to sleep() after every article download so the site is not
-- hammered (presumably milliseconds -- TODO confirm against the host API).
local FETCH_DELAY = 500

--- Download an article page and extract its body element.
-- @param link URL of the article page.
-- @return The first element matched by ARTICLE_BODY_SELECTOR, or nil when
--         the download yielded nothing or the selector matched no element.
local function fetch_article_body(link)
    local page = get(link) -- Get the entire article page
    if not page then
        return nil
    end

    -- A fresh parser per page, as in the original code.
    local html_parser = HtmlParser()
    html_parser:parse(page)

    local matches = html_parser:select_element(ARTICLE_BODY_SELECTOR)
    if not matches then
        return nil
    end
    return matches[1] -- nil when nothing matched
end

--- Build the TechCrunch feed with full article bodies as descriptions.
--
-- Fetches the site's RSS feed, then for every article either reuses the
-- description already stored in the database or downloads the article page
-- and replaces the RSS summary with the HTML of the article body. When the
-- body cannot be extracted, the original RSS description is left untouched
-- instead of being overwritten with nil.
--
-- @param args Route arguments supplied by the host (unused by this route).
-- @return The rendered feed (result of feed:render()).
-- @return The feed object itself.
function techCrunch.route(args)
    local xml = get("https://techcrunch.com/feed/") -- Get the xml from the website
    local rss_parser = Feed() -- Create a new instance of the Feed object
    local feed = rss_parser:new(xml) -- Parse the xml into a feed object

    -- Fall back to empty tables so an empty feed or an empty database
    -- answer simply results in no work rather than an error.
    local articles = feed.channel.articles or {}

    -- TODO: Add api to check if the articles are already in the database
    -- Maps guid value -> stored description for articles that ALREADY exist.
    local existing_articles = db:check_if_articles_in_feed_exist(feed) or {}

    log:debug("Fetching missing articles from the database")
    for _, article in ipairs(articles) do
        -- <guid> is optional in RSS 2.0, so do not assume it is present.
        local guid = article.guid and article.guid.value
        local stored
        if guid ~= nil then
            stored = existing_articles[guid]
        end

        if stored then
            log:debug("Article already exists in the database: " .. article.title)
            article.description = stored
        else
            log:debug("Getting article: " .. article.title .. " from " .. article.link)

            local body = fetch_article_body(article.link)
            if body ~= nil then
                -- Replace the description with the entire article
                article.description = body
            else
                -- Keep the RSS summary rather than wiping the description.
                log:debug("No article body found for: " .. article.link)
            end

            -- Throttle after every download attempt, successful or not.
            sleep(FETCH_DELAY)
        end
    end

    return feed:render(), feed
end
|