Document the TechCrunch Lua script
This commit is contained in:
parent
c8686df817
commit
f22c2e443c
|
|
@ -1,26 +1,26 @@
|
||||||
local WEBSITE_NAME = "9to5mac"
|
local WEBSITE_NAME = "TechCrunch"
|
||||||
local WEBSITE_HOME = "https://9to5mac.com"
|
local WEBSITE_HOME = "https://techcrunch.com"
|
||||||
|
|
||||||
add_route("techCrunch", "/TechCrunch")
|
add_route("techCrunch", "/TechCrunch")
|
||||||
|
|
||||||
techCrunch = {}
|
techCrunch = {}
|
||||||
function techCrunch.route(args)
|
function techCrunch.route(args)
|
||||||
local xml = get("http://localhost:8081/feed.xml")
|
local xml = get("http://localhost:8081/feed.xml") -- Get an xml RSS feed
|
||||||
local rss_parser = Feed()
|
local rss_parser = Feed() -- Create a new instance of the Feed object
|
||||||
local feed = rss_parser:new(xml)
|
local feed = rss_parser:new(xml) -- Parse the xml into a feed object
|
||||||
|
|
||||||
local articles = feed.channel.articles
|
local articles = feed.channel.articles -- Get all of the article objects
|
||||||
local article = articles[1]
|
local article = articles[1] -- Get the first article object
|
||||||
print('Article Title: ' .. article.title)
|
log:info("Title: " .. article.title)
|
||||||
print('Article Link: ' .. article.link)
|
log:info("Description: " .. article.description)
|
||||||
|
|
||||||
local article_content = get(article.link)
|
local article_content = get(article.link) -- Get the entire article content
|
||||||
local html_parser = HtmlParser()
|
local html_parser = HtmlParser() -- Create a new instance of the html parser
|
||||||
html_parser:parse(article_content)
|
html_parser:parse(article_content) -- Parse the article into an html tree
|
||||||
|
|
||||||
local elements = html_parser:select_element('.wp-block-post-content')
|
local elements = html_parser:select_element('.wp-block-post-content') -- Select the element with the class 'wp-block-post-content'
|
||||||
print('Selected Elements')
|
local element = elements[1] -- String of the html from the element selected
|
||||||
local element = elements[1]
|
article.description = element -- Replace the description with the entire article
|
||||||
article.description = element
|
|
||||||
|
|
||||||
return feed:render()
|
return feed:render()
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
use log::debug;
|
use log::{debug, error};
|
||||||
use mlua::{UserData, UserDataMethods};
|
use mlua::{UserData, UserDataMethods};
|
||||||
|
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
@ -29,11 +29,11 @@ impl HtmlParser {
|
||||||
match selector {
|
match selector {
|
||||||
Ok(selector) => {
|
Ok(selector) => {
|
||||||
let elements: Vec<_> = self.doc.select(&selector).collect();
|
let elements: Vec<_> = self.doc.select(&selector).collect();
|
||||||
println!("Found {} elements", elements.len());
|
debug!("Found {} elements", elements.len());
|
||||||
elements.iter().map(|x| x.html()).collect()
|
elements.iter().map(|x| x.html()).collect()
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("Error: {}", e);
|
error!("{}", e);
|
||||||
vec![]
|
vec![]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue