Add initial feed parser for TechCrunch
- Able to replace RSS content with the entire article
This commit is contained in:
parent
2e558fb709
commit
c8686df817
|
|
@ -0,0 +1,26 @@
|
|||
-- Display name and home page of the site this parser targets.
-- These previously named 9to5mac, which did not match the TechCrunch route
-- registered below; neither constant is read elsewhere in this script.
local WEBSITE_NAME = "TechCrunch"
local WEBSITE_HOME = "https://techcrunch.com"

-- Register the "/TechCrunch" path under the name "techCrunch".
-- NOTE(review): presumably the host looks up the handler table through the
-- global of the same name, so `techCrunch` is intentionally left global — confirm.
add_route("techCrunch", "/TechCrunch")
techCrunch = {}
|
||||
-- Route handler: loads the feed, replaces the first article's description
-- with the article body scraped from its page, and returns the rendered feed.
--
-- `args` is accepted for the router's calling convention but is not used.
-- `get`, `Feed` and `HtmlParser` are provided by the host application.
-- NOTE(review): the feed URL points at a local fixture server — confirm
-- before deploying against the real feed.
function techCrunch.route(args)
    local xml = get("http://localhost:8081/feed.xml")
    local rss_parser = Feed()
    local feed = rss_parser:new(xml)

    local articles = feed.channel.articles
    local article = articles[1]
    if article == nil then
        -- Empty feed: there is nothing to expand, so render it unchanged
        -- instead of failing on a nil index below.
        return feed:render()
    end
    print('Article Title: ' .. article.title)
    print('Article Link: ' .. article.link)

    -- Fetch the full article page and parse it so the body can be selected.
    local article_content = get(article.link)
    local html_parser = HtmlParser()
    html_parser:parse(article_content)

    local elements = html_parser:select_element('.wp-block-post-content')
    print('Selected Elements')
    local element = elements[1]
    if element ~= nil then
        -- Only overwrite the feed's own description when the page actually
        -- contained the content block; otherwise keep what the feed shipped.
        article.description = element
    end

    return feed:render()
end
|
||||
|
|
@ -22,14 +22,19 @@ impl HtmlParser {
|
|||
|
||||
pub fn parse(&mut self, html: &str) {
|
||||
self.doc = Html::parse_document(html);
|
||||
let selector = Selector::parse("h1.wp-block-post-title");
|
||||
}
|
||||
|
||||
pub fn select_element(&self, selector: &str) -> Vec<String> {
|
||||
let selector = Selector::parse(selector);
|
||||
match selector {
|
||||
Ok(selector) => {
|
||||
let title = self.doc.select(&selector).next().unwrap();
|
||||
println!("Title: {}", title.text().collect::<String>());
|
||||
let elements: Vec<_> = self.doc.select(&selector).collect();
|
||||
println!("Found {} elements", elements.len());
|
||||
elements.iter().map(|x| x.html()).collect()
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Error: {}", e);
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -64,6 +69,12 @@ impl UserData for HtmlParser {
|
|||
Ok(())
|
||||
},
|
||||
);
|
||||
methods.add_method(
|
||||
"select_element",
|
||||
|_, this, selector: String| -> Result<Vec<String>, mlua::Error> {
|
||||
Ok(this.select_element(&selector))
|
||||
},
|
||||
);
|
||||
methods.add_method_mut(
|
||||
"delete_element",
|
||||
|_, this, selector: String| -> Result<(), mlua::Error> {
|
||||
|
|
|
|||
|
|
@ -1,12 +1,7 @@
|
|||
use log::{debug, info};
|
||||
use std::borrow::BorrowMut;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
use std::process::exit;
|
||||
use std::rc::Rc;
|
||||
use std::sync::{Arc, Mutex, RwLock};
|
||||
use std::thread;
|
||||
use warp::Filter;
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
mod html_parser;
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ impl Router {
|
|||
async move { dyn_reply(word, &s).await }
|
||||
}
|
||||
});
|
||||
warp::serve(routes).run(([127, 0, 0, 1], 3030)).await;
|
||||
warp::serve(routes).run(([0, 0, 0, 0], 3030)).await;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue