Add initial feed parser for techcrunch

- Able to replace the RSS item content with the entire article
This commit is contained in:
Christopher Williams 2024-09-22 12:34:14 -04:00
parent 2e558fb709
commit c8686df817
4 changed files with 41 additions and 9 deletions

26
scripts/techcrunch.lua Normal file
View File

@ -0,0 +1,26 @@
-- Name and home page of the site this script produces a feed for.
-- (Previously these still pointed at 9to5mac, left over from another script.)
local WEBSITE_NAME = "TechCrunch"
local WEBSITE_HOME = "https://techcrunch.com"
-- Register this script's handler name together with the URL path it serves.
-- NOTE(review): presumably the host resolves "techCrunch" to the global table
-- below and calls its `route` function — confirm against the router.
add_route("techCrunch", "/TechCrunch")
-- Intentionally global so the host can find the handler table by name.
techCrunch = {}
--- Route handler: fetches the feed, swaps the first article's description for
--- the full article body scraped from the article page, and renders the feed.
-- @param args request arguments supplied by the host (currently unused)
-- @return the rendered feed, as produced by `feed:render()`
function techCrunch.route(args)
    local xml = get("http://localhost:8081/feed.xml")
    local rss_parser = Feed()
    local feed = rss_parser:new(xml)
    local articles = feed.channel.articles

    -- An empty feed has no first article; render it unchanged instead of
    -- raising an error when indexing a nil article.
    local article = articles[1]
    if article == nil then
        return feed:render()
    end

    print('Article Title: ' .. tostring(article.title))
    print('Article Link: ' .. tostring(article.link))

    -- Download the article page and pull out the post body.
    local article_content = get(article.link)
    local html_parser = HtmlParser()
    html_parser:parse(article_content)
    -- select_element returns a list of HTML strings, one per matching element.
    local elements = html_parser:select_element('.wp-block-post-content')
    print('Selected Elements')

    -- Only replace the description when the selector actually matched;
    -- otherwise keep the summary that came with the feed.
    local element = elements[1]
    if element ~= nil then
        article.description = element
    end

    return feed:render()
end

View File

@ -22,14 +22,19 @@ impl HtmlParser {
/// Parses `html` as a full HTML document and stores it as the current
/// document, replacing whatever was parsed before.
///
/// Later calls such as `select_element` operate on this stored document.
pub fn parse(&mut self, html: &str) {
    // The previous version also built a `Selector` for a hard-coded title
    // selector here and never used it; that dead work has been removed.
    self.doc = Html::parse_document(html);
}
/// Returns the serialized HTML of every element in the current document that
/// matches the CSS `selector`.
///
/// An invalid selector is reported on stdout and yields an empty vector. A
/// valid selector that matches nothing also yields an empty vector; it no
/// longer panics.
pub fn select_element(&self, selector: &str) -> Vec<String> {
    let selector = match Selector::parse(selector) {
        Ok(selector) => selector,
        Err(e) => {
            println!("Error: {}", e);
            return Vec::new();
        }
    };

    // Run the selection once and reuse the result for logging and output.
    let elements: Vec<_> = self.doc.select(&selector).collect();

    // Log the text of the first match only when there is one; unwrapping the
    // first match unconditionally panicked whenever nothing matched.
    if let Some(first) = elements.first() {
        println!("Title: {}", first.text().collect::<String>());
    }
    println!("Found {} elements", elements.len());

    elements.iter().map(|element| element.html()).collect()
}
@ -64,6 +69,12 @@ impl UserData for HtmlParser {
Ok(())
},
);
methods.add_method(
"select_element",
|_, this, selector: String| -> Result<Vec<String>, mlua::Error> {
Ok(this.select_element(&selector))
},
);
methods.add_method_mut(
"delete_element",
|_, this, selector: String| -> Result<(), mlua::Error> {

View File

@ -1,12 +1,7 @@
use log::{debug, info};
use std::borrow::BorrowMut;
use std::cell::RefCell;
use std::collections::HashMap;
use std::process::exit;
use std::rc::Rc;
use std::sync::{Arc, Mutex, RwLock};
use std::thread;
use warp::Filter;
use once_cell::sync::Lazy;
mod html_parser;

View File

@ -80,7 +80,7 @@ impl Router {
async move { dyn_reply(word, &s).await }
}
});
warp::serve(routes).run(([127, 0, 0, 1], 3030)).await;
warp::serve(routes).run(([0, 0, 0, 0], 3030)).await;
}
}