Add database functionality

- Articles are saved after their route is executed
- Add test for database retrieval from lua
This commit is contained in:
Christopher Williams 2024-09-24 20:19:25 -04:00
parent f22c2e443c
commit 4dbd73e1d3
8 changed files with 498 additions and 60 deletions

49
Cargo.lock generated
View File

@ -551,6 +551,18 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "fallible-streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fastrand"
version = "2.1.1"
@ -791,6 +803,18 @@ name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
]
[[package]]
name = "hashlink"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af"
dependencies = [
"hashbrown 0.14.5",
]
[[package]]
name = "headers"
@ -1211,6 +1235,16 @@ version = "0.2.158"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
[[package]]
name = "libsqlite3-sys"
version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149"
dependencies = [
"pkg-config",
"vcpkg",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.14"
@ -2040,6 +2074,7 @@ dependencies = [
"quick-xml",
"redis",
"reqwest",
"rusqlite",
"scraper",
"serde",
"serde_json",
@ -2050,6 +2085,20 @@ dependencies = [
"xml",
]
[[package]]
name = "rusqlite"
version = "0.32.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e"
dependencies = [
"bitflags 2.6.0",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libsqlite3-sys",
"smallvec 1.13.2",
]
[[package]]
name = "rustc-demangle"
version = "0.1.24"

View File

@ -22,6 +22,7 @@ once_cell = "1.19.0"
quick-xml = { version = "0.36.1", features = ["serialize"]}
redis = { version = "0.27.2", features = ["tokio-comp", "aio"] }
reqwest = { version = "0.12.7", features = ["blocking"] }
rusqlite = "0.32.1"
scraper = "0.20.0"
serde = { version = "1.0.130", features = ["derive", "rc"]}
serde_json = "1.0.128"

229
src/database.rs Normal file
View File

@ -0,0 +1,229 @@
use std::rc::Rc;
use log::{debug, error, trace};
use mlua::{Lua, Value, FromLua, UserData, UserDataMethods};
use rusqlite::{params, Connection};
use crate::{implement_from_lua, rss_parser::Item as Article};
#[derive(Debug, Clone)]
pub struct Database {
conn: Rc<Connection>,
}
impl Database {
pub fn new() -> Database {
//TODO: Handle all of these unwrap calls
let conn = Connection::open("database.db").unwrap();
conn.execute(
"CREATE TABLE IF NOT EXISTS feeds (
id INTEGER PRIMARY KEY,
url TEXT NOT NULL,
title TEXT NOT NULL,
last_updated TEXT NOT NULL
)",
params![],
).unwrap();
conn.execute(
"CREATE TABLE IF NOT EXISTS images (
url TEXT PRIMARY KEY,
title TEXT NOT NULL,
link TEXT NOT NULL,
width INTEGER NOT NULL,
height INTEGER NOT NULL
)",
params![],
).unwrap();
conn.execute(
"CREATE TABLE IF NOT EXISTS articles (
id INTEGER PRIMARY KEY,
guid TEXT UNIQUE NOT NULL,
creator TEXT,
categories TEXT,
feed_id INTEGER NOT NULL,
title TEXT NOT NULL,
link TEXT NOT NULL,
pub_date TEXT NOT NULL,
description TEXT NOT NULL
)",
params![],
).unwrap();
//Ok(Self {
// conn: Rc::new(conn),
//})
Self {
conn: Rc::new(conn),
}
}
pub fn new_with_conn(conn: Connection) -> Database {
conn.execute(
"CREATE TABLE IF NOT EXISTS feeds (
id INTEGER PRIMARY KEY,
url TEXT NOT NULL,
title TEXT NOT NULL,
last_updated TEXT NOT NULL
)",
params![],
).unwrap();
conn.execute(
"CREATE TABLE IF NOT EXISTS images (
url TEXT PRIMARY KEY,
title TEXT NOT NULL,
link TEXT NOT NULL,
width INTEGER NOT NULL,
height INTEGER NOT NULL
)",
params![],
).unwrap();
conn.execute(
"CREATE TABLE IF NOT EXISTS articles (
id INTEGER PRIMARY KEY,
guid TEXT UNIQUE NOT NULL,
creator TEXT,
categories TEXT,
feed_id INTEGER NOT NULL,
title TEXT NOT NULL,
link TEXT NOT NULL,
pub_date TEXT NOT NULL,
description TEXT NOT NULL
)",
params![],
).unwrap();
Self {
conn: Rc::new(conn),
}
}
pub fn insert_feed(&self, url: &str, title: &str, last_updated: &str) -> Result<usize, Box<dyn std::error::Error>> {
Ok(self.conn.execute(
"INSERT INTO feeds (url, title, last_updated) VALUES (?1, ?2, ?3)",
params![url, title, last_updated],
)?)
}
/// Insert an article into the database
///
/// # Arguments
///
/// * `article` - The article to insert
///
/// # Returns
///
/// * The number of rows inserted
pub fn insert_article(
&self,
article: &Article
) -> Result<usize, Box<dyn std::error::Error>> {
let title = article.title.as_ref().unwrap();
let guid = article.guid.as_ref().unwrap().value.clone();
let ret = self.conn.execute(
"INSERT INTO articles (guid, creator, categories, feed_id, title, link, pub_date, description) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
params![
guid,
article.creator,
article.categories.join(","),
1,
title,
article.link,
article.pub_date,
article.description,
],
);
match ret {
Ok(_) => Ok(ret?),
Err(e) => {
error!("Error inserting {} into database: {}", title, e);
Err(e.into())
}
}
}
pub fn get_article_by_guid(&self, guid: &str) -> mlua::Result<Option<Article>> {
//TODO: Make this look better
let mut stmt = match self.conn.prepare("SELECT * FROM articles WHERE guid = ?1") {
Ok(it) => it,
Err(err) => {
let var_name = Err(err);
return var_name.unwrap();
},
};
let mut rows = match stmt.query(params![guid]) {
Ok(it) => it,
Err(err) => {
let var_name = Err(err);
return var_name.unwrap();
},
};
let row = match rows.next() {
Ok(it) => it,
Err(err) => {
let var_name = Err(err);
return var_name.unwrap();
},
};
let row = match row {
Some(it) => it,
None => return Ok(None),
};
trace!("Row: {:?}", row);
let article = Article {
title: Some(row.get(5).unwrap_or("".to_string())),
guid: Some(crate::rss_parser::Guid {
value: Some(row.get(1).unwrap_or("".to_string())),
is_perma_link: Some(false),
}),
creator: Some(row.get(2).unwrap_or("".to_string())),
categories: row.get(3).unwrap_or("".to_string()).split(",").map(|s| s.to_string()).collect(),
link: Some(row.get(6).unwrap_or("".to_string())),
pub_date: Some(row.get(7).unwrap_or("".to_string())),
description: Some(row.get(8).unwrap_or("".to_string())),
};
debug!("Article: {:?}", article);
Ok(Some(article))
}
//pub fn does_article_exist(&self, guid: &str) -> mlua::Result<bool> {
// let mut stmt = self.conn.prepare("SELECT * FROM articles WHERE guid = ?1")?;
// let mut rows = stmt.query(params![guid])?;
// let row = rows.next()?;
// match row {
// Some(_) => Ok(true),
// None => Ok(false),
// }
//}
}
impl FromLua<'_> for Database {
fn from_lua(value: Value, _: &'_ Lua) -> mlua::Result<Self> {
match value {
Value::UserData(ud) => {
let db = ud.borrow::<Database>()?;
Ok(db.clone())
}
_ => Err(mlua::Error::FromLuaConversionError {
from: value.type_name(),
to: "Database",
message: Some("expected Database".to_string()),
}),
}
}
}
impl UserData for Database {
fn add_methods<'lua, M: UserDataMethods<'lua, Self>>(methods: &mut M) {
methods.add_method("get_article_by_guid", |_, this, guid: String| -> Result<Article, mlua::Error> {
match this.get_article_by_guid(&guid) {
Ok(Some(article)) => Ok(article),
Ok(None) => Err(mlua::Error::RuntimeError("Article not found".to_string())),
Err(e) => Err(mlua::Error::RuntimeError(e.to_string())),
}
});
//methods.add_method("does_article_exist", |_, this, guid: String| -> Result<Result<bool, _>, mlua::Error> {
// Ok(this.does_article_exist(&guid))
//});
}
}

View File

@ -9,6 +9,7 @@ mod router;
mod rss_parser;
mod scheduler;
mod scripting;
mod database;
static REDIS: Lazy<Mutex<redis::Connection>> = Lazy::new(|| {
let client = redis::Client::open("redis://127.0.0.1").unwrap();

View File

@ -56,8 +56,14 @@ impl Router {
match response {
Ok(response) => {
debug!("Response from Redis: {}", response);
let resp = warp::http::Response::builder()
.header("Content-Type", "text/xml")
.status(StatusCode::OK)
.body(response.clone())
.unwrap();
Ok(Box::new(warp::reply::with_status(
response,
resp,
StatusCode::OK,
)))
}
@ -84,24 +90,3 @@ impl Router {
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_router() {
let mut router = Router::new(8000);
router.add_route("/".to_string());
router.add_route("/about".to_string());
router.add_route("/contact".to_string());
assert_eq!(
router.get_routes(),
&vec![
"/".to_string(),
"/about".to_string(),
"/contact".to_string()
]
);
}
}

View File

@ -1,6 +1,6 @@
use std::{cell::RefCell, rc::Rc};
use log::debug;
use log::{debug, error};
use mlua::{
AnyUserDataExt, FromLua, IntoLua, Lua, Result, Table, UserData, UserDataFields,
UserDataMethods, Value,
@ -70,6 +70,7 @@ macro_rules! add_field_method_set_rc_option {
};
}
#[macro_export]
macro_rules! implement_from_lua {
($type:ty) => {
impl FromLua<'_> for $type {
@ -88,10 +89,22 @@ macro_rules! implement_from_lua {
#[serde(rename = "rss")]
pub struct Rss {
#[serde(rename = "channel")]
channel: Rc<RefCell<Channel>>,
pub channel: Rc<RefCell<Channel>>,
}
impl FromLua<'_> for Rss {
fn from_lua(value: Value, _: &Lua) -> Result<Self> {
match value {
Value::UserData(ud) => Ok(ud.borrow::<Self>()?.clone()),
_ => {
error!("Expected (string, Feed) tuple, received: {:?}", value);
error!("Maybe the route function is not returning both");
Err(mlua::Error::RuntimeError("Expected (string, Feed) tuple".to_string()))
}
}
}
}
implement_from_lua!(Rss);
impl UserData for Rss {
fn add_fields<'lua, F: UserDataFields<'lua, Self>>(fields: &mut F) {
add_field_method_get_set_rc!(fields, "channel", channel, Channel);
@ -105,23 +118,23 @@ impl UserData for Rss {
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
pub struct Channel {
title: Option<String>,
pub title: Option<String>,
#[serde(rename = "atom_link")]
atom_link: Option<Rc<RefCell<AtomLink>>>,
pub atom_link: Option<Rc<RefCell<AtomLink>>>,
#[serde(rename = "link")]
link: Option<String>,
description: Option<String>,
pub link: Option<String>,
pub description: Option<String>,
#[serde(rename = "lastBuildDate")]
last_build_date: Option<String>,
language: Option<String>,
pub last_build_date: Option<String>,
pub language: Option<String>,
#[serde(rename = "sy:updatePeriod")]
update_period: Option<String>,
pub update_period: Option<String>,
#[serde(rename = "sy:updateFrequency")]
update_frequency: Option<String>,
generator: Option<String>,
image: Option<Rc<RefCell<Image>>>,
pub update_frequency: Option<String>,
pub generator: Option<String>,
pub image: Option<Rc<RefCell<Image>>>,
#[serde(rename = "item", default)]
items: Vec<Rc<RefCell<Item>>>,
pub items: Vec<Rc<RefCell<Item>>>,
}
implement_from_lua!(Channel);
@ -182,7 +195,7 @@ impl UserData for AtomLink {
}
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
struct Image {
pub struct Image {
url: Option<String>, // Option for missing fields
title: Option<String>,
link: Option<String>,
@ -208,16 +221,41 @@ impl UserData for Image {
/// Lua object representing an RSS item as Article
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
pub struct Item {
title: Option<String>, // Marked Option
link: Option<String>,
pub title: Option<String>, // Marked Option
pub link: Option<String>,
#[serde(rename = "dc:creator")]
creator: Option<String>,
pub creator: Option<String>,
#[serde(rename = "pubDate")]
pub_date: Option<String>,
pub pub_date: Option<String>,
#[serde(rename = "category", default)]
categories: Vec<String>,
guid: Option<Guid>,
description: Option<String>,
pub categories: Vec<String>,
pub guid: Option<Guid>,
pub description: Option<String>,
}
impl Item {
pub fn new(
guid: &str,
creator: &str,
categories: &str,
title: &str,
link: &str,
pub_date: &str,
description: &str,
) -> Item {
Item {
title: Some(title.to_string()),
link: Some(link.to_string()),
creator: Some(creator.to_string()),
pub_date: Some(pub_date.to_string()),
categories: categories.split(',').map(|s| s.to_string()).collect(),
guid: Some(Guid {
is_perma_link: Some(true),
value: Some(guid.to_string()),
}),
description: Some(description.to_string()),
}
}
}
implement_from_lua!(Item);
@ -238,11 +276,11 @@ impl UserData for Item {
}
#[derive(Default, Debug, Deserialize, Serialize, Clone)]
struct Guid {
pub struct Guid {
#[serde(rename = "isPermaLink")]
is_perma_link: Option<bool>,
pub is_perma_link: Option<bool>,
#[serde(rename = "$value")]
value: Option<String>,
pub value: Option<String>,
}
implement_from_lua!(Guid);

View File

@ -3,25 +3,36 @@ use chrono::prelude::*;
use chrono::Utc;
use cron::Schedule;
use log::debug;
use log::error;
use log::trace;
use redis::Commands;
use std::str::FromStr;
use std::thread;
use crate::database::Database;
use crate::scripting::Scripting;
use crate::REDIS;
pub struct Scheduler {
engine: Scripting,
db: Database,
}
impl Scheduler {
pub fn new(scripting: Scripting) -> Result<Self> {
Ok(Self {
engine: scripting
engine: scripting,
db: Database::new(),
})
}
pub fn new_with_db(scripting: Scripting, db: Database) -> Self {
Self {
engine: scripting,
db,
}
}
/// Execute all of the routes that are registered
fn execute_all_routes(&mut self) -> Result<()> {
let routes = self.engine.get_routes().unwrap();
@ -30,9 +41,33 @@ impl Scheduler {
for (name, route) in routes {
debug!("Executing route: {}", name);
let result = self.engine.execute_route_function(&route, &name)?;
let ret = match self.engine.execute_route_function(&route, &name) {
Ok(ret) => ret,
Err(_) => {
error!("Removing misbehaving route: {}", name);
self.engine.remove_route(&name)?;
continue;
}
};
let result = ret.0;
let feed = ret.1;
trace!("Route {} executed with result: {}", name, result);
let _: () = REDIS.lock().unwrap().set(name, result)?;
//TODO: Put on another thread?
debug!("Inserting articles into database");
let channel = feed.channel;
let articles = &channel.borrow().items;
for article in articles {
let article = article.borrow();
match self.db.insert_article(&article) {
Ok(_) => (),
Err(_) => continue
}
}
debug!("Finished inserting articles into database");
}
Ok(())
@ -42,18 +77,30 @@ impl Scheduler {
pub fn schedule(&mut self, cron_expr: &str) -> Result<()> {
let schedule = Schedule::from_str(cron_expr).unwrap();
while let Some(next) = schedule.upcoming(Utc).next() {
debug!("Scheduled task has started running at {}", Local::now());
self.execute_all_routes()?;
debug!("Successfully executed all routes at {}", Local::now());
// Calculate the delta between now and the next scheduled run
let local: DateTime<Local> = DateTime::from(next);
let now = Utc::now();
let until = next - now;
let sleep_time = until.to_std().unwrap();
let sleep_time = match until.to_std() {
Ok(duration) => duration,
Err(_) => {
error!("Missed schedule, scheduling next run");
error!("A script took too long to run, consider increasing the interval or there may be a misbehaving script");
let next = schedule.upcoming(Utc).next().unwrap();
let until = next - now;
until.to_std().unwrap()
}
};
debug!("Next run UTC: {:?}", next);
debug!("Next run local: {:?}", local);
thread::sleep(sleep_time);
self.execute_all_routes()?;
println!("Scheduled task ran at {}", Local::now());
}
Ok(())
}

View File

@ -1,8 +1,9 @@
use crate::database::Database;
use crate::{html_parser::HtmlParser, rss_parser::AtomLink};
use crate::router;
use crate::rss_parser::{Channel, Item, Rss};
use log::{debug, error, info, warn};
use mlua::{chunk, ExternalResult, Function, Lua, MetaMethod, Result, Table, UserData, UserDataMethods};
use mlua::{chunk, AsChunk, Chunk, ExternalResult, Function, Lua, MetaMethod, Result, Table, UserData, UserDataMethods};
use std::cell::RefCell;
use std::collections::HashMap;
use std::rc::Rc;
@ -10,12 +11,22 @@ use std::sync::{Arc, Mutex, RwLock};
pub struct Scripting {
lua: Lua,
db: Database,
}
impl Scripting {
pub fn new() -> Self {
let db = Database::new();
Self {
lua: Lua::new(),
db,
}
}
pub fn new_with_db(db: Database) -> Self {
Self {
lua: Lua::new(),
db,
}
}
@ -44,6 +55,17 @@ impl Scripting {
self.lua.load(script).exec()
}
pub fn load_script_with_return(&self, script: &str) -> Result<String> {
let result = self.lua.load(script).eval::<String>();
match result {
Ok(result) => Ok(result),
Err(err) => {
error!("Error loading script: {}", err);
Err(err)
}
}
}
pub fn load_globals(&self) -> Result<()> {
macro_rules! add_constructor {
($name:literal, $constructor:ident) => {
@ -52,6 +74,13 @@ impl Scripting {
};
}
macro_rules! add_constructor_new {
($name:literal, $constructor:ident) => {
let constructor = self.lua.create_function(|_, ()| Ok($constructor::new()))?;
self.lua.globals().set($name, constructor)?;
};
}
macro_rules! add_global {
($name:literal, $value:expr) => {
self.lua.globals().set($name, $value)?;
@ -80,12 +109,18 @@ impl Scripting {
debug!("Response: {:?}", response);
Ok(response.text().unwrap())
})?);
add_global!("sleep", self.lua.create_function(|_, milliseconds: u64| {
debug!("Sleeping for {} milliseconds", milliseconds);
std::thread::sleep(std::time::Duration::from_millis(milliseconds));
Ok(())
})?);
add_constructor!("HtmlParser", HtmlParser);
add_constructor!("Feed", Rss);
add_constructor!("Channel", Channel);
add_constructor!("AtomLink", AtomLink);
add_constructor!("Article", Item);
add_constructor_new!("Database", Database);
self.lua.globals().set("routes", self.lua.create_table()?)?;
let add_route = chunk! {
@ -107,12 +142,12 @@ impl Scripting {
/// # Arguments
///
/// * `route_name` - The name of the route to execute
pub fn execute_route_function(&self, obj_name: &str, route_name: &str) -> Result<String> {
pub fn execute_route_function(&self, obj_name: &str, route_name: &str) -> Result<(String, Rss)> {
let route_obj: Table = match self.lua.globals().get(obj_name) {
Ok(route) => route,
Err(_) => {
error!("Object {} does not exist", obj_name);
return Ok("".to_string());
return Ok(("".to_string(), Rss::default()));
}
};
let route_func: Function = match route_obj.get("route") {
@ -121,14 +156,15 @@ impl Scripting {
error!("Route {} does not have a route function", obj_name);
error!("Removing route {}", obj_name);
self.remove_route(route_name)?;
return Ok("".to_string());
return Ok(("".to_string(), Rss::default()));
}
};
let result = match route_func.call::<_, String>(()) {
let result = match route_func.call::<_, (String, Rss)>(()) {
Ok(result) => result,
Err(_) => {
// Missing the right return values drops down here
error!("Error executing route function for route {}", obj_name);
return Ok("".to_string());
return Err(mlua::Error::external("Error executing route function"));
}
};
Ok(result)
@ -139,7 +175,7 @@ impl Scripting {
Ok(routes)
}
fn remove_route(&self, route: &str) -> Result<()> {
pub fn remove_route(&self, route: &str) -> Result<()> {
let remove_route = chunk! {
routes[$route] = nil
};
@ -177,3 +213,55 @@ impl UserData for Log {
methods.add_meta_function(MetaMethod::Call, |_, ()| Ok(()));
}
}
#[cfg(test)]
mod tests {
use mlua::chunk;
use rusqlite::Connection;
use super::*;
use crate::rss_parser::Guid;
use crate::rss_parser::Item as Article;
#[test]
fn test_get_article_by_guid_from_lua() -> Result<()> {
let conn = match Connection::open_in_memory() {
Ok(conn) => conn,
Err(err) => {
return Err(mlua::Error::external(err));
}
};
let db = Database::new_with_conn(conn);
let article = Article {
title: Some("Test".to_string()),
guid: Some(Guid {
value: Some("https://techcrunch.com/?p=2877786".to_string()),
is_perma_link: Some(false),
}),
creator: None,
categories: vec![],
link: Some("https://example.com".to_string()),
pub_date: Some("2021-01-01T00:00:00Z".to_string()),
description: Some("Test".to_string()),
};
let ret = db.insert_article(&article);
assert!(ret.is_ok());
let script = chunk!{
local db = $db;
local article = db:get_article_by_guid("https://techcrunch.com/?p=2877786");
return article;
};
let engine = Scripting::new();
engine.load_globals()?;
let ret = engine.lua.load(script).eval::<Article>()?;
assert_eq!(ret.title.unwrap(), "Test");
assert_eq!(ret.guid.unwrap().value.unwrap(), "https://techcrunch.com/?p=2877786");
assert_eq!(ret.link.unwrap(), "https://example.com");
assert_eq!(ret.pub_date.unwrap(), "2021-01-01T00:00:00Z");
assert_eq!(ret.description.unwrap(), "Test");
Ok(())
}
}