Added readable mode for article content

This commit is contained in:
2023-10-15 17:44:05 +02:00
parent 3d77c6f30f
commit ee80cbd53b
17 changed files with 151 additions and 53 deletions
+1 -38
View File
@@ -4,7 +4,6 @@ use crate::models::feed_item::rss_feed_item::FeedItem;
use crate::reader::structs::feed::FeedAggregate;
use crate::schema::feed_item::{feed_id, read};
use crate::{
auth::jwt::JwtToken,
database::establish_connection,
json_serialization::articles::Articles,
schema::feed::{self, user_id},
@@ -17,8 +16,6 @@ use super::structs::article::Article;
pub async fn get(path: web::Path<JsonUser>, req: HttpRequest) -> impl Responder {
let request = req.clone();
let _token: JwtToken = JwtToken::decode_from_request(req).unwrap();
let req_user_id = path.user_id;
log::info!("Received user_id: {}", req_user_id);
@@ -27,9 +24,6 @@ pub async fn get(path: web::Path<JsonUser>, req: HttpRequest) -> impl Responder
.filter(user_id.eq(req_user_id))
.load::<Feed>(&mut connection)
.unwrap();
// let feed = feeds::get_feed("https://www.heise.de/rss/heise-Rubrik-Wissen.rdf")
// .await
// .unwrap();
let mut feed_aggregates: Vec<FeedAggregate> = Vec::new();
for feed in feeds {
@@ -50,6 +44,7 @@ pub async fn get(path: web::Path<JsonUser>, req: HttpRequest) -> impl Responder
.map(|feed_item: FeedItem| Article {
title: feed_item.title,
content: feed_item.content,
url: feed_item.url,
})
.collect();
@@ -60,38 +55,6 @@ pub async fn get(path: web::Path<JsonUser>, req: HttpRequest) -> impl Responder
items: article_list,
})
}
// let feed_title: String = feed.title.clone();
// let feed_items: Vec<Article> = feed
// .into_items()
// .into_iter()
// .map(|item| {
// let title = item.title.unwrap();
// let frag = Html::parse_fragment(&item.content.unwrap());
// let mut content = "".to_string();
// let frag_clone = frag.clone();
// frag.tree.into_iter().for_each(|node| {
// let selector_img = Selector::parse("img").unwrap();
//
// for element in frag_clone.select(&selector_img) {
// if !content.starts_with("<img") {
// content.push_str(&element.html());
// content.push_str("<br>")
// }
// }
// if let scraper::node::Node::Text(text) = node {
// content.push_str(&text.text);
// }
// });
// Article { title, content }
// })
// .collect();
//
// let feed_aggregates = vec![
// (FeedAggregate {
// title: feed_title,
// items: feed_items,
// }),
// ];
let articles: Articles = Articles {
feeds: feed_aggregates,
+6
View File
@@ -4,6 +4,8 @@ use crate::views::path::Path;
mod add;
pub mod feeds;
mod get;
mod read;
mod scraper;
pub mod structs;
mod sync;
@@ -24,4 +26,8 @@ pub fn feed_factory(app: &mut web::ServiceConfig) {
&base_path.define(String::from("/sync")),
actix_web::Route::to(web::post(), sync::sync),
);
app.route(
&base_path.define(String::from("/read")),
actix_web::Route::to(web::post(), read::read),
);
}
+19
View File
@@ -0,0 +1,19 @@
use actix_web::{web, HttpRequest, Responder};
use crate::json_serialization::{readable::Readable, url::UrlJson};
use super::scraper::content::do_throttled_request;
pub async fn read(_req: HttpRequest, data: web::Json<UrlJson>) -> impl Responder {
let result = do_throttled_request(&data.url);
let content = match result.await {
Ok(cont) => cont,
Err(e) => {
log::error!("Could not scrap url {}", data.url);
e.to_string()
}
};
Readable { content }
}
+8
View File
@@ -0,0 +1,8 @@
use reqwest::Error;
/// Requests `url` via `reqwest::get` and returns the response body as text.
///
/// NOTE(review): despite the name, this function does not wait between
/// requests; no throttling is performed in this body.
///
/// # Errors
///
/// Returns the `reqwest` error if sending the request or reading the body fails.
pub async fn do_throttled_request(url: &str) -> Result<String, Error> {
    reqwest::get(url).await?.text().await
}
+1
View File
@@ -0,0 +1 @@
pub mod content;
+1
View File
@@ -4,6 +4,7 @@ use serde::Serialize;
/// A single article entry, as held in a feed aggregate's `items` list.
pub struct Article {
/// Title of the article.
pub title: String,
/// Content of the article.
pub content: String,
/// URL associated with the article (presumably the link to the original
/// page — confirm against the feed item source).
pub url: String,
}
// impl Article {
-6
View File
@@ -7,9 +7,3 @@ pub struct FeedAggregate {
pub title: String,
pub items: Vec<Article>,
}
//
// impl Feed {
// pub fn new(title: String, items: Vec<Article>) -> Feed {
// Feed { title, items }
// }
// }