updated rust version, minor fixes
This commit is contained in:
+2
-3
@@ -34,7 +34,7 @@ async fn main() -> std::io::Result<()> {
|
||||
.allow_any_header()
|
||||
.supports_credentials();
|
||||
|
||||
let app = App::new()
|
||||
App::new()
|
||||
.wrap_fn(|req, srv| {
|
||||
let mut passed: bool;
|
||||
let request_url: String = String::from(req.uri().path());
|
||||
@@ -73,8 +73,7 @@ async fn main() -> std::io::Result<()> {
|
||||
}
|
||||
})
|
||||
.wrap(cors)
|
||||
.configure(views::views_factory);
|
||||
app
|
||||
.configure(views::views_factory)
|
||||
})
|
||||
.bind("0.0.0.0:8001")?
|
||||
.run()
|
||||
|
||||
+47
-11
@@ -27,6 +27,17 @@ fn get_date(date_str: &str) -> Result<NaiveDateTime, chrono::ParseError> {
|
||||
DateTime::parse_from_rfc2822(date_str).map(|dt| dt.with_timezone(&Local).naive_local())
|
||||
}
|
||||
|
||||
// Some feeds (e.g. Deutsche Welle) embed responsive-image templates such as
|
||||
// `src="https://example.com/img_${formatId}.jpg"` that their own frontend
|
||||
// JavaScript fills in before loading — verbatim, they 404. Skip those and
|
||||
// pick the first <img> with a real, directly loadable URL instead.
|
||||
fn image_src_is_resolvable(element: &scraper::ElementRef) -> bool {
|
||||
match element.value().attr("src") {
|
||||
Some(src) => !src.contains('{') && !src.to_lowercase().contains("%7b"),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
|
||||
let item_title = item.title.clone().unwrap();
|
||||
log::info!("Create feed item: {}", item_title);
|
||||
@@ -35,20 +46,18 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
|
||||
|
||||
let frag = Html::parse_fragment(base_content);
|
||||
let mut content = "".to_string();
|
||||
let frag_clone = frag.clone();
|
||||
frag.tree.into_iter().for_each(|node| {
|
||||
let selector_img = Selector::parse("img").unwrap();
|
||||
|
||||
for element in frag_clone.select(&selector_img) {
|
||||
if !content.starts_with("<img") {
|
||||
content.push_str(&element.html());
|
||||
content.push_str("<br>")
|
||||
}
|
||||
}
|
||||
if let scraper::node::Node::Text(text) = node {
|
||||
let selector_img = Selector::parse("img").unwrap();
|
||||
if let Some(image) = frag.select(&selector_img).find(image_src_is_resolvable) {
|
||||
content.push_str(&image.html());
|
||||
content.push_str("<br>");
|
||||
}
|
||||
|
||||
for node in frag.tree.nodes() {
|
||||
if let scraper::node::Node::Text(text) = node.value() {
|
||||
content.push_str(&text.text);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let existing_item: Vec<FeedItem> = feed_item::table
|
||||
.filter(feed_id.eq(feed.id))
|
||||
@@ -140,6 +149,33 @@ mod tests {
|
||||
assert!(get_date("not-a-date").is_err());
|
||||
}
|
||||
|
||||
#[test]
fn create_feed_item_skips_template_placeholder_images() {
    // The first <img> still carries an unfilled `${formatId}` template; only
    // the second one has a plain URL, so that is the one that must be picked.
    let fragment = Html::parse_fragment(
        r#"<img src="https://example.test/img_${formatId}.jpg"><p>placeholder</p>
<img src="https://example.test/real.jpg"><p>real image</p>"#,
    );
    let img_selector = Selector::parse("img").unwrap();

    let mut images = fragment.select(&img_selector);
    let picked = images
        .find(image_src_is_resolvable)
        .expect("should find a resolvable image");

    assert_eq!(
        Some("https://example.test/real.jpg"),
        picked.value().attr("src")
    );
}
|
||||
|
||||
#[test]
fn create_feed_item_finds_no_image_when_all_are_templated() {
    // The only <img> in the fragment is an unfilled template, so nothing qualifies.
    let fragment = Html::parse_fragment(r#"<img src="https://example.test/img_${formatId}.jpg">"#);
    let img_selector = Selector::parse("img").unwrap();

    let any_resolvable = fragment
        .select(&img_selector)
        .any(|img| image_src_is_resolvable(&img));

    assert!(!any_resolvable);
}
|
||||
|
||||
#[actix_web::test]
|
||||
async fn create_feed_item_does_not_duplicate_existing_items() {
|
||||
let mut connection = establish_connection();
|
||||
|
||||
Reference in New Issue
Block a user