updated rust version, minor fixes

This commit is contained in:
2026-06-07 16:26:42 +02:00
parent b4874ad318
commit 841e8419b0
7 changed files with 143 additions and 18 deletions
+47 -11
View File
@@ -27,6 +27,17 @@ fn get_date(date_str: &str) -> Result<NaiveDateTime, chrono::ParseError> {
DateTime::parse_from_rfc2822(date_str).map(|dt| dt.with_timezone(&Local).naive_local())
}
/// Returns `true` when `element` has a `src` attribute that can be loaded as-is.
///
/// Some feeds (e.g. Deutsche Welle) embed responsive-image templates such as
/// `src="https://example.com/img_${formatId}.jpg"` that their own frontend
/// JavaScript fills in before loading — verbatim, they 404. Callers use this
/// predicate to skip those and pick the first `<img>` with a real, directly
/// loadable URL instead.
///
/// A `src` is rejected when it is:
/// * missing,
/// * empty or whitespace-only (there is nothing to load),
/// * contains a literal `{` (unexpanded template placeholder), or
/// * contains `%7b` / `%7B`, the percent-encoded form of `{`.
fn image_src_is_resolvable(element: &scraper::ElementRef) -> bool {
    match element.value().attr("src").map(str::trim) {
        // A blank `src` would otherwise pass the placeholder checks below and be
        // selected ahead of a later image that actually has a URL.
        Some(src) if !src.is_empty() => {
            // The needle is pure ASCII, so ASCII case folding is sufficient.
            !src.contains('{') && !src.to_ascii_lowercase().contains("%7b")
        }
        _ => false,
    }
}
fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
let item_title = item.title.clone().unwrap();
log::info!("Create feed item: {}", item_title);
@@ -35,20 +46,18 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
let frag = Html::parse_fragment(base_content);
let mut content = "".to_string();
let frag_clone = frag.clone();
frag.tree.into_iter().for_each(|node| {
let selector_img = Selector::parse("img").unwrap();
for element in frag_clone.select(&selector_img) {
if !content.starts_with("<img") {
content.push_str(&element.html());
content.push_str("<br>")
}
}
if let scraper::node::Node::Text(text) = node {
let selector_img = Selector::parse("img").unwrap();
if let Some(image) = frag.select(&selector_img).find(image_src_is_resolvable) {
content.push_str(&image.html());
content.push_str("<br>");
}
for node in frag.tree.nodes() {
if let scraper::node::Node::Text(text) = node.value() {
content.push_str(&text.text);
}
});
}
let existing_item: Vec<FeedItem> = feed_item::table
.filter(feed_id.eq(feed.id))
@@ -140,6 +149,33 @@ mod tests {
assert!(get_date("not-a-date").is_err());
}
#[test]
fn create_feed_item_skips_template_placeholder_images() {
    // The first <img> carries an unexpanded `${formatId}` template, the second
    // one a plain URL; only the second may be selected.
    let markup = r#"<img src="https://example.test/img_${formatId}.jpg"><p>placeholder</p>
<img src="https://example.test/real.jpg"><p>real image</p>"#;
    let fragment = Html::parse_fragment(markup);
    let img_selector = Selector::parse("img").unwrap();

    let first_resolvable = fragment
        .select(&img_selector)
        .find(image_src_is_resolvable)
        .expect("should find a resolvable image");
    let picked_src = first_resolvable.value().attr("src");

    assert_eq!(Some("https://example.test/real.jpg"), picked_src);
}
#[test]
fn create_feed_item_finds_no_image_when_all_are_templated() {
    // The only <img> present still contains a `${formatId}` placeholder, so the
    // search for a resolvable image must come back empty.
    let markup = r#"<img src="https://example.test/img_${formatId}.jpg">"#;
    let fragment = Html::parse_fragment(markup);
    let img_selector = Selector::parse("img").unwrap();

    let resolvable = fragment
        .select(&img_selector)
        .find(image_src_is_resolvable);

    assert!(resolvable.is_none());
}
#[actix_web::test]
async fn create_feed_item_does_not_duplicate_existing_items() {
let mut connection = establish_connection();