From 972e96743272924d280fb7a0aec94c37a20ea78f Mon Sep 17 00:00:00 2001 From: mace Date: Wed, 10 Jun 2026 05:35:46 +0200 Subject: [PATCH] fix sync missing articles --- src/reader/sync.rs | 106 +++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/src/reader/sync.rs b/src/reader/sync.rs index 658d1ff..95fbca1 100644 --- a/src/reader/sync.rs +++ b/src/reader/sync.rs @@ -64,10 +64,9 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) { let item_title = item.title.clone().unwrap(); log::info!("Create feed item: {}", item_title); - // Resolve the publication date before any HTML parsing or DB work so we can - // bail out early for old articles. Items without a pub_date are treated as - // current (inserted unconditionally) — feeds that don't publish dates are - // typically small/curated enough that this is fine. + // Items without a pub_date are treated as current (inserted unconditionally) + // — feeds that don't publish dates are typically small/curated enough that + // this is fine. let mut time: NaiveDateTime = Local::now().naive_local(); if let Some(pub_date) = item.pub_date() { time = match get_date(pub_date) { @@ -79,12 +78,6 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) { }; } - let cutoff = Local::now().naive_local() - Duration::days(14); - if time < cutoff { - log::info!("Skipping item {} (older than 2 weeks).", item_title); - return; - } - let base_content: &str = item.content().or(item.description()).unwrap_or_default(); let frag = Html::parse_fragment(base_content); @@ -134,15 +127,21 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) { } } -// Items without a `created_ts` (e.g. ones inserted before this column existed, -// or whose feed didn't provide a publish date) are left alone — `lt` never -// matches NULL, so there's nothing to special-case here. +// Only read items are purged, and only once they're old — unread items are +// kept regardless of age so infrequently-updated feeds (or infrequent syncs) +// don't lose articles the user hasn't seen yet. Items without a `created_ts` +// (e.g. ones inserted before this column existed, or whose feed didn't +// provide a publish date) are left alone — `lt` never matches NULL. fn delete_old_feed_items(connection: &mut PgConnection) { let cutoff = Local::now().naive_local() - Duration::days(14); - let result = diesel::delete(feed_item::table.filter(feed_item::created_ts.lt(cutoff))) - .execute(connection); + let result = diesel::delete( + feed_item::table + .filter(feed_item::read.eq(true)) + .filter(feed_item::created_ts.lt(cutoff)), + ) + .execute(connection); - log::info!("Deleted old feed items (older than 2 weeks): {:?}", result); + log::info!("Deleted old read feed items (older than 2 weeks): {:?}", result); } pub async fn sync(_req: HttpRequest, data: web::Json) -> impl Responder { @@ -272,7 +271,7 @@ mod tests { } #[actix_web::test] - async fn delete_old_feed_items_removes_items_older_than_two_weeks_but_keeps_recent_ones() { + async fn delete_old_feed_items_removes_only_old_read_items() { let mut connection = establish_connection(); let suffix = unique_suffix(); @@ -297,29 +296,51 @@ mod tests { .unwrap(); let now = Local::now().naive_local(); - let old_item = NewFeedItem::new( + let old_read_item = NewFeedItem::new( feed.id, - "old content".to_string(), - format!("Old article {suffix}"), - format!("https://example.test/article/old-{suffix}"), + "old read content".to_string(), + format!("Old read article {suffix}"), + format!("https://example.test/article/old-read-{suffix}"), Some(now - Duration::days(20)), ); - let recent_item = NewFeedItem::new( + let old_unread_item = NewFeedItem::new( + feed.id, + "old unread content".to_string(), + format!("Old unread article {suffix}"), + format!("https://example.test/article/old-unread-{suffix}"), + Some(now - Duration::days(20)), + ); + let recent_read_item = NewFeedItem::new( feed.id, "recent content".to_string(), format!("Recent article {suffix}"), format!("https://example.test/article/recent-{suffix}"), Some(now - Duration::days(1)), ); - diesel::insert_into(feed_item::table) - .values(&old_item) + + let old_read: FeedItem = diesel::insert_into(feed_item::table) + .values(&old_read_item) + .get_result(&mut connection) + .unwrap(); + diesel::update(&old_read) + .set(feed_item::read.eq(true)) .execute(&mut connection) .unwrap(); - let recent: FeedItem = diesel::insert_into(feed_item::table) - .values(&recent_item) + + let old_unread: FeedItem = diesel::insert_into(feed_item::table) + .values(&old_unread_item) .get_result(&mut connection) .unwrap(); + let recent: FeedItem = diesel::insert_into(feed_item::table) + .values(&recent_read_item) + .get_result(&mut connection) + .unwrap(); + diesel::update(&recent) + .set(feed_item::read.eq(true)) + .execute(&mut connection) + .unwrap(); + delete_old_feed_items(&mut connection); let remaining: Vec = feed_item::table @@ -327,8 +348,19 @@ mod tests { .load(&mut connection) .unwrap(); - assert_eq!(1, remaining.len(), "only the recent item should survive cleanup"); - assert_eq!(recent.id, remaining[0].id); + let remaining_ids: Vec = remaining.iter().map(|item| item.id).collect(); + assert!( + !remaining_ids.contains(&old_read.id), + "old read item should have been deleted" + ); + assert!( + remaining_ids.contains(&old_unread.id), + "old unread item should be kept" + ); + assert!( + remaining_ids.contains(&recent.id), + "recent item should be kept" + ); diesel::delete(feed_item::table.filter(feed_id.eq(feed.id))) .execute(&mut connection) @@ -342,13 +374,13 @@ mod tests { } #[actix_web::test] - async fn create_feed_item_skips_articles_older_than_two_weeks() { + async fn create_feed_item_inserts_articles_older_than_two_weeks() { let mut connection = establish_connection(); let suffix = unique_suffix(); let new_user = NewUser::new( - format!("age_skip_test_{suffix}"), - format!("age_skip_{suffix}@example.test"), + format!("age_test_{suffix}"), + format!("age_{suffix}@example.test"), "secret".to_string(), ); let user: User = diesel::insert_into(users::table) @@ -357,7 +389,7 @@ mod tests { .unwrap(); let new_feed = NewFeed::new( - format!("Age skip test feed {suffix}"), + format!("Age test feed {suffix}"), format!("https://example.test/feed/{suffix}"), user.id, ); @@ -366,7 +398,9 @@ mod tests { .get_result(&mut connection) .unwrap(); - // Item with a pub_date 20 days ago — should be ignored by create_feed_item. + // Item with a pub_date 20 days ago — should still be inserted, since + // infrequently-updated feeds (or infrequent syncs) must not lose + // articles the user hasn't seen yet. let old_date = (Local::now() - Duration::days(20)) .format("%a, %d %b %Y %H:%M:%S %z") .to_string(); @@ -390,11 +424,7 @@ mod tests { .load(&mut connection) .unwrap(); - assert_eq!(1, items.len(), "old item should have been skipped"); - assert!( - items[0].title.contains("Fresh article"), - "only the fresh item should be present" - ); + assert_eq!(2, items.len(), "both old and fresh items should be inserted"); diesel::delete(feed_item::table.filter(feed_id.eq(feed.id))) .execute(&mut connection)