fix sync missing articles (stop skipping items older than two weeks on insert; purge only read items)

This commit is contained in:
2026-06-10 05:35:46 +02:00
parent 400648c3d1
commit 972e967432
+67 -37
View File
@@ -64,10 +64,9 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
let item_title = item.title.clone().unwrap(); let item_title = item.title.clone().unwrap();
log::info!("Create feed item: {}", item_title); log::info!("Create feed item: {}", item_title);
// Resolve the publication date before any HTML parsing or DB work so we can // Items without a pub_date are treated as current (inserted unconditionally)
// bail out early for old articles. Items without a pub_date are treated as // — feeds that don't publish dates are typically small/curated enough that
// current (inserted unconditionally) — feeds that don't publish dates are // this is fine.
// typically small/curated enough that this is fine.
let mut time: NaiveDateTime = Local::now().naive_local(); let mut time: NaiveDateTime = Local::now().naive_local();
if let Some(pub_date) = item.pub_date() { if let Some(pub_date) = item.pub_date() {
time = match get_date(pub_date) { time = match get_date(pub_date) {
@@ -79,12 +78,6 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
}; };
} }
let cutoff = Local::now().naive_local() - Duration::days(14);
if time < cutoff {
log::info!("Skipping item {} (older than 2 weeks).", item_title);
return;
}
let base_content: &str = item.content().or(item.description()).unwrap_or_default(); let base_content: &str = item.content().or(item.description()).unwrap_or_default();
let frag = Html::parse_fragment(base_content); let frag = Html::parse_fragment(base_content);
@@ -134,15 +127,21 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
} }
} }
// Items without a `created_ts` (e.g. ones inserted before this column existed, // Only read items are purged, and only once they're old — unread items are
// or whose feed didn't provide a publish date) are left alone — `lt` never // kept regardless of age so infrequently-updated feeds (or infrequent syncs)
// matches NULL, so there's nothing to special-case here. // don't lose articles the user hasn't seen yet. Items without a `created_ts`
// (e.g. ones inserted before this column existed, or whose feed didn't
// provide a publish date) are left alone — `lt` never matches NULL.
fn delete_old_feed_items(connection: &mut PgConnection) { fn delete_old_feed_items(connection: &mut PgConnection) {
let cutoff = Local::now().naive_local() - Duration::days(14); let cutoff = Local::now().naive_local() - Duration::days(14);
let result = diesel::delete(feed_item::table.filter(feed_item::created_ts.lt(cutoff))) let result = diesel::delete(
feed_item::table
.filter(feed_item::read.eq(true))
.filter(feed_item::created_ts.lt(cutoff)),
)
.execute(connection); .execute(connection);
log::info!("Deleted old feed items (older than 2 weeks): {:?}", result); log::info!("Deleted old read feed items (older than 2 weeks): {:?}", result);
} }
pub async fn sync(_req: HttpRequest, data: web::Json<JsonUser>) -> impl Responder { pub async fn sync(_req: HttpRequest, data: web::Json<JsonUser>) -> impl Responder {
@@ -272,7 +271,7 @@ mod tests {
} }
#[actix_web::test] #[actix_web::test]
async fn delete_old_feed_items_removes_items_older_than_two_weeks_but_keeps_recent_ones() { async fn delete_old_feed_items_removes_only_old_read_items() {
let mut connection = establish_connection(); let mut connection = establish_connection();
let suffix = unique_suffix(); let suffix = unique_suffix();
@@ -297,29 +296,51 @@ mod tests {
.unwrap(); .unwrap();
let now = Local::now().naive_local(); let now = Local::now().naive_local();
let old_item = NewFeedItem::new( let old_read_item = NewFeedItem::new(
feed.id, feed.id,
"old content".to_string(), "old read content".to_string(),
format!("Old article {suffix}"), format!("Old read article {suffix}"),
format!("https://example.test/article/old-{suffix}"), format!("https://example.test/article/old-read-{suffix}"),
Some(now - Duration::days(20)), Some(now - Duration::days(20)),
); );
let recent_item = NewFeedItem::new( let old_unread_item = NewFeedItem::new(
feed.id,
"old unread content".to_string(),
format!("Old unread article {suffix}"),
format!("https://example.test/article/old-unread-{suffix}"),
Some(now - Duration::days(20)),
);
let recent_read_item = NewFeedItem::new(
feed.id, feed.id,
"recent content".to_string(), "recent content".to_string(),
format!("Recent article {suffix}"), format!("Recent article {suffix}"),
format!("https://example.test/article/recent-{suffix}"), format!("https://example.test/article/recent-{suffix}"),
Some(now - Duration::days(1)), Some(now - Duration::days(1)),
); );
diesel::insert_into(feed_item::table)
.values(&old_item) let old_read: FeedItem = diesel::insert_into(feed_item::table)
.values(&old_read_item)
.get_result(&mut connection)
.unwrap();
diesel::update(&old_read)
.set(feed_item::read.eq(true))
.execute(&mut connection) .execute(&mut connection)
.unwrap(); .unwrap();
let recent: FeedItem = diesel::insert_into(feed_item::table)
.values(&recent_item) let old_unread: FeedItem = diesel::insert_into(feed_item::table)
.values(&old_unread_item)
.get_result(&mut connection) .get_result(&mut connection)
.unwrap(); .unwrap();
let recent: FeedItem = diesel::insert_into(feed_item::table)
.values(&recent_read_item)
.get_result(&mut connection)
.unwrap();
diesel::update(&recent)
.set(feed_item::read.eq(true))
.execute(&mut connection)
.unwrap();
delete_old_feed_items(&mut connection); delete_old_feed_items(&mut connection);
let remaining: Vec<FeedItem> = feed_item::table let remaining: Vec<FeedItem> = feed_item::table
@@ -327,8 +348,19 @@ mod tests {
.load(&mut connection) .load(&mut connection)
.unwrap(); .unwrap();
assert_eq!(1, remaining.len(), "only the recent item should survive cleanup"); let remaining_ids: Vec<i32> = remaining.iter().map(|item| item.id).collect();
assert_eq!(recent.id, remaining[0].id); assert!(
!remaining_ids.contains(&old_read.id),
"old read item should have been deleted"
);
assert!(
remaining_ids.contains(&old_unread.id),
"old unread item should be kept"
);
assert!(
remaining_ids.contains(&recent.id),
"recent item should be kept"
);
diesel::delete(feed_item::table.filter(feed_id.eq(feed.id))) diesel::delete(feed_item::table.filter(feed_id.eq(feed.id)))
.execute(&mut connection) .execute(&mut connection)
@@ -342,13 +374,13 @@ mod tests {
} }
#[actix_web::test] #[actix_web::test]
async fn create_feed_item_skips_articles_older_than_two_weeks() { async fn create_feed_item_inserts_articles_older_than_two_weeks() {
let mut connection = establish_connection(); let mut connection = establish_connection();
let suffix = unique_suffix(); let suffix = unique_suffix();
let new_user = NewUser::new( let new_user = NewUser::new(
format!("age_skip_test_{suffix}"), format!("age_test_{suffix}"),
format!("age_skip_{suffix}@example.test"), format!("age_{suffix}@example.test"),
"secret".to_string(), "secret".to_string(),
); );
let user: User = diesel::insert_into(users::table) let user: User = diesel::insert_into(users::table)
@@ -357,7 +389,7 @@ mod tests {
.unwrap(); .unwrap();
let new_feed = NewFeed::new( let new_feed = NewFeed::new(
format!("Age skip test feed {suffix}"), format!("Age test feed {suffix}"),
format!("https://example.test/feed/{suffix}"), format!("https://example.test/feed/{suffix}"),
user.id, user.id,
); );
@@ -366,7 +398,9 @@ mod tests {
.get_result(&mut connection) .get_result(&mut connection)
.unwrap(); .unwrap();
// Item with a pub_date 20 days ago — should be ignored by create_feed_item. // Item with a pub_date 20 days ago — should still be inserted, since
// infrequently-updated feeds (or infrequent syncs) must not lose
// articles the user hasn't seen yet.
let old_date = (Local::now() - Duration::days(20)) let old_date = (Local::now() - Duration::days(20))
.format("%a, %d %b %Y %H:%M:%S %z") .format("%a, %d %b %Y %H:%M:%S %z")
.to_string(); .to_string();
@@ -390,11 +424,7 @@ mod tests {
.load(&mut connection) .load(&mut connection)
.unwrap(); .unwrap();
assert_eq!(1, items.len(), "old item should have been skipped"); assert_eq!(2, items.len(), "both old and fresh items should be inserted");
assert!(
items[0].title.contains("Fresh article"),
"only the fresh item should be present"
);
diesel::delete(feed_item::table.filter(feed_id.eq(feed.id))) diesel::delete(feed_item::table.filter(feed_id.eq(feed.id)))
.execute(&mut connection) .execute(&mut connection)