fix sync missing articles
This commit is contained in:
+67
-37
@@ -64,10 +64,9 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
|
||||
let item_title = item.title.clone().unwrap();
|
||||
log::info!("Create feed item: {}", item_title);
|
||||
|
||||
// Resolve the publication date before any HTML parsing or DB work so we can
|
||||
// bail out early for old articles. Items without a pub_date are treated as
|
||||
// current (inserted unconditionally) — feeds that don't publish dates are
|
||||
// typically small/curated enough that this is fine.
|
||||
// Items without a pub_date are treated as current (inserted unconditionally)
|
||||
// — feeds that don't publish dates are typically small/curated enough that
|
||||
// this is fine.
|
||||
let mut time: NaiveDateTime = Local::now().naive_local();
|
||||
if let Some(pub_date) = item.pub_date() {
|
||||
time = match get_date(pub_date) {
|
||||
@@ -79,12 +78,6 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
|
||||
};
|
||||
}
|
||||
|
||||
let cutoff = Local::now().naive_local() - Duration::days(14);
|
||||
if time < cutoff {
|
||||
log::info!("Skipping item {} (older than 2 weeks).", item_title);
|
||||
return;
|
||||
}
|
||||
|
||||
let base_content: &str = item.content().or(item.description()).unwrap_or_default();
|
||||
|
||||
let frag = Html::parse_fragment(base_content);
|
||||
@@ -134,15 +127,21 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
|
||||
}
|
||||
}
|
||||
|
||||
// Items without a `created_ts` (e.g. ones inserted before this column existed,
|
||||
// or whose feed didn't provide a publish date) are left alone — `lt` never
|
||||
// matches NULL, so there's nothing to special-case here.
|
||||
// Only read items are purged, and only once they're old — unread items are
|
||||
// kept regardless of age so infrequently-updated feeds (or infrequent syncs)
|
||||
// don't lose articles the user hasn't seen yet. Items without a `created_ts`
|
||||
// (e.g. ones inserted before this column existed, or whose feed didn't
|
||||
// provide a publish date) are left alone — `lt` never matches NULL.
|
||||
fn delete_old_feed_items(connection: &mut PgConnection) {
|
||||
let cutoff = Local::now().naive_local() - Duration::days(14);
|
||||
let result = diesel::delete(feed_item::table.filter(feed_item::created_ts.lt(cutoff)))
|
||||
let result = diesel::delete(
|
||||
feed_item::table
|
||||
.filter(feed_item::read.eq(true))
|
||||
.filter(feed_item::created_ts.lt(cutoff)),
|
||||
)
|
||||
.execute(connection);
|
||||
|
||||
log::info!("Deleted old feed items (older than 2 weeks): {:?}", result);
|
||||
log::info!("Deleted old read feed items (older than 2 weeks): {:?}", result);
|
||||
}
|
||||
|
||||
pub async fn sync(_req: HttpRequest, data: web::Json<JsonUser>) -> impl Responder {
|
||||
@@ -272,7 +271,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[actix_web::test]
|
||||
async fn delete_old_feed_items_removes_items_older_than_two_weeks_but_keeps_recent_ones() {
|
||||
async fn delete_old_feed_items_removes_only_old_read_items() {
|
||||
let mut connection = establish_connection();
|
||||
let suffix = unique_suffix();
|
||||
|
||||
@@ -297,29 +296,51 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let now = Local::now().naive_local();
|
||||
let old_item = NewFeedItem::new(
|
||||
let old_read_item = NewFeedItem::new(
|
||||
feed.id,
|
||||
"old content".to_string(),
|
||||
format!("Old article {suffix}"),
|
||||
format!("https://example.test/article/old-{suffix}"),
|
||||
"old read content".to_string(),
|
||||
format!("Old read article {suffix}"),
|
||||
format!("https://example.test/article/old-read-{suffix}"),
|
||||
Some(now - Duration::days(20)),
|
||||
);
|
||||
let recent_item = NewFeedItem::new(
|
||||
let old_unread_item = NewFeedItem::new(
|
||||
feed.id,
|
||||
"old unread content".to_string(),
|
||||
format!("Old unread article {suffix}"),
|
||||
format!("https://example.test/article/old-unread-{suffix}"),
|
||||
Some(now - Duration::days(20)),
|
||||
);
|
||||
let recent_read_item = NewFeedItem::new(
|
||||
feed.id,
|
||||
"recent content".to_string(),
|
||||
format!("Recent article {suffix}"),
|
||||
format!("https://example.test/article/recent-{suffix}"),
|
||||
Some(now - Duration::days(1)),
|
||||
);
|
||||
diesel::insert_into(feed_item::table)
|
||||
.values(&old_item)
|
||||
|
||||
let old_read: FeedItem = diesel::insert_into(feed_item::table)
|
||||
.values(&old_read_item)
|
||||
.get_result(&mut connection)
|
||||
.unwrap();
|
||||
diesel::update(&old_read)
|
||||
.set(feed_item::read.eq(true))
|
||||
.execute(&mut connection)
|
||||
.unwrap();
|
||||
let recent: FeedItem = diesel::insert_into(feed_item::table)
|
||||
.values(&recent_item)
|
||||
|
||||
let old_unread: FeedItem = diesel::insert_into(feed_item::table)
|
||||
.values(&old_unread_item)
|
||||
.get_result(&mut connection)
|
||||
.unwrap();
|
||||
|
||||
let recent: FeedItem = diesel::insert_into(feed_item::table)
|
||||
.values(&recent_read_item)
|
||||
.get_result(&mut connection)
|
||||
.unwrap();
|
||||
diesel::update(&recent)
|
||||
.set(feed_item::read.eq(true))
|
||||
.execute(&mut connection)
|
||||
.unwrap();
|
||||
|
||||
delete_old_feed_items(&mut connection);
|
||||
|
||||
let remaining: Vec<FeedItem> = feed_item::table
|
||||
@@ -327,8 +348,19 @@ mod tests {
|
||||
.load(&mut connection)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(1, remaining.len(), "only the recent item should survive cleanup");
|
||||
assert_eq!(recent.id, remaining[0].id);
|
||||
let remaining_ids: Vec<i32> = remaining.iter().map(|item| item.id).collect();
|
||||
assert!(
|
||||
!remaining_ids.contains(&old_read.id),
|
||||
"old read item should have been deleted"
|
||||
);
|
||||
assert!(
|
||||
remaining_ids.contains(&old_unread.id),
|
||||
"old unread item should be kept"
|
||||
);
|
||||
assert!(
|
||||
remaining_ids.contains(&recent.id),
|
||||
"recent item should be kept"
|
||||
);
|
||||
|
||||
diesel::delete(feed_item::table.filter(feed_id.eq(feed.id)))
|
||||
.execute(&mut connection)
|
||||
@@ -342,13 +374,13 @@ mod tests {
|
||||
}
|
||||
|
||||
#[actix_web::test]
|
||||
async fn create_feed_item_skips_articles_older_than_two_weeks() {
|
||||
async fn create_feed_item_inserts_articles_older_than_two_weeks() {
|
||||
let mut connection = establish_connection();
|
||||
let suffix = unique_suffix();
|
||||
|
||||
let new_user = NewUser::new(
|
||||
format!("age_skip_test_{suffix}"),
|
||||
format!("age_skip_{suffix}@example.test"),
|
||||
format!("age_test_{suffix}"),
|
||||
format!("age_{suffix}@example.test"),
|
||||
"secret".to_string(),
|
||||
);
|
||||
let user: User = diesel::insert_into(users::table)
|
||||
@@ -357,7 +389,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let new_feed = NewFeed::new(
|
||||
format!("Age skip test feed {suffix}"),
|
||||
format!("Age test feed {suffix}"),
|
||||
format!("https://example.test/feed/{suffix}"),
|
||||
user.id,
|
||||
);
|
||||
@@ -366,7 +398,9 @@ mod tests {
|
||||
.get_result(&mut connection)
|
||||
.unwrap();
|
||||
|
||||
// Item with a pub_date 20 days ago — should be ignored by create_feed_item.
|
||||
// Item with a pub_date 20 days ago — should still be inserted, since
|
||||
// infrequently-updated feeds (or infrequent syncs) must not lose
|
||||
// articles the user hasn't seen yet.
|
||||
let old_date = (Local::now() - Duration::days(20))
|
||||
.format("%a, %d %b %Y %H:%M:%S %z")
|
||||
.to_string();
|
||||
@@ -390,11 +424,7 @@ mod tests {
|
||||
.load(&mut connection)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(1, items.len(), "old item should have been skipped");
|
||||
assert!(
|
||||
items[0].title.contains("Fresh article"),
|
||||
"only the fresh item should be present"
|
||||
);
|
||||
assert_eq!(2, items.len(), "both old and fresh items should be inserted");
|
||||
|
||||
diesel::delete(feed_item::table.filter(feed_id.eq(feed.id)))
|
||||
.execute(&mut connection)
|
||||
|
||||
Reference in New Issue
Block a user