fix sync missing articles
This commit is contained in:
+67
-37
@@ -64,10 +64,9 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
|
|||||||
let item_title = item.title.clone().unwrap();
|
let item_title = item.title.clone().unwrap();
|
||||||
log::info!("Create feed item: {}", item_title);
|
log::info!("Create feed item: {}", item_title);
|
||||||
|
|
||||||
// Resolve the publication date before any HTML parsing or DB work so we can
|
// Items without a pub_date are treated as current (inserted unconditionally)
|
||||||
// bail out early for old articles. Items without a pub_date are treated as
|
// — feeds that don't publish dates are typically small/curated enough that
|
||||||
// current (inserted unconditionally) — feeds that don't publish dates are
|
// this is fine.
|
||||||
// typically small/curated enough that this is fine.
|
|
||||||
let mut time: NaiveDateTime = Local::now().naive_local();
|
let mut time: NaiveDateTime = Local::now().naive_local();
|
||||||
if let Some(pub_date) = item.pub_date() {
|
if let Some(pub_date) = item.pub_date() {
|
||||||
time = match get_date(pub_date) {
|
time = match get_date(pub_date) {
|
||||||
@@ -79,12 +78,6 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
let cutoff = Local::now().naive_local() - Duration::days(14);
|
|
||||||
if time < cutoff {
|
|
||||||
log::info!("Skipping item {} (older than 2 weeks).", item_title);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let base_content: &str = item.content().or(item.description()).unwrap_or_default();
|
let base_content: &str = item.content().or(item.description()).unwrap_or_default();
|
||||||
|
|
||||||
let frag = Html::parse_fragment(base_content);
|
let frag = Html::parse_fragment(base_content);
|
||||||
@@ -134,15 +127,21 @@ fn create_feed_item(item: Item, feed: &Feed, connection: &mut PgConnection) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Items without a `created_ts` (e.g. ones inserted before this column existed,
|
// Only read items are purged, and only once they're old — unread items are
|
||||||
// or whose feed didn't provide a publish date) are left alone — `lt` never
|
// kept regardless of age so infrequently-updated feeds (or infrequent syncs)
|
||||||
// matches NULL, so there's nothing to special-case here.
|
// don't lose articles the user hasn't seen yet. Items without a `created_ts`
|
||||||
|
// (e.g. ones inserted before this column existed, or whose feed didn't
|
||||||
|
// provide a publish date) are left alone — `lt` never matches NULL.
|
||||||
fn delete_old_feed_items(connection: &mut PgConnection) {
|
fn delete_old_feed_items(connection: &mut PgConnection) {
|
||||||
let cutoff = Local::now().naive_local() - Duration::days(14);
|
let cutoff = Local::now().naive_local() - Duration::days(14);
|
||||||
let result = diesel::delete(feed_item::table.filter(feed_item::created_ts.lt(cutoff)))
|
let result = diesel::delete(
|
||||||
|
feed_item::table
|
||||||
|
.filter(feed_item::read.eq(true))
|
||||||
|
.filter(feed_item::created_ts.lt(cutoff)),
|
||||||
|
)
|
||||||
.execute(connection);
|
.execute(connection);
|
||||||
|
|
||||||
log::info!("Deleted old feed items (older than 2 weeks): {:?}", result);
|
log::info!("Deleted old read feed items (older than 2 weeks): {:?}", result);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn sync(_req: HttpRequest, data: web::Json<JsonUser>) -> impl Responder {
|
pub async fn sync(_req: HttpRequest, data: web::Json<JsonUser>) -> impl Responder {
|
||||||
@@ -272,7 +271,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[actix_web::test]
|
#[actix_web::test]
|
||||||
async fn delete_old_feed_items_removes_items_older_than_two_weeks_but_keeps_recent_ones() {
|
async fn delete_old_feed_items_removes_only_old_read_items() {
|
||||||
let mut connection = establish_connection();
|
let mut connection = establish_connection();
|
||||||
let suffix = unique_suffix();
|
let suffix = unique_suffix();
|
||||||
|
|
||||||
@@ -297,29 +296,51 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let now = Local::now().naive_local();
|
let now = Local::now().naive_local();
|
||||||
let old_item = NewFeedItem::new(
|
let old_read_item = NewFeedItem::new(
|
||||||
feed.id,
|
feed.id,
|
||||||
"old content".to_string(),
|
"old read content".to_string(),
|
||||||
format!("Old article {suffix}"),
|
format!("Old read article {suffix}"),
|
||||||
format!("https://example.test/article/old-{suffix}"),
|
format!("https://example.test/article/old-read-{suffix}"),
|
||||||
Some(now - Duration::days(20)),
|
Some(now - Duration::days(20)),
|
||||||
);
|
);
|
||||||
let recent_item = NewFeedItem::new(
|
let old_unread_item = NewFeedItem::new(
|
||||||
|
feed.id,
|
||||||
|
"old unread content".to_string(),
|
||||||
|
format!("Old unread article {suffix}"),
|
||||||
|
format!("https://example.test/article/old-unread-{suffix}"),
|
||||||
|
Some(now - Duration::days(20)),
|
||||||
|
);
|
||||||
|
let recent_read_item = NewFeedItem::new(
|
||||||
feed.id,
|
feed.id,
|
||||||
"recent content".to_string(),
|
"recent content".to_string(),
|
||||||
format!("Recent article {suffix}"),
|
format!("Recent article {suffix}"),
|
||||||
format!("https://example.test/article/recent-{suffix}"),
|
format!("https://example.test/article/recent-{suffix}"),
|
||||||
Some(now - Duration::days(1)),
|
Some(now - Duration::days(1)),
|
||||||
);
|
);
|
||||||
diesel::insert_into(feed_item::table)
|
|
||||||
.values(&old_item)
|
let old_read: FeedItem = diesel::insert_into(feed_item::table)
|
||||||
|
.values(&old_read_item)
|
||||||
|
.get_result(&mut connection)
|
||||||
|
.unwrap();
|
||||||
|
diesel::update(&old_read)
|
||||||
|
.set(feed_item::read.eq(true))
|
||||||
.execute(&mut connection)
|
.execute(&mut connection)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let recent: FeedItem = diesel::insert_into(feed_item::table)
|
|
||||||
.values(&recent_item)
|
let old_unread: FeedItem = diesel::insert_into(feed_item::table)
|
||||||
|
.values(&old_unread_item)
|
||||||
.get_result(&mut connection)
|
.get_result(&mut connection)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
let recent: FeedItem = diesel::insert_into(feed_item::table)
|
||||||
|
.values(&recent_read_item)
|
||||||
|
.get_result(&mut connection)
|
||||||
|
.unwrap();
|
||||||
|
diesel::update(&recent)
|
||||||
|
.set(feed_item::read.eq(true))
|
||||||
|
.execute(&mut connection)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
delete_old_feed_items(&mut connection);
|
delete_old_feed_items(&mut connection);
|
||||||
|
|
||||||
let remaining: Vec<FeedItem> = feed_item::table
|
let remaining: Vec<FeedItem> = feed_item::table
|
||||||
@@ -327,8 +348,19 @@ mod tests {
|
|||||||
.load(&mut connection)
|
.load(&mut connection)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(1, remaining.len(), "only the recent item should survive cleanup");
|
let remaining_ids: Vec<i32> = remaining.iter().map(|item| item.id).collect();
|
||||||
assert_eq!(recent.id, remaining[0].id);
|
assert!(
|
||||||
|
!remaining_ids.contains(&old_read.id),
|
||||||
|
"old read item should have been deleted"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
remaining_ids.contains(&old_unread.id),
|
||||||
|
"old unread item should be kept"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
remaining_ids.contains(&recent.id),
|
||||||
|
"recent item should be kept"
|
||||||
|
);
|
||||||
|
|
||||||
diesel::delete(feed_item::table.filter(feed_id.eq(feed.id)))
|
diesel::delete(feed_item::table.filter(feed_id.eq(feed.id)))
|
||||||
.execute(&mut connection)
|
.execute(&mut connection)
|
||||||
@@ -342,13 +374,13 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[actix_web::test]
|
#[actix_web::test]
|
||||||
async fn create_feed_item_skips_articles_older_than_two_weeks() {
|
async fn create_feed_item_inserts_articles_older_than_two_weeks() {
|
||||||
let mut connection = establish_connection();
|
let mut connection = establish_connection();
|
||||||
let suffix = unique_suffix();
|
let suffix = unique_suffix();
|
||||||
|
|
||||||
let new_user = NewUser::new(
|
let new_user = NewUser::new(
|
||||||
format!("age_skip_test_{suffix}"),
|
format!("age_test_{suffix}"),
|
||||||
format!("age_skip_{suffix}@example.test"),
|
format!("age_{suffix}@example.test"),
|
||||||
"secret".to_string(),
|
"secret".to_string(),
|
||||||
);
|
);
|
||||||
let user: User = diesel::insert_into(users::table)
|
let user: User = diesel::insert_into(users::table)
|
||||||
@@ -357,7 +389,7 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let new_feed = NewFeed::new(
|
let new_feed = NewFeed::new(
|
||||||
format!("Age skip test feed {suffix}"),
|
format!("Age test feed {suffix}"),
|
||||||
format!("https://example.test/feed/{suffix}"),
|
format!("https://example.test/feed/{suffix}"),
|
||||||
user.id,
|
user.id,
|
||||||
);
|
);
|
||||||
@@ -366,7 +398,9 @@ mod tests {
|
|||||||
.get_result(&mut connection)
|
.get_result(&mut connection)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// Item with a pub_date 20 days ago — should be ignored by create_feed_item.
|
// Item with a pub_date 20 days ago — should still be inserted, since
|
||||||
|
// infrequently-updated feeds (or infrequent syncs) must not lose
|
||||||
|
// articles the user hasn't seen yet.
|
||||||
let old_date = (Local::now() - Duration::days(20))
|
let old_date = (Local::now() - Duration::days(20))
|
||||||
.format("%a, %d %b %Y %H:%M:%S %z")
|
.format("%a, %d %b %Y %H:%M:%S %z")
|
||||||
.to_string();
|
.to_string();
|
||||||
@@ -390,11 +424,7 @@ mod tests {
|
|||||||
.load(&mut connection)
|
.load(&mut connection)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(1, items.len(), "old item should have been skipped");
|
assert_eq!(2, items.len(), "both old and fresh items should be inserted");
|
||||||
assert!(
|
|
||||||
items[0].title.contains("Fresh article"),
|
|
||||||
"only the fresh item should be present"
|
|
||||||
);
|
|
||||||
|
|
||||||
diesel::delete(feed_item::table.filter(feed_id.eq(feed.id)))
|
diesel::delete(feed_item::table.filter(feed_id.eq(feed.id)))
|
||||||
.execute(&mut connection)
|
.execute(&mut connection)
|
||||||
|
|||||||
Reference in New Issue
Block a user