Add anyhow, improve hamburger menu, improve DW articles

This commit is contained in:
2026-06-10 18:51:55 +02:00
parent 0420cf0dd5
commit 52ea84747a
22 changed files with 226 additions and 91 deletions
+29 -1
View File
@@ -116,6 +116,31 @@ describe('useFeeds', () => {
setInitialLoad(false)
})
// Regression test: the fetched page contains an <h2> whose aria-label starts
// with "Eingebettetes Video" and which wraps a play-icon <svg>. After
// getReadable runs, neither the label text nor the <svg> may remain in the
// stored article content.
it('strips leftover embedded-video placeholder headings', async () => {
  // Fixture page: the placeholder heading followed by one long paragraph.
  const upstreamHtml = `<html><body><article>
<h2 aria-label="Eingebettetes Video — Iran-Krieg belastet Wirtschaft und Märkte in Deutschland">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20"><g fill-rule="evenodd"><path d="M14.114 7.599H13.5l.002 4.706h.601l4.582 3.25-.005-11.11zM11.084 4.444l-9.007.002-1.336.797.002 9.514 1.334.793 9.007.006 1.509-.799-.004-9.516z"></path></g></svg>
Iran-Krieg belastet Wirtschaft und Märkte in Deutschland
</h2>
<p>some article text long enough for readability to keep the paragraph as the main content body, padded with extra words to pass the content-length heuristics used by Mozilla Readability when scoring candidate nodes.</p>
</article></body></html>`
  const entry = {
    id: 1,
    title: 'Article one',
    url: 'https://www.dw.com/en/article-one/a-1',
    content: '',
  }

  feeds.value = [entry]
  // The next axios.post call resolves with the fixture page as its content.
  axios.post.mockResolvedValueOnce({ data: { content: upstreamHtml } })

  await getReadable(feeds.value[0], 0)

  const { content } = feeds.value[0]
  expect(content).not.toContain('Eingebettetes Video')
  expect(content).not.toContain('<svg')
})
it('resolves Deutsche-Welle-style templated image URLs from data-format/data-url', async () => {
feeds.value = [{
id: 1,
@@ -137,7 +162,10 @@ describe('useFeeds', () => {
await getReadable(feeds.value[0], 0)
expect(feeds.value[0].content).toContain('src="https://static.dw.com/image/76212061_MASTER_LANDSCAPE.jpg"')
// "MASTER_LANDSCAPE" is a symbolic name from DW's CMS, not a valid value
// for the CDN's numeric `formatId` — it must be mapped to "6" or the
// resulting URL 400s and the image fails to load.
expect(feeds.value[0].content).toContain('src="https://static.dw.com/image/76212061_6.jpg"')
// The rendered `src` is what matters — `data-url` retaining the raw
// template is harmless since browsers don't load images from data-* attrs.
expect(feeds.value[0].content).not.toMatch(/src="[^"]*(\$\{|%7[bB])/)
+21 -1
View File
@@ -35,8 +35,19 @@ function authHeaders() {
// Matches a single unresolved template placeholder: either a literal `${...}`
// or its percent-encoded form `%7B...%7D` (hex letters in either case).
const TEMPLATE_PATTERN = /\$\{[^}]+\}|%7[bB][^%]*%7[dD]/
// Same expression with the global flag, derived from the pattern above so the
// two can never drift apart.
const TEMPLATE_PATTERN_GLOBAL = new RegExp(TEMPLATE_PATTERN.source, 'g')
// `data-format` holds a symbolic name from DW's CMS (e.g. "MASTER_LANDSCAPE"),
// but their image CDN only accepts numeric format ids in the URL — the
// template's `${formatId}` literally means a number. Substituting the
// symbolic name verbatim produces a 400 (image fails to load). DW generates
// the same fixed set of numeric variants for every image, so map the
// symbolic names we've seen to their numeric equivalent.
//
// The table is a frozen, prototype-less object on purpose: it is indexed with
// the raw `data-format` attribute taken from fetched article HTML. On a plain
// `{}` a value such as "constructor" or "toString" would resolve to an
// inherited Object.prototype member (truthy) instead of `undefined`, and
// would wrongly be treated as a known format.
const DW_FORMAT_IDS = Object.freeze(Object.assign(Object.create(null), {
  MASTER_LANDSCAPE: '6', // 940x529, 16:9 — matches DW's `16/9` aspect ratio
}))
function resolveTemplatedImage(img) {
const format = img.getAttribute('data-format')
const rawFormat = img.getAttribute('data-format')
const format = rawFormat && (DW_FORMAT_IDS[rawFormat] ?? (/^\d+$/.test(rawFormat) ? rawFormat : null))
const dataUrl = img.getAttribute('data-url')
if (format) {
@@ -81,6 +92,15 @@ async function getReadable(feed, index) {
doc.head.prepend(base);
doc.querySelectorAll('img').forEach(resolveTemplatedImage);
doc.querySelectorAll('video, audio').forEach(el => el.remove());
// Some feeds (e.g. Deutsche Welle) leave behind a heading + play-icon SVG
// for an embedded video player whose actual <video>/<iframe> we already
// stripped — without it, the heading is just a giant orphaned icon that
// takes up space and links nowhere.
doc.querySelectorAll('[aria-label]').forEach(el => {
if (/^(Eingebettetes|Embedded) Video/i.test(el.getAttribute('aria-label'))) {
el.remove()
}
})
const article = new Readability(doc).parse();
feeds.value[index].content = article.content;
feeds.value[index].readable = true;