added anyhow, improve hamburger menu, improve dw articles
This commit is contained in:
@@ -116,6 +116,31 @@ describe('useFeeds', () => {
|
||||
setInitialLoad(false)
|
||||
})
|
||||
|
||||
it('strips leftover embedded-video placeholder headings', async () => {
  // Regression test: the fetched article carries an <h2> whose aria-label
  // starts with "Eingebettetes Video" and which wraps a play-icon <svg>.
  // After getReadable runs, neither the label text nor the icon may remain
  // in the stored content.
  feeds.value = [{
    id: 1,
    title: 'Article one',
    url: 'https://www.dw.com/en/article-one/a-1',
    content: '',
  }]
  axios.post.mockResolvedValueOnce({
    data: {
      content: `<html><body><article>
        <h2 aria-label="Eingebettetes Video — Iran-Krieg belastet Wirtschaft und Märkte in Deutschland">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20"><g fill-rule="evenodd"><path d="M14.114 7.599H13.5l.002 4.706h.601l4.582 3.25-.005-11.11zM11.084 4.444l-9.007.002-1.336.797.002 9.514 1.334.793 9.007.006 1.509-.799-.004-9.516z"></path></g></svg>
          Iran-Krieg belastet Wirtschaft und Märkte in Deutschland
        </h2>
        <p>some article text long enough for readability to keep the paragraph as the main content body, padded with extra words to pass the content-length heuristics used by Mozilla Readability when scoring candidate nodes.</p>
      </article></body></html>`,
    },
  })

  await getReadable(feeds.value[0], 0)

  // Positive check first: the feed starts with `content: ''`, so the two
  // negative assertions below would also pass if no content had been written
  // at all. Requiring the paragraph text proves the article was extracted.
  expect(feeds.value[0].content).toContain('some article text long enough for readability')
  expect(feeds.value[0].content).not.toContain('Eingebettetes Video')
  expect(feeds.value[0].content).not.toContain('<svg')
})
|
||||
|
||||
it('resolves Deutsche-Welle-style templated image URLs from data-format/data-url', async () => {
|
||||
feeds.value = [{
|
||||
id: 1,
|
||||
@@ -137,7 +162,10 @@ describe('useFeeds', () => {
|
||||
|
||||
await getReadable(feeds.value[0], 0)
|
||||
|
||||
expect(feeds.value[0].content).toContain('src="https://static.dw.com/image/76212061_MASTER_LANDSCAPE.jpg"')
|
||||
// "MASTER_LANDSCAPE" is a symbolic name from DW's CMS, not a valid value
|
||||
// for the CDN's numeric `formatId` — it must be mapped to "6" or the
|
||||
// resulting URL 400s and the image fails to load.
|
||||
expect(feeds.value[0].content).toContain('src="https://static.dw.com/image/76212061_6.jpg"')
|
||||
// The rendered `src` is what matters — `data-url` retaining the raw
|
||||
// template is harmless since browsers don't load images from data-* attrs.
|
||||
expect(feeds.value[0].content).not.toMatch(/src="[^"]*(\$\{|%7[bB])/)
|
||||
|
||||
@@ -35,8 +35,19 @@ function authHeaders() {
|
||||
// Matches a single unresolved placeholder: either a literal `${...}` or a
// percent-encoded `%7B...%7D` run (hex digits in upper or lower case).
const TEMPLATE_PATTERN = /\$\{[^}]+\}|%7[bB][^%]*%7[dD]/
// Same expression with the `g` flag, built from the pattern above so the two
// cannot drift apart.
const TEMPLATE_PATTERN_GLOBAL = new RegExp(TEMPLATE_PATTERN.source, 'g')
|
||||
|
||||
// `data-format` holds a symbolic name from DW's CMS (e.g. "MASTER_LANDSCAPE"),
// but their image CDN only accepts numeric format ids in the URL — the
// template's `${formatId}` literally means a number. Substituting the
// symbolic name verbatim produces a 400 (image fails to load). DW generates
// the same fixed set of numeric variants for every image, so map the
// symbolic names we've seen to their numeric equivalent.
//
// The table has a null prototype and is frozen because it is indexed with a
// raw attribute value read from the parsed document: on a plain object
// literal, a key such as "constructor" or "toString" would resolve to an
// inherited Object.prototype member instead of `undefined`.
const DW_FORMAT_IDS = Object.freeze(Object.assign(Object.create(null), {
  MASTER_LANDSCAPE: '6', // 940x529, 16:9 — matches DW's `16/9` aspect ratio
}))
|
||||
|
||||
function resolveTemplatedImage(img) {
|
||||
const format = img.getAttribute('data-format')
|
||||
const rawFormat = img.getAttribute('data-format')
|
||||
const format = rawFormat && (DW_FORMAT_IDS[rawFormat] ?? (/^\d+$/.test(rawFormat) ? rawFormat : null))
|
||||
const dataUrl = img.getAttribute('data-url')
|
||||
|
||||
if (format) {
|
||||
@@ -81,6 +92,15 @@ async function getReadable(feed, index) {
|
||||
doc.head.prepend(base);
|
||||
doc.querySelectorAll('img').forEach(resolveTemplatedImage);
|
||||
doc.querySelectorAll('video, audio').forEach(el => el.remove());
|
||||
// Some feeds (e.g. Deutsche Welle) leave behind a heading + play-icon SVG
|
||||
// for an embedded video player whose actual <video>/<iframe> we already
|
||||
// stripped — without it, the heading is just a giant orphaned icon that
|
||||
// takes up space and links nowhere.
|
||||
doc.querySelectorAll('[aria-label]').forEach(el => {
|
||||
if (/^(Eingebettetes|Embedded) Video/i.test(el.getAttribute('aria-label'))) {
|
||||
el.remove()
|
||||
}
|
||||
})
|
||||
const article = new Readability(doc).parse();
|
||||
feeds.value[index].content = article.content;
|
||||
feeds.value[index].readable = true;
|
||||
|
||||
Reference in New Issue
Block a user