Don’t store alternates explicitly to save disk space

Instead, fetch alternates from within the feed on demand.

This makes alternates marginally more expensive to retrieve, but saves on storage (which we were running out of on the DigitalOcean deployment), and is a cleaner architecture overall: no need to keep the feeds and alternates in sync.

Here’s a script to migrate existing feeds:

// Call me with, for example: env "BASE_URL=https://kill-the-newsletter.com" npx ts-node migrate.ts
// I’m idempotent and reentrant, you may call me multiple times if necessary (for example, if the migration fails in the middle for whatever reason)

import { promises as fs } from "fs";
import path from "path";
import { JSDOM } from "jsdom";

const BASE_URL = process.env.BASE_URL ?? "http://localhost:8000";
const FEEDS_PATH = "static/feeds";

// Migration entry point.
//
// 1. Deletes the directory of stored alternates ("static/alternate").
// 2. For every "*.xml" file in FEEDS_PATH, points each entry's
//    <link rel="alternate"> at `${BASE_URL}/alternate/<feed>/<entry>.html`
//    and writes the feed back in place.
//
// Every step can be repeated safely: a missing alternates directory is
// tolerated and rewriting an already-migrated href yields the same value.
// Feeds or entries without a usable <id> are reported and skipped so that one
// malformed file does not block the migration of the others; the process then
// exits with a non-zero status.
(async () => {
  await fs
    .rmdir("static/alternate", { recursive: true })
    .catch((error: unknown) => {
      // Newer Node.js versions reject with ENOENT when the directory is
      // already gone (for example on a second run); that is the desired end
      // state, so only other failures are propagated.
      const code = (error as { code?: unknown } | null)?.code;
      if (code !== "ENOENT") throw error;
    });

  const feedFileNames = (await fs.readdir(FEEDS_PATH)).filter((fileName) =>
    fileName.endsWith(".xml")
  );
  for (const feedFileName of feedFileNames) {
    const feedFilePath = path.join(FEEDS_PATH, feedFileName);
    const text = await fs.readFile(feedFilePath, "utf-8");
    const feed = new JSDOM(text, { contentType: "text/xml" });
    const document = feed.window.document;

    // The identifier is the third colon-separated segment of the <id> text.
    const feedIdentifier = document
      .querySelector("id")
      ?.textContent?.split(":")[2];
    if (feedIdentifier === undefined) {
      console.error(`Skipping ${feedFilePath}: feed identifier not found`);
      process.exitCode = 1;
      continue;
    }

    for (const entry of document.querySelectorAll("entry")) {
      const entryIdentifier = entry
        .querySelector("id")
        ?.textContent?.split(":")[2];
      if (entryIdentifier === undefined) {
        console.error(
          `Skipping an entry in ${feedFilePath}: entry identifier not found`
        );
        process.exitCode = 1;
        continue;
      }
      // Entries without an alternate link are left untouched.
      entry
        .querySelector(`link[rel="alternate"]`)
        ?.setAttribute(
          "href",
          `${BASE_URL}/alternate/${feedIdentifier}/${entryIdentifier}.html`
        );
    }

    // The XML declaration is prepended explicitly before writing the
    // serialized document back to the same file.
    await fs.writeFile(
      feedFilePath,
      `<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`.trim()
    );
    console.log(feedIdentifier);
  }
})().catch((error: unknown) => {
  // Surface the failure and signal it through the exit status instead of
  // leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
This commit is contained in:
Leandro Facchinetti 2020-11-24 17:12:14 +00:00
parent 212b06eb49
commit e452838d27
2 changed files with 91 additions and 71 deletions

142
index.ts
View File

@ -25,16 +25,20 @@ export const webServer = express()
try { try {
const { name } = req.body; const { name } = req.body;
const identifier = createIdentifier(); const identifier = createIdentifier();
await writeFileAtomic(feedPath(identifier), feed(identifier, X(name)));
const renderedCreated = created(identifier); const renderedCreated = created(identifier);
await addEntryToFeed( await writeFileAtomic(
feedFilePath(identifier),
feed(
identifier, identifier,
X(name),
entry( entry(
identifier,
createIdentifier(), createIdentifier(),
`${X(name)}” Inbox Created`, `${X(name)}” Inbox Created`,
"Kill the Newsletter!", "Kill the Newsletter!",
X(renderedCreated) X(renderedCreated)
) )
)
); );
res.send( res.send(
layout(html` layout(html`
@ -47,6 +51,35 @@ export const webServer = express()
next(error); next(error);
} }
}) })
.get(
alternatePath(":feedIdentifier", ":entryIdentifier"),
async (req, res, next) => {
try {
const { feedIdentifier, entryIdentifier } = req.params;
const path = feedFilePath(feedIdentifier);
let text;
try {
text = await fs.readFile(path, "utf8");
} catch {
return res.sendStatus(404);
}
const feed = new JSDOM(text, { contentType: "text/xml" });
const document = feed.window.document;
const link = document.querySelector(
`link[href="${alternateURL(feedIdentifier, entryIdentifier)}"]`
);
if (link === null) return res.sendStatus(404);
res.send(
entities.decodeXML(
link.parentElement!.querySelector("content")!.textContent!
)
);
} catch (error) {
console.error(error);
next(error);
}
}
)
.listen(WEB_PORT); .listen(WEB_PORT);
export const emailServer = new SMTPServer({ export const emailServer = new SMTPServer({
@ -62,17 +95,43 @@ export const emailServer = new SMTPServer({
); );
if (match?.groups === undefined) continue; if (match?.groups === undefined) continue;
const identifier = match.groups.identifier.toLowerCase(); const identifier = match.groups.identifier.toLowerCase();
await addEntryToFeed( const path = feedFilePath(identifier);
let text;
try {
text = await fs.readFile(path, "utf8");
} catch {
continue;
}
const feed = new JSDOM(text, { contentType: "text/xml" });
const document = feed.window.document;
const updated = document.querySelector("feed > updated");
if (updated === null) {
console.error(`Field updated not found: ${path}`);
continue;
}
updated.textContent = now();
const renderedEntry = entry(
identifier, identifier,
entry(
createIdentifier(), createIdentifier(),
X(email.subject ?? ""), X(email.subject ?? ""),
X(email.from?.text ?? ""), X(email.from?.text ?? ""),
X(content) X(content)
) );
).catch((error) => { const firstEntry = document.querySelector("feed > entry:first-of-type");
console.error(error); if (firstEntry === null)
}); document
.querySelector("feed")!
.insertAdjacentHTML("beforeend", renderedEntry);
else firstEntry.insertAdjacentHTML("beforebegin", renderedEntry);
while (feed.serialize().length > 500_000) {
const lastEntry = document.querySelector("feed > entry:last-of-type");
if (lastEntry === null) break;
lastEntry.remove();
}
await writeFileAtomic(
path,
html`<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`.trim()
);
} }
callback(); callback();
} catch (error) { } catch (error) {
@ -86,46 +145,6 @@ export const emailServer = new SMTPServer({
}, },
}).listen(EMAIL_PORT); }).listen(EMAIL_PORT);
async function addEntryToFeed(
identifier: string,
entry: string
): Promise<void> {
const path = feedPath(identifier);
let text;
try {
text = await fs.readFile(path, "utf8");
} catch {
return;
}
const feed = new JSDOM(text, { contentType: "text/xml" });
const document = feed.window.document;
const updated = document.querySelector("feed > updated");
if (updated === null) throw new Error(`Field updated not found: ${path}`);
updated.textContent = now();
const firstEntry = document.querySelector("feed > entry:first-of-type");
if (firstEntry === null)
document.querySelector("feed")!.insertAdjacentHTML("beforeend", entry);
else firstEntry.insertAdjacentHTML("beforebegin", entry);
const entryDocument = JSDOM.fragment(entry);
await writeFileAtomic(
alternatePath(getEntryIdentifier(entryDocument)),
entities.decodeXML(entryDocument.querySelector("content")!.textContent!)
);
while (feed.serialize().length > 500_000) {
const entry = document.querySelector("feed > entry:last-of-type");
if (entry === null) break;
entry.remove();
const path = alternatePath(getEntryIdentifier(entry));
await fs.unlink(path).catch(() => {
console.error(`File not found: ${path}`);
});
}
await writeFileAtomic(
path,
html`<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`.trim()
);
}
function layout(content: string): string { function layout(content: string): string {
return html` return html`
<!DOCTYPE html> <!DOCTYPE html>
@ -218,7 +237,7 @@ function created(identifier: string): string {
`.trim(); `.trim();
} }
function feed(identifier: string, name: string): string { function feed(identifier: string, name: string, initialEntry: string): string {
return html` return html`
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom"> <feed xmlns="http://www.w3.org/2005/Atom">
@ -236,26 +255,28 @@ function feed(identifier: string, name: string): string {
> >
<updated>${now()}</updated> <updated>${now()}</updated>
<author><name>Kill the Newsletter!</name></author> <author><name>Kill the Newsletter!</name></author>
${initialEntry}
</feed> </feed>
`.trim(); `.trim();
} }
function entry( function entry(
identifier: string, feedIdentifier: string,
entryIdentifier: string,
title: string, title: string,
author: string, author: string,
content: string content: string
): string { ): string {
return html` return html`
<entry> <entry>
<id>${urn(identifier)}</id> <id>${urn(entryIdentifier)}</id>
<title>${title}</title> <title>${title}</title>
<author><name>${author}</name></author> <author><name>${author}</name></author>
<updated>${now()}</updated> <updated>${now()}</updated>
<link <link
rel="alternate" rel="alternate"
type="text/html" type="text/html"
href="${alternateURL(identifier)}" href="${alternateURL(feedIdentifier, entryIdentifier)}"
/> />
<content type="html">${content}</content> <content type="html">${content}</content>
</entry> </entry>
@ -269,15 +290,11 @@ function createIdentifier(): string {
}); });
} }
function getEntryIdentifier(entry: ParentNode): string {
return entry.querySelector("id")!.textContent!.split(":")[2];
}
function now(): string { function now(): string {
return new Date().toISOString(); return new Date().toISOString();
} }
function feedPath(identifier: string): string { function feedFilePath(identifier: string): string {
return `static/feeds/${identifier}.xml`; return `static/feeds/${identifier}.xml`;
} }
@ -289,12 +306,15 @@ function feedEmail(identifier: string): string {
return `${identifier}@${EMAIL_DOMAIN}`; return `${identifier}@${EMAIL_DOMAIN}`;
} }
function alternatePath(identifier: string): string { function alternatePath(
return `static/alternate/${identifier}.html`; feedIdentifier: string,
entryIdentifier: string
): string {
return `/alternate/${feedIdentifier}/${entryIdentifier}.html`;
} }
function alternateURL(identifier: string): string { function alternateURL(feedIdentifier: string, entryIdentifier: string): string {
return `${BASE_URL}/alternate/${identifier}.html`; return `${BASE_URL}${alternatePath(feedIdentifier, entryIdentifier)}`;
} }
function urn(identifier: string): string { function urn(identifier: string): string {