kill-the-newsletter/index.ts

339 lines
9.8 KiB
TypeScript
Raw Normal View History

2020-05-05 08:12:57 +02:00
import express from "express";
import { SMTPServer } from "smtp-server";
import mailparser from "mailparser";
2020-07-23 17:11:41 +02:00
import * as sanitizeXMLString from "sanitize-xml-string";
2020-05-05 08:12:57 +02:00
import * as entities from "entities";
2020-07-23 17:11:41 +02:00
import R from "escape-string-regexp";
2020-05-05 08:12:57 +02:00
import { JSDOM } from "jsdom";
2020-07-23 17:11:41 +02:00
import { promises as fs } from "fs";
2020-05-05 08:12:57 +02:00
import writeFileAtomic from "write-file-atomic";
import cryptoRandomString from "crypto-random-string";
2020-08-10 01:30:12 +02:00
import html from "tagged-template-noop";
2020-05-05 08:12:57 +02:00
/** Port handed to `webServer`'s `listen()`; taken from the `WEB_PORT` environment variable, otherwise 8000. */
export const WEB_PORT = process.env.WEB_PORT ?? 8000;
/** Port handed to `emailServer`'s `listen()`; taken from the `EMAIL_PORT` environment variable, otherwise 2525. */
export const EMAIL_PORT = process.env.EMAIL_PORT ?? 2525;
/** Prefix used when building feed URLs, alternate URLs and the "Create Another Inbox" link. */
export const BASE_URL = process.env.BASE_URL ?? "http://localhost:8000";
/** Domain part of inbox addresses; incoming recipients are matched against it. */
export const EMAIL_DOMAIN = process.env.EMAIL_DOMAIN ?? "localhost";
/** Target of the "Report an Issue" link rendered in the page footer. */
export const ISSUE_REPORT =
process.env.ISSUE_REPORT ?? "mailto:kill-the-newsletter@leafac.com";
/**
 * HTTP front end, started on `WEB_PORT`.
 *
 * - Serves the `static` directory (which is also where feed files are written).
 * - `GET /` renders the "create inbox" form.
 * - `POST /` creates a new feed file seeded with an "Inbox Created" entry and
 *   renders the resulting addresses.
 * - `GET /alternate/:feedIdentifier/:entryIdentifier.html` returns the HTML of
 *   a single entry, read back out of the feed file on demand.
 */
export const webServer = express()
  .use(express.static("static"))
  .use(express.urlencoded({ extended: true }))
  .get("/", (req, res) => res.send(layout(newInbox())))
  .post("/", async (req, res, next) => {
    try {
      const { name } = req.body;
      // With `extended: true` a client can submit `name` as an array or an
      // object, or leave it out entirely. Mirror the constraints of the form
      // (`required`, `maxlength="500"`) instead of failing later with a 500.
      if (typeof name !== "string" || name === "" || name.length > 500)
        return res.sendStatus(400);
      const identifier = createIdentifier();
      const renderedCreated = created(identifier);
      await writeFileAtomic(
        feedFilePath(identifier),
        feed(
          identifier,
          X(name),
          entry(
            identifier,
            createIdentifier(),
            `“${X(name)}” Inbox Created`,
            "Kill the Newsletter!",
            X(renderedCreated)
          )
        )
      );
      res.send(
        layout(html`
          <p><strong>“${H(name)}” Inbox Created</strong></p>
          ${renderedCreated}
        `)
      );
    } catch (error) {
      console.error(error);
      next(error);
    }
  })
  .get(
    alternatePath(":feedIdentifier", ":entryIdentifier"),
    async (req, res, next) => {
      try {
        const { feedIdentifier, entryIdentifier } = req.params;
        // Both values end up in a file path and in a CSS selector. Route
        // parameters are URL-decoded (so `%2F` becomes `/`), hence only accept
        // the same `\w+` shape the email server accepts for identifiers.
        const identifierPattern = /^\w+$/;
        if (
          !identifierPattern.test(feedIdentifier) ||
          !identifierPattern.test(entryIdentifier)
        )
          return res.sendStatus(404);
        const path = feedFilePath(feedIdentifier);
        let text: string;
        try {
          text = await fs.readFile(path, "utf8");
        } catch {
          // Unknown feed (or unreadable file): treat as not found.
          return res.sendStatus(404);
        }
        const feed = new JSDOM(text, { contentType: "text/xml" });
        const document = feed.window.document;
        const link = document.querySelector(
          `link[href="${alternateURL(feedIdentifier, entryIdentifier)}"]`
        );
        const content = link?.parentElement?.querySelector("content");
        if (content == null) return res.sendStatus(404);
        // The XML parser has already turned the escaped entry back into HTML;
        // decoding entities a second time would corrupt text such as `&lt;`.
        res.send(content.textContent ?? "");
      } catch (error) {
        console.error(error);
        next(error);
      }
    }
  )
  .listen(WEB_PORT, () => console.log(`Server started: ${BASE_URL}`));
2020-05-05 08:12:57 +02:00
/**
 * SMTP front end, started on `EMAIL_PORT`.
 *
 * Every message is parsed once. For each distinct recipient of the form
 * `<identifier>@EMAIL_DOMAIN` whose feed file exists, a new entry is inserted
 * at the top of that feed, the feed's `<updated>` is refreshed, and the oldest
 * entries are dropped while the serialized feed is longer than 500,000
 * characters. Recipients that do not match, or whose feed cannot be read, are
 * skipped silently. Any other failure is logged and reported to the sender.
 */
export const emailServer = new SMTPServer({
  disabledCommands: ["AUTH", "STARTTLS"],
  async onData(stream, session, callback) {
    try {
      const email = await mailparser.simpleParser(stream);
      const content =
        typeof email.html === "string" ? email.html : email.textAsHtml ?? "";
      // Built once per message. The `i` flag makes the domain comparison
      // case-insensitive; the local part is lower-cased below.
      const recipientPattern = new RegExp(
        `^(?<identifier>\\w+)@${R(EMAIL_DOMAIN)}$`,
        "i"
      );
      // De-duplicate on the normalized identifier, so that `ID@…` and `id@…`
      // in the same envelope do not add the entry to the same feed twice.
      const identifiers = new Set<string>();
      for (const { address } of session.envelope.rcptTo) {
        const match = address.match(recipientPattern);
        if (match?.groups === undefined) continue;
        identifiers.add(match.groups.identifier.toLowerCase());
      }
      for (const identifier of identifiers) {
        const path = feedFilePath(identifier);
        let text: string;
        try {
          text = await fs.readFile(path, "utf8");
        } catch {
          // No readable feed for this identifier: ignore the recipient.
          continue;
        }
        const feed = new JSDOM(text, { contentType: "text/xml" });
        const document = feed.window.document;
        const updated = document.querySelector("feed > updated");
        if (updated === null) {
          console.error(`Field updated not found: ${path}`);
          continue;
        }
        updated.textContent = now();
        const renderedEntry = entry(
          identifier,
          createIdentifier(),
          X(email.subject ?? ""),
          X(email.from?.text ?? ""),
          X(content)
        );
        // Newest entry goes first; an entry-less feed gets it appended.
        const firstEntry = document.querySelector("feed > entry:first-of-type");
        if (firstEntry === null)
          document
            .querySelector("feed")!
            .insertAdjacentHTML("beforeend", renderedEntry);
        else firstEntry.insertAdjacentHTML("beforebegin", renderedEntry);
        // Trim from the end (oldest entries) until the feed is small enough.
        while (feed.serialize().length > 500_000) {
          const lastEntry = document.querySelector("feed > entry:last-of-type");
          if (lastEntry === null) break;
          lastEntry.remove();
        }
        await writeFileAtomic(
          path,
          html`<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`.trim()
        );
      }
      callback();
    } catch (error) {
      console.error(
        `Failed to receive message: ${JSON.stringify(session, null, 2)}`
      );
      console.error(error);
      // Drain whatever is left of the message before answering the client.
      stream.resume();
      callback(new Error("Failed to receive message. Please try again."));
    }
  },
}).listen(EMAIL_PORT);
/**
 * Wraps a page body in the site's HTML shell (head, header, footer, scripts).
 *
 * @param content HTML placed verbatim inside `<main>`; it is not escaped here.
 * @returns The complete HTML document, trimmed of surrounding whitespace.
 */
function layout(content: string): string {
  return html`
    <!DOCTYPE html>
    <html lang="en">
      <head>
        <meta charset="UTF-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1.0" />
        <title>Kill the Newsletter!</title>
        <meta name="author" content="Leandro Facchinetti" />
        <meta
          name="description"
          content="Convert email newsletters into Atom feeds."
        />
        <link
          rel="icon"
          type="image/png"
          sizes="32x32"
          href="/favicon-32x32.png"
        />
        <link
          rel="icon"
          type="image/png"
          sizes="16x16"
          href="/favicon-16x16.png"
        />
        <link rel="icon" type="image/x-icon" href="/favicon.ico" />
        <link rel="stylesheet" type="text/css" href="/styles.css" />
      </head>
      <body>
        <header>
          <h1><a href="/">Kill the Newsletter!</a></h1>
          <p>Convert email newsletters into Atom feeds</p>
          <p>
            <img
              src="/logo.svg"
              alt="Convert email newsletters into Atom feeds"
            />
          </p>
        </header>
        <main>${content}</main>
        <footer>
          <p>
            By <a href="https://leafac.com">Leandro Facchinetti</a> ·
            <a href="https://github.com/leafac/kill-the-newsletter.com"
              >Source</a
            >
            · <a href="${ISSUE_REPORT}">Report an Issue</a>
          </p>
        </footer>
        <script src="/clipboard.min.js"></script>
        <script src="/scripts.js"></script>
      </body>
    </html>
  `.trim();
}
/**
 * Renders the form that creates an inbox.
 *
 * The form POSTs a single `name` field to `/`; the input is marked `required`
 * and limited to 500 characters.
 *
 * @returns The form's HTML fragment.
 */
function newInbox(): string {
  return html`
    <form method="POST" action="/">
      <p>
        <input
          type="text"
          name="name"
          placeholder="Newsletter Name…"
          maxlength="500"
          size="30"
          required
        />
        <button>Create Inbox</button>
      </p>
    </form>
  `;
}
/**
 * Renders the confirmation shown after an inbox is created: the email address
 * to subscribe with, the Atom feed URL, and a privacy warning.
 *
 * The same fragment is shown on the web page and stored as the content of the
 * feed's first entry.
 *
 * @param identifier Identifier of the newly created feed.
 * @returns The HTML fragment, trimmed of surrounding whitespace.
 */
function created(identifier: string): string {
  return html`
    <p>
      Sign up for the newsletter with<br /><code class="copyable"
        >${feedEmail(identifier)}</code
      >
    </p>
    <p>
      Subscribe to the Atom feed at<br /><code class="copyable"
        >${feedURL(identifier)}</code
      >
    </p>
    <p>
      Don’t share these addresses.<br />They contain an identifier that other
      people could use<br />to send you spam and to control your newsletter
      subscriptions.
    </p>
    <p>Enjoy your readings!</p>
    <p>
      <a href="${BASE_URL}"><strong>Create Another Inbox</strong></a>
    </p>
  `.trim();
}
Don’t store alternates explicitly to save disk space Instead, fetch alternates from within the feed on the demand. This makes alternates marginally more expensive to retrieve, but saves on storage (which we were running out on the DigitalOcean deployment), and is a cleaner architecture overall: no need to keep the feeds and alternates in sync. Here’s a script to migrate existing feeds: // Call me with, for example: env "BASE_URL=https://kill-the-newsletter.com" npx ts-node migrate.ts // I’m idempotent and reentrant, you may call me multiple times if necessary (for example, if the migration fails in the middle for whatever reason) import { promises as fs } from "fs"; import path from "path"; import { JSDOM } from "jsdom"; const BASE_URL = process.env.BASE_URL ?? "http://localhost:8000"; const FEEDS_PATH = "static/feeds"; (async () => { await fs.rmdir("static/alternate", { recursive: true }); for (const feedPath of (await fs.readdir(FEEDS_PATH)).filter((feedPath) => feedPath.endsWith(".xml") )) { const text = await fs.readFile(path.join(FEEDS_PATH, feedPath), "utf-8"); const feed = new JSDOM(text, { contentType: "text/xml" }); const document = feed.window.document; const feedIdentifier = document .querySelector("id")! .textContent!.split(":")[2]; for (const entry of document.querySelectorAll("entry")) { const entryIdentifier = entry .querySelector("id")! .textContent!.split(":")[2]; entry .querySelector(`link[rel="alternate"]`) ?.setAttribute( "href", `${BASE_URL}/alternate/${feedIdentifier}/${entryIdentifier}.html` ); } await fs.writeFile( path.join(FEEDS_PATH, feedPath), `<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`.trim() ); console.log(feedIdentifier); } })();
2020-11-24 18:12:14 +01:00
/**
 * Renders a complete Atom feed document containing one initial entry.
 *
 * `name` and `initialEntry` are interpolated as-is, so callers must pass text
 * that is already XML-escaped and an already-rendered `<entry>` respectively.
 *
 * @param identifier Feed identifier; used for the self link, `<id>` and subtitle.
 * @param name Feed title, already XML-escaped.
 * @param initialEntry Rendered `<entry>` element to place in the feed.
 * @returns The XML document, trimmed so that the XML declaration comes first.
 */
function feed(identifier: string, name: string, initialEntry: string): string {
  return html`
    <?xml version="1.0" encoding="utf-8"?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      <link
        rel="self"
        type="application/atom+xml"
        href="${feedURL(identifier)}"
      />
      <link rel="alternate" type="text/html" href="${BASE_URL}" />
      <id>${urn(identifier)}</id>
      <title>${name}</title>
      <subtitle
        >Kill the Newsletter! Inbox: ${feedEmail(identifier)}
        ${feedURL(identifier)}</subtitle
      >
      <updated>${now()}</updated>
      <author><name>Kill the Newsletter!</name></author>
      ${initialEntry}
    </feed>
  `.trim();
}
2020-07-14 19:42:41 +02:00
/**
 * Renders a single Atom `<entry>` element.
 *
 * `title`, `author` and `content` are interpolated as-is, so callers must pass
 * text that is already XML-escaped. The entry's alternate link points at the
 * URL built from both identifiers, and `<updated>` is the current time.
 *
 * @param feedIdentifier Identifier of the feed the entry belongs to.
 * @param entryIdentifier Identifier of this entry; also used for its `<id>`.
 * @param title Entry title, already XML-escaped.
 * @param author Author name, already XML-escaped.
 * @param content Entry body (HTML), already XML-escaped.
 * @returns The `<entry>` markup, trimmed of surrounding whitespace.
 */
function entry(
  feedIdentifier: string,
  entryIdentifier: string,
  title: string,
  author: string,
  content: string
): string {
  return html`
    <entry>
      <id>${urn(entryIdentifier)}</id>
      <title>${title}</title>
      <author><name>${author}</name></author>
      <updated>${now()}</updated>
      <link
        rel="alternate"
        type="text/html"
        href="${alternateURL(feedIdentifier, entryIdentifier)}"
      />
      <content type="html">${content}</content>
    </entry>
  `.trim();
}
/**
 * Generates a random identifier for a feed or an entry.
 *
 * @returns 20 characters drawn from the digits and lowercase ASCII letters.
 */
function createIdentifier(): string {
  const alphabet = "1234567890qwertyuiopasdfghjklzxcvbnm";
  const identifier = cryptoRandomString({ length: 20, characters: alphabet });
  return identifier;
}
/**
 * Current time as an ISO 8601 string, as produced by `Date#toISOString`.
 */
function now(): string {
  const timestamp = new Date();
  return timestamp.toISOString();
}
Don’t store alternates explicitly to save disk space Instead, fetch alternates from within the feed on the demand. This makes alternates marginally more expensive to retrieve, but saves on storage (which we were running out on the DigitalOcean deployment), and is a cleaner architecture overall: no need to keep the feeds and alternates in sync. Here’s a script to migrate existing feeds: // Call me with, for example: env "BASE_URL=https://kill-the-newsletter.com" npx ts-node migrate.ts // I’m idempotent and reentrant, you may call me multiple times if necessary (for example, if the migration fails in the middle for whatever reason) import { promises as fs } from "fs"; import path from "path"; import { JSDOM } from "jsdom"; const BASE_URL = process.env.BASE_URL ?? "http://localhost:8000"; const FEEDS_PATH = "static/feeds"; (async () => { await fs.rmdir("static/alternate", { recursive: true }); for (const feedPath of (await fs.readdir(FEEDS_PATH)).filter((feedPath) => feedPath.endsWith(".xml") )) { const text = await fs.readFile(path.join(FEEDS_PATH, feedPath), "utf-8"); const feed = new JSDOM(text, { contentType: "text/xml" }); const document = feed.window.document; const feedIdentifier = document .querySelector("id")! .textContent!.split(":")[2]; for (const entry of document.querySelectorAll("entry")) { const entryIdentifier = entry .querySelector("id")! .textContent!.split(":")[2]; entry .querySelector(`link[rel="alternate"]`) ?.setAttribute( "href", `${BASE_URL}/alternate/${feedIdentifier}/${entryIdentifier}.html` ); } await fs.writeFile( path.join(FEEDS_PATH, feedPath), `<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`.trim() ); console.log(feedIdentifier); } })();
2020-11-24 18:12:14 +01:00
/**
 * Location of a feed's XML file, relative to the working directory.
 *
 * The file lives under `static/`, the directory the web server serves.
 *
 * @param identifier Feed identifier; it is not validated here.
 */
function feedFilePath(identifier: string): string {
  return `static/feeds/${identifier}.xml`;
}
/**
 * Public URL of a feed: `BASE_URL` followed by `/feeds/<identifier>.xml`.
 *
 * @param identifier Feed identifier.
 */
function feedURL(identifier: string): string {
  const fileName = identifier + ".xml";
  return BASE_URL + "/feeds/" + fileName;
}
/**
 * Email address of an inbox: the identifier at `EMAIL_DOMAIN`.
 *
 * @param identifier Feed identifier, used as the local part of the address.
 */
function feedEmail(identifier: string): string {
  return identifier + "@" + EMAIL_DOMAIN;
}
Don’t store alternates explicitly to save disk space Instead, fetch alternates from within the feed on the demand. This makes alternates marginally more expensive to retrieve, but saves on storage (which we were running out on the DigitalOcean deployment), and is a cleaner architecture overall: no need to keep the feeds and alternates in sync. Here’s a script to migrate existing feeds: // Call me with, for example: env "BASE_URL=https://kill-the-newsletter.com" npx ts-node migrate.ts // I’m idempotent and reentrant, you may call me multiple times if necessary (for example, if the migration fails in the middle for whatever reason) import { promises as fs } from "fs"; import path from "path"; import { JSDOM } from "jsdom"; const BASE_URL = process.env.BASE_URL ?? "http://localhost:8000"; const FEEDS_PATH = "static/feeds"; (async () => { await fs.rmdir("static/alternate", { recursive: true }); for (const feedPath of (await fs.readdir(FEEDS_PATH)).filter((feedPath) => feedPath.endsWith(".xml") )) { const text = await fs.readFile(path.join(FEEDS_PATH, feedPath), "utf-8"); const feed = new JSDOM(text, { contentType: "text/xml" }); const document = feed.window.document; const feedIdentifier = document .querySelector("id")! .textContent!.split(":")[2]; for (const entry of document.querySelectorAll("entry")) { const entryIdentifier = entry .querySelector("id")! .textContent!.split(":")[2]; entry .querySelector(`link[rel="alternate"]`) ?.setAttribute( "href", `${BASE_URL}/alternate/${feedIdentifier}/${entryIdentifier}.html` ); } await fs.writeFile( path.join(FEEDS_PATH, feedPath), `<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`.trim() ); console.log(feedIdentifier); } })();
2020-11-24 18:12:14 +01:00
/**
 * URL path of an entry's HTML alternate.
 *
 * Also used to declare the Express route, in which case the arguments are the
 * route placeholders (`":feedIdentifier"`, `":entryIdentifier"`).
 *
 * @param feedIdentifier Feed identifier or route placeholder.
 * @param entryIdentifier Entry identifier or route placeholder.
 * @returns `/alternate/<feedIdentifier>/<entryIdentifier>.html`.
 */
function alternatePath(
  feedIdentifier: string,
  entryIdentifier: string
): string {
  return `/alternate/${feedIdentifier}/${entryIdentifier}.html`;
}
Don’t store alternates explicitly to save disk space Instead, fetch alternates from within the feed on the demand. This makes alternates marginally more expensive to retrieve, but saves on storage (which we were running out on the DigitalOcean deployment), and is a cleaner architecture overall: no need to keep the feeds and alternates in sync. Here’s a script to migrate existing feeds: // Call me with, for example: env "BASE_URL=https://kill-the-newsletter.com" npx ts-node migrate.ts // I’m idempotent and reentrant, you may call me multiple times if necessary (for example, if the migration fails in the middle for whatever reason) import { promises as fs } from "fs"; import path from "path"; import { JSDOM } from "jsdom"; const BASE_URL = process.env.BASE_URL ?? "http://localhost:8000"; const FEEDS_PATH = "static/feeds"; (async () => { await fs.rmdir("static/alternate", { recursive: true }); for (const feedPath of (await fs.readdir(FEEDS_PATH)).filter((feedPath) => feedPath.endsWith(".xml") )) { const text = await fs.readFile(path.join(FEEDS_PATH, feedPath), "utf-8"); const feed = new JSDOM(text, { contentType: "text/xml" }); const document = feed.window.document; const feedIdentifier = document .querySelector("id")! .textContent!.split(":")[2]; for (const entry of document.querySelectorAll("entry")) { const entryIdentifier = entry .querySelector("id")! .textContent!.split(":")[2]; entry .querySelector(`link[rel="alternate"]`) ?.setAttribute( "href", `${BASE_URL}/alternate/${feedIdentifier}/${entryIdentifier}.html` ); } await fs.writeFile( path.join(FEEDS_PATH, feedPath), `<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`.trim() ); console.log(feedIdentifier); } })();
2020-11-24 18:12:14 +01:00
/**
 * Absolute URL of an entry's HTML alternate: `BASE_URL` followed by the path
 * from `alternatePath`.
 *
 * @param feedIdentifier Identifier of the feed that contains the entry.
 * @param entryIdentifier Identifier of the entry.
 */
function alternateURL(feedIdentifier: string, entryIdentifier: string): string {
  return `${BASE_URL}${alternatePath(feedIdentifier, entryIdentifier)}`;
}
2020-05-05 08:12:57 +02:00
/**
 * Builds the URN used as the Atom `<id>` of a feed or an entry.
 *
 * @param identifier Feed or entry identifier.
 * @returns The identifier prefixed with `urn:kill-the-newsletter:`.
 */
function urn(identifier: string): string {
  const prefix = "urn:kill-the-newsletter:";
  return prefix + identifier;
}
/**
 * Prepares text for interpolation into XML: the input is first passed through
 * `sanitizeXMLString.sanitize` and the result is then XML-entity-encoded.
 *
 * @param string Raw text.
 * @returns The sanitized, XML-encoded text.
 */
function X(string: string): string {
  const sanitized = sanitizeXMLString.sanitize(string);
  return entities.encodeXML(sanitized);
}
/**
 * Prepares text for interpolation into HTML: the input is first passed through
 * `sanitizeXMLString.sanitize` and the result is then HTML-entity-encoded.
 *
 * @param string Raw text.
 * @returns The sanitized, HTML-encoded text.
 */
function H(string: string): string {
  const sanitized = sanitizeXMLString.sanitize(string);
  return entities.encodeHTML(sanitized);
}