Fixes & refactorings

This commit is contained in:
Leandro Facchinetti 2020-07-23 16:11:41 +01:00
parent 0402f641ff
commit 80ae8fab52
5 changed files with 289 additions and 205 deletions

178
index.ts
View File

@ -1,10 +1,11 @@
import express from "express";
import { SMTPServer } from "smtp-server";
import mailparser from "mailparser";
import { promises as fs } from "fs";
import * as entities from "entities";
import { JSDOM } from "jsdom";
import * as sanitizeXMLString from "sanitize-xml-string";
import * as entities from "entities";
import R from "escape-string-regexp";
import { JSDOM } from "jsdom";
import { promises as fs } from "fs";
import writeFileAtomic from "write-file-atomic";
import cryptoRandomString from "crypto-random-string";
@ -18,28 +19,26 @@ export const ISSUE_REPORT =
export const webServer = express()
.use(express.static("static"))
.use(express.urlencoded({ extended: true }))
.get("/", (req, res) =>
res.send(
layout(`
<form method="POST" action="/">
<p>
<input type="text" name="name" placeholder="Newsletter Name…" maxlength="500" size="30" required>
<button>Create Inbox</button>
</p>
</form>
`)
)
)
.get("/", (req, res) => res.send(layout(newInbox())))
.post("/", async (req, res, next) => {
try {
const { name } = req.body;
const identifier = createIdentifier();
await writeFileAtomic(alternatePath(identifier), created(identifier));
await writeFileAtomic(feedPath(identifier), feed(X(name), identifier));
await writeFileAtomic(feedPath(identifier), feed(identifier, X(name)));
const renderedCreated = created(identifier);
await addEntryToFeed(
identifier,
entry(
createIdentifier(),
`${X(name)}” Inbox Created`,
"Kill the Newsletter!",
X(renderedCreated)
)
);
res.send(
layout(`
<p><strong>${H(name)} Inbox Created</strong></p>
${created(identifier)}
${renderedCreated}
`)
);
} catch (error) {
@ -47,14 +46,6 @@ export const webServer = express()
next(error);
}
})
.get("/alternate", (req, res) =>
res.send(
layout(`
<p>Typically each entry in a feed includes a link<br>to an online version of the same content,<br>but the content from the entries in a <strong>Kill the Newsletter!</strong><br>feed come from emailsan online version may not even exist<br>so youre reading this instead.</p>
<p><a href="${BASE_URL}"><strong>Create an Inbox</strong></a></p>
`)
)
)
.listen(WEB_PORT);
export const emailServer = new SMTPServer({
@ -62,57 +53,30 @@ export const emailServer = new SMTPServer({
async onData(stream, session, callback) {
try {
const email = await mailparser.simpleParser(stream);
const identifier = createIdentifier();
const content =
typeof email.html === "string" ? email.html : email.textAsHtml ?? "";
await writeFileAtomic(alternatePath(identifier), content);
const newEntry = entry(
X(email.subject ?? ""),
X(email.from?.text ?? ""),
X(content),
identifier
);
for (const { address } of session.envelope.rcptTo) {
const match = address.match(
new RegExp(`^(?<identifier>\\w+)@${EMAIL_DOMAIN}$`)
new RegExp(`^(?<identifier>\\w+)@${R(EMAIL_DOMAIN)}$`)
);
if (match?.groups === undefined) continue;
const path = feedPath(match.groups.identifier);
const xmlText = await fs.readFile(path, "utf8").catch(() => null);
if (xmlText === null) continue;
const xml = new JSDOM(xmlText, { contentType: "text/xml" });
const document = xml.window.document;
const updated = document.querySelector("feed > updated");
if (updated === null)
throw new Error(`Cant find updated field in feed at ${path}.`);
updated.textContent = now();
const firstEntry = document.querySelector("feed > entry:first-of-type");
if (firstEntry !== null)
firstEntry.insertAdjacentHTML("beforebegin", newEntry);
else
document
.querySelector("feed")!
.insertAdjacentHTML("beforeend", newEntry);
while (
document.querySelector("feed > entry") !== null &&
xml.serialize().length > 500_000
) {
const lastEntry = document.querySelector("feed > entry:last-of-type");
const identifier = removeUrn(
lastEntry!.querySelector("id")!.textContent as string
);
await fs.unlink(alternatePath(identifier));
lastEntry!.remove();
}
await writeFileAtomic(
path,
`<?xml version="1.0" encoding="utf-8"?>${xml.serialize()}`
);
const identifier = match.groups.identifier.toLowerCase();
await addEntryToFeed(
identifier,
entry(
createIdentifier(),
X(email.subject ?? ""),
X(email.from?.text ?? ""),
X(content)
)
).catch((error) => {
console.error(error);
});
}
callback();
} catch (error) {
console.error(
`Error receiving email: ${JSON.stringify(session, null, 2)}`
`Failed to receive message: ${JSON.stringify(session, null, 2)}`
);
console.error(error);
stream.resume();
@ -121,10 +85,49 @@ export const emailServer = new SMTPServer({
},
}).listen(EMAIL_PORT);
/**
 * Adds an already-rendered Atom `<entry>` to the top of the feed stored at
 * `feedPath(identifier)`, writes the entry's content to its alternate file,
 * and drops the oldest entries (and their alternate files) while the
 * serialized feed is longer than 500,000 characters.
 *
 * @param identifier - Identifier of the feed to update.
 * @param entry - Markup of the `<entry>` element to insert. It must contain
 *   an `<id>` and a `<content>` element, because both are read back from it.
 * @returns Resolves once the feed file has been rewritten, or immediately
 *   (without doing anything) when the feed file cannot be read.
 * @throws Error when the feed has no `feed > updated` element.
 */
async function addEntryToFeed(
  identifier: string,
  entry: string
): Promise<void> {
  const path = feedPath(identifier);
  let text: string;
  try {
    text = await fs.readFile(path, "utf8");
  } catch {
    // Deliberate best effort: a feed that cannot be read is skipped silently.
    return;
  }
  const feed = new JSDOM(text, { contentType: "text/xml" });
  const document = feed.window.document;
  const updated = document.querySelector("feed > updated");
  if (updated === null) throw new Error(`Field updated not found: ${path}`);
  updated.textContent = now();
  const firstEntry = document.querySelector("feed > entry:first-of-type");
  if (firstEntry === null)
    // `feed > updated` matched above, so a `feed` element is known to exist.
    document.querySelector("feed")!.insertAdjacentHTML("beforeend", entry);
  else firstEntry.insertAdjacentHTML("beforebegin", entry);
  const entryDocument = JSDOM.fragment(entry);
  // NOTE(review): JSDOM.fragment presumably parses `entry` as HTML, so
  // `textContent` may already be entity-decoded once; confirm that the extra
  // `decodeXML` below is intended and is not a double decode.
  await writeFileAtomic(
    alternatePath(getEntryIdentifier(entryDocument)),
    entities.decodeXML(entryDocument.querySelector("content")!.textContent!)
  );
  // The entry just inserted can itself be removed here if it alone pushes the
  // feed over the limit.
  while (feed.serialize().length > 500_000) {
    const lastEntry = document.querySelector("feed > entry:last-of-type");
    if (lastEntry === null) break;
    lastEntry.remove();
    const lastAlternatePath = alternatePath(getEntryIdentifier(lastEntry));
    await fs.unlink(lastAlternatePath).catch((error) => {
      // Removing the alternate is best effort, but only a missing file is
      // reported as such; any other failure is logged with its details.
      if (error?.code === "ENOENT")
        console.error(`File not found: ${lastAlternatePath}`);
      else
        console.error(`Failed to remove file: ${lastAlternatePath}`, error);
    });
  }
  await writeFileAtomic(
    path,
    `<?xml version="1.0" encoding="utf-8"?>${feed.serialize()}`
  );
}
function layout(content: string): string {
return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Kill the Newsletter!</title>
@ -134,8 +137,6 @@ function layout(content: string): string {
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
<link rel="icon" type="image/x-icon" href="/favicon.ico">
<link rel="stylesheet" type="text/css" href="/styles.css">
</head>
<body>
<header>
<h1><a href="/">Kill the Newsletter!</a></h1>
<p>Convert email newsletters into Atom feeds</p>
@ -143,8 +144,17 @@ function layout(content: string): string {
</header>
<main>${content}</main>
<footer><p>By <a href="https://www.leafac.com">Leandro Facchinetti</a> · <a href="https://github.com/leafac/www.kill-the-newsletter.com">Source</a> · <a href="${ISSUE_REPORT}">Report an Issue</a></p></footer>
</body>
</html>
`;
}
/**
 * Returns the HTML markup of the form that POSTs a newsletter name to `/`
 * in order to create an inbox.
 */
function newInbox(): string {
  // The leading and trailing empty items reproduce the newlines that surround
  // the markup.
  const markupLines = [
    "",
    '<form method="POST" action="/">',
    "<p>",
    '<input type="text" name="name" placeholder="Newsletter Name…" maxlength="500" size="30" required>',
    "<button>Create Inbox</button>",
    "</p>",
    "</form>",
    "",
  ];
  return markupLines.join("\n");
}
@ -160,15 +170,13 @@ function created(identifier: string): string {
`.trim();
}
function feed(name: string, identifier: string): string {
function feed(identifier: string, name: string): string {
return `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" type="application/atom+xml" href="${feedURL(
identifier
)}"/>
<link rel="alternate" type="text/html" href="${alternateURL(
identifier
)}"/>
<link rel="alternate" type="text/html" href="${BASE_URL}"/>
<id>${urn(identifier)}</id>
<title>${name}</title>
<subtitle>Kill the Newsletter! Inbox: ${feedEmail(
@ -176,21 +184,15 @@ function feed(name: string, identifier: string): string {
)} ${feedURL(identifier)}</subtitle>
<updated>${now()}</updated>
<author><name>Kill the Newsletter!</name></author>
${entry(
`${name}” Inbox Created`,
"Kill the Newsletter!",
X(created(identifier)),
identifier
)}
</feed>
`;
}
function entry(
identifier: string,
title: string,
author: string,
content: string,
identifier: string
content: string
): string {
return `
<entry>
@ -213,6 +215,10 @@ function createIdentifier(): string {
});
}
/**
 * Extracts the identifier from the `<id>` element found under `entry`.
 *
 * The text of the `<id>` element is split on `:` and the third segment is
 * returned.
 */
function getEntryIdentifier(entry: ParentNode): string {
  const idText = entry.querySelector("id")!.textContent!;
  const [, , identifier] = idText.split(":");
  return identifier;
}
/** Returns the current date and time formatted by `Date.prototype.toISOString`. */
function now(): string {
  const currentInstant = new Date();
  return currentInstant.toISOString();
}
@ -241,10 +247,6 @@ function urn(identifier: string): string {
return `urn:kill-the-newsletter:${identifier}`;
}
/**
 * Removes the first occurrence of the prefix returned by `urn("")` from
 * `identifier` and returns the remainder.
 */
function removeUrn(identifier: string): string {
  const urnPrefix = urn("");
  return identifier.replace(urnPrefix, "");
}
/**
 * Prepares `string` for inclusion in XML: it is first passed through
 * `sanitizeXMLString.sanitize` and the result is then encoded with
 * `entities.encodeXML`.
 */
function X(string: string): string {
  const sanitized = sanitizeXMLString.sanitize(string);
  return entities.encodeXML(sanitized);
}

51
package-lock.json generated
View File

@ -223,6 +223,14 @@
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
},
"dependencies": {
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=",
"dev": true
}
}
},
"color-convert": {
@ -762,6 +770,13 @@
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
},
"dependencies": {
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ="
}
}
},
"color-convert": {
@ -1714,6 +1729,14 @@
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
},
"dependencies": {
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=",
"dev": true
}
}
},
"color-convert": {
@ -1995,6 +2018,13 @@
"has-ansi": "^2.0.0",
"strip-ansi": "^3.0.0",
"supports-color": "^2.0.0"
},
"dependencies": {
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ="
}
}
},
"strip-ansi": {
@ -2556,9 +2586,9 @@
"integrity": "sha1-9EvaEtRbvfnLf4Yu5+SCez3TIlQ="
},
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ="
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
"integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="
},
"escodegen": {
"version": "1.14.1",
@ -6327,6 +6357,13 @@
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
},
"dependencies": {
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ="
}
}
},
"color-convert": {
@ -8182,6 +8219,14 @@
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
},
"dependencies": {
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=",
"dev": true
}
}
},
"ci-info": {

View File

@ -13,6 +13,7 @@
"caddy-npm": "^2.1.1",
"crypto-random-string": "^3.2.0",
"entities": "^2.0.0",
"escape-string-regexp": "^4.0.0",
"express": "^4.17.1",
"jsdom": "^16.2.2",
"mailparser": "^2.7.7",

View File

@ -2,8 +2,7 @@
@import "node_modules/typeface-pt-mono/index.css";
body {
font-family: "PT Sans", sans-serif;
line-height: 1.5;
font: 16px/1.5 "PT Sans", sans-serif;
text-align: center;
padding: 0 1em;
margin: 2em 0;

217
test.ts
View File

@ -1,4 +1,4 @@
import { webServer, emailServer, WEB_PORT, EMAIL_PORT, EMAIL_DOMAIN } from ".";
import { webServer, emailServer, BASE_URL, EMAIL_DOMAIN, EMAIL_PORT } from ".";
import nodemailer from "nodemailer";
import axios from "axios";
import qs from "qs";
@ -6,7 +6,18 @@ import { JSDOM } from "jsdom";
test("create feed", async () => {
const identifier = await createFeed();
expect(await getFeed(identifier)).toMatch("My Feed");
const feed = await getFeed(identifier);
const entry = feed.querySelector("feed > entry:first-of-type")!;
const alternate = await getAlternate(
entry.querySelector("link")!.getAttribute("href")!
);
expect(feed.querySelector("feed > title")!.textContent).toBe("My Feed");
expect(entry.querySelector("title")!.textContent).toBe(
"“My Feed” Inbox Created"
);
expect(alternate.querySelector("p")!.textContent).toMatch(
"Sign up for the newsletter with"
);
});
describe("receive email", () => {
@ -20,8 +31,8 @@ describe("receive email", () => {
html: "<p>HTML content</p>",
});
const after = await getFeed(identifier);
expect(after.match(/<updated>(.*)<\/updated>/)![1]).not.toMatch(
before.match(/<updated>(.*)<\/updated>/)![1]
expect(after.querySelector("feed > updated")!.textContent).not.toBe(
before.querySelector("feed > updated")!.textContent
);
});
@ -34,9 +45,16 @@ describe("receive email", () => {
html: "<p>HTML content</p>",
});
const feed = await getFeed(identifier);
expect(feed).toMatch("publisher@example.com");
expect(feed).toMatch("New Message");
expect(feed).toMatch("HTML content");
const entry = feed.querySelector("feed > entry:first-of-type")!;
const alternate = await getAlternate(
entry.querySelector("link")!.getAttribute("href")!
);
expect(entry.querySelector("author > name")!.textContent).toBe(
"publisher@example.com"
);
expect(entry.querySelector("title")!.textContent).toBe("New Message");
expect(entry.querySelector("content")!.textContent).toMatch("HTML content");
expect(alternate.querySelector("p")!.textContent).toMatch("HTML content");
});
test("text content", async () => {
@ -48,7 +66,12 @@ describe("receive email", () => {
text: "TEXT content",
});
const feed = await getFeed(identifier);
expect(feed).toMatch("TEXT content");
const entry = feed.querySelector("feed > entry:first-of-type")!;
const alternate = await getAlternate(
entry.querySelector("link")!.getAttribute("href")!
);
expect(entry.querySelector("content")!.textContent).toMatch("TEXT content");
expect(alternate.querySelector("p")!.textContent).toMatch("TEXT content");
});
test("rich text content", async () => {
@ -60,8 +83,13 @@ describe("receive email", () => {
text: "TEXT content\n\nhttps://www.leafac.com\n\nMore text",
});
const feed = await getFeed(identifier);
expect(feed).toMatch("TEXT content");
expect(feed).toMatch(`href="https://www.leafac.com"`);
const entry = feed.querySelector("feed > entry:first-of-type")!;
const alternate = await getAlternate(
entry.querySelector("link")!.getAttribute("href")!
);
expect(alternate.querySelector("a")!.getAttribute("href")).toBe(
"https://www.leafac.com"
);
});
test("invalid XML character in HTML", async () => {
@ -73,7 +101,11 @@ describe("receive email", () => {
html: "<p>Invalid XML character (backspace): |\b|💩</p>",
});
const feed = await getFeed(identifier);
expect(feed).toMatch("Invalid XML character (backspace): ||💩");
const entry = feed.querySelector("feed > entry:first-of-type")!;
expect(entry.querySelector("content")!.textContent).toMatchInlineSnapshot(`
"<p>Invalid XML character (backspace): ||💩</p>
"
`);
});
test("invalid XML character in text", async () => {
@ -85,68 +117,26 @@ describe("receive email", () => {
text: "Invalid XML character (backspace): |\b|💩",
});
const feed = await getFeed(identifier);
expect(feed).toMatch(
"Invalid XML character (backspace): |&amp;#x8;|&amp;#x1F4A9;"
const entry = feed.querySelector("feed > entry:first-of-type")!;
expect(entry.querySelector("content")!.textContent).toMatchInlineSnapshot(
`"<p>Invalid XML character (backspace): |&#x8;|&#x1F4A9;</p>"`
);
});
test("missing content", async () => {
test("missing from", async () => {
const identifier = await createFeed();
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
});
const feed = await getFeed(identifier);
expect(feed).toMatch("New Message");
});
test("missing subject", async () => {
const identifier = await createFeed();
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
html: "<p>HTML content</p>",
});
const feed = await getFeed(identifier);
expect(feed).toMatch("HTML content");
const entry = feed.querySelector("feed > entry:first-of-type")!;
expect(entry.querySelector("author > name")!.textContent).toBe("");
expect(entry.querySelector("title")!.textContent).toBe("New Message");
});
test("truncation", async () => {
const identifier = await createFeed();
for (const repetition of [...new Array(4).keys()])
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
text: `REPETITION ${repetition} `.repeat(10_000),
});
const feed = await getFeed(identifier);
expect(feed).toMatch("REPETITION 3");
expect(feed).not.toMatch("REPETITION 0");
});
test("too big entry", async () => {
const identifier = await createFeed();
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
text: `TOO BIG`.repeat(100_000),
});
expect(await getFeed(identifier)).not.toMatch("<entry>");
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
text: `NORMAL SIZE`,
});
const feed = await getFeed(identifier);
expect(feed).toMatch("<entry>");
expect(feed).toMatch("NORMAL SIZE");
});
test("nonexistent address", async () => {
test("nonexistent to", async () => {
await emailClient.sendMail({
from: "publisher@example.com",
to: `nonexistent@${EMAIL_DOMAIN}`,
@ -155,67 +145,114 @@ describe("receive email", () => {
});
});
test("missing from", async () => {
test("missing subject", async () => {
const identifier = await createFeed();
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
html: "<p>HTML content</p>",
});
const feed = await getFeed(identifier);
expect(feed).toMatch("HTML content");
});
const entry = feed.querySelector("feed > entry:first-of-type")!;
expect(entry.querySelector("title")!.textContent).toBe("");
expect(entry.querySelector("author > name")!.textContent).toBe(
"publisher@example.com"
);
});
describe("alternate", () => {
test("HTML content", async () => {
test("missing content", async () => {
const identifier = await createFeed();
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
html: "<p>HTML content</p>",
});
const feed = await getFeed(identifier);
const xml = new JSDOM(feed, { contentType: "text/xml" });
const document = xml.window.document;
const href = document
.querySelector("feed > entry link")!
.getAttribute("href") as string;
const alternate = await getAlternate(href);
expect(feed).toMatch("publisher@example.com");
expect(feed).toMatch("New Message");
expect(feed).toMatch("HTML content");
const entry = feed.querySelector("feed > entry:first-of-type")!;
expect(entry.querySelector("content")!.textContent!.trim()).toBe("");
expect(entry.querySelector("title")!.textContent).toBe("New Message");
});
test("truncation", async () => {
const identifier = await createFeed();
const alternatesURLs = new Array<string>();
for (const repetition of [...new Array(4).keys()]) {
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
text: `REPETITION ${repetition} `.repeat(10_000),
});
const feed = await getFeed(identifier);
const entry = feed.querySelector("feed > entry:first-of-type")!;
alternatesURLs.push(entry.querySelector("link")!.getAttribute("href")!);
}
const feed = await getFeed(identifier);
expect(
feed.querySelector("entry:first-of-type > content")!.textContent
).toMatch("REPETITION 3");
expect(
feed.querySelector("entry:last-of-type > content")!.textContent
).toMatch("REPETITION 1");
expect((await getAlternate(alternatesURLs[3]!)).textContent).toMatch(
"REPETITION 3"
);
await expect(getAlternate(alternatesURLs[0]!)).rejects.toThrowError();
});
test("too big entry", async () => {
const identifier = await createFeed();
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
text: "TOO BIG".repeat(100_000),
});
expect((await getFeed(identifier)).querySelector("entry")).toBeNull();
await emailClient.sendMail({
from: "publisher@example.com",
to: `${identifier}@${EMAIL_DOMAIN}`,
subject: "New Message",
text: `NORMAL SIZE`,
});
expect(
(await getFeed(identifier)).querySelector("entry > content")!.textContent
).toMatchInlineSnapshot(`"<p>NORMAL SIZE</p>"`);
});
});
const webClient = axios.create({
baseURL: BASE_URL,
});
const emailClient = nodemailer.createTransport(
`smtp://${EMAIL_DOMAIN}:${EMAIL_PORT}`
);
afterAll(() => {
webServer.close();
emailServer.close();
});
const webClient = axios.create({
baseURL: `http://localhost:${WEB_PORT}`,
});
const emailClient = nodemailer.createTransport(
`smtp://localhost:${EMAIL_PORT}`
);
async function createFeed(): Promise<string> {
return (
return JSDOM.fragment(
(
await webClient.post(
"/",
qs.stringify({
name: "My Feed",
})
)
).data.match(/(\w{20}).xml/)![1];
).data
)
.querySelector("code")!
.textContent!.split("@")[0];
}
async function getFeed(identifier: string): Promise<string> {
return (await webClient.get(`/feeds/${identifier}.xml`)).data;
async function getFeed(identifier: string): Promise<Document> {
return new JSDOM((await webClient.get(`/feeds/${identifier}.xml`)).data, {
contentType: "text/xml",
}).window.document;
}
async function getAlternate(url: string): Promise<string> {
return (await webClient.get(url)).data;
async function getAlternate(url: string): Promise<DocumentFragment> {
return JSDOM.fragment((await webClient.get(url)).data);
}