Files
whenwe2/to-ghost.js
Nick Stokoe b52a691246 to-ghost.js - scrappy script for importing whenwe-tidied.json
whenwe-tidied.json is the drupal export, with some alterations to
de-duplicate images and fix a few other things.
2026-01-25 22:55:28 +00:00

329 lines
8.5 KiB
JavaScript

const data = require("./whenwe-tidied.json");
const fs = require("fs");
//const sh = require("sanitize-html");
const {
NodeHtmlMarkdown,
NodeHtmlMarkdownOptions,
} = require("node-html-markdown");
// Shared HTML-to-Markdown converter instance. In this file it is only used by
// sanitize(), via nhm.translate().
// NOTE(review): the meaning of the two link options below is defined by
// node-html-markdown, not by this script - confirm against the library docs
// that enabling both together gives the intended link style.
const nhm = new NodeHtmlMarkdown({
useLinkReferenceDefinitions: true,
useInlineLinks: true,
});
/**
 * Convert a numeric timestamp string such as "1920-03-04 05:06:07" into an
 * ISO-8601 string, interpreting the components as UTC.
 *
 * The input is split on every non-digit character, so any separator works
 * ("-", ":", " ", "T", ...). Components that are absent default to the start
 * of the period (month 1, day 1, 00:00:00), so date-only values such as
 * "1920-03-04" are accepted instead of producing an invalid Date.
 *
 * @param {string} [datestr] Timestamp as year, month, day, hour, minute, second.
 * @returns {string} The ISO-8601 representation, or "" when datestr is falsy.
 * @throws {RangeError} If the components do not form a valid date.
 */
function date(datestr) {
  if (!datestr) return "";
  // Destructuring defaults only apply to missing (undefined) components.
  const [Y, M = 1, D = 1, h = 0, m = 0, s = 0] = datestr.split(/[^0-9]/);
  // Date.UTC months are zero-based.
  return new Date(Date.UTC(Y, M - 1, D, h, m, s)).toISOString();
}
// Lookup tables that are filled while the nodes are processed and flattened
// into ghost_data.data once the main loop has finished.

// Tags keyed by tag id; entries are added by mk_tag().
const tag_index = {
// 1: { id: 1, name: "", slug: "" },
};
// Known users keyed by user id. The single id here is the same one the main
// loop passes to mk_author() for every post.
// NOTE(review): nothing else in this file reads user_index, so it is not part
// of the emitted JSON - confirm whether that is intended.
const user_index = {
"6975f732f0a00f00018346d1": {
id: "6975f732f0a00f00018346d1",
name: "Janet Woolley",
slug: "janet",
},
};
// Feature-image alt/caption records keyed by post id; filled by mk_meta().
const meta_index = {};
// Post-to-author links keyed by post id; filled by mk_author().
const author_index = {};
// Skeleton of the JSON document this script prints to stdout. The main loop
// appends to data.posts and data.posts_tags; data.tags, data.posts_authors and
// data.posts_meta are assigned from the index tables after the loop.
const ghost_data = {
meta: {
exported_on: new Date().valueOf(),
version: "5.0.0", // Ghost version the import is valid for
},
data: {
posts: [],
// Optional post metadata; replaced after the main loop with the values
// collected in meta_index. Example entry:
posts_meta: [
/*
{
post_id: "1234", // This must be the same as the post it references
feature_image_alt: "A group of people waving at the camera",
feature_image_caption: "The team says hello!",
},
*/
],
// Tag definitions; replaced after the main loop with the values of tag_index
tags: [],
// Relates posts to tags: one { post_id, tag_id } entry per pairing
posts_tags: [],
// Example of a user definition (not emitted by this script)
/*
users: [
{
id: "5678", // Unique ID for this author
name: "Jo Bloggs",
slug: "jo-blogs",
email: "jo@example.com",
profile_image: "/content/images/2025/scenic-background.jpg",
roles: [
"Contributor", // Contributor | Author | Editor | Administrator
],
},
],
*/
// posts_authors (relating posts to authors) is not declared here; it is
// added after the main loop from author_index
},
};
/**
 * Normalise a free-text string and re-case it.
 *
 * Every character other than ASCII letters, digits and whitespace is removed,
 * surrounding whitespace is trimmed and inner whitespace runs collapse to one
 * space; the resulting words are then joined according to `format`.
 *
 * @param {string} str Text to convert.
 * @param {"camel"|"pascal"|"snake"|"kebab"} format Target casing.
 * @returns {string} The converted string.
 * @throws {Error} If `format` is not one of the supported values.
 */
function convertCase(
  str, //: string,
  format, // 'camel' | 'pascal' | 'snake' | 'kebab'
) {
  // Strip punctuation *before* trimming: removing a leading or trailing
  // symbol (e.g. "- Smith") can expose whitespace that would otherwise turn
  // into a stray leading/trailing separator.
  const sanitiseString = (text) =>
    text
      .replace(/[^a-zA-Z0-9\s]/g, "")
      .trim()
      .replace(/\s+/g, " ");
  const formatted = sanitiseString(str);
  switch (format) {
    case "camel":
      // Upper-case the first character of every word after the first and
      // drop the separating space.
      return formatted
        .toLowerCase()
        .replace(/ (\w)/g, (_, char) => char.toUpperCase());
    case "pascal":
      return formatted.replace(/(?:^| )(\w)/g, (_, char) => char.toUpperCase());
    case "snake":
      return formatted.toLowerCase().replace(/\s+/g, "_");
    case "kebab":
      return formatted.toLowerCase().replace(/\s+/g, "-");
    default:
      throw new Error("Unsupported format type");
  }
}
/**
 * Register a tag in tag_index and return its id.
 *
 * A missing (nullish) id is derived from the kebab-cased name, and a missing
 * slug falls back to the id. An id that is already registered keeps its
 * existing entry.
 *
 * @param {string} name Display name of the tag.
 * @param {string} [id] Tag id.
 * @param {string} [slug] Tag slug.
 * @returns {string} The id the tag is registered under.
 */
function mk_tag(name, id, slug) {
  const tag_id = id ?? convertCase(name, "kebab");
  const tag_slug = slug ?? tag_id;
  if (tag_index[tag_id] == null) {
    tag_index[tag_id] = { id: tag_id, name, slug: tag_slug };
  }
  return tag_id;
}
/**
 * Record which author a post belongs to, replacing any earlier record for
 * the same post.
 *
 * @param {number|string} post_id Id of the post.
 * @param {string} author_id Id of the author.
 */
function mk_author(post_id, author_id) {
  const link = { post_id, author_id };
  author_index[post_id] = link;
}
/**
 * Record feature-image metadata for a post. Nothing is stored when both the
 * caption and the alt text are falsy.
 *
 * @param {number|string} post_id Id of the post the metadata belongs to.
 * @param {string} [feature_image_caption] Caption for the feature image.
 * @param {string} [feature_image_alt] Alt text for the feature image.
 */
function mk_meta(post_id, feature_image_caption, feature_image_alt) {
  const has_meta = Boolean(feature_image_alt || feature_image_caption);
  if (!has_meta) return;
  const record = { feature_image_alt, feature_image_caption, post_id };
  meta_index[post_id] = record;
}
/**
 * Build the image path used in the export for an image file name.
 *
 * @param {string} filename Image file name; surrounding whitespace is trimmed.
 * @returns {string} "content/images/" followed by the trimmed file name.
 * @throws {Error} If filename is falsy.
 */
function img_path(filename) {
  if (filename) {
    return `content/images/${filename.trim()}`;
  }
  throw new Error("No filename");
}
/**
 * Describe an image as a plain object with row, src, width, height and
 * filename properties.
 *
 * @param {string} [filename] Image file name; a falsy value yields undefined.
 * @param {number} [height] Image height; 100 when nullish.
 * @param {number} [width] Image width; 100 when nullish.
 * @param {string} [title] Accepted but not used.
 * @param {string} [alt] Accepted but not used.
 * @returns {{row: number, src: string, width: number, height: number, filename: string}|undefined}
 * @throws {Error} If filename is truthy but not a string.
 */
function img(filename, height, width, title, alt) {
  if (filename && typeof filename !== "string") {
    throw new Error("not a string: " + filename);
  }
  return filename
    ? {
        row: 0,
        src: img_path(filename),
        width: width ?? 100,
        height: height ?? 100,
        filename,
      }
    : undefined;
}
/**
 * Convert an HTML body to Markdown and rewrite its image references.
 *
 * After translation, every URL prefix from "http(s):/" up to and including
 * "/public/" is replaced with "content/images/2026/01/", and "?itok=<token>"
 * query strings are removed.
 *
 * @param {string} body HTML to convert.
 * @returns {string} The rewritten Markdown.
 */
function sanitize(body) {
  const markdown = nhm.translate(body);
  const relinked = markdown.replaceAll(
    /https?:\/[^"]*?\/public\//g,
    "content/images/2026/01/",
  );
  return relinked.replaceAll(/[?]itok=[A-Za-z0-9_-]*/g, "");
}
/**
 * Rewrite image references inside an HTML body, leaving it as HTML.
 *
 * Every URL prefix from "http(s):/" up to and including "/public/" is
 * replaced with "content/images/2026/01/", and "?itok=<token>" query strings
 * are removed.
 *
 * @param {string} body HTML to rewrite.
 * @returns {string} The rewritten HTML.
 */
function sanitize_html(body) {
  return (
    body
      .replaceAll(/https?:\/[^"]*?\/public\//g, "content/images/2026/01/")
      // Match only the token characters (the same class sanitize() uses).
      // Matching "everything up to the next space" would also swallow the
      // attribute's closing quote and any markup that follows it.
      .replaceAll(/[?]itok=[A-Za-z0-9_-]*/g, "")
  );
}
/**
 * Render one HTML attribute (with a leading space) for use inside a tag.
 *
 * @param {string} name Attribute name.
 * @param {*} value Attribute value; null/undefined omits the attribute so the
 *   literal text "undefined"/"null" never reaches the markup.
 * @returns {string} ` name="escaped value"`, or "" when value is nullish.
 */
function html_attr(name, value) {
  if (value == null) return "";
  // NOTE(review): assumes the exported field values are raw text rather than
  // already HTML-escaped - confirm against the Drupal export.
  const escaped = String(value)
    .replaceAll("&", "&amp;")
    .replaceAll('"', "&quot;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;");
  return ` ${name}="${escaped}"`;
}

// Convert every exported node into a draft Ghost post, registering its tags,
// author and feature-image metadata along the way.
for (const node of data) {
  // A node without a body becomes a post with empty HTML instead of aborting
  // the whole run.
  let body = sanitize_html(node.body?.und?.[0]?.safe_value ?? "");
  const id = Number(node.nid);
  const featured = node.field_featured_image?.und?.[0];
  const tags = [];

  mk_meta(id, featured?.title, featured?.alt);
  mk_author(id, "6975f732f0a00f00018346d1");

  const category_id = node.field_category?.und?.[0]?.tid;
  if (category_id) {
    // mk_tag takes (name, id, slug): human-readable name first, then the key.
    const category_key = "category-" + category_id;
    tags.push(mk_tag("Category " + category_id, category_key, category_key));
  }

  switch (node.type) {
    case "article": {
      tags.push(mk_tag("Story", "story", "story"));
      const images = node.field_basic_image_image?.und;
      // Skip the gallery entirely when there are no images to show.
      if (images?.length) {
        const gallery_items = images.map(
          (image) => `
<div class="kg-gallery-image">
<img${html_attr("src", img_path(image.filename))}${html_attr("width", image.width)}${html_attr("height", image.height)} loading="lazy"${html_attr("alt", image.alt ?? "")}${html_attr("title", image.title)}>
</div>
`,
        );
        // Append the images to the post body as a Ghost gallery card.
        body += `
<hr>
<figure class="kg-card kg-gallery-card kg-width-wide">
<div class="kg-gallery-container">
<div class="kg-gallery-row">
${gallery_items.join("")}
</div>
</div>
<figcaption></figcaption>
</figure>
`;
      }
      break;
    }
    case "person": {
      const surname_at_birth = node.field_surname_at_birth?.und?.[0]?.value;
      const other_surnames = node.field_other_surnames?.und?.[0]?.value;
      tags.push(mk_tag("Person", "person", "person"));
      if (surname_at_birth) {
        tags.push(
          mk_tag(
            surname_at_birth,
            "surname-" + convertCase(surname_at_birth, "kebab"),
          ),
        );
      }
      if (other_surnames) {
        tags.push(
          mk_tag(
            other_surnames,
            "surname-" + convertCase(other_surnames, "kebab"),
          ),
        );
      }
      // TODO: person fields that are not yet carried over to Ghost:
      //   field_forename_at_birth, field_other_forenames, field_title,
      //   field_date_of_birth, field_date_of_death (see date()),
      //   field_parent_of, field_child_of, field_partner_of, field_lifetime.
      break;
    }
  }

  // A Set drops repeated ids (e.g. identical birth and other surnames) so
  // each post/tag pairing is emitted once.
  for (const tag_id of new Set(tags)) {
    ghost_data.data.posts_tags.push({
      post_id: id,
      tag_id,
    });
  }

  // Other post fields accepted by the Ghost import format, for reference:
  //   comment_id, visibility ("public" | "members" | "paid"), published_at,
  //   custom_excerpt, and `lexical` as an alternative to `html`.
  ghost_data.data.posts.push({
    id, // referred to by posts_tags, posts_authors and posts_meta
    type: "post", // post | page
    title: node.title,
    // Last path segment of the Drupal alias.
    slug: node.path.alias.replace(/^.*[/]/, ""),
    html: body,
    feature_image: img(featured?.filename)?.src,
    // Drupal timestamps are in seconds; Date expects milliseconds.
    created_at: new Date(Number(node.created) * 1000).toISOString(),
    updated_at: new Date(Number(node.changed) * 1000).toISOString(),
    status: "draft", // published | draft
  });
}
// Flatten the lookup tables into the export document and print it to stdout
// as pretty-printed JSON.
Object.assign(ghost_data.data, {
  tags: Object.values(tag_index),
  posts_authors: Object.values(author_index),
  posts_meta: Object.values(meta_index),
});
const export_json = JSON.stringify(ghost_data, null, 2);
console.log(export_json);