whenwe-tidied.json is the Drupal export, with some alterations to de-duplicate images and fix a few other things.
329 lines
8.5 KiB
JavaScript
329 lines
8.5 KiB
JavaScript
const data = require("./whenwe-tidied.json");
|
|
const fs = require("fs");
|
|
//const sh = require("sanitize-html");
|
|
const {
|
|
NodeHtmlMarkdown,
|
|
NodeHtmlMarkdownOptions,
|
|
} = require("node-html-markdown");
|
|
// Shared HTML-to-Markdown converter; sanitize() calls nhm.translate() on it.
const nhm = new NodeHtmlMarkdown(
  /* options */ {
    useInlineLinks: true,
    useLinkReferenceDefinitions: true,
  },
);
|
|
/**
 * Converts a numeric date string such as "2020-03-04 05:06:07" into an ISO
 * 8601 UTC timestamp.
 *
 * The string is split on every non-digit character and the resulting numbers
 * are read, in order, as year, month, day, hour, minute and second. The value
 * is interpreted as UTC; any trailing components (fractions, offsets) are
 * ignored.
 *
 * Components that are absent default to the start of the period (month 1,
 * day 1, 00:00:00), so a date-only string like "2020-03-04" converts instead
 * of throwing.
 *
 * @param {string} [datestr] - Date string to convert.
 * @returns {string} ISO timestamp, or "" when `datestr` is empty/nullish.
 * @throws {RangeError} If the string contains no usable year.
 */
function date(datestr) {
  if (!datestr) return "";

  // Drop empty fragments produced by adjacent separators so that the
  // remaining numbers keep their positional meaning.
  const [Y, M = 1, D = 1, h = 0, m = 0, s = 0] = datestr
    .split(/[^0-9]/)
    .filter((part) => part !== "")
    .map(Number);

  // Date.UTC months are zero-based.
  return new Date(Date.UTC(Y, M - 1, D, h, m, s)).toISOString();
}
|
|
|
|
// Tags keyed by tag id; filled in by mk_tag().
// Entry shape: { id, name, slug }
const tag_index = {};

// Known users keyed by their id.
const user_index = Object.fromEntries(
  [
    {
      id: "6975f732f0a00f00018346d1",
      name: "Janet Woolley",
      slug: "janet",
    },
  ].map((user) => [user.id, user]),
);

// Feature-image metadata keyed by post id; filled in by mk_meta().
const meta_index = {};

// Post/author relations keyed by post id; filled in by mk_author().
const author_index = {};
|
|
|
|
// Skeleton of the import document that is printed at the end of the script.
// The collections under `data` start empty and are populated while the
// export is processed.
const ghost_data = {
  meta: {
    exported_on: Date.now(),
    // Ghost version the import is valid for
    version: "5.0.0",
  },
  data: {
    posts: [],

    // Optional post metadata. Example entry:
    //   {
    //     post_id: "1234", // This must be the same as the post it references
    //     feature_image_alt: "A group of people waving at the camera",
    //     feature_image_caption: "The team says hello!",
    //   }
    posts_meta: [],

    // Tag definitions
    tags: [],

    // Relations between posts and tags
    posts_tags: [],

    // User definitions are currently not emitted. Example:
    //   users: [
    //     {
    //       id: "5678", // Unique ID for this author
    //       name: "Jo Bloggs",
    //       slug: "jo-blogs",
    //       email: "jo@example.com",
    //       profile_image: "/content/images/2025/scenic-background.jpg",
    //       roles: [
    //         "Contributor", // Contributor | Author| Editor | Administrator
    //       ],
    //     },
    //   ],

    // Relations between posts and authors are attached after processing.
  },
};
|
|
|
|
/**
 * Converts free text into one of several identifier casings.
 *
 * The input is first normalised: every character that is not an ASCII
 * letter, digit or whitespace is removed, runs of whitespace are collapsed
 * to a single space, and the result is trimmed. Trimming happens last so
 * that removed punctuation at the edges (e.g. "Smith ?") cannot leave a
 * dangling separator in the output.
 *
 * @param {string} str - Text to convert.
 * @param {"camel"|"pascal"|"snake"|"kebab"} format - Target casing.
 * @returns {string} The converted text.
 * @throws {Error} "Unsupported format type" for any other `format`.
 */
function convertCase(str, format) {
  const formatted = str
    .replace(/[^a-zA-Z0-9\s]/g, "")
    .replace(/\s+/g, " ")
    .trim();

  switch (format) {
    case "camel":
      // Upper-case the first character of every word after the first and
      // drop the separating space.
      return formatted
        .toLowerCase()
        .replace(/ (\w)/g, (_, char) => char.toUpperCase());
    case "pascal":
      return formatted.replace(/(?:^| )(\w)/g, (_, char) => char.toUpperCase());
    case "snake":
      return formatted.toLowerCase().replace(/\s+/g, "_");
    case "kebab":
      return formatted.toLowerCase().replace(/\s+/g, "-");
    default:
      throw new Error("Unsupported format type");
  }
}
|
|
|
|
/**
 * Registers a tag in `tag_index` unless one with the same id already exists.
 *
 * @param {string} name - Display name of the tag.
 * @param {string} [id] - Tag id; defaults to the kebab-cased `name`.
 * @param {string} [slug] - Tag slug; defaults to the resolved id.
 * @returns {string} The resolved tag id.
 */
function mk_tag(name, id, slug) {
  const tag_id = id ?? convertCase(name, "kebab");
  const tag_slug = slug ?? tag_id;

  // The first registration for an id wins; later calls leave it untouched.
  if (tag_index[tag_id] == null) {
    tag_index[tag_id] = { id: tag_id, name, slug: tag_slug };
  }
  return tag_id;
}
|
|
|
|
/**
 * Records which author a post belongs to, replacing any earlier record for
 * the same post.
 *
 * @param {number|string} post_id - Id of the post.
 * @param {string} author_id - Id of the author.
 */
function mk_author(post_id, author_id) {
  const relation = { post_id, author_id };
  author_index[post_id] = relation;
}
|
|
|
|
/**
 * Stores feature-image metadata for a post in `meta_index`.
 * Nothing is stored when both the caption and the alt text are empty.
 *
 * @param {number|string} post_id - Id of the post the metadata belongs to.
 * @param {string} [feature_image_caption] - Caption for the feature image.
 * @param {string} [feature_image_alt] - Alt text for the feature image.
 */
function mk_meta(post_id, feature_image_caption, feature_image_alt) {
  const nothing_to_store = !feature_image_alt && !feature_image_caption;
  if (nothing_to_store) return;

  const entry = { feature_image_alt, feature_image_caption, post_id };
  meta_index[post_id] = entry;
}
|
|
|
|
/**
 * Builds the image path for a file name by trimming it and prefixing
 * "content/images/".
 *
 * @param {string} filename - Image file name.
 * @returns {string} Path of the image.
 * @throws {Error} "No filename" when `filename` is empty or missing.
 */
function img_path(filename) {
  if (filename) {
    const trimmed = filename.trim();
    return "content/images/" + trimmed;
  }
  throw new Error("No filename");
}
|
|
|
|
/**
 * Describes an image as a plain object with `row`, `src`, `width`, `height`
 * and `filename` properties.
 *
 * @param {string} [filename] - Image file name; a falsy value yields `undefined`.
 * @param {number|string} [height] - Image height; defaults to 100.
 * @param {number|string} [width] - Image width; defaults to 100.
 * @param {string} [title] - Accepted but not used.
 * @param {string} [alt] - Accepted but not used.
 * @returns {object|undefined} The image description, or `undefined` when no
 *   file name was given.
 * @throws {Error} When `filename` is truthy but not a string.
 */
function img(filename, height, width, title, alt) {
  if (!filename) {
    return undefined;
  }
  if (typeof filename !== "string") {
    throw new Error("not a string: " + filename);
  }

  const src = img_path(filename);
  const descriptor = { row: 0, src };
  descriptor.width = width ?? 100;
  descriptor.height = height ?? 100;
  descriptor.filename = filename;
  return descriptor;
}
|
|
|
|
/**
 * Translates an HTML body with `nhm` and then cleans up its URLs: everything
 * from "http(s):/" up to the next "/public/" is replaced with
 * "content/images/2026/01/", and "?itok=..." query tokens are removed.
 *
 * @param {string} body - HTML to convert.
 * @returns {string} The translated and cleaned text.
 */
function sanitize(body) {
  const translated = nhm.translate(body);
  const relocated = translated.replaceAll(
    /https?:\/[^"]*?\/public\//g,
    "content/images/2026/01/",
  );
  return relocated.replaceAll(/[?]itok=[A-Za-z0-9_-]*/g, "");
}
|
|
|
|
/**
 * Cleans up the URLs inside an HTML body without converting it.
 *
 * Everything from "http(s):/" up to the next "/public/" is replaced with
 * "content/images/2026/01/", and "?itok=..." query tokens are removed.
 *
 * The token pattern only consumes URL-safe token characters (the same class
 * sanitize() uses). Matching "anything up to the next space" would also
 * swallow the closing quote of the attribute and corrupt the markup.
 *
 * @param {string} body - HTML to clean.
 * @returns {string} The cleaned HTML.
 */
function sanitize_html(body) {
  return body
    .replaceAll(/https?:\/[^"]*?\/public\//g, "content/images/2026/01/")
    .replaceAll(/[?]itok=[A-Za-z0-9_-]*/g, "");
}
|
|
|
|
/**
 * Escapes a value for use inside a double-quoted HTML attribute.
 * Nullish values become the empty string so that missing fields do not end
 * up as the literal text "undefined" or "null" in the markup.
 *
 * @param {*} value - Value to embed.
 * @returns {string} The attribute-safe text.
 */
function html_attr(value) {
  return String(value ?? "")
    .replaceAll("&", "&amp;")
    .replaceAll('"', "&quot;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;");
}

// Convert every exported node into a draft post, registering its tags,
// author relation and feature-image metadata along the way.
for (const node of data) {
  // Every other field read in this loop tolerates a missing value; treat the
  // body the same way instead of throwing on a node without one.
  let body = sanitize_html(node.body?.und?.[0]?.safe_value ?? "");
  const id = Number(node.nid);
  const featured = node.field_featured_image?.und?.[0];
  const tags = [];

  // mk_meta takes (post_id, caption, alt); the image title is used as caption.
  mk_meta(id, featured?.title, featured?.alt);

  // node.field_original_author is not used: every post is attributed to the
  // one user listed in user_index.
  mk_author(id, "6975f732f0a00f00018346d1");

  const category_id = node.field_category?.und?.[0]?.tid;
  if (category_id) {
    // mk_tag takes (name, id, slug): the human-readable label goes first.
    tags.push(
      mk_tag(
        "Category " + category_id,
        "category-" + category_id,
        "category-" + category_id,
      ),
    );
  }

  switch (node.type) {
    case "article": {
      tags.push(mk_tag("Story", "story", "story"));

      const images = node.field_basic_image_image?.und;
      if (images) {
        // Append the article's images as a gallery card after the body.
        const gallery_items = images.map(
          (image) => `
<div class="kg-gallery-image">
<img src="${html_attr(img_path(image.filename))}" width="${html_attr(image.width)}" height="${html_attr(image.height)}" loading="lazy" alt="${html_attr(image.alt)}" title="${html_attr(image.title)}">
</div>
`,
        );

        body += `
<hr>
<figure class="kg-card kg-gallery-card kg-width-wide">
<div class="kg-gallery-container">
<div class="kg-gallery-row">
${gallery_items.join("")}
</div>
</div>
<figcaption></figcaption>
</figure>
`;
      }
      break;
    }

    case "person": {
      const surname_at_birth = node.field_surname_at_birth?.und?.[0]?.value;
      const other_surnames = node.field_other_surnames?.und?.[0]?.value;

      tags.push(mk_tag("Person", "person", "person"));
      if (surname_at_birth) {
        tags.push(
          mk_tag(
            surname_at_birth,
            "surname-" + convertCase(surname_at_birth, "kebab"),
          ),
        );
      }
      if (other_surnames) {
        tags.push(
          mk_tag(
            other_surnames,
            "surname-" + convertCase(other_surnames, "kebab"),
          ),
        );
      }

      // Person fields that are not migrated yet:
      //   field_forename_at_birth, field_other_forenames, field_title,
      //   field_date_of_birth and field_date_of_death (to go through date()),
      //   field_parent_of, field_child_of, field_partner_of, field_lifetime.
      break;
    }
  }

  for (const tag_id of tags) {
    ghost_data.data.posts_tags.push({ post_id: id, tag_id });
  }

  // Further post fields that could be set here but are not: comment_id,
  // visibility (public | members | paid), published_at, custom_excerpt.
  // `lexical` could be supplied instead of `html`.
  ghost_data.data.posts.push({
    id,
    type: "post", // post | page
    title: node.title,
    // Keep only the last segment of the path alias.
    slug: node.path.alias.replace(/^.*[/]/, ""),
    html: body,
    feature_image: img(featured?.filename)?.src,
    // `created` / `changed` are multiplied by 1000 before being handed to Date.
    created_at: new Date(Number(node.created) * 1000).toISOString(),
    updated_at: new Date(Number(node.changed) * 1000).toISOString(),
    status: "draft", // published | draft
  });
}
|
|
|
|
// Attach the collected indexes to the import document as plain lists, then
// print the whole document as pretty-printed JSON.
Object.assign(ghost_data.data, {
  tags: Object.values(tag_index),
  posts_authors: Object.values(author_index),
  posts_meta: Object.values(meta_index),
});

const ghost_json = JSON.stringify(ghost_data, null, 2);
console.log(ghost_json);
|