/*
 * Converts the node export in ./whenwe-tidied.json into a Ghost import
 * document and prints it to stdout as pretty-printed JSON.
 *
 * Each exported node becomes a draft post. Tags, post/tag relations,
 * post/author relations and feature-image metadata are collected in
 * module-level indexes while the nodes are processed and are attached to the
 * document just before it is printed.
 */
const data = require("./whenwe-tidied.json");
const fs = require("fs");
//const sh = require("sanitize-html");
const {
  NodeHtmlMarkdown,
  NodeHtmlMarkdownOptions,
} = require("node-html-markdown");

// HTML -> Markdown translator used by sanitize().
const nhm = new NodeHtmlMarkdown({
  useLinkReferenceDefinitions: true,
  useInlineLinks: true,
});

/**
 * Converts a date string made of numeric components separated by any
 * non-digit characters (e.g. "2020-01-02 03:04:05") to an ISO-8601 string,
 * interpreting the components as UTC.
 *
 * @param {string} [datestr] - Date string; components are year, month, day,
 *   hour, minute, second, in that order.
 * @returns {string} ISO-8601 timestamp, or "" when datestr is empty/missing.
 */
function date(datestr) {
  if (!datestr) return "";
  // Missing trailing components fall back to the start of the period so a
  // date-only string no longer yields an invalid Date (which would make
  // toISOString() throw a RangeError).
  const [Y, M = 1, D = 1, h = 0, m = 0, s = 0] = datestr
    .split(/[^0-9]/)
    .map(Number);
  // Date.UTC months are zero-based.
  return new Date(Date.UTC(Y, M - 1, D, h, m, s)).toISOString();
}

// Tags keyed by tag id; filled by mk_tag().
const tag_index = {
  // 1: { id: 1, name: "", slug: "" },
};

// Known users keyed by user id.
const user_index = {
  "6975f732f0a00f00018346d1": {
    id: "6975f732f0a00f00018346d1",
    name: "Janet Woolley",
    slug: "janet",
  },
};

// Feature-image metadata keyed by post id; filled by mk_meta().
const meta_index = {};

// Post/author relations keyed by post id; filled by mk_author().
const author_index = {};

const ghost_data = {
  meta: {
    exported_on: new Date().valueOf(),
    version: "5.0.0", // Ghost version the import is valid for
  },
  data: {
    posts: [],
    // Optionally define post metadata
    posts_meta: [
      /*
      {
        post_id: "1234", // This must be the same as the post it references
        feature_image_alt: "A group of people waving at the camera",
        feature_image_caption: "The team says hello!",
      },
      */
    ],
    // Define the tags
    tags: [],
    // Relate posts to tags
    posts_tags: [],
    // Define the users
    /*
    users: [
      {
        id: "5678", // Unique ID for this author
        name: "Jo Bloggs",
        slug: "jo-blogs",
        email: "jo@example.com",
        profile_image: "/content/images/2025/scenic-background.jpg",
        roles: [
          "Contributor", // Contributor | Author| Editor | Administrator
        ],
      },
    ],
    */
    // Relate posts to authors (posts_authors is attached after the main loop)
  },
};

/**
 * Converts a string to the requested case after trimming it, removing every
 * character that is not an ASCII letter, digit or whitespace, and collapsing
 * whitespace runs to a single space.
 *
 * @param {string} str - Text to convert.
 * @param {"camel"|"pascal"|"snake"|"kebab"} format - Target case.
 * @returns {string} The converted string.
 * @throws {Error} When format is not one of the supported values.
 */
function convertCase(str, format) {
  const sanitiseString = (raw) =>
    raw
      .trim()
      .replace(/[^a-zA-Z0-9\s]/g, "")
      .replace(/\s+/g, " ");

  const formatted = sanitiseString(str);

  switch (format) {
    case "camel":
      // Upper-case the first word character after each space (and drop the
      // space). The pattern must be / (\w)/ — with a doubled backslash it
      // looks for a literal "\w" and never matches.
      return formatted
        .toLowerCase()
        .replace(/ (\w)/g, (_, char) => char.toUpperCase());
    case "pascal":
      return formatted.replace(/(?:^| )(\w)/g, (_, char) => char.toUpperCase());
    case "snake":
      return formatted.toLowerCase().replace(/\s+/g, "_");
    case "kebab":
      return formatted.toLowerCase().replace(/\s+/g, "-");
    default:
      throw new Error("Unsupported format type");
  }
}

/**
 * Registers a tag in tag_index unless one with the same id already exists.
 *
 * @param {string} name - Display name of the tag.
 * @param {string} [id] - Tag id; defaults to the kebab-cased name.
 * @param {string} [slug] - Tag slug; defaults to the id.
 * @returns {string} The tag id.
 */
function mk_tag(name, id, slug) {
  id ??= convertCase(name, "kebab");
  slug ??= id;
  // The first registration wins; later calls with the same id are ignored.
  tag_index[id] ??= { id, name, slug };
  return id;
}

/**
 * Records (or overwrites) the author relation for a post in author_index.
 *
 * @param {number|string} post_id - Id of the post.
 * @param {string} author_id - Id of the author.
 */
function mk_author(post_id, author_id) {
  author_index[post_id] = { post_id, author_id };
}

/**
 * Records feature-image metadata for a post in meta_index when at least one
 * of the caption or alt text is truthy.
 *
 * @param {number|string} post_id - Id of the post.
 * @param {string} [feature_image_caption] - Caption of the feature image.
 * @param {string} [feature_image_alt] - Alt text of the feature image.
 */
function mk_meta(post_id, feature_image_caption, feature_image_alt) {
  if (feature_image_alt || feature_image_caption) {
    meta_index[post_id] = {
      feature_image_alt,
      feature_image_caption,
      post_id,
    };
  }
}

/**
 * Builds the image path for a file name.
 *
 * @param {string} filename - Image file name; surrounding whitespace is trimmed.
 * @returns {string} "content/images/" followed by the trimmed file name.
 * @throws {Error} When filename is empty or missing.
 */
function img_path(filename) {
  if (!filename) throw new Error("No filename");
  return "content/images/" + filename.trim();
}

/**
 * Builds an image descriptor for a file name.
 *
 * @param {string} [filename] - Image file name.
 * @param {number} [height] - Image height; defaults to 100.
 * @param {number} [width] - Image width; defaults to 100.
 * @param {string} [title] - Currently unused.
 * @param {string} [alt] - Currently unused.
 * @returns {{row: number, src: string, width: number, height: number,
 *   filename: string}|undefined} The descriptor, or undefined when filename
 *   is empty or missing.
 * @throws {Error} When filename is truthy but not a string.
 */
function img(filename, height, width, title, alt) {
  if (!filename) return undefined;
  if (typeof filename !== "string")
    throw new Error("not a string: " + filename);
  return {
    row: 0,
    src: img_path(filename),
    width: width ?? 100,
    height: height ?? 100,
    filename: filename,
  };
}

/**
 * Translates an HTML body to Markdown, then rewrites URLs up to and including
 * "/public/" to "content/images/2026/01/" and strips "?itok=<token>" query
 * strings.
 *
 * @param {string} body - HTML source.
 * @returns {string} The cleaned Markdown.
 */
function sanitize(body) {
  return nhm
    .translate(body)
    .replaceAll(/https?:\/[^"]*?\/public\//g, "content/images/2026/01/")
    .replaceAll(/[?]itok=[A-Za-z0-9_-]*/g, "");
}

/**
 * Rewrites URLs up to and including "/public/" to "content/images/2026/01/"
 * and strips "?itok=<token>" query strings, leaving the HTML otherwise
 * untouched.
 *
 * @param {string} body - HTML source.
 * @returns {string} The cleaned HTML.
 */
function sanitize_html(body) {
  return (
    body
      .replaceAll(/https?:\/[^"]*?\/public\//g, "content/images/2026/01/")
      // Match only the token characters (same class as sanitize()). Matching
      // up to the next space would also swallow the attribute's closing
      // quote, e.g. in src="a.jpg?itok=abc" alt="...".
      .replaceAll(/[?]itok=[A-Za-z0-9_-]*/g, "")
  );
}

for (const node of data) {
  let body = sanitize_html(node.body.und[0].safe_value);
  const id = Number(node.nid);
  const feature_image = node.field_featured_image?.und?.[0]?.filename;
  const tags = [];

  // The featured image's title is used as the feature-image caption.
  mk_meta(
    id,
    node.field_featured_image?.und?.[0]?.title,
    node.field_featured_image?.und?.[0]?.alt,
  );

  // Every post is attributed to the same fixed user.
  // TODO: node.field_original_author?.und?.[0]?.value is not mapped yet.
  mk_author(id, "6975f732f0a00f00018346d1");

  const category_id = node.field_category?.und?.[0]?.tid;
  if (category_id) {
    // mk_tag takes (name, id, slug): the readable label is the name and the
    // kebab-style key is the id, matching the other mk_tag calls below.
    tags.push(
      mk_tag(
        "Category " + category_id,
        "category-" + category_id,
        "category-" + category_id,
      ),
    );
  }

  switch (node.type) {
    case "article":
      {
        tags.push(mk_tag("Story", "story", "story"));
        let images = node.field_basic_image_image?.und;
        if (images) {
          // NOTE(review): each image maps to a single space and only a
          // newline is appended to the body, so no image markup reaches the
          // post. Presumably gallery markup was intended here — confirm.
          images = images.map((image) => ` `);
          body += "\n";
        }
      }
      break;
    case "person":
      {
        const surname_at_birth = node.field_surname_at_birth?.und?.[0]?.value;
        const other_surnames = node.field_other_surnames?.und?.[0]?.value;
        tags.push(mk_tag("Person", "person", "person"));
        if (surname_at_birth) {
          tags.push(
            mk_tag(
              surname_at_birth,
              "surname-" + convertCase(surname_at_birth, "kebab"),
            ),
          );
        }
        if (other_surnames) {
          tags.push(
            mk_tag(
              other_surnames,
              "surname-" + convertCase(other_surnames, "kebab"),
            ),
          );
        }
        /* TODO: person fields that are not mapped yet:
        forename_at_birth: node.field_forename_at_birth?.und?.[0]?.value,
        other_forenames: node.field_other_forenames?.und?.[0]?.value,
        title: node.field_title?.und?.[0]?.value,
        date_of_birth: date(node.field_date_of_birth?.und?.[0]?.value),
        date_of_death: date(node.field_date_of_death?.und?.[0]?.value),
        parent_of: node.field_parent_of?.und?.[0]?.value,
        child_of: node.field_child_of?.und?.[0]?.value,
        partner_of: node.field_partner_of?.und?.[0]?.value,
        // lifetime: node.field_lifetime?.und?.[0]?.value,
        */
      }
      break;
  }

  for (const tag_id of tags) {
    ghost_data.data.posts_tags.push({
      post_id: id,
      tag_id,
    });
  }

  ghost_data.data.posts.push({
    id,
    type: "post",
    title: node.title,
    // The slug is the last path segment of the node's alias.
    slug: node.path.alias.replace(/^.*[/]/, ""),
    html: body,
    feature_image: img(feature_image)?.src,
    // node.created / node.changed are multiplied by 1000 to get milliseconds.
    created_at: new Date(Number(node.created) * 1000).toISOString(),
    updated_at: new Date(Number(node.changed) * 1000).toISOString(),
    status: "draft",
  });

  /* Reference: shape of a post in the import document.
  {
    id: "1234", // The post ID, which is referred to in other places in this file
    title: "My Blog Post Title",
    slug: "my-blog-post-title",
    html: "<p>Hello world, this is an article</p>", // You could use `lexical` instead to represent your content
    comment_id: "1234-old-cms-post-id", // The ID from the old CMS, which can be output in the theme
    feature_image: "/content/images/2024/waving.jpg",
    type: "post", // post | page
    status: "published", // published | draft
    visibility: "public", // public | members | paid
    created_at: "2025-06-30 15:31:36",
    updated_at: "2025-07-02 08:22:14",
    published_at: "2025-06-30 15:35:36",
    custom_excerpt: "My custom excerpt",
  },
  */
}

// Attach the collected indexes and emit the finished import document.
ghost_data.data.tags = Object.values(tag_index);
ghost_data.data.posts_authors = Object.values(author_index);
ghost_data.data.posts_meta = Object.values(meta_index);

console.log(JSON.stringify(ghost_data, null, 2));