// to-ghost.js
//
// Reads the node export in ./whenwe-tidied.json, converts every node into a
// Ghost post (plus tags, post/tag links, post/author links and feature-image
// metadata) and prints the resulting Ghost import document to stdout as JSON.
// NOTE(review): the `field_*.und[0]` layout looks like a Drupal node export —
// confirm against the producer of whenwe-tidied.json.
const data = require("./whenwe-tidied.json");
const fs = require("fs");
//const sh = require("sanitize-html");
const {
  NodeHtmlMarkdown,
  NodeHtmlMarkdownOptions,
} = require("node-html-markdown");
const nhm = new NodeHtmlMarkdown({
  useLinkReferenceDefinitions: true,
  useInlineLinks: true,
});

/**
 * Convert a date string into an ISO-8601 UTC timestamp.
 *
 * The string is split on every non-digit character and the pieces are read,
 * in order, as year, month, day, hour, minute and second in UTC.
 * Components that are absent (e.g. a date-only "2020-01-02") default to the
 * first month/day and to zero for the time parts; previously they reached
 * Date.UTC as explicit `undefined`, which produced NaN and made
 * toISOString() throw a RangeError.
 *
 * @param {string} [datestr] - Date text such as "2020-01-02 03:04:05".
 * @returns {string} ISO timestamp, or "" when `datestr` is falsy.
 */
function date(datestr) {
  if (!datestr) return "";
  const [Y, M = 1, D = 1, h = 0, m = 0, s = 0] = datestr
    .split(/[^0-9]/)
    .map(Number);
  return new Date(Date.UTC(Y, M - 1, D, h, m, s)).toISOString();
}

// Tags keyed by tag id; filled by mk_tag().
const tag_index = {
  // 1: { id: 1, name: "", slug: "" },
};

// Known users keyed by user id. Not read by the code visible in this file.
const user_index = {
  "6975f732f0a00f00018346d1": {
    id: "6975f732f0a00f00018346d1",
    name: "Janet Woolley",
    slug: "janet",
  },
};

// Feature-image metadata keyed by post id; filled by mk_meta().
const meta_index = {};

// Post/author links keyed by post id; filled by mk_author().
const author_index = {};

// The Ghost import document that is printed at the end of the script.
const ghost_data = {
  meta: {
    exported_on: new Date().valueOf(),
    version: "5.0.0", // Ghost version the import is valid for
  },
  data: {
    posts: [],
    // Optionally define post metadata
    posts_meta: [
      /*
      {
        post_id: "1234", // This must be the same as the post it references
        feature_image_alt: "A group of people waving at the camera",
        feature_image_caption: "The team says hello!",
      },
      */
    ],
    // Define the tags
    tags: [],
    // Relate posts to tags
    posts_tags: [],
    // Define the users
    /*
    users: [
      {
        id: "5678", // Unique ID for this author
        name: "Jo Bloggs",
        slug: "jo-blogs",
        email: "jo@example.com",
        profile_image: "/content/images/2025/scenic-background.jpg",
        roles: [
          "Contributor", // Contributor | Author| Editor | Administrator
        ],
      },
    ],
    */
    // Relate posts to authors
  },
};

/**
 * Re-case a free-text string.
 *
 * The input is trimmed, every character other than ASCII letters, digits and
 * whitespace is removed, and whitespace runs are collapsed to single spaces
 * before the requested format is applied.
 *
 * @param {string} str - Text to convert.
 * @param {"camel"|"pascal"|"snake"|"kebab"} format - Target case.
 * @returns {string} The converted text.
 * @throws {Error} When `format` is not one of the supported values.
 */
function convertCase(str, format) {
  const sanitiseString = (text) =>
    text
      .trim()
      .replace(/[^a-zA-Z0-9\s]/g, "")
      .replace(/\s+/g, " ");
  const formatted = sanitiseString(str);

  switch (format) {
    case "camel":
      // `\w` (not `\\w`): upper-case the letter that follows each space and
      // drop the space. The doubled backslash matched a literal "\w" sequence,
      // which can never occur after sanitising.
      return formatted
        .toLowerCase()
        .replace(/ (\w)/g, (_, char) => char.toUpperCase());
    case "pascal":
      return formatted.replace(/(?:^| )(\w)/g, (_, char) => char.toUpperCase());
    case "snake":
      return formatted.toLowerCase().replace(/\s+/g, "_");
    case "kebab":
      return formatted.toLowerCase().replace(/\s+/g, "-");
    default:
      throw new Error("Unsupported format type");
  }
}

/**
 * Register a tag in tag_index (first registration of an id wins) and return
 * its id.
 *
 * @param {string} name - Display name of the tag.
 * @param {string} [id] - Tag id; defaults to the kebab-cased name.
 * @param {string} [slug] - Tag slug; defaults to the id.
 * @returns {string} The tag id.
 */
function mk_tag(name, id, slug) {
  id ??= convertCase(name, "kebab");
  slug ??= id;
  tag_index[id] ??= { id, name, slug };
  return id;
}

/**
 * Record the author of a post, replacing any earlier entry for that post.
 *
 * @param {number|string} post_id
 * @param {string} author_id
 */
function mk_author(post_id, author_id) {
  author_index[post_id] = { post_id, author_id };
}

/**
 * Record feature-image metadata for a post. Nothing is stored when both the
 * caption and the alt text are falsy.
 *
 * @param {number|string} post_id
 * @param {string} [feature_image_caption]
 * @param {string} [feature_image_alt]
 */
function mk_meta(post_id, feature_image_caption, feature_image_alt) {
  if (feature_image_alt || feature_image_caption) {
    meta_index[post_id] = {
      feature_image_alt,
      feature_image_caption,
      post_id,
    };
  }
}

/**
 * Build the content-relative path of an image file.
 *
 * @param {string} filename
 * @returns {string} "content/images/" followed by the trimmed filename.
 * @throws {Error} When `filename` is falsy.
 */
function img_path(filename) {
  if (!filename) throw new Error("No filename");
  return "content/images/" + filename.trim();
}

/**
 * Build an image descriptor for a file.
 *
 * `title` and `alt` are accepted but not used by the current implementation.
 *
 * @param {string} [filename]
 * @param {number} [height] - Defaults to 100.
 * @param {number} [width] - Defaults to 100.
 * @param {string} [title]
 * @param {string} [alt]
 * @returns {{row: number, src: string, width: number, height: number,
 *   filename: string}|undefined} `undefined` when `filename` is falsy.
 * @throws {Error} When `filename` is truthy but not a string.
 */
function img(filename, height, width, title, alt) {
  if (!filename) return undefined;
  if (typeof filename !== "string") {
    throw new Error("not a string: " + filename);
  }
  return {
    row: 0,
    src: img_path(filename),
    width: width ?? 100,
    height: height ?? 100,
    filename: filename,
  };
}

/**
 * Convert an HTML body to Markdown, then point image URLs at the Ghost
 * content directory and drop `?itok=` tokens.
 *
 * @param {string} body - HTML source.
 * @returns {string} Markdown text.
 */
function sanitize(body) {
  return nhm
    .translate(body)
    .replaceAll(/https?:\/[^"]*?\/public\//g, "content/images/2026/01/")
    .replaceAll(/[?]itok=[A-Za-z0-9_-]*/g, "");
}

/**
 * Point image URLs in an HTML body at the Ghost content directory and drop
 * `?itok=` query strings.
 *
 * The token match stops at whitespace, quotes and angle brackets so that the
 * closing quote of the enclosing attribute is preserved; the earlier
 * `[^ ]*` pattern consumed everything up to the next space, including that
 * quote.
 *
 * @param {string} body - HTML source.
 * @returns {string} The rewritten HTML.
 */
function sanitize_html(body) {
  return body
    .replaceAll(/https?:\/[^"]*?\/public\//g, "content/images/2026/01/")
    .replaceAll(/[?]itok=[^\s"'<>]*/g, "");
}

for (const node of data) {
  let body = sanitize_html(node.body.und[0].safe_value);
  const id = Number(node.nid);
  // Only referenced by the commented-out lexical output below.
  const lexical = {
    root: {
      children: [],
      direction: "ltr",
      format: "",
      indent: 0,
      type: "root",
      version: 1,
    },
  };
  const feature_image = node.field_featured_image?.und?.[0]?.filename;

  const tags = [];
  mk_meta(
    id,
    node.field_featured_image?.und?.[0]?.title,
    node.field_featured_image?.und?.[0]?.alt,
  );
  // Every post is attributed to the same fixed author id.
  mk_author(id, "6975f732f0a00f00018346d1");

  const category_id = node.field_category?.und?.[0]?.tid;
  if (category_id) {
    // mk_tag takes (name, id, slug): display name first, then the kebab id.
    // The name and id were previously passed the other way round.
    tags.push(
      mk_tag(
        `Category ${category_id}`,
        `category-${category_id}`,
        `category-${category_id}`,
      ),
    );
  }
  /*
  lexical.root.children.push({
    children: [
      {
        type: "markdown",
        version: 1,
        markdown: sanitize(body),
      },
    ],
    direction: "ltr",
    format: "",
    indent: 0,
    type: "paragraph",
    version: 1,
  });
*/
  switch (node.type) {
    case "article": {
      tags.push(mk_tag("Story", "story", "story"));

      let images = node.field_basic_image_image?.und;
      if (images) {
        // console.error(">>", images);
        /*
        images = images.map((image) =>
          img(image.filename, img.height, image.width, img.title, img.alt),
        );
        */

        // NOTE(review): in the copy of this file under review both template
        // literals below contain only whitespace and the mapped `images`
        // array is never read afterwards. Presumably the gallery markup was
        // lost somewhere — confirm against the intended output.
        images = images.map(
          (image) =>
            `

            `,
        );

        body += `


`;
        /*
        lexical.root.children.push({
          type: "gallery",
          version: 1,
          images,
          caption: "",
        });
        */
      }
      break;
    }

    case "person": {
      const surname_at_birth = node.field_surname_at_birth?.und?.[0]?.value;
      const other_surnames = node.field_other_surnames?.und?.[0]?.value;

      tags.push(mk_tag("Person", "person", "person"));
      if (surname_at_birth) {
        tags.push(
          mk_tag(
            surname_at_birth,
            "surname-" + convertCase(surname_at_birth, "kebab"),
          ),
        );
      }
      if (other_surnames) {
        tags.push(
          mk_tag(
            other_surnames,
            "surname-" + convertCase(other_surnames, "kebab"),
          ),
        );
      }
      /*
      forename_at_birth: node.field_forename_at_birth?.und?.[0]?.value,
      other_forenames: node.field_other_forenames?.und?.[0]?.value,
      title: node.field_title?.und?.[0]?.value,
      date_of_birth: date(node.field_date_of_birth?.und?.[0]?.value),
      date_of_death: date(node.field_date_of_death?.und?.[0]?.value),
      parent_of: node.field_parent_of?.und?.[0]?.value,
      child_of: node.field_child_of?.und?.[0]?.value,
      partner_of: node.field_partner_of?.und?.[0]?.value,
      // lifetime: node.field_lifetime?.und?.[0]?.value,
      */
      break;
    }
  }

  for (const tag_id of tags) {
    ghost_data.data.posts_tags.push({
      post_id: id,
      tag_id,
    });
  }

  ghost_data.data.posts.push({
    id,
    type: "post",
    title: node.title,
    // Keep only the last path segment of the alias.
    slug: node.path.alias.replace(/^.*[/]/, ""),
    html: body,
    feature_image: img(feature_image)?.src,
    // `created` / `changed` are multiplied by 1000 before being handed to
    // Date, i.e. they are treated as timestamps in seconds.
    created_at: new Date(Number(node.created) * 1000).toISOString(),
    updated_at: new Date(Number(node.changed) * 1000).toISOString(),
    status: "draft",
  });

  // const author = node.field_original_author?.und?.[0]?.value;

  /*
  {
    id: "1234", // The post ID, which is referred to in other places in this file
    title: "My Blog Post Title",
    slug: "my-blog-post-title",
    html: "<p>Hello world, this is an article</p>", // You could use `lexical` instead to represent your content
    comment_id: "1234-old-cms-post-id", // The ID from the old CMS, which can be output in the theme
    feature_image: "/content/images/2024/waving.jpg",
    type: "post", // post | page
    status: "published", // published | draft
    visibility: "public", // public | members | paid
    created_at: "2025-06-30 15:31:36",
    updated_at: "2025-07-02 08:22:14",
    published_at: "2025-06-30 15:35:36",
    custom_excerpt: "My custom excerpt",
  },*/
}

ghost_data.data.tags = Object.values(tag_index);
ghost_data.data.posts_authors = Object.values(author_index);
ghost_data.data.posts_meta = Object.values(meta_index);

console.log(JSON.stringify(ghost_data, null, 2));