gab-social/app/javascript/gabsocial/features/ui/util/draft-to-markdown.js
2020-06-16 19:44:30 -04:00

456 lines
17 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// https://raw.githubusercontent.com/Rosey/markdown-draft-js/main/src/draft-to-markdown.js
const TRAILING_WHITESPACE = /[ \u0020\t\n]*$/;
// This escapes some markdown but there's a few cases that are TODO -
// - List items
// - Back tics (see https://github.com/Rosey/markdown-draft-js/issues/52#issuecomment-388458017)
// - Complex markdown, like links or images. Not sure it's even worth it, because if you're typing
// that into draft chances are you know its markdown and maybe expect it convert? :/
const MARKDOWN_STYLE_CHARACTERS = ['*', '_', '~', '`'];
const MARKDOWN_STYLE_CHARACTER_REGXP = /(\*|_|~|\\|`)/g;
// I hate this a bit, being outside of the functions scope
// but cant think of a better way to keep track of how many ordered list
// items were are on, as draft doesnt explicitly tell us in the raw object 😢.
// This is a hash that will be assigned values based on depth, so like
// orderedListNumber[0] = 1 would mean that ordered list at depth 0 is on number 1.
// orderedListNumber[0] = 2 would mean that ordered list at depth 0 is on number 2.
// This is so we have the right number of numbers when doing a list, eg
// 1. Item One
// 2. Item two
// 3. Item three
// And so on.
var orderedListNumber = {},
previousOrderedListDepth = 0;
// A map of draftjs block types -> markdown open and close characters
// Both the open and close methods must exist, even if they simply return an empty string.
// They should always return a string.
const StyleItems = {
// BLOCK LEVEL
'unordered-list-item': {
open: function () {
return '- ';
},
close: function () {
return '';
}
},
'ordered-list-item': {
open: function (block, number = 1) {
return `${number}. `;
},
close: function () {
return '';
}
},
'blockquote': {
open: function () {
return '> ';
},
close: function () {
return '';
}
},
'header-one': {
open: function () {
return '# ';
},
close: function () {
return '';
}
},
'code-block': {
open: function (block) {
return '```' + (block.data.language || '') + '\n';
},
close: function () {
return '\n```';
}
},
// INLINE LEVEL
'BOLD': {
open: function () {
return '**';
},
close: function () {
return '**';
}
},
'ITALIC': {
open: function () {
return '*';
},
close: function () {
return '*';
}
},
'UNDERLINE': {
open: function () {
return '_';
},
close: function () {
return '_';
}
},
'STRIKETHROUGH': {
open: function () {
return '~~';
},
close: function () {
return '~~';
}
},
'CODE': {
open: function () {
return '`';
},
close: function () {
return '`';
}
}
};
// A map of draftjs entity types -> markdown open and close characters
// entities are different from block types because they have additional data attached to them.
// an entity object is passed in to both open and close, in case it's needed for string generation.
//
// Both the open and close methods must exist, even if they simply return an empty string.
// They should always return a string.
const EntityItems = {
//
}
// Bit of a hack - we normally want a double newline after a block,
// but for list items we just want one (unless it's the _last_ list item in a group.)
const SingleNewlineAfterBlock = [
'unordered-list-item',
'ordered-list-item'
];
function isEmptyBlock(block) {
return block.text.length === 0 && block.entityRanges.length === 0 && Object.keys(block.data || {}).length === 0;
}
/**
* Generate markdown for a single block javascript object
* DraftJS raw object contains an array of blocks, which is the main "structure"
* of the text. Each block = a new line.
*
* @param {Object} block - block to generate markdown for
* @param {Number} index - index of the block in the blocks array
* @param {Object} rawDraftObject - entire raw draft object (needed for accessing the entityMap)
* @param {Object} options - additional options passed in by the user calling this method.
*
* @return {String} markdown string
**/
function renderBlock(block, index, rawDraftObject, options) {
var openInlineStyles = [],
markdownToAdd = [];
var markdownString = '',
customStyleItems = options.styleItems || {},
customEntityItems = options.entityItems || {},
escapeMarkdownCharacters = options.hasOwnProperty('escapeMarkdownCharacters') ? options.escapeMarkdownCharacters : true;
var type = block.type;
var markdownStyleCharactersToEscape = [];
// draft-js emits empty blocks that have type set… dont style them unless the user wants to preserve new lines
// (if newlines are preserved each empty line should be "styled" eg in case of blockquote we want to see a blockquote.)
// but if newlines arent preserved then we'd end up having double or triple or etc markdown characters, which is a bug.
if (isEmptyBlock(block) && !options.preserveNewlines) {
type = 'unstyled';
}
// Render main block wrapping element
if (customStyleItems[type] || StyleItems[type]) {
if (type === 'unordered-list-item' || type === 'ordered-list-item') {
markdownString += ' '.repeat(block.depth * 4);
}
if (type === 'ordered-list-item') {
orderedListNumber[block.depth] = orderedListNumber[block.depth] || 1;
markdownString += (customStyleItems[type] || StyleItems[type]).open(block, orderedListNumber[block.depth]);
orderedListNumber[block.depth]++;
// Have to reset the number for orderedListNumber if we are breaking out of a list so that if
// there's another nested list at the same level further down, it starts at 1 again.
// COMPLICATED 😭
if (previousOrderedListDepth > block.depth) {
orderedListNumber[previousOrderedListDepth] = 1;
}
previousOrderedListDepth = block.depth;
} else {
orderedListNumber = {};
markdownString += (customStyleItems[type] || StyleItems[type]).open(block);
}
}
// Render text within content, along with any inline styles/entities
Array.from(block.text).some(function (character, characterIndex) {
// Close any entity tags that need closing
block.entityRanges.forEach(function (range, rangeIndex) {
if (range.offset + range.length === characterIndex) {
var entity = rawDraftObject.entityMap[range.key];
if (customEntityItems[entity.type] || EntityItems[entity.type]) {
markdownString += (customEntityItems[entity.type] || EntityItems[entity.type]).close(entity);
}
}
});
// Close any inline tags that need closing
openInlineStyles.forEach(function (style, styleIndex) {
if (style.offset + style.length === characterIndex) {
if ((customStyleItems[style.style] || StyleItems[style.style])) {
var styleIndex = openInlineStyles.indexOf(style);
// Handle nested case - close any open inline styles before closing the parent
if (styleIndex > -1 && styleIndex !== openInlineStyles.length - 1) {
for (var i = openInlineStyles.length - 1; i !== styleIndex; i--) {
var styleItem = (customStyleItems[openInlineStyles[i].style] || StyleItems[openInlineStyles[i].style]);
if (styleItem) {
var trailingWhitespace = TRAILING_WHITESPACE.exec(markdownString);
markdownString = markdownString.slice(0, markdownString.length - trailingWhitespace[0].length);
markdownString += styleItem.close();
markdownString += trailingWhitespace[0];
}
}
}
// Close the actual inline style being closed
// Have to trim whitespace first and then re-add after because markdown can't handle leading/trailing whitespace
var trailingWhitespace = TRAILING_WHITESPACE.exec(markdownString);
markdownString = markdownString.slice(0, markdownString.length - trailingWhitespace[0].length);
markdownString += (customStyleItems[style.style] || StyleItems[style.style]).close();
markdownString += trailingWhitespace[0];
// Handle nested case - reopen any inline styles after closing the parent
if (styleIndex > -1 && styleIndex !== openInlineStyles.length - 1) {
for (var i = openInlineStyles.length - 1; i !== styleIndex; i--) {
var styleItem = (customStyleItems[openInlineStyles[i].style] || StyleItems[openInlineStyles[i].style]);
if (styleItem && openInlineStyles[i].offset + openInlineStyles[i].length > characterIndex) {
markdownString += styleItem.open();
} else {
openInlineStyles.splice(i, 1);
}
}
}
openInlineStyles.splice(styleIndex, 1);
}
}
});
// Open any inline tags that need opening
block.inlineStyleRanges.forEach(function (style, styleIndex) {
if (style.offset === characterIndex) {
if ((customStyleItems[style.style] || StyleItems[style.style])) {
var styleToAdd = (customStyleItems[style.style] || StyleItems[style.style]).open();
markdownToAdd.push({
type: 'style',
style: style,
value: styleToAdd
});
}
}
});
// Open any entity tags that need opening
block.entityRanges.forEach(function (range, rangeIndex) {
if (range.offset === characterIndex) {
var entity = rawDraftObject.entityMap[range.key];
if (customEntityItems[entity.type] || EntityItems[entity.type]) {
var entityToAdd = (customEntityItems[entity.type] || EntityItems[entity.type]).open(entity);
markdownToAdd.push({
type: 'entity',
value: entityToAdd
});
}
}
});
// These are all the opening entity and style types being added to the markdown string for this loop
// we store in an array and add here because if the character is WS character, we want to hang onto it and not apply it until the next non-whitespace
// character before adding the markdown, since markdown doesnt play nice with leading whitespace (eg '** bold**' is no good, whereas ' **bold**' is good.)
if (character !== ' ' && markdownToAdd.length) {
markdownString += markdownToAdd.map(function (item) {
return item.value;
}).join('');
markdownToAdd.forEach(function (item) {
if (item.type === 'style') {
// We hang on to this because we may need to close it early and then re-open if there are nested styles being opened and closed.
openInlineStyles.push(item.style);
}
});
markdownToAdd = [];
}
if (block.type !== 'code-block' && escapeMarkdownCharacters) {
let insideInlineCodeStyle = openInlineStyles.find((style) => style.style === 'CODE');
if (insideInlineCodeStyle) {
// Todo - The syntax to escape backtics when inside backtic code already is to use MORE backtics wrapping.
// So we need to see how many backtics in a row we have and then when converting to markdown, use that # + 1
// EG ``Test ` Hllo ``
// OR ```Test `` Hello```
// OR ````Test ``` Hello ````
// Similar work has to be done for codeblocks.
} else {
// Special escape logic for blockquotes and heading characters
if (characterIndex === 0 && character === '#' && block.text[1] && block.text[1] === ' ') {
character = character.replace('#', '\\#');
} else if (characterIndex === 0 && character === '>') {
character = character.replace('>', '\\>');
}
// Escaping inline markdown characters
// 🧹 If someone can think of a more elegant solution, I would love that.
// orginally this was just a little char replace using a simple regular expression, but theres lots of cases where
// a markdown character does not actually get converted to markdown, like this case: http://google.com/i_am_a_link
// so this code now tries to be smart and keeps track of potential “opening” characters as well as potential “closing”
// characters, and only escapes if both opening and closing exist, and they have the correct whitepace-before-open, whitespace-or-end-of-string-after-close pattern
if (MARKDOWN_STYLE_CHARACTERS.includes(character)) {
let openingStyle = markdownStyleCharactersToEscape.find(function (item) {
return item.character === character;
});
if (!openingStyle && block.text[characterIndex - 1] === ' ' && block.text[characterIndex + 1] !== ' ') {
markdownStyleCharactersToEscape.push({
character: character,
index: characterIndex,
markdownStringIndexStart: markdownString.length + character.length - 1,
markdownStringIndexEnd: markdownString.length + character.length
});
} else if (openingStyle && block.text[characterIndex - 1] === character && characterIndex === openingStyle.index + 1) {
openingStyle.markdownStringIndexEnd += 1;
} else if (openingStyle) {
let openingStyleLength = openingStyle.markdownStringIndexEnd - openingStyle.markdownStringIndexStart;
let escapeCharacter = false;
let popOpeningStyle = false;
if (openingStyleLength === 1 && (block.text[characterIndex + 1] === ' ' || !block.text[characterIndex + 1])) {
popOpeningStyle = true;
escapeCharacter = true;
}
if (openingStyleLength === 2 && block.text[characterIndex + 1] === character) {
escapeCharacter = true;
}
if (openingStyleLength === 2 && block.text[characterIndex - 1] === character && (block.text[characterIndex + 1] === ' ' || !block.text[characterIndex + 1])) {
popOpeningStyle = true;
escapeCharacter = true;
}
if (popOpeningStyle) {
markdownStyleCharactersToEscape.splice(markdownStyleCharactersToEscape.indexOf(openingStyle), 1);
let replacementString = markdownString.slice(openingStyle.markdownStringIndexStart, openingStyle.markdownStringIndexEnd);
replacementString = replacementString.replace(MARKDOWN_STYLE_CHARACTER_REGXP, '\\$1');
markdownString = (markdownString.slice(0, openingStyle.markdownStringIndexStart) + replacementString + markdownString.slice(openingStyle.markdownStringIndexEnd));
}
if (escapeCharacter) {
character = `\\${character}`;
}
}
}
}
}
if (character === '\n' && type === 'blockquote') {
markdownString += '\n> ';
} else {
markdownString += character;
}
});
// Close any remaining entity tags
block.entityRanges.forEach(function (range, rangeIndex) {
if (range.offset + range.length === Array.from(block.text).length) {
var entity = rawDraftObject.entityMap[range.key];
if (customEntityItems[entity.type] || EntityItems[entity.type]) {
markdownString += (customEntityItems[entity.type] || EntityItems[entity.type]).close(entity);
}
}
});
// Close any remaining inline tags (if an inline tag ends at the very last char, we won't catch it inside the loop)
openInlineStyles.reverse().forEach(function (style) {
var trailingWhitespace = TRAILING_WHITESPACE.exec(markdownString);
markdownString = markdownString.slice(0, markdownString.length - trailingWhitespace[0].length);
markdownString += (customStyleItems[style.style] || StyleItems[style.style]).close();
markdownString += trailingWhitespace[0];
});
// Close block level item
if (customStyleItems[type] || StyleItems[type]) {
markdownString += (customStyleItems[type] || StyleItems[type]).close(block);
}
// Determine how many newlines to add - generally we want 2, but for list items we just want one when they are succeeded by another list item.
if (SingleNewlineAfterBlock.indexOf(type) !== -1 && rawDraftObject.blocks[index + 1] && SingleNewlineAfterBlock.indexOf(rawDraftObject.blocks[index + 1].type) !== -1) {
markdownString += '\n';
} else if (rawDraftObject.blocks[index + 1]) {
if (rawDraftObject.blocks[index].text) {
if (SingleNewlineAfterBlock.indexOf(type) !== -1
&& SingleNewlineAfterBlock.indexOf(rawDraftObject.blocks[index + 1].type) === -1) {
markdownString += '\n\n';
} else if (!options.preserveNewlines) {
// 2 newlines if not preserving
markdownString += '\n\n';
} else {
markdownString += '\n';
}
} else if (options.preserveNewlines) {
markdownString += '\n';
}
}
return markdownString;
}
/**
* Generate markdown for a raw draftjs object
* DraftJS raw object contains an array of blocks, which is the main "structure"
* of the text. Each block = a new line.
*
* @param {Object} rawDraftObject - draftjs object to generate markdown for
* @param {Object} options - optional additional data, see readme for what options can be passed in.
*
* @return {String} markdown string
**/
function draftToMarkdown(rawDraftObject, options) {
options = options || {};
var markdownString = '';
rawDraftObject.blocks.forEach(function (block, index) {
markdownString += renderBlock(block, index, rawDraftObject, options);
});
orderedListNumber = {}; // See variable definitions at the top of the page to see why we have to do this sad hack.
return markdownString;
}
export default draftToMarkdown;