Files
outline/server/utils/diff.ts
Hemachandar 04c3d81b1f chore: Setup missing oxlint configs (#9862)
* shared

* server

* app

* remove vestigial eslintrc files

* update comment directives
2025-08-06 19:54:22 -04:00

1120 lines
35 KiB
TypeScript

// Forked from https://github.com/inkling/htmldiff.js/blob/master/js/htmldiff.js
// The MIT License (MIT)
// Copyright (c) 2012 The Network Inc. and contributors
// Copyright (c) 2022 idesis GmbH, Max-Keith-Straße 66 (E 11), D-45136 Essen, https://www.idesis.de
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
/**
* htmldiff.js is a library that compares HTML content. It creates a diff between two
* HTML documents by combining the two documents and wrapping the differences with
* <ins> and <del> tags. Here is a high-level overview of how the diff works.
*
* 1. Tokenize the before and after HTML with htmlToTokens.
* 2. Generate a list of operations that convert the before list of tokens to the after
* list of tokens with calculateOperations, which does the following:
* a. Find all the matching blocks of tokens between the before and after lists of
* tokens with findMatchingBlocks. This is done by finding the single longest
* matching block with findMatch, then iteratively finding the next longest
* matching blocks that precede and follow the longest matching block.
* b. Determine insertions, deletions, and replacements from the matching blocks.
* This is done in calculateOperations.
* 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where
* appropriate with renderOperations.
*
* Example usage:
*
* var htmldiff = require('htmldiff.js');
*
* htmldiff('<p>this is some text</p>', '<p>this is some more text</p>')
* == '<p>this is some <ins>more </ins>text</p>'
*
* htmldiff('<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class')
* == '<p>this is some <ins class="diff-class">more </ins>text</p>'
*/
"use strict";
type Token = {
string: string;
key: string;
};
type Segment = {
beforeTokens: Token[];
afterTokens: Token[];
beforeIndex: number;
afterIndex: number;
beforeMap: Record<string, number[]>;
afterMap: Record<string, number[]>;
};
type MatchT = {
segment: Segment;
length: number;
startInBefore: number;
endInBefore: number;
startInAfter: number;
endInAfter: number;
segmentStartInBefore: number;
segmentStartInAfter: number;
segmentEndInBefore: number;
segmentEndInAfter: number;
};
type OperationType = "insert" | "delete" | "replace" | "equal" | "none";
type Operation = {
action: OperationType;
startInBefore: number;
endInBefore: number | null;
startInAfter: number | null;
endInAfter: number | null;
};
function isEndOfTag(char: string) {
return char === ">";
}
function isStartOfTag(char: string) {
return char === "<";
}
function isWhitespace(char: string) {
return /^\s+$/.test(char);
}
/**
* Determines if the given token is a tag.
*
* @param {string} token The token in question.
*
* @return {boolean|string} False if the token is not a tag, or the tag name otherwise.
*/
function isTag(token: string): boolean | string {
const match = token.match(/^\s*<([^!>][^>]*)>\s*$/);
return !!match && match[1].trim().split(" ")[0];
}
function isntTag(token: string) {
return !isTag(token);
}
function isStartofHTMLComment(word: string) {
return /^<!--/.test(word);
}
function isEndOfHTMLComment(word: string) {
return /--\>$/.test(word);
}
/**
* Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
* child nodes should not be compared - the entire tag should be treated as one token. This
* is useful for tags where it does not make sense to insert <ins> and <del> tags.
*
* @param {string} word The characters of the current token read so far.
*
* @return {string|null} The name of the atomic tag if the word will be an atomic tag,
* null otherwise
*/
function isStartOfAtomicTag(word: string): string | null {
// Note: "math" was removed from this list on Outline fork to support math-display, math-inline nodes
const result = /^<(iframe|object|svg|script)/.exec(word);
return result && result[1];
}
/**
* Checks if the current word is the end of an atomic tag (i.e. it has all the characters,
* except for the end bracket of the closing tag, such as '<iframe></iframe').
*
* @param {string} word The characters of the current token read so far.
* @param {string} tag The ending tag to look for.
*
* @return {boolean} True if the word is now a complete token (including the end tag),
* false otherwise.
*/
function isEndOfAtomicTag(word: string, tag: string): boolean {
return word.substring(word.length - tag.length - 2) === "</" + tag;
}
/**
* Checks if a tag is a void tag.
*
* @param {string} token The token to check.
*
* @return {boolean} True if the token is a void tag, false otherwise.
*/
function isVoidTag(token: string): boolean {
return /^\s*<[^>]+\/>\s*$/.test(token);
}
/**
* Checks if a token can be wrapped inside a tag.
*
* @param {string} token The token to check.
*
* @return {boolean} True if the token can be wrapped inside a tag, false otherwise.
*/
function isWrappable(token: string): boolean {
return isntTag(token) || !!isStartOfAtomicTag(token) || isVoidTag(token);
}
/**
* Creates a token that holds a string and key representation. The key is used for diffing
* comparisons and the string is used to recompose the document after the diff is complete.
*
* @param {string} currentWord The section of the document to create a token for.
*
* @return {Token} A token object with a string and key property.
*/
function createToken(currentWord: string): Token {
return {
string: currentWord,
key: getKeyForToken(currentWord),
};
}
/**
* A Match stores the information of a matching block. A matching block is a list of
* consecutive tokens that appear in both the before and after lists of tokens.
*
* @param {number} startInBefore The index of the first token in the list of before tokens.
* @param {number} startInAfter The index of the first token in the list of after tokens.
* @param {number} length The number of consecutive matching tokens in this block.
* @param {Segment} segment The segment where the match was found.
*/
function Match(
startInBefore: number,
startInAfter: number,
length: number,
segment: Segment
) {
this.segment = segment;
this.length = length;
this.startInBefore = startInBefore + segment.beforeIndex;
this.startInAfter = startInAfter + segment.afterIndex;
this.endInBefore = this.startInBefore + this.length - 1;
this.endInAfter = this.startInAfter + this.length - 1;
this.segmentStartInBefore = startInBefore;
this.segmentStartInAfter = startInAfter;
this.segmentEndInBefore = this.segmentStartInBefore + this.length - 1;
this.segmentEndInAfter = this.segmentStartInAfter + this.length - 1;
}
/**
* Tokenizes a string of HTML.
*
* @param {string} html The string to tokenize.
*
* @return {Array} The list of tokens.
*/
function htmlToTokens(html: string): Token[] {
let mode = "char";
let currentWord = "";
let currentAtomicTag = "";
const words = [];
for (let i = 0; i < html.length; i++) {
const char = html[i];
switch (mode) {
case "tag": {
const atomicTag = isStartOfAtomicTag(currentWord);
if (atomicTag) {
mode = "atomic_tag";
currentAtomicTag = atomicTag;
currentWord += char;
} else if (isStartofHTMLComment(currentWord)) {
mode = "html_comment";
currentWord += char;
} else if (isEndOfTag(char)) {
currentWord += ">";
words.push(createToken(currentWord));
currentWord = "";
if (isWhitespace(char)) {
mode = "whitespace";
} else {
mode = "char";
}
} else {
currentWord += char;
}
break;
}
case "atomic_tag":
if (
isEndOfTag(char) &&
isEndOfAtomicTag(currentWord, currentAtomicTag)
) {
currentWord += ">";
words.push(createToken(currentWord));
currentWord = "";
currentAtomicTag = "";
mode = "char";
} else {
currentWord += char;
}
break;
case "html_comment":
currentWord += char;
if (isEndOfHTMLComment(currentWord)) {
currentWord = "";
mode = "char";
}
break;
case "char":
if (isStartOfTag(char)) {
if (currentWord) {
words.push(createToken(currentWord));
}
currentWord = "<";
mode = "tag";
} else if (/\s/.test(char)) {
if (currentWord) {
words.push(createToken(currentWord));
}
currentWord = char;
mode = "whitespace";
} else if (/[\w\d\#@]/.test(char)) {
currentWord += char;
} else if (/&/.test(char)) {
if (currentWord) {
words.push(createToken(currentWord));
}
currentWord = char;
} else {
currentWord += char;
words.push(createToken(currentWord));
currentWord = "";
}
break;
case "whitespace":
if (isStartOfTag(char)) {
if (currentWord) {
words.push(createToken(currentWord));
}
currentWord = "<";
mode = "tag";
} else if (isWhitespace(char)) {
currentWord += char;
} else {
if (currentWord) {
words.push(createToken(currentWord));
}
currentWord = char;
mode = "char";
}
break;
default:
throw new Error("Unknown mode " + mode);
}
}
if (currentWord) {
words.push(createToken(currentWord));
}
return words;
}
/**
* Creates a key that should be used to match tokens. This is useful, for example, if we want
* to consider two open tag tokens as equal, even if they don't have the same attributes. We
* use a key instead of overwriting the token because we may want to render the original string
* without losing the attributes.
*
* @param {string} token The token to create the key for.
*
* @return {string} The identifying key that should be used to match before and after tokens.
*/
function getKeyForToken(token: string): string {
const tagName = /<([^\s>]+)[\s>]/.exec(token);
if (tagName) {
return "<" + tagName[1].toLowerCase() + ">";
}
return token && token.replace(/(\s+|&nbsp;|&#160;)/g, " ");
}
/**
* Creates a map from token key to an array of indices of locations of the matching token in
* the list of all tokens.
*
* @param {Array.<Token>} tokens The list of tokens to be mapped.
*
* @return {Object} A mapping that can be used to search for tokens.
*/
function createMap(tokens: Token[]) {
return tokens.reduce(function (map, token, index) {
if (map[token.key]) {
map[token.key].push(index);
} else {
map[token.key] = [index];
}
return map;
}, Object.create(null));
}
/**
* Compares two match objects to determine if the second match object comes before or after the
* first match object. Returns -1 if the m2 should come before m1. Returns 1 if m1 should come
* before m2. If the two matches criss-cross each other, a null is returned.
*
* @param {MatchT} m1 The first match object to compare.
* @param {MatchT} m2 The second match object to compare.
*
* @return {number} Returns -1 if the m2 should come before m1. Returns 1 if m1 should come
* before m2. If the two matches criss-cross each other, 0 is returned.
*/
function compareMatches(m1: MatchT, m2: MatchT): number {
if (m2.endInBefore < m1.startInBefore && m2.endInAfter < m1.startInAfter) {
return -1;
} else if (
m2.startInBefore > m1.endInBefore &&
m2.startInAfter > m1.endInAfter
) {
return 1;
} else {
return 0;
}
}
/**
* A constructor for a binary search tree used to keep match objects in the proper order as
* they're found.
*
* @constructor
*/
function MatchBinarySearchTree() {
this._root = null;
}
MatchBinarySearchTree.prototype = {
/**
* Adds matches to the binary search tree.
*
* @param {MatchT} value The match to add to the binary search tree.
*/
add(value: MatchT) {
// Create the node to hold the match value.
const node = {
value,
left: null,
right: null,
};
let current = this._root;
if (current) {
// oxlint-disable-next-line no-constant-condition
while (true) {
// Determine if the match value should go to the left or right of the current
// node.
const position = compareMatches(current.value, value);
if (position === -1) {
// The position of the match is to the left of this node.
if (current.left) {
current = current.left;
} else {
current.left = node;
break;
}
} else if (position === 1) {
// The position of the match is to the right of this node.
if (current.right) {
current = current.right;
} else {
current.right = node;
break;
}
} else {
// If 0 was returned from compareMatches, that means the node cannot
// be inserted because it overlaps an existing node.
break;
}
}
} else {
// If no nodes exist in the tree, make this the root node.
this._root = node;
}
},
/**
* Converts the binary search tree into an array using an in-order traversal.
*
* @return {Array.<MatchT>} An array containing the matches in the binary search tree.
*/
toArray(): Array<MatchT> {
type Node = {
value: MatchT;
left: Node;
right: Node;
};
function inOrder(node: Node, nodes: MatchT[]) {
if (node) {
inOrder(node.left, nodes);
nodes.push(node.value);
inOrder(node.right, nodes);
}
return nodes;
}
return inOrder(this._root, []);
},
};
/**
* Finds and returns the best match between the before and after arrays contained in the segment
* provided.
*
* @param {Segment} segment The segment in which to look for a match.
*
* @return {MatchT} The best match.
*/
function findBestMatch(segment: Segment): MatchT | null {
const beforeTokens = segment.beforeTokens;
const afterMap = segment.afterMap;
let lastSpace = null;
let bestMatch: MatchT | null = null;
// Iterate through the entirety of the beforeTokens to find the best match.
for (let beforeIndex = 0; beforeIndex < beforeTokens.length; beforeIndex++) {
let lookBehind = false;
// If the current best match is longer than the remaining tokens, we can bail because we
// won't find a better match.
const remainingTokens = beforeTokens.length - beforeIndex;
if (bestMatch && remainingTokens < (bestMatch as MatchT).length) {
break;
}
// If the current token is whitespace, make a note of it and move on. Trying to start a
// set of matches with whitespace is not efficient because it's too prevelant in most
// documents. Instead, if the next token yields a match, we'll see if the whitespace can
// be included in that match.
const beforeToken = beforeTokens[beforeIndex];
if (beforeToken.key === " ") {
lastSpace = beforeIndex;
continue;
}
// Check to see if we just skipped a space, if so, we'll ask getFullMatch to look behind
// by one token to see if it can include the whitespace.
if (lastSpace === beforeIndex - 1) {
lookBehind = true;
}
// If the current token is not found in the afterTokens, it won't match and we can move
// on.
const afterTokenLocations = afterMap[beforeToken.key];
if (!afterTokenLocations) {
continue;
}
// For each instance of the current token in afterTokens, let's see how big of a match
// we can build.
afterTokenLocations.forEach(function (afterIndex: number) {
// getFullMatch will see how far the current token match will go in both
// beforeTokens and afterTokens.
const bestMatchLength = bestMatch ? bestMatch.length : 0;
const match = getFullMatch(
segment,
beforeIndex,
afterIndex,
bestMatchLength,
lookBehind
);
// If we got a new best match, we'll save it aside.
if (match && match.length > bestMatchLength) {
bestMatch = match;
}
});
}
return bestMatch;
}
/**
* Takes the start of a match, and expands it in the beforeTokens and afterTokens of the
* current segment as far as it can go.
*
* @param {Segment} segment The segment object to search within when expanding the match.
* @param {number} beforeStart The offset within beforeTokens to start looking.
* @param {number} afterStart The offset within afterTokens to start looking.
* @param {number} minLength The minimum length match that must be found.
* @param {boolean} lookBehind If true, attempt to match a whitespace token just before the
* beforeStart and afterStart tokens.
*
* @return {MatchT} The full match.
*/
function getFullMatch(
segment: Segment,
beforeStart: number,
afterStart: number,
minLength: number,
lookBehind: boolean
): MatchT | undefined {
const beforeTokens = segment.beforeTokens;
const afterTokens = segment.afterTokens;
// If we already have a match that goes to the end of the document, no need to keep looking.
const minBeforeIndex = beforeStart + minLength;
const minAfterIndex = afterStart + minLength;
if (
minBeforeIndex >= beforeTokens.length ||
minAfterIndex >= afterTokens.length
) {
return;
}
// If a minLength was provided, we can do a quick check to see if the tokens after that
// length match. If not, we won't be beating the previous best match, and we can bail out
// early.
if (minLength) {
const nextBeforeWord = beforeTokens[minBeforeIndex].key;
const nextAfterWord = afterTokens[minAfterIndex].key;
if (nextBeforeWord !== nextAfterWord) {
return;
}
}
// Extend the current match as far foward as it can go, without overflowing beforeTokens or
// afterTokens.
let searching = true;
let currentLength = 1;
let beforeIndex = beforeStart + currentLength;
let afterIndex = afterStart + currentLength;
while (
searching &&
beforeIndex < beforeTokens.length &&
afterIndex < afterTokens.length
) {
const beforeWord = beforeTokens[beforeIndex].key;
const afterWord = afterTokens[afterIndex].key;
if (beforeWord === afterWord) {
currentLength++;
beforeIndex = beforeStart + currentLength;
afterIndex = afterStart + currentLength;
} else {
searching = false;
}
}
// If we've been asked to look behind, it's because both beforeTokens and afterTokens may
// have a whitespace token just behind the current match that was previously ignored. If so,
// we'll expand the current match to include it.
if (lookBehind && beforeStart > 0 && afterStart > 0) {
const prevBeforeKey = beforeTokens[beforeStart - 1].key;
const prevAfterKey = afterTokens[afterStart - 1].key;
if (prevBeforeKey === " " && prevAfterKey === " ") {
beforeStart--;
afterStart--;
currentLength++;
}
}
// @ts-expect-error: New construct any
return new Match(beforeStart, afterStart, currentLength, segment);
}
/**
* Creates segment objects from the original document that can be used to restrict the area that
* findBestMatch and it's helper functions search to increase performance.
*
* @param {Array.<Token>} beforeTokens Tokens from the before document.
* @param {Array.<Token>} afterTokens Tokens from the after document.
* @param {number} beforeIndex The index within the before document where this segment begins.
* @param {number} afterIndex The index within the after document where this segment behinds.
*
* @return {Segment} The segment object.
*/
function createSegment(
beforeTokens: Token[],
afterTokens: Token[],
beforeIndex: number,
afterIndex: number
): Segment {
return {
beforeTokens,
afterTokens,
beforeMap: createMap(beforeTokens),
afterMap: createMap(afterTokens),
beforeIndex,
afterIndex,
};
}
/**
* Finds all the matching blocks within the given segment in the before and after lists of
* tokens.
*
* @param {Segment} The segment that should be searched for matching blocks.
*
* @return {Array.<Match>} The list of matching blocks in this range.
*/
function findMatchingBlocks(segment: Segment): Array<MatchT> {
// Create a binary search tree to hold the matches we find in order.
// @ts-expect-error: New construct any
const matches = new MatchBinarySearchTree();
let match;
const segments: Segment[] = [segment];
// Each time the best match is found in a segment, zero, one or two new segments may be
// created from the parts of the original segment not included in the match. We will
// continue to iterate until all segments have been processed.
while (segments.length) {
segment = segments.pop() as Segment;
match = findBestMatch(segment);
if (match && match.length) {
// If there's an unmatched area at the start of the segment, create a new segment
// from that area and throw it into the segments array to get processed.
if (match.segmentStartInBefore > 0 && match.segmentStartInAfter > 0) {
const leftBeforeTokens = segment.beforeTokens.slice(
0,
match.segmentStartInBefore
);
const leftAfterTokens = segment.afterTokens.slice(
0,
match.segmentStartInAfter
);
segments.push(
createSegment(
leftBeforeTokens,
leftAfterTokens,
segment.beforeIndex,
segment.afterIndex
)
);
}
// If there's an unmatched area at the end of the segment, create a new segment from that
// area and throw it into the segments array to get processed.
const rightBeforeTokens = segment.beforeTokens.slice(
match.segmentEndInBefore + 1
);
const rightAfterTokens = segment.afterTokens.slice(
match.segmentEndInAfter + 1
);
const rightBeforeIndex =
segment.beforeIndex + match.segmentEndInBefore + 1;
const rightAfterIndex = segment.afterIndex + match.segmentEndInAfter + 1;
if (rightBeforeTokens.length && rightAfterTokens.length) {
segments.push(
createSegment(
rightBeforeTokens,
rightAfterTokens,
rightBeforeIndex,
rightAfterIndex
)
);
}
matches.add(match);
}
}
return matches.toArray();
}
/**
* Gets a list of operations required to transform the before list of tokens into the
* after list of tokens. An operation describes whether a particular list of consecutive
* tokens are equal, replaced, inserted, or deleted.
*
* @param {Array.<string>} beforeTokens The before list of tokens.
* @param {Array.<string>} afterTokens The after list of tokens.
*
* @return {Array.<Object>} The list of operations to transform the before list of
* tokens into the after list of tokens, where each operation has the following
* keys:
* - {string} action One of {'replace', 'insert', 'delete', 'equal'}.
* - {number} startInBefore The beginning of the range in the list of before tokens.
* - {number} endInBefore The end of the range in the list of before tokens.
* - {number} startInAfter The beginning of the range in the list of after tokens.
* - {number} endInAfter The end of the range in the list of after tokens.
*/
function calculateOperations(
beforeTokens: Token[],
afterTokens: Token[]
): Array<Operation> {
if (!beforeTokens) {
throw new Error("Missing beforeTokens");
}
if (!afterTokens) {
throw new Error("Missing afterTokens");
}
let positionInBefore = 0;
let positionInAfter = 0;
const operations: Operation[] = [];
const segment = createSegment(beforeTokens, afterTokens, 0, 0);
const matches = findMatchingBlocks(segment);
// @ts-expect-error New construct any
matches.push(new Match(beforeTokens.length, afterTokens.length, 0, segment));
for (let index = 0; index < matches.length; index++) {
const match = matches[index];
let actionUpToMatchPositions: OperationType = "none";
if (positionInBefore === match.startInBefore) {
if (positionInAfter !== match.startInAfter) {
actionUpToMatchPositions = "insert";
}
} else {
actionUpToMatchPositions = "delete";
if (positionInAfter !== match.startInAfter) {
actionUpToMatchPositions = "replace";
}
}
if (actionUpToMatchPositions !== "none") {
operations.push({
action: actionUpToMatchPositions,
startInBefore: positionInBefore,
endInBefore:
actionUpToMatchPositions !== "insert"
? match.startInBefore - 1
: null,
startInAfter: positionInAfter,
endInAfter:
actionUpToMatchPositions !== "delete" ? match.startInAfter - 1 : null,
});
}
if (match.length !== 0) {
operations.push({
action: "equal",
startInBefore: match.startInBefore,
endInBefore: match.endInBefore,
startInAfter: match.startInAfter,
endInAfter: match.endInAfter,
});
}
positionInBefore = match.endInBefore + 1;
positionInAfter = match.endInAfter + 1;
}
const postProcessed = [];
let lastOp = { action: "none" } as Operation;
function isSingleWhitespace(op: Operation) {
if (op.action !== "equal") {
return false;
}
if ((op.endInBefore ?? 0) - op.startInBefore !== 0) {
return false;
}
return /^\s$/.test(
// @ts-expect-error Not sure why the code slices here
beforeTokens.slice(op.startInBefore, op.endInBefore + 1)
);
}
for (let i = 0; i < operations.length; i++) {
const op = operations[i];
if (
(isSingleWhitespace(op) && lastOp.action === "replace") ||
(op.action === "replace" && lastOp.action === "replace")
) {
lastOp.endInBefore = op.endInBefore;
lastOp.endInAfter = op.endInAfter;
} else {
postProcessed.push(op as Operation);
lastOp = op;
}
}
return postProcessed;
}
/**
* A TokenWrapper provides a utility for grouping segments of tokens based on whether they're
* wrappable or not. A tag is considered wrappable if it is closed within the given set of
* tokens. For example, given the following tokens:
*
* ['</b>', 'this', ' ', 'is', ' ', 'a', ' ', '<b>', 'test', '</b>', '!']
*
* The first '</b>' is not considered wrappable since the tag is not fully contained within the
* array of tokens. The '<b>', 'test', and '</b>' would be a part of the same wrappable segment
* since the entire bold tag is within the set of tokens.
*
* TokenWrapper has a method 'combine' which allows walking over the segments to wrap them in
* tags.
*/
function TokenWrapper(tokens: any) {
this.tokens = tokens;
this.notes = tokens.reduce(
function (data: any, token: any, index: number) {
data.notes.push({
isWrappable: isWrappable(token),
insertedTag: false,
});
const tag = !isVoidTag(token) && isTag(token);
const lastEntry = data.tagStack[data.tagStack.length - 1];
if (tag) {
if (lastEntry && "/" + lastEntry.tag === tag) {
data.notes[lastEntry.position].insertedTag = true;
data.tagStack.pop();
} else {
data.tagStack.push({
tag,
position: index,
});
}
}
return data;
},
{ notes: [], tagStack: [] }
).notes;
}
/**
* Wraps the contained tokens in tags based on output given by a map function. Each segment of
* tokens will be visited. A segment is a continuous run of either all wrappable
* tokens or unwrappable tokens. The given map function will be called with each segment of
* tokens and the resulting strings will be combined to form the wrapped HTML.
*
* @param {function(boolean, Array.<string>)} mapFn A function called with an array of tokens
* and whether those tokens are wrappable or not. The result should be a string.
*/
TokenWrapper.prototype.combine = function (
mapFn: (wrappable: boolean, tokens: Token[]) => void,
tagFn: (tokens: Token[]) => void
) {
const notes = this.notes;
const tokens = this.tokens.slice();
const segments = tokens.reduce(
function (data: any, _token: Token, index: number) {
if (notes[index].insertedTag) {
tokens[index] = tagFn(tokens[index]);
}
if (data.status === null) {
data.status = notes[index].isWrappable;
}
const status = notes[index].isWrappable;
if (status !== data.status) {
data.list.push({
isWrappable: data.status,
tokens: tokens.slice(data.lastIndex, index),
});
data.lastIndex = index;
data.status = status;
}
if (index === tokens.length - 1) {
data.list.push({
isWrappable: data.status,
tokens: tokens.slice(data.lastIndex, index + 1),
});
}
return data;
},
{ list: [], status: null, lastIndex: 0 }
).list;
return segments.map(mapFn).join("");
};
/**
* Wraps and concatenates a list of tokens with a tag. Does not wrap tag tokens,
* unless they are wrappable (i.e. void and atomic tags).
*
* @param {string} tag The tag name of the wrapper tags.
* @param {Array.<string>} content The list of tokens to wrap.
* @param {string} dataPrefix (Optional) The prefix to use in data attributes.
* @param {string} className (Optional) The class name to include in the wrapper tag.
*/
function wrap(
tag: string,
content: Array<string>,
opIndex: number,
dataPrefix: string,
className: string
) {
// @ts-expect-error New constructor any
const wrapper = new TokenWrapper(content);
dataPrefix = dataPrefix ? dataPrefix + "-" : "";
let attrs = " data-" + dataPrefix + 'operation-index="' + opIndex + '"';
if (className) {
attrs += ' class="' + className + '"';
}
return wrapper.combine(
function (segment: any) {
if (segment.isWrappable) {
const val = segment.tokens.join("");
if (val.trim()) {
return "<" + tag + attrs + ">" + val + "</" + tag + ">";
}
} else {
return segment.tokens.join("");
}
return "";
},
function (openingTag: string) {
let dataAttrs = ' data-diff-node="' + tag + '"';
dataAttrs += " data-" + dataPrefix + 'operation-index="' + opIndex + '"';
return openingTag.replace(/>\s*$/, dataAttrs + "$&");
}
);
}
/**
* OPS.equal/insert/delete/replace are functions that render an operation into
* HTML content.
*
* @param {Operation} op The operation that applies to a particular list of tokens.
* @param {Array.<Token>} beforeTokens The before list of tokens.
* @param {Array.<Token>} afterTokens The after list of tokens.
* @param {number} opIndex The index into the list of operations that identifies the change to
* be rendered. This is used to mark wrapped HTML as part of the same operation.
* @param {string} dataPrefix (Optional) The prefix to use in data attributes.
* @param {string} className (Optional) The class name to include in the wrapper tag.
*
* @return {string} The rendering of that operation.
*/
const OPS = {
equal(op: Operation, _beforeTokens: Token[], afterTokens: Token[]) {
const tokens = afterTokens.slice(
op.startInAfter ?? 0,
(op.endInAfter ?? 0) + 1
);
return tokens.reduce(function (prev, curr) {
return prev + curr.string;
}, "");
},
insert(
op: Operation,
_beforeTokens: Token[],
afterTokens: Token[],
opIndex: number,
dataPrefix: string,
className: string
) {
const tokens = afterTokens.slice(
op.startInAfter ?? 0,
(op.endInAfter ?? 0) + 1
);
const val = tokens.map(function (token) {
return token.string;
});
return wrap("ins", val, opIndex, dataPrefix, className);
},
delete(
op: Operation,
beforeTokens: Token[],
_afterTokens: Token[],
opIndex: number,
dataPrefix: string,
className: string
) {
const tokens = beforeTokens.slice(
op.startInBefore,
(op.endInBefore ?? 0) + 1
);
const val = tokens.map(function (token) {
return token.string;
});
return wrap("del", val, opIndex, dataPrefix, className);
},
replace(...rest: any[]) {
return OPS["delete"].apply(null, rest) + OPS["insert"].apply(null, rest);
},
};
/**
* Renders a list of operations into HTML content. The result is the combined version
* of the before and after tokens with the differences wrapped in tags.
*
* @param {Array.<Token>} beforeTokens The before list of tokens.
* @param {Array.<Token>} afterTokens The after list of tokens.
* @param {Array.<Operation>} operations The list of operations to transform the before
* list of tokens into the after list of tokens;
* @param {string} dataPrefix (Optional) The prefix to use in data attributes.
* @param {string} className (Optional) The class name to include in the wrapper tag.
*
* @return {string} The rendering of the list of operations.
*/
function renderOperations(
beforeTokens: Token[],
afterTokens: Token[],
operations: Operation[],
dataPrefix?: string,
className?: string
): string {
return operations.reduce(function (rendering, op, index) {
return (
rendering +
// @ts-expect-error TODO
OPS[op.action]?.(
op,
beforeTokens,
afterTokens,
index,
dataPrefix,
className
)
);
}, "");
}
/**
* Compares two pieces of HTML content and returns the combined content with differences
* wrapped in <ins> and <del> tags.
*
* @param {string} before The HTML content before the changes.
* @param {string} after The HTML content after the changes.
* @param {string} className (Optional) The class attribute to include in <ins> and <del> tags.
* @param {string} dataPrefix (Optional) The data prefix to use for data attributes. The
* operation index data attribute will be named `data-${dataPrefix-}operation-index`.
*
* @return {string} The combined HTML content with differences wrapped in <ins> and <del> tags.
*/
function diff(
before: string,
after: string,
className?: string,
dataPrefix?: string
): string {
if (before === after) {
return before;
}
const beforeTokens = htmlToTokens(before);
const afterTokens = htmlToTokens(after);
const ops = calculateOperations(beforeTokens, afterTokens);
return renderOperations(
beforeTokens,
afterTokens,
ops,
dataPrefix,
className
);
}
export default diff;