mirror of
https://github.com/showdownjs/showdown.git
synced 2024-03-22 13:30:55 +08:00
fix(HTML parsing): fix HTML parsing issues with nested tags
Deeply nested HTML tags and recursive tags broke the HTML parser. Closes #357, closes #387
This commit is contained in:
parent
813f832160
commit
6fbc072c2c
54
dist/showdown.js
vendored
54
dist/showdown.js
vendored
@ -1,4 +1,4 @@
|
|||||||
;/*! showdown 25-04-2017 */
|
;/*! showdown 28-05-2017 */
|
||||||
(function(){
|
(function(){
|
||||||
/**
|
/**
|
||||||
* Created by Tivie on 13-07-2015.
|
* Created by Tivie on 13-07-2015.
|
||||||
@ -806,6 +806,43 @@ showdown.helper.replaceRecursiveRegExp = function (str, replacement, left, right
|
|||||||
return finalStr;
|
return finalStr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Returns the index within the passed String object of the first occurrence of the specified regex,
 * starting the search at fromIndex. Returns -1 if the value is not found.
 *
 * The search is performed on the substring that begins at fromIndex, so the regex is
 * evaluated against that substring (not against the full string) and the resulting
 * position is then translated back into an index of the original string.
 *
 * @param {string} str string to search
 * @param {RegExp} regex Regular expression to search
 * @param {int} [fromIndex = 0] Index to start the search. Values below 0 are treated as 0 and
 *                              values above str.length are treated as str.length.
 * @returns {Number}
 * @throws InvalidArgumentError
 */
showdown.helper.regexIndexOf = function (str, regex, fromIndex) {
  'use strict';
  if (!showdown.helper.isString(str)) {
    throw 'InvalidArgumentError: first parameter of showdown.helper.regexIndexOf function must be a string';
  }
  if (regex instanceof RegExp === false) {
    throw 'InvalidArgumentError: second parameter of showdown.helper.regexIndexOf function must be an instance of RegExp';
  }
  // String#substring silently clamps its start argument to [0, str.length]. The offset added
  // back to the match position must be clamped the same way, otherwise a negative (or
  // oversized, or non-numeric) fromIndex yields an index that does not point at the match.
  var start = Math.min(Math.max(fromIndex || 0, 0), str.length);
  var indexOf = str.substring(start).search(regex);
  return (indexOf >= 0) ? (indexOf + start) : indexOf;
};
|
||||||
|
|
||||||
|
/**
 * Splits the passed string object at the defined index, and returns an array composed of the two substrings
 *
 * The first element holds the characters before the index, the second element holds the
 * characters from the index (inclusive) to the end of the string.
 *
 * @param {string} str string to split
 * @param {int} index index to split string at
 * @returns {[string,string]}
 * @throws InvalidArgumentError
 */
showdown.helper.splitAtIndex = function (str, index) {
  'use strict';
  if (!showdown.helper.isString(str)) {
    // The message must name this function; it previously pointed at regexIndexOf.
    throw 'InvalidArgumentError: first parameter of showdown.helper.splitAtIndex function must be a string';
  }
  return [str.substring(0, index), str.substring(index)];
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Obfuscate an e-mail address through the use of Character Entities,
|
* Obfuscate an e-mail address through the use of Character Entities,
|
||||||
* transforming ASCII characters into their equivalent decimal or hex entities.
|
* transforming ASCII characters into their equivalent decimal or hex entities.
|
||||||
@ -1905,9 +1942,20 @@ showdown.subParser('hashHTMLBlocks', function (text, options, globals) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
for (var i = 0; i < blockTags.length; ++i) {
|
for (var i = 0; i < blockTags.length; ++i) {
|
||||||
text = showdown.helper.replaceRecursiveRegExp(text, repFunc, '^ {0,3}<' + blockTags[i] + '\\b[^>]*>', '</' + blockTags[i] + '>', 'gim');
|
|
||||||
}
|
|
||||||
|
|
||||||
|
var opTagPos,
|
||||||
|
rgx1 = new RegExp('^ {0,3}<' + blockTags[i] + '\\b[^>]*>', 'im'),
|
||||||
|
patLeft = '<' + blockTags[i] + '\\b[^>]*>',
|
||||||
|
patRight = '</' + blockTags[i] + '>';
|
||||||
|
// 1. Look for the first position of the first opening HTML tag in the text
|
||||||
|
while ((opTagPos = showdown.helper.regexIndexOf(text, rgx1)) !== -1) {
|
||||||
|
//2. Split the text in that position
|
||||||
|
var subTexts = showdown.helper.splitAtIndex(text, opTagPos);
|
||||||
|
//3. Match recursively
|
||||||
|
subTexts[1] = showdown.helper.replaceRecursiveRegExp(subTexts[1], repFunc, patLeft, patRight, 'im');
|
||||||
|
text = subTexts[0].concat(subTexts[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
// HR SPECIAL CASE
|
// HR SPECIAL CASE
|
||||||
text = text.replace(/(\n {0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,
|
text = text.replace(/(\n {0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,
|
||||||
showdown.subParser('hashElement')(text, options, globals));
|
showdown.subParser('hashElement')(text, options, globals));
|
||||||
|
2
dist/showdown.js.map
vendored
2
dist/showdown.js.map
vendored
File diff suppressed because one or more lines are too long
6
dist/showdown.min.js
vendored
6
dist/showdown.min.js
vendored
File diff suppressed because one or more lines are too long
2
dist/showdown.min.js.map
vendored
2
dist/showdown.min.js.map
vendored
File diff suppressed because one or more lines are too long
@ -273,6 +273,43 @@ showdown.helper.replaceRecursiveRegExp = function (str, replacement, left, right
|
|||||||
return finalStr;
|
return finalStr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Returns the index within the passed String object of the first occurrence of the specified regex,
 * starting the search at fromIndex. Returns -1 if the value is not found.
 *
 * The search is performed on the substring that begins at fromIndex, so the regex is
 * evaluated against that substring (not against the full string) and the resulting
 * position is then translated back into an index of the original string.
 *
 * @param {string} str string to search
 * @param {RegExp} regex Regular expression to search
 * @param {int} [fromIndex = 0] Index to start the search. Values below 0 are treated as 0 and
 *                              values above str.length are treated as str.length.
 * @returns {Number}
 * @throws InvalidArgumentError
 */
showdown.helper.regexIndexOf = function (str, regex, fromIndex) {
  'use strict';
  if (!showdown.helper.isString(str)) {
    throw 'InvalidArgumentError: first parameter of showdown.helper.regexIndexOf function must be a string';
  }
  if (regex instanceof RegExp === false) {
    throw 'InvalidArgumentError: second parameter of showdown.helper.regexIndexOf function must be an instance of RegExp';
  }
  // String#substring silently clamps its start argument to [0, str.length]. The offset added
  // back to the match position must be clamped the same way, otherwise a negative (or
  // oversized, or non-numeric) fromIndex yields an index that does not point at the match.
  var start = Math.min(Math.max(fromIndex || 0, 0), str.length);
  var indexOf = str.substring(start).search(regex);
  return (indexOf >= 0) ? (indexOf + start) : indexOf;
};
|
||||||
|
|
||||||
|
/**
 * Splits the passed string object at the defined index, and returns an array composed of the two substrings
 *
 * The first element holds the characters before the index, the second element holds the
 * characters from the index (inclusive) to the end of the string.
 *
 * @param {string} str string to split
 * @param {int} index index to split string at
 * @returns {[string,string]}
 * @throws InvalidArgumentError
 */
showdown.helper.splitAtIndex = function (str, index) {
  'use strict';
  if (!showdown.helper.isString(str)) {
    // The message must name this function; it previously pointed at regexIndexOf.
    throw 'InvalidArgumentError: first parameter of showdown.helper.splitAtIndex function must be a string';
  }
  return [str.substring(0, index), str.substring(index)];
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Obfuscate an e-mail address through the use of Character Entities,
|
* Obfuscate an e-mail address through the use of Character Entities,
|
||||||
* transforming ASCII characters into their equivalent decimal or hex entities.
|
* transforming ASCII characters into their equivalent decimal or hex entities.
|
||||||
|
@ -49,9 +49,20 @@ showdown.subParser('hashHTMLBlocks', function (text, options, globals) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
for (var i = 0; i < blockTags.length; ++i) {
|
for (var i = 0; i < blockTags.length; ++i) {
|
||||||
text = showdown.helper.replaceRecursiveRegExp(text, repFunc, '^ {0,3}<' + blockTags[i] + '\\b[^>]*>', '</' + blockTags[i] + '>', 'gim');
|
|
||||||
}
|
|
||||||
|
|
||||||
|
var opTagPos,
|
||||||
|
rgx1 = new RegExp('^ {0,3}<' + blockTags[i] + '\\b[^>]*>', 'im'),
|
||||||
|
patLeft = '<' + blockTags[i] + '\\b[^>]*>',
|
||||||
|
patRight = '</' + blockTags[i] + '>';
|
||||||
|
// 1. Look for the first position of the first opening HTML tag in the text
|
||||||
|
while ((opTagPos = showdown.helper.regexIndexOf(text, rgx1)) !== -1) {
|
||||||
|
//2. Split the text in that position
|
||||||
|
var subTexts = showdown.helper.splitAtIndex(text, opTagPos);
|
||||||
|
//3. Match recursively
|
||||||
|
subTexts[1] = showdown.helper.replaceRecursiveRegExp(subTexts[1], repFunc, patLeft, patRight, 'im');
|
||||||
|
text = subTexts[0].concat(subTexts[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
// HR SPECIAL CASE
|
// HR SPECIAL CASE
|
||||||
text = text.replace(/(\n {0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,
|
text = text.replace(/(\n {0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,
|
||||||
showdown.subParser('hashElement')(text, options, globals));
|
showdown.subParser('hashElement')(text, options, globals));
|
||||||
|
12
test/issues/deeply-nested-HTML-blocks.html
Normal file
12
test/issues/deeply-nested-HTML-blocks.html
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<div>
|
||||||
|
<div>
|
||||||
|
<div>
|
||||||
|
<div>
|
||||||
|
text
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
text
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
12
test/issues/deeply-nested-HTML-blocks.md
Normal file
12
test/issues/deeply-nested-HTML-blocks.md
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<div>
|
||||||
|
<div>
|
||||||
|
<div>
|
||||||
|
<div>
|
||||||
|
text
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
text
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
3
test/issues/one-line-HTML-input.html
Normal file
3
test/issues/one-line-HTML-input.html
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
<div><div>a</div><div>b</div></div>
|
||||||
|
<pre><code><div>**foobar**</div>
|
||||||
|
</code></pre>
|
3
test/issues/one-line-HTML-input.md
Normal file
3
test/issues/one-line-HTML-input.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
<div><div>a</div><div>b</div></div>
|
||||||
|
|
||||||
|
<div>**foobar**</div>
|
@ -233,3 +233,16 @@ describe('forEach()', function () {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('matchRecursiveRegExp()', function () {
  'use strict';

  // Short alias for the helper under test.
  var matchRecursive = showdown.helper.matchRecursiveRegExp;

  it('should match nested elements', function () {
    // One result entry is expected for the outer pair of delimiters: the whole matched
    // text, the text between the delimiters, the left delimiter and the right delimiter.
    var expected = [['<div><div>a</div></div>', '<div>a</div>', '<div>', '</div>']];
    var actual = matchRecursive('<div><div>a</div></div>', '<div\\b[^>]*>', '</div>', 'gim');

    actual.should.deep.equal(expected);
  });

});
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user