fix(hashHTMLBlock): fix issue with html breaking markdown parsing

Closes #220
This commit is contained in:
Estevão Soares dos Santos 2016-01-01 23:33:33 +00:00
parent 1011c0f7ac
commit 2746949d7d
12 changed files with 299 additions and 250 deletions

256
dist/showdown.js vendored
View File

@ -1,4 +1,4 @@
;/*! showdown 22-12-2015 */
;/*! showdown 01-01-2016 */
(function(){
/**
* Created by Tivie on 13-07-2015.
@ -431,6 +431,18 @@ showdown.helper.isString = function isString(a) {
return (typeof a === 'string' || a instanceof String);
};
/**
* Check if var is a function
* @static
* @param {string} a
* @returns {boolean}
*/
showdown.helper.isFunction = function isFunction(a) {
'use strict';
var getType = {};
return a && getType.toString.call(a) === '[object Function]';
};
/**
* ForEach helper function
* @static
@ -520,6 +532,38 @@ showdown.helper.escapeCharacters = function escapeCharacters(text, charsToEscape
return text;
};
var rgxFindMatchPos = function (str, left, right, flags) {
'use strict';
var f = flags || '',
g = f.indexOf('g') > -1,
x = new RegExp(left + '|' + right, 'g' + f.replace(/g/g, '')),
l = new RegExp(left, f.replace(/g/g, '')),
pos = [],
t, s, m, start, end;
do {
t = 0;
while ((m = x.exec(str))) {
if (l.test(m[0])) {
if (!(t++)) {
s = x.lastIndex;
start = s - m[0].length;
}
} else if (t) {
if (!--t) {
end = m.index + m[0].length;
pos.push({start: start, end: end});
if (!g) {
return pos;
}
}
}
}
} while (t && (x.lastIndex = s));
return pos;
};
/**
* matchRecursiveRegExp
*
@ -553,7 +597,7 @@ showdown.helper.matchRecursiveRegExp = function (str, left, right, flags) {
'use strict';
var f = flags || '',
g = f.indexOf('g') > -1,
x = new RegExp(left + '|' + right, f),
x = new RegExp(left + '|' + right, 'g' + f.replace(/g/g, '')),
l = new RegExp(left, f.replace(/g/g, '')),
a = [],
t, s, m, start, end;
@ -582,6 +626,48 @@ showdown.helper.matchRecursiveRegExp = function (str, left, right, flags) {
return a;
};
/**
*
* @param {string} str
* @param {string|function} replacement
* @param {string} left
* @param {string} right
* @param {string} flags
* @returns {string}
*/
showdown.helper.replaceRecursiveRegExp = function (str, replacement, left, right, flags) {
'use strict';
if (!showdown.helper.isFunction(replacement)) {
var repStr = replacement;
replacement = function () {
return repStr;
};
}
var matchPos = rgxFindMatchPos(str, left, right, flags),
finalStr = str,
lng = matchPos.length;
if (lng > 0) {
var bits = [];
if (matchPos[0].start !== 0) {
bits.push(str.slice(0, matchPos[0].start));
}
for (var i = 0; i < lng; ++i) {
bits.push(replacement(str.slice(matchPos[i].start, matchPos[i].end)));
if (i < lng - 1) {
bits.push(str.slice(matchPos[i].end, matchPos[i + 1].start));
}
}
if (matchPos[lng - 1].end < str.length) {
bits.push(str.slice(matchPos[lng - 1].end));
}
finalStr = bits.join('');
}
return finalStr;
};
/**
* POLYFILLS
*/
@ -1591,135 +1677,63 @@ showdown.subParser('hashElement', function (text, options, globals) {
showdown.subParser('hashHTMLBlocks', function (text, options, globals) {
'use strict';
// attacklab: Double up blank lines to reduce lookaround
text = text.replace(/\n/g, '\n\n');
var blockTags = [
'pre',
'div',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'blockquote',
'table',
'dl',
'ol',
'ul',
'script',
'noscript',
'form',
'fieldset',
'iframe',
'math',
'style',
'section',
'header',
'footer',
'nav',
'article',
'aside',
'address',
'audio',
'canvas',
'figure',
'hgroup',
'output',
'video',
'p'
],
repFunc = function (match) {
return '\n\n~K' + (globals.gHtmlBlocks.push(match) - 1) + 'K\n\n';
};
// Hashify HTML blocks:
// We only want to do this for block-level HTML tags, such as headers,
// lists, and tables. That's because we still want to wrap <p>s around
// "paragraphs" that are wrapped in non-block-level tags, such as anchors,
// phrase emphasis, and spans. The list of tags we're looking for is
// hard-coded:
//var block_tags_a =
// 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style|section|header|footer|nav|article|aside';
// var block_tags_b =
// 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|style|section|header|footer|nav|article|aside';
for (var i = 0; i < blockTags.length; ++i) {
text = showdown.helper.replaceRecursiveRegExp(text, repFunc, '^(?: |\\t){0,3}<' + blockTags[i] + '\\b[^>]*>', '</' + blockTags[i] + '>', 'gim');
}
// First, look for nested blocks, e.g.:
// <div>
// <div>
// tags for inner block must be indented.
// </div>
// </div>
//
// The outermost tags must start at the left margin for this to match, and
// the inner nested divs must be indented.
// We need to do this before the next, more liberal match, because the next
// match will start at the first `<div>` and stop at the first `</div>`.
// attacklab: This regex can be expensive when it fails.
/*
var text = text.replace(/
( // save in $1
^ // start of line (with /m)
<($block_tags_a) // start tag = $2
\b // word break
// attacklab: hack around khtml/pcre bug...
[^\r]*?\n // any number of lines, minimally matching
</\2> // the matching end tag
[ \t]* // trailing spaces/tabs
(?=\n+) // followed by a newline
) // attacklab: there are sentinel newlines at end of document
/gm,function(){...}};
*/
text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,
showdown.subParser('hashElement')(text, options, globals));
//
// Now match more liberally, simply from `\n<tag>` to `</tag>\n`
//
/*
var text = text.replace(/
( // save in $1
^ // start of line (with /m)
<($block_tags_b) // start tag = $2
\b // word break
// attacklab: hack around khtml/pcre bug...
[^\r]*? // any number of lines, minimally matching
</\2> // the matching end tag
[ \t]* // trailing spaces/tabs
(?=\n+) // followed by a newline
) // attacklab: there are sentinel newlines at end of document
/gm,function(){...}};
*/
text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|style|section|header|footer|nav|article|aside|address|audio|canvas|figure|hgroup|output|video)\b[^\r]*?<\/\2>[ \t]*(?=\n+)\n)/gm,
showdown.subParser('hashElement')(text, options, globals));
// Special case just for <hr />. It was easier to make a special case than
// to make the other regex more complicated.
/*
text = text.replace(/
( // save in $1
\n\n // Starting after a blank line
[ ]{0,3}
(<(hr) // start tag = $2
\b // word break
([^<>])*? //
\/?>) // the matching end tag
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
// HR SPECIAL CASE
text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
showdown.subParser('hashElement')(text, options, globals));
// Special case for standalone HTML comments:
/*
text = text.replace(/
( // save in $1
\n\n // Starting after a blank line
[ ]{0,3} // attacklab: g_tab_width - 1
<!
(--[^\r]*?--\s*)+
>
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
text = text.replace(/(\n\n[ ]{0,3}<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
text = text.replace(/(<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
// PHP and ASP-style processor instructions (<?...?> and <%...%>)
/*
text = text.replace(/
(?:
\n\n // Starting after a blank line
)
( // save in $1
[ ]{0,3} // attacklab: g_tab_width - 1
(?:
<([?%]) // $2
[^\r]*?
\2>
)
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
showdown.subParser('hashElement')(text, options, globals));
// attacklab: Undo double lines (see comment at top of this function)
text = text.replace(/\n\n/g, '\n');
return text;
});
/**

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -17,6 +17,18 @@ showdown.helper.isString = function isString(a) {
return (typeof a === 'string' || a instanceof String);
};
/**
* Check if var is a function
* @static
* @param {string} a
* @returns {boolean}
*/
showdown.helper.isFunction = function isFunction(a) {
'use strict';
var getType = {};
return a && getType.toString.call(a) === '[object Function]';
};
/**
* ForEach helper function
* @static
@ -106,6 +118,38 @@ showdown.helper.escapeCharacters = function escapeCharacters(text, charsToEscape
return text;
};
var rgxFindMatchPos = function (str, left, right, flags) {
'use strict';
var f = flags || '',
g = f.indexOf('g') > -1,
x = new RegExp(left + '|' + right, 'g' + f.replace(/g/g, '')),
l = new RegExp(left, f.replace(/g/g, '')),
pos = [],
t, s, m, start, end;
do {
t = 0;
while ((m = x.exec(str))) {
if (l.test(m[0])) {
if (!(t++)) {
s = x.lastIndex;
start = s - m[0].length;
}
} else if (t) {
if (!--t) {
end = m.index + m[0].length;
pos.push({start: start, end: end});
if (!g) {
return pos;
}
}
}
}
} while (t && (x.lastIndex = s));
return pos;
};
/**
* matchRecursiveRegExp
*
@ -139,7 +183,7 @@ showdown.helper.matchRecursiveRegExp = function (str, left, right, flags) {
'use strict';
var f = flags || '',
g = f.indexOf('g') > -1,
x = new RegExp(left + '|' + right, f),
x = new RegExp(left + '|' + right, 'g' + f.replace(/g/g, '')),
l = new RegExp(left, f.replace(/g/g, '')),
a = [],
t, s, m, start, end;
@ -168,6 +212,48 @@ showdown.helper.matchRecursiveRegExp = function (str, left, right, flags) {
return a;
};
/**
*
* @param {string} str
* @param {string|function} replacement
* @param {string} left
* @param {string} right
* @param {string} flags
* @returns {string}
*/
showdown.helper.replaceRecursiveRegExp = function (str, replacement, left, right, flags) {
'use strict';
if (!showdown.helper.isFunction(replacement)) {
var repStr = replacement;
replacement = function () {
return repStr;
};
}
var matchPos = rgxFindMatchPos(str, left, right, flags),
finalStr = str,
lng = matchPos.length;
if (lng > 0) {
var bits = [];
if (matchPos[0].start !== 0) {
bits.push(str.slice(0, matchPos[0].start));
}
for (var i = 0; i < lng; ++i) {
bits.push(replacement(str.slice(matchPos[i].start, matchPos[i].end)));
if (i < lng - 1) {
bits.push(str.slice(matchPos[i].end, matchPos[i + 1].start));
}
}
if (matchPos[lng - 1].end < str.length) {
bits.push(str.slice(matchPos[lng - 1].end));
}
finalStr = bits.join('');
}
return finalStr;
};
/**
* POLYFILLS
*/

View File

@ -1,133 +1,61 @@
showdown.subParser('hashHTMLBlocks', function (text, options, globals) {
'use strict';
// attacklab: Double up blank lines to reduce lookaround
text = text.replace(/\n/g, '\n\n');
var blockTags = [
'pre',
'div',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'blockquote',
'table',
'dl',
'ol',
'ul',
'script',
'noscript',
'form',
'fieldset',
'iframe',
'math',
'style',
'section',
'header',
'footer',
'nav',
'article',
'aside',
'address',
'audio',
'canvas',
'figure',
'hgroup',
'output',
'video',
'p'
],
repFunc = function (match) {
return '\n\n~K' + (globals.gHtmlBlocks.push(match) - 1) + 'K\n\n';
};
// Hashify HTML blocks:
// We only want to do this for block-level HTML tags, such as headers,
// lists, and tables. That's because we still want to wrap <p>s around
// "paragraphs" that are wrapped in non-block-level tags, such as anchors,
// phrase emphasis, and spans. The list of tags we're looking for is
// hard-coded:
//var block_tags_a =
// 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style|section|header|footer|nav|article|aside';
// var block_tags_b =
// 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|style|section|header|footer|nav|article|aside';
for (var i = 0; i < blockTags.length; ++i) {
text = showdown.helper.replaceRecursiveRegExp(text, repFunc, '^(?: |\\t){0,3}<' + blockTags[i] + '\\b[^>]*>', '</' + blockTags[i] + '>', 'gim');
}
// First, look for nested blocks, e.g.:
// <div>
// <div>
// tags for inner block must be indented.
// </div>
// </div>
//
// The outermost tags must start at the left margin for this to match, and
// the inner nested divs must be indented.
// We need to do this before the next, more liberal match, because the next
// match will start at the first `<div>` and stop at the first `</div>`.
// attacklab: This regex can be expensive when it fails.
/*
var text = text.replace(/
( // save in $1
^ // start of line (with /m)
<($block_tags_a) // start tag = $2
\b // word break
// attacklab: hack around khtml/pcre bug...
[^\r]*?\n // any number of lines, minimally matching
</\2> // the matching end tag
[ \t]* // trailing spaces/tabs
(?=\n+) // followed by a newline
) // attacklab: there are sentinel newlines at end of document
/gm,function(){...}};
*/
text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,
showdown.subParser('hashElement')(text, options, globals));
//
// Now match more liberally, simply from `\n<tag>` to `</tag>\n`
//
/*
var text = text.replace(/
( // save in $1
^ // start of line (with /m)
<($block_tags_b) // start tag = $2
\b // word break
// attacklab: hack around khtml/pcre bug...
[^\r]*? // any number of lines, minimally matching
</\2> // the matching end tag
[ \t]* // trailing spaces/tabs
(?=\n+) // followed by a newline
) // attacklab: there are sentinel newlines at end of document
/gm,function(){...}};
*/
text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|style|section|header|footer|nav|article|aside|address|audio|canvas|figure|hgroup|output|video)\b[^\r]*?<\/\2>[ \t]*(?=\n+)\n)/gm,
showdown.subParser('hashElement')(text, options, globals));
// Special case just for <hr />. It was easier to make a special case than
// to make the other regex more complicated.
/*
text = text.replace(/
( // save in $1
\n\n // Starting after a blank line
[ ]{0,3}
(<(hr) // start tag = $2
\b // word break
([^<>])*? //
\/?>) // the matching end tag
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
// HR SPECIAL CASE
text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
showdown.subParser('hashElement')(text, options, globals));
// Special case for standalone HTML comments:
/*
text = text.replace(/
( // save in $1
\n\n // Starting after a blank line
[ ]{0,3} // attacklab: g_tab_width - 1
<!
(--[^\r]*?--\s*)+
>
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
text = text.replace(/(\n\n[ ]{0,3}<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
text = text.replace(/(<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
// PHP and ASP-style processor instructions (<?...?> and <%...%>)
/*
text = text.replace(/
(?:
\n\n // Starting after a blank line
)
( // save in $1
[ ]{0,3} // attacklab: g_tab_width - 1
(?:
<([?%]) // $2
[^\r]*?
\2>
)
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
showdown.subParser('hashElement')(text, options, globals));
// attacklab: Undo double lines (see comment at top of this function)
text = text.replace(/\n\n/g, '\n');
return text;
});

View File

@ -2,5 +2,6 @@
<li><p>A list item with code:</p>
<pre><code>alert('Hello world!');
</code></pre></li>
</code></pre>
</li>
</ul>

View File

@ -63,6 +63,7 @@ Another [example][wiki] of a link
<p><code>foo_bar_baz foo_bar_baz_bar_foo _foo_bar baz_bar_ baz_foo</code></p>
<!-- These two cases still have bad <ems> because showdown handles them incorrectly -->
<code>foo_bar_baz foo_bar_baz_bar_foo _foo_bar baz_bar_ baz_foo</code>
![foo_bar_baz foo_bar_baz_bar_foo _foo_bar baz_bar_ baz_foo](http://myurl.com/foo_bar_baz_bar_foo)

View File

@ -5,12 +5,14 @@
<pre><code class="sh language-sh">$ git clone thing.git
dfgdfg
</code></pre></li>
</code></pre>
</li>
<li>
<p>I am another thing!</p>
<pre><code class="sh language-sh">$ git clone other-thing.git
foobar
</code></pre></li>
</ol>
</code></pre>
</li>
</ol>

View File

@ -0,0 +1,5 @@
<h2 id="title1">Title 1</h2>
<div></div>
<h1 id="title2">Title 2</h1>
<div>
</div>

View File

@ -0,0 +1,11 @@
Title 1
-------
<div></div>
# Title 2
<div>
</div>

View File

@ -3,5 +3,6 @@
<pre><code>10 PRINT HELLO INFINITE
20 GOTO 10
</code></pre></li>
</code></pre>
</li>
</ul>