fix(hashHTMLBlock): fix issue with html breaking markdown parsing

Closes #220
This commit is contained in:
Estevão Soares dos Santos 2016-01-01 23:33:33 +00:00
parent 1011c0f7ac
commit 2746949d7d
12 changed files with 160 additions and 125 deletions

BIN
dist/showdown.js vendored

Binary file not shown.

BIN
dist/showdown.js.map vendored

Binary file not shown.

BIN
dist/showdown.min.js vendored

Binary file not shown.

Binary file not shown.

View File

@ -17,6 +17,18 @@ showdown.helper.isString = function isString(a) {
return (typeof a === 'string' || a instanceof String);
};
/**
* Check if var is a function
* @static
* @param {string} a
* @returns {boolean}
*/
showdown.helper.isFunction = function isFunction(a) {
'use strict';
var getType = {};
return a && getType.toString.call(a) === '[object Function]';
};
/**
* ForEach helper function
* @static
@ -106,6 +118,38 @@ showdown.helper.escapeCharacters = function escapeCharacters(text, charsToEscape
return text;
};
var rgxFindMatchPos = function (str, left, right, flags) {
'use strict';
var f = flags || '',
g = f.indexOf('g') > -1,
x = new RegExp(left + '|' + right, 'g' + f.replace(/g/g, '')),
l = new RegExp(left, f.replace(/g/g, '')),
pos = [],
t, s, m, start, end;
do {
t = 0;
while ((m = x.exec(str))) {
if (l.test(m[0])) {
if (!(t++)) {
s = x.lastIndex;
start = s - m[0].length;
}
} else if (t) {
if (!--t) {
end = m.index + m[0].length;
pos.push({start: start, end: end});
if (!g) {
return pos;
}
}
}
}
} while (t && (x.lastIndex = s));
return pos;
};
/**
* matchRecursiveRegExp
*
@ -139,7 +183,7 @@ showdown.helper.matchRecursiveRegExp = function (str, left, right, flags) {
'use strict';
var f = flags || '',
g = f.indexOf('g') > -1,
x = new RegExp(left + '|' + right, f),
x = new RegExp(left + '|' + right, 'g' + f.replace(/g/g, '')),
l = new RegExp(left, f.replace(/g/g, '')),
a = [],
t, s, m, start, end;
@ -168,6 +212,48 @@ showdown.helper.matchRecursiveRegExp = function (str, left, right, flags) {
return a;
};
/**
*
* @param {string} str
* @param {string|function} replacement
* @param {string} left
* @param {string} right
* @param {string} flags
* @returns {string}
*/
showdown.helper.replaceRecursiveRegExp = function (str, replacement, left, right, flags) {
'use strict';
if (!showdown.helper.isFunction(replacement)) {
var repStr = replacement;
replacement = function () {
return repStr;
};
}
var matchPos = rgxFindMatchPos(str, left, right, flags),
finalStr = str,
lng = matchPos.length;
if (lng > 0) {
var bits = [];
if (matchPos[0].start !== 0) {
bits.push(str.slice(0, matchPos[0].start));
}
for (var i = 0; i < lng; ++i) {
bits.push(replacement(str.slice(matchPos[i].start, matchPos[i].end)));
if (i < lng - 1) {
bits.push(str.slice(matchPos[i].end, matchPos[i + 1].start));
}
}
if (matchPos[lng - 1].end < str.length) {
bits.push(str.slice(matchPos[lng - 1].end));
}
finalStr = bits.join('');
}
return finalStr;
};
/**
* POLYFILLS
*/

View File

@ -1,133 +1,61 @@
showdown.subParser('hashHTMLBlocks', function (text, options, globals) {
'use strict';
// attacklab: Double up blank lines to reduce lookaround
text = text.replace(/\n/g, '\n\n');
var blockTags = [
'pre',
'div',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'blockquote',
'table',
'dl',
'ol',
'ul',
'script',
'noscript',
'form',
'fieldset',
'iframe',
'math',
'style',
'section',
'header',
'footer',
'nav',
'article',
'aside',
'address',
'audio',
'canvas',
'figure',
'hgroup',
'output',
'video',
'p'
],
repFunc = function (match) {
return '\n\n~K' + (globals.gHtmlBlocks.push(match) - 1) + 'K\n\n';
};
// Hashify HTML blocks:
// We only want to do this for block-level HTML tags, such as headers,
// lists, and tables. That's because we still want to wrap <p>s around
// "paragraphs" that are wrapped in non-block-level tags, such as anchors,
// phrase emphasis, and spans. The list of tags we're looking for is
// hard-coded:
//var block_tags_a =
// 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del|style|section|header|footer|nav|article|aside';
// var block_tags_b =
// 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|style|section|header|footer|nav|article|aside';
for (var i = 0; i < blockTags.length; ++i) {
text = showdown.helper.replaceRecursiveRegExp(text, repFunc, '^(?: |\\t){0,3}<' + blockTags[i] + '\\b[^>]*>', '</' + blockTags[i] + '>', 'gim');
}
// First, look for nested blocks, e.g.:
// <div>
// <div>
// tags for inner block must be indented.
// </div>
// </div>
//
// The outermost tags must start at the left margin for this to match, and
// the inner nested divs must be indented.
// We need to do this before the next, more liberal match, because the next
// match will start at the first `<div>` and stop at the first `</div>`.
// attacklab: This regex can be expensive when it fails.
/*
var text = text.replace(/
( // save in $1
^ // start of line (with /m)
<($block_tags_a) // start tag = $2
\b // word break
// attacklab: hack around khtml/pcre bug...
[^\r]*?\n // any number of lines, minimally matching
</\2> // the matching end tag
[ \t]* // trailing spaces/tabs
(?=\n+) // followed by a newline
) // attacklab: there are sentinel newlines at end of document
/gm,function(){...}};
*/
text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,
showdown.subParser('hashElement')(text, options, globals));
//
// Now match more liberally, simply from `\n<tag>` to `</tag>\n`
//
/*
var text = text.replace(/
( // save in $1
^ // start of line (with /m)
<($block_tags_b) // start tag = $2
\b // word break
// attacklab: hack around khtml/pcre bug...
[^\r]*? // any number of lines, minimally matching
</\2> // the matching end tag
[ \t]* // trailing spaces/tabs
(?=\n+) // followed by a newline
) // attacklab: there are sentinel newlines at end of document
/gm,function(){...}};
*/
text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|style|section|header|footer|nav|article|aside|address|audio|canvas|figure|hgroup|output|video)\b[^\r]*?<\/\2>[ \t]*(?=\n+)\n)/gm,
showdown.subParser('hashElement')(text, options, globals));
// Special case just for <hr />. It was easier to make a special case than
// to make the other regex more complicated.
/*
text = text.replace(/
( // save in $1
\n\n // Starting after a blank line
[ ]{0,3}
(<(hr) // start tag = $2
\b // word break
([^<>])*? //
\/?>) // the matching end tag
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
// HR SPECIAL CASE
text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
showdown.subParser('hashElement')(text, options, globals));
// Special case for standalone HTML comments:
/*
text = text.replace(/
( // save in $1
\n\n // Starting after a blank line
[ ]{0,3} // attacklab: g_tab_width - 1
<!
(--[^\r]*?--\s*)+
>
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
text = text.replace(/(\n\n[ ]{0,3}<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
text = text.replace(/(<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
// PHP and ASP-style processor instructions (<?...?> and <%...%>)
/*
text = text.replace(/
(?:
\n\n // Starting after a blank line
)
( // save in $1
[ ]{0,3} // attacklab: g_tab_width - 1
(?:
<([?%]) // $2
[^\r]*?
\2>
)
[ \t]*
(?=\n{2,}) // followed by a blank line
)
/g,showdown.subParser('hashElement')(text, options, globals));
*/
text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,
showdown.subParser('hashElement')(text, options, globals));
showdown.subParser('hashElement')(text, options, globals));
// attacklab: Undo double lines (see comment at top of this function)
text = text.replace(/\n\n/g, '\n');
return text;
});

View File

@ -2,5 +2,6 @@
<li><p>A list item with code:</p>
<pre><code>alert('Hello world!');
</code></pre></li>
</code></pre>
</li>
</ul>

View File

@ -63,6 +63,7 @@ Another [example][wiki] of a link
<p><code>foo_bar_baz foo_bar_baz_bar_foo _foo_bar baz_bar_ baz_foo</code></p>
<!-- These two cases still have bad <ems> because showdown handles them incorrectly -->
<code>foo_bar_baz foo_bar_baz_bar_foo _foo_bar baz_bar_ baz_foo</code>
![foo_bar_baz foo_bar_baz_bar_foo _foo_bar baz_bar_ baz_foo](http://myurl.com/foo_bar_baz_bar_foo)

View File

@ -5,12 +5,14 @@
<pre><code class="sh language-sh">$ git clone thing.git
dfgdfg
</code></pre></li>
</code></pre>
</li>
<li>
<p>I am another thing!</p>
<pre><code class="sh language-sh">$ git clone other-thing.git
foobar
</code></pre></li>
</ol>
</code></pre>
</li>
</ol>

View File

@ -0,0 +1,5 @@
<h2 id="title1">Title 1</h2>
<div></div>
<h1 id="title2">Title 2</h1>
<div>
</div>

View File

@ -0,0 +1,11 @@
Title 1
-------
<div></div>
# Title 2
<div>
</div>

View File

@ -3,5 +3,6 @@
<pre><code>10 PRINT HELLO INFINITE
20 GOTO 10
</code></pre></li>
</code></pre>
</li>
</ul>