From 33e6d6709683432c36b80c8e5b954b14355dc31e Mon Sep 17 00:00:00 2001 From: Corey Innis Date: Thu, 13 Nov 2008 13:40:18 -0800 Subject: [PATCH] initial commit --- compressed/showdown.js | 419 ++++++++ example/showdown-gui.html | 720 +++++++++++++ example/showdown-gui.js | 349 ++++++ example/showdown.js | 1296 +++++++++++++++++++++++ license.txt | 34 + perlMarkdown/Markdown License.txt | 30 + perlMarkdown/Markdown-1.0.2b2.pl | 1509 ++++++++++++++++++++++++++ perlMarkdown/Markdown-1.0.2b7.pl | 1642 +++++++++++++++++++++++++++++ perlMarkdown/readme.txt | 13 + readme.txt | 156 +++ src/showdown.js | 1296 +++++++++++++++++++++++ 11 files changed, 7464 insertions(+) create mode 100644 compressed/showdown.js create mode 100644 example/showdown-gui.html create mode 100644 example/showdown-gui.js create mode 100644 example/showdown.js create mode 100644 license.txt create mode 100644 perlMarkdown/Markdown License.txt create mode 100644 perlMarkdown/Markdown-1.0.2b2.pl create mode 100644 perlMarkdown/Markdown-1.0.2b7.pl create mode 100644 perlMarkdown/readme.txt create mode 100644 readme.txt create mode 100644 src/showdown.js diff --git a/compressed/showdown.js b/compressed/showdown.js new file mode 100644 index 0000000..86b9bc1 --- /dev/null +++ b/compressed/showdown.js @@ -0,0 +1,419 @@ +/* + A A L Source code at: + T C A + T K B +*/ + +var Showdown={}; +Showdown.converter=function(){ +var _1; +var _2; +var _3; +var _4=0; +this.makeHtml=function(_5){ +_1=new Array(); +_2=new Array(); +_3=new Array(); +_5=_5.replace(/~/g,"~T"); +_5=_5.replace(/\$/g,"~D"); +_5=_5.replace(/\r\n/g,"\n"); +_5=_5.replace(/\r/g,"\n"); +_5="\n\n"+_5+"\n\n"; +_5=_6(_5); +_5=_5.replace(/^[ \t]+$/mg,""); +_5=_7(_5); +_5=_8(_5); +_5=_9(_5); +_5=_a(_5); +_5=_5.replace(/~D/g,"$$"); +_5=_5.replace(/~T/g,"~"); +return _5; +}; +var _8=function(_b){ +var _b=_b.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*?[ \t]*\n?[ \t]*(?:(\n*)["(](.+?)[")][ \t]*)?(?:\n+|\Z)/gm,function(_c,m1,m2,m3,m4){ +m1=m1.toLowerCase(); +_1[m1]=_11(m2); +if(m3){ +return m3+m4; +}else{ +if(m4){ +_2[m1]=m4.replace(/"/g,"""); +} +} +return ""; +}); +return _b; +}; +var _7=function(_12){ +_12=_12.replace(/\n/g,"\n\n"); +var _13="p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del"; +var _14="p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math"; +_12=_12.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,_15); +_12=_12.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,_15); +_12=_12.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,_15); +_12=_12.replace(/(\n\n[ ]{0,3}[ \t]*(?=\n{2,}))/g,_15); +_12=_12.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,_15); +_12=_12.replace(/\n\n/g,"\n"); +return _12; +}; +var _15=function(_16,m1){ +var _18=m1; +_18=_18.replace(/\n\n/g,"\n"); +_18=_18.replace(/^\n/,""); +_18=_18.replace(/\n+$/g,""); +_18="\n\n~K"+(_3.push(_18)-1)+"K\n\n"; +return _18; +}; +var _9=function(_19){ +_19=_1a(_19); +var key=_1c("
"); +_19=_19.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,key); +_19=_19.replace(/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,key); +_19=_19.replace(/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm,key); +_19=_1d(_19); +_19=_1e(_19); +_19=_1f(_19); +_19=_7(_19); +_19=_20(_19); +return _19; +}; +var _21=function(_22){ +_22=_23(_22); +_22=_24(_22); +_22=_25(_22); +_22=_26(_22); +_22=_27(_22); +_22=_28(_22); +_22=_11(_22); +_22=_29(_22); +_22=_22.replace(/ +\n/g,"
\n"); +return _22; +}; +var _24=function(_2a){ +var _2b=/(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|)/gi; +_2a=_2a.replace(_2b,function(_2c){ +var tag=_2c.replace(/(.)<\/?code>(?=.)/g,"$1`"); +tag=_2e(tag,"\\`*_"); +return tag; +}); +return _2a; +}; +var _27=function(_2f){ +_2f=_2f.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,_30); +_2f=_2f.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,_30); +_2f=_2f.replace(/(\[([^\[\]]+)\])()()()()()/g,_30); +return _2f; +}; +var _30=function(_31,m1,m2,m3,m4,m5,m6,m7){ +if(m7==undefined){ +m7=""; +} +var _39=m1; +var _3a=m2; +var _3b=m3.toLowerCase(); +var url=m4; +var _3d=m7; +if(url==""){ +if(_3b==""){ +_3b=_3a.toLowerCase().replace(/ ?\n/g," "); +} +url="#"+_3b; +if(_1[_3b]!=undefined){ +url=_1[_3b]; +if(_2[_3b]!=undefined){ +_3d=_2[_3b]; +} +}else{ +if(_39.search(/\(\s*\)$/m)>-1){ +url=""; +}else{ +return _39; +} +} +} +url=_2e(url,"*_"); +var _3e=""; +return _3e; +}; +var _26=function(_3f){ +_3f=_3f.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,_40); +_3f=_3f.replace(/(!\[(.*?)\]\s?\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,_40); +return _3f; +}; +var _40=function(_41,m1,m2,m3,m4,m5,m6,m7){ +var _49=m1; +var _4a=m2; +var _4b=m3.toLowerCase(); +var url=m4; +var _4d=m7; +if(!_4d){ +_4d=""; +} +if(url==""){ +if(_4b==""){ +_4b=_4a.toLowerCase().replace(/ ?\n/g," "); +} +url="#"+_4b; +if(_1[_4b]!=undefined){ +url=_1[_4b]; +if(_2[_4b]!=undefined){ +_4d=_2[_4b]; +} +}else{ +return _49; +} +} +_4a=_4a.replace(/"/g,"""); +url=_2e(url,"*_"); +var _4e="\""+_4a+"\"";"+_21(m1)+""); +}); +_4f=_4f.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm,function(_52,m1){ +return _1c("

"+_21(m1)+"

"); +}); +_4f=_4f.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm,function(_54,m1,m2){ +var _57=m1.length; +return _1c(""+_21(m2)+""); +}); +return _4f; +}; +var _58; +var _1d=function(_59){ +_59+="~0"; +var _5a=/^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm; +if(_4){ +_59=_59.replace(_5a,function(_5b,m1,m2){ +var _5e=m1; +var _5f=(m2.search(/[*+-]/g)>-1)?"ul":"ol"; +_5e=_5e.replace(/\n{2,}/g,"\n\n\n"); +var _60=_58(_5e); +_60=_60.replace(/\s+$/,""); +_60="<"+_5f+">"+_60+"\n"; +return _60; +}); +}else{ +_5a=/(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g; +_59=_59.replace(_5a,function(_61,m1,m2,m3){ +var _65=m1; +var _66=m2; +var _67=(m3.search(/[*+-]/g)>-1)?"ul":"ol"; +var _66=_66.replace(/\n{2,}/g,"\n\n\n"); +var _68=_58(_66); +_68=_65+"<"+_67+">\n"+_68+"\n"; +return _68; +}); +} +_59=_59.replace(/~0/,""); +return _59; +}; +_58=function(_69){ +_4++; +_69=_69.replace(/\n{2,}$/,"\n"); +_69+="~0"; +_69=_69.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm,function(_6a,m1,m2,m3,m4){ +var _6f=m4; +var _70=m1; +var _71=m2; +if(_70||(_6f.search(/\n{2,}/)>-1)){ +_6f=_9(_72(_6f)); +}else{ +_6f=_1d(_72(_6f)); +_6f=_6f.replace(/\n$/,""); +_6f=_21(_6f); +} +return "
  • "+_6f+"
  • \n"; +}); +_69=_69.replace(/~0/g,""); +_4--; +return _69; +}; +var _1e=function(_73){ +_73+="~0"; +_73=_73.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,function(_74,m1,m2){ +var _77=m1; +var _78=m2; +_77=_79(_72(_77)); +_77=_6(_77); +_77=_77.replace(/^\n+/g,""); +_77=_77.replace(/\n+$/g,""); +_77="
    "+_77+"\n
    "; +return _1c(_77)+_78; +}); +_73=_73.replace(/~0/,""); +return _73; +}; +var _1c=function(_7a){ +_7a=_7a.replace(/(^\n+|\n+$)/g,""); +return "\n\n~K"+(_3.push(_7a)-1)+"K\n\n"; +}; +var _23=function(_7b){ +_7b=_7b.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,function(_7c,m1,m2,m3,m4){ +var c=m3; +c=c.replace(/^([ \t]*)/g,""); +c=c.replace(/[ \t]*$/g,""); +c=_79(c); +return m1+""+c+""; +}); +return _7b; +}; +var _79=function(_82){ +_82=_82.replace(/&/g,"&"); +_82=_82.replace(//g,">"); +_82=_2e(_82,"*_{}[]\\",false); +return _82; +}; +var _29=function(_83){ +_83=_83.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g,"$2"); +_83=_83.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g,"$2"); +return _83; +}; +var _1f=function(_84){ +_84=_84.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,function(_85,m1){ +var bq=m1; +bq=bq.replace(/^[ \t]*>[ \t]?/gm,"~0"); +bq=bq.replace(/~0/g,""); +bq=bq.replace(/^[ \t]+$/gm,""); +bq=_9(bq); +bq=bq.replace(/(^|\n)/g,"$1 "); +bq=bq.replace(/(\s*
    [^\r]+?<\/pre>)/gm,function(_88,m1){
    +var pre=m1;
    +pre=pre.replace(/^  /mg,"~0");
    +pre=pre.replace(/~0/g,"");
    +return pre;
    +});
    +return _1c("
    \n"+bq+"\n
    "); +}); +return _84; +}; +var _20=function(_8b){ +_8b=_8b.replace(/^\n+/g,""); +_8b=_8b.replace(/\n+$/g,""); +var _8c=_8b.split(/\n{2,}/g); +var _8d=new Array(); +var end=_8c.length; +for(var i=0;i=0){ +_8d.push(str); +}else{ +if(str.search(/\S/)>=0){ +str=_21(str); +str=str.replace(/^([ \t]*)/g,"

    "); +str+="

    "; +_8d.push(str); +} +} +} +end=_8d.length; +for(var i=0;i=0){ +var _91=_3[RegExp.$1]; +_91=_91.replace(/\$/g,"$$$$"); +_8d[i]=_8d[i].replace(/~K\d+K/,_91); +} +} +return _8d.join("\n\n"); +}; +var _11=function(_92){ +_92=_92.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&"); +_92=_92.replace(/<(?![a-z\/?\$!])/gi,"<"); +return _92; +}; +var _25=function(_93){ +_93=_93.replace(/\\(\\)/g,_94); +_93=_93.replace(/\\([`*_{}\[\]()>#+-.!])/g,_94); +return _93; +}; +var _28=function(_95){ +_95=_95.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"
    $1"); +_95=_95.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi,function(_96,m1){ +return _98(_a(m1)); +}); +return _95; +}; +var _98=function(_99){ +function char2hex(ch){ +var _9b="0123456789ABCDEF"; +var dec=ch.charCodeAt(0); +return (_9b.charAt(dec>>4)+_9b.charAt(dec&15)); +} +var _9d=[function(ch){ +return "&#"+ch.charCodeAt(0)+";"; +},function(ch){ +return "&#x"+char2hex(ch)+";"; +},function(ch){ +return ch; +}]; +_99="mailto:"+_99; +_99=_99.replace(/./g,function(ch){ +if(ch=="@"){ +ch=_9d[Math.floor(Math.random()*2)](ch); +}else{ +if(ch!=":"){ +var r=Math.random(); +ch=(r>0.9?_9d[2](ch):r>0.45?_9d[1](ch):_9d[0](ch)); +} +} +return ch; +}); +_99=""+_99+""; +_99=_99.replace(/">.+:/g,"\">"); +return _99; +}; +var _a=function(_a3){ +_a3=_a3.replace(/~E(\d+)E/g,function(_a4,m1){ +var _a6=parseInt(m1); +return String.fromCharCode(_a6); +}); +return _a3; +}; +var _72=function(_a7){ +_a7=_a7.replace(/^(\t|[ ]{1,4})/gm,"~0"); +_a7=_a7.replace(/~0/g,""); +return _a7; +}; +var _6=function(_a8){ +_a8=_a8.replace(/\t(?=\t)/g," "); +_a8=_a8.replace(/\t/g,"~A~B"); +_a8=_a8.replace(/~B(.+?)~A/g,function(_a9,m1,m2){ +var _ac=m1; +var _ad=4-_ac.length%4; +for(var i=0;i<_ad;i++){ +_ac+=" "; +} +return _ac; +}); +_a8=_a8.replace(/~A/g," "); +_a8=_a8.replace(/~B/g,""); +return _a8; +}; +var _2e=function(_af,_b0,_b1){ +var _b2="(["+_b0.replace(/([\[\]\\])/g,"\\$1")+"])"; +if(_b1){ +_b2="\\\\"+_b2; +} +var _b3=new RegExp(_b2,"g"); +_af=_af.replace(_b3,_94); +return _af; +}; +var _94=function(_b4,m1){ +var _b6=m1.charCodeAt(0); +return "~E"+_b6+"E"; +}; +}; + diff --git a/example/showdown-gui.html b/example/showdown-gui.html new file mode 100644 index 0000000..c447221 --- /dev/null +++ b/example/showdown-gui.html @@ -0,0 +1,720 @@ + + + + + Showdown - Markdown in Javascript + + + + + + + + + +
    +
    + Input +
    + +
    + +
    +
    + +
    + + + +
    + + + + + + + + + + + + + + +
    + + + + + + \ No newline at end of file diff --git a/example/showdown-gui.js b/example/showdown-gui.js new file mode 100644 index 0000000..5f3519f --- /dev/null +++ b/example/showdown-gui.js @@ -0,0 +1,349 @@ +// +// showdown-gui.js +// +// A sample application for Showdown, a javascript port +// of Markdown. +// +// Copyright (c) 2007 John Fraser. +// +// Redistributable under a BSD-style open source license. +// See license.txt for more information. +// +// The full source distribution is at: +// +// A A L +// T C A +// T K B +// +// +// + +// +// The Showdown converter itself is in showdown.js, which must be +// included by the HTML before this file is. +// +// showdown-gui.js assumes the id and class definitions in +// showdown.html. It isn't dependent on the CSS, but it does +// manually hide, display, and resize the individual panes -- +// overriding the stylesheets. +// +// This sample application only interacts with showdown.js in +// two places: +// +// In startGui(): +// +// converter = new Showdown.converter(); +// +// In convertText(): +// +// text = converter.makeHtml(text); +// +// The rest of this file is user interface stuff. +// + + +// +// Register for onload +// +window.onload = startGui; + + +// +// Globals +// + +var converter; +var convertTextTimer,processingTime; +var lastText,lastOutput,lastRoomLeft; +var convertTextSetting, convertTextButton, paneSetting; +var inputPane,previewPane,outputPane,syntaxPane; +var maxDelay = 3000; // longest update pause (in ms) + + +// +// Initialization +// + +function startGui() { + // find elements + convertTextSetting = document.getElementById("convertTextSetting"); + convertTextButton = document.getElementById("convertTextButton"); + paneSetting = document.getElementById("paneSetting"); + + inputPane = document.getElementById("inputPane"); + previewPane = document.getElementById("previewPane"); + outputPane = document.getElementById("outputPane"); + syntaxPane = document.getElementById("syntaxPane"); + + // set event handlers + convertTextSetting.onchange = onConvertTextSettingChanged; + convertTextButton.onclick = onConvertTextButtonClicked; + paneSetting.onchange = onPaneSettingChanged; + window.onresize = setPaneHeights; + + // First, try registering for keyup events + // (There's no harm in calling onInput() repeatedly) + window.onkeyup = inputPane.onkeyup = onInput; + + // In case we can't capture paste events, poll for them + var pollingFallback = window.setInterval(function(){ + if(inputPane.value != lastText) + onInput(); + },1000); + + // Try registering for paste events + inputPane.onpaste = function() { + // It worked! Cancel paste polling. + if (pollingFallback!=undefined) { + window.clearInterval(pollingFallback); + pollingFallback = undefined; + } + onInput(); + } + + // Try registering for input events (the best solution) + if (inputPane.addEventListener) { + // Let's assume input also fires on paste. + // No need to cancel our keyup handlers; + // they're basically free. + inputPane.addEventListener("input",inputPane.onpaste,false); + } + + // poll for changes in font size + // this is cheap; do it often + window.setInterval(setPaneHeights,250); + + // start with blank page? + if (top.document.location.href.match(/\?blank=1$/)) + inputPane.value = ""; + + // refresh panes to avoid a hiccup + onPaneSettingChanged(); + + // build the converter + converter = new Showdown.converter(); + + // do an initial conversion to avoid a hiccup + convertText(); + + // give the input pane focus + inputPane.focus(); + + // start the other panes at the top + // (our smart scrolling moved them to the bottom) + previewPane.scrollTop = 0; + outputPane.scrollTop = 0; +} + + +// +// Conversion +// + +function convertText() { + // get input text + var text = inputPane.value; + + // if there's no change to input, cancel conversion + if (text && text == lastText) { + return; + } else { + lastText = text; + } + + var startTime = new Date().getTime(); + + // Do the conversion + text = converter.makeHtml(text); + + // display processing time + var endTime = new Date().getTime(); + processingTime = endTime - startTime; + document.getElementById("processingTime").innerHTML = processingTime+" ms"; + + // save proportional scroll positions + saveScrollPositions(); + + // update right pane + if (paneSetting.value == "outputPane") { + // the output pane is selected + outputPane.value = text; + } else if (paneSetting.value == "previewPane") { + // the preview pane is selected + previewPane.innerHTML = text; + } + + lastOutput = text; + + // restore proportional scroll positions + restoreScrollPositions(); +}; + + +// +// Event handlers +// + +function onConvertTextSettingChanged() { + // If the user just enabled automatic + // updates, we'll do one now. + onInput(); +} + +function onConvertTextButtonClicked() { + // hack: force the converter to run + lastText = ""; + + convertText(); + inputPane.focus(); +} + +function onPaneSettingChanged() { + previewPane.style.display = "none"; + outputPane.style.display = "none"; + syntaxPane.style.display = "none"; + + // now make the selected one visible + top[paneSetting.value].style.display = "block"; + + lastRoomLeft = 0; // hack: force resize of new pane + setPaneHeights(); + + if (paneSetting.value == "outputPane") { + // Update output pane + outputPane.value = lastOutput; + } else if (paneSetting.value == "previewPane") { + // Update preview pane + previewPane.innerHTML = lastOutput; + } +} + +function onInput() { +// In "delayed" mode, we do the conversion at pauses in input. +// The pause is equal to the last runtime, so that slow +// updates happen less frequently. +// +// Use a timer to schedule updates. Each keystroke +// resets the timer. + + // if we already have convertText scheduled, cancel it + if (convertTextTimer) { + window.clearTimeout(convertTextTimer); + convertTextTimer = undefined; + } + + if (convertTextSetting.value != "manual") { + var timeUntilConvertText = 0; + if (convertTextSetting.value == "delayed") { + // make timer adaptive + timeUntilConvertText = processingTime; + } + + if (timeUntilConvertText > maxDelay) + timeUntilConvertText = maxDelay; + + // Schedule convertText(). + // Even if we're updating every keystroke, use a timer at 0. + // This gives the browser time to handle other events. + convertTextTimer = window.setTimeout(convertText,timeUntilConvertText); + } +} + + +// +// Smart scrollbar adjustment +// +// We need to make sure the user can't type off the bottom +// of the preview and output pages. We'll do this by saving +// the proportional scroll positions before the update, and +// restoring them afterwards. +// + +var previewScrollPos; +var outputScrollPos; + +function getScrollPos(element) { + // favor the bottom when the text first overflows the window + if (element.scrollHeight <= element.clientHeight) + return 1.0; + return element.scrollTop/(element.scrollHeight-element.clientHeight); +} + +function setScrollPos(element,pos) { + element.scrollTop = (element.scrollHeight - element.clientHeight) * pos; +} + +function saveScrollPositions() { + previewScrollPos = getScrollPos(previewPane); + outputScrollPos = getScrollPos(outputPane); +} + +function restoreScrollPositions() { + // hack for IE: setting scrollTop ensures scrollHeight + // has been updated after a change in contents + previewPane.scrollTop = previewPane.scrollTop; + + setScrollPos(previewPane,previewScrollPos); + setScrollPos(outputPane,outputScrollPos); +} + +// +// Textarea resizing +// +// Some browsers (i.e. IE) refuse to set textarea +// percentage heights in standards mode. (But other units? +// No problem. Percentage widths? No problem.) +// +// So we'll do it in javascript. If IE's behavior ever +// changes, we should remove this crap and do 100% textarea +// heights in CSS, because it makes resizing much smoother +// on other browsers. +// + +function getTop(element) { + var sum = element.offsetTop; + while(element = element.offsetParent) + sum += element.offsetTop; + return sum; +} + +function getElementHeight(element) { + var height = element.clientHeight; + if (!height) height = element.scrollHeight; + return height; +} + +function getWindowHeight(element) { + if (window.innerHeight) + return window.innerHeight; + else if (document.documentElement && document.documentElement.clientHeight) + return document.documentElement.clientHeight; + else if (document.body) + return document.body.clientHeight; +} + +function setPaneHeights() { + var textarea = inputPane; + var footer = document.getElementById("footer"); + + var windowHeight = getWindowHeight(); + var footerHeight = getElementHeight(footer); + var textareaTop = getTop(textarea); + + // figure out how much room the panes should fill + var roomLeft = windowHeight - footerHeight - textareaTop; + + if (roomLeft < 0) roomLeft = 0; + + // if it hasn't changed, return + if (roomLeft == lastRoomLeft) { + return; + } + lastRoomLeft = roomLeft; + + // resize all panes + inputPane.style.height = roomLeft + "px"; + previewPane.style.height = roomLeft + "px"; + outputPane.style.height = roomLeft + "px"; + syntaxPane.style.height = roomLeft + "px"; +} \ No newline at end of file diff --git a/example/showdown.js b/example/showdown.js new file mode 100644 index 0000000..a960309 --- /dev/null +++ b/example/showdown.js @@ -0,0 +1,1296 @@ +// +// showdown.js -- A javascript port of Markdown. +// +// Copyright (c) 2007 John Fraser. +// +// Original Markdown Copyright (c) 2004-2005 John Gruber +// +// +// Redistributable under a BSD-style open source license. +// See license.txt for more information. +// +// The full source distribution is at: +// +// A A L +// T C A +// T K B +// +// +// + +// +// Wherever possible, Showdown is a straight, line-by-line port +// of the Perl version of Markdown. +// +// This is not a normal parser design; it's basically just a +// series of string substitutions. It's hard to read and +// maintain this way, but keeping Showdown close to the original +// design makes it easier to port new features. +// +// More importantly, Showdown behaves like markdown.pl in most +// edge cases. So web applications can do client-side preview +// in Javascript, and then build identical HTML on the server. +// +// This port needs the new RegExp functionality of ECMA 262, +// 3rd Edition (i.e. Javascript 1.5). Most modern web browsers +// should do fine. Even with the new regular expression features, +// We do a lot of work to emulate Perl's regex functionality. +// The tricky changes in this file mostly have the "attacklab:" +// label. Major or self-explanatory changes don't. +// +// Smart diff tools like Araxis Merge will be able to match up +// this file with markdown.pl in a useful way. A little tweaking +// helps: in a copy of markdown.pl, replace "#" with "//" and +// replace "$text" with "text". Be sure to ignore whitespace +// and line endings. +// + + +// +// Showdown usage: +// +// var text = "Markdown *rocks*."; +// +// var converter = new Showdown.converter(); +// var html = converter.makeHtml(text); +// +// alert(html); +// +// Note: move the sample code to the bottom of this +// file before uncommenting it. +// + + +// +// Showdown namespace +// +var Showdown = {}; + +// +// converter +// +// Wraps all "globals" so that the only thing +// exposed is makeHtml(). +// +Showdown.converter = function() { + +// +// Globals: +// + +// Global hashes, used by various utility routines +var g_urls; +var g_titles; +var g_html_blocks; + +// Used to track when we're inside an ordered or unordered list +// (see _ProcessListItems() for details): +var g_list_level = 0; + + +this.makeHtml = function(text) { +// +// Main function. The order in which other subs are called here is +// essential. Link and image substitutions need to happen before +// _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the +// and tags get encoded. +// + + // Clear the global hashes. If we don't clear these, you get conflicts + // from other articles when generating a page which contains more than + // one article (e.g. an index page that shows the N most recent + // articles): + g_urls = new Array(); + g_titles = new Array(); + g_html_blocks = new Array(); + + // attacklab: Replace ~ with ~T + // This lets us use tilde as an escape char to avoid md5 hashes + // The choice of character is arbitray; anything that isn't + // magic in Markdown will work. + text = text.replace(/~/g,"~T"); + + // attacklab: Replace $ with ~D + // RegExp interprets $ as a special character + // when it's in a replacement string + text = text.replace(/\$/g,"~D"); + + // Standardize line endings + text = text.replace(/\r\n/g,"\n"); // DOS to Unix + text = text.replace(/\r/g,"\n"); // Mac to Unix + + // Make sure text begins and ends with a couple of newlines: + text = "\n\n" + text + "\n\n"; + + // Convert all tabs to spaces. + text = _Detab(text); + + // Strip any lines consisting only of spaces and tabs. + // This makes subsequent regexen easier to write, because we can + // match consecutive blank lines with /\n+/ instead of something + // contorted like /[ \t]*\n+/ . + text = text.replace(/^[ \t]+$/mg,""); + + // Turn block-level HTML blocks into hash entries + text = _HashHTMLBlocks(text); + + // Strip link definitions, store in hashes. + text = _StripLinkDefinitions(text); + + text = _RunBlockGamut(text); + + text = _UnescapeSpecialChars(text); + + // attacklab: Restore dollar signs + text = text.replace(/~D/g,"$$"); + + // attacklab: Restore tildes + text = text.replace(/~T/g,"~"); + + return text; +} + + +var _StripLinkDefinitions = function(text) { +// +// Strips link definitions from text, stores the URLs and titles in +// hash references. +// + + // Link defs are in the form: ^[id]: url "optional title" + + /* + var text = text.replace(/ + ^[ ]{0,3}\[(.+)\]: // id = $1 attacklab: g_tab_width - 1 + [ \t]* + \n? // maybe *one* newline + [ \t]* + ? // url = $2 + [ \t]* + \n? // maybe one newline + [ \t]* + (?: + (\n*) // any lines skipped = $3 attacklab: lookbehind removed + ["(] + (.+?) // title = $4 + [")] + [ \t]* + )? // title is optional + (?:\n+|$) + /gm, + function(){...}); + */ + var text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*?[ \t]*\n?[ \t]*(?:(\n*)["(](.+?)[")][ \t]*)?(?:\n+|\Z)/gm, + function (wholeMatch,m1,m2,m3,m4) { + m1 = m1.toLowerCase(); + g_urls[m1] = _EncodeAmpsAndAngles(m2); // Link IDs are case-insensitive + if (m3) { + // Oops, found blank lines, so it's not a title. + // Put back the parenthetical statement we stole. + return m3+m4; + } else if (m4) { + g_titles[m1] = m4.replace(/"/g,"""); + } + + // Completely remove the definition from the text + return ""; + } + ); + + return text; +} + + +var _HashHTMLBlocks = function(text) { + // attacklab: Double up blank lines to reduce lookaround + text = text.replace(/\n/g,"\n\n"); + + // Hashify HTML blocks: + // We only want to do this for block-level HTML tags, such as headers, + // lists, and tables. That's because we still want to wrap

    s around + // "paragraphs" that are wrapped in non-block-level tags, such as anchors, + // phrase emphasis, and spans. The list of tags we're looking for is + // hard-coded: + var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del" + var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math" + + // First, look for nested blocks, e.g.: + //

    + //
    + // tags for inner block must be indented. + //
    + //
    + // + // The outermost tags must start at the left margin for this to match, and + // the inner nested divs must be indented. + // We need to do this before the next, more liberal match, because the next + // match will start at the first `
    ` and stop at the first `
    `. + + // attacklab: This regex can be expensive when it fails. + /* + var text = text.replace(/ + ( // save in $1 + ^ // start of line (with /m) + <($block_tags_a) // start tag = $2 + \b // word break + // attacklab: hack around khtml/pcre bug... + [^\r]*?\n // any number of lines, minimally matching + // the matching end tag + [ \t]* // trailing spaces/tabs + (?=\n+) // followed by a newline + ) // attacklab: there are sentinel newlines at end of document + /gm,function(){...}}; + */ + text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,hashElement); + + // + // Now match more liberally, simply from `\n` to `\n` + // + + /* + var text = text.replace(/ + ( // save in $1 + ^ // start of line (with /m) + <($block_tags_b) // start tag = $2 + \b // word break + // attacklab: hack around khtml/pcre bug... + [^\r]*? // any number of lines, minimally matching + .* // the matching end tag + [ \t]* // trailing spaces/tabs + (?=\n+) // followed by a newline + ) // attacklab: there are sentinel newlines at end of document + /gm,function(){...}}; + */ + text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,hashElement); + + // Special case just for
    . It was easier to make a special case than + // to make the other regex more complicated. + + /* + text = text.replace(/ + ( // save in $1 + \n\n // Starting after a blank line + [ ]{0,3} + (<(hr) // start tag = $2 + \b // word break + ([^<>])*? // + \/?>) // the matching end tag + [ \t]* + (?=\n{2,}) // followed by a blank line + ) + /g,hashElement); + */ + text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement); + + // Special case for standalone HTML comments: + + /* + text = text.replace(/ + ( // save in $1 + \n\n // Starting after a blank line + [ ]{0,3} // attacklab: g_tab_width - 1 + + [ \t]* + (?=\n{2,}) // followed by a blank line + ) + /g,hashElement); + */ + text = text.replace(/(\n\n[ ]{0,3}[ \t]*(?=\n{2,}))/g,hashElement); + + // PHP and ASP-style processor instructions ( and <%...%>) + + /* + text = text.replace(/ + (?: + \n\n // Starting after a blank line + ) + ( // save in $1 + [ ]{0,3} // attacklab: g_tab_width - 1 + (?: + <([?%]) // $2 + [^\r]*? + \2> + ) + [ \t]* + (?=\n{2,}) // followed by a blank line + ) + /g,hashElement); + */ + text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement); + + // attacklab: Undo double lines (see comment at top of this function) + text = text.replace(/\n\n/g,"\n"); + return text; +} + +var hashElement = function(wholeMatch,m1) { + var blockText = m1; + + // Undo double lines + blockText = blockText.replace(/\n\n/g,"\n"); + blockText = blockText.replace(/^\n/,""); + + // strip trailing blank lines + blockText = blockText.replace(/\n+$/g,""); + + // Replace the element text with a marker ("~KxK" where x is its key) + blockText = "\n\n~K" + (g_html_blocks.push(blockText)-1) + "K\n\n"; + + return blockText; +}; + +var _RunBlockGamut = function(text) { +// +// These are all the transformations that form block-level +// tags like paragraphs, headers, and list items. +// + text = _DoHeaders(text); + + // Do Horizontal Rules: + var key = hashBlock("
    "); + text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,key); + text = text.replace(/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,key); + text = text.replace(/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm,key); + + text = _DoLists(text); + text = _DoCodeBlocks(text); + text = _DoBlockQuotes(text); + + // We already ran _HashHTMLBlocks() before, in Markdown(), but that + // was to escape raw HTML in the original Markdown source. This time, + // we're escaping the markup we've just created, so that we don't wrap + //

    tags around block-level tags. + text = _HashHTMLBlocks(text); + text = _FormParagraphs(text); + + return text; +} + + +var _RunSpanGamut = function(text) { +// +// These are all the transformations that occur *within* block-level +// tags like paragraphs, headers, and list items. +// + + text = _DoCodeSpans(text); + text = _EscapeSpecialCharsWithinTagAttributes(text); + text = _EncodeBackslashEscapes(text); + + // Process anchor and image tags. Images must come first, + // because ![foo][f] looks like an anchor. + text = _DoImages(text); + text = _DoAnchors(text); + + // Make links out of things like `` + // Must come after _DoAnchors(), because you can use < and > + // delimiters in inline links like [this](). + text = _DoAutoLinks(text); + text = _EncodeAmpsAndAngles(text); + text = _DoItalicsAndBold(text); + + // Do hard breaks: + text = text.replace(/ +\n/g,"
    \n"); + + return text; +} + +var _EscapeSpecialCharsWithinTagAttributes = function(text) { +// +// Within tags -- meaning between < and > -- encode [\ ` * _] so they +// don't conflict with their use in Markdown for code, italics and strong. +// + + // Build a regex to find HTML tags and comments. See Friedl's + // "Mastering Regular Expressions", 2nd Ed., pp. 200-201. + var regex = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|)/gi; + + text = text.replace(regex, function(wholeMatch) { + var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g,"$1`"); + tag = escapeCharacters(tag,"\\`*_"); + return tag; + }); + + return text; +} + +var _DoAnchors = function(text) { +// +// Turn Markdown link shortcuts into XHTML
    tags. +// + // + // First, handle reference-style links: [link text] [id] + // + + /* + text = text.replace(/ + ( // wrap whole match in $1 + \[ + ( + (?: + \[[^\]]*\] // allow brackets nested one level + | + [^\[] // or anything else + )* + ) + \] + + [ ]? // one optional space + (?:\n[ ]*)? // one optional newline followed by spaces + + \[ + (.*?) // id = $3 + \] + )()()()() // pad remaining backreferences + /g,_DoAnchors_callback); + */ + text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeAnchorTag); + + // + // Next, inline-style links: [link text](url "optional title") + // + + /* + text = text.replace(/ + ( // wrap whole match in $1 + \[ + ( + (?: + \[[^\]]*\] // allow brackets nested one level + | + [^\[\]] // or anything else + ) + ) + \] + \( // literal paren + [ \t]* + () // no id, so leave $3 empty + ? // href = $4 + [ \t]* + ( // $5 + (['"]) // quote char = $6 + (.*?) // Title = $7 + \6 // matching quote + [ \t]* // ignore any spaces/tabs between closing quote and ) + )? // title is optional + \) + ) + /g,writeAnchorTag); + */ + text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeAnchorTag); + + // + // Last, handle reference-style shortcuts: [link text] + // These must come last in case you've also got [link test][1] + // or [link test](/foo) + // + + /* + text = text.replace(/ + ( // wrap whole match in $1 + \[ + ([^\[\]]+) // link text = $2; can't contain '[' or ']' + \] + )()()()()() // pad rest of backreferences + /g, writeAnchorTag); + */ + text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag); + + return text; +} + +var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) { + if (m7 == undefined) m7 = ""; + var whole_match = m1; + var link_text = m2; + var link_id = m3.toLowerCase(); + var url = m4; + var title = m7; + + if (url == "") { + if (link_id == "") { + // lower-case and turn embedded newlines into spaces + link_id = link_text.toLowerCase().replace(/ ?\n/g," "); + } + url = "#"+link_id; + + if (g_urls[link_id] != undefined) { + url = g_urls[link_id]; + if (g_titles[link_id] != undefined) { + title = g_titles[link_id]; + } + } + else { + if (whole_match.search(/\(\s*\)$/m)>-1) { + // Special case for explicit empty url + url = ""; + } else { + return whole_match; + } + } + } + + url = escapeCharacters(url,"*_"); + var result = ""; + + return result; +} + + +var _DoImages = function(text) { +// +// Turn Markdown image shortcuts into tags. +// + + // + // First, handle reference-style labeled images: ![alt text][id] + // + + /* + text = text.replace(/ + ( // wrap whole match in $1 + !\[ + (.*?) // alt text = $2 + \] + + [ ]? // one optional space + (?:\n[ ]*)? // one optional newline followed by spaces + + \[ + (.*?) // id = $3 + \] + )()()()() // pad rest of backreferences + /g,writeImageTag); + */ + text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeImageTag); + + // + // Next, handle inline images: ![alt text](url "optional title") + // Don't forget: encode * and _ + + /* + text = text.replace(/ + ( // wrap whole match in $1 + !\[ + (.*?) // alt text = $2 + \] + \s? // One optional whitespace character + \( // literal paren + [ \t]* + () // no id, so leave $3 empty + ? // src url = $4 + [ \t]* + ( // $5 + (['"]) // quote char = $6 + (.*?) // title = $7 + \6 // matching quote + [ \t]* + )? // title is optional + \) + ) + /g,writeImageTag); + */ + text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeImageTag); + + return text; +} + +var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) { + var whole_match = m1; + var alt_text = m2; + var link_id = m3.toLowerCase(); + var url = m4; + var title = m7; + + if (!title) title = ""; + + if (url == "") { + if (link_id == "") { + // lower-case and turn embedded newlines into spaces + link_id = alt_text.toLowerCase().replace(/ ?\n/g," "); + } + url = "#"+link_id; + + if (g_urls[link_id] != undefined) { + url = g_urls[link_id]; + if (g_titles[link_id] != undefined) { + title = g_titles[link_id]; + } + } + else { + return whole_match; + } + } + + alt_text = alt_text.replace(/"/g,"""); + url = escapeCharacters(url,"*_"); + var result = "\""" + _RunSpanGamut(m1) + "");}); + + text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm, + function(matchFound,m1){return hashBlock("

    " + _RunSpanGamut(m1) + "

    ");}); + + // atx-style headers: + // # Header 1 + // ## Header 2 + // ## Header 2 with closing hashes ## + // ... + // ###### Header 6 + // + + /* + text = text.replace(/ + ^(\#{1,6}) // $1 = string of #'s + [ \t]* + (.+?) // $2 = Header text + [ \t]* + \#* // optional closing #'s (not counted) + \n+ + /gm, function() {...}); + */ + + text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm, + function(wholeMatch,m1,m2) { + var h_level = m1.length; + return hashBlock("" + _RunSpanGamut(m2) + ""); + }); + + return text; +} + +// This declaration keeps Dojo compressor from outputting garbage: +var _ProcessListItems; + +var _DoLists = function(text) { +// +// Form HTML ordered (numbered) and unordered (bulleted) lists. +// + + // attacklab: add sentinel to hack around khtml/safari bug: + // http://bugs.webkit.org/show_bug.cgi?id=11231 + text += "~0"; + + // Re-usable pattern to match any entirel ul or ol list: + + /* + var whole_list = / + ( // $1 = whole list + ( // $2 + [ ]{0,3} // attacklab: g_tab_width - 1 + ([*+-]|\d+[.]) // $3 = first list item marker + [ \t]+ + ) + [^\r]+? + ( // $4 + ~0 // sentinel for workaround; should be $ + | + \n{2,} + (?=\S) + (?! // Negative lookahead for another list item marker + [ \t]* + (?:[*+-]|\d+[.])[ \t]+ + ) + ) + )/g + */ + var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm; + + if (g_list_level) { + text = text.replace(whole_list,function(wholeMatch,m1,m2) { + var list = m1; + var list_type = (m2.search(/[*+-]/g)>-1) ? "ul" : "ol"; + + // Turn double returns into triple returns, so that we can make a + // paragraph for the last item in a list, if necessary: + list = list.replace(/\n{2,}/g,"\n\n\n");; + var result = _ProcessListItems(list); + + // Trim any trailing whitespace, to put the closing `` + // up on the preceding line, to get it past the current stupid + // HTML block parser. This is a hack to work around the terrible + // hack that is the HTML block parser. + result = result.replace(/\s+$/,""); + result = "<"+list_type+">" + result + "\n"; + return result; + }); + } else { + whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g; + text = text.replace(whole_list,function(wholeMatch,m1,m2,m3) { + var runup = m1; + var list = m2; + + var list_type = (m3.search(/[*+-]/g)>-1) ? "ul" : "ol"; + // Turn double returns into triple returns, so that we can make a + // paragraph for the last item in a list, if necessary: + var list = list.replace(/\n{2,}/g,"\n\n\n");; + var result = _ProcessListItems(list); + result = runup + "<"+list_type+">\n" + result + "\n"; + return result; + }); + } + + // attacklab: strip sentinel + text = text.replace(/~0/,""); + + return text; +} + +_ProcessListItems = function(list_str) { +// +// Process the contents of a single ordered or unordered list, splitting it +// into individual list items. +// + // The $g_list_level global keeps track of when we're inside a list. + // Each time we enter a list, we increment it; when we leave a list, + // we decrement. If it's zero, we're not in a list anymore. + // + // We do this because when we're not inside a list, we want to treat + // something like this: + // + // I recommend upgrading to version + // 8. Oops, now this line is treated + // as a sub-list. + // + // As a single paragraph, despite the fact that the second line starts + // with a digit-period-space sequence. + // + // Whereas when we're inside a list (or sub-list), that line will be + // treated as the start of a sub-list. What a kludge, huh? This is + // an aspect of Markdown's syntax that's hard to parse perfectly + // without resorting to mind-reading. Perhaps the solution is to + // change the syntax rules such that sub-lists must start with a + // starting cardinal number; e.g. "1." or "a.". + + g_list_level++; + + // trim trailing blank lines: + list_str = list_str.replace(/\n{2,}$/,"\n"); + + // attacklab: add sentinel to emulate \z + list_str += "~0"; + + /* + list_str = list_str.replace(/ + (\n)? // leading line = $1 + (^[ \t]*) // leading whitespace = $2 + ([*+-]|\d+[.]) [ \t]+ // list marker = $3 + ([^\r]+? // list item text = $4 + (\n{1,2})) + (?= \n* (~0 | \2 ([*+-]|\d+[.]) [ \t]+)) + /gm, function(){...}); + */ + list_str = list_str.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm, + function(wholeMatch,m1,m2,m3,m4){ + var item = m4; + var leading_line = m1; + var leading_space = m2; + + if (leading_line || (item.search(/\n{2,}/)>-1)) { + item = _RunBlockGamut(_Outdent(item)); + } + else { + // Recursion for sub-lists: + item = _DoLists(_Outdent(item)); + item = item.replace(/\n$/,""); // chomp(item) + item = _RunSpanGamut(item); + } + + return "
  • " + item + "
  • \n"; + } + ); + + // attacklab: strip sentinel + list_str = list_str.replace(/~0/g,""); + + g_list_level--; + return list_str; +} + + +var _DoCodeBlocks = function(text) { +// +// Process Markdown `
    ` blocks.
    +//  
    +
    +	/*
    +		text = text.replace(text,
    +			/(?:\n\n|^)
    +			(								// $1 = the code block -- one or more lines, starting with a space/tab
    +				(?:
    +					(?:[ ]{4}|\t)			// Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
    +					.*\n+
    +				)+
    +			)
    +			(\n*[ ]{0,3}[^ \t\n]|(?=~0))	// attacklab: g_tab_width
    +		/g,function(){...});
    +	*/
    +
    +	// attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
    +	text += "~0";
    +	
    +	text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
    +		function(wholeMatch,m1,m2) {
    +			var codeblock = m1;
    +			var nextChar = m2;
    +		
    +			codeblock = _EncodeCode( _Outdent(codeblock));
    +			codeblock = _Detab(codeblock);
    +			codeblock = codeblock.replace(/^\n+/g,""); // trim leading newlines
    +			codeblock = codeblock.replace(/\n+$/g,""); // trim trailing whitespace
    +
    +			codeblock = "
    " + codeblock + "\n
    "; + + return hashBlock(codeblock) + nextChar; + } + ); + + // attacklab: strip sentinel + text = text.replace(/~0/,""); + + return text; +} + +var hashBlock = function(text) { + text = text.replace(/(^\n+|\n+$)/g,""); + return "\n\n~K" + (g_html_blocks.push(text)-1) + "K\n\n"; +} + + +var _DoCodeSpans = function(text) { +// +// * Backtick quotes are used for spans. +// +// * You can use multiple backticks as the delimiters if you want to +// include literal backticks in the code span. So, this input: +// +// Just type ``foo `bar` baz`` at the prompt. +// +// Will translate to: +// +//

    Just type foo `bar` baz at the prompt.

    +// +// There's no arbitrary limit to the number of backticks you +// can use as delimters. If you need three consecutive backticks +// in your code, use four for delimiters, etc. +// +// * You can use spaces to get literal backticks at the edges: +// +// ... type `` `bar` `` ... +// +// Turns to: +// +// ... type `bar` ... +// + + /* + text = text.replace(/ + (^|[^\\]) // Character before opening ` can't be a backslash + (`+) // $2 = Opening run of ` + ( // $3 = The code block + [^\r]*? + [^`] // attacklab: work around lack of lookbehind + ) + \2 // Matching closer + (?!`) + /gm, function(){...}); + */ + + text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm, + function(wholeMatch,m1,m2,m3,m4) { + var c = m3; + c = c.replace(/^([ \t]*)/g,""); // leading whitespace + c = c.replace(/[ \t]*$/g,""); // trailing whitespace + c = _EncodeCode(c); + return m1+""+c+""; + }); + + return text; +} + + +var _EncodeCode = function(text) { +// +// Encode/escape certain characters inside Markdown code runs. +// The point is that in code, these characters are literals, +// and lose their special Markdown meanings. +// + // Encode all ampersands; HTML entities are not + // entities within a Markdown code span. + text = text.replace(/&/g,"&"); + + // Do the angle bracket song and dance: + text = text.replace(//g,">"); + + // Now, escape characters that are magic in Markdown: + text = escapeCharacters(text,"\*_{}[]\\",false); + +// jj the line above breaks this: +//--- + +//* Item + +// 1. Subitem + +// special char: * +//--- + + return text; +} + + +var _DoItalicsAndBold = function(text) { + + // must go first: + text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g, + "$2"); + + text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g, + "$2"); + + return text; +} + + +var _DoBlockQuotes = function(text) { + + /* + text = text.replace(/ + ( // Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? // '>' at the start of a line + .+\n // rest of the first line + (.+\n)* // subsequent consecutive lines + \n* // blanks + )+ + ) + /gm, function(){...}); + */ + + text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm, + function(wholeMatch,m1) { + var bq = m1; + + // attacklab: hack around Konqueror 3.5.4 bug: + // "----------bug".replace(/^-/g,"") == "bug" + + bq = bq.replace(/^[ \t]*>[ \t]?/gm,"~0"); // trim one level of quoting + + // attacklab: clean up hack + bq = bq.replace(/~0/g,""); + + bq = bq.replace(/^[ \t]+$/gm,""); // trim whitespace-only lines + bq = _RunBlockGamut(bq); // recurse + + bq = bq.replace(/(^|\n)/g,"$1 "); + // These leading spaces screw with
     content, so we need to fix that:
    +			bq = bq.replace(
    +					/(\s*
    [^\r]+?<\/pre>)/gm,
    +				function(wholeMatch,m1) {
    +					var pre = m1;
    +					// attacklab: hack around Konqueror 3.5.4 bug:
    +					pre = pre.replace(/^  /mg,"~0");
    +					pre = pre.replace(/~0/g,"");
    +					return pre;
    +				});
    +			
    +			return hashBlock("
    \n" + bq + "\n
    "); + }); + return text; +} + + +var _FormParagraphs = function(text) { +// +// Params: +// $text - string to process with html

    tags +// + + // Strip leading and trailing lines: + text = text.replace(/^\n+/g,""); + text = text.replace(/\n+$/g,""); + + var grafs = text.split(/\n{2,}/g); + var grafsOut = new Array(); + + // + // Wrap

    tags. + // + var end = grafs.length; + for (var i=0; i= 0) { + grafsOut.push(str); + } + else if (str.search(/\S/) >= 0) { + str = _RunSpanGamut(str); + str = str.replace(/^([ \t]*)/g,"

    "); + str += "

    " + grafsOut.push(str); + } + + } + + // + // Unhashify HTML blocks + // + end = grafsOut.length; + for (var i=0; i= 0) { + var blockText = g_html_blocks[RegExp.$1]; + blockText = blockText.replace(/\$/g,"$$$$"); // Escape any dollar signs + grafsOut[i] = grafsOut[i].replace(/~K\d+K/,blockText); + } + } + + return grafsOut.join("\n\n"); +} + + +var _EncodeAmpsAndAngles = function(text) { +// Smart processing for ampersands and angle brackets that need to be encoded. + + // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + // http://bumppo.net/projects/amputator/ + text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&"); + + // Encode naked <'s + text = text.replace(/<(?![a-z\/?\$!])/gi,"<"); + + return text; +} + + +var _EncodeBackslashEscapes = function(text) { +// +// Parameter: String. +// Returns: The string, with after processing the following backslash +// escape sequences. +// + + // attacklab: The polite way to do this is with the new + // escapeCharacters() function: + // + // text = escapeCharacters(text,"\\",true); + // text = escapeCharacters(text,"`*_{}[]()>#+-.!",true); + // + // ...but we're sidestepping its use of the (slow) RegExp constructor + // as an optimization for Firefox. This function gets called a LOT. + + text = text.replace(/\\(\\)/g,escapeCharacters_callback); + text = text.replace(/\\([`*_{}\[\]()>#+-.!])/g,escapeCharacters_callback); + return text; +} + + +var _DoAutoLinks = function(text) { + + text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"
    $1"); + + // Email addresses: + + /* + text = text.replace(/ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + /gi, _DoAutoLinks_callback()); + */ + text = text.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi, + function(wholeMatch,m1) { + return _EncodeEmailAddress( _UnescapeSpecialChars(m1) ); + } + ); + + return text; +} + + +var _EncodeEmailAddress = function(addr) { +// +// Input: an email address, e.g. "foo@example.com" +// +// Output: the email address as a mailto link, with each character +// of the address encoded as either a decimal or hex entity, in +// the hopes of foiling most address harvesting spam bots. E.g.: +// +// foo +// @example.com +// +// Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +// mailing list: +// + + // attacklab: why can't javascript speak hex? + function char2hex(ch) { + var hexDigits = '0123456789ABCDEF'; + var dec = ch.charCodeAt(0); + return(hexDigits.charAt(dec>>4) + hexDigits.charAt(dec&15)); + } + + var encode = [ + function(ch){return "&#"+ch.charCodeAt(0)+";";}, + function(ch){return "&#x"+char2hex(ch)+";";}, + function(ch){return ch;} + ]; + + addr = "mailto:" + addr; + + addr = addr.replace(/./g, function(ch) { + if (ch == "@") { + // this *must* be encoded. I insist. + ch = encode[Math.floor(Math.random()*2)](ch); + } else if (ch !=":") { + // leave ':' alone (to spot mailto: later) + var r = Math.random(); + // roughly 10% raw, 45% hex, 45% dec + ch = ( + r > .9 ? encode[2](ch) : + r > .45 ? encode[1](ch) : + encode[0](ch) + ); + } + return ch; + }); + + addr = "" + addr + ""; + addr = addr.replace(/">.+:/g,"\">"); // strip the mailto: from the visible part + + return addr; +} + + +var _UnescapeSpecialChars = function(text) { +// +// Swap back in all the special characters we've hidden. +// + text = text.replace(/~E(\d+)E/g, + function(wholeMatch,m1) { + var charCodeToReplace = parseInt(m1); + return String.fromCharCode(charCodeToReplace); + } + ); + return text; +} + + +var _Outdent = function(text) { +// +// Remove one level of line-leading tabs or spaces +// + + // attacklab: hack around Konqueror 3.5.4 bug: + // "----------bug".replace(/^-/g,"") == "bug" + + text = text.replace(/^(\t|[ ]{1,4})/gm,"~0"); // attacklab: g_tab_width + + // attacklab: clean up hack + text = text.replace(/~0/g,"") + + return text; +} + +var _Detab = function(text) { +// attacklab: Detab's completely rewritten for speed. +// In perl we could fix it by anchoring the regexp with \G. +// In javascript we're less fortunate. + + // expand first n-1 tabs + text = text.replace(/\t(?=\t)/g," "); // attacklab: g_tab_width + + // replace the nth with two sentinels + text = text.replace(/\t/g,"~A~B"); + + // use the sentinel to anchor our regex so it doesn't explode + text = text.replace(/~B(.+?)~A/g, + function(wholeMatch,m1,m2) { + var leadingText = m1; + var numSpaces = 4 - leadingText.length % 4; // attacklab: g_tab_width + + // there *must* be a better way to do this: + for (var i=0; i +All rights reserved. + +Original Markdown copyright (c) 2004, John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. diff --git a/perlMarkdown/Markdown License.txt b/perlMarkdown/Markdown License.txt new file mode 100644 index 0000000..6d76506 --- /dev/null +++ b/perlMarkdown/Markdown License.txt @@ -0,0 +1,30 @@ +Copyright (c) 2004, John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. diff --git a/perlMarkdown/Markdown-1.0.2b2.pl b/perlMarkdown/Markdown-1.0.2b2.pl new file mode 100644 index 0000000..5c78edd --- /dev/null +++ b/perlMarkdown/Markdown-1.0.2b2.pl @@ -0,0 +1,1509 @@ +#!/usr/bin/perl + +# +# Markdown -- A text-to-HTML conversion tool for web writers +# +# Copyright (c) 2004-2005 John Gruber +# +# + + +package Markdown; +require 5.006_000; +use strict; +use warnings; + +use Digest::MD5 qw(md5_hex); +use vars qw($VERSION); +$VERSION = '1.0.2b2'; +# Sat 26 Mar 2005 + +## Disabled; causes problems under Perl 5.6.1: +# use utf8; +# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + + +# +# Global default settings: +# +my $g_empty_element_suffix = " />"; # Change to ">" for HTML output +my $g_tab_width = 4; + + +# +# Globals: +# + +# Regex to match balanced [brackets]. See Friedl's +# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +my $g_nested_brackets; +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; + + +# Table of hash values for escaped characters: +my %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { + $g_escape_table{$char} = md5_hex($char); +} + + +# Global hashes, used by various utility routines +my %g_urls; +my %g_titles; +my %g_html_blocks; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): +my $g_list_level = 0; + + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +my $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + + if ( (! $g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); + } + 1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT}; # Test to see if we're running in MT. +unless ($@) { + require MT; + import MT; + require MT::Template::Context; + import MT::Template::Context; + + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. + unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "Markdown", + description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", + doc_link => 'http://daringfireball.net/projects/markdown/' + }); + MT->add_plugin( $plugin ); + } + + MT::Template::Context->add_container_tag(MarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); + + if (defined ($args->{'output'}) ) { + $ctx->stash('markdown_output', lc $args->{'output'}); + } + + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('markdown' => { + label => 'Markdown', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output =~ m/^html/i) { + $g_empty_element_suffix = ">"; + $ctx->stash('markdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('markdown_output', ''); + } + else { + $raw = 0; + $g_empty_element_suffix = " />"; + } + } + $text = $raw ? $text : Markdown($text); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('markdown_with_smartypants' => { + label => 'Markdown With SmartyPants', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output eq 'html') { + $g_empty_element_suffix = ">"; + } + else { + $g_empty_element_suffix = " />"; + } + } + $text = Markdown($text); + $text = $smartypants->($text, '1'); + }, + }); + } +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + + # We're only using $blosxom::version once; tell Perl not to warn us: + no warnings 'once'; + unless ( defined($blosxom::version) ) { + use warnings; + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'version'}) { # Version info + print "\nThis is Markdown, version $VERSION.\n"; + print "Copyright 2004 John Gruber\n"; + print "http://daringfireball.net/projects/markdown/\n\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. + print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $g_empty_element_suffix = ">"; + } + + + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print Markdown($text); + } +} + + + +sub Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the +# and tags get encoded. +# + my $text = shift; + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + %g_urls = (); + %g_titles = (); + %g_html_blocks = (); + + + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = _Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; + + # Turn block-level HTML blocks into hash entries + $text = _HashHTMLBlocks($text); + + # Strip link definitions, store in hashes. + $text = _StripLinkDefinitions($text); + + $text = _RunBlockGamut($text); + + $text = _UnescapeSpecialChars($text); + + return $text . "\n"; +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + } + {}mx) { + $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + if ($3) { + $g_titles{lc $1} = $3; + $g_titles{lc $1} =~ s/"/"/g; + } + } + + return $text; +} + + +sub _HashHTMLBlocks { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap

    s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; + my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; + + # First, look for nested blocks, e.g.: + #

    + #
    + # tags for inner block must be indented. + #
    + #
    + # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `
    ` and stop at the first `
    `. + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_a) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + + + # + # Now match more liberally, simply from `\n` to `\n` + # + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_b) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + # Special case just for
    . It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoHeaders($text); + + # Do Horizontal Rules: + $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags. + $text = _HashHTMLBlocks($text); + $text = _FormParagraphs($text); + + return $text; +} + + +sub _RunSpanGamut { +# +# These are all the transformations that occur *within* block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _EscapeSpecialCharsWithinTagAttributes($text); + $text = _DoCodeSpans($text); + $text = _EncodeBackslashEscapes($text); + + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = _DoImages($text); + $text = _DoAnchors($text); + + # Make links out of things like `` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](). + $text = _DoAutoLinks($text); + $text = _EncodeAmpsAndAngles($text); + $text = _DoItalicsAndBold($text); + + # Do hard breaks: + $text =~ s/ {2,}\n/ -- encode [\ ` * _] so they +# don't conflict with their use in Markdown for code, italics and strong. +# We're replacing each such character with its corresponding MD5 checksum +# value; this is likely overkill, but it should prevent us from colliding +# with the escape values by accident. +# + my $text = shift; + my $tokens ||= _TokenizeHTML($text); + $text = ''; # rebuild $text from the tokens + + foreach my $cur_token (@$tokens) { + if ($cur_token->[0] eq "tag") { + $cur_token->[1] =~ s! \\ !$g_escape_table{'\\'}!gx; + $cur_token->[1] =~ s! ` !$g_escape_table{'`'}!gx; + $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; + $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; + } + $text .= $cur_token->[1]; + } + return $text; +} + + +sub _DoAnchors { +# +# Turn Markdown link shortcuts into XHTML
    tags. +# + my $text = shift; + + # + # First, handle reference-style links: [link text] [id] + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $link_text; # for shortcut links like [this][]. + } + + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "? # href = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # Title = $6 + \5 # matching quote + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $url = $3; + my $title = $6; + + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = " tags. +# + my $text = shift; + + # + # First, handle reference-style labeled images: ![alt text][id] + # + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $alt_text; # for shortcut links like ![this][]. + } + + $alt_text =~ s/"/"/g; + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "\"$alt_text\"";? # src url = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $url = $3; + my $title = ''; + if (defined($6)) { + $title = $6; + } + + $alt_text =~ s/"/"/g; + $title =~ s/"/"/g; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "\"$alt_text\"";" . _RunSpanGamut($1) . "\n\n"; + }egmx; + + $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ + "

    " . _RunSpanGamut($1) . "

    \n\n"; + }egmx; + + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + # + $text =~ s{ + ^(\#{1,6}) # $1 = string of #'s + [ \t]* + (.+?) # $2 = Header text + [ \t]* + \#* # optional closing #'s (not counted) + \n+ + }{ + my $h_level = length($1); + "" . _RunSpanGamut($2) . "\n\n"; + }egmx; + + return $text; +} + + +sub _DoLists { +# +# Form HTML ordered (numbered) and unordered (bulleted) lists. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Re-usable patterns to match list item bullets and number markers: + my $marker_ul = qr/[*+-]/; + my $marker_ol = qr/\d+[.]/; + my $marker_any = qr/(?:$marker_ul|$marker_ol)/; + + # Re-usable pattern to match any entirel ul or ol list: + my $whole_list = qr{ + ( # $1 = whole list + ( # $2 + [ ]{0,$less_than_tab} + (${marker_any}) # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + ${marker_any}[ \t]+ + ) + ) + ) + }mx; + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _ProcessListItems(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code that uses two + # static s/// patterns rather than one conditional pattern. + + if ($g_list_level) { + $text =~ s{ + ^ + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + + # Trim any trailing whitespace, to put the closing `` + # up on the preceding line, to get it past the current stupid + # HTML block parser. This is a hack to work around the terrible + # hack that is the HTML block parser. + $result =~ s{\s+$}{}; + $result = "<$list_type>" . $result . "\n"; + $result; + }egmx; + } + else { + $text =~ s{ + (?:(?<=\n\n)|\A\n?) + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "\n"; + $result; + }egmx; + } + + + return $text; +} + + +sub _ProcessListItems { +# +# Process the contents of a single ordered or unordered list, splitting it +# into individual list items. +# + + my $list_str = shift; + my $marker_any = shift; + + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". + + $g_list_level++; + + # trim trailing blank lines: + $list_str =~ s/\n{2,}\z/\n/; + + + $list_str =~ s{ + (\n)? # leading line = $1 + (^[ \t]*) # leading whitespace = $2 + ($marker_any) [ \t]+ # list marker = $3 + ((?s:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 ($marker_any) [ \t]+)) + }{ + my $item = $4; + my $leading_line = $1; + my $leading_space = $2; + + if ($leading_line or ($item =~ m/\n{2,}/)) { + $item = _RunBlockGamut(_Outdent($item)); + } + else { + # Recursion for sub-lists: + $item = _DoLists(_Outdent($item)); + chomp $item; + $item = _RunSpanGamut($item); + } + + "
  • " . $item . "
  • \n"; + }egmx; + + $g_list_level--; + return $list_str; +} + + + +sub _DoCodeBlocks { +# +# Process Markdown `
    ` blocks.
    +#  
    +
    +  my $text = shift;
    +
    +  $text =~ s{
    +      (?:\n\n|\A)
    +      (              # $1 = the code block -- one or more lines, starting with a space/tab
    +        (?:
    +          (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    +          .*\n+
    +        )+
    +      )
    +      ((?=^[ ]{0,$g_tab_width}\S)|\Z)  # Lookahead for non-space at line-start, or end of doc
    +    }{
    +      my $codeblock = $1;
    +      my $result; # return value
    +
    +      $codeblock = _EncodeCode(_Outdent($codeblock));
    +      $codeblock = _Detab($codeblock);
    +      $codeblock =~ s/\A\n+//; # trim leading newlines
    +      $codeblock =~ s/\s+\z//; # trim trailing whitespace
    +
    +      $result = "\n\n
    " . $codeblock . "\n
    \n\n"; + + $result; + }egmx; + + return $text; +} + + +sub _DoCodeSpans { +# +# * Backtick quotes are used for spans. +# +# * You can use multiple backticks as the delimiters if you want to +# include literal backticks in the code span. So, this input: +# +# Just type ``foo `bar` baz`` at the prompt. +# +# Will translate to: +# +#

    Just type foo `bar` baz at the prompt.

    +# +# There's no arbitrary limit to the number of backticks you +# can use as delimters. If you need three consecutive backticks +# in your code, use four for delimiters, etc. +# +# * You can use spaces to get literal backticks at the edges: +# +# ... type `` `bar` `` ... +# +# Turns to: +# +# ... type `bar` ... +# + + my $text = shift; + + $text =~ s@ + (?$c
    "; + @egsx; + + return $text; +} + + +sub _EncodeCode { +# +# Encode/escape certain characters inside Markdown code runs. +# The point is that in code, these characters are literals, +# and lose their special Markdown meanings. +# + local $_ = shift; + + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + s/&/&/g; + + # Encode $'s, but only if we're running under Blosxom. + # (Blosxom interpolates Perl variables in article bodies.) + { + no warnings 'once'; + if (defined($blosxom::version)) { + s/\$/$/g; + } + } + + + # Do the angle bracket song and dance: + s! < !<!gx; + s! > !>!gx; + + # Now, escape characters that are magic in Markdown: + s! \* !$g_escape_table{'*'}!gx; + s! _ !$g_escape_table{'_'}!gx; + s! { !$g_escape_table{'{'}!gx; + s! } !$g_escape_table{'}'}!gx; + s! \[ !$g_escape_table{'['}!gx; + s! \] !$g_escape_table{']'}!gx; + s! \\ !$g_escape_table{'\\'}!gx; + + return $_; +} + + +sub _DoItalicsAndBold { + my $text = shift; + + # must go first: + $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } + {$2}gsx; + + $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 } + {$2}gsx; + + return $text; +} + + +sub _DoBlockQuotes { + my $text = shift; + + $text =~ s{ + ( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + }{ + my $bq = $1; + $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting + $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines + $bq = _RunBlockGamut($bq); # recurse + + $bq =~ s/^/ /g; + # These leading spaces screw with
     content, so we need to fix that:
    +      $bq =~ s{
    +          (\s*
    .+?
    ) + }{ + my $pre = $1; + $pre =~ s/^ //mg; + $pre; + }egsx; + + "
    \n$bq\n
    \n\n"; + }egmx; + + + return $text; +} + + +sub _FormParagraphs { +# +# Params: +# $text - string to process with html

    tags +# + my $text = shift; + + # Strip leading and trailing lines: + $text =~ s/\A\n+//; + $text =~ s/\n+\z//; + + my @grafs = split(/\n{2,}/, $text); + + # + # Wrap

    tags. + # + foreach (@grafs) { + unless (defined( $g_html_blocks{$_} )) { + $_ = _RunSpanGamut($_); + s/^([ \t]*)/

    /; + $_ .= "

    "; + } + } + + # + # Unhashify HTML blocks + # + foreach (@grafs) { + if (defined( $g_html_blocks{$_} )) { + $_ = $g_html_blocks{$_}; + } + } + + return join "\n\n", @grafs; +} + + +sub _EncodeAmpsAndAngles { +# Smart processing for ampersands and angle brackets that need to be encoded. + + my $text = shift; + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; + + # Encode naked <'s + $text =~ s{<(?![a-z/?\$!])}{<}gi; + + return $text; +} + + +sub _EncodeBackslashEscapes { +# +# Parameter: String. +# Returns: The string, with after processing the following backslash +# escape sequences. +# + local $_ = shift; + + s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. + s! \\` !$g_escape_table{'`'}!gx; + s! \\\* !$g_escape_table{'*'}!gx; + s! \\_ !$g_escape_table{'_'}!gx; + s! \\\{ !$g_escape_table{'{'}!gx; + s! \\\} !$g_escape_table{'}'}!gx; + s! \\\[ !$g_escape_table{'['}!gx; + s! \\\] !$g_escape_table{']'}!gx; + s! \\\( !$g_escape_table{'('}!gx; + s! \\\) !$g_escape_table{')'}!gx; + s! \\> !$g_escape_table{'>'}!gx; + s! \\\# !$g_escape_table{'#'}!gx; + s! \\\+ !$g_escape_table{'+'}!gx; + s! \\\- !$g_escape_table{'-'}!gx; + s! \\\. !$g_escape_table{'.'}!gx; + s{ \\! }{$g_escape_table{'!'}}gx; + + return $_; +} + + +sub _DoAutoLinks { + my $text = shift; + + $text =~ s{<((https?|ftp):[^'">\s]+)>}{
    $1}gi; + + # Email addresses: + $text =~ s{ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }{ + _EncodeEmailAddress( _UnescapeSpecialChars($1) ); + }egix; + + return $text; +} + + +sub _EncodeEmailAddress { +# +# Input: an email address, e.g. "foo@example.com" +# +# Output: the email address as a mailto link, with each character +# of the address encoded as either a decimal or hex entity, in +# the hopes of foiling most address harvesting spam bots. E.g.: +# +# foo +# @example.com +# +# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +# mailing list: +# + + my $addr = shift; + + srand; + my @encode = ( + sub { '&#' . ord(shift) . ';' }, + sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, + sub { shift }, + ); + + $addr = "mailto:" . $addr; + + $addr =~ s{(.)}{ + my $char = $1; + if ( $char eq '@' ) { + # this *must* be encoded. I insist. + $char = $encode[int rand 1]->($char); + } elsif ( $char ne ':' ) { + # leave ':' alone (to spot mailto: later) + my $r = rand; + # roughly 10% raw, 45% hex, 45% dec + $char = ( + $r > .9 ? $encode[2]->($char) : + $r < .45 ? $encode[1]->($char) : + $encode[0]->($char) + ); + } + $char; + }gex; + + $addr = qq{$addr}; + $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part + + return $addr; +} + + +sub _UnescapeSpecialChars { +# +# Swap back in all the special characters we've hidden. +# + my $text = shift; + + while( my($char, $hash) = each(%g_escape_table) ) { + $text =~ s/$hash/$char/g; + } + return $text; +} + + +sub _TokenizeHTML { +# +# Parameter: String containing HTML markup. +# Returns: Reference to an array of the tokens comprising the input +# string. Each token is either a tag (possibly with nested, +# tags contained therein, such as , or a +# run of text between tags. Each element of the array is a +# two-element array; the first is either 'tag' or 'text'; +# the second is the actual value. +# +# +# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. +# +# + + my $str = shift; + my $pos = 0; + my $len = length $str; + my @tokens; + + my $depth = 6; + my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); + my $match = qr/(?s: ) | # comment + (?s: <\? .*? \?> ) | # processing instruction + $nested_tags/ix; # nested tags + + while ($str =~ m/($match)/g) { + my $whole_tag = $1; + my $sec_start = pos $str; + my $tag_start = $sec_start - length $whole_tag; + if ($pos < $tag_start) { + push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; + } + push @tokens, ['tag', $whole_tag]; + $pos = pos $str; + } + push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; + \@tokens; +} + + +sub _Outdent { +# +# Remove one level of line-leading tabs or spaces +# + my $text = shift; + + $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; + return $text; +} + + +sub _Detab { +# +# Cribbed from a post by Bart Lateur: +# +# + my $text = shift; + + $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; + return $text; +} + + +1; + +__END__ + + +=pod + +=head1 NAME + +B + + +=head1 SYNOPSIS + +B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] + [ I ... ] + + +=head1 DESCRIPTION + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. + +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (like
    and as well). + +For more information about Markdown's syntax, see: + + http://daringfireball.net/projects/markdown/ + + +=head1 OPTIONS + +Use "--" to end switch parsing. For example, to open a file named "-z", use: + + Markdown.pl -- -z + +=over 4 + + +=item B<--html4tags> + +Use HTML 4 style for empty element tags, e.g.: + +
    + +instead of Markdown's default XHTML style tags, e.g.: + +
    + + +=item B<-v>, B<--version> + +Display Markdown's version number and copyright information. + + +=item B<-s>, B<--shortversion> + +Display the short-form version number. + + +=back + + + +=head1 BUGS + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: + + support@daringfireball.net + +Please include with your report: (1) the example input; (2) the output +you expected; (3) the output Markdown actually produced. + + +=head1 VERSION HISTORY + +See the readme file for detailed release notes for this version. + +1.0.2b2 - 20 Mar 2005 + + + Fix for nested sub-lists in list-paragraph mode. Previously we got + a spurious extra level of `

    ` tags for something like this: + + * this + + * sub + + that + + + Experimental support for [this] as a synonym for [this][]. + (Note to self: No test yet for this.) + Be sure to test, e.g.: [permutations of this sort of [thing][].] + + +1.0.2b1 - 28 Feb 2005 + + + Fix for backticks within HTML tag: like this + + + Fix for escaped backticks still triggering code spans: + + There are two raw backticks here: \` and here: \`, not a code span + +1.0.1 - 14 Dec 2004 + +1.0 - 28 Aug 2004 + + +=head1 AUTHOR + + John Gruber + http://daringfireball.net + + PHP port and other contributions by Michel Fortin + http://michelf.com + + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2003-2005 John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. + +=cut diff --git a/perlMarkdown/Markdown-1.0.2b7.pl b/perlMarkdown/Markdown-1.0.2b7.pl new file mode 100644 index 0000000..c3b351f --- /dev/null +++ b/perlMarkdown/Markdown-1.0.2b7.pl @@ -0,0 +1,1642 @@ +#!/usr/bin/env perl + +# +# Markdown -- A text-to-HTML conversion tool for web writers +# +# Copyright (c) 2004-2005 John Gruber +# +# + + +package Markdown; +require 5.006_000; +use strict; +use warnings; + +use Digest::MD5 qw(md5_hex); +use vars qw($VERSION); +$VERSION = '1.0.2b7'; +# Tue 29 Aug 2006 + +## Disabled; causes problems under Perl 5.6.1: +# use utf8; +# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + +# +# Global default settings: +# +my $g_empty_element_suffix = " />"; # Change to ">" for HTML output +my $g_tab_width = 4; + + +# +# Globals: +# + +# Regex to match balanced [brackets]. See Friedl's +# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +my $g_nested_brackets; +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; + + +# Table of hash values for escaped characters: +my %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { + $g_escape_table{$char} = md5_hex($char); +} + + +# Global hashes, used by various utility routines +my %g_urls; +my %g_titles; +my %g_html_blocks; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): +my $g_list_level = 0; + + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +my $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + + if ( (! $g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); + } + 1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT}; # Test to see if we're running in MT. +unless ($@) { + require MT; + import MT; + require MT::Template::Context; + import MT::Template::Context; + + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. + unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "Markdown", + description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", + doc_link => 'http://daringfireball.net/projects/markdown/' + }); + MT->add_plugin( $plugin ); + } + + MT::Template::Context->add_container_tag(MarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); + + if (defined ($args->{'output'}) ) { + $ctx->stash('markdown_output', lc $args->{'output'}); + } + + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('markdown' => { + label => 'Markdown', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output =~ m/^html/i) { + $g_empty_element_suffix = ">"; + $ctx->stash('markdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('markdown_output', ''); + } + else { + $raw = 0; + $g_empty_element_suffix = " />"; + } + } + $text = $raw ? $text : Markdown($text); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('markdown_with_smartypants' => { + label => 'Markdown With SmartyPants', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output eq 'html') { + $g_empty_element_suffix = ">"; + } + else { + $g_empty_element_suffix = " />"; + } + } + $text = Markdown($text); + $text = $smartypants->($text, '1'); + }, + }); + } +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + + # We're only using $blosxom::version once; tell Perl not to warn us: + no warnings 'once'; + unless ( defined($blosxom::version) ) { + use warnings; + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'version'}) { # Version info + print "\nThis is Markdown, version $VERSION.\n"; + print "Copyright 2004 John Gruber\n"; + print "http://daringfireball.net/projects/markdown/\n\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. + print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $g_empty_element_suffix = ">"; + } + + + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print Markdown($text); + } +} + + + +sub Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the +# and tags get encoded. +# + my $text = shift; + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + %g_urls = (); + %g_titles = (); + %g_html_blocks = (); + + + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = _Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; + + # Turn block-level HTML blocks into hash entries + $text = _HashHTMLBlocks($text); + + # Strip link definitions, store in hashes. + $text = _StripLinkDefinitions($text); + + $text = _RunBlockGamut($text); + + $text = _UnescapeSpecialChars($text); + + return $text . "\n"; +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + } + {}mx) { + $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + if ($3) { + $g_titles{lc $1} = $3; + $g_titles{lc $1} =~ s/"/"/g; + } + } + + return $text; +} + + +sub _HashHTMLBlocks { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap

    s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + my $block_tags = qr{ + (?: + p | div | h[1-6] | blockquote | pre | table | + dl | ol | ul | script | noscript | form | + fieldset | iframe | math | ins | del + ) + }x; + + my $tag_attrs = qr{ + (?: # Match one attr name/value pair + \s+ # There needs to be at least some whitespace + # before each attribute name. + [\w.:_-]+ # Attribute name + \s*=\s* + (["']) # Attribute quoter + .+? # Attribute value + \1 # Closing quoter + )* # Zero or more + }x; + + my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms; + my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms; + my $close_tag = undef; # let Text::Balanced handle this + + use Text::Balanced qw(gen_extract_tagged); + my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] }); + + my @chunks; + ## TO-DO: the 0,3 on the next line ought to respect the + ## tabwidth, or else, we should mandate 4-space tabwidth and + ## be done with it: + while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) { + my $cur_line = $1; + if (defined $2) { + # current line could be start of code block + + my ($tag, $remainder) = $extract_block->($cur_line . $text); + if ($tag) { + my $key = md5_hex($tag); + $g_html_blocks{$key} = $tag; + push @chunks, "\n\n" . $key . "\n\n"; + $text = $remainder; + } + else { + # No tag match, so toss $cur_line into @chunks + push @chunks, $cur_line; + } + } + else { + # current line could NOT be start of code block + push @chunks, $cur_line; + } + + } + push @chunks, $text; # Whatever is left. + + $text = join '', @chunks; + + + + # Special case just for


    . It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # PHP and ASP-style processor instructions ( and <%…%>) + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + <([?%]) # $2 + .*? + \2> + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoHeaders($text); + + # Do Horizontal Rules: + $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags. + $text = _HashHTMLBlocks($text); + $text = _FormParagraphs($text); + + return $text; +} + + +sub _RunSpanGamut { +# +# These are all the transformations that occur *within* block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoCodeSpans($text); + $text = _EscapeSpecialCharsWithinTagAttributes($text); + $text = _EncodeBackslashEscapes($text); + + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = _DoImages($text); + $text = _DoAnchors($text); + + # Make links out of things like `` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](). + $text = _DoAutoLinks($text); + $text = _EncodeAmpsAndAngles($text); + $text = _DoItalicsAndBold($text); + + # Do hard breaks: + $text =~ s/ {2,}\n/ -- encode [\ ` * _] so they +# don't conflict with their use in Markdown for code, italics and strong. +# We're replacing each such character with its corresponding MD5 checksum +# value; this is likely overkill, but it should prevent us from colliding +# with the escape values by accident. +# + my $text = shift; + my $tokens ||= _TokenizeHTML($text); + $text = ''; # rebuild $text from the tokens + + foreach my $cur_token (@$tokens) { + if ($cur_token->[0] eq "tag") { + $cur_token->[1] =~ s! \\ !$g_escape_table{'\\'}!gx; + $cur_token->[1] =~ s{ (?<=.)(?=.) }{$g_escape_table{'`'}}gx; + $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; + $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; + } + $text .= $cur_token->[1]; + } + return $text; +} + + +sub _DoAnchors { +# +# Turn Markdown link shortcuts into XHTML tags. +# + my $text = shift; + + # + # First, handle reference-style links: [link text] [id] + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $link_text; # for shortcut links like [this][]. + } + + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "? # href = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # Title = $6 + \5 # matching quote + [ \t]* # ignore any spaces/tabs between closing quote and ) + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $url = $3; + my $title = $6; + + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = " tags. +# + my $text = shift; + + # + # First, handle reference-style labeled images: ![alt text][id] + # + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $alt_text; # for shortcut links like ![this][]. + } + + $alt_text =~ s/"/"/g; + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "\"$alt_text\"";? # src url = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $url = $3; + my $title = ''; + if (defined($6)) { + $title = $6; + } + + $alt_text =~ s/"/"/g; + $title =~ s/"/"/g; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "\"$alt_text\"";" . _RunSpanGamut($1) . "\n\n"; + }egmx; + + $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ + "

    " . _RunSpanGamut($1) . "

    \n\n"; + }egmx; + + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + # + $text =~ s{ + ^(\#{1,6}) # $1 = string of #'s + [ \t]* + (.+?) # $2 = Header text + [ \t]* + \#* # optional closing #'s (not counted) + \n+ + }{ + my $h_level = length($1); + "" . _RunSpanGamut($2) . "\n\n"; + }egmx; + + return $text; +} + + +sub _DoLists { +# +# Form HTML ordered (numbered) and unordered (bulleted) lists. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Re-usable patterns to match list item bullets and number markers: + my $marker_ul = qr/[*+-]/; + my $marker_ol = qr/\d+[.]/; + my $marker_any = qr/(?:$marker_ul|$marker_ol)/; + + # Re-usable pattern to match any entirel ul or ol list: + my $whole_list = qr{ + ( # $1 = whole list + ( # $2 + [ ]{0,$less_than_tab} + (${marker_any}) # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + ${marker_any}[ \t]+ + ) + ) + ) + }mx; + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _ProcessListItems(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code that uses two + # static s/// patterns rather than one conditional pattern. + + if ($g_list_level) { + $text =~ s{ + ^ + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + + # Trim any trailing whitespace, to put the closing `` + # up on the preceding line, to get it past the current stupid + # HTML block parser. This is a hack to work around the terrible + # hack that is the HTML block parser. + $result =~ s{\s+$}{}; + $result = "<$list_type>" . $result . "\n"; + $result; + }egmx; + } + else { + $text =~ s{ + (?:(?<=\n\n)|\A\n?) + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "\n"; + $result; + }egmx; + } + + + return $text; +} + + +sub _ProcessListItems { +# +# Process the contents of a single ordered or unordered list, splitting it +# into individual list items. +# + + my $list_str = shift; + my $marker_any = shift; + + + # The $g_list_level global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". + + $g_list_level++; + + # trim trailing blank lines: + $list_str =~ s/\n{2,}\z/\n/; + + + $list_str =~ s{ + (\n)? # leading line = $1 + (^[ \t]*) # leading whitespace = $2 + ($marker_any) [ \t]+ # list marker = $3 + ((?s:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 ($marker_any) [ \t]+)) + }{ + my $item = $4; + my $leading_line = $1; + my $leading_space = $2; + + if ($leading_line or ($item =~ m/\n{2,}/)) { + $item = _RunBlockGamut(_Outdent($item)); + } + else { + # Recursion for sub-lists: + $item = _DoLists(_Outdent($item)); + chomp $item; + $item = _RunSpanGamut($item); + } + + "
  • " . $item . "
  • \n"; + }egmx; + + $g_list_level--; + return $list_str; +} + + + +sub _DoCodeBlocks { +# +# Process Markdown `
    ` blocks.
    +#	
    +
    +	my $text = shift;
    +
    +	$text =~ s{
    +			(?:\n\n|\A)
    +			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    +			  (?:
    +			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    +			    .*\n+
    +			  )+
    +			)
    +			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    +		}{
    +			my $codeblock = $1;
    +			my $result; # return value
    +
    +			$codeblock = _EncodeCode(_Outdent($codeblock));
    +			$codeblock = _Detab($codeblock);
    +			$codeblock =~ s/\A\n+//; # trim leading newlines
    +			$codeblock =~ s/\n+\z//; # trim trailing newlines
    +
    +			$result = "\n\n
    " . $codeblock . "\n
    \n\n"; + + $result; + }egmx; + + return $text; +} + + +sub _DoCodeSpans { +# +# * Backtick quotes are used for spans. +# +# * You can use multiple backticks as the delimiters if you want to +# include literal backticks in the code span. So, this input: +# +# Just type ``foo `bar` baz`` at the prompt. +# +# Will translate to: +# +#

    Just type foo `bar` baz at the prompt.

    +# +# There's no arbitrary limit to the number of backticks you +# can use as delimters. If you need three consecutive backticks +# in your code, use four for delimiters, etc. +# +# * You can use spaces to get literal backticks at the edges: +# +# ... type `` `bar` `` ... +# +# Turns to: +# +# ... type `bar` ... +# + + my $text = shift; + + $text =~ s@ + (?$c
    "; + @egsx; + + return $text; +} + + +sub _EncodeCode { +# +# Encode/escape certain characters inside Markdown code runs. +# The point is that in code, these characters are literals, +# and lose their special Markdown meanings. +# + local $_ = shift; + + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + s/&/&/g; + + # Encode $'s, but only if we're running under Blosxom. + # (Blosxom interpolates Perl variables in article bodies.) + { + no warnings 'once'; + if (defined($blosxom::version)) { + s/\$/$/g; + } + } + + + # Do the angle bracket song and dance: + s! < !<!gx; + s! > !>!gx; + + # Now, escape characters that are magic in Markdown: + s! \* !$g_escape_table{'*'}!gx; + s! _ !$g_escape_table{'_'}!gx; + s! { !$g_escape_table{'{'}!gx; + s! } !$g_escape_table{'}'}!gx; + s! \[ !$g_escape_table{'['}!gx; + s! \] !$g_escape_table{']'}!gx; + s! \\ !$g_escape_table{'\\'}!gx; + + return $_; +} + + +sub _DoItalicsAndBold { + my $text = shift; + + # must go first: + $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } + {$2}gsx; + + $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 } + {$2}gsx; + + return $text; +} + + +sub _DoBlockQuotes { + my $text = shift; + + $text =~ s{ + ( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + }{ + my $bq = $1; + $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting + $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines + $bq = _RunBlockGamut($bq); # recurse + + $bq =~ s/^/ /g; + # These leading spaces screw with
     content, so we need to fix that:
    +			$bq =~ s{
    +					(\s*
    .+?
    ) + }{ + my $pre = $1; + $pre =~ s/^ //mg; + $pre; + }egsx; + + "
    \n$bq\n
    \n\n"; + }egmx; + + + return $text; +} + + +sub _FormParagraphs { +# +# Params: +# $text - string to process with html

    tags +# + my $text = shift; + + # Strip leading and trailing lines: + $text =~ s/\A\n+//; + $text =~ s/\n+\z//; + + my @grafs = split(/\n{2,}/, $text); + + # + # Wrap

    tags. + # + foreach (@grafs) { + unless (defined( $g_html_blocks{$_} )) { + $_ = _RunSpanGamut($_); + s/^([ \t]*)/

    /; + $_ .= "

    "; + } + } + + # + # Unhashify HTML blocks + # +# foreach my $graf (@grafs) { +# my $block = $g_html_blocks{$graf}; +# if (defined $block) { +# $graf = $block; +# } +# } + + foreach my $graf (@grafs) { + # Modify elements of @grafs in-place... + my $block = $g_html_blocks{$graf}; + if (defined $block) { + $graf = $block; + if ($block =~ m{ + \A + ( # $1 =
    tag +
    ]* + \b + markdown\s*=\s* (['"]) # $2 = attr quote char + 1 + \2 + [^>]* + > + ) + ( # $3 = contents + .* + ) + (
    ) # $4 = closing tag + \z + + }xms + ) { + my ($div_open, $div_content, $div_close) = ($1, $3, $4); + + # We can't call Markdown(), because that resets the hash; + # that initialization code should be pulled into its own sub, though. + $div_content = _HashHTMLBlocks($div_content); + $div_content = _StripLinkDefinitions($div_content); + $div_content = _RunBlockGamut($div_content); + $div_content = _UnescapeSpecialChars($div_content); + + $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms; + + $graf = $div_open . "\n" . $div_content . "\n" . $div_close; + } + } + } + + + return join "\n\n", @grafs; +} + + +sub _EncodeAmpsAndAngles { +# Smart processing for ampersands and angle brackets that need to be encoded. + + my $text = shift; + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; + + # Encode naked <'s + $text =~ s{<(?![a-z/?\$!])}{<}gi; + + return $text; +} + + +sub _EncodeBackslashEscapes { +# +# Parameter: String. +# Returns: The string, with after processing the following backslash +# escape sequences. +# + local $_ = shift; + + s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. + s! \\` !$g_escape_table{'`'}!gx; + s! \\\* !$g_escape_table{'*'}!gx; + s! \\_ !$g_escape_table{'_'}!gx; + s! \\\{ !$g_escape_table{'{'}!gx; + s! \\\} !$g_escape_table{'}'}!gx; + s! \\\[ !$g_escape_table{'['}!gx; + s! \\\] !$g_escape_table{']'}!gx; + s! \\\( !$g_escape_table{'('}!gx; + s! \\\) !$g_escape_table{')'}!gx; + s! \\> !$g_escape_table{'>'}!gx; + s! \\\# !$g_escape_table{'#'}!gx; + s! \\\+ !$g_escape_table{'+'}!gx; + s! \\\- !$g_escape_table{'-'}!gx; + s! \\\. !$g_escape_table{'.'}!gx; + s{ \\! }{$g_escape_table{'!'}}gx; + + return $_; +} + + +sub _DoAutoLinks { + my $text = shift; + + $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{
    $1}gi; + + # Email addresses: + $text =~ s{ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }{ + _EncodeEmailAddress( _UnescapeSpecialChars($1) ); + }egix; + + return $text; +} + + +sub _EncodeEmailAddress { +# +# Input: an email address, e.g. "foo@example.com" +# +# Output: the email address as a mailto link, with each character +# of the address encoded as either a decimal or hex entity, in +# the hopes of foiling most address harvesting spam bots. E.g.: +# +# foo +# @example.com +# +# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +# mailing list: +# + + my $addr = shift; + + srand; + my @encode = ( + sub { '&#' . ord(shift) . ';' }, + sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, + sub { shift }, + ); + + $addr = "mailto:" . $addr; + + $addr =~ s{(.)}{ + my $char = $1; + if ( $char eq '@' ) { + # this *must* be encoded. I insist. + $char = $encode[int rand 1]->($char); + } elsif ( $char ne ':' ) { + # leave ':' alone (to spot mailto: later) + my $r = rand; + # roughly 10% raw, 45% hex, 45% dec + $char = ( + $r > .9 ? $encode[2]->($char) : + $r < .45 ? $encode[1]->($char) : + $encode[0]->($char) + ); + } + $char; + }gex; + + $addr = qq{$addr}; + $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part + + return $addr; +} + + +sub _UnescapeSpecialChars { +# +# Swap back in all the special characters we've hidden. +# + my $text = shift; + + while( my($char, $hash) = each(%g_escape_table) ) { + $text =~ s/$hash/$char/g; + } + return $text; +} + + +sub _TokenizeHTML { +# +# Parameter: String containing HTML markup. +# Returns: Reference to an array of the tokens comprising the input +# string. Each token is either a tag (possibly with nested, +# tags contained therein, such as , or a +# run of text between tags. Each element of the array is a +# two-element array; the first is either 'tag' or 'text'; +# the second is the actual value. +# +# +# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. +# +# + + my $str = shift; + my $pos = 0; + my $len = length $str; + my @tokens; + + my $depth = 6; + my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); + my $match = qr/(?s: ) | # comment + (?s: <\? .*? \?> ) | # processing instruction + $nested_tags/ix; # nested tags + + while ($str =~ m/($match)/g) { + my $whole_tag = $1; + my $sec_start = pos $str; + my $tag_start = $sec_start - length $whole_tag; + if ($pos < $tag_start) { + push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; + } + push @tokens, ['tag', $whole_tag]; + $pos = pos $str; + } + push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; + + return \@tokens; +} + + +sub _Outdent { +# +# Remove one level of line-leading tabs or spaces +# + my $text = shift; + + $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; + return $text; +} + + +sub _Detab { +# +# Cribbed from a post by Bart Lateur: +# +# + my $text = shift; + + $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; + return $text; +} + + +1; + +__END__ + + +=pod + +=head1 NAME + +B + + +=head1 SYNOPSIS + +B [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] + [ I ... ] + + +=head1 DESCRIPTION + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. + +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (like
    and
    as well). + +For more information about Markdown's syntax, see: + + http://daringfireball.net/projects/markdown/ + + +=head1 OPTIONS + +Use "--" to end switch parsing. For example, to open a file named "-z", use: + + Markdown.pl -- -z + +=over 4 + + +=item B<--html4tags> + +Use HTML 4 style for empty element tags, e.g.: + +
    + +instead of Markdown's default XHTML style tags, e.g.: + +
    + + +=item B<-v>, B<--version> + +Display Markdown's version number and copyright information. + + +=item B<-s>, B<--shortversion> + +Display the short-form version number. + + +=back + + + +=head1 BUGS + +To file bug reports or feature requests (other than topics listed in the +Caveats section above) please send email to: + + support@daringfireball.net + +Please include with your report: (1) the example input; (2) the output +you expected; (3) the output Markdown actually produced. + + +=head1 VERSION HISTORY + +See the readme file for detailed release notes for this version. + +1.0.2b7 + + + Changed shebang line from "/usr/bin/perl" to "/usr/bin/env perl" + + + Now only trim trailing newlines from code blocks, instead of trimming + all trailing whitespace characters. + + +1.0.2b6 - Mon 03 Apr 2006 + + + Fixed bad performance bug in new `Text::Balanced`-based block-level parser. + + +1.0.2b5 - Thu 08 Dec 2005 + + + Fixed bug where this: + + [text](http://m.com "title" ) + + wasn't working as expected, because the parser wasn't allowing for spaces + before the closing paren. + + +1.0.2b4 - Thu 08 Sep 2005 + + + Filthy hack to support markdown='1' in div tags, because I need it + to write today's fireball. + + + First crack at a new, smarter, block-level HTML parser. + +1.0.2b3 - Thu 28 Apr 2005 + + + _DoAutoLinks() now supports the 'dict://' URL scheme. + + + PHP- and ASP-style processor instructions are now protected as + raw HTML blocks. + + + <% ... %> + + + Workarounds for regressions introduced with fix for "backticks within + tags" bug in 1.0.2b1. The fix is to allow `...` to be turned into + ... within an HTML tag attribute, and then to turn + these spurious `` tags back into literal backtick characters + in _EscapeSpecialCharsWithinTagAttributes(). + + The regression was caused because in the fix, we moved + _EscapeSpecialCharsWithinTagAttributes() ahead of _DoCodeSpans() + in _RunSpanGamut(), but that's no good. We need to process code + spans first, otherwise we can get tripped up by something like this: + + `` + + +1.0.2b2 - 20 Mar 2005 + + + Fix for nested sub-lists in list-paragraph mode. Previously we got + a spurious extra level of `

    ` tags for something like this: + + * this + + * sub + + that + + + Experimental support for [this] as a synonym for [this][]. + (Note to self: No test yet for this.) + Be sure to test, e.g.: [permutations of this sort of [thing][].] + + +1.0.2b1 - 28 Feb 2005 + + + Fix for backticks within HTML tag: like this + + + Fix for escaped backticks still triggering code spans: + + There are two raw backticks here: \` and here: \`, not a code span + +1.0.1 - 14 Dec 2004 + +1.0 - 28 Aug 2004 + + +=head1 AUTHOR + + John Gruber + http://daringfireball.net + + PHP port and other contributions by Michel Fortin + http://michelf.com + + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2003-2005 John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. + +=cut diff --git a/perlMarkdown/readme.txt b/perlMarkdown/readme.txt new file mode 100644 index 0000000..9e73e56 --- /dev/null +++ b/perlMarkdown/readme.txt @@ -0,0 +1,13 @@ +Reference Implementation +------------------------ + +This directory contains John Gruber's original Perl implementation of Markdown. Smart diff programs like Araxis Merge will be able to match up this file with markdown.pl. + +A little tweaking helps. In markdown.pl: + + - replace `#` with `//` + - replace `$text` with `text` + +Be sure to ignore whitespace and line endings. + +Note: This release of Showdown is based on `markdown1.0.2b7.pl`, but uses the HTML parser from `markdown1.0.2b2.pl`. diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..4b0d961 --- /dev/null +++ b/readme.txt @@ -0,0 +1,156 @@ + +Showdown -- A JavaScript port of Markdown +========================================= + +Showdown Copyright (c) 2007 John Fraser. + + +Original Markdown Copyright (c) 2004-2005 John Gruber + + +Redistributable under a BSD-style open source license. +See license.txt for more information. + + +What's it for? +-------------- + +Developers can use Showdown to: + + * Add in-browser preview to existing Markdown apps + + Showdown's output is (almost always) identical to + markdown.pl's, so the server can reproduce exactly + the output that the user saw. (See below for + exceptions.) + + * Add Markdown input to programs that don't support it + + Any app that accepts HTML input can now be made to speak + Markdown by modifying the input pages's HTML. If your + application lets users edit documents again later, + then they won't have access to the original Markdown + text. But this should be good enough for many + uses -- and you can do it with just a two-line + `onsubmit` function! + + * Add Markdown input to closed-source web apps + + You can write bookmarklets or userscripts to extend + any standard textarea on the web so that it accepts + Markdown instead of HTML. With a little more hacking, + the same can probably be done with many rich edit + controls. + + * Build new web apps from scratch + + A Showdown front-end can send back text in Markdown, + HTML or both, so you can trade bandwidth for server + load to reduce your cost of operation. If your app + requires JavaScript, you won't need to do any + Markdown processing on the server at all. (For most + uses, you'll still need to sanitize the HTML before + showing it to other users -- but you'd need to do + that anyway if you're allowing raw HTML in your + Markdown.) + + +Browser Compatibility +--------------------- + +Showdown has been tested successfully with: + + - Firefox 1.5 and 2.0 + - Internet Explorer 6 and 7 + - Safari 2.0.4 + - Opera 8.54 and 9.10 + - Netscape 8.1.2 + - Konqueror 3.5.4 + +In theory, Showdown will work in any browser that supports ECMA 262 3rd Edition (JavaScript 1.5). The converter itself might even work in things that aren't web browsers, like Acrobat. No promises. + + +Known Differences in Output +--------------------------- + +In most cases, Showdown's output is identical to that of Perl Markdown v1.0.2b7. What follows is a list of all known deviations. Please email me if you find more. + + + * This release uses the HTML parser from Markdown 1.0.2b2, + which means it fails `Inline HTML (Advanced).text` from + the Markdown test suite: + +

    +
    + unindented == broken +
    +
    + + + * Showdown doesn't support the markdown="1" attribute: + +
    + Markdown does *not* work in here. +
    + + This is half laziness on my part and half stubbornness. + Markdown is smart enough to process the contents of span- + level tags without screwing things up; shouldn't it be + able to do the same inside block elements? Let's find a + way to make markdown="1" the default. + + + * You can only nest square brackets in link titles to a + depth of two levels: + + [[fine]](http://www.attacklab.net/) + [[[broken]]](http://www.attacklab.net/) + + If you need more, you can escape them with backslashes. + + + * When sublists have paragraphs, Showdown produces equivalent + HTML with a slightly different arrangement of newlines: + + + item + + - subitem + + The HTML has a superfluous newline before this + paragraph. + + - subitem + + The HTML here is unchanged. + + - subitem + + The HTML is missing a newline after this + list subitem. + + + + * Markdown.pl creates empty title attributes for + inline-style images: + + Here's an empty title on an inline-style + ![image](http://w3.org/Icons/valid-xhtml10). + + I tried to replicate this to clean up my diffs during + testing, but I went too far: now Showdown also makes + empty titles for reference-style images: + + Showdown makes an empty title for + reference-style ![images][] too. + + [images]: http://w3.org/Icons/valid-xhtml10 + + + * With crazy input, Markdown will mistakenly put + `` or `` tags in URLs: + +
    + improbable URL + + + Showdown won't. But still, don't do that. diff --git a/src/showdown.js b/src/showdown.js new file mode 100644 index 0000000..a960309 --- /dev/null +++ b/src/showdown.js @@ -0,0 +1,1296 @@ +// +// showdown.js -- A javascript port of Markdown. +// +// Copyright (c) 2007 John Fraser. +// +// Original Markdown Copyright (c) 2004-2005 John Gruber +// +// +// Redistributable under a BSD-style open source license. +// See license.txt for more information. +// +// The full source distribution is at: +// +// A A L +// T C A +// T K B +// +// +// + +// +// Wherever possible, Showdown is a straight, line-by-line port +// of the Perl version of Markdown. +// +// This is not a normal parser design; it's basically just a +// series of string substitutions. It's hard to read and +// maintain this way, but keeping Showdown close to the original +// design makes it easier to port new features. +// +// More importantly, Showdown behaves like markdown.pl in most +// edge cases. So web applications can do client-side preview +// in Javascript, and then build identical HTML on the server. +// +// This port needs the new RegExp functionality of ECMA 262, +// 3rd Edition (i.e. Javascript 1.5). Most modern web browsers +// should do fine. Even with the new regular expression features, +// We do a lot of work to emulate Perl's regex functionality. +// The tricky changes in this file mostly have the "attacklab:" +// label. Major or self-explanatory changes don't. +// +// Smart diff tools like Araxis Merge will be able to match up +// this file with markdown.pl in a useful way. A little tweaking +// helps: in a copy of markdown.pl, replace "#" with "//" and +// replace "$text" with "text". Be sure to ignore whitespace +// and line endings. +// + + +// +// Showdown usage: +// +// var text = "Markdown *rocks*."; +// +// var converter = new Showdown.converter(); +// var html = converter.makeHtml(text); +// +// alert(html); +// +// Note: move the sample code to the bottom of this +// file before uncommenting it. +// + + +// +// Showdown namespace +// +var Showdown = {}; + +// +// converter +// +// Wraps all "globals" so that the only thing +// exposed is makeHtml(). +// +Showdown.converter = function() { + +// +// Globals: +// + +// Global hashes, used by various utility routines +var g_urls; +var g_titles; +var g_html_blocks; + +// Used to track when we're inside an ordered or unordered list +// (see _ProcessListItems() for details): +var g_list_level = 0; + + +this.makeHtml = function(text) { +// +// Main function. The order in which other subs are called here is +// essential. Link and image substitutions need to happen before +// _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the +// and tags get encoded. +// + + // Clear the global hashes. If we don't clear these, you get conflicts + // from other articles when generating a page which contains more than + // one article (e.g. an index page that shows the N most recent + // articles): + g_urls = new Array(); + g_titles = new Array(); + g_html_blocks = new Array(); + + // attacklab: Replace ~ with ~T + // This lets us use tilde as an escape char to avoid md5 hashes + // The choice of character is arbitray; anything that isn't + // magic in Markdown will work. + text = text.replace(/~/g,"~T"); + + // attacklab: Replace $ with ~D + // RegExp interprets $ as a special character + // when it's in a replacement string + text = text.replace(/\$/g,"~D"); + + // Standardize line endings + text = text.replace(/\r\n/g,"\n"); // DOS to Unix + text = text.replace(/\r/g,"\n"); // Mac to Unix + + // Make sure text begins and ends with a couple of newlines: + text = "\n\n" + text + "\n\n"; + + // Convert all tabs to spaces. + text = _Detab(text); + + // Strip any lines consisting only of spaces and tabs. + // This makes subsequent regexen easier to write, because we can + // match consecutive blank lines with /\n+/ instead of something + // contorted like /[ \t]*\n+/ . + text = text.replace(/^[ \t]+$/mg,""); + + // Turn block-level HTML blocks into hash entries + text = _HashHTMLBlocks(text); + + // Strip link definitions, store in hashes. + text = _StripLinkDefinitions(text); + + text = _RunBlockGamut(text); + + text = _UnescapeSpecialChars(text); + + // attacklab: Restore dollar signs + text = text.replace(/~D/g,"$$"); + + // attacklab: Restore tildes + text = text.replace(/~T/g,"~"); + + return text; +} + + +var _StripLinkDefinitions = function(text) { +// +// Strips link definitions from text, stores the URLs and titles in +// hash references. +// + + // Link defs are in the form: ^[id]: url "optional title" + + /* + var text = text.replace(/ + ^[ ]{0,3}\[(.+)\]: // id = $1 attacklab: g_tab_width - 1 + [ \t]* + \n? // maybe *one* newline + [ \t]* + ? // url = $2 + [ \t]* + \n? // maybe one newline + [ \t]* + (?: + (\n*) // any lines skipped = $3 attacklab: lookbehind removed + ["(] + (.+?) // title = $4 + [")] + [ \t]* + )? // title is optional + (?:\n+|$) + /gm, + function(){...}); + */ + var text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*?[ \t]*\n?[ \t]*(?:(\n*)["(](.+?)[")][ \t]*)?(?:\n+|\Z)/gm, + function (wholeMatch,m1,m2,m3,m4) { + m1 = m1.toLowerCase(); + g_urls[m1] = _EncodeAmpsAndAngles(m2); // Link IDs are case-insensitive + if (m3) { + // Oops, found blank lines, so it's not a title. + // Put back the parenthetical statement we stole. + return m3+m4; + } else if (m4) { + g_titles[m1] = m4.replace(/"/g,"""); + } + + // Completely remove the definition from the text + return ""; + } + ); + + return text; +} + + +var _HashHTMLBlocks = function(text) { + // attacklab: Double up blank lines to reduce lookaround + text = text.replace(/\n/g,"\n\n"); + + // Hashify HTML blocks: + // We only want to do this for block-level HTML tags, such as headers, + // lists, and tables. That's because we still want to wrap

    s around + // "paragraphs" that are wrapped in non-block-level tags, such as anchors, + // phrase emphasis, and spans. The list of tags we're looking for is + // hard-coded: + var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del" + var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math" + + // First, look for nested blocks, e.g.: + //

    + //
    + // tags for inner block must be indented. + //
    + //
    + // + // The outermost tags must start at the left margin for this to match, and + // the inner nested divs must be indented. + // We need to do this before the next, more liberal match, because the next + // match will start at the first `
    ` and stop at the first `
    `. + + // attacklab: This regex can be expensive when it fails. + /* + var text = text.replace(/ + ( // save in $1 + ^ // start of line (with /m) + <($block_tags_a) // start tag = $2 + \b // word break + // attacklab: hack around khtml/pcre bug... + [^\r]*?\n // any number of lines, minimally matching + // the matching end tag + [ \t]* // trailing spaces/tabs + (?=\n+) // followed by a newline + ) // attacklab: there are sentinel newlines at end of document + /gm,function(){...}}; + */ + text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,hashElement); + + // + // Now match more liberally, simply from `\n` to `\n` + // + + /* + var text = text.replace(/ + ( // save in $1 + ^ // start of line (with /m) + <($block_tags_b) // start tag = $2 + \b // word break + // attacklab: hack around khtml/pcre bug... + [^\r]*? // any number of lines, minimally matching + .* // the matching end tag + [ \t]* // trailing spaces/tabs + (?=\n+) // followed by a newline + ) // attacklab: there are sentinel newlines at end of document + /gm,function(){...}}; + */ + text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,hashElement); + + // Special case just for
    . It was easier to make a special case than + // to make the other regex more complicated. + + /* + text = text.replace(/ + ( // save in $1 + \n\n // Starting after a blank line + [ ]{0,3} + (<(hr) // start tag = $2 + \b // word break + ([^<>])*? // + \/?>) // the matching end tag + [ \t]* + (?=\n{2,}) // followed by a blank line + ) + /g,hashElement); + */ + text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement); + + // Special case for standalone HTML comments: + + /* + text = text.replace(/ + ( // save in $1 + \n\n // Starting after a blank line + [ ]{0,3} // attacklab: g_tab_width - 1 + + [ \t]* + (?=\n{2,}) // followed by a blank line + ) + /g,hashElement); + */ + text = text.replace(/(\n\n[ ]{0,3}[ \t]*(?=\n{2,}))/g,hashElement); + + // PHP and ASP-style processor instructions ( and <%...%>) + + /* + text = text.replace(/ + (?: + \n\n // Starting after a blank line + ) + ( // save in $1 + [ ]{0,3} // attacklab: g_tab_width - 1 + (?: + <([?%]) // $2 + [^\r]*? + \2> + ) + [ \t]* + (?=\n{2,}) // followed by a blank line + ) + /g,hashElement); + */ + text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement); + + // attacklab: Undo double lines (see comment at top of this function) + text = text.replace(/\n\n/g,"\n"); + return text; +} + +var hashElement = function(wholeMatch,m1) { + var blockText = m1; + + // Undo double lines + blockText = blockText.replace(/\n\n/g,"\n"); + blockText = blockText.replace(/^\n/,""); + + // strip trailing blank lines + blockText = blockText.replace(/\n+$/g,""); + + // Replace the element text with a marker ("~KxK" where x is its key) + blockText = "\n\n~K" + (g_html_blocks.push(blockText)-1) + "K\n\n"; + + return blockText; +}; + +var _RunBlockGamut = function(text) { +// +// These are all the transformations that form block-level +// tags like paragraphs, headers, and list items. +// + text = _DoHeaders(text); + + // Do Horizontal Rules: + var key = hashBlock("
    "); + text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,key); + text = text.replace(/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,key); + text = text.replace(/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm,key); + + text = _DoLists(text); + text = _DoCodeBlocks(text); + text = _DoBlockQuotes(text); + + // We already ran _HashHTMLBlocks() before, in Markdown(), but that + // was to escape raw HTML in the original Markdown source. This time, + // we're escaping the markup we've just created, so that we don't wrap + //

    tags around block-level tags. + text = _HashHTMLBlocks(text); + text = _FormParagraphs(text); + + return text; +} + + +var _RunSpanGamut = function(text) { +// +// These are all the transformations that occur *within* block-level +// tags like paragraphs, headers, and list items. +// + + text = _DoCodeSpans(text); + text = _EscapeSpecialCharsWithinTagAttributes(text); + text = _EncodeBackslashEscapes(text); + + // Process anchor and image tags. Images must come first, + // because ![foo][f] looks like an anchor. + text = _DoImages(text); + text = _DoAnchors(text); + + // Make links out of things like `` + // Must come after _DoAnchors(), because you can use < and > + // delimiters in inline links like [this](). + text = _DoAutoLinks(text); + text = _EncodeAmpsAndAngles(text); + text = _DoItalicsAndBold(text); + + // Do hard breaks: + text = text.replace(/ +\n/g,"
    \n"); + + return text; +} + +var _EscapeSpecialCharsWithinTagAttributes = function(text) { +// +// Within tags -- meaning between < and > -- encode [\ ` * _] so they +// don't conflict with their use in Markdown for code, italics and strong. +// + + // Build a regex to find HTML tags and comments. See Friedl's + // "Mastering Regular Expressions", 2nd Ed., pp. 200-201. + var regex = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|)/gi; + + text = text.replace(regex, function(wholeMatch) { + var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g,"$1`"); + tag = escapeCharacters(tag,"\\`*_"); + return tag; + }); + + return text; +} + +var _DoAnchors = function(text) { +// +// Turn Markdown link shortcuts into XHTML
    tags. +// + // + // First, handle reference-style links: [link text] [id] + // + + /* + text = text.replace(/ + ( // wrap whole match in $1 + \[ + ( + (?: + \[[^\]]*\] // allow brackets nested one level + | + [^\[] // or anything else + )* + ) + \] + + [ ]? // one optional space + (?:\n[ ]*)? // one optional newline followed by spaces + + \[ + (.*?) // id = $3 + \] + )()()()() // pad remaining backreferences + /g,_DoAnchors_callback); + */ + text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeAnchorTag); + + // + // Next, inline-style links: [link text](url "optional title") + // + + /* + text = text.replace(/ + ( // wrap whole match in $1 + \[ + ( + (?: + \[[^\]]*\] // allow brackets nested one level + | + [^\[\]] // or anything else + ) + ) + \] + \( // literal paren + [ \t]* + () // no id, so leave $3 empty + ? // href = $4 + [ \t]* + ( // $5 + (['"]) // quote char = $6 + (.*?) // Title = $7 + \6 // matching quote + [ \t]* // ignore any spaces/tabs between closing quote and ) + )? // title is optional + \) + ) + /g,writeAnchorTag); + */ + text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeAnchorTag); + + // + // Last, handle reference-style shortcuts: [link text] + // These must come last in case you've also got [link test][1] + // or [link test](/foo) + // + + /* + text = text.replace(/ + ( // wrap whole match in $1 + \[ + ([^\[\]]+) // link text = $2; can't contain '[' or ']' + \] + )()()()()() // pad rest of backreferences + /g, writeAnchorTag); + */ + text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag); + + return text; +} + +var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) { + if (m7 == undefined) m7 = ""; + var whole_match = m1; + var link_text = m2; + var link_id = m3.toLowerCase(); + var url = m4; + var title = m7; + + if (url == "") { + if (link_id == "") { + // lower-case and turn embedded newlines into spaces + link_id = link_text.toLowerCase().replace(/ ?\n/g," "); + } + url = "#"+link_id; + + if (g_urls[link_id] != undefined) { + url = g_urls[link_id]; + if (g_titles[link_id] != undefined) { + title = g_titles[link_id]; + } + } + else { + if (whole_match.search(/\(\s*\)$/m)>-1) { + // Special case for explicit empty url + url = ""; + } else { + return whole_match; + } + } + } + + url = escapeCharacters(url,"*_"); + var result = ""; + + return result; +} + + +var _DoImages = function(text) { +// +// Turn Markdown image shortcuts into tags. +// + + // + // First, handle reference-style labeled images: ![alt text][id] + // + + /* + text = text.replace(/ + ( // wrap whole match in $1 + !\[ + (.*?) // alt text = $2 + \] + + [ ]? // one optional space + (?:\n[ ]*)? // one optional newline followed by spaces + + \[ + (.*?) // id = $3 + \] + )()()()() // pad rest of backreferences + /g,writeImageTag); + */ + text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeImageTag); + + // + // Next, handle inline images: ![alt text](url "optional title") + // Don't forget: encode * and _ + + /* + text = text.replace(/ + ( // wrap whole match in $1 + !\[ + (.*?) // alt text = $2 + \] + \s? // One optional whitespace character + \( // literal paren + [ \t]* + () // no id, so leave $3 empty + ? // src url = $4 + [ \t]* + ( // $5 + (['"]) // quote char = $6 + (.*?) // title = $7 + \6 // matching quote + [ \t]* + )? // title is optional + \) + ) + /g,writeImageTag); + */ + text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeImageTag); + + return text; +} + +var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) { + var whole_match = m1; + var alt_text = m2; + var link_id = m3.toLowerCase(); + var url = m4; + var title = m7; + + if (!title) title = ""; + + if (url == "") { + if (link_id == "") { + // lower-case and turn embedded newlines into spaces + link_id = alt_text.toLowerCase().replace(/ ?\n/g," "); + } + url = "#"+link_id; + + if (g_urls[link_id] != undefined) { + url = g_urls[link_id]; + if (g_titles[link_id] != undefined) { + title = g_titles[link_id]; + } + } + else { + return whole_match; + } + } + + alt_text = alt_text.replace(/"/g,"""); + url = escapeCharacters(url,"*_"); + var result = "\""" + _RunSpanGamut(m1) + "");}); + + text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm, + function(matchFound,m1){return hashBlock("

    " + _RunSpanGamut(m1) + "

    ");}); + + // atx-style headers: + // # Header 1 + // ## Header 2 + // ## Header 2 with closing hashes ## + // ... + // ###### Header 6 + // + + /* + text = text.replace(/ + ^(\#{1,6}) // $1 = string of #'s + [ \t]* + (.+?) // $2 = Header text + [ \t]* + \#* // optional closing #'s (not counted) + \n+ + /gm, function() {...}); + */ + + text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm, + function(wholeMatch,m1,m2) { + var h_level = m1.length; + return hashBlock("" + _RunSpanGamut(m2) + ""); + }); + + return text; +} + +// This declaration keeps Dojo compressor from outputting garbage: +var _ProcessListItems; + +var _DoLists = function(text) { +// +// Form HTML ordered (numbered) and unordered (bulleted) lists. +// + + // attacklab: add sentinel to hack around khtml/safari bug: + // http://bugs.webkit.org/show_bug.cgi?id=11231 + text += "~0"; + + // Re-usable pattern to match any entirel ul or ol list: + + /* + var whole_list = / + ( // $1 = whole list + ( // $2 + [ ]{0,3} // attacklab: g_tab_width - 1 + ([*+-]|\d+[.]) // $3 = first list item marker + [ \t]+ + ) + [^\r]+? + ( // $4 + ~0 // sentinel for workaround; should be $ + | + \n{2,} + (?=\S) + (?! // Negative lookahead for another list item marker + [ \t]* + (?:[*+-]|\d+[.])[ \t]+ + ) + ) + )/g + */ + var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm; + + if (g_list_level) { + text = text.replace(whole_list,function(wholeMatch,m1,m2) { + var list = m1; + var list_type = (m2.search(/[*+-]/g)>-1) ? "ul" : "ol"; + + // Turn double returns into triple returns, so that we can make a + // paragraph for the last item in a list, if necessary: + list = list.replace(/\n{2,}/g,"\n\n\n");; + var result = _ProcessListItems(list); + + // Trim any trailing whitespace, to put the closing `` + // up on the preceding line, to get it past the current stupid + // HTML block parser. This is a hack to work around the terrible + // hack that is the HTML block parser. + result = result.replace(/\s+$/,""); + result = "<"+list_type+">" + result + "\n"; + return result; + }); + } else { + whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g; + text = text.replace(whole_list,function(wholeMatch,m1,m2,m3) { + var runup = m1; + var list = m2; + + var list_type = (m3.search(/[*+-]/g)>-1) ? "ul" : "ol"; + // Turn double returns into triple returns, so that we can make a + // paragraph for the last item in a list, if necessary: + var list = list.replace(/\n{2,}/g,"\n\n\n");; + var result = _ProcessListItems(list); + result = runup + "<"+list_type+">\n" + result + "\n"; + return result; + }); + } + + // attacklab: strip sentinel + text = text.replace(/~0/,""); + + return text; +} + +_ProcessListItems = function(list_str) { +// +// Process the contents of a single ordered or unordered list, splitting it +// into individual list items. +// + // The $g_list_level global keeps track of when we're inside a list. + // Each time we enter a list, we increment it; when we leave a list, + // we decrement. If it's zero, we're not in a list anymore. + // + // We do this because when we're not inside a list, we want to treat + // something like this: + // + // I recommend upgrading to version + // 8. Oops, now this line is treated + // as a sub-list. + // + // As a single paragraph, despite the fact that the second line starts + // with a digit-period-space sequence. + // + // Whereas when we're inside a list (or sub-list), that line will be + // treated as the start of a sub-list. What a kludge, huh? This is + // an aspect of Markdown's syntax that's hard to parse perfectly + // without resorting to mind-reading. Perhaps the solution is to + // change the syntax rules such that sub-lists must start with a + // starting cardinal number; e.g. "1." or "a.". + + g_list_level++; + + // trim trailing blank lines: + list_str = list_str.replace(/\n{2,}$/,"\n"); + + // attacklab: add sentinel to emulate \z + list_str += "~0"; + + /* + list_str = list_str.replace(/ + (\n)? // leading line = $1 + (^[ \t]*) // leading whitespace = $2 + ([*+-]|\d+[.]) [ \t]+ // list marker = $3 + ([^\r]+? // list item text = $4 + (\n{1,2})) + (?= \n* (~0 | \2 ([*+-]|\d+[.]) [ \t]+)) + /gm, function(){...}); + */ + list_str = list_str.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm, + function(wholeMatch,m1,m2,m3,m4){ + var item = m4; + var leading_line = m1; + var leading_space = m2; + + if (leading_line || (item.search(/\n{2,}/)>-1)) { + item = _RunBlockGamut(_Outdent(item)); + } + else { + // Recursion for sub-lists: + item = _DoLists(_Outdent(item)); + item = item.replace(/\n$/,""); // chomp(item) + item = _RunSpanGamut(item); + } + + return "
  • " + item + "
  • \n"; + } + ); + + // attacklab: strip sentinel + list_str = list_str.replace(/~0/g,""); + + g_list_level--; + return list_str; +} + + +var _DoCodeBlocks = function(text) { +// +// Process Markdown `
    ` blocks.
    +//  
    +
    +	/*
    +		text = text.replace(text,
    +			/(?:\n\n|^)
    +			(								// $1 = the code block -- one or more lines, starting with a space/tab
    +				(?:
    +					(?:[ ]{4}|\t)			// Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
    +					.*\n+
    +				)+
    +			)
    +			(\n*[ ]{0,3}[^ \t\n]|(?=~0))	// attacklab: g_tab_width
    +		/g,function(){...});
    +	*/
    +
    +	// attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
    +	text += "~0";
    +	
    +	text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
    +		function(wholeMatch,m1,m2) {
    +			var codeblock = m1;
    +			var nextChar = m2;
    +		
    +			codeblock = _EncodeCode( _Outdent(codeblock));
    +			codeblock = _Detab(codeblock);
    +			codeblock = codeblock.replace(/^\n+/g,""); // trim leading newlines
    +			codeblock = codeblock.replace(/\n+$/g,""); // trim trailing whitespace
    +
    +			codeblock = "
    " + codeblock + "\n
    "; + + return hashBlock(codeblock) + nextChar; + } + ); + + // attacklab: strip sentinel + text = text.replace(/~0/,""); + + return text; +} + +var hashBlock = function(text) { + text = text.replace(/(^\n+|\n+$)/g,""); + return "\n\n~K" + (g_html_blocks.push(text)-1) + "K\n\n"; +} + + +var _DoCodeSpans = function(text) { +// +// * Backtick quotes are used for spans. +// +// * You can use multiple backticks as the delimiters if you want to +// include literal backticks in the code span. So, this input: +// +// Just type ``foo `bar` baz`` at the prompt. +// +// Will translate to: +// +//

    Just type foo `bar` baz at the prompt.

    +// +// There's no arbitrary limit to the number of backticks you +// can use as delimters. If you need three consecutive backticks +// in your code, use four for delimiters, etc. +// +// * You can use spaces to get literal backticks at the edges: +// +// ... type `` `bar` `` ... +// +// Turns to: +// +// ... type `bar` ... +// + + /* + text = text.replace(/ + (^|[^\\]) // Character before opening ` can't be a backslash + (`+) // $2 = Opening run of ` + ( // $3 = The code block + [^\r]*? + [^`] // attacklab: work around lack of lookbehind + ) + \2 // Matching closer + (?!`) + /gm, function(){...}); + */ + + text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm, + function(wholeMatch,m1,m2,m3,m4) { + var c = m3; + c = c.replace(/^([ \t]*)/g,""); // leading whitespace + c = c.replace(/[ \t]*$/g,""); // trailing whitespace + c = _EncodeCode(c); + return m1+""+c+""; + }); + + return text; +} + + +var _EncodeCode = function(text) { +// +// Encode/escape certain characters inside Markdown code runs. +// The point is that in code, these characters are literals, +// and lose their special Markdown meanings. +// + // Encode all ampersands; HTML entities are not + // entities within a Markdown code span. + text = text.replace(/&/g,"&"); + + // Do the angle bracket song and dance: + text = text.replace(//g,">"); + + // Now, escape characters that are magic in Markdown: + text = escapeCharacters(text,"\*_{}[]\\",false); + +// jj the line above breaks this: +//--- + +//* Item + +// 1. Subitem + +// special char: * +//--- + + return text; +} + + +var _DoItalicsAndBold = function(text) { + + // must go first: + text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g, + "$2"); + + text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g, + "$2"); + + return text; +} + + +var _DoBlockQuotes = function(text) { + + /* + text = text.replace(/ + ( // Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? // '>' at the start of a line + .+\n // rest of the first line + (.+\n)* // subsequent consecutive lines + \n* // blanks + )+ + ) + /gm, function(){...}); + */ + + text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm, + function(wholeMatch,m1) { + var bq = m1; + + // attacklab: hack around Konqueror 3.5.4 bug: + // "----------bug".replace(/^-/g,"") == "bug" + + bq = bq.replace(/^[ \t]*>[ \t]?/gm,"~0"); // trim one level of quoting + + // attacklab: clean up hack + bq = bq.replace(/~0/g,""); + + bq = bq.replace(/^[ \t]+$/gm,""); // trim whitespace-only lines + bq = _RunBlockGamut(bq); // recurse + + bq = bq.replace(/(^|\n)/g,"$1 "); + // These leading spaces screw with
     content, so we need to fix that:
    +			bq = bq.replace(
    +					/(\s*
    [^\r]+?<\/pre>)/gm,
    +				function(wholeMatch,m1) {
    +					var pre = m1;
    +					// attacklab: hack around Konqueror 3.5.4 bug:
    +					pre = pre.replace(/^  /mg,"~0");
    +					pre = pre.replace(/~0/g,"");
    +					return pre;
    +				});
    +			
    +			return hashBlock("
    \n" + bq + "\n
    "); + }); + return text; +} + + +var _FormParagraphs = function(text) { +// +// Params: +// $text - string to process with html

    tags +// + + // Strip leading and trailing lines: + text = text.replace(/^\n+/g,""); + text = text.replace(/\n+$/g,""); + + var grafs = text.split(/\n{2,}/g); + var grafsOut = new Array(); + + // + // Wrap

    tags. + // + var end = grafs.length; + for (var i=0; i= 0) { + grafsOut.push(str); + } + else if (str.search(/\S/) >= 0) { + str = _RunSpanGamut(str); + str = str.replace(/^([ \t]*)/g,"

    "); + str += "

    " + grafsOut.push(str); + } + + } + + // + // Unhashify HTML blocks + // + end = grafsOut.length; + for (var i=0; i= 0) { + var blockText = g_html_blocks[RegExp.$1]; + blockText = blockText.replace(/\$/g,"$$$$"); // Escape any dollar signs + grafsOut[i] = grafsOut[i].replace(/~K\d+K/,blockText); + } + } + + return grafsOut.join("\n\n"); +} + + +var _EncodeAmpsAndAngles = function(text) { +// Smart processing for ampersands and angle brackets that need to be encoded. + + // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + // http://bumppo.net/projects/amputator/ + text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&"); + + // Encode naked <'s + text = text.replace(/<(?![a-z\/?\$!])/gi,"<"); + + return text; +} + + +var _EncodeBackslashEscapes = function(text) { +// +// Parameter: String. +// Returns: The string, with after processing the following backslash +// escape sequences. +// + + // attacklab: The polite way to do this is with the new + // escapeCharacters() function: + // + // text = escapeCharacters(text,"\\",true); + // text = escapeCharacters(text,"`*_{}[]()>#+-.!",true); + // + // ...but we're sidestepping its use of the (slow) RegExp constructor + // as an optimization for Firefox. This function gets called a LOT. + + text = text.replace(/\\(\\)/g,escapeCharacters_callback); + text = text.replace(/\\([`*_{}\[\]()>#+-.!])/g,escapeCharacters_callback); + return text; +} + + +var _DoAutoLinks = function(text) { + + text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"
    $1"); + + // Email addresses: + + /* + text = text.replace(/ + < + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + /gi, _DoAutoLinks_callback()); + */ + text = text.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi, + function(wholeMatch,m1) { + return _EncodeEmailAddress( _UnescapeSpecialChars(m1) ); + } + ); + + return text; +} + + +var _EncodeEmailAddress = function(addr) { +// +// Input: an email address, e.g. "foo@example.com" +// +// Output: the email address as a mailto link, with each character +// of the address encoded as either a decimal or hex entity, in +// the hopes of foiling most address harvesting spam bots. E.g.: +// +// foo +// @example.com +// +// Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +// mailing list: +// + + // attacklab: why can't javascript speak hex? + function char2hex(ch) { + var hexDigits = '0123456789ABCDEF'; + var dec = ch.charCodeAt(0); + return(hexDigits.charAt(dec>>4) + hexDigits.charAt(dec&15)); + } + + var encode = [ + function(ch){return "&#"+ch.charCodeAt(0)+";";}, + function(ch){return "&#x"+char2hex(ch)+";";}, + function(ch){return ch;} + ]; + + addr = "mailto:" + addr; + + addr = addr.replace(/./g, function(ch) { + if (ch == "@") { + // this *must* be encoded. I insist. + ch = encode[Math.floor(Math.random()*2)](ch); + } else if (ch !=":") { + // leave ':' alone (to spot mailto: later) + var r = Math.random(); + // roughly 10% raw, 45% hex, 45% dec + ch = ( + r > .9 ? encode[2](ch) : + r > .45 ? encode[1](ch) : + encode[0](ch) + ); + } + return ch; + }); + + addr = "" + addr + ""; + addr = addr.replace(/">.+:/g,"\">"); // strip the mailto: from the visible part + + return addr; +} + + +var _UnescapeSpecialChars = function(text) { +// +// Swap back in all the special characters we've hidden. +// + text = text.replace(/~E(\d+)E/g, + function(wholeMatch,m1) { + var charCodeToReplace = parseInt(m1); + return String.fromCharCode(charCodeToReplace); + } + ); + return text; +} + + +var _Outdent = function(text) { +// +// Remove one level of line-leading tabs or spaces +// + + // attacklab: hack around Konqueror 3.5.4 bug: + // "----------bug".replace(/^-/g,"") == "bug" + + text = text.replace(/^(\t|[ ]{1,4})/gm,"~0"); // attacklab: g_tab_width + + // attacklab: clean up hack + text = text.replace(/~0/g,"") + + return text; +} + +var _Detab = function(text) { +// attacklab: Detab's completely rewritten for speed. +// In perl we could fix it by anchoring the regexp with \G. +// In javascript we're less fortunate. + + // expand first n-1 tabs + text = text.replace(/\t(?=\t)/g," "); // attacklab: g_tab_width + + // replace the nth with two sentinels + text = text.replace(/\t/g,"~A~B"); + + // use the sentinel to anchor our regex so it doesn't explode + text = text.replace(/~B(.+?)~A/g, + function(wholeMatch,m1,m2) { + var leadingText = m1; + var numSpaces = 4 - leadingText.length % 4; // attacklab: g_tab_width + + // there *must* be a better way to do this: + for (var i=0; i