filters: import more modern scripts

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
author: Jason A. Donenfeld <Jason@zx2c4.com> 2013-05-27 23:39:43 +0400
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2013-05-27 23:54:16 +0400
commit: 8149be213f1c8f52b0dbe6c213f6073af57fa954 (patch)
tree: e4d0315f53022bb7335f782ad394d8e7602f1b52
parent: dcbc0438b2543a733858d62170f3110a89edbed6 (diff)
10 files changed, 1815 insertions, 15 deletions
diff --git a/Makefile b/Makefile
index 4b83ac7..4df95b4 100644
--- a/Makefile
+++ b/Makefile
@@ -17,6 +17,7 @@ SHA1_HEADER = <openssl/sha.h>
 GIT_VER = 1.8.3
 GIT_URL = https://git-core.googlecode.com/files/git-$(GIT_VER).tar.gz
 INSTALL = install
+COPYTREE = cp -r
 MAN5_TXT = $(wildcard *.5.txt)
 MAN_TXT  = $(MAN5_TXT)
 DOC_MAN5 = $(patsubst %.txt,%,$(MAN5_TXT))
@@ -77,7 +78,7 @@ install: all
 	$(INSTALL) -m 0644 cgit.css $(DESTDIR)$(CGIT_DATA_PATH)/cgit.css
 	$(INSTALL) -m 0644 cgit.png $(DESTDIR)$(CGIT_DATA_PATH)/cgit.png
 	$(INSTALL) -m 0755 -d $(DESTDIR)$(filterdir)
-	$(INSTALL) -m 0755 filters/* $(DESTDIR)$(filterdir)
+	$(COPYTREE)  filters/* $(DESTDIR)$(filterdir)
 
 install-doc: install-man install-html install-pdf
 
diff --git a/cgit.css b/cgit.css
index a50d62b..d467c66 100644
--- a/cgit.css
+++ b/cgit.css
@@ -800,17 +800,3 @@ div#cgit table.ssdiff td.space {
 div#cgit table.ssdiff td.space div {
 	min-height: 3em;
 }
-
-/* Syntax highlighting */
-div#cgit table.blob .num  { color:#2928ff; }
-div#cgit table.blob .esc  { color:#ff00ff; }
-div#cgit table.blob .str  { color:#ff0000; }
-div#cgit table.blob .dstr { color:#818100; }
-div#cgit table.blob .slc  { color:#838183; font-style:italic; }
-div#cgit table.blob .com  { color:#838183; font-style:italic; }
-div#cgit table.blob .dir  { color:#008200; }
-div#cgit table.blob .sym  { color:#000000; }
-div#cgit table.blob .kwa  { color:#000000; font-weight:bold; }
-div#cgit table.blob .kwb  { color:#830000; }
-div#cgit table.blob .kwc  { color:#000000; font-weight:bold; }
-div#cgit table.blob .kwd  { color:#010181; }
diff --git a/filters/about-formatting.sh b/filters/about-formatting.sh
new file mode 100755
index 0000000..313a4e6
--- /dev/null
+++ b/filters/about-formatting.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# This may be used with the about-filter or repo.about-filter setting in cgitrc.
+# It passes formatting of about pages to differing programs, depending on the usage.
+
+# Markdown support requires perl.
+# RestructuredText support requires python and docutils.
+# Man page support requires groff.
+
+# The following environment variables can be used to retrieve the configuration
+# of the repository for which this script is called:
+# CGIT_REPO_URL        ( = repo.url       setting )
+# CGIT_REPO_NAME       ( = repo.name      setting )
+# CGIT_REPO_PATH       ( = repo.path      setting )
+# CGIT_REPO_OWNER      ( = repo.owner     setting )
+# CGIT_REPO_DEFBRANCH  ( = repo.defbranch setting )
+# CGIT_REPO_SECTION    ( = section        setting )
+# CGIT_REPO_CLONE_URL  ( = repo.clone-url setting )
+
+cd "$(dirname $0)/html-converters/"
+case "$(tr '[:upper:]' '[:lower:]' <<<"$1")" in
+	*.md|*.mkd) exec ./md2html; ;;
+	*.rst) exec ./rst2html; ;;
+	*.[1-9]) exec ./man2html; ;;
+	*.htm|*.html) exec cat; ;;
+	*.txt|*) exec ./txt2html; ;;
+esac
diff --git a/filters/html-converters/man2html b/filters/html-converters/man2html
new file mode 100755
index 0000000..1b28437
--- /dev/null
+++ b/filters/html-converters/man2html
@@ -0,0 +1,5 @@
+#!/bin/sh
+echo "<div style=\"font-family: monospace\">"
+groff -mandoc -T html -P -r -P -l | egrep -v '(<html>|<head>|<meta|<title>|</title>|</head>|<body>|</body>|</html>|<!DOCTYPE|"http://www.w3.org)'
+echo "</div>"
+
diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html
new file mode 100755
index 0000000..5cab749
--- /dev/null
+++ b/filters/html-converters/md2html
@@ -0,0 +1,2 @@
+#!/bin/sh
+exec "$(dirname "$0")/resources/markdown.pl"
diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl
new file mode 100755
index 0000000..abec173
--- /dev/null
+++ b/filters/html-converters/resources/markdown.pl
@@ -0,0 +1,1731 @@
+#!/usr/bin/perl
+
+#
+# Markdown -- A text-to-HTML conversion tool for web writers
+#
+# Copyright (c) 2004 John Gruber
+# <http://daringfireball.net/projects/markdown/>
+#
+
+
+package Markdown;
+require 5.006_000;
+use strict;
+use warnings;
+
+use Digest::MD5 qw(md5_hex);
+use vars qw($VERSION);
+$VERSION = '1.0.1';
+# Tue 14 Dec 2004
+
+## Disabled; causes problems under Perl 5.6.1:
+use utf8;
+binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
+
+
+#
+# Global default settings:
+#
+my $g_empty_element_suffix = " />";     # Change to ">" for HTML output
+my $g_tab_width = 4;
+
+
+#
+# Globals:
+#
+
+# Regex to match balanced [brackets]. See Friedl's
+# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
+my $g_nested_brackets;
+$g_nested_brackets = qr{
+	(?> 								# Atomic matching
+	   [^\[\]]+							# Anything other than brackets
+	 | 
+	   \[
+		 (??{ $g_nested_brackets })		# Recursive set of nested brackets
+	   \]
+	)*
+}x;
+
+
+# Table of hash values for escaped characters:
+my %g_escape_table;
+foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
+	$g_escape_table{$char} = md5_hex($char);
+}
+
+
+# Global hashes, used by various utility routines
+my %g_urls;
+my %g_titles;
+my %g_html_blocks;
+
+# Used to track when we're inside an ordered or unordered list
+# (see _ProcessListItems() for details):
+my $g_list_level = 0;
+
+
+#### Blosxom plug-in interface ##########################################
+
+# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
+# which posts Markdown should process, using a "meta-markup: markdown"
+# header. If it's set to 0 (the default), Markdown will process all
+# entries.
+my $g_blosxom_use_meta = 0;
+
+sub start { 1; }
+sub story {
+	my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
+
+	if ( (! $g_blosxom_use_meta) or
+	     (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
+	     ){
+			$$body_ref  = Markdown($$body_ref);
+     }
+     1;
+}
+
+
+#### Movable Type plug-in interface #####################################
+eval {require MT};  # Test to see if we're running in MT.
+unless ($@) {
+    require MT;
+    import  MT;
+    require MT::Template::Context;
+    import  MT::Template::Context;
+
+	eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0.
+	unless ($@) {
+		require MT::Plugin;
+		import  MT::Plugin;
+		my $plugin = new MT::Plugin({
+			name => "Markdown",
+			description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
+			doc_link => 'http://daringfireball.net/projects/markdown/'
+		});
+		MT->add_plugin( $plugin );
+	}
+
+	MT::Template::Context->add_container_tag(MarkdownOptions => sub {
+		my $ctx	 = shift;
+		my $args = shift;
+		my $builder = $ctx->stash('builder');
+		my $tokens = $ctx->stash('tokens');
+
+		if (defined ($args->{'output'}) ) {
+			$ctx->stash('markdown_output', lc $args->{'output'});
+		}
+
+		defined (my $str = $builder->build($ctx, $tokens) )
+			or return $ctx->error($builder->errstr);
+		$str;		# return value
+	});
+
+	MT->add_text_filter('markdown' => {
+		label     => 'Markdown',
+		docs      => 'http://daringfireball.net/projects/markdown/',
+		on_format => sub {
+			my $text = shift;
+			my $ctx  = shift;
+			my $raw  = 0;
+		    if (defined $ctx) {
+		    	my $output = $ctx->stash('markdown_output'); 
+				if (defined $output  &&  $output =~ m/^html/i) {
+					$g_empty_element_suffix = ">";
+					$ctx->stash('markdown_output', '');
+				}
+				elsif (defined $output  &&  $output eq 'raw') {
+					$raw = 1;
+					$ctx->stash('markdown_output', '');
+				}
+				else {
+					$raw = 0;
+					$g_empty_element_suffix = " />";
+				}
+			}
+			$text = $raw ? $text : Markdown($text);
+			$text;
+		},
+	});
+
+	# If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
+	my $smartypants;
+
+	{
+		no warnings "once";
+		$smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
+	}
+
+	if ($smartypants) {
+		MT->add_text_filter('markdown_with_smartypants' => {
+			label     => 'Markdown With SmartyPants',
+			docs      => 'http://daringfireball.net/projects/markdown/',
+			on_format => sub {
+				my $text = shift;
+				my $ctx  = shift;
+				if (defined $ctx) {
+					my $output = $ctx->stash('markdown_output'); 
+					if (defined $output  &&  $output eq 'html') {
+						$g_empty_element_suffix = ">";
+					}
+					else {
+						$g_empty_element_suffix = " />";
+					}
+				}
+				$text = Markdown($text);
+				$text = $smartypants->($text, '1');
+			},
+		});
+	}
+}
+else {
+#### BBEdit/command-line text filter interface ##########################
+# Needs to be hidden from MT (and Blosxom when running in static mode).
+
+    # We're only using $blosxom::version once; tell Perl not to warn us:
+	no warnings 'once';
+    unless ( defined($blosxom::version) ) {
+		use warnings;
+
+		#### Check for command-line switches: #################
+		my %cli_opts;
+		use Getopt::Long;
+		Getopt::Long::Configure('pass_through');
+		GetOptions(\%cli_opts,
+			'version',
+			'shortversion',
+			'html4tags',
+		);
+		if ($cli_opts{'version'}) {		# Version info
+			print "\nThis is Markdown, version $VERSION.\n";
+			print "Copyright 2004 John Gruber\n";
+			print "http://daringfireball.net/projects/markdown/\n\n";
+			exit 0;
+		}
+		if ($cli_opts{'shortversion'}) {		# Just the version number string.
+			print $VERSION;
+			exit 0;
+		}
+		if ($cli_opts{'html4tags'}) {			# Use HTML tag style instead of XHTML
+			$g_empty_element_suffix = ">";
+		}
+
+
+		#### Process incoming text: ###########################
+		my $text;
+		{
+			local $/;               # Slurp the whole file
+			$text = <>;
+		}
+	print <<'EOT';
+<style>
+.markdown-body {
+    font-size: 14px;
+    line-height: 1.6;
+    overflow: hidden;
+}
+.markdown-body>*:first-child {
+    margin-top: 0 !important;
+}
+.markdown-body>*:last-child {
+    margin-bottom: 0 !important;
+}
+.markdown-body a.absent {
+    color: #c00;
+}
+.markdown-body a.anchor {
+    display: block;
+    padding-left: 30px;
+    margin-left: -30px;
+    cursor: pointer;
+    position: absolute;
+    top: 0;
+    left: 0;
+    bottom: 0;
+}
+.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 {
+    margin: 20px 0 10px;
+    padding: 0;
+    font-weight: bold;
+    -webkit-font-smoothing: antialiased;
+    cursor: text;
+    position: relative;
+}
+.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link {
+    display: none;
+    color: #000;
+}
+.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor {
+    text-decoration: none;
+    line-height: 1;
+    padding-left: 0;
+    margin-left: -22px;
+    top: 15%}
+.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link {
+    display: inline-block;
+}
+.markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code {
+    font-size: inherit;
+}
+.markdown-body h1 {
+    font-size: 28px;
+    color: #000;
+}
+.markdown-body h2 {
+    font-size: 24px;
+    border-bottom: 1px solid #ccc;
+    color: #000;
+}
+.markdown-body h3 {
+    font-size: 18px;
+}
+.markdown-body h4 {
+    font-size: 16px;
+}
+.markdown-body h5 {
+    font-size: 14px;
+}
+.markdown-body h6 {
+    color: #777;
+    font-size: 14px;
+}
+.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre {
+    margin: 15px 0;
+}
+.markdown-body hr {
+    background: transparent url("/dirty-shade.png") repeat-x 0 0;
+    border: 0 none;
+    color: #ccc;
+    height: 4px;
+    padding: 0;
+}
+.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child {
+    margin-top: 0;
+    padding-top: 0;
+}
+.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 {
+    margin-top: 0;
+    padding-top: 0;
+}
+.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p {
+    margin-top: 0;
+}
+.markdown-body li p.first {
+    display: inline-block;
+}
+.markdown-body ul, .markdown-body ol {
+    padding-left: 30px;
+}
+.markdown-body ul.no-list, .markdown-body ol.no-list {
+    list-style-type: none;
+    padding: 0;
+}
+.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type {
+    margin-top: 0px;
+}
+.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type {
+    margin-bottom: 0;
+}
+.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul {
+    margin-bottom: 0;
+}
+.markdown-body dl {
+    padding: 0;
+}
+.markdown-body dl dt {
+    font-size: 14px;
+    font-weight: bold;
+    font-style: italic;
+    padding: 0;
+    margin: 15px 0 5px;
+}
+.markdown-body dl dt:first-child {
+    padding: 0;
+}
+.markdown-body dl dt>:first-child {
+    margin-top: 0px;
+}
+.markdown-body dl dt>:last-child {
+    margin-bottom: 0px;
+}
+.markdown-body dl dd {
+    margin: 0 0 15px;
+    padding: 0 15px;
+}
+.markdown-body dl dd>:first-child {
+    margin-top: 0px;
+}
+.markdown-body dl dd>:last-child {
+    margin-bottom: 0px;
+}
+.markdown-body blockquote {
+    border-left: 4px solid #DDD;
+    padding: 0 15px;
+    color: #777;
+}
+.markdown-body blockquote>:first-child {
+    margin-top: 0px;
+}
+.markdown-body blockquote>:last-child {
+    margin-bottom: 0px;
+}
+.markdown-body table th {
+    font-weight: bold;
+}
+.markdown-body table th, .markdown-body table td {
+    border: 1px solid #ccc;
+    padding: 6px 13px;
+}
+.markdown-body table tr {
+    border-top: 1px solid #ccc;
+    background-color: #fff;
+}
+.markdown-body table tr:nth-child(2n) {
+    background-color: #f8f8f8;
+}
+.markdown-body img {
+    max-width: 100%;
+    -moz-box-sizing: border-box;
+    box-sizing: border-box;
+}
+.markdown-body span.frame {
+    display: block;
+    overflow: hidden;
+}
+.markdown-body span.frame>span {
+    border: 1px solid #ddd;
+    display: block;
+    float: left;
+    overflow: hidden;
+    margin: 13px 0 0;
+    padding: 7px;
+    width: auto;
+}
+.markdown-body span.frame span img {
+    display: block;
+    float: left;
+}
+.markdown-body span.frame span span {
+    clear: both;
+    color: #333;
+    display: block;
+    padding: 5px 0 0;
+}
+.markdown-body span.align-center {
+    display: block;
+    overflow: hidden;
+    clear: both;
+}
+.markdown-body span.align-center>span {
+    display: block;
+    overflow: hidden;
+    margin: 13px auto 0;
+    text-align: center;
+}
+.markdown-body span.align-center span img {
+    margin: 0 auto;
+    text-align: center;
+}
+.markdown-body span.align-right {
+    display: block;
+    overflow: hidden;
+    clear: both;
+}
+.markdown-body span.align-right>span {
+    display: block;
+    overflow: hidden;
+    margin: 13px 0 0;
+    text-align: right;
+}
+.markdown-body span.align-right span img {
+    margin: 0;
+    text-align: right;
+}
+.markdown-body span.float-left {
+    display: block;
+    margin-right: 13px;
+    overflow: hidden;
+    float: left;
+}
+.markdown-body span.float-left span {
+    margin: 13px 0 0;
+}
+.markdown-body span.float-right {
+    display: block;
+    margin-left: 13px;
+    overflow: hidden;
+    float: right;
+}
+.markdown-body span.float-right>span {
+    display: block;
+    overflow: hidden;
+    margin: 13px auto 0;
+    text-align: right;
+}
+.markdown-body code, .markdown-body tt {
+    margin: 0 2px;
+    padding: 0px 5px;
+    border: 1px solid #eaeaea;
+    background-color: #f8f8f8;
+    border-radius: 3px;
+}
+.markdown-body code {
+    white-space: nowrap;
+}
+.markdown-body pre>code {
+    margin: 0;
+    padding: 0;
+    white-space: pre;
+    border: none;
+    background: transparent;
+}
+.markdown-body .highlight pre, .markdown-body pre {
+    background-color: #f8f8f8;
+    border: 1px solid #ccc;
+    font-size: 13px;
+    line-height: 19px;
+    overflow: auto;
+    padding: 6px 10px;
+    border-radius: 3px;
+}
+.markdown-body pre code, .markdown-body pre tt {
+    margin: 0;
+    padding: 0;
+    background-color: transparent;
+    border: none;
+}
+</style>
+EOT
+	print "<div class='markdown-body'>";
+        print Markdown($text);
+	print "</div>";
+    }
+}
+
+
+
+sub Markdown {
+#
+# Main function. The order in which other subs are called here is
+# essential. Link and image substitutions need to happen before
+# _EscapeSpecialChars(), so that any *'s or _'s in the <a>
+# and <img> tags get encoded.
+#
+	my $text = shift;
+
+	# Clear the global hashes. If we don't clear these, you get conflicts
+	# from other articles when generating a page which contains more than
+	# one article (e.g. an index page that shows the N most recent
+	# articles):
+	%g_urls = ();
+	%g_titles = ();
+	%g_html_blocks = ();
+
+
+	# Standardize line endings:
+	$text =~ s{\r\n}{\n}g; 	# DOS to Unix
+	$text =~ s{\r}{\n}g; 	# Mac to Unix
+
+	# Make sure $text ends with a couple of newlines:
+	$text .= "\n\n";
+
+	# Convert all tabs to spaces.
+	$text = _Detab($text);
+
+	# Strip any lines consisting only of spaces and tabs.
+	# This makes subsequent regexen easier to write, because we can
+	# match consecutive blank lines with /\n+/ instead of something
+	# contorted like /[ \t]*\n+/ .
+	$text =~ s/^[ \t]+$//mg;
+
+	# Turn block-level HTML blocks into hash entries
+	$text = _HashHTMLBlocks($text);
+
+	# Strip link definitions, store in hashes.
+	$text = _StripLinkDefinitions($text);
+
+	$text = _RunBlockGamut($text);
+
+	$text = _UnescapeSpecialChars($text);
+
+	return $text . "\n";
+}
+
+
+sub _StripLinkDefinitions {
+#
+# Strips link definitions from text, stores the URLs and titles in
+# hash references.
+#
+	my $text = shift;
+	my $less_than_tab = $g_tab_width - 1;
+
+	# Link defs are in the form: ^[id]: url "optional title"
+	while ($text =~ s{
+						^[ ]{0,$less_than_tab}\[(.+)\]:	# id = $1
+						  [ \t]*
+						  \n?				# maybe *one* newline
+						  [ \t]*
+						<?(\S+?)>?			# url = $2
+						  [ \t]*
+						  \n?				# maybe one newline
+						  [ \t]*
+						(?:
+							(?<=\s)			# lookbehind for whitespace
+							["(]
+							(.+?)			# title = $3
+							[")]
+							[ \t]*
+						)?	# title is optional
+						(?:\n+|\Z)
+					}
+					{}mx) {
+		$g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );	# Link IDs are case-insensitive
+		if ($3) {
+			$g_titles{lc $1} = $3;
+			$g_titles{lc $1} =~ s/"/&quot;/g;
+		}
+	}
+
+	return $text;
+}
+
+
+sub _HashHTMLBlocks {
+	my $text = shift;
+	my $less_than_tab = $g_tab_width - 1;
+
+	# Hashify HTML blocks:
+	# We only want to do this for block-level HTML tags, such as headers,
+	# lists, and tables. That's because we still want to wrap <p>s around
+	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+	# phrase emphasis, and spans. The list of tags we're looking for is
+	# hard-coded:
+	my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
+	my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
+
+	# First, look for nested blocks, e.g.:
+	# 	<div>
+	# 		<div>
+	# 		tags for inner block must be indented.
+	# 		</div>
+	# 	</div>
+	#
+	# The outermost tags must start at the left margin for this to match, and
+	# the inner nested divs must be indented.
+	# We need to do this before the next, more liberal match, because the next
+	# match will start at the first `<div>` and stop at the first `</div>`.
+	$text =~ s{
+				(						# save in $1
+					^					# start of line  (with /m)
+					<($block_tags_a)	# start tag = $2
+					\b					# word break
+					(.*\n)*?			# any number of lines, minimally matching
+					</\2>				# the matching end tag
+					[ \t]*				# trailing spaces/tabs
+					(?=\n+|\Z)	# followed by a newline or end of document
+				)
+			}{
+				my $key = md5_hex($1);
+				$g_html_blocks{$key} = $1;
+				"\n\n" . $key . "\n\n";
+			}egmx;
+
+
+	#
+	# Now match more liberally, simply from `\n<tag>` to `</tag>\n`
+	#
+	$text =~ s{
+				(						# save in $1
+					^					# start of line  (with /m)
+					<($block_tags_b)	# start tag = $2
+					\b					# word break
+					(.*\n)*?			# any number of lines, minimally matching
+					.*</\2>				# the matching end tag
+					[ \t]*				# trailing spaces/tabs
+					(?=\n+|\Z)	# followed by a newline or end of document
+				)
+			}{
+				my $key = md5_hex($1);
+				$g_html_blocks{$key} = $1;
+				"\n\n" . $key . "\n\n";
+			}egmx;
+	# Special case just for <hr />. It was easier to make a special case than
+	# to make the other regex more complicated.	
+	$text =~ s{
+				(?:
+					(?<=\n\n)		# Starting after a blank line
+					|				# or
+					\A\n?			# the beginning of the doc
+				)
+				(						# save in $1
+					[ ]{0,$less_than_tab}
+					<(hr)				# start tag = $2
+					\b					# word break
+					([^<>])*?			# 
+					/?>					# the matching end tag
+					[ \t]*
+					(?=\n{2,}|\Z)		# followed by a blank line or end of document
+				)
+			}{
+				my $key = md5_hex($1);
+				$g_html_blocks{$key} = $1;
+				"\n\n" . $key . "\n\n";
+			}egx;
+
+	# Special case for standalone HTML comments:
+	$text =~ s{
+				(?:
+					(?<=\n\n)		# Starting after a blank line
+					|				# or
+					\A\n?			# the beginning of the doc
+				)
+				(						# save in $1
+					[ ]{0,$less_than_tab}
+					(?s:
+						<!
+						(--.*?--\s*)+
+						>
+					)
+					[ \t]*
+					(?=\n{2,}|\Z)		# followed by a blank line or end of document
+				)
+			}{
+				my $key = md5_hex($1);
+				$g_html_blocks{$key} = $1;
+				"\n\n" . $key . "\n\n";
+			}egx;
+
+
+	return $text;
+}
+
+
+sub _RunBlockGamut {
+#
+# These are all the transformations that form block-level
+# tags like paragraphs, headers, and list items.
+#
+	my $text = shift;
+
+	$text = _DoHeaders($text);
+
+	# Do Horizontal Rules:
+	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+	$text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+	$text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
+
+	$text = _DoLists($text);
+
+	$text = _DoCodeBlocks($text);
+
+	$text = _DoBlockQuotes($text);
+
+	# We already ran _HashHTMLBlocks() before, in Markdown(), but that
+	# was to escape raw HTML in the original Markdown source. This time,
+	# we're escaping the markup we've just created, so that we don't wrap
+	# <p> tags around block-level tags.
+	$text = _HashHTMLBlocks($text);
+
+	$text = _FormParagraphs($text);
+
+	return $text;
+}
+
+
+sub _RunSpanGamut {
+#
+# These are all the transformations that occur *within* block-level
+# tags like paragraphs, headers, and list items.
+#
+	my $text = shift;
+
+	$text = _DoCodeSpans($text);
+
+	$text = _EscapeSpecialChars($text);
+
+	# Process anchor and image tags. Images must come first,
+	# because ![foo][f] looks like an anchor.
+	$text = _DoImages($text);
+	$text = _DoAnchors($text);
+
+	# Make links out of things like `<http://example.com/>`
+	# Must come after _DoAnchors(), because you can use < and >
+	# delimiters in inline links like [this](<url>).
+	$text = _DoAutoLinks($text);
+
+	$text = _EncodeAmpsAndAngles($text);
+
+	$text = _DoItalicsAndBold($text);
+
+	# Do hard breaks:
+	$text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
+
+	return $text;
+}
+
+
+sub _EscapeSpecialChars {
+	my $text = shift;
+	my $tokens ||= _TokenizeHTML($text);
+
+	$text = '';   # rebuild $text from the tokens
+# 	my $in_pre = 0;	 # Keep track of when we're inside <pre> or <code> tags.
+# 	my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
+
+	foreach my $cur_token (@$tokens) {
+		if ($cur_token->[0] eq "tag") {
+			# Within tags, encode * and _ so they don't conflict
+			# with their use in Markdown for italics and strong.
+			# We're replacing each such character with its
+			# corresponding MD5 checksum value; this is likely
+			# overkill, but it should prevent us from colliding
+			# with the escape values by accident.
+			$cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
+			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
+			$text .= $cur_token->[1];
+		} else {
+			my $t = $cur_token->[1];
+			$t = _EncodeBackslashEscapes($t);
+			$text .= $t;
+		}
+	}
+	return $text;
+}
+
+
+sub _DoAnchors {
+#
+# Turn Markdown link shortcuts into XHTML <a> tags.
+#
+	my $text = shift;
+
+	#
+	# First, handle reference-style links: [link text] [id]
+	#
+	$text =~ s{
+		(					# wrap whole match in $1
+		  \[
+		    ($g_nested_brackets)	# link text = $2
+		  \]
+
+		  [ ]?				# one optional space
+		  (?:\n[ ]*)?		# one optional newline followed by spaces
+
+		  \[
+		    (.*?)		# id = $3
+		  \]
+		)
+	}{
+		my $result;
+		my $whole_match = $1;
+		my $link_text   = $2;
+		my $link_id     = lc $3;
+
+		if ($link_id eq "") {
+			$link_id = lc $link_text;     # for shortcut links like [this][].
+		}
+
+		if (defined $g_urls{$link_id}) {
+			my $url = $g_urls{$link_id};
+			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
+			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
+			$result = "<a href=\"$url\"";
+			if ( defined $g_titles{$link_id} ) {
+				my $title = $g_titles{$link_id};
+				$title =~ s! \* !$g_escape_table{'*'}!gx;
+				$title =~ s!  _ !$g_escape_table{'_'}!gx;
+				$result .=  " title=\"$title\"";
+			}
+			$result .= ">$link_text</a>";
+		}
+		else {
+			$result = $whole_match;
+		}
+		$result;
+	}xsge;
+
+	#
+	# Next, inline-style links: [link text](url "optional title")
+	#
+	$text =~ s{
+		(				# wrap whole match in $1
+		  \[
+		    ($g_nested_brackets)	# link text = $2
+		  \]
+		  \(			# literal paren
+		  	[ \t]*
+			<?(.*?)>?	# href = $3
+		  	[ \t]*
+			(			# $4
+			  (['"])	# quote char = $5
+			  (.*?)		# Title = $6
+			  \5		# matching quote
+			)?			# title is optional
+		  \)
+		)
+	}{
+		my $result;
+		my $whole_match = $1;
+		my $link_text   = $2;
+		my $url	  		= $3;
+		my $title		= $6;
+
+		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
+		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
+		$result = "<a href=\"$url\"";
+
+		if (defined $title) {
+			$title =~ s/"/&quot;/g;
+			$title =~ s! \* !$g_escape_table{'*'}!gx;
+			$title =~ s!  _ !$g_escape_table{'_'}!gx;
+			$result .=  " title=\"$title\"";
+		}
+
+		$result .= ">$link_text</a>";
+
+		$result;
+	}xsge;
+
+	return $text;
+}
+
+
+sub _DoImages {
+#
+# Turn Markdown image shortcuts into <img> tags.
+#
+	my $text = shift;
+
+	#
+	# First, handle reference-style labeled images: ![alt text][id]
+	#
+	$text =~ s{
+		(				# wrap whole match in $1
+		  !\[
+		    (.*?)		# alt text = $2
+		  \]
+
+		  [ ]?				# one optional space
+		  (?:\n[ ]*)?		# one optional newline followed by spaces
+
+		  \[
+		    (.*?)		# id = $3
+		  \]
+
+		)
+	}{
+		my $result;
+		my $whole_match = $1;
+		my $alt_text    = $2;
+		my $link_id     = lc $3;
+
+		if ($link_id eq "") {
+			$link_id = lc $alt_text;     # for shortcut links like ![this][].
+		}
+
+		$alt_text =~ s/"/&quot;/g;
+		if (defined $g_urls{$link_id}) {
+			my $url = $g_urls{$link_id};
+			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
+			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
+			$result = "<img src=\"$url\" alt=\"$alt_text\"";
+			if (defined $g_titles{$link_id}) {
+				my $title = $g_titles{$link_id};
+				$title =~ s! \* !$g_escape_table{'*'}!gx;
+				$title =~ s!  _ !$g_escape_table{'_'}!gx;
+				$result .=  " title=\"$title\"";
+			}
+			$result .= $g_empty_element_suffix;
+		}
+		else {
+			# If there's no such link ID, leave intact:
+			$result = $whole_match;
+		}
+
+		$result;
+	}xsge;
+
+	#
+	# Next, handle inline images:  ![alt text](url "optional title")
+	# Don't forget: encode * and _
+
+	$text =~ s{
+		(				# wrap whole match in $1
+		  !\[
+		    (.*?)		# alt text = $2
+		  \]
+		  \(			# literal paren
+		  	[ \t]*
+			<?(\S+?)>?	# src url = $3
+		  	[ \t]*
+			(			# $4
+			  (['"])	# quote char = $5
+			  (.*?)		# title = $6
+			  \5		# matching quote
+			  [ \t]*
+			)?			# title is optional
+		  \)
+		)
+	}{
+		my $result;
+		my $whole_match = $1;
+		my $alt_text    =             
+                
+                    
+                        contacts: admin@thfree.ru
+                    
+                
+                
+                    
+                        mirrors for living
+                    
+                
+            
+
author	Jason A. Donenfeld <Jason@zx2c4.com>	2013-05-27 23:39:43 +0400
committer	Jason A. Donenfeld <Jason@zx2c4.com>	2013-05-27 23:54:16 +0400
commit	8149be213f1c8f52b0dbe6c213f6073af57fa954 (patch)
tree	e4d0315f53022bb7335f782ad394d8e7602f1b52
parent	dcbc0438b2543a733858d62170f3110a89edbed6 (diff)