# Source code for pygments_markdown_lexer.lexer

# -*- coding: utf-8 -*-
# pylint: disable=bad-continuation, too-few-public-methods
""" Markdown lexer for Pygments.

    See `Write your own lexer`_ and `Builtin Tokens`_.

    .. _`Write your own lexer`: http://pygments.org/docs/lexerdevelopment/
    .. _`Builtin Tokens`: http://pygments.org/docs/tokens/
"""
# Copyright ©  2015 Jürgen Hermann <jh@web.de>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, unicode_literals, print_function

import re

from pygments.lexer import RegexLexer, include, bygroups, using, this, do_insertions, default, words
from pygments.token import *  # pylint: disable=wildcard-import, unused-wildcard-import

from ._compat import encode_filename as state


class Markdown(object):
    """Symbolic names for Markdown tokens.

    Every attribute is an alias for a Pygments token type, so that lexer
    rules can be written in terms of Markdown concepts rather than raw
    Pygments token names.
    """

    # Syntax characters of the markup itself (hash marks, list bullets,
    # emphasis delimiters, code fences, ...).
    Markup = Keyword

    # Heading text. Both levels deliberately share one token type here.
    # NOTE(review): Pygments also provides ``Generic.Subheading``; the
    # existing mapping is kept so that rendered output does not change.
    Heading = Generic.Heading
    SubHeading = Generic.Heading

    # Content of fenced code blocks.
    CodeBlock = Comment.Preproc

    # Embedded HTML: one-line elements, multi-line blocks, and comments.
    HtmlSingle = Comment.Single
    HtmlBlock = Comment.Preproc
    # The standard Pygments token is spelled ``Multiline``. The former
    # ``MultiLine`` spelling silently created a new, non-standard token
    # subtype that styles and formatters do not know about.
    HtmlComment = Comment.Multiline

    # Character entities such as ``&amp;`` or ``&#160;``.
    HtmlEntity = String.Symbol
class MarkdownLexer(RegexLexer):
    """
        A Markdown lexer for Pygments.

        Some rules adapted from code in ``pygments.lexers.markup`` (BSD-licensed).

        The ``tokens`` table defines these states:

        * ``root`` -- block-level constructs (rules, headings, quotes, lists,
          embedded HTML, fenced code), falling back to ``inline``.
        * ``inline`` -- escapes, HTML entities, inline code and emphasis.
        * ``literal`` -- inside inline code, up to the closing backtick(s).
        * ``htmlblock`` -- inside an HTML block, up to a closing tag line.
        * ``codeblock`` -- inside a fenced code block, up to the closing fence.
    """
    name = 'Markdown'
    aliases = ['md', 'markdown']
    filenames = ['*.md', '*.mkd', '*.markdown']
    mimetypes = ["text/x-markdown"]

    # ``^`` and ``$`` in the rules below anchor at line boundaries.
    flags = re.MULTILINE

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    # Lookahead required after the closing backtick(s) of inline code:
    # either end of line, or one of the listed punctuation, whitespace,
    # delimiter or closer characters.
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    # NOTE(review): ``state`` is ``encode_filename`` imported from the
    # package's ``_compat`` module; presumably it turns state names into
    # native strings on Python 2 -- confirm against ``_compat``.
    tokens = {
        state('root'): [
            # Horizontal rule
            (r'^\s*\n(?:\s*[-*_]){3,}\s*\n', Markdown.Markup),

            # Headings (hashmarks)
            (r'^(# )(.+?)( #)?(\n)',
             bygroups(Markdown.Markup, Markdown.Heading,
                      Markdown.Markup, Text)),
            (r'^(#{2,6} )(.+?)( #{2,6})?(\n)',
             bygroups(Markdown.Markup, Markdown.SubHeading,
                      Markdown.Markup, Text)),

            # Headings (underlined, with an optional matching overline)
            (r'^(={3,}\n)?(\S.{2,}\n)(={3,})(\n)',
             bygroups(Markdown.Markup, Markdown.Heading,
                      Markdown.Markup, Text)),
            # A dash underline marks a second-level heading in Markdown, so
            # it uses the same symbolic name as the ``##``+ rule above.
            (r'^(-{3,}\n)?(\S.{2,}\n)(-{3,})(\n)',
             bygroups(Markdown.Markup, Markdown.SubHeading,
                      Markdown.Markup, Text)),

            # Blockquotes
            (r'^\s*>\s', Markdown.Markup),

            # Lists
            (r'^\s*[-+*]\s', Markdown.Markup),
            (r'^\s*[0-9]+\.\s', Markdown.Markup),

            # HTML one-liners
            # NOTE(review): the attribute group ``( [^>]+)`` is mandatory, so
            # an element without attributes does not match this rule --
            # confirm that this is intended.
            (r'^<(?P<tag>[-:a-zA-Z0-9]+)( [^>]+)>.+</(?P=tag)>\n',
             Markdown.HtmlSingle),

            # HTML comments
            # A single lazy ``[\s\S]*?`` spans newlines and stops at the
            # first ``-->``. The previous ``(?:.*?\n?)*`` nested a lazy
            # quantifier inside a greedy one and backtracked exponentially
            # on an unterminated ``<!--``.
            (r'(<!--)([\s\S]*?)(-->)',
             bygroups(Markdown.Markup, Markdown.HtmlComment,
                      Markdown.Markup)),

            # HTML blocks
            (r'^<[^/>][^>]*>\n', Markdown.HtmlBlock, state('htmlblock')),

            # GitHub style code blocks
            (r'^(```)(.*?)(\n)',
             bygroups(Markdown.Markup, Name.Namespace, Markdown.CodeBlock),
             state('codeblock')),

            include(state('inline')),
        ],
        state('inline'): [
            # Escaping (before everything else)
            (r'\\.', String.Escape),

            # HTML entities
            (r'&[-a-z0-9]+;', Markdown.HtmlEntity),
            (r'&#[0-9]{1,9};', Markdown.HtmlEntity),
            (r'&', Text),

            # Inline code
            (r'``?', Markdown.Markup, state('literal')),

            # Emphasis
            (r'_?_[ \n]', Text),  # whitespace escape
            (r'\*?\*[ \n]', Text),  # whitespace escape
            (r'(\*\*)(.+?)((?<![ \\])\*\*)',
             bygroups(Markdown.Markup, Generic.Strong, Markdown.Markup)),
            (r'(__)(.+?)((?<![ \\])__)',
             bygroups(Markdown.Markup, Generic.Strong, Markdown.Markup)),
            (r'(\*)(.+?)((?<![ \\])\*)',
             bygroups(Markdown.Markup, Generic.Emph, Markdown.Markup)),
            (r'(_)(.+?)((?<![ \\])_)',
             bygroups(Markdown.Markup, Generic.Emph, Markdown.Markup)),

            # Disabled rules, kept for reference:
            #(r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
            # bygroups(String, String.Interpol, String)),
            #(r'`.+?`__?', String),  # reference
            #(r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
            # bygroups(Name.Variable, Name.Attribute)),  # role
            #(r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
            # bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            #(r'\[.*?\]_', String),  # Footnote or citation
            #(r'<.+?>', Name.Tag),   # Hyperlink
            #(r'[^\\\n\[*`:]+', Text),  # Remaining text

            (r'[a-zA-Z0-9]+', Text),  # optimize normal words a little
            (r'.', Text),  # default fallback
        ],
        state('literal'): [
            (r'[^`]+', String.Backtick),
            # Closing backtick(s): not preceded by a backslash, and followed
            # by one of the terminators accepted by ``end_string_suffix``.
            (r'(?<!\\)``?' + end_string_suffix,
             Markdown.Markup, state('#pop')),
        ],
        state('htmlblock'): [
            # TODO: delegate to HTML lexer
            (r'^</[^>]+>\n', Markdown.HtmlBlock, state('#pop')),
            (r'.*\n', Markdown.HtmlBlock),  # slurp boring text
        ],
        state('codeblock'): [
            (r'^```\n', Markdown.Markup, state('#pop')),
            (r'[^`]+', Markdown.CodeBlock),  # slurp boring text
            (r'`', Markdown.CodeBlock),  # allow single backticks
        ],
    }