moved latex to render_math; latex now symbolic link

2014-02-22 13:15:24 -08:00
parent 7ba212ecc4
commit 5af9d08c06
7 changed files with 310 additions and 207 deletions
--- a/render_math/math.py
+++ b/render_math/math.py
@@ -0,0 +1,348 @@
+# -*- coding: utf-8 -*-
+"""
+Math Render Plugin For Pelican
+==============================
+This plugin allows your site to render Math. It supports both LaTeX and MathML
+using the MathJax JavaScript engine.
+
+Typogrify Compatibility
+-----------------------
+This plugin now plays nicely with typogrify, but it requires
+typogrify version 2.04 or above.
+
+User Settings
+-------------
+Users are also able to pass a dictionary of settings in the settings file which
+will control how the mathjax library renders things. This could be very useful
+for template builders that want to adjust look and feel of the math.
+See README for more details.
+"""
+
+from pelican import signals
+from pelican import contents
+import re, os
+
+# Global Variables
+# Module-level state shared by the functions below. _TYPOGRIFY, _WRAP_LATEX,
+# _MATH_SUMMARY_REGEX and _MATH_INCOMPLETE_TAG_REGEX start out as placeholders
+# and are assigned in pelican_init.
+_TYPOGRIFY = False  # used to determine if we should process typogrify
+_WRAP_LATEX = None  # the tag to wrap LaTeX math in (needed to play nicely with typogrify or for template designers)
+# _MATH_REGEX groups:
+#   1 - opening delimiter: $$, $, \begin{env} or a <math ...> tag
+#   2 - the environment name when the opener is \begin{env}
+#   3 - the literal tag name 'math' when the opener is a <math> tag
+#   4 - closing delimiter matching the opener (same $ / $$, \end{env}, </math>)
+_MATH_REGEX = re.compile(r'(\$\$|\$|\\begin\{(.+?)\}|<(math)(?:\s.*?)?>).*?(\1|\\end\{\2\}|</\3>)', re.DOTALL | re.IGNORECASE) #  used to detect math
+_MATH_SUMMARY_REGEX = None  # used to match math in summary
+_MATH_INCOMPLETE_TAG_REGEX = None  # used to match math that has been cut off in summary
+_MATHJAX_SETTINGS = {}  # settings that can be specified by the user, used to control mathjax script settings
+# The script text is later passed through str.format with _MATHJAX_SETTINGS
+# as keyword arguments, so it acts as a template rather than a literal script
+with open (os.path.dirname(os.path.realpath(__file__))+'/mathjax_script.txt', 'r') as mathjax_script:  # Read the mathjax javascript from file
+    _MATHJAX_SCRIPT=mathjax_script.read()
+
+# Python standard library for binary search, namely bisect is cool but I need
+# specific business logic to evaluate my search predicate, so I am using my
+# own version
+def binary_search(match_tuple, ignore_within):
+    """Determine whether a span lies inside one of the ignored spans.
+
+    ``ignore_within`` must be a list of non-overlapping ``(start, end)``
+    tuples ordered by ``start`` (the shape produced by ``ignore_content``),
+    which lets the only possible enclosing span be located in O(log n).
+
+    :param match_tuple: ``(start, end)`` span to test
+    :param ignore_within: ordered list of ``(start, end)`` spans to ignore
+    :return: True if ``match_tuple`` is fully contained in one of the spans,
+             False otherwise (including when ``ignore_within`` is empty)
+    """
+
+    if not ignore_within:
+        return False
+
+    lo = 0
+    hi = len(ignore_within) - 1
+
+    # Find the last span whose start is <= the start of the match; it is the
+    # only span that could contain the match
+    while lo < hi:
+        # Floor division keeps mid an integer on Python 3 as well as Python 2.
+        # Rounding up guarantees progress when lo is moved up to mid
+        mid = lo + (hi - lo + 1) // 2
+        if ignore_within[mid][0] <= match_tuple[0]:
+            lo = mid
+        else:
+            hi = mid - 1
+
+    candidate = ignore_within[lo]
+    return candidate[0] <= match_tuple[0] and candidate[1] >= match_tuple[1]
+
+
+def ignore_content(content):
+    """Collect the spans of content in which math must be left untouched.
+
+    Returns a list of ``(start, end)`` tuples, one for each <pre> or <code>
+    element found in ``content``, in order of appearance.
+    """
+    # Matches complete <pre> and <code> elements. NOTE: extend the alternation
+    # in this pattern should additional tags need to be ignored
+    pattern = re.compile(r'<(pre|code)(?:\s.*?)?>.*?</(\1)>', re.DOTALL | re.IGNORECASE)
+
+    return [found.span() for found in pattern.finditer(content)]
+
+
+def wrap_math(content, ignore_within):
+    """Wraps LaTeX math in the tag named by _WRAP_LATEX.
+
+    This is needed for typogrify to play nicely with math but it can also be
+    styled by template providers. MathML (<math> elements) is detected but
+    never wrapped, and math inside the ``ignore_within`` spans is left alone.
+
+    :param content: the html content to scan
+    :param ignore_within: ordered list of ``(start, end)`` spans (see
+                          ``ignore_content``) in which math must be ignored
+    :return: tuple ``(new_content, found_math)`` where ``found_math`` is True
+             if any math that requires rendering was detected
+    """
+
+    # Function attribute used as a flag the nested callback can write to
+    # (works on Python 2, which has no ``nonlocal``)
+    wrap_math.found_math = False
+
+    def math_tag_wrap(match):
+        """Replacement callback for re.sub"""
+
+        # Group 3 only participates when the opener is a <math> tag. The
+        # pattern is case insensitive, so test for participation rather than
+        # comparing against the lowercase string (which would miss <MATH>)
+        if match.group(3) is not None:
+            # Will detect mml, but not wrap anything around it
+            wrap_math.found_math = True
+            return match.group(0)
+
+        # determine if the delimiters are within <pre> and <code> blocks
+        if binary_search(match.span(1), ignore_within) or binary_search(match.span(4), ignore_within):
+            return match.group(0)
+
+        wrap_math.found_math = True
+        return '<%s>%s</%s>' % (_WRAP_LATEX, match.group(0), _WRAP_LATEX)
+
+    return (_MATH_REGEX.sub(math_tag_wrap, content), wrap_math.found_math)
+
+
+def process_summary(instance, ignore_within):
+    """Summaries need special care. If LaTeX is cut off, it must be restored.
+
+    In addition, the mathjax script must be included if necessary thereby
+    making it independent to the template
+
+    :param instance: content object; this function reads ``_get_summary()``
+                     and ``_content`` and may set ``mathjax = True`` on it
+    :param ignore_within: ordered list of ``(start, end)`` spans in which
+                          math must be ignored (<pre>/<code> blocks)
+    :return: the altered summary string, or None if the summary did not need
+             to be changed
+    """
+
+    # Function attribute used as a flag the nested callback below can set
+    process_summary.altered_summary = False
+    insert_mathjax = False
+    # Closing wrap tag to re-append after repaired math ('' when not wrapping)
+    end_tag = '</%s>' % _WRAP_LATEX if _WRAP_LATEX != None else ''
+
+    # use content's _get_summary method to obtain summary
+    summary = instance._get_summary()
+
+    # Determine if there is any math in the summary which are not within the
+    # ignore_within tags.
+    # Groups of _MATH_SUMMARY_REGEX (pattern is built in pelican_init):
+    #   1 - opener plus the math body, 2 - opener, 3 - \begin{} environment
+    #   name, 4 - 'math' for a <math> tag, 5 - closer or the ' ...' cut-off
+    #   marker, 6 - optional trailing wrap end tag or </math>
+    math_item = None
+    for math_item in _MATH_SUMMARY_REGEX.finditer(summary):
+        ignore = binary_search(math_item.span(2), ignore_within)
+        if '...' not in math_item.group(5):
+            # complete math: also test the closing delimiter
+            ignore = ignore or binary_search(math_item.span(5), ignore_within)
+        else:
+            # cut off math: test the trailing tag (group 6) instead
+            ignore = ignore or binary_search(math_item.span(6), ignore_within)
+
+        if ignore:
+            math_item = None # In <code> or <pre> tags, so ignore
+        else:
+            insert_mathjax = True
+
+    # Repair the math if it was cut off. After the loop math_item refers to
+    # the last match in the summary, or is None if that last match was ignored
+    # (or there was no match at all)
+    if math_item and '...' in math_item.group(5):
+        # Work out which closing delimiter the truncated math is missing
+        if math_item.group(3) is not None:
+            end = r'\end{%s}' % math_item.group(3)
+        elif math_item.group(4) is not None:
+            end = r'</math>'
+        elif math_item.group(2) is not None:
+            end = math_item.group(2)
+
+        # Locate the complete expression in the full content: the escaped
+        # content prefix up to the match's start offset, then the truncated
+        # math text lazily extended up to the closing delimiter.
+        # NOTE(review): this uses an offset from the summary to slice
+        # _content, so it assumes both share the same leading text - confirm
+        search_regex = r'%s(%s.*?%s)' % (re.escape(instance._content[0:math_item.start(1)]), re.escape(math_item.group(1)), re.escape(end))
+        math_match = re.search(search_regex, instance._content, re.DOTALL | re.IGNORECASE)
+
+        if math_match:
+            # Swap the truncated math for the complete version, followed by
+            # the closing wrap tag and the ' ...' marker
+            new_summary = summary.replace(math_item.group(0), math_match.group(1)+'%s ...' % end_tag)
+
+            if new_summary != summary:
+                if _MATHJAX_SETTINGS['auto_insert']:
+                    return new_summary+_MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
+                else:
+                    instance.mathjax = True
+                    return new_summary
+
+    def incomplete_end_latex_tag(match):
+        """Replacement callback for re.sub: drops a dangling math token.
+
+        Keeps the text before the token (group 1) and the ' ...' marker
+        (group 4), unless the token (group 3) lies in an ignored span.
+        """
+        if binary_search(match.span(3), ignore_within):
+            return match.group(0)
+
+        process_summary.altered_summary = True
+        return match.group(1) + match.group(4)
+
+    # check for partial math tags at end. These must be removed
+
+    summary = _MATH_INCOMPLETE_TAG_REGEX.sub(incomplete_end_latex_tag, summary)
+
+    if process_summary.altered_summary or insert_mathjax:
+        # The script is only needed when renderable math remains in the summary
+        if insert_mathjax:
+            if _MATHJAX_SETTINGS['auto_insert']:
+                summary+= _MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
+            else:
+                instance.mathjax = True
+
+        return summary
+
+    return None  # Making it explicit that summary was not altered
+
+def process_settings(settings):
+    """Resets _MATHJAX_SETTINGS to its defaults and then applies the user
+    specified MathJax settings on top (see README for more details).
+
+    ``settings`` is expected to be a dictionary; anything else leaves the
+    defaults in place. Values of an unexpected type are silently skipped.
+    """
+
+    global _MATHJAX_SETTINGS
+
+    # NOTE TO FUTURE DEVELOPERS: Look at the README and what is happening in
+    # this function if any additional changes to the mathjax settings need to
+    # be incorporated. Also, please inline comment what the variables
+    # will be used for
+
+    defaults = (
+        ('align', 'center'),  # alignment of displayed equations (left, right, center)
+        ('indent', '0em'),  # acts as an indent when align is not 'center'
+        ('show_menu', 'true'),  # whether to attach the mathjax contextual menu
+        ('process_escapes', 'true'),  # whether escapes are processed
+        ('latex_preview', 'TeX'),  # what the user sees while waiting for LaTeX to render
+        ('color', 'black'),  # color math is rendered in
+        # Whether the mathjax script is automatically inserted into the
+        # content (and into summaries shown in index listings). Nothing is
+        # inserted if no math is detected. Setting this to False means the
+        # template must be altered for the plugin to work, so it is only
+        # recommended for the template designer who wants maximum control.
+        ('auto_insert', True),
+    )
+    for name, default in defaults:
+        _MATHJAX_SETTINGS[name] = default
+
+    if not isinstance(settings, dict):
+        return
+
+    # Iterating over the dictionary itself works on both python 2 and 3
+    for key in settings:
+        value = settings[key]
+
+        if key == 'auto_insert':
+            if isinstance(value, bool):
+                _MATHJAX_SETTINGS[key] = value
+        elif key == 'align':
+            if isinstance(value, str):
+                # anything other than a known alignment falls back to center
+                _MATHJAX_SETTINGS[key] = value if value in ('left', 'right', 'center') else 'center'
+        elif key == 'indent':
+            _MATHJAX_SETTINGS[key] = value
+        elif key in ('show_menu', 'process_escapes'):
+            # stored as javascript boolean literals for the script template
+            if isinstance(value, bool):
+                _MATHJAX_SETTINGS[key] = 'true' if value else 'false'
+        elif key in ('latex_preview', 'color'):
+            if isinstance(value, str):
+                _MATHJAX_SETTINGS[key] = value
+
+
+
+
+def process_content(instance):
+    """Handler for each content object: wraps/detects math, runs typogrify
+    in a way that does not clash with math, and makes mathjax available.
+
+    When math is found the mathjax script is either appended to the content
+    (making it independent of the template) or ``instance.mathjax`` is set,
+    and the summary is repaired if needed.
+    """
+
+    if not instance._content:
+        return
+
+    ignore_within = ignore_content(instance._content)
+
+    if _WRAP_LATEX:
+        instance._content, has_math = wrap_math(instance._content, ignore_within)
+    else:
+        has_math = bool(_MATH_REGEX.search(instance._content))
+
+    # The user initially set typogrify to be True, but since it would clash
+    # with math, we set it to False. This means that the default reader will
+    # not call typogrify, so it is called here, where we are able to make
+    # it ignore math if necessary
+    if _TYPOGRIFY:
+        # typogrify must always ignore mml (math) tags, plus the tag that
+        # math has been wrapped in when there is one
+        ignore_tags = ([_WRAP_LATEX] if _WRAP_LATEX else []) + ['math']
+
+        # Exact copy of the logic as found in the default reader
+        from typogrify.filters import typogrify
+        instance._content = typogrify(instance._content, ignore_tags)
+        instance.metadata['title'] = typogrify(instance.metadata['title'], ignore_tags)
+
+    if not has_math:
+        return
+
+    if _MATHJAX_SETTINGS['auto_insert']:
+        # Mathjax script added to content automatically. Now it
+        # does not need to be explicitly added to the template
+        instance._content += _MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
+    else:
+        # Place the burden on ensuring mathjax script is available to
+        # browser on the template designer (see README for more details)
+        instance.mathjax = True
+
+    # The summary needs special care because math cannot just be cut off
+    summary = process_summary(instance, ignore_within)
+    if summary is not None:
+        instance._summary = summary
+
+
+
+
+def pelican_init(pelicanobj):
+    """Initializes certain global variables and sets the typogrify setting to
+    False should it be set to True.
+
+    Reads the optional ``MATH`` dictionary and the ``TYPOGRIFY`` flag from
+    ``pelicanobj.settings``, publishes the formatted mathjax script under the
+    ``MATHJAXSCRIPT`` setting and compiles the regular expressions that
+    depend on the wrapping tag.
+    """
+
+    global _TYPOGRIFY
+    global _WRAP_LATEX
+    global _MATH_SUMMARY_REGEX
+    global _MATH_INCOMPLETE_TAG_REGEX
+
+    # Only a missing/unsubscriptable setting means "no user settings"; a bare
+    # except here would also swallow KeyboardInterrupt and SystemExit
+    try:
+        settings = pelicanobj.settings['MATH']
+    except (KeyError, TypeError):
+        settings = None
+
+    process_settings(settings)
+
+    # Allows mathjax script to be accessed from template should it be needed
+    pelicanobj.settings['MATHJAXSCRIPT'] = _MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
+
+    # If typogrify set to True, then we need to handle it manually so it does
+    # not conflict with LaTeX
+    try:
+        if pelicanobj.settings['TYPOGRIFY'] == True:
+            pelicanobj.settings['TYPOGRIFY'] = False
+            _WRAP_LATEX = 'mathjax' # default tag to wrap mathjax content inside of
+            _TYPOGRIFY = True
+    except KeyError:
+        pass
+
+    # Set _WRAP_LATEX to the settings tag if defined. The idea behind this is
+    # to give template designers control over how math would be rendered
+    try:
+        if pelicanobj.settings['MATH']['wrap_latex']:
+            _WRAP_LATEX = pelicanobj.settings['MATH']['wrap_latex']
+    except (KeyError, TypeError):
+        pass
+
+    # regular expressions that depend on _WRAP_LATEX are set here
+    tag_start = r'<%s>' % _WRAP_LATEX if _WRAP_LATEX is not None else ''
+    tag_end = r'</%s>' % _WRAP_LATEX if _WRAP_LATEX is not None else ''
+    # Like _MATH_REGEX, but the closer may instead be the ' ...' marker left
+    # by a truncated summary, optionally followed by the wrap end tag
+    math_summary_regex = r'((\$\$|\$|\\begin\{(.+?)\}|<(math)(?:\s.*?)?>).+?)(\2|\\end\{\3\}|</\4>|\s?\.\.\.)(%s|</\4>)?' % tag_end
+
+    # NOTE: The logic in _get_summary will handle <math> correctly because it
+    # is perceived as an html tag. Therefore we are only interested in handling
+    # non mml (i.e. LaTeX)
+    incomplete_end_latex_tag = r'(.*)(%s)(\\\S*?|\$)\s*?(\s?\.\.\.)(%s)?$' % (tag_start, tag_end)
+
+    _MATH_SUMMARY_REGEX = re.compile(math_summary_regex, re.DOTALL | re.IGNORECASE)
+    _MATH_INCOMPLETE_TAG_REGEX = re.compile(incomplete_end_latex_tag, re.DOTALL | re.IGNORECASE)
+
+
+
+
+def register():
+    """Connects the plugin's handlers to the pelican signals"""
+
+    hooks = (
+        (signals.initialized, pelican_init),
+        (signals.content_object_init, process_content),
+    )
+    for signal, handler in hooks:
+        signal.connect(handler)