Commit 59ab76d7 authored by Jeremy BLEYER's avatar Jeremy BLEYER

Added pylit

parent 8723b356
......@@ -27,8 +27,7 @@ def process():
# Check that we can find pylit.py for converting foo.py.rst to
# foo.py
# pylit_parser = "/home/bleyerj/Python/pylit/pylit.py"
pylit_parser = "/opt/fenics/dolfin/utils/pylit/pylit.py"
pylit_parser = "../utils/pylit/pylit.py"
if not os.path.isfile(pylit_parser):
raise RuntimeError("Cannot find pylit.py")
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylit.py
# ********
# Literate programming with reStructuredText
# ++++++++++++++++++++++++++++++++++++++++++
#
# :Date: $Date$
# :Revision: $Revision$
# :URL: $URL$
# :Copyright: © 2005, 2007 Günter Milde.
# Released without warranty under the terms of the
# GNU General Public License (v. 2 or later)
#
# ::
from __future__ import print_function
"""pylit: bidirectional text <-> code converter
Convert between a *text source* with embedded computer code and a *code source*
with embedded documentation.
"""
# .. contents::
#
# Frontmatter
# ===========
#
# Changelog
# ---------
#
# .. class:: borderless
#
# ====== ========== ===========================================================
# 0.1 2005-06-29 Initial version.
# 0.1.1 2005-06-30 First literate version.
# 0.1.2 2005-07-01 Object orientated script using generators.
# 0.1.3 2005-07-10 Two state machine (later added 'header' state).
# 0.2b 2006-12-04 Start of work on version 0.2 (code restructuring).
# 0.2 2007-01-23 Published at http://pylit.berlios.de.
# 0.2.1 2007-01-25 Outsourced non-core documentation to the PyLit pages.
# 0.2.2 2007-01-26 New behaviour of `diff` function.
# 0.2.3 2007-01-29 New `header` methods after suggestion by Riccardo Murri.
# 0.2.4 2007-01-31 Raise Error if code indent is too small.
# 0.2.5 2007-02-05 New command line option --comment-string.
# 0.2.6 2007-02-09 Add section with open questions,
# Code2Text: let only blank lines (no comment str)
# separate text and code,
# fix `Code2Text.header`.
# 0.2.7 2007-02-19 Simplify `Code2Text.header`,
# new `iter_strip` method replacing a lot of ``if``-s.
# 0.2.8 2007-02-22 Set `mtime` of outfile to the one of infile.
# 0.3 2007-02-27 New `Code2Text` converter after an idea by Riccardo Murri,
# explicit `option_defaults` dict for easier customisation.
# 0.3.1 2007-03-02 Expand hard-tabs to prevent errors in indentation,
# `Text2Code` now also works on blocks,
# removed dependency on SimpleStates module.
# 0.3.2 2007-03-06 Bug fix: do not set `language` in `option_defaults`
# renamed `code_languages` to `languages`.
# 0.3.3 2007-03-16 New language css,
# option_defaults -> defaults = optparse.Values(),
# simpler PylitOptions: don't store parsed values,
# don't parse at initialisation,
# OptionValues: return `None` for non-existing attributes,
# removed -infile and -outfile, use positional arguments.
# 0.3.4 2007-03-19 Documentation update,
# separate `execute` function.
# 2007-03-21 Code cleanup in `Text2Code.__iter__`.
# 0.3.5 2007-03-23 Removed "css" from known languages after learning that
# there is no C++ style "// " comment string in CSS2.
# 0.3.6 2007-04-24 Documentation update.
# 0.4 2007-05-18 Implement Converter.__iter__ as stack of iterator
# generators. Iterating over a converter instance now
# yields lines instead of blocks.
# Provide "hooks" for pre- and postprocessing filters.
# Rename states to reduce confusion with formats:
# "text" -> "documentation", "code" -> "code_block".
# 0.4.1 2007-05-22 Converter.__iter__: cleanup and reorganisation,
# rename parent class Converter -> TextCodeConverter.
# 0.4.2 2007-05-23 Merged Text2Code.converter and Code2Text.converter into
# TextCodeConverter.converter.
# 0.4.3 2007-05-30 Replaced use of defaults.code_extensions with
# values.languages.keys().
# Removed spurious `print` statement in code_block_handler.
# Added basic support for 'c' and 'css' languages
# with `dumb_c_preprocessor`_ and `dumb_c_postprocessor`_.
# 0.5 2007-06-06 Moved `collect_blocks`_ out of `TextCodeConverter`_,
# bug fix: collect all trailing blank lines into a block.
# Expand tabs with `expandtabs_filter`_.
# 0.6 2007-06-20 Configurable code-block marker (default ``::``)
# 0.6.1 2007-06-28 Bug fix: reset self.code_block_marker_missing.
# 0.7 2007-12-12 prepending an empty string to sys.path in run_doctest()
# to allow imports from the current working dir.
# 0.7.1 2008-01-07 If outfile does not exist, do a round-trip conversion
# and report differences (as with outfile=='-').
# 0.7.2 2008-01-28 Do not add missing code-block separators with
# `doctest_run` on the code source. Keeps lines consistent.
# 0.7.3 2008-04-07 Use value of code_block_marker for insertion of missing
# transition marker in Code2Text.code_block_handler
# Add "shell" to defaults.languages
# 0.7.4 2008-06-23 Add "latex" to defaults.languages
# 0.7.5 2009-05-14 Bugfix: ignore blank lines in test for end of code block
# 0.7.6 2009-12-15 language-dependent code-block markers (after a
# `feature request and patch by jrioux`_),
# use DefaultDict for language-dependent defaults,
# new defaults setting `add_missing_marker`_.
# 0.7.7 2010-06-23 New command line option --codeindent.
# 0.7.8 2011-03-30 bugfix: do not overwrite custom `add_missing_marker` value,
# allow directive options following the 'code' directive.
# 0.7.9 2011-04-05 Decode doctest string if 'magic comment' gives encoding.
# 3.1 2013-09-16 Change to Python3: print statement and exec statements
# removed. String formatting % changed to .format().
# Replaced raise statements and except statements.
# Upgrade for ``dict`` method changes.
# Cleanup ``DefaultDict`` to be more easily replaced.
# Adjust trailing space handling to match unit tests better.
# Provide ``with`` statements for all file contexts.
# ====== ========== ===========================================================
#
# To Do List
# ----------
#
# * Replace home-brewed DefaultDict with collections.defaultdict.
#
# * Replace optparse with argparse.
#
# `Documentation/library/argparse.html#upgrading-optparse-code <file:///Library/Frameworks/Python.framework/Versions/3.2/Resources/English.lproj/Documentation/library/argparse.html#upgrading-optparse-code>`_
#
# ::
_version = "3.1"
__docformat__ = 'restructuredtext'
# Introduction
# ------------
#
# PyLit is a bidirectional converter between two formats of a computer
# program source:
#
# * a (reStructured) text document with program code embedded in
# *code blocks*, and
# * a compilable (or executable) code source with *documentation*
# embedded in comment blocks
#
#
# Requirements
# ------------
#
# ::
import os, sys
import re
import contextlib
import warnings
#from collections import defaultdict # TODO
import optparse
#import argparse # TODO
# DefaultDict
# ~~~~~~~~~~~
# As `collections.defaultdict` is only introduced in Python 2.5, we
# define a Python3 compatible version of the dictionary with default from
# http://code.activestate.com/recipes/389639/
#
# ::
class DefaultDict(dict):
    """Minimalistic dictionary with a default value for missing keys.

    Looking up a missing key returns the value produced by
    `default_factory`.  Unlike `collections.defaultdict`, the missing key
    is *not* inserted: the dictionary itself is left unchanged.
    """

    def __init__(self, default_factory=lambda: None, *args, **kwargs):
        """Create the dictionary.

        default_factory -- callable without arguments, returns the value
                           for missing keys (default: returns `None`)
        *args, **kwargs -- initial content, passed on to `dict`
        """
        self.update(dict(*args, **kwargs))
        self.default_factory = default_factory

    def __getitem__(self, key):
        """Return the value stored for `key` or the default value."""
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            # Call the factory only when it is really needed, so that an
            # expensive or side-effecting factory is not run (and its
            # result thrown away) for keys that are present.
            return self.default_factory()
# TODO: Replace with proper Python3 ``collections.defaultdict``
# Defaults
# ========
#
# The `defaults` object provides a central repository for default
# values and their customisation. ::
defaults = optparse.Values()
# It is used for
#
# * the initialisation of data arguments in TextCodeConverter_ and
# PylitOptions_
#
# * completion of command line options in `PylitOptions.complete_values`_.
#
# This allows the easy creation of back-ends that customise the
# defaults and then call `main`_ e.g.:
#
# >>> import pylit
# >>> pylit.defaults.comment_string = "## "
# >>> pylit.defaults.codeindent = 4
# >>> pylit.main()
#
# The following default values are defined in pylit.py:
#
# languages
# ---------
#
# Mapping of code file extensions to code language::
defaults.languages = DefaultDict(
    lambda: "python",  # fallback language for unknown extensions
    [
        # (code file extension, code language)
        (".c", "c"),
        (".cc", "c++"),
        (".css", "css"),
        (".py", "python"),
        (".sh", "shell"),
        (".sl", "slang"),
        (".sty", "latex"),
        (".tex", "latex"),
    ],
)
# Will be overridden by the ``--language`` command line option.
#
# The first argument is a factory returning the fallback language, used if
# there is no matching extension (e.g. if pylit is used as filter) and no
# ``--language`` is specified. It can be changed programmatically by
# assignment to the ``.default_factory`` attribute, e.g.
#
# >>> defaults.languages.default_factory = lambda: 'c++'
#
#
# .. _text_extension:
#
# text_extensions
# ---------------
#
# List of known extensions of (reStructured) text files. The first
# extension in this list is used by the `_get_outfile_name`_ method to
# generate a text output filename::
defaults.text_extensions = [".txt", ".rst"]
# comment_strings
# ---------------
#
# Comment strings for known languages. Used in Code2Text_ to recognise
# text blocks and in Text2Code_ to format text blocks as comments.
# Defaults to ``'# '``.
#
# **Comment strings include trailing whitespace.** ::
defaults.comment_strings = DefaultDict(
    lambda: '# ',  # fallback comment string for unknown languages
    [
        # (code language, comment string including trailing whitespace)
        ("css", '// '),
        ("c", '// '),
        ("c++", '// '),
        ("latex", '% '),
        ("python", '# '),
        ("shell", '# '),
        ("slang", '% '),
    ],
)
# header_string
# -------------
#
# Marker string for a header code block in the text source. No trailing
# whitespace needed as indented code follows.
# Must be a valid rst directive that accepts code on the same line, e.g.
# ``'.. admonition::'``.
#
# Default is a comment marker::
defaults.header_string = '..'
# .. _code_block_marker:
#
# code_block_markers
# ------------------
#
# Markup at the end of a documentation block.
# Default is Docutils' marker for a `literal block`_::
defaults.code_block_markers = DefaultDict(lambda:'::')
# The `code_block_marker` string is `inserted into a regular expression`_.
# Language-specific markers can be defined programmatically, e.g. in a
# wrapper script.
#
# In a document where code examples are only one of several uses of
# literal blocks, it is more appropriate to single out the source code,
# e.g. with the double colon at a separate line ("expanded form")
#
# ``defaults.code_block_markers.default_factory = lambda: ':: *'``
#
# or a dedicated ``.. code-block::`` directive [#]_
#
# ``defaults.code_block_markers['c++'] = '.. code-block:: *c++'``
#
# The latter form also allows code in different languages kept together
# in one literate source file.
#
# .. [#] The ``.. code-block::`` directive is not (yet) supported by
# standard Docutils. It is provided by several add-ons, including
# the `code-block directive`_ project in the Docutils Sandbox and
# Sphinx_.
#
#
# strip
# -----
#
# Export to the output format stripping documentation or code blocks::
defaults.strip = False
# strip_marker
# ------------
#
# Strip literal marker from the end of documentation blocks when
# converting to code format. Makes the code more concise but loses the
# synchronisation of line numbers in text and code formats. Can also be used
# (together with the auto-completion of the code-text conversion) to change
# the `code_block_marker`::
defaults.strip_marker = False
# add_missing_marker
# ------------------
#
# When converting from code format to text format, add a `code_block_marker`
# at the end of documentation blocks if it is missing::
defaults.add_missing_marker = True
# Keep this at ``True``, if you want to re-convert to code format later!
#
#
# .. _defaults.preprocessors:
#
# preprocessors
# -------------
#
# Preprocess the data with language-specific filters_
# Set below in Filters_::
defaults.preprocessors = {}
# .. _defaults.postprocessors:
#
# postprocessors
# --------------
#
# Postprocess the data with language-specific filters_::
defaults.postprocessors = {}
# .. _defaults.codeindent:
#
# codeindent
# ----------
#
# Number of spaces to indent code blocks in `Code2Text.code_block_handler`_::
defaults.codeindent = 2
# In `Text2Code.code_block_handler`_, the codeindent is determined by the
# first recognised code line (header or first indented literal block
# of the text source).
#
# overwrite
# ---------
#
# What to do if the outfile already exists? (ignored if `outfile` == '-')::
defaults.overwrite = 'update'
# Recognised values:
#
# :'yes': overwrite a possibly existing `outfile`,
# :'update': fail if the `outfile` is newer than `infile`,
# :'no': fail if `outfile` exists.
#
#
# Extensions
# ==========
#
# Try to import optional extensions::
# `pylit_elisp` is optional: the import is attempted, and a missing module
# is silently ignored so that pylit keeps working without it.
# NOTE(review): presumably the module hooks itself into `defaults` on
# import -- confirm against pylit_elisp itself.
try:
import pylit_elisp
except ImportError:
pass
# Converter Classes
# =================
#
# The converter classes implement a simple state machine to separate and
# transform documentation and code blocks. For this task, only a very limited
# parsing is needed. PyLit's parser assumes:
#
# * `indented literal blocks`_ in a text source are code blocks.
#
# * comment blocks in a code source where every line starts with a matching
# comment string are documentation blocks.
#
# TextCodeConverter
# -----------------
# ::
class TextCodeConverter:
"""Parent class for the converters `Text2Code` and `Code2Text`.
"""
# The parent class defines data attributes and functions used in both
# `Text2Code`_ converting a text source to executable code source, and
# `Code2Text`_ converting commented code to a text source.
#
# Data attributes
# ~~~~~~~~~~~~~~~
#
# Class default values are fetched from the `defaults`_ object and can be
# overridden by matching keyword arguments during class instantiation. This
# also works with keyword arguments to `get_converter`_ and `main`_, as these
# functions pass on unused keyword args to the instantiation of a converter
# class. ::
language = defaults.languages.default_factory()
comment_strings = defaults.comment_strings
comment_string = "" # set in __init__ (if empty)
codeindent = defaults.codeindent
header_string = defaults.header_string
code_block_markers = defaults.code_block_markers
code_block_marker = "" # set in __init__ (if empty)
strip = defaults.strip
strip_marker = defaults.strip_marker
add_missing_marker = defaults.add_missing_marker
directive_option_regexp = re.compile(r' +:(\w|[-._+:])+:( |$)')
state = "" # type of current block, see `TextCodeConverter.convert`_
# Interface methods
# ~~~~~~~~~~~~~~~~~
#
# .. _TextCodeConverter.__init__:
#
# __init__
# """"""""
#
# Initialising sets the `data` attribute, an iterable object yielding lines of
# the source to convert. [#]_
#
# .. [#] The most common choice of data is a `file` object with the text
# or code source.
#
# To convert a string into a suitable object, use its splitlines method
# like ``"2 lines\nof source".splitlines(True)``.
#
#
# Additional keyword arguments are stored as instance variables,
# overwriting the class defaults::
def __init__(self, data, **keyw):
    """Set up a converter for `data`.

    data   -- iterable data object yielding the lines to convert
              (list, file, generator, string, ...)
    **keyw -- remaining keyword arguments are stored as
              data-attributes, overriding the class defaults
    """
    self.data = data
    self.__dict__.update(keyw)

    # If empty, `code_block_marker` and `comment_string` are set according
    # to the `language`::
    if not self.code_block_marker:
        self.code_block_marker = self.code_block_markers[self.language]
    if not self.comment_string:
        self.comment_string = self.comment_strings[self.language]
    self.stripped_comment_string = self.comment_string.rstrip()

    # Pre- and postprocessing filters are set (with
    # `TextCodeConverter.get_filter`_)::
    self.preprocessor = self.get_filter("preprocessors", self.language)
    self.postprocessor = self.get_filter("postprocessors", self.language)

    # .. _inserted into a regular expression:
    #
    # Finally, a regular_expression for the `code_block_marker` is compiled
    # to find valid cases of `code_block_marker` in a given line and return
    # the groups: ``\1 prefix, \2 code_block_marker, \3 remainder``
    #
    # The patterns are raw strings: ``\.`` is not a valid string escape
    # and triggers a warning in a normal string literal.  ``\n`` is left
    # for the regex engine to interpret, which matches the same newline.
    marker = self.code_block_marker
    if marker == '::':
        # the default marker may occur at the end of a text line
        self.marker_regexp = re.compile(r'^( *(?!\.\.).*)(::)([ \n]*)$')
    else:
        # marker must be on a separate line; it is inserted verbatim
        # (not escaped), so it may itself contain regexp syntax
        self.marker_regexp = re.compile(r'^( *)(%s)(.*\n?)$' % marker)
# .. _TextCodeConverter.__iter__:
#
# __iter__
# """"""""
#
# Return an iterator for the instance. Iteration yields lines of converted
# data.
#
# The iterator is a chain of iterators acting on `self.data` that does
#
# * preprocessing
# * text<->code format conversion
# * postprocessing
#
# Pre- and postprocessing are only performed if filters for the current
# language are registered in `defaults.preprocessors`_ and|or
# `defaults.postprocessors`_. The filters must accept an iterable as first
# argument and yield the processed input data line-wise.
# ::
def __iter__(self):
    """Return an iterator yielding the converted lines of `self.data`.

    The data passes through the preprocessor, the text<->code
    conversion and the postprocessor, in that order.
    """
    preprocessed = self.preprocessor(self.data)
    converted = self.convert(preprocessed)
    return self.postprocessor(converted)
# .. _TextCodeConverter.__call__:
#
# __call__
# """"""""
# The special `__call__` method allows the use of class instances as callable
# objects. It returns the converted data as list of lines::
def __call__(self):
    """Run the complete conversion and return the result as list of lines"""
    return list(self)
# .. _TextCodeConverter.__str__:
#
# __str__
# """""""
# Return converted data as string::
def __str__(self):
    """Return the converted data joined into one string"""
    converted_lines = self()
    return "".join(converted_lines)
# Helpers and convenience methods
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# .. _TextCodeConverter.convert:
#
# convert
# """""""
#
# The `convert` method generates an iterator that does the actual code <-->
# text format conversion. The converted data is yielded line-wise and the
# instance's `state` attribute indicates whether the current line is "header",
# "documentation", or "code_block"::
def convert(self, lines):
    """Generate the converted lines of a program document.

    `lines` is an iterable yielding the source lines; the generator
    yields these lines converted between "text" and "code" format.
    """
    # The internal data attributes are initialised here (and not in
    # `__init__`), so that every new iteration starts afresh.
    #
    # `state`
    #   the "type" of the currently processed block of lines. One of
    #
    #   :"":              initial state: check for header,
    #   :"header":        leading code block: strip `header_string`,
    #   :"documentation": documentation part: comment out,
    #   :"code_block":    literal blocks containing source code: unindent.
    self.state = ""

    # `_codeindent`
    #   * Internal attribute, not to be confused with the configurable
    #     `codeindent` (without the leading underscore).
    #   * `_codeindent` is set in `Text2Code.code_block_handler`_ to the
    #     indent of the first non-blank "code_block" line and stripped from
    #     all "code_block" lines in the text-to-code conversion,
    #   * `codeindent` defaults to `defaults.codeindent`_ and is added to
    #     "code_block" lines in the code-to-text conversion.
    self._codeindent = 0

    # `_textindent`
    #   * set by `Text2Code.documentation_handler`_ to the minimal indent
    #     of a documentation block,
    #   * used in `Text2Code.set_state`_ to find the end of a code block.
    self._textindent = 0

    # `_add_code_block_marker`
    #   If the last paragraph of a documentation block does not end with a
    #   code_block_marker_, it should be added (otherwise, the
    #   back-conversion fails).
    #
    #   The flag is set by `Code2Text.documentation_handler`_ and evaluated
    #   by `Code2Text.code_block_handler`_, because the documentation
    #   handler cannot know whether the next block will be documentation
    #   (no marker needed) or a code block.
    self._add_code_block_marker = False

    # Determine the state of each block, then convert the block with the
    # handler method named after that state ("<state>_handler").
    for block in collect_blocks(expandtabs_filter(lines)):
        self.set_state(block)
        handler = getattr(self, self.state + "_handler")
        for converted_line in handler(block):
            yield converted_line
# .. _TextCodeConverter.get_filter:
#
# get_filter
# """"""""""
# ::
def get_filter(self, filter_set, language):
"""Return language specific filter"""