Logo AND Algorithmique Numérique Distribuée

Public GIT Repository
Include a copy of javasphinx so that we can fix it when needed
[simgrid.git] / docs / source / _ext / javasphinx / javasphinx / htmlrst.py
diff --git a/docs/source/_ext/javasphinx/javasphinx/htmlrst.py b/docs/source/_ext/javasphinx/javasphinx/htmlrst.py
new file mode 100644 (file)
index 0000000..b34f1f2
--- /dev/null
@@ -0,0 +1,419 @@
+#
+# Copyright 2013-2015 Bronto Software, Inc. and contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import unicode_literals
+from builtins import str
+
+import collections
+import re
+
+from xml.sax.saxutils import escape as html_escape
+from bs4 import BeautifulSoup
+
+# Record describing a single table cell: the tag name ('td' or 'th'), its
+# row and column spans, and its contents already converted to reST text.
+Cell = collections.namedtuple(
+    'Cell',
+    ['type', 'rowspan', 'colspan', 'contents'],
+)
+
+class Converter(object):
+    """Convert Javadoc HTML fragments to reStructuredText.
+
+    The conversion runs in three stages: inline Javadoc tags such as
+    ``{@code ...}`` and ``{@link ...}`` are rewritten to plain HTML, the HTML
+    is parsed with BeautifulSoup, and the resulting tree is walked to emit
+    reST. Names of tags without a dedicated handler are collected in
+    ``_unknown_tags``; their children are still converted.
+    """
+
+    # HTML tags rendered as inline markup, mapped to the reST delimiter that
+    # wraps their text. Headings are rendered as bold text.
+    _INLINE_MARKUP = {
+        'b': '**',
+        'strong': '**',
+        'i': '*',
+        'em': '*',
+        'tt': '``',
+        'code': '``',
+        'h1': '**',
+        'h2': '**',
+        'h3': '**',
+        'h4': '**',
+        'h5': '**',
+        'h6': '**',
+    }
+
+    # HTML tags rendered through the reST role of the same name.
+    _ROLE_TAGS = ('sub', 'sup')
+
+    # HTML list tags mapped to the reST marker that starts each item.
+    _LIST_MARKERS = {'ul': '*', 'ol': '#.'}
+
+    def __init__(self, parser):
+        """Create a converter.
+
+        :param parser: name of the tree builder handed to BeautifulSoup when
+                       the preprocessed HTML is parsed in :meth:`convert`.
+        """
+        self._unknown_tags = set()
+        self._clear = '\n\n..\n\n'
+
+        # Regular expressions
+        self._preprocess_anchors = re.compile(r'<a\s+name\s*=\s*["\']?(.+?)["\']?\s*>')
+        self._post_process_empty_lines = re.compile(r'^\s+$', re.MULTILINE)
+        self._post_process_compress_lines = re.compile(r'\n{3,}')
+        self._whitespace_with_newline = re.compile(r'[\s\n]+')
+        self._whitespace = re.compile(r'\s+')
+        self._html_tag = re.compile(r'<.*?>')
+
+        self._preprocess_entity = re.compile(r'&(nbsp|lt|gt|amp)([^;]|[\n])')
+        self._parser = parser
+
+    # --------------------------------------------------------------------------
+    # ---- reST Utility Methods ----
+
+    def _unicode(self, s):
+        """Return ``s`` as text, decoding it as UTF-8 when it is not text yet."""
+        # ``str`` is the text type on both Python 2 and 3 thanks to the
+        # ``from builtins import str`` at the top of the module; the bare
+        # ``unicode`` name does not exist on Python 3.
+        if isinstance(s, str):
+            return s
+
+        return str(s, 'utf8')
+
+    def _separate(self, s):
+        """Surround ``s`` with blank lines so it forms its own block."""
+        return '\n\n' + s + '\n\n'
+
+    def _escape_inline(self, s):
+        """Wrap ``s`` in escaped spaces so inline markup may touch other text."""
+        return '\\ ' + s + '\\ '
+
+    def _inline(self, tag, s):
+        """Wrap ``s`` in the inline markup delimiter ``tag``.
+
+        Text containing a newline is returned untouched, and text that is
+        blank after stripping is returned as the empty string.
+        """
+        # Seems fishy if our inline markup spans lines. We will instead just
+        # return the string as is
+        if '\n' in s:
+            return s
+
+        s = s.strip()
+
+        if not s:
+            return s
+
+        return self._escape_inline(tag + s + tag)
+
+    def _role(self, role, s, label=None):
+        """Build an interpreted-text role, optionally with an explicit label."""
+        if label:
+            return self._escape_inline(':%s:`%s <%s>`' % (role, label, s))
+
+        return self._escape_inline(':%s:`%s`' % (role, s))
+
+    def _directive(self, directive, body=None):
+        """Build a directive block, indenting ``body`` by three spaces."""
+        header = '\n\n.. %s::\n\n' % (directive,)
+
+        if body:
+            return header + self._left_justify(body, 3) + '\n\n'
+
+        return header + '\n'
+
+    def _hyperlink(self, target, label):
+        """Build an external hyperlink reference to ``target``."""
+        return self._escape_inline('`%s <%s>`_' % (label, target))
+
+    def _listing(self, marker, items):
+        """Build a list whose items each start with ``marker``.
+
+        Every item is indented to hang under its marker. The list is preceded
+        by an empty comment (``..``) block.
+        """
+        width = len(marker)
+        indented = [self._left_justify(item, width + 1) for item in items]
+        # Overwrite the leading indentation of each item with the marker.
+        marked = [marker + item[width:] for item in indented]
+        return self._separate('..') + self._separate('\n'.join(marked))
+
+    def _left_justify(self, s, indent=0):
+        """Shift ``s`` so its least indented non-blank line starts at ``indent``.
+
+        Trailing whitespace is removed from every line. A string without any
+        non-blank line is returned unchanged.
+        """
+        lines = [l.rstrip() for l in s.split('\n')]
+        indents = [len(l) - len(l.lstrip()) for l in lines if l]
+
+        if not indents:
+            return s
+
+        shift = indent - min(indents)
+
+        if shift < 0:
+            return '\n'.join(l[-shift:] for l in lines)
+
+        prefix = ' ' * shift
+        return '\n'.join(prefix + l for l in lines)
+
+    def _compress_whitespace(self, s, replace=' ', newlines=True):
+        """Replace every run of whitespace in ``s`` with ``replace``."""
+        if newlines:
+            return self._whitespace_with_newline.sub(replace, s)
+
+        return self._whitespace.sub(replace, s)
+
+    # --------------------------------------------------------------------------
+    # ---- DOM Tree Processing ----
+
+    def _parse_span(self, value):
+        """Parse a ``rowspan``/``colspan`` attribute value.
+
+        Values that are not integers, or that are smaller than one, yield 1
+        so that malformed HTML cannot break the table layout arithmetic.
+        """
+        try:
+            span = int(value)
+        except (TypeError, ValueError):
+            return 1
+
+        return max(span, 1)
+
+    def _process_table_cells(self, table):
+        """ Compile all the table cells.
+
+        Returns a list of rows, each a list of ``Cell`` tuples. The rows may
+        have different lengths because of column spans. Header cells outside
+        the first row are rendered as bold text.
+
+        """
+
+        rows = []
+
+        for i, tr in enumerate(table.find_all('tr')):
+            row = []
+
+            for c in tr.contents:
+                cell_type = getattr(c, 'name', None)
+
+                if cell_type not in ('td', 'th'):
+                    continue
+
+                rowspan = self._parse_span(c.attrs.get('rowspan', 1))
+                colspan = self._parse_span(c.attrs.get('colspan', 1))
+                contents = self._process_children(c).strip()
+
+                if cell_type == 'th' and i > 0:
+                    contents = self._inline('**', contents)
+
+                row.append(Cell(cell_type, rowspan, colspan, contents))
+
+            rows.append(row)
+
+        return rows
+
+    def _process_table(self, node):
+        """Render a ``<table>`` node as a reST grid table.
+
+        Returns the empty string when the table has no rows. Row spans are
+        recorded on the cells but are not used for the layout.
+        """
+        rows = self._process_table_cells(node)
+
+        if not rows:
+            return ''
+
+        table_num_columns = max(sum(c.colspan for c in row) for row in rows)
+
+        # Pad short rows with one empty cell spanning the missing columns.
+        for row in rows:
+            row_num_columns = sum(c.colspan for c in row)
+
+            if row_num_columns < table_num_columns:
+                cell_type = row[-1].type if row else 'td'
+                row.append(Cell(cell_type, 1, table_num_columns - row_num_columns, ''))
+
+        col_widths = [0] * table_num_columns
+        row_heights = [0] * len(rows)
+
+        for i, row in enumerate(rows):
+            j = 0
+            for cell in row:
+                cell_lines = cell.contents.split('\n')
+                current_w = sum(col_widths[j:j + cell.colspan])
+                required_w = max(len(l) for l in cell_lines)
+
+                if required_w > current_w:
+                    # Spread the missing width over the spanned columns; the
+                    # first column also takes the remainder of the division.
+                    additional = required_w - current_w
+                    share = additional // cell.colspan
+                    col_widths[j] += additional - (cell.colspan - 1) * share
+                    for jj in range(j + 1, j + cell.colspan):
+                        col_widths[jj] += share
+
+                row_heights[i] = max(row_heights[i], len(cell_lines))
+
+                j += cell.colspan
+
+        row_sep = '+' + '+'.join('-' * (l + 2) for l in col_widths) + '+'
+        header_sep = '+' + '+'.join('=' * (l + 2) for l in col_widths) + '+'
+        lines = [row_sep]
+
+        for i, row in enumerate(rows):
+            split_cells = [c.contents.split('\n') for c in row]
+
+            for y in range(row_heights[i]):
+                line = []
+                j = 0
+                for c, cell_lines in zip(row, split_cells):
+                    # Each column takes its width plus three characters of
+                    # framing; the leading '| ' is emitted separately.
+                    w = sum(n + 3 for n in col_widths[j:j + c.colspan]) - 2
+                    content = cell_lines[y] if y < len(cell_lines) else ''
+
+                    line.append('| ')
+                    line.append(content.ljust(w))
+
+                    j += c.colspan
+
+                line.append('|')
+                lines.append(''.join(line))
+
+            # A first row made only of header cells is closed with '='.
+            if i == 0 and all(c.type == 'th' for c in row):
+                lines.append(header_sep)
+            else:
+                lines.append(row_sep)
+
+        return self._separate('\n'.join(lines))
+
+    def _process_children(self, node):
+        """Convert every child of ``node`` and concatenate the results.
+
+        Leading whitespace is dropped from a part that follows a part ending
+        with a newline.
+        """
+        parts = []
+        is_newline = False
+
+        for c in node.contents:
+            part = self._process(c)
+
+            if is_newline:
+                part = part.lstrip()
+
+            if part:
+                parts.append(part)
+                is_newline = part.endswith('\n')
+
+        return ''.join(parts)
+
+    def _process_text(self, node):
+        """Return the concatenated text of ``node``, ignoring nested markup."""
+        return ''.join(node.strings)
+
+    def _process(self, node):
+        """Convert a single parsed node (text or tag) to reST.
+
+        Tags without a dedicated handler are recorded in ``_unknown_tags``
+        and replaced by the conversion of their children.
+        """
+        if isinstance(node, str):
+            return self._compress_whitespace(node)
+
+        name = node.name
+
+        if name in self._INLINE_MARKUP:
+            return self._inline(self._INLINE_MARKUP[name], self._process_text(node))
+
+        if name in self._ROLE_TAGS:
+            return self._role(name, self._process_text(node))
+
+        if name == 'hr':
+            # Transitions not allowed
+            return self._separate('')
+
+        if name == 'p':
+            return self._separate(self._process_children(node).strip())
+
+        if name == 'pre':
+            return self._directive('parsed-literal', self._process_text(node))
+
+        if name == 'a':
+            if 'name' in node.attrs:
+                return self._separate('.. _' + node['name'] + ':')
+
+            if 'href' in node.attrs:
+                target = node['href']
+                label = self._compress_whitespace(self._process_text(node).strip('\n'))
+
+                if target.startswith('#'):
+                    return self._role('ref', target[1:], label)
+
+                # '@' targets are produced by the {@link} preprocessing.
+                if target.startswith('@'):
+                    return self._role('java:ref', target[1:], label)
+
+                return self._hyperlink(target, label)
+
+            # An anchor with neither attribute is handled as an unknown tag.
+
+        if name in self._LIST_MARKERS:
+            items = [self._process(n) for n in node.find_all('li', recursive=False)]
+            return self._listing(self._LIST_MARKERS[name], items)
+
+        if name == 'li':
+            s = self._process_children(node).strip()
+
+            # If it's multiline clear the end to correctly support nested lists
+            if '\n' in s:
+                s = s + '\n\n'
+
+            return s
+
+        if name == 'table':
+            return self._process_table(node)
+
+        self._unknown_tags.add(name)
+
+        return self._process_children(node)
+
+    # --------------------------------------------------------------------------
+    # ---- HTML Preprocessing ----
+
+    def _preprocess_inline_javadoc_replace(self, tag, f, s):
+        """Replace every inline ``{@tag ...}`` in ``s`` with ``f(content)``.
+
+        :param tag: tag name without the leading ``{@``.
+        :param f: callable receiving the stripped tag content and returning
+                  the replacement text.
+        :param s: the text to process.
+        :raises ValueError: if a tag has no balancing closing bracket.
+        """
+        parts = []
+
+        start = '{@' + tag
+        start_length = len(start)
+
+        i = s.find(start)
+        j = 0
+
+        while i != -1:
+            parts.append(s[j:i])
+
+            # Find a closing bracket such that the brackets are balanced between
+            # them. This is necessary since code examples containing { and } are
+            # commonly wrapped in {@code ...} tags
+
+            try:
+                # index() rather than find(): a tag without any closing
+                # bracket must be reported, otherwise j would become 0 and the
+                # same tag would be found again forever.
+                j = s.index('}', i + start_length) + 1
+                while s.count('{', i, j) != s.count('}', i, j):
+                    j = s.index('}', j) + 1
+            except ValueError:
+                raise ValueError('Unbalanced {} brackets in ' + tag + ' tag')
+
+            parts.append(f(s[i + start_length:j - 1].strip()))
+            i = s.find(start, j)
+
+        parts.append(s[j:])
+
+        return ''.join(parts)
+
+    def _preprocess_replace_javadoc_link(self, s):
+        """Turn the content of a ``{@link}`` tag into an HTML anchor.
+
+        The content is split into a target and an optional label at the first
+        space that lies outside parentheses. The anchor's ``href`` is the
+        target prefixed with ``@``, with ``#`` member separators replaced by
+        dots.
+        """
+        s = self._compress_whitespace(s)
+
+        target = None
+        label = ''
+
+        if ' ' not in s:
+            target = s
+        else:
+            i = s.find(' ')
+
+            # Spaces inside a parameter list belong to the target.
+            while s.count('(', 0, i) != s.count(')', 0, i):
+                i = s.find(' ', i + 1)
+
+                if i == -1:
+                    i = len(s)
+                    break
+
+            target = s[:i]
+            label = s[i:]
+
+        # startswith() instead of target[0]: the tag content may be empty.
+        if target.startswith('#'):
+            target = target[1:]
+
+        target = target.replace('#', '.').replace(' ', '').strip()
+
+        # Strip HTML tags from the target
+        target = self._html_tag.sub('', target)
+
+        label = label.strip()
+
+        return '<a href="@%s">%s</a>' % (target, label)
+
+    def _preprocess_close_anchor_tags(self, s):
+        """Rewrite ``<a name=...>`` tags as explicitly closed, quoted anchors."""
+        # Add closing tags to all anchors so they are better handled by the parser
+        return self._preprocess_anchors.sub(r'<a name="\1"></a>', s)
+
+    def _preprocess_fix_entities(self, s):
+        """Add the missing ``;`` to nbsp, lt, gt and amp entities."""
+        return self._preprocess_entity.sub(r'&\1;\2', s)
+
+    def _preprocess(self, s_html):
+        """Rewrite inline Javadoc tags and repair markup before parsing."""
+
+        def to_tag(t):
+            # Build a replacement callback wrapping escaped content in <t>.
+            return lambda m: '<%s>%s</%s>' % (t, html_escape(m), t)
+
+        s_html = self._preprocess_inline_javadoc_replace('code', to_tag('code'), s_html)
+        s_html = self._preprocess_inline_javadoc_replace('literal', to_tag('span'), s_html)
+        s_html = self._preprocess_inline_javadoc_replace('docRoot', lambda m: '', s_html)
+        # 'linkplain' must be handled before 'link', which is a prefix of it.
+        s_html = self._preprocess_inline_javadoc_replace('linkplain', self._preprocess_replace_javadoc_link, s_html)
+        s_html = self._preprocess_inline_javadoc_replace('link', self._preprocess_replace_javadoc_link, s_html)
+
+        # Make sure all anchor tags are closed
+        s_html = self._preprocess_close_anchor_tags(s_html)
+
+        # Fix up some entities without closing ;
+        s_html = self._preprocess_fix_entities(s_html)
+
+        return s_html
+
+    # --------------------------------------------------------------------------
+    # ---- Conversion entry point ----
+
+    def convert(self, s_html):
+        """Convert an HTML fragment to reStructuredText.
+
+        :param s_html: the HTML as text, or as UTF-8 encoded bytes.
+        :returns: the reST text, or the empty string for blank input.
+        :raises ValueError: if an inline Javadoc tag has unbalanced brackets.
+        """
+        if not isinstance(s_html, str):
+            s_html = str(s_html, 'utf8')
+
+        s_html = self._preprocess(s_html)
+
+        if not s_html.strip():
+            return ''
+
+        soup = BeautifulSoup(s_html, self._parser)
+
+        # Some tree builders wrap a fragment in <html><body> and others (such
+        # as html.parser) do not; fall back to the document root in that case.
+        html = soup.html
+        top = html.body if html is not None else None
+        if top is None:
+            top = soup
+
+        result = self._process_children(top)
+
+        # Post processing
+        result = self._post_process_empty_lines.sub('', result)
+        result = self._post_process_compress_lines.sub('\n\n', result)
+        result = result.strip()
+
+        return result