380 lines
12 KiB
Python
380 lines
12 KiB
Python
|
#!/usr/bin/env python3
|
||
|
#
|
||
|
# Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
|
||
|
#
|
||
|
# Extract documentation from C++ header files to use it in Python bindings
|
||
|
#
|
||
|
|
||
|
import os
|
||
|
import sys
|
||
|
import platform
|
||
|
import re
|
||
|
import textwrap
|
||
|
|
||
|
from clang import cindex
|
||
|
from clang.cindex import CursorKind
|
||
|
from collections import OrderedDict
|
||
|
from glob import glob
|
||
|
from threading import Thread, Semaphore
|
||
|
from multiprocessing import cpu_count
|
||
|
|
||
|
# Cursor kinds whose children extract() walks into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Cursor kinds whose spelling is NOT appended to the name prefix.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT,
]

# Maps a C++ operator token to the word used in generated identifiers.
# The entries are ordered longest token first so that sanitize_name()
# replaces e.g. "<<=" before "<<" and "<<" before "<".
CPP_OPERATORS = OrderedDict(sorted(
    {
        '<=': 'le',
        '>=': 'ge',
        '==': 'eq',
        '!=': 'ne',
        '[]': 'array',
        '+=': 'iadd',
        '-=': 'isub',
        '*=': 'imul',
        '/=': 'idiv',
        '%=': 'imod',
        '&=': 'iand',
        '|=': 'ior',
        '^=': 'ixor',
        '<<=': 'ilshift',
        '>>=': 'irshift',
        '++': 'inc',
        '--': 'dec',
        '<<': 'lshift',
        '>>': 'rshift',
        '&&': 'land',
        '||': 'lor',
        '!': 'lnot',
        '~': 'bnot',
        '&': 'band',
        '|': 'bor',
        '+': 'add',
        '-': 'sub',
        '*': 'mul',
        '/': 'div',
        '%': 'mod',
        '<': 'lt',
        '>': 'gt',
        '=': 'assign',
        '()': 'call',
    }.items(),
    # A stable sort with reverse=True keeps equal-length tokens in their
    # declaration order, exactly like sorting on the negated length.
    key=lambda entry: len(entry[0]),
    reverse=True,
))

# One semaphore permit per CPU bounds how many extraction jobs run at once.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)
|
||
|
|
||
|
|
||
|
class NoFilenamesError(ValueError):
    """Raised by read_args() when the argument list names no input files."""
|
||
|
|
||
|
|
||
|
def d(s):
    """Return *s* as text, decoding it as UTF-8 when it is not already a str."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
|
||
|
|
||
|
|
||
|
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_*`` identifier.

    Steps, in order:

    1. ``type-parameter-0-N`` placeholders become ``TN``.
    2. ``operatorX`` becomes ``operator_<word>`` using CPP_OPERATORS
       (longest operator token first).
    3. Anything between the first ``<`` and the last ``>`` is dropped.
    4. Every non-alphanumeric character becomes ``_``; runs of ``_`` are
       collapsed and one trailing ``_`` is removed.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator' + symbol, 'operator_' + word)
    name = re.sub('<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)
    name = re.sub('_+', '_', name)
    name = re.sub('_$', '', name)
    return '__doc_' + name
|
||
|
|
||
|
|
||
|
def process_comment(comment):
    """Convert a raw C++/Doxygen comment into re-flowed plain text.

    The comment markers (``/*``, ``*/``, ``///``, leading ``*``) are
    removed and the common indentation is stripped.  A set of Doxygen
    commands and HTML/TeX tags is then rewritten into a reST/Markdown-like
    notation, and the text outside code fences is re-wrapped to 70
    columns.

    Section commands such as ``\\param`` or ``\\return`` are first replaced
    by a paragraph starting with ``$``; the re-flow step strips that marker
    and indents the paragraph that follows by four spaces.

    :param comment: raw comment text (may be empty).
    :return: the processed text without leading newlines or trailing
        whitespace.
    """
    # Remove C++ comment syntax
    stripped_lines = []
    leading_spaces = float('inf')
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:].lstrip('*')
            # A one-line block comment ("/** text */") carries the closing
            # marker on the same line; strip it as well.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        if len(s) > 0:
            leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
        stripped_lines.append(s)

    # Strip the indentation shared by all non-empty lines.
    if leading_spaces != float('inf'):
        stripped_lines = [line[leading_spaces:] for line in stripped_lines]
    s = ''.join(line + '\n' for line in stripped_lines)

    # Doxygen tags (raw strings: "\w" and "\[" are regex escapes, not
    # Python string escapes)
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    for in_, out_ in (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remarks'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    ):
        # The word boundary keeps a short tag from matching the start of a
        # longer one (e.g. "\throw" inside "\throws"), which used to leave
        # a stray "s" at the start of the section text.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    pieces = []
    in_code_segment = False
    # Splitting on a captured "```" keeps the fences as separate items, so
    # the text between an opening and closing fence is copied verbatim.
    for x in re.split(r'(```)', s):
        if x == '```':
            if not in_code_segment:
                pieces.append('```\n')
            else:
                pieces.append('\n```\n\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if len(wrapped) > 0 and wrapped[0] == '$':
                    # Section heading: drop the marker and indent the
                    # paragraph that follows it.
                    pieces.append(wrapped[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if len(wrapped) > 0:
                        pieces.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(pieces).rstrip().lstrip('\n')
|
||
|
|
||
|
|
||
|
def extract(filename, node, prefix, output):
    """Collect docstring entries for *node* and its descendants.

    :param filename: path of the header being processed; nodes located in
        a different file are skipped.
    :param node: cursor to inspect (its ``location``, ``kind``,
        ``spelling``, ``raw_comment`` and ``get_children()`` are used).
    :param prefix: underscore-joined names of the enclosing scopes.
    :param output: list that receives ``(name, filename, comment)`` tuples.
    :return: ``0`` when the node belongs to another file, otherwise ``None``.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    kind = node.kind

    if kind in RECURSE_LIST:
        # Build the prefix handed down to the children of this scope.
        child_prefix = prefix
        if kind not in PREFIX_BLACKLIST:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix, output)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment('' if raw is None else d(raw))
        own_prefix = prefix + '_' if prefix else prefix
        # Entities without a spelling (anonymous ones) produce no entry.
        if node.spelling:
            name = sanitize_name(own_prefix + d(node.spelling))
            output.append((name, filename, comment))
|
||
|
|
||
|
|
||
|
class ExtractionThread(Thread):
    """Worker thread that parses one header and extracts its docstrings.

    The constructor takes a permit from ``job_semaphore`` (so it blocks
    while all permits are in use); ``run`` gives the permit back when it
    finishes, whether or not parsing succeeded.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename, self.parameters, self.output = \
            filename, parameters, output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its entries to ``self.output``."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            index = cindex.Index(raw_index)
            translation_unit = index.parse(self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            # Always hand the permit back so waiting jobs can proceed.
            job_semaphore.release()
|
||
|
|
||
|
|
||
|
def read_args(args):
    """Split *args* into compiler parameters and input filenames.

    Defaults ``-x c++`` and ``-std=c++11`` are added unless *args* already
    contains ``-x`` or a ``-std=`` option.  Platform-specific include
    settings are added next (Xcode SDK sysroot on macOS, the highest
    versioned clang include directory on Linux), followed by every item of
    *args* that starts with ``-``.  All remaining items are treated as
    filenames.

    :param args: sequence of command-line arguments.
    :return: tuple ``(parameters, filenames)``.
    :raises NoFilenamesError: if *args* contains no filename.
    """
    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    if platform.system() == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # os.walk yields nothing for an unreadable path and the SDK
            # folder may contain no sub-directories; in both cases skip
            # the sysroot instead of failing.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                sysroot_dir = os.path.join(sdk_dir, sdk_names[0])
                parameters.append('-isysroot')
                parameters.append(sysroot_dir)
    elif platform.system() == 'Linux':
        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        def clang_folder_version(folder):
            # Numbers in the path, ignoring those that directly follow
            # "lib" (lib64 / lib32) or continue another number.
            return [int(ver)
                    for ver in re.findall(r'(?<!lib)(?<!\d)\d+', folder)]

        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=clang_folder_version)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    for item in args:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames
|
||
|
|
||
|
|
||
|
def extract_all(args):
    """Extract docstring entries from every header named in *args*.

    One ExtractionThread is started per filename; the number running at
    the same time is bounded by ``job_semaphore``.

    :param args: command-line arguments, as accepted by read_args().
    :return: list of ``(name, filename, comment)`` tuples.
    :raises NoFilenamesError: if *args* contains no filename.
    """
    parameters, filenames = read_args(args)
    output = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Holding every permit means every worker has released its own,
    # i.e. all jobs are done.
    for _ in range(job_count):
        job_semaphore.acquire()
    # Hand the permits back; otherwise a second call in the same process
    # would block forever when it creates its first ExtractionThread.
    for _ in range(job_count):
        job_semaphore.release()

    return output
|
||
|
|
||
|
|
||
|
def write_header(comments, out_file=sys.stdout):
    """Write the docstring header for *comments* to *out_file*.

    The output consists of a fixed preamble (the ``DOC(...)`` helper
    macros), one ``static const char *`` definition per entry, and a fixed
    epilogue.  Entries are sorted by name, then filename; the second and
    later entries sharing a name get a ``_2``, ``_3``, ... suffix.

    :param comments: iterable of ``(name, filename, comment)`` tuples.
    :param out_file: text stream to write to (defaults to standard output).
    """
    print('''/*
This file contains docstrings for the Python bindings.
Do not edit! These were automatically extracted by mkdoc.py
*/

#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    previous_name = None
    occurrence = 1
    ordered = sorted(comments, key=lambda entry: (entry[0], entry[1]))
    for name, _, comment in ordered:
        if name != previous_name:
            previous_name = name
            occurrence = 1
        else:
            # Repeated name: disambiguate with a running counter.
            occurrence += 1
            name = name + "_%i" % occurrence
        # Multi-line docstrings start on their own line.
        separator = '\n' if '\n' in comment else ' '
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, separator, comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)
|
||
|
|
||
|
|
||
|
def mkdoc(args):
    """Extract docstrings from the given headers and write the header file.

    The first argument starting with ``-o`` selects the output file, given
    either attached (``-oFILE``) or as the following argument
    (``-o FILE``).  Without it the header is written to standard output.
    All other arguments are passed on to extract_all().

    :param args: iterable of command-line arguments (not modified).
    :raises SystemExit: with code -1 if ``-o`` is given without a value.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            args.remove(arg)
            try:
                # After the removal, args[idx] is the item that followed
                # the flag.
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                print("-o flag requires an argument")
                # sys.exit is always available; the bare exit() helper is
                # only installed by the site module.
                sys.exit(-1)
            break

    comments = extract_all(args)

    if out_path:
        try:
            # Comments are decoded as UTF-8, so write them back as UTF-8
            # instead of relying on the locale's default encoding.
            with open(out_path, 'w', encoding='utf-8') as out_file:
                write_header(comments, out_file)
        except BaseException:
            # In the event of an error, don't leave a partially-written
            # output file.
            try:
                os.unlink(out_path)
            except OSError:
                # Best-effort cleanup; the original error is re-raised below.
                pass
            raise
    else:
        write_header(comments)
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Script entry point: process the command line and print a usage
    # message when no header file was given.
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(-1)
|