309 lines
10 KiB

  1. #!/usr/bin/env python3
  2. #
  3. # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
  4. #
  5. # Extract documentation from C++ header files to use it in Python bindings
  6. #
  7. import os
  8. import sys
  9. import platform
  10. import re
  11. import textwrap
  12. from clang import cindex
  13. from clang.cindex import CursorKind
  14. from collections import OrderedDict
  15. from threading import Thread, Semaphore
  16. from multiprocessing import cpu_count
  17. RECURSE_LIST = [
  18. CursorKind.TRANSLATION_UNIT,
  19. CursorKind.NAMESPACE,
  20. CursorKind.CLASS_DECL,
  21. CursorKind.STRUCT_DECL,
  22. CursorKind.ENUM_DECL,
  23. CursorKind.CLASS_TEMPLATE
  24. ]
  25. PRINT_LIST = [
  26. CursorKind.CLASS_DECL,
  27. CursorKind.STRUCT_DECL,
  28. CursorKind.ENUM_DECL,
  29. CursorKind.ENUM_CONSTANT_DECL,
  30. CursorKind.CLASS_TEMPLATE,
  31. CursorKind.FUNCTION_DECL,
  32. CursorKind.FUNCTION_TEMPLATE,
  33. CursorKind.CONVERSION_FUNCTION,
  34. CursorKind.CXX_METHOD,
  35. CursorKind.CONSTRUCTOR,
  36. CursorKind.FIELD_DECL
  37. ]
  38. CPP_OPERATORS = {
  39. '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
  40. '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
  41. 'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
  42. '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
  43. 'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
  44. '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
  45. 'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
  46. }
  47. CPP_OPERATORS = OrderedDict(
  48. sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
  49. job_count = cpu_count()
  50. job_semaphore = Semaphore(job_count)
  51. registered_names = dict()
  52. def d(s):
  53. return s.decode('utf8')
  54. def sanitize_name(name):
  55. global registered_names
  56. name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
  57. for k, v in CPP_OPERATORS.items():
  58. name = name.replace('operator%s' % k, 'operator_%s' % v)
  59. name = re.sub('<.*>', '', name)
  60. name = ''.join([ch if ch.isalnum() else '_' for ch in name])
  61. name = re.sub('_$', '', re.sub('_+', '_', name))
  62. if name in registered_names:
  63. registered_names[name] += 1
  64. name += '_' + str(registered_names[name])
  65. else:
  66. registered_names[name] = 1
  67. return '__doc_' + name
  68. def process_comment(comment):
  69. result = ''
  70. # Remove C++ comment syntax
  71. leading_spaces = float('inf')
  72. for s in comment.expandtabs(tabsize=4).splitlines():
  73. s = s.strip()
  74. if s.startswith('/*'):
  75. s = s[2:].lstrip('*')
  76. elif s.endswith('*/'):
  77. s = s[:-2].rstrip('*')
  78. elif s.startswith('///'):
  79. s = s[3:]
  80. if s.startswith('*'):
  81. s = s[1:]
  82. if len(s) > 0:
  83. leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
  84. result += s + '\n'
  85. if leading_spaces != float('inf'):
  86. result2 = ""
  87. for s in result.splitlines():
  88. result2 += s[leading_spaces:] + '\n'
  89. result = result2
  90. # Doxygen tags
  91. cpp_group = '([\w:]+)'
  92. param_group = '([\[\w:\]]+)'
  93. s = result
  94. s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
  95. s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
  96. s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
  97. s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
  98. s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
  99. s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
  100. s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
  101. r'\n\n$Parameter ``\2``:\n\n', s)
  102. s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
  103. r'\n\n$Template parameter ``\2``:\n\n', s)
  104. for in_, out_ in {
  105. 'return': 'Returns',
  106. 'author': 'Author',
  107. 'authors': 'Authors',
  108. 'copyright': 'Copyright',
  109. 'date': 'Date',
  110. 'remark': 'Remark',
  111. 'sa': 'See also',
  112. 'see': 'See also',
  113. 'extends': 'Extends',
  114. 'throw': 'Throws',
  115. 'throws': 'Throws'
  116. }.items():
  117. s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s)
  118. s = re.sub(r'\\details\s*', r'\n\n', s)
  119. s = re.sub(r'\\brief\s*', r'', s)
  120. s = re.sub(r'\\short\s*', r'', s)
  121. s = re.sub(r'\\ref\s*', r'', s)
  122. s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
  123. r"```\n\1\n```\n", s, flags=re.DOTALL)
  124. # HTML/TeX tags
  125. s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
  126. s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
  127. s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
  128. s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
  129. s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
  130. s = re.sub(r'<li>', r'\n\n* ', s)
  131. s = re.sub(r'</?ul>', r'', s)
  132. s = re.sub(r'</li>', r'\n\n', s)
  133. s = s.replace('``true``', '``True``')
  134. s = s.replace('``false``', '``False``')
  135. # Re-flow text
  136. wrapper = textwrap.TextWrapper()
  137. wrapper.expand_tabs = True
  138. wrapper.replace_whitespace = True
  139. wrapper.drop_whitespace = True
  140. wrapper.width = 70
  141. wrapper.initial_indent = wrapper.subsequent_indent = ''
  142. result = ''
  143. in_code_segment = False
  144. for x in re.split(r'(```)', s):
  145. if x == '```':
  146. if not in_code_segment:
  147. result += '```\n'
  148. else:
  149. result += '\n```\n\n'
  150. in_code_segment = not in_code_segment
  151. elif in_code_segment:
  152. result += x.strip()
  153. else:
  154. for y in re.split(r'(?: *\n *){2,}', x):
  155. wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
  156. if len(wrapped) > 0 and wrapped[0] == '$':
  157. result += wrapped[1:] + '\n'
  158. wrapper.initial_indent = \
  159. wrapper.subsequent_indent = ' ' * 4
  160. else:
  161. if len(wrapped) > 0:
  162. result += wrapped + '\n\n'
  163. wrapper.initial_indent = wrapper.subsequent_indent = ''
  164. return result.rstrip().lstrip('\n')
  165. def extract(filename, node, prefix, output):
  166. num_extracted = 0
  167. if not (node.location.file is None or
  168. os.path.samefile(d(node.location.file.name), filename)):
  169. return 0
  170. if node.kind in RECURSE_LIST:
  171. sub_prefix = prefix
  172. if node.kind != CursorKind.TRANSLATION_UNIT:
  173. if len(sub_prefix) > 0:
  174. sub_prefix += '_'
  175. sub_prefix += d(node.spelling)
  176. for i in node.get_children():
  177. num_extracted += extract(filename, i, sub_prefix, output)
  178. if num_extracted == 0:
  179. return 0
  180. if node.kind in PRINT_LIST:
  181. comment = d(node.raw_comment) if node.raw_comment is not None else ''
  182. comment = process_comment(comment)
  183. sub_prefix = prefix
  184. if len(sub_prefix) > 0:
  185. sub_prefix += '_'
  186. if len(node.spelling) > 0:
  187. name = sanitize_name(sub_prefix + d(node.spelling))
  188. output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
  189. (name, '\n' if '\n' in comment else ' ', comment))
  190. num_extracted += 1
  191. return num_extracted
  192. class ExtractionThread(Thread):
  193. def __init__(self, filename, parameters, output):
  194. Thread.__init__(self)
  195. self.filename = filename
  196. self.parameters = parameters
  197. self.output = output
  198. job_semaphore.acquire()
  199. def run(self):
  200. print('Processing "%s" ..' % self.filename, file=sys.stderr)
  201. try:
  202. index = cindex.Index(
  203. cindex.conf.lib.clang_createIndex(False, True))
  204. tu = index.parse(self.filename, self.parameters)
  205. extract(self.filename, tu.cursor, '', self.output)
  206. finally:
  207. job_semaphore.release()
  208. if __name__ == '__main__':
  209. parameters = ['-x', 'c++', '-std=c++11']
  210. filenames = []
  211. if platform.system() == 'Darwin':
  212. dev_path = '/Applications/Xcode.app/Contents/Developer/'
  213. lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
  214. sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
  215. libclang = lib_dir + 'libclang.dylib'
  216. if os.path.exists(libclang):
  217. cindex.Config.set_library_path(os.path.dirname(libclang))
  218. if os.path.exists(sdk_dir):
  219. sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0])
  220. parameters.append('-isysroot')
  221. parameters.append(sysroot_dir)
  222. for item in sys.argv[1:]:
  223. if item.startswith('-'):
  224. parameters.append(item)
  225. else:
  226. filenames.append(item)
  227. if len(filenames) == 0:
  228. print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
  229. exit(-1)
  230. print('''/*
  231. This file contains docstrings for the Python bindings.
  232. Do not edit! These were automatically extracted by mkdoc.py
  233. */
  234. #define __EXPAND(x) x
  235. #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
  236. #define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
  237. #define __CAT1(a, b) a ## b
  238. #define __CAT2(a, b) __CAT1(a, b)
  239. #define __DOC1(n1) __doc_##n1
  240. #define __DOC2(n1, n2) __doc_##n1##_##n2
  241. #define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
  242. #define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
  243. #define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
  244. #define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
  245. #define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
  246. #define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
  247. #if defined(__GNUG__)
  248. #pragma GCC diagnostic push
  249. #pragma GCC diagnostic ignored "-Wunused-variable"
  250. #endif
  251. ''')
  252. output = []
  253. for filename in filenames:
  254. thr = ExtractionThread(filename, parameters, output)
  255. thr.start()
  256. print('Waiting for jobs to finish ..', file=sys.stderr)
  257. for i in range(job_count):
  258. job_semaphore.acquire()
  259. output.sort()
  260. for l in output:
  261. print(l)
  262. print('''
  263. #if defined(__GNUG__)
  264. #pragma GCC diagnostic pop
  265. #endif
  266. ''')