You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

241 lines
7.8 KiB

  1. #!/usr/bin/env python3
  2. #
  3. # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
  4. #
  5. # Extract documentation from C++ header files to use it in Python bindings
  6. #
  7. import os, sys, platform, re, textwrap
  8. from clang import cindex
  9. from clang.cindex import CursorKind
  10. from collections import OrderedDict
  11. from threading import Thread, Semaphore
  12. from multiprocessing import cpu_count
# On macOS, point the clang bindings at the libclang found under
# /opt/llvm/lib, but only when that library file actually exists; otherwise
# the bindings' default library lookup is left untouched.
if platform.system() == 'Darwin':
    libclang = '/opt/llvm/lib/libclang.dylib'
    if os.path.exists(libclang):
        # set_library_path() takes the directory, not the library file itself
        cindex.Config.set_library_path(os.path.dirname(libclang))
  17. RECURSE_LIST = [
  18. CursorKind.TRANSLATION_UNIT,
  19. CursorKind.NAMESPACE,
  20. CursorKind.CLASS_DECL,
  21. CursorKind.STRUCT_DECL,
  22. CursorKind.CLASS_TEMPLATE
  23. ]
  24. PRINT_LIST = [
  25. CursorKind.CLASS_DECL,
  26. CursorKind.STRUCT_DECL,
  27. CursorKind.CLASS_TEMPLATE,
  28. CursorKind.FUNCTION_DECL,
  29. CursorKind.FUNCTION_TEMPLATE,
  30. CursorKind.CXX_METHOD,
  31. CursorKind.CONSTRUCTOR,
  32. CursorKind.FIELD_DECL
  33. ]
  34. CPP_OPERATORS = {
  35. '<=' : 'le', '>=' : 'ge', '==' : 'eq', '!=' : 'ne', '[]' : 'array',
  36. '+=' : 'iadd', '-=' : 'isub', '*=' : 'imul', '/=' : 'idiv', '%=' :
  37. 'imod', '&=' : 'iand', '|=' : 'ior', '^=' : 'ixor', '<<=' : 'ilshift',
  38. '>>=' : 'irshift', '++' : 'inc', '--' : 'dec', '<<' : 'lshift', '>>' :
  39. 'rshift', '&&' : 'land', '||' : 'lor', '!' : 'lnot', '~' : 'bnot', '&'
  40. : 'band', '|' : 'bor', '+' : 'add', '-' : 'sub', '*' : 'mul', '/' :
  41. 'div', '%' : 'mod', '<' : 'lt', '>' : 'gt', '=' : 'assign'
  42. }
  43. CPP_OPERATORS = OrderedDict(sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
  44. job_count = cpu_count()
  45. job_semaphore = Semaphore(job_count)
  46. registered_names = dict()
  47. def d(s):
  48. return s.decode('utf8')
  49. def sanitize_name(name):
  50. global registered_names
  51. for k, v in CPP_OPERATORS.items():
  52. name = name.replace('operator%s' % k, 'operator_%s' % v)
  53. name = name.replace('<', '_')
  54. name = name.replace('>', '_')
  55. name = name.replace(' ', '_')
  56. name = name.replace(',', '_')
  57. if name in registered_names:
  58. registered_names[name] += 1
  59. name += '_' + str(registered_names[name])
  60. else:
  61. registered_names[name] = 1
  62. return '__doc_' + name
  63. def process_comment(comment):
  64. result = ''
  65. # Remove C++ comment syntax
  66. for s in comment.splitlines():
  67. s = s.strip()
  68. if s.startswith('/*'):
  69. s = s[2:].lstrip('* \t')
  70. elif s.endswith('*/'):
  71. s = s[:-2].rstrip('* \t')
  72. elif s.startswith('///'):
  73. s = s[3:]
  74. if s.startswith('*'):
  75. s = s[1:]
  76. result += s.strip() + '\n'
  77. # Doxygen tags
  78. cpp_group = '([\w:]+)'
  79. param_group = '([\[\w:\]]+)'
  80. s = result
  81. s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
  82. s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
  83. s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
  84. s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
  85. s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
  86. s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group), r'\n\n$Parameter ``\2``:\n\n', s)
  87. for in_, out_ in {
  88. 'return' : 'Returns',
  89. 'author' : 'Author',
  90. 'authors' : 'Authors',
  91. 'copyright' : 'Copyright',
  92. 'date' : 'Date',
  93. 'remark' : 'Remark',
  94. 'sa' : 'See also',
  95. 'see' : 'See also',
  96. 'extends' : 'Extends',
  97. 'throw' : 'Throws',
  98. 'throws' : 'Throws' }.items():
  99. s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s)
  100. s = re.sub(r'\\details\s*', r'\n\n', s)
  101. s = re.sub(r'\\brief\s*', r'', s)
  102. s = re.sub(r'\\short\s*', r'', s)
  103. s = re.sub(r'\\ref\s*', r'', s)
  104. # HTML/TeX tags
  105. s = re.sub(r'<tt>([^<]*)</tt>', r'``\1``', s)
  106. s = re.sub(r'<em>([^<]*)</em>', r'*\1*', s)
  107. s = re.sub(r'<b>([^<]*)</b>', r'**\1**', s)
  108. s = re.sub(r'\\f\$([^\$]*)\\f\$', r'$\1$', s)
  109. s = s.replace('``true``', '``True``')
  110. s = s.replace('``false``', '``False``')
  111. # Re-flow text
  112. wrapper = textwrap.TextWrapper()
  113. wrapper.expand_tabs = True
  114. wrapper.replace_whitespace = True
  115. wrapper.width = 75
  116. wrapper.initial_indent = wrapper.subsequent_indent = ''
  117. result = ''
  118. for x in re.split(r'\n{2,}', s):
  119. wrapped = wrapper.fill(x.strip())
  120. if len(wrapped) > 0 and wrapped[0] == '$':
  121. result += wrapped[1:] + '\n'
  122. wrapper.initial_indent = wrapper.subsequent_indent = ' '*4
  123. else:
  124. result += wrapped + '\n\n'
  125. wrapper.initial_indent = wrapper.subsequent_indent = ''
  126. return result.rstrip()
  127. def extract(filename, node, prefix, output):
  128. num_extracted = 0
  129. if not (node.location.file is None or os.path.samefile(d(node.location.file.name), filename)):
  130. return 0
  131. if node.kind in RECURSE_LIST:
  132. sub_prefix = prefix
  133. if node.kind != CursorKind.TRANSLATION_UNIT:
  134. if len(sub_prefix) > 0:
  135. sub_prefix += '_'
  136. sub_prefix += d(node.spelling)
  137. for i in node.get_children():
  138. num_extracted += extract(filename, i, sub_prefix, output)
  139. if num_extracted == 0:
  140. return 0
  141. if node.kind in PRINT_LIST:
  142. comment = d(node.raw_comment) if node.raw_comment is not None else ''
  143. comment = process_comment(comment)
  144. name = sanitize_name(prefix + '_' + d(node.spelling))
  145. output.append('\nstatic const char *%s = %sR"doc(%s)doc";' % (name, '\n' if '\n' in comment else '', comment))
  146. num_extracted += 1
  147. return num_extracted
  148. class ExtractionThread(Thread):
  149. def __init__ (self, filename, parameters, output):
  150. Thread.__init__(self)
  151. self.filename = filename
  152. self.parameters = parameters
  153. self.output = output
  154. job_semaphore.acquire()
  155. def run(self):
  156. print('Processing "%s" ..' % self.filename, file = sys.stderr)
  157. try:
  158. index = cindex.Index(cindex.conf.lib.clang_createIndex(False, True))
  159. tu = index.parse(self.filename, self.parameters)
  160. extract(self.filename, tu.cursor, '', self.output)
  161. finally:
  162. job_semaphore.release()
  163. if __name__ == '__main__':
  164. parameters = ['-x', 'c++', '-std=c++11']
  165. filenames = []
  166. for item in sys.argv[1:]:
  167. if item.startswith('-'):
  168. parameters.append(item)
  169. else:
  170. filenames.append(item)
  171. if len(filenames) == 0:
  172. print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
  173. exit(-1)
  174. print('''/*
  175. This file contains docstrings for the Python bindings.
  176. Do not edit! These were automatically extracted by mkdoc.py
  177. */
  178. #define __EXPAND(x) x
  179. #define __COUNT(_1, _2, _3, _4, _5, COUNT, ...) COUNT
  180. #define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 5, 4, 3, 2, 1))
  181. #define __CAT1(a, b) a ## b
  182. #define __CAT2(a, b) __CAT1(a, b)
  183. #define __DOC1(n1) __doc_##n1
  184. #define __DOC2(n1, n2) __doc_##n1##_##n2
  185. #define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
  186. #define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
  187. #define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4_##n5
  188. #define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
  189. #if defined(__GNUG__)
  190. #pragma GCC diagnostic push
  191. #pragma GCC diagnostic ignored "-Wunused-variable"
  192. #endif
  193. ''')
  194. output = []
  195. for filename in filenames:
  196. thr = ExtractionThread(filename, parameters, output)
  197. thr.start()
  198. print('Waiting for jobs to finish ..', file = sys.stderr)
  199. for i in range(job_count):
  200. job_semaphore.acquire()
  201. output.sort()
  202. for l in output:
  203. print(l)
  204. print('''
  205. #if defined(__GNUG__)
  206. #pragma GCC diagnostic pop
  207. #endif
  208. ''')