The source code and dockerfile for the GSW2024 AI Lab.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

304 lines
10 KiB

4 months ago
  1. #!/usr/bin/env python3
  2. #
  3. # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
  4. #
  5. # Extract documentation from C++ header files to use it in Python bindings
  6. #
  7. import os
  8. import sys
  9. import platform
  10. import re
  11. import textwrap
  12. from clang import cindex
  13. from clang.cindex import CursorKind
  14. from collections import OrderedDict
  15. from threading import Thread, Semaphore
  16. from multiprocessing import cpu_count
  17. RECURSE_LIST = [
  18. CursorKind.TRANSLATION_UNIT,
  19. CursorKind.NAMESPACE,
  20. CursorKind.CLASS_DECL,
  21. CursorKind.STRUCT_DECL,
  22. CursorKind.ENUM_DECL,
  23. CursorKind.CLASS_TEMPLATE
  24. ]
  25. PRINT_LIST = [
  26. CursorKind.CLASS_DECL,
  27. CursorKind.STRUCT_DECL,
  28. CursorKind.ENUM_DECL,
  29. CursorKind.ENUM_CONSTANT_DECL,
  30. CursorKind.CLASS_TEMPLATE,
  31. CursorKind.FUNCTION_DECL,
  32. CursorKind.FUNCTION_TEMPLATE,
  33. CursorKind.CONVERSION_FUNCTION,
  34. CursorKind.CXX_METHOD,
  35. CursorKind.CONSTRUCTOR,
  36. CursorKind.FIELD_DECL
  37. ]
  38. CPP_OPERATORS = {
  39. '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
  40. '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
  41. 'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
  42. '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
  43. 'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
  44. '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
  45. 'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
  46. }
  47. CPP_OPERATORS = OrderedDict(
  48. sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
  49. job_count = cpu_count()
  50. job_semaphore = Semaphore(job_count)
  51. output = []
  52. def d(s):
  53. return s.decode('utf8')
  54. def sanitize_name(name):
  55. name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
  56. for k, v in CPP_OPERATORS.items():
  57. name = name.replace('operator%s' % k, 'operator_%s' % v)
  58. name = re.sub('<.*>', '', name)
  59. name = ''.join([ch if ch.isalnum() else '_' for ch in name])
  60. name = re.sub('_$', '', re.sub('_+', '_', name))
  61. return '__doc_' + name
  62. def process_comment(comment):
  63. result = ''
  64. # Remove C++ comment syntax
  65. leading_spaces = float('inf')
  66. for s in comment.expandtabs(tabsize=4).splitlines():
  67. s = s.strip()
  68. if s.startswith('/*'):
  69. s = s[2:].lstrip('*')
  70. elif s.endswith('*/'):
  71. s = s[:-2].rstrip('*')
  72. elif s.startswith('///'):
  73. s = s[3:]
  74. if s.startswith('*'):
  75. s = s[1:]
  76. if len(s) > 0:
  77. leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
  78. result += s + '\n'
  79. if leading_spaces != float('inf'):
  80. result2 = ""
  81. for s in result.splitlines():
  82. result2 += s[leading_spaces:] + '\n'
  83. result = result2
  84. # Doxygen tags
  85. cpp_group = '([\w:]+)'
  86. param_group = '([\[\w:\]]+)'
  87. s = result
  88. s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
  89. s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
  90. s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
  91. s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
  92. s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
  93. s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
  94. s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
  95. r'\n\n$Parameter ``\2``:\n\n', s)
  96. s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
  97. r'\n\n$Template parameter ``\2``:\n\n', s)
  98. for in_, out_ in {
  99. 'return': 'Returns',
  100. 'author': 'Author',
  101. 'authors': 'Authors',
  102. 'copyright': 'Copyright',
  103. 'date': 'Date',
  104. 'remark': 'Remark',
  105. 'sa': 'See also',
  106. 'see': 'See also',
  107. 'extends': 'Extends',
  108. 'throw': 'Throws',
  109. 'throws': 'Throws'
  110. }.items():
  111. s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s)
  112. s = re.sub(r'\\details\s*', r'\n\n', s)
  113. s = re.sub(r'\\brief\s*', r'', s)
  114. s = re.sub(r'\\short\s*', r'', s)
  115. s = re.sub(r'\\ref\s*', r'', s)
  116. s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
  117. r"```\n\1\n```\n", s, flags=re.DOTALL)
  118. # HTML/TeX tags
  119. s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
  120. s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
  121. s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
  122. s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
  123. s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
  124. s = re.sub(r'<li>', r'\n\n* ', s)
  125. s = re.sub(r'</?ul>', r'', s)
  126. s = re.sub(r'</li>', r'\n\n', s)
  127. s = s.replace('``true``', '``True``')
  128. s = s.replace('``false``', '``False``')
  129. # Re-flow text
  130. wrapper = textwrap.TextWrapper()
  131. wrapper.expand_tabs = True
  132. wrapper.replace_whitespace = True
  133. wrapper.drop_whitespace = True
  134. wrapper.width = 70
  135. wrapper.initial_indent = wrapper.subsequent_indent = ''
  136. result = ''
  137. in_code_segment = False
  138. for x in re.split(r'(```)', s):
  139. if x == '```':
  140. if not in_code_segment:
  141. result += '```\n'
  142. else:
  143. result += '\n```\n\n'
  144. in_code_segment = not in_code_segment
  145. elif in_code_segment:
  146. result += x.strip()
  147. else:
  148. for y in re.split(r'(?: *\n *){2,}', x):
  149. wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
  150. if len(wrapped) > 0 and wrapped[0] == '$':
  151. result += wrapped[1:] + '\n'
  152. wrapper.initial_indent = \
  153. wrapper.subsequent_indent = ' ' * 4
  154. else:
  155. if len(wrapped) > 0:
  156. result += wrapped + '\n\n'
  157. wrapper.initial_indent = wrapper.subsequent_indent = ''
  158. return result.rstrip().lstrip('\n')
  159. def extract(filename, node, prefix):
  160. if not (node.location.file is None or
  161. os.path.samefile(d(node.location.file.name), filename)):
  162. return 0
  163. if node.kind in RECURSE_LIST:
  164. sub_prefix = prefix
  165. if node.kind != CursorKind.TRANSLATION_UNIT:
  166. if len(sub_prefix) > 0:
  167. sub_prefix += '_'
  168. sub_prefix += d(node.spelling)
  169. for i in node.get_children():
  170. extract(filename, i, sub_prefix)
  171. if node.kind in PRINT_LIST:
  172. comment = d(node.raw_comment) if node.raw_comment is not None else ''
  173. comment = process_comment(comment)
  174. sub_prefix = prefix
  175. if len(sub_prefix) > 0:
  176. sub_prefix += '_'
  177. if len(node.spelling) > 0:
  178. name = sanitize_name(sub_prefix + d(node.spelling))
  179. global output
  180. output.append((name, filename, comment))
  181. class ExtractionThread(Thread):
  182. def __init__(self, filename, parameters):
  183. Thread.__init__(self)
  184. self.filename = filename
  185. self.parameters = parameters
  186. job_semaphore.acquire()
  187. def run(self):
  188. print('Processing "%s" ..' % self.filename, file=sys.stderr)
  189. try:
  190. index = cindex.Index(
  191. cindex.conf.lib.clang_createIndex(False, True))
  192. tu = index.parse(self.filename, self.parameters)
  193. extract(self.filename, tu.cursor, '')
  194. finally:
  195. job_semaphore.release()
  196. if __name__ == '__main__':
  197. parameters = ['-x', 'c++', '-std=c++11']
  198. filenames = []
  199. if platform.system() == 'Darwin':
  200. dev_path = '/Applications/Xcode.app/Contents/Developer/'
  201. lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
  202. sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
  203. libclang = lib_dir + 'libclang.dylib'
  204. if os.path.exists(libclang):
  205. cindex.Config.set_library_path(os.path.dirname(libclang))
  206. if os.path.exists(sdk_dir):
  207. sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0])
  208. parameters.append('-isysroot')
  209. parameters.append(sysroot_dir)
  210. for item in sys.argv[1:]:
  211. if item.startswith('-'):
  212. parameters.append(item)
  213. else:
  214. filenames.append(item)
  215. if len(filenames) == 0:
  216. print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
  217. exit(-1)
  218. print('''/*
  219. This file contains docstrings for the Python bindings.
  220. Do not edit! These were automatically extracted by mkdoc.py
  221. */
  222. #define __EXPAND(x) x
  223. #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
  224. #define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
  225. #define __CAT1(a, b) a ## b
  226. #define __CAT2(a, b) __CAT1(a, b)
  227. #define __DOC1(n1) __doc_##n1
  228. #define __DOC2(n1, n2) __doc_##n1##_##n2
  229. #define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
  230. #define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
  231. #define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
  232. #define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
  233. #define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
  234. #define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
  235. #if defined(__GNUG__)
  236. #pragma GCC diagnostic push
  237. #pragma GCC diagnostic ignored "-Wunused-variable"
  238. #endif
  239. ''')
  240. output.clear()
  241. for filename in filenames:
  242. thr = ExtractionThread(filename, parameters)
  243. thr.start()
  244. print('Waiting for jobs to finish ..', file=sys.stderr)
  245. for i in range(job_count):
  246. job_semaphore.acquire()
  247. name_ctr = 1
  248. name_prev = None
  249. for name, _, comment in list(sorted(output, key=lambda x: (x[0], x[1]))):
  250. if name == name_prev:
  251. name_ctr += 1
  252. name = name + "_%i" % name_ctr
  253. else:
  254. name_prev = name
  255. name_ctr = 1
  256. print('\nstatic const char *%s =%sR"doc(%s)doc";' %
  257. (name, '\n' if '\n' in comment else ' ', comment))
  258. print('''
  259. #if defined(__GNUG__)
  260. #pragma GCC diagnostic pop
  261. #endif
  262. ''')