You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

855 lines
23 KiB

  1. #!/usr/bin/env python
  2. #
  3. # Copyright 2008, Google Inc.
  4. # All rights reserved.
  5. #
  6. # Redistribution and use in source and binary forms, with or without
  7. # modification, are permitted provided that the following conditions are
  8. # met:
  9. #
  10. # * Redistributions of source code must retain the above copyright
  11. # notice, this list of conditions and the following disclaimer.
  12. # * Redistributions in binary form must reproduce the above
  13. # copyright notice, this list of conditions and the following disclaimer
  14. # in the documentation and/or other materials provided with the
  15. # distribution.
  16. # * Neither the name of Google Inc. nor the names of its
  17. # contributors may be used to endorse or promote products derived from
  18. # this software without specific prior written permission.
  19. #
  20. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. """pump v0.2.0 - Pretty Useful for Meta Programming.
  32. A tool for preprocessor meta programming. Useful for generating
  33. repetitive boilerplate code. Especially useful for writing C++
  34. classes, functions, macros, and templates that need to work with
  35. various number of arguments.
  36. USAGE:
  37. pump.py SOURCE_FILE
  38. EXAMPLES:
  39. pump.py foo.cc.pump
  40. Converts foo.cc.pump to foo.cc.
  41. GRAMMAR:
  42. CODE ::= ATOMIC_CODE*
  43. ATOMIC_CODE ::= $var ID = EXPRESSION
  44. | $var ID = [[ CODE ]]
  45. | $range ID EXPRESSION..EXPRESSION
  46. | $for ID SEPARATOR [[ CODE ]]
  47. | $($)
  48. | $ID
  49. | $(EXPRESSION)
  50. | $if EXPRESSION [[ CODE ]] ELSE_BRANCH
  51. | [[ CODE ]]
  52. | RAW_CODE
  53. SEPARATOR ::= RAW_CODE | EMPTY
  54. ELSE_BRANCH ::= $else [[ CODE ]]
  55. | $elif EXPRESSION [[ CODE ]] ELSE_BRANCH
  56. | EMPTY
  57. EXPRESSION has Python syntax.
  58. """
  59. __author__ = 'wan@google.com (Zhanyong Wan)'
  60. import os
  61. import re
  62. import sys
# Regexes for the Pump token types.  For each input line, every pattern
# is tried and the match that starts earliest wins (see FindFirstInLine),
# so ordering here only breaks ties for matches at the same column.
# Keyword patterns ($var, $for, ...) consume the trailing whitespace;
# '[[' and ']]' also swallow an immediately following newline.
TOKEN_TABLE = [
    (re.compile(r'\$var\s+'), '$var'),
    (re.compile(r'\$elif\s+'), '$elif'),
    (re.compile(r'\$else\s+'), '$else'),
    (re.compile(r'\$for\s+'), '$for'),
    (re.compile(r'\$if\s+'), '$if'),
    (re.compile(r'\$range\s+'), '$range'),
    (re.compile(r'\$[_A-Za-z]\w*'), '$id'),
    (re.compile(r'\$\(\$\)'), '$($)'),
    (re.compile(r'\$'), '$'),
    (re.compile(r'\[\[\n?'), '[['),
    (re.compile(r'\]\]\n?'), ']]'),
    ]
  76. class Cursor:
  77. """Represents a position (line and column) in a text file."""
  78. def __init__(self, line=-1, column=-1):
  79. self.line = line
  80. self.column = column
  81. def __eq__(self, rhs):
  82. return self.line == rhs.line and self.column == rhs.column
  83. def __ne__(self, rhs):
  84. return not self == rhs
  85. def __lt__(self, rhs):
  86. return self.line < rhs.line or (
  87. self.line == rhs.line and self.column < rhs.column)
  88. def __le__(self, rhs):
  89. return self < rhs or self == rhs
  90. def __gt__(self, rhs):
  91. return rhs < self
  92. def __ge__(self, rhs):
  93. return rhs <= self
  94. def __str__(self):
  95. if self == Eof():
  96. return 'EOF'
  97. else:
  98. return '%s(%s)' % (self.line + 1, self.column)
  99. def __add__(self, offset):
  100. return Cursor(self.line, self.column + offset)
  101. def __sub__(self, offset):
  102. return Cursor(self.line, self.column - offset)
  103. def Clone(self):
  104. """Returns a copy of self."""
  105. return Cursor(self.line, self.column)
# Special cursor to indicate the end-of-file.
def Eof():
  """Returns the special cursor to denote the end-of-file."""
  # (-1, -1) can never be a real file position, so it is safe as a
  # sentinel; Cursor.__eq__ makes all Eof() cursors compare equal.
  return Cursor(-1, -1)
  110. class Token:
  111. """Represents a token in a Pump source file."""
  112. def __init__(self, start=None, end=None, value=None, token_type=None):
  113. if start is None:
  114. self.start = Eof()
  115. else:
  116. self.start = start
  117. if end is None:
  118. self.end = Eof()
  119. else:
  120. self.end = end
  121. self.value = value
  122. self.token_type = token_type
  123. def __str__(self):
  124. return 'Token @%s: \'%s\' type=%s' % (
  125. self.start, self.value, self.token_type)
  126. def Clone(self):
  127. """Returns a copy of self."""
  128. return Token(self.start.Clone(), self.end.Clone(), self.value,
  129. self.token_type)
  130. def StartsWith(lines, pos, string):
  131. """Returns True iff the given position in lines starts with 'string'."""
  132. return lines[pos.line][pos.column:].startswith(string)
  133. def FindFirstInLine(line, token_table):
  134. best_match_start = -1
  135. for (regex, token_type) in token_table:
  136. m = regex.search(line)
  137. if m:
  138. # We found regex in lines
  139. if best_match_start < 0 or m.start() < best_match_start:
  140. best_match_start = m.start()
  141. best_match_length = m.end() - m.start()
  142. best_match_token_type = token_type
  143. if best_match_start < 0:
  144. return None
  145. return (best_match_start, best_match_length, best_match_token_type)
  146. def FindFirst(lines, token_table, cursor):
  147. """Finds the first occurrence of any string in strings in lines."""
  148. start = cursor.Clone()
  149. cur_line_number = cursor.line
  150. for line in lines[start.line:]:
  151. if cur_line_number == start.line:
  152. line = line[start.column:]
  153. m = FindFirstInLine(line, token_table)
  154. if m:
  155. # We found a regex in line.
  156. (start_column, length, token_type) = m
  157. if cur_line_number == start.line:
  158. start_column += start.column
  159. found_start = Cursor(cur_line_number, start_column)
  160. found_end = found_start + length
  161. return MakeToken(lines, found_start, found_end, token_type)
  162. cur_line_number += 1
  163. # We failed to find str in lines
  164. return None
  165. def SubString(lines, start, end):
  166. """Returns a substring in lines."""
  167. if end == Eof():
  168. end = Cursor(len(lines) - 1, len(lines[-1]))
  169. if start >= end:
  170. return ''
  171. if start.line == end.line:
  172. return lines[start.line][start.column:end.column]
  173. result_lines = ([lines[start.line][start.column:]] +
  174. lines[start.line + 1:end.line] +
  175. [lines[end.line][:end.column]])
  176. return ''.join(result_lines)
  177. def StripMetaComments(str):
  178. """Strip meta comments from each line in the given string."""
  179. # First, completely remove lines containing nothing but a meta
  180. # comment, including the trailing \n.
  181. str = re.sub(r'^\s*\$\$.*\n', '', str)
  182. # Then, remove meta comments from contentful lines.
  183. return re.sub(r'\s*\$\$.*', '', str)
def MakeToken(lines, start, end, token_type):
  """Creates a new instance of Token.

  The token's value is the text of lines between cursors start and end.
  """
  return Token(start, end, SubString(lines, start, end), token_type)
  187. def ParseToken(lines, pos, regex, token_type):
  188. line = lines[pos.line][pos.column:]
  189. m = regex.search(line)
  190. if m and not m.start():
  191. return MakeToken(lines, pos, pos + m.end(), token_type)
  192. else:
  193. print 'ERROR: %s expected at %s.' % (token_type, pos)
  194. sys.exit(1)
# A Pump/Python identifier: letter or underscore, then word characters.
ID_REGEX = re.compile(r'[_A-Za-z]\w*')
# The '=' in a $var definition.
EQ_REGEX = re.compile(r'=')
# The remainder of the line up to (but excluding) a '$$' meta comment
# or the end of the line; non-greedy with a lookahead so the '$$' is
# not consumed.
REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)')
# Zero or more whitespace characters (always matches).
OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*')
# Exactly one whitespace character.
WHITE_SPACE_REGEX = re.compile(r'\s')
# The '..' separator in a $range statement.
DOT_DOT_REGEX = re.compile(r'\.\.')
  201. def Skip(lines, pos, regex):
  202. line = lines[pos.line][pos.column:]
  203. m = re.search(regex, line)
  204. if m and not m.start():
  205. return pos + m.end()
  206. else:
  207. return pos
  208. def SkipUntil(lines, pos, regex, token_type):
  209. line = lines[pos.line][pos.column:]
  210. m = re.search(regex, line)
  211. if m:
  212. return pos + m.start()
  213. else:
  214. print ('ERROR: %s expected on line %s after column %s.' %
  215. (token_type, pos.line + 1, pos.column))
  216. sys.exit(1)
def ParseExpTokenInParens(lines, pos):
  """Parses a parenthesized expression starting at pos into an 'exp' Token.

  Used for the $(EXPRESSION) construct: pos points just past the '$'.
  The returned token spans the whole '(...)' text, with nested
  parentheses balanced.
  """
  def ParseInParens(pos):
    # Skip optional whitespace and the opening '(', consume the balanced
    # body, then skip the closing ')'.
    pos = Skip(lines, pos, OPTIONAL_WHITE_SPACES_REGEX)
    pos = Skip(lines, pos, r'\(')
    pos = Parse(pos)
    pos = Skip(lines, pos, r'\)')
    return pos

  def Parse(pos):
    # Scan to the next '(' or ')'.  On '(' recurse into the nested group
    # and keep scanning at this level; on ')' stop just before it so the
    # caller can consume it.
    pos = SkipUntil(lines, pos, r'\(|\)', ')')
    if SubString(lines, pos, pos + 1) == '(':
      pos = Parse(pos + 1)
      pos = Skip(lines, pos, r'\)')
      return Parse(pos)
    else:
      return pos

  start = pos.Clone()
  pos = ParseInParens(pos)
  return MakeToken(lines, start, pos, 'exp')
  235. def RStripNewLineFromToken(token):
  236. if token.value.endswith('\n'):
  237. return Token(token.start, token.end, token.value[:-1], token.token_type)
  238. else:
  239. return token
def TokenizeLines(lines, pos):
  """A generator that yields the Pump tokens in lines, starting at pos.

  Raw text between constructs is emitted as 'code' tokens; the
  constructs themselves are broken into keyword / id / exp tokens.
  """
  while True:
    found = FindFirst(lines, TOKEN_TABLE, pos)
    if not found:
      # No more Pump constructs: the rest of the input is raw code.
      yield MakeToken(lines, pos, Eof(), 'code')
      return

    if found.start == pos:
      prev_token = None
      prev_token_rstripped = None
    else:
      # Raw code between pos and the construct we just found.
      prev_token = MakeToken(lines, pos, found.start, 'code')
      # Most keywords below swallow the newline that directly precedes
      # them, so they also get a version with it stripped.
      prev_token_rstripped = RStripNewLineFromToken(prev_token)

    if found.token_type == '$var':
      if prev_token_rstripped:
        yield prev_token_rstripped
      yield found
      id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
      yield id_token
      pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)

      eq_token = ParseToken(lines, pos, EQ_REGEX, '=')
      yield eq_token
      pos = Skip(lines, eq_token.end, r'\s*')

      if SubString(lines, pos, pos + 2) != '[[':
        # The rest of the line is the value expression; a '[[' instead
        # begins a code-block value, tokenized on later iterations.
        exp_token = ParseToken(lines, pos, REST_OF_LINE_REGEX, 'exp')
        yield exp_token
        pos = Cursor(exp_token.end.line + 1, 0)
    elif found.token_type == '$for':
      if prev_token_rstripped:
        yield prev_token_rstripped
      yield found
      id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
      yield id_token
      pos = Skip(lines, id_token.end, WHITE_SPACE_REGEX)
    elif found.token_type == '$range':
      if prev_token_rstripped:
        yield prev_token_rstripped
      yield found
      id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
      yield id_token
      pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)

      # LOWER..UPPER: the text before '..' is the first expression and
      # the remainder of the line is the second.
      dots_pos = SkipUntil(lines, pos, DOT_DOT_REGEX, '..')
      yield MakeToken(lines, pos, dots_pos, 'exp')
      yield MakeToken(lines, dots_pos, dots_pos + 2, '..')
      pos = dots_pos + 2
      new_pos = Cursor(pos.line + 1, 0)
      yield MakeToken(lines, pos, new_pos, 'exp')
      pos = new_pos
    elif found.token_type == '$':
      # $(EXPRESSION): the preceding raw code keeps its newline, then
      # the parenthesized expression is parsed as one 'exp' token.
      if prev_token:
        yield prev_token
      yield found
      exp_token = ParseExpTokenInParens(lines, found.end)
      yield exp_token
      pos = exp_token.end
    elif (found.token_type == ']]' or found.token_type == '$if' or
          found.token_type == '$elif' or found.token_type == '$else'):
      # These keywords eat the newline at the end of the preceding
      # raw-code token.
      if prev_token_rstripped:
        yield prev_token_rstripped
      yield found
      pos = found.end
    else:
      if prev_token:
        yield prev_token
      yield found
      pos = found.end
  305. def Tokenize(s):
  306. """A generator that yields the tokens in the given string."""
  307. if s != '':
  308. lines = s.splitlines(True)
  309. for token in TokenizeLines(lines, Cursor(0, 0)):
  310. yield token
class CodeNode:
  """AST node: a sequence of atomic code nodes (the CODE production)."""
  def __init__(self, atomic_code_list=None):
    self.atomic_code = atomic_code_list


class VarNode:
  """AST node for '$var ID = ...'; atomic_code holds the value
  (an ExpNode or a CodeNode)."""
  def __init__(self, identifier=None, atomic_code=None):
    self.identifier = identifier
    self.atomic_code = atomic_code


class RangeNode:
  """AST node for '$range ID exp1..exp2'."""
  def __init__(self, identifier=None, exp1=None, exp2=None):
    self.identifier = identifier
    self.exp1 = exp1
    self.exp2 = exp2


class ForNode:
  """AST node for '$for ID SEPARATOR [[ CODE ]]'; sep may be None."""
  def __init__(self, identifier=None, sep=None, code=None):
    self.identifier = identifier
    self.sep = sep
    self.code = code


class ElseNode:
  """AST node for a bare else-branch."""
  def __init__(self, else_branch=None):
    self.else_branch = else_branch


class IfNode:
  """AST node for '$if EXPRESSION [[ CODE ]] ELSE_BRANCH'."""
  def __init__(self, exp=None, then_branch=None, else_branch=None):
    self.exp = exp
    self.then_branch = then_branch
    self.else_branch = else_branch


class RawCodeNode:
  """AST node for literal output text; raw_code is a 'code' Token."""
  def __init__(self, token=None):
    self.raw_code = token


class LiteralDollarNode:
  """AST node for '$($)', which emits a literal '$'."""
  def __init__(self, token):
    self.token = token


class ExpNode:
  """AST node for an expression; python_exp is its Python translation
  (identifiers rewritten to self.GetValue(...) calls)."""
  def __init__(self, token, python_exp):
    self.token = token
    self.python_exp = python_exp
  346. def PopFront(a_list):
  347. head = a_list[0]
  348. a_list[:1] = []
  349. return head
  350. def PushFront(a_list, elem):
  351. a_list[:0] = [elem]
  352. def PopToken(a_list, token_type=None):
  353. token = PopFront(a_list)
  354. if token_type is not None and token.token_type != token_type:
  355. print 'ERROR: %s expected at %s' % (token_type, token.start)
  356. print 'ERROR: %s found instead' % (token,)
  357. sys.exit(1)
  358. return token
  359. def PeekToken(a_list):
  360. if not a_list:
  361. return None
  362. return a_list[0]
  363. def ParseExpNode(token):
  364. python_exp = re.sub(r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value)
  365. return ExpNode(token, python_exp)
def ParseElseNode(tokens):
  """Parses the ELSE_BRANCH of an $if: '$else', '$elif', or nothing.

  Consumes the branch's tokens from the front of tokens.  Returns the
  branch's CodeNode, or None when there is no else-branch.
  """
  def Pop(token_type=None):
    return PopToken(tokens, token_type)

  next = PeekToken(tokens)
  if not next:
    return None
  if next.token_type == '$else':
    Pop('$else')
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    return code_node
  elif next.token_type == '$elif':
    Pop('$elif')
    # The $elif condition arrives from the tokenizer as a 'code' token.
    exp = Pop('code')
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    inner_else_node = ParseElseNode(tokens)
    # Desugar '$elif' into a nested $if that forms the else-branch.
    return CodeNode([IfNode(ParseExpNode(exp), code_node, inner_else_node)])
  elif not next.value.strip():
    # Whitespace-only raw code between ']]' and a following $else/$elif
    # is skipped so the else-branch is still recognized.
    Pop('code')
    return ParseElseNode(tokens)
  else:
    return None
def ParseAtomicCodeNode(tokens):
  """Parses one ATOMIC_CODE production from the front of tokens.

  Returns the corresponding AST node, or None (after pushing the token
  back) when the next token cannot begin an atomic code node.
  """
  def Pop(token_type=None):
    return PopToken(tokens, token_type)

  head = PopFront(tokens)
  t = head.token_type
  if t == 'code':
    return RawCodeNode(head)
  elif t == '$var':
    id_token = Pop('id')
    Pop('=')
    next = PeekToken(tokens)
    if next.token_type == 'exp':
      # Form: $var ID = EXPRESSION
      exp_token = Pop()
      return VarNode(id_token, ParseExpNode(exp_token))
    # Form: $var ID = [[ CODE ]]
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    return VarNode(id_token, code_node)
  elif t == '$for':
    id_token = Pop('id')
    next_token = PeekToken(tokens)
    if next_token.token_type == 'code':
      # Raw code between the loop id and '[[' is the separator text.
      sep_token = next_token
      Pop('code')
    else:
      sep_token = None
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    return ForNode(id_token, sep_token, code_node)
  elif t == '$if':
    # The $if condition arrives from the tokenizer as a 'code' token.
    exp_token = Pop('code')
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    else_node = ParseElseNode(tokens)
    return IfNode(ParseExpNode(exp_token), code_node, else_node)
  elif t == '$range':
    id_token = Pop('id')
    exp1_token = Pop('exp')
    Pop('..')
    exp2_token = Pop('exp')
    return RangeNode(id_token, ParseExpNode(exp1_token),
                     ParseExpNode(exp2_token))
  elif t == '$id':
    # Strip the leading '$' so just the identifier becomes the
    # expression.
    return ParseExpNode(Token(head.start + 1, head.end, head.value[1:], 'id'))
  elif t == '$($)':
    return LiteralDollarNode(head)
  elif t == '$':
    exp_token = Pop('exp')
    return ParseExpNode(exp_token)
  elif t == '[[':
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    return code_node
  else:
    # Not the start of an atomic code node (e.g. a ']]' that closes the
    # enclosing block); restore the token and signal the caller.
    PushFront(tokens, head)
    return None
  449. def ParseCodeNode(tokens):
  450. atomic_code_list = []
  451. while True:
  452. if not tokens:
  453. break
  454. atomic_code_node = ParseAtomicCodeNode(tokens)
  455. if atomic_code_node:
  456. atomic_code_list.append(atomic_code_node)
  457. else:
  458. break
  459. return CodeNode(atomic_code_list)
  460. def ParseToAST(pump_src_text):
  461. """Convert the given Pump source text into an AST."""
  462. tokens = list(Tokenize(pump_src_text))
  463. code_node = ParseCodeNode(tokens)
  464. return code_node
  465. class Env:
  466. def __init__(self):
  467. self.variables = []
  468. self.ranges = []
  469. def Clone(self):
  470. clone = Env()
  471. clone.variables = self.variables[:]
  472. clone.ranges = self.ranges[:]
  473. return clone
  474. def PushVariable(self, var, value):
  475. # If value looks like an int, store it as an int.
  476. try:
  477. int_value = int(value)
  478. if ('%s' % int_value) == value:
  479. value = int_value
  480. except Exception:
  481. pass
  482. self.variables[:0] = [(var, value)]
  483. def PopVariable(self):
  484. self.variables[:1] = []
  485. def PushRange(self, var, lower, upper):
  486. self.ranges[:0] = [(var, lower, upper)]
  487. def PopRange(self):
  488. self.ranges[:1] = []
  489. def GetValue(self, identifier):
  490. for (var, value) in self.variables:
  491. if identifier == var:
  492. return value
  493. print 'ERROR: meta variable %s is undefined.' % (identifier,)
  494. sys.exit(1)
  495. def EvalExp(self, exp):
  496. try:
  497. result = eval(exp.python_exp)
  498. except Exception, e:
  499. print 'ERROR: caught exception %s: %s' % (e.__class__.__name__, e)
  500. print ('ERROR: failed to evaluate meta expression %s at %s' %
  501. (exp.python_exp, exp.token.start))
  502. sys.exit(1)
  503. return result
  504. def GetRange(self, identifier):
  505. for (var, lower, upper) in self.ranges:
  506. if identifier == var:
  507. return (lower, upper)
  508. print 'ERROR: range %s is undefined.' % (identifier,)
  509. sys.exit(1)
  510. class Output:
  511. def __init__(self):
  512. self.string = ''
  513. def GetLastLine(self):
  514. index = self.string.rfind('\n')
  515. if index < 0:
  516. return ''
  517. return self.string[index + 1:]
  518. def Append(self, s):
  519. self.string += s
  520. def RunAtomicCode(env, node, output):
  521. if isinstance(node, VarNode):
  522. identifier = node.identifier.value.strip()
  523. result = Output()
  524. RunAtomicCode(env.Clone(), node.atomic_code, result)
  525. value = result.string
  526. env.PushVariable(identifier, value)
  527. elif isinstance(node, RangeNode):
  528. identifier = node.identifier.value.strip()
  529. lower = int(env.EvalExp(node.exp1))
  530. upper = int(env.EvalExp(node.exp2))
  531. env.PushRange(identifier, lower, upper)
  532. elif isinstance(node, ForNode):
  533. identifier = node.identifier.value.strip()
  534. if node.sep is None:
  535. sep = ''
  536. else:
  537. sep = node.sep.value
  538. (lower, upper) = env.GetRange(identifier)
  539. for i in range(lower, upper + 1):
  540. new_env = env.Clone()
  541. new_env.PushVariable(identifier, i)
  542. RunCode(new_env, node.code, output)
  543. if i != upper:
  544. output.Append(sep)
  545. elif isinstance(node, RawCodeNode):
  546. output.Append(node.raw_code.value)
  547. elif isinstance(node, IfNode):
  548. cond = env.EvalExp(node.exp)
  549. if cond:
  550. RunCode(env.Clone(), node.then_branch, output)
  551. elif node.else_branch is not None:
  552. RunCode(env.Clone(), node.else_branch, output)
  553. elif isinstance(node, ExpNode):
  554. value = env.EvalExp(node)
  555. output.Append('%s' % (value,))
  556. elif isinstance(node, LiteralDollarNode):
  557. output.Append('$')
  558. elif isinstance(node, CodeNode):
  559. RunCode(env.Clone(), node, output)
  560. else:
  561. print 'BAD'
  562. print node
  563. sys.exit(1)
  564. def RunCode(env, code_node, output):
  565. for atomic_code in code_node.atomic_code:
  566. RunAtomicCode(env, atomic_code, output)
  567. def IsSingleLineComment(cur_line):
  568. return '//' in cur_line
  569. def IsInPreprocessorDirective(prev_lines, cur_line):
  570. if cur_line.lstrip().startswith('#'):
  571. return True
  572. return prev_lines and prev_lines[-1].endswith('\\')
  573. def WrapComment(line, output):
  574. loc = line.find('//')
  575. before_comment = line[:loc].rstrip()
  576. if before_comment == '':
  577. indent = loc
  578. else:
  579. output.append(before_comment)
  580. indent = len(before_comment) - len(before_comment.lstrip())
  581. prefix = indent*' ' + '// '
  582. max_len = 80 - len(prefix)
  583. comment = line[loc + 2:].strip()
  584. segs = [seg for seg in re.split(r'(\w+\W*)', comment) if seg != '']
  585. cur_line = ''
  586. for seg in segs:
  587. if len((cur_line + seg).rstrip()) < max_len:
  588. cur_line += seg
  589. else:
  590. if cur_line.strip() != '':
  591. output.append(prefix + cur_line.rstrip())
  592. cur_line = seg.lstrip()
  593. if cur_line.strip() != '':
  594. output.append(prefix + cur_line.strip())
def WrapCode(line, line_concat, output):
  """Wraps a long code line at 80 columns, appending pieces to output.

  line_concat (e.g. ' \\' for preprocessor lines, '' otherwise) is
  appended to every emitted piece except the last.  Continuation pieces
  are indented 4 extra spaces relative to the original line.
  """
  indent = len(line) - len(line.lstrip())
  prefix = indent*' '  # Prefix of the current line
  max_len = 80 - indent - len(line_concat)  # Maximum length of the current line
  new_prefix = prefix + 4*' '  # Prefix of a continuation line
  new_max_len = max_len - 4  # Maximum length of a continuation line
  # Prefers to wrap a line after a ',' or ';'.
  segs = [seg for seg in re.split(r'([^,;]+[,;]?)', line.strip()) if seg != '']
  cur_line = ''  # The current line without leading spaces.
  for seg in segs:
    # If the line is still too long, wrap at a space.
    # (Only applies while the current output line is empty, i.e. the
    # segment alone overflows; prefix/max_len switch to the
    # continuation values after each emitted piece.)
    while cur_line == '' and len(seg.strip()) > max_len:
      seg = seg.lstrip()
      split_at = seg.rfind(' ', 0, max_len)
      output.append(prefix + seg[:split_at].strip() + line_concat)
      seg = seg[split_at + 1:]
      prefix = new_prefix
      max_len = new_max_len

    if len((cur_line + seg).rstrip()) < max_len:
      # The segment still fits on the current line.
      cur_line = (cur_line + seg).lstrip()
    else:
      # Flush the current line and start a continuation line with this
      # segment.
      output.append(prefix + cur_line.rstrip() + line_concat)
      prefix = new_prefix
      max_len = new_max_len
      cur_line = seg.lstrip()

  if cur_line.strip() != '':
    output.append(prefix + cur_line.strip())
def WrapPreprocessorDirective(line, output):
  """Wraps a long preprocessor line, ending each piece with ' \\'."""
  WrapCode(line, ' \\', output)


def WrapPlainCode(line, output):
  """Wraps a long plain code line with no continuation marker."""
  WrapCode(line, '', output)
def IsMultiLineIWYUPragma(line):
  """Returns a truthy match iff line contains a '/* IWYU pragma:'
  marker (such lines must never be wrapped)."""
  return re.search(r'/\* IWYU pragma: ', line)


def IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
  """Returns a truthy match iff line is exempt from wrapping: a header
  guard (#ifndef/#define/#endif //), an #include, or a one-line '//'
  IWYU pragma."""
  return (re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line) or
          re.match(r'^#include\s', line) or
          # Don't break IWYU pragmas, either; that causes iwyu.py problems.
          re.search(r'// IWYU pragma: ', line))
def WrapLongLine(line, output):
  """Appends line to output, wrapping it first when longer than 80 columns.

  output is the list of lines emitted so far; it is also consulted to
  detect whether this line continues a preprocessor directive from the
  previous emitted line.
  """
  line = line.rstrip()
  if len(line) <= 80:
    output.append(line)
  elif IsSingleLineComment(line):
    if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
      # The style guide made an exception to allow long header guard lines,
      # includes and IWYU pragmas.
      output.append(line)
    else:
      WrapComment(line, output)
  elif IsInPreprocessorDirective(output, line):
    if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
      # The style guide made an exception to allow long header guard lines,
      # includes and IWYU pragmas.
      output.append(line)
    else:
      WrapPreprocessorDirective(line, output)
  elif IsMultiLineIWYUPragma(line):
    output.append(line)
  else:
    WrapPlainCode(line, output)
  655. def BeautifyCode(string):
  656. lines = string.splitlines()
  657. output = []
  658. for line in lines:
  659. WrapLongLine(line, output)
  660. output2 = [line.rstrip() for line in output]
  661. return '\n'.join(output2) + '\n'
  662. def ConvertFromPumpSource(src_text):
  663. """Return the text generated from the given Pump source text."""
  664. ast = ParseToAST(StripMetaComments(src_text))
  665. output = Output()
  666. RunCode(Env(), ast, output)
  667. return BeautifyCode(output.string)
  668. def main(argv):
  669. if len(argv) == 1:
  670. print __doc__
  671. sys.exit(1)
  672. file_path = argv[-1]
  673. output_str = ConvertFromPumpSource(file(file_path, 'r').read())
  674. if file_path.endswith('.pump'):
  675. output_file_path = file_path[:-5]
  676. else:
  677. output_file_path = '-'
  678. if output_file_path == '-':
  679. print output_str,
  680. else:
  681. output_file = file(output_file_path, 'w')
  682. output_file.write('// This file was GENERATED by command:\n')
  683. output_file.write('// %s %s\n' %
  684. (os.path.basename(__file__), os.path.basename(file_path)))
  685. output_file.write('// DO NOT EDIT BY HAND!!!\n\n')
  686. output_file.write(output_str)
  687. output_file.close()
  688. if __name__ == '__main__':
  689. main(sys.argv)