cpp.py 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
  1. # -----------------------------------------------------------------------------
  2. # cpp.py
  3. #
  4. # Author: David Beazley (http://www.dabeaz.com)
  5. # Copyright (C) 2017
  6. # All rights reserved
  7. #
  8. # This module implements an ANSI-C style lexical preprocessor for PLY.
  9. # -----------------------------------------------------------------------------
  10. import sys
  11. # Some Python 3 compatibility shims
  12. if sys.version_info.major < 3:
  13. STRING_TYPES = (str, unicode)
  14. else:
  15. STRING_TYPES = str
  16. xrange = range
  17. # -----------------------------------------------------------------------------
  18. # Default preprocessor lexer definitions. These tokens are enough to get
  19. # a basic preprocessor working. Other modules may import these if they want
  20. # -----------------------------------------------------------------------------
  21. tokens = (
  22. 'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND','CPP_DPOUND'
  23. )
  24. literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
  25. # Whitespace
  26. def t_CPP_WS(t):
  27. r'\s+'
  28. t.lexer.lineno += t.value.count("\n")
  29. return t
# Single '#' and double '##' tokens (both are probed for by
# Preprocessor.lexprobe and interpreted by the macro code below).
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier: a letter or underscore followed by word characters
t_CPP_ID = r'[A-Za-z_][\w_]*'

# Integer literal: hexadecimal (0x/0X prefix) or decimal digits, followed by
# an optional u/l suffix combination.  The value is returned unmodified.
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    return t

# The rule is published under the t_ prefixed name as well
t_CPP_INTEGER = CPP_INTEGER

# Floating literal: digits '.' digits with an optional exponent, or digits
# with a mandatory exponent, followed by an optional l/L/f/F suffix.
# NOTE(review): the spaces around '|' are only insignificant if the lexer
# compiles its rules in verbose mode -- confirm against the lexer in use.
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
  41. # String literal
  42. def t_CPP_STRING(t):
  43. r'\"([^\\\n]|(\\(.|\n)))*?\"'
  44. t.lexer.lineno += t.value.count("\n")
  45. return t
  46. # Character constant 'c' or L'c'
  47. def t_CPP_CHAR(t):
  48. r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
  49. t.lexer.lineno += t.value.count("\n")
  50. return t
  51. # Comment
  52. def t_CPP_COMMENT1(t):
  53. r'(/\*(.|\n)*?\*/)'
  54. ncr = t.value.count("\n")
  55. t.lexer.lineno += ncr
  56. # replace with one space or a number of '\n'
  57. t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' '
  58. return t
  59. # Line comment
  60. def t_CPP_COMMENT2(t):
  61. r'(//.*?(\n|$))'
  62. # replace with '/n'
  63. t.type = 'CPP_WS'; t.value = '\n'
  64. return t
  65. def t_error(t):
  66. t.type = t.value[0]
  67. t.value = t.value[0]
  68. t.lexer.skip(1)
  69. return t
  70. import re
  71. import copy
  72. import time
  73. import os.path
  74. # -----------------------------------------------------------------------------
  75. # trigraph()
  76. #
  77. # Given an input string, this function replaces all trigraph sequences.
  78. # The following mapping is used:
  79. #
  80. # ??= #
  81. # ??/ \
  82. # ??' ^
  83. # ??( [
  84. # ??) ]
  85. # ??! |
  86. # ??< {
  87. # ??> }
  88. # ??- ~
  89. # -----------------------------------------------------------------------------
  90. _trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
  91. _trigraph_rep = {
  92. '=':'#',
  93. '/':'\\',
  94. "'":'^',
  95. '(':'[',
  96. ')':']',
  97. '!':'|',
  98. '<':'{',
  99. '>':'}',
  100. '-':'~'
  101. }
  102. def trigraph(input):
  103. return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]],input)
  104. # ------------------------------------------------------------------
  105. # Macro object
  106. #
  107. # This object holds information about preprocessor macros
  108. #
  109. # .name - Macro name (string)
  110. # .value - Macro value (a list of tokens)
  111. # .arglist - List of argument names
  112. # .variadic - Boolean indicating whether or not variadic macro
  113. # .vararg - Name of the variadic parameter
  114. #
  115. # When a macro is created, the macro replacement token sequence is
  116. # pre-scanned and used to create patch lists that are later used
  117. # during macro expansion
  118. # ------------------------------------------------------------------
  119. class Macro(object):
  120. def __init__(self,name,value,arglist=None,variadic=False):
  121. self.name = name
  122. self.value = value
  123. self.arglist = arglist
  124. self.variadic = variadic
  125. if variadic:
  126. self.vararg = arglist[-1]
  127. self.source = None
  128. # ------------------------------------------------------------------
  129. # Preprocessor object
  130. #
  131. # Object representing a preprocessor. Contains macro definitions,
  132. # include directories, and other information
  133. # ------------------------------------------------------------------
  134. class Preprocessor(object):
  135. def __init__(self,lexer=None):
  136. if lexer is None:
  137. lexer = lex.lexer
  138. self.lexer = lexer
  139. self.macros = { }
  140. self.path = []
  141. self.temp_path = []
  142. # Probe the lexer for selected tokens
  143. self.lexprobe()
  144. tm = time.localtime()
  145. self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y",tm))
  146. self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S",tm))
  147. self.parser = None
  148. # -----------------------------------------------------------------------------
  149. # tokenize()
  150. #
  151. # Utility function. Given a string of text, tokenize into a list of tokens
  152. # -----------------------------------------------------------------------------
  153. def tokenize(self,text):
  154. tokens = []
  155. self.lexer.input(text)
  156. while True:
  157. tok = self.lexer.token()
  158. if not tok: break
  159. tokens.append(tok)
  160. return tokens
  161. # ---------------------------------------------------------------------
  162. # error()
  163. #
  164. # Report a preprocessor error/warning of some kind
  165. # ----------------------------------------------------------------------
  166. def error(self,file,line,msg):
  167. print("%s:%d %s" % (file,line,msg))
  168. # ----------------------------------------------------------------------
  169. # lexprobe()
  170. #
  171. # This method probes the preprocessor lexer object to discover
  172. # the token types of symbols that are important to the preprocessor.
  173. # If this works right, the preprocessor will simply "work"
  174. # with any suitable lexer regardless of how tokens have been named.
  175. # ----------------------------------------------------------------------
  176. def lexprobe(self):
  177. # Determine the token type for identifiers
  178. self.lexer.input("identifier")
  179. tok = self.lexer.token()
  180. if not tok or tok.value != "identifier":
  181. print("Couldn't determine identifier type")
  182. else:
  183. self.t_ID = tok.type
  184. # Determine the token type for integers
  185. self.lexer.input("12345")
  186. tok = self.lexer.token()
  187. if not tok or int(tok.value) != 12345:
  188. print("Couldn't determine integer type")
  189. else:
  190. self.t_INTEGER = tok.type
  191. self.t_INTEGER_TYPE = type(tok.value)
  192. # Determine the token type for strings enclosed in double quotes
  193. self.lexer.input("\"filename\"")
  194. tok = self.lexer.token()
  195. if not tok or tok.value != "\"filename\"":
  196. print("Couldn't determine string type")
  197. else:
  198. self.t_STRING = tok.type
  199. # Determine the token type for whitespace--if any
  200. self.lexer.input(" ")
  201. tok = self.lexer.token()
  202. if not tok or tok.value != " ":
  203. self.t_SPACE = None
  204. else:
  205. self.t_SPACE = tok.type
  206. # Determine the token type for newlines
  207. self.lexer.input("\n")
  208. tok = self.lexer.token()
  209. if not tok or tok.value != "\n":
  210. self.t_NEWLINE = None
  211. print("Couldn't determine token for newlines")
  212. else:
  213. self.t_NEWLINE = tok.type
  214. self.t_WS = (self.t_SPACE, self.t_NEWLINE)
  215. # Check for other characters used by the preprocessor
  216. chars = [ '<','>','#','##','\\','(',')',',','.']
  217. for c in chars:
  218. self.lexer.input(c)
  219. tok = self.lexer.token()
  220. if not tok or tok.value != c:
  221. print("Unable to lex '%s' required for preprocessor" % c)
  222. # ----------------------------------------------------------------------
  223. # add_path()
  224. #
  225. # Adds a search path to the preprocessor.
  226. # ----------------------------------------------------------------------
  227. def add_path(self,path):
  228. self.path.append(path)
  229. # ----------------------------------------------------------------------
  230. # group_lines()
  231. #
  232. # Given an input string, this function splits it into lines. Trailing whitespace
  233. # is removed. Any line ending with \ is grouped with the next line. This
  234. # function forms the lowest level of the preprocessor---grouping into text into
  235. # a line-by-line format.
  236. # ----------------------------------------------------------------------
  237. def group_lines(self,input):
  238. lex = self.lexer.clone()
  239. lines = [x.rstrip() for x in input.splitlines()]
  240. for i in xrange(len(lines)):
  241. j = i+1
  242. while lines[i].endswith('\\') and (j < len(lines)):
  243. lines[i] = lines[i][:-1]+lines[j]
  244. lines[j] = ""
  245. j += 1
  246. input = "\n".join(lines)
  247. lex.input(input)
  248. lex.lineno = 1
  249. current_line = []
  250. while True:
  251. tok = lex.token()
  252. if not tok:
  253. break
  254. current_line.append(tok)
  255. if tok.type in self.t_WS and '\n' in tok.value:
  256. yield current_line
  257. current_line = []
  258. if current_line:
  259. yield current_line
  260. # ----------------------------------------------------------------------
  261. # tokenstrip()
  262. #
  263. # Remove leading/trailing whitespace tokens from a token list
  264. # ----------------------------------------------------------------------
  265. def tokenstrip(self,tokens):
  266. i = 0
  267. while i < len(tokens) and tokens[i].type in self.t_WS:
  268. i += 1
  269. del tokens[:i]
  270. i = len(tokens)-1
  271. while i >= 0 and tokens[i].type in self.t_WS:
  272. i -= 1
  273. del tokens[i+1:]
  274. return tokens
  275. # ----------------------------------------------------------------------
  276. # collect_args()
  277. #
  278. # Collects comma separated arguments from a list of tokens. The arguments
  279. # must be enclosed in parenthesis. Returns a tuple (tokencount,args,positions)
  280. # where tokencount is the number of tokens consumed, args is a list of arguments,
  281. # and positions is a list of integers containing the starting index of each
  282. # argument. Each argument is represented by a list of tokens.
  283. #
  284. # When collecting arguments, leading and trailing whitespace is removed
  285. # from each argument.
  286. #
  287. # This function properly handles nested parenthesis and commas---these do not
  288. # define new arguments.
  289. # ----------------------------------------------------------------------
  290. def collect_args(self,tokenlist):
  291. args = []
  292. positions = []
  293. current_arg = []
  294. nesting = 1
  295. tokenlen = len(tokenlist)
  296. # Search for the opening '('.
  297. i = 0
  298. while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
  299. i += 1
  300. if (i < tokenlen) and (tokenlist[i].value == '('):
  301. positions.append(i+1)
  302. else:
  303. self.error(self.source,tokenlist[0].lineno,"Missing '(' in macro arguments")
  304. return 0, [], []
  305. i += 1
  306. while i < tokenlen:
  307. t = tokenlist[i]
  308. if t.value == '(':
  309. current_arg.append(t)
  310. nesting += 1
  311. elif t.value == ')':
  312. nesting -= 1
  313. if nesting == 0:
  314. if current_arg:
  315. args.append(self.tokenstrip(current_arg))
  316. positions.append(i)
  317. return i+1,args,positions
  318. current_arg.append(t)
  319. elif t.value == ',' and nesting == 1:
  320. args.append(self.tokenstrip(current_arg))
  321. positions.append(i+1)
  322. current_arg = []
  323. else:
  324. current_arg.append(t)
  325. i += 1
  326. # Missing end argument
  327. self.error(self.source,tokenlist[-1].lineno,"Missing ')' in macro arguments")
  328. return 0, [],[]
  329. # ----------------------------------------------------------------------
  330. # macro_prescan()
  331. #
  332. # Examine the macro value (token sequence) and identify patch points
  333. # This is used to speed up macro expansion later on---we'll know
  334. # right away where to apply patches to the value to form the expansion
  335. # ----------------------------------------------------------------------
  336. def macro_prescan(self,macro):
  337. macro.patch = [] # Standard macro arguments
  338. macro.str_patch = [] # String conversion expansion
  339. macro.var_comma_patch = [] # Variadic macro comma patch
  340. i = 0
  341. while i < len(macro.value):
  342. if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
  343. argnum = macro.arglist.index(macro.value[i].value)
  344. # Conversion of argument to a string
  345. if i > 0 and macro.value[i-1].value == '#':
  346. macro.value[i] = copy.copy(macro.value[i])
  347. macro.value[i].type = self.t_STRING
  348. del macro.value[i-1]
  349. macro.str_patch.append((argnum,i-1))
  350. continue
  351. # Concatenation
  352. elif (i > 0 and macro.value[i-1].value == '##'):
  353. macro.patch.append(('c',argnum,i-1))
  354. del macro.value[i-1]
  355. continue
  356. elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
  357. macro.patch.append(('c',argnum,i))
  358. i += 1
  359. continue
  360. # Standard expansion
  361. else:
  362. macro.patch.append(('e',argnum,i))
  363. elif macro.value[i].value == '##':
  364. if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
  365. ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
  366. (macro.value[i+1].value == macro.vararg):
  367. macro.var_comma_patch.append(i-1)
  368. i += 1
  369. macro.patch.sort(key=lambda x: x[2],reverse=True)
  370. # ----------------------------------------------------------------------
  371. # macro_expand_args()
  372. #
  373. # Given a Macro and list of arguments (each a token list), this method
  374. # returns an expanded version of a macro. The return value is a token sequence
  375. # representing the replacement macro tokens
  376. # ----------------------------------------------------------------------
  377. def macro_expand_args(self,macro,args):
  378. # Make a copy of the macro token sequence
  379. rep = [copy.copy(_x) for _x in macro.value]
  380. # Make string expansion patches. These do not alter the length of the replacement sequence
  381. str_expansion = {}
  382. for argnum, i in macro.str_patch:
  383. if argnum not in str_expansion:
  384. str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\","\\\\")
  385. rep[i] = copy.copy(rep[i])
  386. rep[i].value = str_expansion[argnum]
  387. # Make the variadic macro comma patch. If the variadic macro argument is empty, we get rid
  388. comma_patch = False
  389. if macro.variadic and not args[-1]:
  390. for i in macro.var_comma_patch:
  391. rep[i] = None
  392. comma_patch = True
  393. # Make all other patches. The order of these matters. It is assumed that the patch list
  394. # has been sorted in reverse order of patch location since replacements will cause the
  395. # size of the replacement sequence to expand from the patch point.
  396. expanded = { }
  397. for ptype, argnum, i in macro.patch:
  398. # Concatenation. Argument is left unexpanded
  399. if ptype == 'c':
  400. rep[i:i+1] = args[argnum]
  401. # Normal expansion. Argument is macro expanded first
  402. elif ptype == 'e':
  403. if argnum not in expanded:
  404. expanded[argnum] = self.expand_macros(args[argnum])
  405. rep[i:i+1] = expanded[argnum]
  406. # Get rid of removed comma if necessary
  407. if comma_patch:
  408. rep = [_i for _i in rep if _i]
  409. return rep
  410. # ----------------------------------------------------------------------
  411. # expand_macros()
  412. #
  413. # Given a list of tokens, this function performs macro expansion.
  414. # The expanded argument is a dictionary that contains macros already
  415. # expanded. This is used to prevent infinite recursion.
  416. # ----------------------------------------------------------------------
  417. def expand_macros(self,tokens,expanded=None):
  418. if expanded is None:
  419. expanded = {}
  420. i = 0
  421. while i < len(tokens):
  422. t = tokens[i]
  423. if t.type == self.t_ID:
  424. if t.value in self.macros and t.value not in expanded:
  425. # Yes, we found a macro match
  426. expanded[t.value] = True
  427. m = self.macros[t.value]
  428. if not m.arglist:
  429. # A simple macro
  430. ex = self.expand_macros([copy.copy(_x) for _x in m.value],expanded)
  431. for e in ex:
  432. e.lineno = t.lineno
  433. tokens[i:i+1] = ex
  434. i += len(ex)
  435. else:
  436. # A macro with arguments
  437. j = i + 1
  438. while j < len(tokens) and tokens[j].type in self.t_WS:
  439. j += 1
  440. if tokens[j].value == '(':
  441. tokcount,args,positions = self.collect_args(tokens[j:])
  442. if not m.variadic and len(args) != len(m.arglist):
  443. self.error(self.source,t.lineno,"Macro %s requires %d arguments" % (t.value,len(m.arglist)))
  444. i = j + tokcount
  445. elif m.variadic and len(args) < len(m.arglist)-1:
  446. if len(m.arglist) > 2:
  447. self.error(self.source,t.lineno,"Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
  448. else:
  449. self.error(self.source,t.lineno,"Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
  450. i = j + tokcount
  451. else:
  452. if m.variadic:
  453. if len(args) == len(m.arglist)-1:
  454. args.append([])
  455. else:
  456. args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
  457. del args[len(m.arglist):]
  458. # Get macro replacement text
  459. rep = self.macro_expand_args(m,args)
  460. rep = self.expand_macros(rep,expanded)
  461. for r in rep:
  462. r.lineno = t.lineno
  463. tokens[i:j+tokcount] = rep
  464. i += len(rep)
  465. del expanded[t.value]
  466. continue
  467. elif t.value == '__LINE__':
  468. t.type = self.t_INTEGER
  469. t.value = self.t_INTEGER_TYPE(t.lineno)
  470. i += 1
  471. return tokens
  472. # ----------------------------------------------------------------------
  473. # evalexpr()
  474. #
  475. # Evaluate an expression token sequence for the purposes of evaluating
  476. # integral expressions.
  477. # ----------------------------------------------------------------------
  478. def evalexpr(self,tokens):
  479. # tokens = tokenize(line)
  480. # Search for defined macros
  481. i = 0
  482. while i < len(tokens):
  483. if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
  484. j = i + 1
  485. needparen = False
  486. result = "0L"
  487. while j < len(tokens):
  488. if tokens[j].type in self.t_WS:
  489. j += 1
  490. continue
  491. elif tokens[j].type == self.t_ID:
  492. if tokens[j].value in self.macros:
  493. result = "1L"
  494. else:
  495. result = "0L"
  496. if not needparen: break
  497. elif tokens[j].value == '(':
  498. needparen = True
  499. elif tokens[j].value == ')':
  500. break
  501. else:
  502. self.error(self.source,tokens[i].lineno,"Malformed defined()")
  503. j += 1
  504. tokens[i].type = self.t_INTEGER
  505. tokens[i].value = self.t_INTEGER_TYPE(result)
  506. del tokens[i+1:j+1]
  507. i += 1
  508. tokens = self.expand_macros(tokens)
  509. for i,t in enumerate(tokens):
  510. if t.type == self.t_ID:
  511. tokens[i] = copy.copy(t)
  512. tokens[i].type = self.t_INTEGER
  513. tokens[i].value = self.t_INTEGER_TYPE("0L")
  514. elif t.type == self.t_INTEGER:
  515. tokens[i] = copy.copy(t)
  516. # Strip off any trailing suffixes
  517. tokens[i].value = str(tokens[i].value)
  518. while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
  519. tokens[i].value = tokens[i].value[:-1]
  520. expr = "".join([str(x.value) for x in tokens])
  521. expr = expr.replace("&&"," and ")
  522. expr = expr.replace("||"," or ")
  523. expr = expr.replace("!"," not ")
  524. try:
  525. result = eval(expr)
  526. except Exception:
  527. self.error(self.source,tokens[0].lineno,"Couldn't evaluate expression")
  528. result = 0
  529. return result
  530. # ----------------------------------------------------------------------
  531. # parsegen()
  532. #
    # Parse an input string and generate the preprocessed tokens.
  534. # ----------------------------------------------------------------------
    def parsegen(self,input,source=None):
        """Generator that preprocesses *input* and yields the resulting tokens.

        Trigraphs are replaced, the text is split into logical lines with
        group_lines(), and each line is either handled as a directive (its
        first non-whitespace token is '#') or buffered as ordinary text.
        Buffered text is macro expanded and yielded whenever an enabled
        #define, #include or #undef is processed, and once more at the end of
        the input.  *source* is the file name; it is stored in self.source
        and used to define __FILE__.
        """

        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []          # ordinary tokens collected since the last flush
        enable = True       # False while inside a conditional branch that is skipped
        iftrigger = False   # True once a branch of the current conditional has been taken
        ifstack = []        # saved (enable, iftrigger) pairs of the enclosing conditionals

        for x in lines:
            # Find the first non-whitespace token of the line; `i` and `tok`
            # are deliberately used after the loop.
            for i,tok in enumerate(x):
                if tok.type not in self.t_WS: break
            if tok.value == '#':
                # Preprocessor directive

                # insert necessary whitespace instead of eaten tokens
                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                # Split the directive into its name and its argument tokens
                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        # Flush pending text first so it is expanded with the
                        # macro table as it was before this directive
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        # The nested parsegen() redefines __FILE__ and
                        # self.source; put both back afterwards
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable,iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:     # We only pay attention if outer "if" allows this
                            if enable:         # If already true, we flip enable False
                                enable = False
                            elif not iftrigger:   # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable = True
                                    iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #elif")

                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #else")

                elif name == 'endif':
                    if ifstack:
                        # Restore the state of the enclosing conditional
                        enable,iftrigger = ifstack.pop()
                    else:
                        self.error(self.source,dirtokens[0].lineno,"Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass

            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        # Flush whatever text is left after the last line
        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []
  647. # ----------------------------------------------------------------------
  648. # include()
  649. #
  650. # Implementation of file-inclusion
  651. # ----------------------------------------------------------------------
  652. def include(self,tokens):
  653. # Try to extract the filename and then process an include file
  654. if not tokens:
  655. return
  656. if tokens:
  657. if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
  658. tokens = self.expand_macros(tokens)
  659. if tokens[0].value == '<':
  660. # Include <...>
  661. i = 1
  662. while i < len(tokens):
  663. if tokens[i].value == '>':
  664. break
  665. i += 1
  666. else:
  667. print("Malformed #include <...>")
  668. return
  669. filename = "".join([x.value for x in tokens[1:i]])
  670. path = self.path + [""] + self.temp_path
  671. elif tokens[0].type == self.t_STRING:
  672. filename = tokens[0].value[1:-1]
  673. path = self.temp_path + [""] + self.path
  674. else:
  675. print("Malformed #include statement")
  676. return
  677. for p in path:
  678. iname = os.path.join(p,filename)
  679. try:
  680. data = open(iname,"r").read()
  681. dname = os.path.dirname(iname)
  682. if dname:
  683. self.temp_path.insert(0,dname)
  684. for tok in self.parsegen(data,filename):
  685. yield tok
  686. if dname:
  687. del self.temp_path[0]
  688. break
  689. except IOError:
  690. pass
  691. else:
  692. print("Couldn't find '%s'" % filename)
  693. # ----------------------------------------------------------------------
  694. # define()
  695. #
  696. # Define a new macro
  697. # ----------------------------------------------------------------------
  698. def define(self,tokens):
  699. if isinstance(tokens,STRING_TYPES):
  700. tokens = self.tokenize(tokens)
  701. linetok = tokens
  702. try:
  703. name = linetok[0]
  704. if len(linetok) > 1:
  705. mtype = linetok[1]
  706. else:
  707. mtype = None
  708. if not mtype:
  709. m = Macro(name.value,[])
  710. self.macros[name.value] = m
  711. elif mtype.type in self.t_WS:
  712. # A normal macro
  713. m = Macro(name.value,self.tokenstrip(linetok[2:]))
  714. self.macros[name.value] = m
  715. elif mtype.value == '(':
  716. # A macro with arguments
  717. tokcount, args, positions = self.collect_args(linetok[1:])
  718. variadic = False
  719. for a in args:
  720. if variadic:
  721. print("No more arguments may follow a variadic argument")
  722. break
  723. astr = "".join([str(_i.value) for _i in a])
  724. if astr == "...":
  725. variadic = True
  726. a[0].type = self.t_ID
  727. a[0].value = '__VA_ARGS__'
  728. variadic = True
  729. del a[1:]
  730. continue
  731. elif astr[-3:] == "..." and a[0].type == self.t_ID:
  732. variadic = True
  733. del a[1:]
  734. # If, for some reason, "." is part of the identifier, strip off the name for the purposes
  735. # of macro expansion
  736. if a[0].value[-3:] == '...':
  737. a[0].value = a[0].value[:-3]
  738. continue
  739. if len(a) > 1 or a[0].type != self.t_ID:
  740. print("Invalid macro argument")
  741. break
  742. else:
  743. mvalue = self.tokenstrip(linetok[1+tokcount:])
  744. i = 0
  745. while i < len(mvalue):
  746. if i+1 < len(mvalue):
  747. if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
  748. del mvalue[i]
  749. continue
  750. elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
  751. del mvalue[i+1]
  752. i += 1
  753. m = Macro(name.value,mvalue,[x[0].value for x in args],variadic)
  754. self.macro_prescan(m)
  755. self.macros[name.value] = m
  756. else:
  757. print("Bad macro definition")
  758. except LookupError:
  759. print("Bad macro definition")
  760. # ----------------------------------------------------------------------
  761. # undef()
  762. #
  763. # Undefine a macro
  764. # ----------------------------------------------------------------------
  765. def undef(self,tokens):
  766. id = tokens[0].value
  767. try:
  768. del self.macros[id]
  769. except LookupError:
  770. pass
  771. # ----------------------------------------------------------------------
  772. # parse()
  773. #
  774. # Parse input text.
  775. # ----------------------------------------------------------------------
  776. def parse(self,input,source=None,ignore={}):
  777. self.ignore = ignore
  778. self.parser = self.parsegen(input,source)
  779. # ----------------------------------------------------------------------
  780. # token()
  781. #
  782. # Method to return individual tokens
  783. # ----------------------------------------------------------------------
  784. def token(self):
  785. try:
  786. while True:
  787. tok = next(self.parser)
  788. if tok.type not in self.ignore: return tok
  789. except StopIteration:
  790. self.parser = None
  791. return None
  792. if __name__ == '__main__':
  793. import ply.lex as lex
  794. lexer = lex.lex()
  795. # Run a preprocessor
  796. import sys
  797. f = open(sys.argv[1])
  798. input = f.read()
  799. p = Preprocessor(lexer)
  800. p.parse(input,sys.argv[1])
  801. while True:
  802. tok = p.token()
  803. if not tok: break
  804. print(p.source, tok)