# cparser.py
  1. from . import model
  2. from .commontypes import COMMON_TYPES, resolve_common_type
  3. from .error import FFIError, CDefError
  4. try:
  5. from . import _pycparser as pycparser
  6. except ImportError:
  7. import pycparser
  8. import weakref, re, sys
  9. try:
  10. if sys.version_info < (3,):
  11. import thread as _thread
  12. else:
  13. import _thread
  14. lock = _thread.allocate_lock()
  15. except ImportError:
  16. lock = None
  17. def _workaround_for_static_import_finders():
  18. # Issue #392: packaging tools like cx_Freeze can not find these
  19. # because pycparser uses exec dynamic import. This is an obscure
  20. # workaround. This function is never called.
  21. import pycparser.yacctab
  22. import pycparser.lextab
  23. CDEF_SOURCE_STRING = "<cdef source string>"
  24. _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$",
  25. re.DOTALL | re.MULTILINE)
  26. _r_define = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)"
  27. r"\b((?:[^\n\\]|\\.)*?)$",
  28. re.DOTALL | re.MULTILINE)
  29. _r_line_directive = re.compile(r"^[ \t]*#[ \t]*(?:line|\d+)\b.*$", re.MULTILINE)
  30. _r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
  31. _r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
  32. _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
  33. _r_words = re.compile(r"\w+|\S")
  34. _parser_cache = None
  35. _r_int_literal = re.compile(r"-?0?x?[0-9a-f]+[lu]*$", re.IGNORECASE)
  36. _r_stdcall1 = re.compile(r"\b(__stdcall|WINAPI)\b")
  37. _r_stdcall2 = re.compile(r"[(]\s*(__stdcall|WINAPI)\b")
  38. _r_cdecl = re.compile(r"\b__cdecl\b")
  39. _r_extern_python = re.compile(r'\bextern\s*"'
  40. r'(Python|Python\s*\+\s*C|C\s*\+\s*Python)"\s*.')
  41. _r_star_const_space = re.compile( # matches "* const "
  42. r"[*]\s*((const|volatile|restrict)\b\s*)+")
  43. _r_int_dotdotdot = re.compile(r"(\b(int|long|short|signed|unsigned|char)\s*)+"
  44. r"\.\.\.")
  45. _r_float_dotdotdot = re.compile(r"\b(double|float)\s*\.\.\.")
  46. def _get_parser():
  47. global _parser_cache
  48. if _parser_cache is None:
  49. _parser_cache = pycparser.CParser()
  50. return _parser_cache
  51. def _workaround_for_old_pycparser(csource):
  52. # Workaround for a pycparser issue (fixed between pycparser 2.10 and
  53. # 2.14): "char*const***" gives us a wrong syntax tree, the same as
  54. # for "char***(*const)". This means we can't tell the difference
  55. # afterwards. But "char(*const(***))" gives us the right syntax
  56. # tree. The issue only occurs if there are several stars in
  57. # sequence with no parenthesis inbetween, just possibly qualifiers.
  58. # Attempt to fix it by adding some parentheses in the source: each
  59. # time we see "* const" or "* const *", we add an opening
  60. # parenthesis before each star---the hard part is figuring out where
  61. # to close them.
  62. parts = []
  63. while True:
  64. match = _r_star_const_space.search(csource)
  65. if not match:
  66. break
  67. #print repr(''.join(parts)+csource), '=>',
  68. parts.append(csource[:match.start()])
  69. parts.append('('); closing = ')'
  70. parts.append(match.group()) # e.g. "* const "
  71. endpos = match.end()
  72. if csource.startswith('*', endpos):
  73. parts.append('('); closing += ')'
  74. level = 0
  75. i = endpos
  76. while i < len(csource):
  77. c = csource[i]
  78. if c == '(':
  79. level += 1
  80. elif c == ')':
  81. if level == 0:
  82. break
  83. level -= 1
  84. elif c in ',;=':
  85. if level == 0:
  86. break
  87. i += 1
  88. csource = csource[endpos:i] + closing + csource[i:]
  89. #print repr(''.join(parts)+csource)
  90. parts.append(csource)
  91. return ''.join(parts)
  92. def _preprocess_extern_python(csource):
  93. # input: `extern "Python" int foo(int);` or
  94. # `extern "Python" { int foo(int); }`
  95. # output:
  96. # void __cffi_extern_python_start;
  97. # int foo(int);
  98. # void __cffi_extern_python_stop;
  99. #
  100. # input: `extern "Python+C" int foo(int);`
  101. # output:
  102. # void __cffi_extern_python_plus_c_start;
  103. # int foo(int);
  104. # void __cffi_extern_python_stop;
  105. parts = []
  106. while True:
  107. match = _r_extern_python.search(csource)
  108. if not match:
  109. break
  110. endpos = match.end() - 1
  111. #print
  112. #print ''.join(parts)+csource
  113. #print '=>'
  114. parts.append(csource[:match.start()])
  115. if 'C' in match.group(1):
  116. parts.append('void __cffi_extern_python_plus_c_start; ')
  117. else:
  118. parts.append('void __cffi_extern_python_start; ')
  119. if csource[endpos] == '{':
  120. # grouping variant
  121. closing = csource.find('}', endpos)
  122. if closing < 0:
  123. raise CDefError("'extern \"Python\" {': no '}' found")
  124. if csource.find('{', endpos + 1, closing) >= 0:
  125. raise NotImplementedError("cannot use { } inside a block "
  126. "'extern \"Python\" { ... }'")
  127. parts.append(csource[endpos+1:closing])
  128. csource = csource[closing+1:]
  129. else:
  130. # non-grouping variant
  131. semicolon = csource.find(';', endpos)
  132. if semicolon < 0:
  133. raise CDefError("'extern \"Python\": no ';' found")
  134. parts.append(csource[endpos:semicolon+1])
  135. csource = csource[semicolon+1:]
  136. parts.append(' void __cffi_extern_python_stop;')
  137. #print ''.join(parts)+csource
  138. #print
  139. parts.append(csource)
  140. return ''.join(parts)
  141. def _warn_for_string_literal(csource):
  142. if '"' not in csource:
  143. return
  144. for line in csource.splitlines():
  145. if '"' in line and not line.lstrip().startswith('#'):
  146. import warnings
  147. warnings.warn("String literal found in cdef() or type source. "
  148. "String literals are ignored here, but you should "
  149. "remove them anyway because some character sequences "
  150. "confuse pre-parsing.")
  151. break
  152. def _warn_for_non_extern_non_static_global_variable(decl):
  153. if not decl.storage:
  154. import warnings
  155. warnings.warn("Global variable '%s' in cdef(): for consistency "
  156. "with C it should have a storage class specifier "
  157. "(usually 'extern')" % (decl.name,))
  158. def _remove_line_directives(csource):
  159. # _r_line_directive matches whole lines, without the final \n, if they
  160. # start with '#line' with some spacing allowed, or '#NUMBER'. This
  161. # function stores them away and replaces them with exactly the string
  162. # '#line@N', where N is the index in the list 'line_directives'.
  163. line_directives = []
  164. def replace(m):
  165. i = len(line_directives)
  166. line_directives.append(m.group())
  167. return '#line@%d' % i
  168. csource = _r_line_directive.sub(replace, csource)
  169. return csource, line_directives
  170. def _put_back_line_directives(csource, line_directives):
  171. def replace(m):
  172. s = m.group()
  173. if not s.startswith('#line@'):
  174. raise AssertionError("unexpected #line directive "
  175. "(should have been processed and removed")
  176. return line_directives[int(s[6:])]
  177. return _r_line_directive.sub(replace, csource)
  178. def _preprocess(csource):
  179. # First, remove the lines of the form '#line N "filename"' because
  180. # the "filename" part could confuse the rest
  181. csource, line_directives = _remove_line_directives(csource)
  182. # Remove comments. NOTE: this only work because the cdef() section
  183. # should not contain any string literals (except in line directives)!
  184. def replace_keeping_newlines(m):
  185. return ' ' + m.group().count('\n') * '\n'
  186. csource = _r_comment.sub(replace_keeping_newlines, csource)
  187. # Remove the "#define FOO x" lines
  188. macros = {}
  189. for match in _r_define.finditer(csource):
  190. macroname, macrovalue = match.groups()
  191. macrovalue = macrovalue.replace('\\\n', '').strip()
  192. macros[macroname] = macrovalue
  193. csource = _r_define.sub('', csource)
  194. #
  195. if pycparser.__version__ < '2.14':
  196. csource = _workaround_for_old_pycparser(csource)
  197. #
  198. # BIG HACK: replace WINAPI or __stdcall with "volatile const".
  199. # It doesn't make sense for the return type of a function to be
  200. # "volatile volatile const", so we abuse it to detect __stdcall...
  201. # Hack number 2 is that "int(volatile *fptr)();" is not valid C
  202. # syntax, so we place the "volatile" before the opening parenthesis.
  203. csource = _r_stdcall2.sub(' volatile volatile const(', csource)
  204. csource = _r_stdcall1.sub(' volatile volatile const ', csource)
  205. csource = _r_cdecl.sub(' ', csource)
  206. #
  207. # Replace `extern "Python"` with start/end markers
  208. csource = _preprocess_extern_python(csource)
  209. #
  210. # Now there should not be any string literal left; warn if we get one
  211. _warn_for_string_literal(csource)
  212. #
  213. # Replace "[...]" with "[__dotdotdotarray__]"
  214. csource = _r_partial_array.sub('[__dotdotdotarray__]', csource)
  215. #
  216. # Replace "...}" with "__dotdotdotNUM__}". This construction should
  217. # occur only at the end of enums; at the end of structs we have "...;}"
  218. # and at the end of vararg functions "...);". Also replace "=...[,}]"
  219. # with ",__dotdotdotNUM__[,}]": this occurs in the enums too, when
  220. # giving an unknown value.
  221. matches = list(_r_partial_enum.finditer(csource))
  222. for number, match in enumerate(reversed(matches)):
  223. p = match.start()
  224. if csource[p] == '=':
  225. p2 = csource.find('...', p, match.end())
  226. assert p2 > p
  227. csource = '%s,__dotdotdot%d__ %s' % (csource[:p], number,
  228. csource[p2+3:])
  229. else:
  230. assert csource[p:p+3] == '...'
  231. csource = '%s __dotdotdot%d__ %s' % (csource[:p], number,
  232. csource[p+3:])
  233. # Replace "int ..." or "unsigned long int..." with "__dotdotdotint__"
  234. csource = _r_int_dotdotdot.sub(' __dotdotdotint__ ', csource)
  235. # Replace "float ..." or "double..." with "__dotdotdotfloat__"
  236. csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource)
  237. # Replace all remaining "..." with the same name, "__dotdotdot__",
  238. # which is declared with a typedef for the purpose of C parsing.
  239. csource = csource.replace('...', ' __dotdotdot__ ')
  240. # Finally, put back the line directives
  241. csource = _put_back_line_directives(csource, line_directives)
  242. return csource, macros
  243. def _common_type_names(csource):
  244. # Look in the source for what looks like usages of types from the
  245. # list of common types. A "usage" is approximated here as the
  246. # appearance of the word, minus a "definition" of the type, which
  247. # is the last word in a "typedef" statement. Approximative only
  248. # but should be fine for all the common types.
  249. look_for_words = set(COMMON_TYPES)
  250. look_for_words.add(';')
  251. look_for_words.add(',')
  252. look_for_words.add('(')
  253. look_for_words.add(')')
  254. look_for_words.add('typedef')
  255. words_used = set()
  256. is_typedef = False
  257. paren = 0
  258. previous_word = ''
  259. for word in _r_words.findall(csource):
  260. if word in look_for_words:
  261. if word == ';':
  262. if is_typedef:
  263. words_used.discard(previous_word)
  264. look_for_words.discard(previous_word)
  265. is_typedef = False
  266. elif word == 'typedef':
  267. is_typedef = True
  268. paren = 0
  269. elif word == '(':
  270. paren += 1
  271. elif word == ')':
  272. paren -= 1
  273. elif word == ',':
  274. if is_typedef and paren == 0:
  275. words_used.discard(previous_word)
  276. look_for_words.discard(previous_word)
  277. else: # word in COMMON_TYPES
  278. words_used.add(word)
  279. previous_word = word
  280. return words_used
  281. class Parser(object):
  282. def __init__(self):
  283. self._declarations = {}
  284. self._included_declarations = set()
  285. self._anonymous_counter = 0
  286. self._structnode2type = weakref.WeakKeyDictionary()
  287. self._options = {}
  288. self._int_constants = {}
  289. self._recomplete = []
  290. self._uses_new_feature = None
  291. def _parse(self, csource):
  292. csource, macros = _preprocess(csource)
  293. # XXX: for more efficiency we would need to poke into the
  294. # internals of CParser... the following registers the
  295. # typedefs, because their presence or absence influences the
  296. # parsing itself (but what they are typedef'ed to plays no role)
  297. ctn = _common_type_names(csource)
  298. typenames = []
  299. for name in sorted(self._declarations):
  300. if name.startswith('typedef '):
  301. name = name[8:]
  302. typenames.append(name)
  303. ctn.discard(name)
  304. typenames += sorted(ctn)
  305. #
  306. csourcelines = []
  307. csourcelines.append('# 1 "<cdef automatic initialization code>"')
  308. for typename in typenames:
  309. csourcelines.append('typedef int %s;' % typename)
  310. csourcelines.append('typedef int __dotdotdotint__, __dotdotdotfloat__,'
  311. ' __dotdotdot__;')
  312. # this forces pycparser to consider the following in the file
  313. # called <cdef source string> from line 1
  314. csourcelines.append('# 1 "%s"' % (CDEF_SOURCE_STRING,))
  315. csourcelines.append(csource)
  316. csourcelines.append('') # see test_missing_newline_bug
  317. fullcsource = '\n'.join(csourcelines)
  318. if lock is not None:
  319. lock.acquire() # pycparser is not thread-safe...
  320. try:
  321. ast = _get_parser().parse(fullcsource)
  322. except pycparser.c_parser.ParseError as e:
  323. self.convert_pycparser_error(e, csource)
  324. finally:
  325. if lock is not None:
  326. lock.release()
  327. # csource will be used to find buggy source text
  328. return ast, macros, csource
  329. def _convert_pycparser_error(self, e, csource):
  330. # xxx look for "<cdef source string>:NUM:" at the start of str(e)
  331. # and interpret that as a line number. This will not work if
  332. # the user gives explicit ``# NUM "FILE"`` directives.
  333. line = None
  334. msg = str(e)
  335. match = re.match(r"%s:(\d+):" % (CDEF_SOURCE_STRING,), msg)
  336. if match:
  337. linenum = int(match.group(1), 10)
  338. csourcelines = csource.splitlines()
  339. if 1 <= linenum <= len(csourcelines):
  340. line = csourcelines[linenum-1]
  341. return line
  342. def convert_pycparser_error(self, e, csource):
  343. line = self._convert_pycparser_error(e, csource)
  344. msg = str(e)
  345. if line:
  346. msg = 'cannot parse "%s"\n%s' % (line.strip(), msg)
  347. else:
  348. msg = 'parse error\n%s' % (msg,)
  349. raise CDefError(msg)
  350. def parse(self, csource, override=False, packed=False, pack=None,
  351. dllexport=False):
  352. if packed:
  353. if packed != True:
  354. raise ValueError("'packed' should be False or True; use "
  355. "'pack' to give another value")
  356. if pack:
  357. raise ValueError("cannot give both 'pack' and 'packed'")
  358. pack = 1
  359. elif pack:
  360. if pack & (pack - 1):
  361. raise ValueError("'pack' must be a power of two, not %r" %
  362. (pack,))
  363. else:
  364. pack = 0
  365. prev_options = self._options
  366. try:
  367. self._options = {'override': override,
  368. 'packed': pack,
  369. 'dllexport': dllexport}
  370. self._internal_parse(csource)
  371. finally:
  372. self._options = prev_options
  373. def _internal_parse(self, csource):
  374. ast, macros, csource = self._parse(csource)
  375. # add the macros
  376. self._process_macros(macros)
  377. # find the first "__dotdotdot__" and use that as a separator
  378. # between the repeated typedefs and the real csource
  379. iterator = iter(ast.ext)
  380. for decl in iterator:
  381. if decl.name == '__dotdotdot__':
  382. break
  383. else:
  384. assert 0
  385. current_decl = None
  386. #
  387. try:
  388. self._inside_extern_python = '__cffi_extern_python_stop'
  389. for decl in iterator:
  390. current_decl = decl
  391. if isinstance(decl, pycparser.c_ast.Decl):
  392. self._parse_decl(decl)
  393. elif isinstance(decl, pycparser.c_ast.Typedef):
  394. if not decl.name:
  395. raise CDefError("typedef does not declare any name",
  396. decl)
  397. quals = 0
  398. if (isinstance(decl.type.type, pycparser.c_ast.IdentifierType) and
  399. decl.type.type.names[-1].startswith('__dotdotdot')):
  400. realtype = self._get_unknown_type(decl)
  401. elif (isinstance(decl.type, pycparser.c_ast.PtrDecl) and
  402. isinstance(decl.type.type, pycparser.c_ast.TypeDecl) and
  403. isinstance(decl.type.type.type,
  404. pycparser.c_ast.IdentifierType) and
  405. decl.type.type.type.names[-1].startswith('__dotdotdot')):
  406. realtype = self._get_unknown_ptr_type(decl)
  407. else:
  408. realtype, quals = self._get_type_and_quals(
  409. decl.type, name=decl.name, partial_length_ok=True,
  410. typedef_example="*(%s *)0" % (decl.name,))
  411. self._declare('typedef ' + decl.name, realtype, quals=quals)
  412. elif decl.__class__.__name__ == 'Pragma':
  413. # skip pragma, only in pycparser 2.15
  414. import warnings
  415. warnings.warn(
  416. "#pragma in cdef() are entirely ignored. "
  417. "They should be removed for now, otherwise your "
  418. "code might behave differently in a future version "
  419. "of CFFI if #pragma support gets added. Note that "
  420. "'#pragma pack' needs to be replaced with the "
  421. "'packed' keyword argument to cdef().")
  422. else:
  423. raise CDefError("unexpected <%s>: this construct is valid "
  424. "C but not valid in cdef()" %
  425. decl.__class__.__name__, decl)
  426. except CDefError as e:
  427. if len(e.args) == 1:
  428. e.args = e.args + (current_decl,)
  429. raise
  430. except FFIError as e:
  431. msg = self._convert_pycparser_error(e, csource)
  432. if msg:
  433. e.args = (e.args[0] + "\n *** Err: %s" % msg,)
  434. raise
  435. def _add_constants(self, key, val):
  436. if key in self._int_constants:
  437. if self._int_constants[key] == val:
  438. return # ignore identical double declarations
  439. raise FFIError(
  440. "multiple declarations of constant: %s" % (key,))
  441. self._int_constants[key] = val
  442. def _add_integer_constant(self, name, int_str):
  443. int_str = int_str.lower().rstrip("ul")
  444. neg = int_str.startswith('-')
  445. if neg:
  446. int_str = int_str[1:]
  447. # "010" is not valid oct in py3
  448. if (int_str.startswith("0") and int_str != '0'
  449. and not int_str.startswith("0x")):
  450. int_str = "0o" + int_str[1:]
  451. pyvalue = int(int_str, 0)
  452. if neg:
  453. pyvalue = -pyvalue
  454. self._add_constants(name, pyvalue)
  455. self._declare('macro ' + name, pyvalue)
  456. def _process_macros(self, macros):
  457. for key, value in macros.items():
  458. value = value.strip()
  459. if _r_int_literal.match(value):
  460. self._add_integer_constant(key, value)
  461. elif value == '...':
  462. self._declare('macro ' + key, value)
  463. else:
  464. raise CDefError(
  465. 'only supports one of the following syntax:\n'
  466. ' #define %s ... (literally dot-dot-dot)\n'
  467. ' #define %s NUMBER (with NUMBER an integer'
  468. ' constant, decimal/hex/octal)\n'
  469. 'got:\n'
  470. ' #define %s %s'
  471. % (key, key, key, value))
  472. def _declare_function(self, tp, quals, decl):
  473. tp = self._get_type_pointer(tp, quals)
  474. if self._options.get('dllexport'):
  475. tag = 'dllexport_python '
  476. elif self._inside_extern_python == '__cffi_extern_python_start':
  477. tag = 'extern_python '
  478. elif self._inside_extern_python == '__cffi_extern_python_plus_c_start':
  479. tag = 'extern_python_plus_c '
  480. else:
  481. tag = 'function '
  482. self._declare(tag + decl.name, tp)
  483. def _parse_decl(self, decl):
  484. node = decl.type
  485. if isinstance(node, pycparser.c_ast.FuncDecl):
  486. tp, quals = self._get_type_and_quals(node, name=decl.name)
  487. assert isinstance(tp, model.RawFunctionType)
  488. self._declare_function(tp, quals, decl)
  489. else:
  490. if isinstance(node, pycparser.c_ast.Struct):
  491. self._get_struct_union_enum_type('struct', node)
  492. elif isinstance(node, pycparser.c_ast.Union):
  493. self._get_struct_union_enum_type('union', node)
  494. elif isinstance(node, pycparser.c_ast.Enum):
  495. self._get_struct_union_enum_type('enum', node)
  496. elif not decl.name:
  497. raise CDefError("construct does not declare any variable",
  498. decl)
  499. #
  500. if decl.name:
  501. tp, quals = self._get_type_and_quals(node,
  502. partial_length_ok=True)
  503. if tp.is_raw_function:
  504. self._declare_function(tp, quals, decl)
  505. elif (tp.is_integer_type() and
  506. hasattr(decl, 'init') and
  507. hasattr(decl.init, 'value') and
  508. _r_int_literal.match(decl.init.value)):
  509. self._add_integer_constant(decl.name, decl.init.value)
  510. elif (tp.is_integer_type() and
  511. isinstance(decl.init, pycparser.c_ast.UnaryOp) and
  512. decl.init.op == '-' and
  513. hasattr(decl.init.expr, 'value') and
  514. _r_int_literal.match(decl.init.expr.value)):
  515. self._add_integer_constant(decl.name,
  516. '-' + decl.init.expr.value)
  517. elif (tp is model.void_type and
  518. decl.name.startswith('__cffi_extern_python_')):
  519. # hack: `extern "Python"` in the C source is replaced
  520. # with "void __cffi_extern_python_start;" and
  521. # "void __cffi_extern_python_stop;"
  522. self._inside_extern_python = decl.name
  523. else:
  524. if self._inside_extern_python !='__cffi_extern_python_stop':
  525. raise CDefError(
  526. "cannot declare constants or "
  527. "variables with 'extern \"Python\"'")
  528. if (quals & model.Q_CONST) and not tp.is_array_type:
  529. self._declare('constant ' + decl.name, tp, quals=quals)
  530. else:
  531. _warn_for_non_extern_non_static_global_variable(decl)
  532. self._declare('variable ' + decl.name, tp, quals=quals)
  533. def parse_type(self, cdecl):
  534. return self.parse_type_and_quals(cdecl)[0]
  535. def parse_type_and_quals(self, cdecl):
  536. ast, macros = self._parse('void __dummy(\n%s\n);' % cdecl)[:2]
  537. assert not macros
  538. exprnode = ast.ext[-1].type.args.params[0]
  539. if isinstance(exprnode, pycparser.c_ast.ID):
  540. raise CDefError("unknown identifier '%s'" % (exprnode.name,))
  541. return self._get_type_and_quals(exprnode.type)
  542. def _declare(self, name, obj, included=False, quals=0):
  543. if name in self._declarations:
  544. prevobj, prevquals = self._declarations[name]
  545. if prevobj is obj and prevquals == quals:
  546. return
  547. if not self._options.get('override'):
  548. raise FFIError(
  549. "multiple declarations of %s (for interactive usage, "
  550. "try cdef(xx, override=True))" % (name,))
  551. assert '__dotdotdot__' not in name.split()
  552. self._declarations[name] = (obj, quals)
  553. if included:
  554. self._included_declarations.add(obj)
  555. def _extract_quals(self, type):
  556. quals = 0
  557. if isinstance(type, (pycparser.c_ast.TypeDecl,
  558. pycparser.c_ast.PtrDecl)):
  559. if 'const' in type.quals:
  560. quals |= model.Q_CONST
  561. if 'volatile' in type.quals:
  562. quals |= model.Q_VOLATILE
  563. if 'restrict' in type.quals:
  564. quals |= model.Q_RESTRICT
  565. return quals
  566. def _get_type_pointer(self, type, quals, declname=None):
  567. if isinstance(type, model.RawFunctionType):
  568. return type.as_function_pointer()
  569. if (isinstance(type, model.StructOrUnionOrEnum) and
  570. type.name.startswith('$') and type.name[1:].isdigit() and
  571. type.forcename is None and declname is not None):
  572. return model.NamedPointerType(type, declname, quals)
  573. return model.PointerType(type, quals)
  574. def _get_type_and_quals(self, typenode, name=None, partial_length_ok=False,
  575. typedef_example=None):
  576. # first, dereference typedefs, if we have it already parsed, we're good
  577. if (isinstance(typenode, pycparser.c_ast.TypeDecl) and
  578. isinstance(typenode.type, pycparser.c_ast.IdentifierType) and
  579. len(typenode.type.names) == 1 and
  580. ('typedef ' + typenode.type.names[0]) in self._declarations):
  581. tp, quals = self._declarations['typedef ' + typenode.type.names[0]]
  582. quals |= self._extract_quals(typenode)
  583. return tp, quals
  584. #
  585. if isinstance(typenode, pycparser.c_ast.ArrayDecl):
  586. # array type
  587. if typenode.dim is None:
  588. length = None
  589. else:
  590. length = self._parse_constant(
  591. typenode.dim, partial_length_ok=partial_length_ok)
  592. # a hack: in 'typedef int foo_t[...][...];', don't use '...' as
  593. # the length but use directly the C expression that would be
  594. # generated by recompiler.py. This lets the typedef be used in
  595. # many more places within recompiler.py
  596. if typedef_example is not None:
  597. if length == '...':
  598. length = '_cffi_array_len(%s)' % (typedef_example,)
  599. typedef_example = "*" + typedef_example
  600. #
  601. tp, quals = self._get_type_and_quals(typenode.type,
  602. partial_length_ok=partial_length_ok,
  603. typedef_example=typedef_example)
  604. return model.ArrayType(tp, length), quals
  605. #
  606. if isinstance(typenode, pycparser.c_ast.PtrDecl):
  607. # pointer type
  608. itemtype, itemquals = self._get_type_and_quals(typenode.type)
  609. tp = self._get_type_pointer(itemtype, itemquals, declname=name)
  610. quals = self._extract_quals(typenode)
  611. return tp, quals
  612. #
  613. if isinstance(typenode, pycparser.c_ast.TypeDecl):
  614. quals = self._extract_quals(typenode)
  615. type = typenode.type
  616. if isinstance(type, pycparser.c_ast.IdentifierType):
  617. # assume a primitive type. get it from .names, but reduce
  618. # synonyms to a single chosen combination
  619. names = list(type.names)
  620. if names != ['signed', 'char']: # keep this unmodified
  621. prefixes = {}
  622. while names:
  623. name = names[0]
  624. if name in ('short', 'long', 'signed', 'unsigned'):
  625. prefixes[name] = prefixes.get(name, 0) + 1
  626. del names[0]
  627. else:
  628. break
  629. # ignore the 'signed' prefix below, and reorder the others
  630. newnames = []
  631. for prefix in ('unsigned', 'short', 'long'):
  632. for i in range(prefixes.get(prefix, 0)):
  633. newnames.append(prefix)
  634. if not names:
  635. names = ['int'] # implicitly
  636. if names == ['int']: # but kill it if 'short' or 'long'
  637. if 'short' in prefixes or 'long' in prefixes:
  638. names = []
  639. names = newnames + names
  640. ident = ' '.join(names)
  641. if ident == 'void':
  642. return model.void_type, quals
  643. if ident == '__dotdotdot__':
  644. raise FFIError(':%d: bad usage of "..."' %
  645. typenode.coord.line)
  646. tp0, quals0 = resolve_common_type(self, ident)
  647. return tp0, (quals | quals0)
  648. #
  649. if isinstance(type, pycparser.c_ast.Struct):
  650. # 'struct foobar'
  651. tp = self._get_struct_union_enum_type('struct', type, name)
  652. return tp, quals
  653. #
  654. if isinstance(type, pycparser.c_ast.Union):
  655. # 'union foobar'
  656. tp = self._get_struct_union_enum_type('union', type, name)
  657. return tp, quals
  658. #
  659. if isinstance(type, pycparser.c_ast.Enum):
  660. # 'enum foobar'
  661. tp = self._get_struct_union_enum_type('enum', type, name)
  662. return tp, quals
  663. #
  664. if isinstance(typenode, pycparser.c_ast.FuncDecl):
  665. # a function type
  666. return self._parse_function_type(typenode, name), 0
  667. #
  668. # nested anonymous structs or unions end up here
  669. if isinstance(typenode, pycparser.c_ast.Struct):
  670. return self._get_struct_union_enum_type('struct', typenode, name,
  671. nested=True), 0
  672. if isinstance(typenode, pycparser.c_ast.Union):
  673. return self._get_struct_union_enum_type('union', typenode, name,
  674. nested=True), 0
  675. #
  676. raise FFIError(":%d: bad or unsupported type declaration" %
  677. typenode.coord.line)
  678. def _parse_function_type(self, typenode, funcname=None):
  679. params = list(getattr(typenode.args, 'params', []))
  680. for i, arg in enumerate(params):
  681. if not hasattr(arg, 'type'):
  682. raise CDefError("%s arg %d: unknown type '%s'"
  683. " (if you meant to use the old C syntax of giving"
  684. " untyped arguments, it is not supported)"
  685. % (funcname or 'in expression', i + 1,
  686. getattr(arg, 'name', '?')))
  687. ellipsis = (
  688. len(params) > 0 and
  689. isinstance(params[-1].type, pycparser.c_ast.TypeDecl) and
  690. isinstance(params[-1].type.type,
  691. pycparser.c_ast.IdentifierType) and
  692. params[-1].type.type.names == ['__dotdotdot__'])
  693. if ellipsis:
  694. params.pop()
  695. if not params:
  696. raise CDefError(
  697. "%s: a function with only '(...)' as argument"
  698. " is not correct C" % (funcname or 'in expression'))
  699. args = [self._as_func_arg(*self._get_type_and_quals(argdeclnode.type))
  700. for argdeclnode in params]
  701. if not ellipsis and args == [model.void_type]:
  702. args = []
  703. result, quals = self._get_type_and_quals(typenode.type)
  704. # the 'quals' on the result type are ignored. HACK: we absure them
  705. # to detect __stdcall functions: we textually replace "__stdcall"
  706. # with "volatile volatile const" above.
  707. abi = None
  708. if hasattr(typenode.type, 'quals'): # else, probable syntax error anyway
  709. if typenode.type.quals[-3:] == ['volatile', 'volatile', 'const']:
  710. abi = '__stdcall'
  711. return model.RawFunctionType(tuple(args), result, ellipsis, abi)
  712. def _as_func_arg(self, type, quals):
  713. if isinstance(type, model.ArrayType):
  714. return model.PointerType(type.item, quals)
  715. elif isinstance(type, model.RawFunctionType):
  716. return type.as_function_pointer()
  717. else:
  718. return type
  719. def _get_struct_union_enum_type(self, kind, type, name=None, nested=False):
  720. # First, a level of caching on the exact 'type' node of the AST.
  721. # This is obscure, but needed because pycparser "unrolls" declarations
  722. # such as "typedef struct { } foo_t, *foo_p" and we end up with
  723. # an AST that is not a tree, but a DAG, with the "type" node of the
  724. # two branches foo_t and foo_p of the trees being the same node.
  725. # It's a bit silly but detecting "DAG-ness" in the AST tree seems
  726. # to be the only way to distinguish this case from two independent
  727. # structs. See test_struct_with_two_usages.
  728. try:
  729. return self._structnode2type[type]
  730. except KeyError:
  731. pass
  732. #
  733. # Note that this must handle parsing "struct foo" any number of
  734. # times and always return the same StructType object. Additionally,
  735. # one of these times (not necessarily the first), the fields of
  736. # the struct can be specified with "struct foo { ...fields... }".
  737. # If no name is given, then we have to create a new anonymous struct
  738. # with no caching; in this case, the fields are either specified
  739. # right now or never.
  740. #
  741. force_name = name
  742. name = type.name
  743. #
  744. # get the type or create it if needed
  745. if name is None:
  746. # 'force_name' is used to guess a more readable name for
  747. # anonymous structs, for the common case "typedef struct { } foo".
  748. if force_name is not None:
  749. explicit_name = '$%s' % force_name
  750. else:
  751. self._anonymous_counter += 1
  752. explicit_name = '$%d' % self._anonymous_counter
  753. tp = None
  754. else:
  755. explicit_name = name
  756. key = '%s %s' % (kind, name)
  757. tp, _ = self._declarations.get(key, (None, None))
  758. #
  759. if tp is None:
  760. if kind == 'struct':
  761. tp = model.StructType(explicit_name, None, None, None)
  762. elif kind == 'union':
  763. tp = model.UnionType(explicit_name, None, None, None)
  764. elif kind == 'enum':
  765. if explicit_name == '__dotdotdot__':
  766. raise CDefError("Enums cannot be declared with ...")
  767. tp = self._build_enum_type(explicit_name, type.values)
  768. else:
  769. raise AssertionError("kind = %r" % (kind,))
  770. if name is not None:
  771. self._declare(key, tp)
  772. else:
  773. if kind == 'enum' and type.values is not None:
  774. raise NotImplementedError(
  775. "enum %s: the '{}' declaration should appear on the first "
  776. "time the enum is mentioned, not later" % explicit_name)
  777. if not tp.forcename:
  778. tp.force_the_name(force_name)
  779. if tp.forcename and '$' in tp.name:
  780. self._declare('anonymous %s' % tp.forcename, tp)
  781. #
  782. self._structnode2type[type] = tp
  783. #
  784. # enums: done here
  785. if kind == 'enum':
  786. return tp
  787. #
  788. # is there a 'type.decls'? If yes, then this is the place in the
  789. # C sources that declare the fields. If no, then just return the
  790. # existing type, possibly still incomplete.
  791. if type.decls is None:
  792. return tp
  793. #
  794. if tp.fldnames is not None:
  795. raise CDefError("duplicate declaration of struct %s" % name)
  796. fldnames = []
  797. fldtypes = []
  798. fldbitsize = []
  799. fldquals = []
  800. for decl in type.decls:
  801. if (isinstance(decl.type, pycparser.c_ast.IdentifierType) and
  802. ''.join(decl.type.names) == '__dotdotdot__'):
  803. # XXX pycparser is inconsistent: 'names' should be a list
  804. # of strings, but is sometimes just one string. Use
  805. # str.join() as a way to cope with both.
  806. self._make_partial(tp, nested)
  807. continue
  808. if decl.bitsize is None:
  809. bitsize = -1
  810. else:
  811. bitsize = self._parse_constant(decl.bitsize)
  812. self._partial_length = False
  813. type, fqual = self._get_type_and_quals(decl.type,
  814. partial_length_ok=True)
  815. if self._partial_length:
  816. self._make_partial(tp, nested)
  817. if isinstance(type, model.StructType) and type.partial:
  818. self._make_partial(tp, nested)
  819. fldnames.append(decl.name or '')
  820. fldtypes.append(type)
  821. fldbitsize.append(bitsize)
  822. fldquals.append(fqual)
  823. tp.fldnames = tuple(fldnames)
  824. tp.fldtypes = tuple(fldtypes)
  825. tp.fldbitsize = tuple(fldbitsize)
  826. tp.fldquals = tuple(fldquals)
  827. if fldbitsize != [-1] * len(fldbitsize):
  828. if isinstance(tp, model.StructType) and tp.partial:
  829. raise NotImplementedError("%s: using both bitfields and '...;'"
  830. % (tp,))
  831. tp.packed = self._options.get('packed')
  832. if tp.completed: # must be re-completed: it is not opaque any more
  833. tp.completed = 0
  834. self._recomplete.append(tp)
  835. return tp
  836. def _make_partial(self, tp, nested):
  837. if not isinstance(tp, model.StructOrUnion):
  838. raise CDefError("%s cannot be partial" % (tp,))
  839. if not tp.has_c_name() and not nested:
  840. raise NotImplementedError("%s is partial but has no C name" %(tp,))
  841. tp.partial = True
  842. def _parse_constant(self, exprnode, partial_length_ok=False):
  843. # for now, limited to expressions that are an immediate number
  844. # or positive/negative number
  845. if isinstance(exprnode, pycparser.c_ast.Constant):
  846. s = exprnode.value
  847. if '0' <= s[0] <= '9':
  848. s = s.rstrip('uUlL')
  849. try:
  850. if s.startswith('0'):
  851. return int(s, 8)
  852. else:
  853. return int(s, 10)
  854. except ValueError:
  855. if len(s) > 1:
  856. if s.lower()[0:2] == '0x':
  857. return int(s, 16)
  858. elif s.lower()[0:2] == '0b':
  859. return int(s, 2)
  860. raise CDefError("invalid constant %r" % (s,))
  861. elif s[0] == "'" and s[-1] == "'" and (
  862. len(s) == 3 or (len(s) == 4 and s[1] == "\\")):
  863. return ord(s[-2])
  864. else:
  865. raise CDefError("invalid constant %r" % (s,))
  866. #
  867. if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
  868. exprnode.op == '+'):
  869. return self._parse_constant(exprnode.expr)
  870. #
  871. if (isinstance(exprnode, pycparser.c_ast.UnaryOp) and
  872. exprnode.op == '-'):
  873. return -self._parse_constant(exprnode.expr)
  874. # load previously defined int constant
  875. if (isinstance(exprnode, pycparser.c_ast.ID) and
  876. exprnode.name in self._int_constants):
  877. return self._int_constants[exprnode.name]
  878. #
  879. if (isinstance(exprnode, pycparser.c_ast.ID) and
  880. exprnode.name == '__dotdotdotarray__'):
  881. if partial_length_ok:
  882. self._partial_length = True
  883. return '...'
  884. raise FFIError(":%d: unsupported '[...]' here, cannot derive "
  885. "the actual array length in this context"
  886. % exprnode.coord.line)
  887. #
  888. if isinstance(exprnode, pycparser.c_ast.BinaryOp):
  889. left = self._parse_constant(exprnode.left)
  890. right = self._parse_constant(exprnode.right)
  891. if exprnode.op == '+':
  892. return left + right
  893. elif exprnode.op == '-':
  894. return left - right
  895. elif exprnode.op == '*':
  896. return left * right
  897. elif exprnode.op == '/':
  898. return self._c_div(left, right)
  899. elif exprnode.op == '%':
  900. return left - self._c_div(left, right) * right
  901. elif exprnode.op == '<<':
  902. return left << right
  903. elif exprnode.op == '>>':
  904. return left >> right
  905. elif exprnode.op == '&':
  906. return left & right
  907. elif exprnode.op == '|':
  908. return left | right
  909. elif exprnode.op == '^':
  910. return left ^ right
  911. #
  912. raise FFIError(":%d: unsupported expression: expected a "
  913. "simple numeric constant" % exprnode.coord.line)
  914. def _c_div(self, a, b):
  915. result = a // b
  916. if ((a < 0) ^ (b < 0)) and (a % b) != 0:
  917. result += 1
  918. return result
  919. def _build_enum_type(self, explicit_name, decls):
  920. if decls is not None:
  921. partial = False
  922. enumerators = []
  923. enumvalues = []
  924. nextenumvalue = 0
  925. for enum in decls.enumerators:
  926. if _r_enum_dotdotdot.match(enum.name):
  927. partial = True
  928. continue
  929. if enum.value is not None:
  930. nextenumvalue = self._parse_constant(enum.value)
  931. enumerators.append(enum.name)
  932. enumvalues.append(nextenumvalue)
  933. self._add_constants(enum.name, nextenumvalue)
  934. nextenumvalue += 1
  935. enumerators = tuple(enumerators)
  936. enumvalues = tuple(enumvalues)
  937. tp = model.EnumType(explicit_name, enumerators, enumvalues)
  938. tp.partial = partial
  939. else: # opaque enum
  940. tp = model.EnumType(explicit_name, (), ())
  941. return tp
  942. def include(self, other):
  943. for name, (tp, quals) in other._declarations.items():
  944. if name.startswith('anonymous $enum_$'):
  945. continue # fix for test_anonymous_enum_include
  946. kind = name.split(' ', 1)[0]
  947. if kind in ('struct', 'union', 'enum', 'anonymous', 'typedef'):
  948. self._declare(name, tp, included=True, quals=quals)
  949. for k, v in other._int_constants.items():
  950. self._add_constants(k, v)
  951. def _get_unknown_type(self, decl):
  952. typenames = decl.type.type.names
  953. if typenames == ['__dotdotdot__']:
  954. return model.unknown_type(decl.name)
  955. if typenames == ['__dotdotdotint__']:
  956. if self._uses_new_feature is None:
  957. self._uses_new_feature = "'typedef int... %s'" % decl.name
  958. return model.UnknownIntegerType(decl.name)
  959. if typenames == ['__dotdotdotfloat__']:
  960. # note: not for 'long double' so far
  961. if self._uses_new_feature is None:
  962. self._uses_new_feature = "'typedef float... %s'" % decl.name
  963. return model.UnknownFloatType(decl.name)
  964. raise FFIError(':%d: unsupported usage of "..." in typedef'
  965. % decl.coord.line)
  966. def _get_unknown_ptr_type(self, decl):
  967. if decl.type.type.type.names == ['__dotdotdot__']:
  968. return model.unknown_ptr_type(decl.name)
  969. raise FFIError(':%d: unsupported usage of "..." in typedef'
  970. % decl.coord.line)