12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936 |
- #------------------------------------------------------------------------------
- # pycparser: c_parser.py
- #
- # CParser class: Parser and AST builder for the C language
- #
- # Eli Bendersky [https://eli.thegreenplace.net/]
- # License: BSD
- #------------------------------------------------------------------------------
- from .ply import yacc
- from . import c_ast
- from .c_lexer import CLexer
- from .plyparser import PLYParser, ParseError, parameterized, template
- from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
- @template
- class CParser(PLYParser):
    def __init__(
            self,
            lex_optimize=True,
            lexer=CLexer,
            lextab='pycparser.lextab',
            yacc_optimize=True,
            yacctab='pycparser.yacctab',
            yacc_debug=False,
            taboutputdir=''):
        """ Create a new CParser.

            Some arguments for controlling the debug/optimization
            level of the parser are provided. The defaults are
            tuned for release/performance mode.
            The simple rules for using them are:
            *) When tweaking CParser/CLexer, set these to False
            *) When releasing a stable parser, set to True

            lex_optimize:
                Set to False when you're modifying the lexer.
                Otherwise, changes in the lexer won't be used, if
                some lextab.py file exists.
                When releasing with a stable lexer, set to True
                to save the re-generation of the lexer table on
                each run.

            lexer:
                Set this parameter to define the lexer to use if
                you're not using the default CLexer.

            lextab:
                Points to the lex table that's used for optimized
                mode. Only if you're modifying the lexer and want
                some tests to avoid re-generating the table, make
                this point to a local lex table file (that's been
                earlier generated with lex_optimize=True)

            yacc_optimize:
                Set to False when you're modifying the parser.
                Otherwise, changes in the parser won't be used, if
                some parsetab.py file exists.
                When releasing with a stable parser, set to True
                to save the re-generation of the parser table on
                each run.

            yacctab:
                Points to the yacc table that's used for optimized
                mode. Only if you're modifying the parser, make
                this point to a local yacc table file

            yacc_debug:
                Generate a parser.out file that explains how yacc
                built the parsing table from the grammar.

            taboutputdir:
                Set this parameter to control the location of generated
                lextab and yacctab files.
        """
        # The lexer calls back into the parser: for error reporting, for
        # scope bracketing on '{'/'}', and for deciding whether an
        # identifier is a typedef-name (TYPEID) in the current scope.
        self.clex = lexer(
            error_func=self._lex_error_func,
            on_lbrace_func=self._lex_on_lbrace_func,
            on_rbrace_func=self._lex_on_rbrace_func,
            type_lookup_func=self._lex_type_lookup_func)

        self.clex.build(
            optimize=lex_optimize,
            lextab=lextab,
            outputdir=taboutputdir)
        self.tokens = self.clex.tokens

        # Grammar rules that also need an auto-generated "<rule>_opt"
        # (rule-or-empty) variant; _create_opt_rule synthesizes those
        # productions on this instance before yacc builds its tables.
        rules_with_opt = [
            'abstract_declarator',
            'assignment_expression',
            'declaration_list',
            'declaration_specifiers_no_type',
            'designation',
            'expression',
            'identifier_list',
            'init_declarator_list',
            'id_init_declarator_list',
            'initializer_list',
            'parameter_type_list',
            'block_item_list',
            'type_qualifier_list',
            'struct_declarator_list'
        ]

        for rule in rules_with_opt:
            self._create_opt_rule(rule)

        self.cparser = yacc.yacc(
            module=self,
            start='translation_unit_or_empty',
            debug=yacc_debug,
            optimize=yacc_optimize,
            tabmodule=yacctab,
            outputdir=taboutputdir)

        # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
        # the current (topmost) scope. Each scope is a dictionary that
        # specifies whether a name is a type. If _scope_stack[n][name] is
        # True, 'name' is currently a type in the scope. If it's False,
        # 'name' is used in the scope but not as a type (for instance, if we
        # saw: int name;
        # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
        # in this scope at all.
        self._scope_stack = [dict()]

        # Keeps track of the last token given to yacc (the lookahead token)
        self._last_yielded_token = None
- def parse(self, text, filename='', debug=False):
- """ Parses C code and returns an AST.
- text:
- A string containing the C source code
- filename:
- Name of the file being parsed (for meaningful
- error messages)
- debug:
- Debug flag to YACC
- """
- self.clex.filename = filename
- self.clex.reset_lineno()
- self._scope_stack = [dict()]
- self._last_yielded_token = None
- return self.cparser.parse(
- input=text,
- lexer=self.clex,
- debug=debug)
- ######################-- PRIVATE --######################
- def _push_scope(self):
- self._scope_stack.append(dict())
- def _pop_scope(self):
- assert len(self._scope_stack) > 1
- self._scope_stack.pop()
- def _add_typedef_name(self, name, coord):
- """ Add a new typedef name (ie a TYPEID) to the current scope
- """
- if not self._scope_stack[-1].get(name, True):
- self._parse_error(
- "Typedef %r previously declared as non-typedef "
- "in this scope" % name, coord)
- self._scope_stack[-1][name] = True
- def _add_identifier(self, name, coord):
- """ Add a new object, function, or enum member name (ie an ID) to the
- current scope
- """
- if self._scope_stack[-1].get(name, False):
- self._parse_error(
- "Non-typedef %r previously declared as typedef "
- "in this scope" % name, coord)
- self._scope_stack[-1][name] = False
- def _is_type_in_scope(self, name):
- """ Is *name* a typedef-name in the current scope?
- """
- for scope in reversed(self._scope_stack):
- # If name is an identifier in this scope it shadows typedefs in
- # higher scopes.
- in_scope = scope.get(name)
- if in_scope is not None: return in_scope
- return False
    def _lex_error_func(self, msg, line, column):
        # Lexer error callback: report the message at the offending coordinate.
        self._parse_error(msg, self._coord(line, column))
    def _lex_on_lbrace_func(self):
        # '{' seen by the lexer opens a new scope for typedef-name tracking.
        self._push_scope()
- def _lex_on_rbrace_func(self):
- self._pop_scope()
- def _lex_type_lookup_func(self, name):
- """ Looks up types that were previously defined with
- typedef.
- Passed to the lexer for recognizing identifiers that
- are types.
- """
- is_type = self._is_type_in_scope(name)
- return is_type
    def _get_yacc_lookahead_token(self):
        """ We need access to yacc's lookahead token in certain cases.
            This is the last token yacc requested from the lexer, so we
            ask the lexer.
        """
        return self.clex.last_token
- # To understand what's going on here, read sections A.8.5 and
- # A.8.6 of K&R2 very carefully.
- #
- # A C type consists of a basic type declaration, with a list
- # of modifiers. For example:
- #
- # int *c[5];
- #
- # The basic declaration here is 'int c', and the pointer and
- # the array are the modifiers.
- #
- # Basic declarations are represented by TypeDecl (from module c_ast) and the
- # modifiers are FuncDecl, PtrDecl and ArrayDecl.
- #
- # The standard states that whenever a new modifier is parsed, it should be
- # added to the end of the list of modifiers. For example:
- #
- # K&R2 A.8.6.2: Array Declarators
- #
- # In a declaration T D where D has the form
- # D1 [constant-expression-opt]
- # and the type of the identifier in the declaration T D1 is
- # "type-modifier T", the type of the
- # identifier of D is "type-modifier array of T"
- #
- # This is what this method does. The declarator it receives
- # can be a list of declarators ending with TypeDecl. It
- # tacks the modifier to the end of this list, just before
- # the TypeDecl.
- #
- # Additionally, the modifier may be a list itself. This is
- # useful for pointers, that can come as a chain from the rule
- # p_pointer. In this case, the whole modifier list is spliced
- # into the new location.
    def _type_modify_decl(self, decl, modifier):
        """ Tacks a type modifier on a declarator, and returns
            the modified declarator.

            The declarator is a chain of modifier nodes (FuncDecl, PtrDecl,
            ArrayDecl) ending in a TypeDecl; the modifier (itself possibly a
            chain, e.g. from p_pointer) is spliced in just before that
            TypeDecl. See the explanatory comment above this method and
            K&R2 A.8.5/A.8.6.

            Note: the declarator and modifier may be modified
        """
        #~ print '****'
        #~ decl.show(offset=3)
        #~ modifier.show(offset=3)
        #~ print '****'

        modifier_head = modifier
        modifier_tail = modifier

        # The modifier may be a nested list. Reach its tail.
        while modifier_tail.type:
            modifier_tail = modifier_tail.type

        # If the decl is a basic type, just tack the modifier onto it.
        if isinstance(decl, c_ast.TypeDecl):
            modifier_tail.type = decl
            return modifier
        else:
            # Otherwise, the decl is a list of modifiers. Reach
            # its tail and splice the modifier onto the tail,
            # pointing to the underlying basic type.
            decl_tail = decl

            while not isinstance(decl_tail.type, c_ast.TypeDecl):
                decl_tail = decl_tail.type

            modifier_tail.type = decl_tail.type
            decl_tail.type = modifier_head
            return decl
- # Due to the order in which declarators are constructed,
- # they have to be fixed in order to look like a normal AST.
- #
- # When a declaration arrives from syntax construction, it has
- # these problems:
- # * The innermost TypeDecl has no type (because the basic
- # type is only known at the uppermost declaration level)
- # * The declaration has no variable name, since that is saved
- # in the innermost TypeDecl
- # * The typename of the declaration is a list of type
- # specifiers, and not a node. Here, basic identifier types
- # should be separated from more complex types like enums
- # and structs.
- #
- # This method fixes these problems.
- def _fix_decl_name_type(self, decl, typename):
- """ Fixes a declaration. Modifies decl.
- """
- # Reach the underlying basic type
- #
- type = decl
- while not isinstance(type, c_ast.TypeDecl):
- type = type.type
- decl.name = type.declname
- type.quals = decl.quals[:]
- # The typename is a list of types. If any type in this
- # list isn't an IdentifierType, it must be the only
- # type in the list (it's illegal to declare "int enum ..")
- # If all the types are basic, they're collected in the
- # IdentifierType holder.
- for tn in typename:
- if not isinstance(tn, c_ast.IdentifierType):
- if len(typename) > 1:
- self._parse_error(
- "Invalid multiple types specified", tn.coord)
- else:
- type.type = tn
- return decl
- if not typename:
- # Functions default to returning int
- #
- if not isinstance(decl.type, c_ast.FuncDecl):
- self._parse_error(
- "Missing type in declaration", decl.coord)
- type.type = c_ast.IdentifierType(
- ['int'],
- coord=decl.coord)
- else:
- # At this point, we know that typename is a list of IdentifierType
- # nodes. Concatenate all the names into a single list.
- #
- type.type = c_ast.IdentifierType(
- [name for id in typename for name in id.names],
- coord=typename[0].coord)
- return decl
- def _add_declaration_specifier(self, declspec, newspec, kind, append=False):
- """ Declaration specifiers are represented by a dictionary
- with the entries:
- * qual: a list of type qualifiers
- * storage: a list of storage type qualifiers
- * type: a list of type specifiers
- * function: a list of function specifiers
- * alignment: a list of alignment specifiers
- This method is given a declaration specifier, and a
- new specifier of a given kind.
- If `append` is True, the new specifier is added to the end of
- the specifiers list, otherwise it's added at the beginning.
- Returns the declaration specifier, with the new
- specifier incorporated.
- """
- spec = declspec or dict(qual=[], storage=[], type=[], function=[], alignment=[])
- if append:
- spec[kind].append(newspec)
- else:
- spec[kind].insert(0, newspec)
- return spec
    def _build_declarations(self, spec, decls, typedef_namespace=False):
        """ Builds a list of declarations all sharing the given specifiers.

            spec:
                Declaration-specifier dictionary (see
                _add_declaration_specifier). Note: spec['type'] may be
                mutated here to compensate for mis-grouped typedef names.
            decls:
                List of dicts, one per declarator, with a 'decl' entry and
                optional 'init' / 'bitsize' entries.
            typedef_namespace:
                If true, each declared name is added
                to the "typedef namespace", which also includes objects,
                functions, and enum constants.
        """
        is_typedef = 'typedef' in spec['storage']
        declarations = []

        # Bit-fields are allowed to be unnamed.
        if decls[0].get('bitsize') is not None:
            pass

        # When redeclaring typedef names as identifiers in inner scopes, a
        # problem can occur where the identifier gets grouped into
        # spec['type'], leaving decl as None. This can only occur for the
        # first declarator.
        elif decls[0]['decl'] is None:
            if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
                    not self._is_type_in_scope(spec['type'][-1].names[0]):
                coord = '?'
                for t in spec['type']:
                    if hasattr(t, 'coord'):
                        coord = t.coord
                        break

                self._parse_error('Invalid declaration', coord)

            # Make this look as if it came from "direct_declarator:ID"
            decls[0]['decl'] = c_ast.TypeDecl(
                declname=spec['type'][-1].names[0],
                type=None,
                quals=None,
                align=spec['alignment'],
                coord=spec['type'][-1].coord)
            # Remove the "new" type's name from the end of spec['type']
            del spec['type'][-1]

        # A similar problem can occur where the declaration ends up looking
        # like an abstract declarator. Give it a name if this is the case.
        elif not isinstance(decls[0]['decl'], (
                c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
            decls_0_tail = decls[0]['decl']
            while not isinstance(decls_0_tail, c_ast.TypeDecl):
                decls_0_tail = decls_0_tail.type
            if decls_0_tail.declname is None:
                decls_0_tail.declname = spec['type'][-1].names[0]
                del spec['type'][-1]

        for decl in decls:
            assert decl['decl'] is not None
            if is_typedef:
                declaration = c_ast.Typedef(
                    name=None,
                    quals=spec['qual'],
                    storage=spec['storage'],
                    type=decl['decl'],
                    coord=decl['decl'].coord)
            else:
                declaration = c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    align=spec['alignment'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=decl['decl'],
                    init=decl.get('init'),
                    bitsize=decl.get('bitsize'),
                    coord=decl['decl'].coord)

            # Bare enum/struct/union/basic types need no name/type fixing;
            # everything else gets its declname hoisted and its specifier
            # list folded into a single type node.
            if isinstance(declaration.type, (
                    c_ast.Enum, c_ast.Struct, c_ast.Union,
                    c_ast.IdentifierType)):
                fixed_decl = declaration
            else:
                fixed_decl = self._fix_decl_name_type(declaration, spec['type'])

            # Add the type name defined by typedef to a
            # symbol table (for usage in the lexer)
            if typedef_namespace:
                if is_typedef:
                    self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
                else:
                    self._add_identifier(fixed_decl.name, fixed_decl.coord)

            fixed_decl = fix_atomic_specifiers(fixed_decl)
            declarations.append(fixed_decl)

        return declarations
- def _build_function_definition(self, spec, decl, param_decls, body):
- """ Builds a function definition.
- """
- if 'typedef' in spec['storage']:
- self._parse_error("Invalid typedef", decl.coord)
- declaration = self._build_declarations(
- spec=spec,
- decls=[dict(decl=decl, init=None)],
- typedef_namespace=True)[0]
- return c_ast.FuncDef(
- decl=declaration,
- param_decls=param_decls,
- body=body,
- coord=decl.coord)
- def _select_struct_union_class(self, token):
- """ Given a token (either STRUCT or UNION), selects the
- appropriate AST class.
- """
- if token == 'struct':
- return c_ast.Struct
- else:
- return c_ast.Union
    ##
    ## Precedence and associativity of operators
    ##
    # If this changes, c_generator.CGenerator.precedence_map needs to change as
    # well
    #
    # Lowest-precedence operators come first (PLY convention).
    precedence = (
        ('left', 'LOR'),
        ('left', 'LAND'),
        ('left', 'OR'),
        ('left', 'XOR'),
        ('left', 'AND'),
        ('left', 'EQ', 'NE'),
        ('left', 'GT', 'GE', 'LT', 'LE'),
        ('left', 'RSHIFT', 'LSHIFT'),
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE', 'MOD')
    )
- ##
- ## Grammar productions
- ## Implementation of the BNF defined in K&R2 A.13
- ##
- # Wrapper around a translation unit, to allow for empty input.
- # Not strictly part of the C99 Grammar, but useful in practice.
- def p_translation_unit_or_empty(self, p):
- """ translation_unit_or_empty : translation_unit
- | empty
- """
- if p[1] is None:
- p[0] = c_ast.FileAST([])
- else:
- p[0] = c_ast.FileAST(p[1])
    def p_translation_unit_1(self, p):
        """ translation_unit    : external_declaration
        """
        # Note: external_declaration is already a list
        p[0] = p[1]

    def p_translation_unit_2(self, p):
        """ translation_unit    : translation_unit external_declaration
        """
        # Accumulate into the existing list rather than rebuilding it.
        p[1].extend(p[2])
        p[0] = p[1]
    # Declarations always come as lists (because they can be
    # several in one line), so we wrap the function definition
    # into a list as well, to make the return value of
    # external_declaration homogeneous.
    def p_external_declaration_1(self, p):
        """ external_declaration    : function_definition
        """
        # Wrap the single FuncDef in a list for homogeneity.
        p[0] = [p[1]]

    def p_external_declaration_2(self, p):
        """ external_declaration    : declaration
        """
        # declaration already produces a list of nodes.
        p[0] = p[1]

    def p_external_declaration_3(self, p):
        """ external_declaration    : pp_directive
                                    | pppragma_directive
        """
        p[0] = [p[1]]

    def p_external_declaration_4(self, p):
        """ external_declaration    : SEMI
        """
        # A stray top-level semicolon contributes no declarations.
        p[0] = []

    def p_external_declaration_5(self, p):
        """ external_declaration    : static_assert
        """
        p[0] = p[1]
    def p_static_assert_declaration(self, p):
        """ static_assert           : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN
                                    | _STATIC_ASSERT LPAREN constant_expression RPAREN
        """
        # len(p) == 5 corresponds to the message-less (second) alternative.
        if len(p) == 5:
            p[0] = [c_ast.StaticAssert(p[3], None, self._token_coord(p, 1))]
        else:
            p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))]
    def p_pp_directive(self, p):
        """ pp_directive  : PPHASH
        """
        # Preprocessor directives are not handled by this parser; input is
        # expected to be preprocessed already.
        self._parse_error('Directives not supported yet',
                          self._token_coord(p, 1))
    def p_pppragma_directive(self, p):
        """ pppragma_directive      : PPPRAGMA
                                    | PPPRAGMA PPPRAGMASTR
        """
        # len(p) == 3 means the pragma carries a string argument.
        if len(p) == 3:
            p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
        else:
            p[0] = c_ast.Pragma("", self._token_coord(p, 1))
- # In function definitions, the declarator can be followed by
# a declaration list, for old "K&R style" function definitions.
    def p_function_definition_1(self, p):
        """ function_definition : id_declarator declaration_list_opt compound_statement
        """
        # no declaration specifiers - 'int' becomes the default type
        spec = dict(
            qual=[],
            alignment=[],
            storage=[],
            type=[c_ast.IdentifierType(['int'],
                                       coord=self._token_coord(p, 1))],
            function=[])

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[1],
            param_decls=p[2],
            body=p[3])
    def p_function_definition_2(self, p):
        """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
        """
        spec = p[1]

        # p[3] holds the (possibly None) K&R-style parameter declarations.
        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[2],
            param_decls=p[3],
            body=p[4])
- # Note, according to C18 A.2.2 6.7.10 static_assert-declaration _Static_assert
- # is a declaration, not a statement. We additionally recognise it as a statement
- # to fix parsing of _Static_assert inside the functions.
- #
    def p_statement(self, p):
        """ statement   : labeled_statement
                        | expression_statement
                        | compound_statement
                        | selection_statement
                        | iteration_statement
                        | jump_statement
                        | pppragma_directive
                        | static_assert
        """
        # Pure pass-through: each alternative already built its own node
        # (static_assert is accepted here too; see the comment above).
        p[0] = p[1]
- # A pragma is generally considered a decorator rather than an actual
- # statement. Still, for the purposes of analyzing an abstract syntax tree of
# C code, pragmas should not be ignored and were previously treated as a
- # statement. This presents a problem for constructs that take a statement
- # such as labeled_statements, selection_statements, and
- # iteration_statements, causing a misleading structure in the AST. For
- # example, consider the following C code.
- #
- # for (int i = 0; i < 3; i++)
- # #pragma omp critical
- # sum += 1;
- #
- # This code will compile and execute "sum += 1;" as the body of the for
- # loop. Previous implementations of PyCParser would render the AST for this
- # block of code as follows:
- #
- # For:
- # DeclList:
- # Decl: i, [], [], []
- # TypeDecl: i, []
- # IdentifierType: ['int']
- # Constant: int, 0
- # BinaryOp: <
- # ID: i
- # Constant: int, 3
- # UnaryOp: p++
- # ID: i
- # Pragma: omp critical
- # Assignment: +=
- # ID: sum
- # Constant: int, 1
- #
- # This AST misleadingly takes the Pragma as the body of the loop and the
- # assignment then becomes a sibling of the loop.
- #
- # To solve edge cases like these, the pragmacomp_or_statement rule groups
- # a pragma and its following statement (which would otherwise be orphaned)
- # using a compound block, effectively turning the above code into:
- #
- # for (int i = 0; i < 3; i++) {
- # #pragma omp critical
- # sum += 1;
- # }
    def p_pragmacomp_or_statement(self, p):
        """ pragmacomp_or_statement     : pppragma_directive statement
                                        | statement
        """
        # A pragma directly followed by a statement is grouped into a
        # Compound so the statement isn't orphaned (see the long comment
        # above this rule).
        if isinstance(p[1], c_ast.Pragma) and len(p) == 3:
            p[0] = c_ast.Compound(
                block_items=[p[1], p[2]],
                coord=self._token_coord(p, 1))
        else:
            p[0] = p[1]
- # In C, declarations can come several in a line:
- # int x, *px, romulo = 5;
- #
- # However, for the AST, we will split them to separate Decl
- # nodes.
- #
- # This rule splits its declarations and always returns a list
- # of Decl nodes, even if it's one element long.
- #
    def p_decl_body(self, p):
        """ decl_body : declaration_specifiers init_declarator_list_opt
                      | declaration_specifiers_no_type id_init_declarator_list_opt
        """
        spec = p[1]

        # p[2] (init_declarator_list_opt) is either a list or None
        #
        if p[2] is None:
            # By the standard, you must have at least one declarator unless
            # declaring a structure tag, a union tag, or the members of an
            # enumeration.
            #
            ty = spec['type']
            s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
            if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
                decls = [c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    align=spec['alignment'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=ty[0],
                    init=None,
                    bitsize=None,
                    coord=ty[0].coord)]

            # However, this case can also occur on redeclared identifiers in
            # an inner scope. The trouble is that the redeclared type's name
            # gets grouped into declaration_specifiers; _build_declarations
            # compensates for this.
            #
            else:
                decls = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=None, init=None)],
                    typedef_namespace=True)

        else:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2],
                typedef_namespace=True)

        p[0] = decls
- # The declaration has been split to a decl_body sub-rule and
- # SEMI, because having them in a single rule created a problem
- # for defining typedefs.
- #
- # If a typedef line was directly followed by a line using the
- # type defined with the typedef, the type would not be
- # recognized. This is because to reduce the declaration rule,
- # the parser's lookahead asked for the token after SEMI, which
- # was the type from the next line, and the lexer had no chance
- # to see the updated type symbol table.
- #
- # Splitting solves this problem, because after seeing SEMI,
- # the parser reduces decl_body, which actually adds the new
- # type into the table to be seen by the lexer before the next
- # line is reached.
    def p_declaration(self, p):
        """ declaration : decl_body SEMI
        """
        # decl_body/SEMI are split rules so typedef names enter the symbol
        # table before the lookahead token is lexed (see comment above).
        p[0] = p[1]
- # Since each declaration is a list of declarations, this
- # rule will combine all the declarations and return a single
- # list
- #
- def p_declaration_list(self, p):
- """ declaration_list : declaration
- | declaration_list declaration
- """
- p[0] = p[1] if len(p) == 2 else p[1] + p[2]
    # To know when declaration-specifiers end and declarators begin,
    # we require declaration-specifiers to have at least one
    # type-specifier, and disallow typedef-names after we've seen any
    # type-specifier. These are both required by the spec.
    #
    # These rules are right-recursive, so the new specifier is prepended
    # (default append=False) to keep source order.
    def p_declaration_specifiers_no_type_1(self, p):
        """ declaration_specifiers_no_type  : type_qualifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')

    def p_declaration_specifiers_no_type_2(self, p):
        """ declaration_specifiers_no_type  : storage_class_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')

    def p_declaration_specifiers_no_type_3(self, p):
        """ declaration_specifiers_no_type  : function_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'function')

    # Without this, `typedef _Atomic(T) U` will parse incorrectly because the
    # _Atomic qualifier will match, instead of the specifier.
    def p_declaration_specifiers_no_type_4(self, p):
        """ declaration_specifiers_no_type  : atomic_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'type')

    def p_declaration_specifiers_no_type_5(self, p):
        """ declaration_specifiers_no_type  : alignment_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'alignment')
    # These rules are left-recursive, so the new specifier is appended
    # (append=True) to keep source order.
    def p_declaration_specifiers_1(self, p):
        """ declaration_specifiers  : declaration_specifiers type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_declaration_specifiers_2(self, p):
        """ declaration_specifiers  : declaration_specifiers storage_class_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)

    def p_declaration_specifiers_3(self, p):
        """ declaration_specifiers  : declaration_specifiers function_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)

    def p_declaration_specifiers_4(self, p):
        """ declaration_specifiers  : declaration_specifiers type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_5(self, p):
        """ declaration_specifiers  : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_declaration_specifiers_6(self, p):
        """ declaration_specifiers  : declaration_specifiers_no_type type_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_7(self, p):
        """ declaration_specifiers  : declaration_specifiers alignment_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment', append=True)
def p_storage_class_specifier(self, p):
    """ storage_class_specifier : AUTO
                                | REGISTER
                                | STATIC
                                | EXTERN
                                | TYPEDEF
                                | _THREAD_LOCAL
    """
    # Pass the keyword's text straight through.
    p[0] = p[1]

def p_function_specifier(self, p):
    """ function_specifier : INLINE
                           | _NORETURN
    """
    # Pass the keyword's text straight through.
    p[0] = p[1]
def p_type_specifier_no_typeid(self, p):
    """ type_specifier_no_typeid : VOID
                                 | _BOOL
                                 | CHAR
                                 | SHORT
                                 | INT
                                 | LONG
                                 | FLOAT
                                 | DOUBLE
                                 | _COMPLEX
                                 | SIGNED
                                 | UNSIGNED
                                 | __INT128
    """
    # Wrap the single keyword in an IdentifierType node.
    p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))

def p_type_specifier(self, p):
    """ type_specifier : typedef_name
                       | enum_specifier
                       | struct_or_union_specifier
                       | type_specifier_no_typeid
                       | atomic_specifier
    """
    p[0] = p[1]

# See section 6.7.2.4 of the C11 standard.
def p_atomic_specifier(self, p):
    """ atomic_specifier : _ATOMIC LPAREN type_name RPAREN
    """
    # Represent _Atomic(T) by tagging T's qualifier list.
    inner = p[3]
    inner.quals.append('_Atomic')
    p[0] = inner
def p_type_qualifier(self, p):
    """ type_qualifier : CONST
                       | RESTRICT
                       | VOLATILE
                       | _ATOMIC
    """
    # The qualifier keyword's text is the value.
    p[0] = p[1]
def p_init_declarator_list(self, p):
    """ init_declarator_list : init_declarator
                             | init_declarator_list COMMA init_declarator
    """
    # Build a flat Python list of declarator dicts.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

# Returns a {decl=<declarator> : init=<initializer>} dictionary
# If there's no initializer, uses None
#
def p_init_declarator(self, p):
    """ init_declarator : declarator
                        | declarator EQUALS initializer
    """
    has_init = len(p) > 2
    p[0] = dict(decl=p[1], init=p[3] if has_init else None)

def p_id_init_declarator_list(self, p):
    """ id_init_declarator_list : id_init_declarator
                                | id_init_declarator_list COMMA init_declarator
    """
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

def p_id_init_declarator(self, p):
    """ id_init_declarator : id_declarator
                           | id_declarator EQUALS initializer
    """
    has_init = len(p) > 2
    p[0] = dict(decl=p[1], init=p[3] if has_init else None)
# Require at least one type specifier in a specifier-qualifier-list
#
def p_specifier_qualifier_list_1(self, p):
    """ specifier_qualifier_list : specifier_qualifier_list type_specifier_no_typeid
    """
    p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

def p_specifier_qualifier_list_2(self, p):
    """ specifier_qualifier_list : specifier_qualifier_list type_qualifier
    """
    p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

def p_specifier_qualifier_list_3(self, p):
    """ specifier_qualifier_list : type_specifier
    """
    p[0] = self._add_declaration_specifier(None, p[1], 'type')

def p_specifier_qualifier_list_4(self, p):
    """ specifier_qualifier_list : type_qualifier_list type_specifier
    """
    # Build the spec dict directly: qualifiers first, then the one type.
    p[0] = dict(qual=p[1], alignment=[], storage=[], type=[p[2]], function=[])

def p_specifier_qualifier_list_5(self, p):
    """ specifier_qualifier_list : alignment_specifier
    """
    p[0] = dict(qual=[], alignment=[p[1]], storage=[], type=[], function=[])

def p_specifier_qualifier_list_6(self, p):
    """ specifier_qualifier_list : specifier_qualifier_list alignment_specifier
    """
    p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment')
# TYPEID is allowed here (and in other struct/enum related tag names), because
# struct/enum tags reside in their own namespace and can be named the same as types
#
def p_struct_or_union_specifier_1(self, p):
    """ struct_or_union_specifier : struct_or_union ID
                                  | struct_or_union TYPEID
    """
    node_cls = self._select_struct_union_class(p[1])
    # decls=None means there is no member list at all (forward reference).
    p[0] = node_cls(
        name=p[2],
        decls=None,
        coord=self._token_coord(p, 2))

def p_struct_or_union_specifier_2(self, p):
    """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
                                  | struct_or_union brace_open brace_close
    """
    node_cls = self._select_struct_union_class(p[1])
    # An empty brace pair yields an empty (but present) member list.
    members = [] if len(p) == 4 else p[3]
    p[0] = node_cls(
        name=None,
        decls=members,
        coord=self._token_coord(p, 2))

def p_struct_or_union_specifier_3(self, p):
    """ struct_or_union_specifier : struct_or_union ID brace_open struct_declaration_list brace_close
                                  | struct_or_union ID brace_open brace_close
                                  | struct_or_union TYPEID brace_open struct_declaration_list brace_close
                                  | struct_or_union TYPEID brace_open brace_close
    """
    node_cls = self._select_struct_union_class(p[1])
    # len(p) == 5 is the empty-body variant.
    members = [] if len(p) == 5 else p[4]
    p[0] = node_cls(
        name=p[2],
        decls=members,
        coord=self._token_coord(p, 2))

def p_struct_or_union(self, p):
    """ struct_or_union : STRUCT
                        | UNION
    """
    p[0] = p[1]
# Combine all declarations into a single list
#
def p_struct_declaration_list(self, p):
    """ struct_declaration_list : struct_declaration
                                | struct_declaration_list struct_declaration
    """
    # A struct_declaration may be None (bare ';'), so coalesce to [].
    if len(p) == 2:
        p[0] = p[1] or []
    else:
        p[0] = p[1] + (p[2] or [])

def p_struct_declaration_1(self, p):
    """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
    """
    spec = p[1]
    assert 'typedef' not in spec['storage']

    if p[2] is not None:
        # Ordinary member declarators.
        decls = self._build_declarations(spec=spec, decls=p[2])
    elif len(spec['type']) == 1:
        # Anonymous struct/union, gcc extension, C1x feature.
        # Although the standard only allows structs/unions here, I see no
        # reason to disallow other types since some compilers have typedefs
        # here, and pycparser isn't about rejecting all invalid code.
        #
        node = spec['type'][0]
        if isinstance(node, c_ast.Node):
            decl_type = node
        else:
            decl_type = c_ast.IdentifierType(node)
        decls = self._build_declarations(
            spec=spec, decls=[dict(decl=decl_type)])
    else:
        # Structure/union members can have the same names as typedefs.
        # The trouble is that the member's name gets grouped into
        # specifier_qualifier_list; _build_declarations compensates.
        #
        decls = self._build_declarations(
            spec=spec, decls=[dict(decl=None, init=None)])

    p[0] = decls

def p_struct_declaration_2(self, p):
    """ struct_declaration : SEMI
    """
    # A lone semicolon declares nothing.
    p[0] = None

def p_struct_declaration_3(self, p):
    """ struct_declaration : pppragma_directive
    """
    p[0] = [p[1]]
def p_struct_declarator_list(self, p):
    """ struct_declarator_list : struct_declarator
                               | struct_declarator_list COMMA struct_declarator
    """
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]

# struct_declarator passes up a dict with the keys: decl (for
# the underlying declarator) and bitsize (for the bitsize)
#
def p_struct_declarator_1(self, p):
    """ struct_declarator : declarator
    """
    p[0] = {'decl': p[1], 'bitsize': None}

def p_struct_declarator_2(self, p):
    """ struct_declarator : declarator COLON constant_expression
                          | COLON constant_expression
    """
    if len(p) > 3:
        p[0] = {'decl': p[1], 'bitsize': p[3]}
    else:
        # Anonymous bit-field: no declarator, just a width.
        p[0] = {'decl': c_ast.TypeDecl(None, None, None, None), 'bitsize': p[2]}
def p_enum_specifier_1(self, p):
    """ enum_specifier : ENUM ID
                       | ENUM TYPEID
    """
    # Reference to a named enum with no value list.
    p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))

def p_enum_specifier_2(self, p):
    """ enum_specifier : ENUM brace_open enumerator_list brace_close
    """
    # Anonymous enum with a value list.
    p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))

def p_enum_specifier_3(self, p):
    """ enum_specifier : ENUM ID brace_open enumerator_list brace_close
                       | ENUM TYPEID brace_open enumerator_list brace_close
    """
    p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1))

def p_enumerator_list(self, p):
    """ enumerator_list : enumerator
                        | enumerator_list COMMA
                        | enumerator_list COMMA enumerator
    """
    if len(p) == 2:
        p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
    elif len(p) == 3:
        # Trailing comma after the last enumerator: nothing to add.
        p[0] = p[1]
    else:
        p[1].enumerators.append(p[3])
        p[0] = p[1]

def p_alignment_specifier(self, p):
    """ alignment_specifier : _ALIGNAS LPAREN type_name RPAREN
                            | _ALIGNAS LPAREN constant_expression RPAREN
    """
    p[0] = c_ast.Alignas(p[3], self._token_coord(p, 1))
def p_enumerator(self, p):
    """ enumerator : ID
                   | ID EQUALS constant_expression
    """
    # Value is None when no '=' initializer was given.
    value = None if len(p) == 2 else p[3]
    enumerator = c_ast.Enumerator(
        p[1], value,
        self._token_coord(p, 1))
    # Enumerators introduce names into the current scope.
    self._add_identifier(enumerator.name, enumerator.coord)

    p[0] = enumerator

def p_declarator(self, p):
    """ declarator : id_declarator
                   | typeid_declarator
    """
    p[0] = p[1]
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_xxx_declarator_1(self, p):
    """ xxx_declarator : direct_xxx_declarator
    """
    p[0] = p[1]

@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_xxx_declarator_2(self, p):
    """ xxx_declarator : pointer direct_xxx_declarator
    """
    # Wrap the direct declarator in the pointer modifier chain.
    p[0] = self._type_modify_decl(p[2], p[1])

@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_1(self, p):
    """ direct_xxx_declarator : yyy
    """
    # Base case: a plain name becomes a TypeDecl whose type is filled later.
    p[0] = c_ast.TypeDecl(
        declname=p[1],
        type=None,
        quals=None,
        align=None,
        coord=self._token_coord(p, 1))

@parameterized(('id', 'ID'), ('typeid', 'TYPEID'))
def p_direct_xxx_declarator_2(self, p):
    """ direct_xxx_declarator : LPAREN xxx_declarator RPAREN
    """
    p[0] = p[2]
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_3(self, p):
    """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
    """
    # Accept dimension qualifiers
    # Per C99 6.7.5.3 p7
    quals = (p[3] if len(p) > 5 else []) or []
    arr = c_ast.ArrayDecl(
        type=None,
        dim=p[4] if len(p) > 5 else p[3],
        dim_quals=quals,
        coord=p[1].coord)

    p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_4(self, p):
    """ direct_xxx_declarator : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
                              | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
    """
    # Using slice notation for PLY objects doesn't work in Python 3 for the
    # version of PLY embedded with pycparser; see PLY Google Code issue 30.
    # Work around that here by listing the two elements separately.
    listed_quals = [item if isinstance(item, list) else [item]
                    for item in [p[3], p[4]]]
    dim_quals = [qual for sublist in listed_quals for qual in sublist
                 if qual is not None]
    arr = c_ast.ArrayDecl(
        type=None,
        dim=p[5],
        dim_quals=dim_quals,
        coord=p[1].coord)

    p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

# Special for VLAs
#
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_5(self, p):
    """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
    """
    # The '*' dimension of a VLA is kept as an ID node.
    arr = c_ast.ArrayDecl(
        type=None,
        dim=c_ast.ID(p[4], self._token_coord(p, 4)),
        dim_quals=p[3] if p[3] is not None else [],
        coord=p[1].coord)

    p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
def p_direct_xxx_declarator_6(self, p):
    """ direct_xxx_declarator : direct_xxx_declarator LPAREN parameter_type_list RPAREN
                              | direct_xxx_declarator LPAREN identifier_list_opt RPAREN
    """
    func = c_ast.FuncDecl(
        args=p[3],
        type=None,
        coord=p[1].coord)

    # To see why _get_yacc_lookahead_token is needed, consider:
    #   typedef char TT;
    #   void foo(int TT) { TT = 10; }
    # Outside the function, TT is a typedef, but inside (starting and
    # ending with the braces) it's a parameter. The trouble begins with
    # yacc's lookahead token. We don't know if we're declaring or
    # defining a function until we see LBRACE, but if we wait for yacc to
    # trigger a rule on that token, then TT will have already been read
    # and incorrectly interpreted as TYPEID. We need to add the
    # parameters to the scope the moment the lexer sees LBRACE.
    #
    if self._get_yacc_lookahead_token().type == "LBRACE":
        if func.args is not None:
            for param in func.args.params:
                if isinstance(param, c_ast.EllipsisParam):
                    break
                self._add_identifier(param.name, param.coord)

    p[0] = self._type_modify_decl(decl=p[1], modifier=func)
def p_pointer(self, p):
    """ pointer : TIMES type_qualifier_list_opt
                | TIMES type_qualifier_list_opt pointer
    """
    coord = self._token_coord(p, 1)
    # Pointer decls nest from inside out. This is important when different
    # levels have different qualifiers. For example:
    #
    #  char * const * p;
    #
    # Means "pointer to const pointer to char"
    #
    # While:
    #
    #  char ** const p;
    #
    # Means "const pointer to pointer to char"
    #
    # So when we construct PtrDecl nestings, the leftmost pointer goes in
    # as the most nested type.
    nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)
    if len(p) > 3:
        # Attach the new PtrDecl at the innermost position of the
        # already-built pointer chain.
        tail_type = p[3]
        while tail_type.type is not None:
            tail_type = tail_type.type
        tail_type.type = nested_type
        p[0] = p[3]
    else:
        p[0] = nested_type

def p_type_qualifier_list(self, p):
    """ type_qualifier_list : type_qualifier
                            | type_qualifier_list type_qualifier
    """
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1] + [p[2]]
def p_parameter_type_list(self, p):
    """ parameter_type_list : parameter_list
                            | parameter_list COMMA ELLIPSIS
    """
    # A trailing "..." is recorded as an EllipsisParam on the param list.
    if len(p) > 2:
        p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3)))
    p[0] = p[1]

def p_parameter_list(self, p):
    """ parameter_list : parameter_declaration
                       | parameter_list COMMA parameter_declaration
    """
    if len(p) == 2:  # single parameter
        p[0] = c_ast.ParamList([p[1]], p[1].coord)
    else:
        p[1].params.append(p[3])
        p[0] = p[1]
# From ISO/IEC 9899:TC2, 6.7.5.3.11:
# "If, in a parameter declaration, an identifier can be treated either
#  as a typedef name or as a parameter name, it shall be taken as a
#  typedef name."
#
# Inside a parameter declaration, once we've reduced declaration specifiers,
# if we shift in an LPAREN and see a TYPEID, it could be either an abstract
# declarator or a declarator nested inside parens. This rule tells us to
# always treat it as an abstract declarator. Therefore, we only accept
# `id_declarator`s and `typeid_noparen_declarator`s.
def p_parameter_declaration_1(self, p):
    """ parameter_declaration : declaration_specifiers id_declarator
                              | declaration_specifiers typeid_noparen_declarator
    """
    spec = p[1]
    # No type specifier at all defaults to 'int' (pre-C99 style).
    if not spec['type']:
        spec['type'] = [c_ast.IdentifierType(['int'],
                                             coord=self._token_coord(p, 1))]
    p[0] = self._build_declarations(
        spec=spec,
        decls=[dict(decl=p[2])])[0]

def p_parameter_declaration_2(self, p):
    """ parameter_declaration : declaration_specifiers abstract_declarator_opt
    """
    spec = p[1]
    if not spec['type']:
        spec['type'] = [c_ast.IdentifierType(['int'],
                                             coord=self._token_coord(p, 1))]

    # Parameters can have the same names as typedefs. The trouble is that
    # the parameter's name gets grouped into declaration_specifiers, making
    # it look like an old-style declaration; compensate.
    #
    if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
            self._is_type_in_scope(spec['type'][-1].names[0]):
        decl = self._build_declarations(
            spec=spec,
            decls=[dict(decl=p[2], init=None)])[0]

    # This truly is an old-style parameter declaration
    #
    else:
        decl = c_ast.Typename(
            name='',
            quals=spec['qual'],
            align=None,
            type=p[2] or c_ast.TypeDecl(None, None, None, None),
            coord=self._token_coord(p, 2))
        typename = spec['type']
        decl = self._fix_decl_name_type(decl, typename)

    p[0] = decl
def p_identifier_list(self, p):
    """ identifier_list : identifier
                        | identifier_list COMMA identifier
    """
    # Old-style (K&R) parameter names are collected into a ParamList.
    if len(p) == 2:  # single parameter
        p[0] = c_ast.ParamList([p[1]], p[1].coord)
    else:
        p[1].params.append(p[3])
        p[0] = p[1]
def p_initializer_1(self, p):
    """ initializer : assignment_expression
    """
    p[0] = p[1]

def p_initializer_2(self, p):
    """ initializer : brace_open initializer_list_opt brace_close
                    | brace_open initializer_list COMMA brace_close
    """
    # An empty brace pair becomes an empty InitList.
    if p[2] is None:
        p[0] = c_ast.InitList([], self._token_coord(p, 1))
    else:
        p[0] = p[2]

def p_initializer_list(self, p):
    """ initializer_list : designation_opt initializer
                         | initializer_list COMMA designation_opt initializer
    """
    if len(p) == 3:  # single initializer
        init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
        p[0] = c_ast.InitList([init], p[2].coord)
    else:
        init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
        p[1].exprs.append(init)
        p[0] = p[1]

def p_designation(self, p):
    """ designation : designator_list EQUALS
    """
    p[0] = p[1]

# Designators are represented as a list of nodes, in the order in which
# they're written in the code.
#
def p_designator_list(self, p):
    """ designator_list : designator
                        | designator_list designator
    """
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1] + [p[2]]

def p_designator(self, p):
    """ designator : LBRACKET constant_expression RBRACKET
                   | PERIOD identifier
    """
    # Either way, the designator payload is the second symbol.
    p[0] = p[2]
def p_type_name(self, p):
    """ type_name : specifier_qualifier_list abstract_declarator_opt
    """
    # Note: copy the qualifier list so the spec dict isn't aliased.
    typename = c_ast.Typename(
        name='',
        quals=p[1]['qual'][:],
        align=None,
        type=p[2] or c_ast.TypeDecl(None, None, None, None),
        coord=self._token_coord(p, 2))

    p[0] = self._fix_decl_name_type(typename, p[1]['type'])
def p_abstract_declarator_1(self, p):
    """ abstract_declarator : pointer
    """
    # A pure pointer abstract declarator wraps an empty TypeDecl.
    dummytype = c_ast.TypeDecl(None, None, None, None)
    p[0] = self._type_modify_decl(
        decl=dummytype,
        modifier=p[1])

def p_abstract_declarator_2(self, p):
    """ abstract_declarator : pointer direct_abstract_declarator
    """
    p[0] = self._type_modify_decl(p[2], p[1])

def p_abstract_declarator_3(self, p):
    """ abstract_declarator : direct_abstract_declarator
    """
    p[0] = p[1]

# Creating and using direct_abstract_declarator_opt here
# instead of listing both direct_abstract_declarator and the
# lack of it in the beginning of _1 and _2 caused two
# shift/reduce errors.
#
def p_direct_abstract_declarator_1(self, p):
    """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """
    p[0] = p[2]

def p_direct_abstract_declarator_2(self, p):
    """ direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
    """
    arr = c_ast.ArrayDecl(
        type=None,
        dim=p[3],
        dim_quals=[],
        coord=p[1].coord)

    p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

def p_direct_abstract_declarator_3(self, p):
    """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
    """
    quals = (p[2] if len(p) > 4 else []) or []
    p[0] = c_ast.ArrayDecl(
        type=c_ast.TypeDecl(None, None, None, None),
        dim=p[3] if len(p) > 4 else p[2],
        dim_quals=quals,
        coord=self._token_coord(p, 1))

def p_direct_abstract_declarator_4(self, p):
    """ direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET
    """
    # VLA-style '*' dimension on an abstract declarator.
    arr = c_ast.ArrayDecl(
        type=None,
        dim=c_ast.ID(p[3], self._token_coord(p, 3)),
        dim_quals=[],
        coord=p[1].coord)

    p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

def p_direct_abstract_declarator_5(self, p):
    """ direct_abstract_declarator : LBRACKET TIMES RBRACKET
    """
    p[0] = c_ast.ArrayDecl(
        type=c_ast.TypeDecl(None, None, None, None),
        dim=c_ast.ID(p[3], self._token_coord(p, 3)),
        dim_quals=[],
        coord=self._token_coord(p, 1))

def p_direct_abstract_declarator_6(self, p):
    """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
    """
    func = c_ast.FuncDecl(
        args=p[3],
        type=None,
        coord=p[1].coord)
    p[0] = self._type_modify_decl(decl=p[1], modifier=func)

def p_direct_abstract_declarator_7(self, p):
    """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN
    """
    p[0] = c_ast.FuncDecl(
        args=p[2],
        type=c_ast.TypeDecl(None, None, None, None),
        coord=self._token_coord(p, 1))
# declaration is a list, statement isn't. To make it consistent, block_item
# will always be a list
#
def p_block_item(self, p):
    """ block_item : declaration
                   | statement
    """
    item = p[1]
    p[0] = item if isinstance(item, list) else [item]

# Since we made block_item a list, this just combines lists
#
def p_block_item_list(self, p):
    """ block_item_list : block_item
                        | block_item_list block_item
    """
    # Empty block items (plain ';') produce [None], so ignore them
    p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2]

def p_compound_statement_1(self, p):
    """ compound_statement : brace_open block_item_list_opt brace_close """
    p[0] = c_ast.Compound(
        block_items=p[2],
        coord=self._token_coord(p, 1))
def p_labeled_statement_1(self, p):
    """ labeled_statement : ID COLON pragmacomp_or_statement """
    p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1))

def p_labeled_statement_2(self, p):
    """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """
    p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1))

def p_labeled_statement_3(self, p):
    """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """
    p[0] = c_ast.Default([p[3]], self._token_coord(p, 1))

def p_selection_statement_1(self, p):
    """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """
    p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1))

def p_selection_statement_2(self, p):
    """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """
    p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1))

def p_selection_statement_3(self, p):
    """ selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement """
    # fix_switch_cases reorganizes the statement under case/default labels.
    p[0] = fix_switch_cases(
        c_ast.Switch(p[3], p[5], self._token_coord(p, 1)))

def p_iteration_statement_1(self, p):
    """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """
    p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1))

def p_iteration_statement_2(self, p):
    """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """
    # Note the argument order: condition (p[5]) first, then body (p[2]).
    p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1))

def p_iteration_statement_3(self, p):
    """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
    p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1))

def p_iteration_statement_4(self, p):
    """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
    # C99 for-loop declaration is wrapped in a DeclList.
    p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)),
                     p[4], p[6], p[8], self._token_coord(p, 1))
def p_jump_statement_1(self, p):
    """ jump_statement : GOTO ID SEMI """
    p[0] = c_ast.Goto(p[2], self._token_coord(p, 1))

def p_jump_statement_2(self, p):
    """ jump_statement : BREAK SEMI """
    p[0] = c_ast.Break(self._token_coord(p, 1))

def p_jump_statement_3(self, p):
    """ jump_statement : CONTINUE SEMI """
    p[0] = c_ast.Continue(self._token_coord(p, 1))

def p_jump_statement_4(self, p):
    """ jump_statement : RETURN expression SEMI
                       | RETURN SEMI
    """
    # Bare `return;` has no expression child.
    p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1))

def p_expression_statement(self, p):
    """ expression_statement : expression_opt SEMI """
    # A lone ';' becomes an explicit EmptyStatement node.
    if p[1] is None:
        p[0] = c_ast.EmptyStatement(self._token_coord(p, 2))
    else:
        p[0] = p[1]
def p_expression(self, p):
    """ expression : assignment_expression
                   | expression COMMA assignment_expression
    """
    if len(p) == 2:
        p[0] = p[1]
    else:
        # Comma expressions accumulate into a single ExprList.
        if not isinstance(p[1], c_ast.ExprList):
            p[1] = c_ast.ExprList([p[1]], p[1].coord)
        p[1].exprs.append(p[3])
        p[0] = p[1]

def p_parenthesized_compound_expression(self, p):
    """ assignment_expression : LPAREN compound_statement RPAREN """
    # GNU statement-expression extension: ({ ... }) used as an expression.
    p[0] = p[2]

def p_typedef_name(self, p):
    """ typedef_name : TYPEID """
    p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
def p_assignment_expression(self, p):
    """ assignment_expression : conditional_expression
                              | unary_expression assignment_operator assignment_expression
    """
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord)

# K&R2 defines these as many separate rules, to encode
# precedence and associativity. Why work hard ? I'll just use
# the built in precedence/associativity specification feature
# of PLY. (see precedence declaration above)
#
def p_assignment_operator(self, p):
    """ assignment_operator : EQUALS
                            | XOREQUAL
                            | TIMESEQUAL
                            | DIVEQUAL
                            | MODEQUAL
                            | PLUSEQUAL
                            | MINUSEQUAL
                            | LSHIFTEQUAL
                            | RSHIFTEQUAL
                            | ANDEQUAL
                            | OREQUAL
    """
    p[0] = p[1]

def p_constant_expression(self, p):
    """ constant_expression : conditional_expression """
    p[0] = p[1]

def p_conditional_expression(self, p):
    """ conditional_expression : binary_expression
                               | binary_expression CONDOP expression COLON conditional_expression
    """
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord)
def p_binary_expression(self, p):
    """ binary_expression : cast_expression
                          | binary_expression TIMES binary_expression
                          | binary_expression DIVIDE binary_expression
                          | binary_expression MOD binary_expression
                          | binary_expression PLUS binary_expression
                          | binary_expression MINUS binary_expression
                          | binary_expression RSHIFT binary_expression
                          | binary_expression LSHIFT binary_expression
                          | binary_expression LT binary_expression
                          | binary_expression LE binary_expression
                          | binary_expression GE binary_expression
                          | binary_expression GT binary_expression
                          | binary_expression EQ binary_expression
                          | binary_expression NE binary_expression
                          | binary_expression AND binary_expression
                          | binary_expression OR binary_expression
                          | binary_expression XOR binary_expression
                          | binary_expression LAND binary_expression
                          | binary_expression LOR binary_expression
    """
    # Precedence/associativity is resolved by PLY's precedence table.
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)

def p_cast_expression_1(self, p):
    """ cast_expression : unary_expression """
    p[0] = p[1]

def p_cast_expression_2(self, p):
    """ cast_expression : LPAREN type_name RPAREN cast_expression """
    p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1))
def p_unary_expression_1(self, p):
    """ unary_expression : postfix_expression """
    p[0] = p[1]

def p_unary_expression_2(self, p):
    """ unary_expression : PLUSPLUS unary_expression
                         | MINUSMINUS unary_expression
                         | unary_operator cast_expression
    """
    # Prefix operator: operator text in p[1], operand in p[2].
    p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord)

def p_unary_expression_3(self, p):
    """ unary_expression : SIZEOF unary_expression
                         | SIZEOF LPAREN type_name RPAREN
                         | _ALIGNOF LPAREN type_name RPAREN
    """
    # Operand is either the bare expression or the parenthesized type_name.
    p[0] = c_ast.UnaryOp(
        p[1],
        p[2] if len(p) == 3 else p[3],
        self._token_coord(p, 1))

def p_unary_operator(self, p):
    """ unary_operator : AND
                       | TIMES
                       | PLUS
                       | MINUS
                       | NOT
                       | LNOT
    """
    p[0] = p[1]
def p_postfix_expression_1(self, p):
    """ postfix_expression : primary_expression """
    p[0] = p[1]

def p_postfix_expression_2(self, p):
    """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """
    p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)

def p_postfix_expression_3(self, p):
    """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN
                           | postfix_expression LPAREN RPAREN
    """
    # args is None for a zero-argument call.
    p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)

def p_postfix_expression_4(self, p):
    """ postfix_expression : postfix_expression PERIOD ID
                           | postfix_expression PERIOD TYPEID
                           | postfix_expression ARROW ID
                           | postfix_expression ARROW TYPEID
    """
    field = c_ast.ID(p[3], self._token_coord(p, 3))
    p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)

def p_postfix_expression_5(self, p):
    """ postfix_expression : postfix_expression PLUSPLUS
                           | postfix_expression MINUSMINUS
    """
    # Postfix inc/dec is encoded with a 'p' prefix ('p++' / 'p--').
    p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord)

def p_postfix_expression_6(self, p):
    """ postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close
                           | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
    """
    p[0] = c_ast.CompoundLiteral(p[2], p[5])
def p_primary_expression_1(self, p):
    """ primary_expression : identifier """
    p[0] = p[1]

def p_primary_expression_2(self, p):
    """ primary_expression : constant """
    p[0] = p[1]

def p_primary_expression_3(self, p):
    """ primary_expression : unified_string_literal
                           | unified_wstring_literal
    """
    p[0] = p[1]

def p_primary_expression_4(self, p):
    """ primary_expression : LPAREN expression RPAREN """
    p[0] = p[2]

def p_primary_expression_5(self, p):
    """ primary_expression : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN
    """
    # offsetof(...) is modeled as a FuncCall with two "arguments".
    coord = self._token_coord(p, 1)
    p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord),
                          c_ast.ExprList([p[3], p[5]], coord),
                          coord)

def p_offsetof_member_designator(self, p):
    """ offsetof_member_designator : identifier
                                   | offsetof_member_designator PERIOD identifier
                                   | offsetof_member_designator LBRACKET expression RBRACKET
    """
    if len(p) == 2:
        p[0] = p[1]
    elif len(p) == 4:
        p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
    elif len(p) == 5:
        p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
    else:
        raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p))
def p_argument_expression_list(self, p):
    """ argument_expression_list : assignment_expression
                                 | argument_expression_list COMMA assignment_expression
    """
    if len(p) == 2:  # single expr
        p[0] = c_ast.ExprList([p[1]], p[1].coord)
    else:
        p[1].exprs.append(p[3])
        p[0] = p[1]

def p_identifier(self, p):
    """ identifier : ID """
    p[0] = c_ast.ID(p[1], self._token_coord(p, 1))
def p_constant_1(self, p):
    """ constant : INT_CONST_DEC
                 | INT_CONST_OCT
                 | INT_CONST_HEX
                 | INT_CONST_BIN
                 | INT_CONST_CHAR
    """
    # Derive the constant's C type from its u/U and l/L suffixes;
    # at most 3 suffix characters are possible ('ull' etc.), so only
    # the last three characters need to be examined.
    uCount = 0
    lCount = 0
    for x in p[1][-3:]:
        if x in ('l', 'L'):
            lCount += 1
        elif x in ('u', 'U'):
            uCount += 1
    # Fix: removed the unused local `t = ''` that was dead code here.
    if uCount > 1:
        raise ValueError('Constant cannot have more than one u/U suffix.')
    elif lCount > 2:
        raise ValueError('Constant cannot have more than two l/L suffix.')
    prefix = 'unsigned ' * uCount + 'long ' * lCount
    p[0] = c_ast.Constant(
        prefix + 'int', p[1], self._token_coord(p, 1))
def p_constant_2(self, p):
    """ constant : FLOAT_CONST
                 | HEX_FLOAT_CONST
    """
    # Hex float constants contain 'x'/'X'; treat them as plain float.
    literal = p[1]
    if 'x' in literal.lower():
        ctype = 'float'
    elif literal[-1] in ('f', 'F'):
        ctype = 'float'
    elif literal[-1] in ('l', 'L'):
        ctype = 'long double'
    else:
        ctype = 'double'

    p[0] = c_ast.Constant(
        ctype, literal, self._token_coord(p, 1))

def p_constant_3(self, p):
    """ constant : CHAR_CONST
                 | WCHAR_CONST
                 | U8CHAR_CONST
                 | U16CHAR_CONST
                 | U32CHAR_CONST
    """
    # All character constants are typed 'char' here.
    p[0] = c_ast.Constant(
        'char', p[1], self._token_coord(p, 1))
# The "unified" string and wstring literal rules are for supporting
# concatenation of adjacent string literals.
# I.e. "hello " "world" is seen by the C compiler as a single string literal
# with the value "hello world"
#
def p_unified_string_literal(self, p):
    """ unified_string_literal : STRING_LITERAL
                               | unified_string_literal STRING_LITERAL
    """
    if len(p) == 2:  # single literal
        p[0] = c_ast.Constant(
            'string', p[1], self._token_coord(p, 1))
    else:
        # Drop the closing quote of the left literal and the opening
        # quote of the right one, then splice.
        p[1].value = p[1].value[:-1] + p[2][1:]
        p[0] = p[1]

def p_unified_wstring_literal(self, p):
    """ unified_wstring_literal : WSTRING_LITERAL
                                | U8STRING_LITERAL
                                | U16STRING_LITERAL
                                | U32STRING_LITERAL
                                | unified_wstring_literal WSTRING_LITERAL
                                | unified_wstring_literal U8STRING_LITERAL
                                | unified_wstring_literal U16STRING_LITERAL
                                | unified_wstring_literal U32STRING_LITERAL
    """
    if len(p) == 2:  # single literal
        p[0] = c_ast.Constant(
            'string', p[1], self._token_coord(p, 1))
    else:
        # Skip the 2-char encoding prefix+quote of the right literal.
        p[1].value = p[1].value.rstrip()[:-1] + p[2][2:]
        p[0] = p[1]
def p_brace_open(self, p):
    """ brace_open : LBRACE
    """
    p[0] = p[1]
    # Propagate the brace's line number to the production result so
    # enclosing rules can use it for coordinates.
    p.set_lineno(0, p.lineno(1))

def p_brace_close(self, p):
    """ brace_close : RBRACE
    """
    p[0] = p[1]
    p.set_lineno(0, p.lineno(1))

def p_empty(self, p):
    'empty : '
    # The empty production yields None.
    p[0] = None
def p_error(self, p):
    # If error recovery is added here in the future, make sure
    # _get_yacc_lookahead_token still works!
    #
    if p is None:
        # PLY calls p_error with None at unexpected end of input.
        self._parse_error('At end of input', self.clex.filename)
    else:
        self._parse_error(
            'before: %s' % p.value,
            self._coord(lineno=p.lineno,
                        column=self.clex.find_tok_column(p)))
|