dot_parser.py 15 KB


  1. """Graphviz's dot language parser.
  2. The dotparser parses Graphviz files written in
  3. the dot language and transforms them
  4. into a class representation defined by `pydot`.
  5. Author: Michael Krause <michael@krause-software.de>
  6. Fixes by: Ero Carrera <ero.carrera@gmail.com>
  7. """
  8. from __future__ import division
  9. from __future__ import print_function
  10. import sys
  11. from pyparsing import (
  12. nestedExpr, Literal, CaselessLiteral,
  13. Word, OneOrMore,
  14. Forward,
  15. Group, Optional, Combine,
  16. restOfLine, cStyleComment, nums, alphanums,
  17. printables,
  18. ParseException, ParseResults, CharsNotIn,
  19. QuotedString)
  20. import pydot
  21. __author__ = ['Michael Krause', 'Ero Carrera']
  22. __license__ = 'MIT'
  23. PY3 = sys.version_info >= (3, 0, 0)
  24. if PY3:
  25. str_type = str
  26. else:
  27. str_type = basestring
  28. class P_AttrList(object):
  29. def __init__(self, toks):
  30. self.attrs = {}
  31. i = 0
  32. while i < len(toks):
  33. attrname = toks[i]
  34. if i+2 < len(toks) and toks[i+1] == '=':
  35. attrvalue = toks[i+2]
  36. i += 3
  37. else:
  38. attrvalue = None
  39. i += 1
  40. self.attrs[attrname] = attrvalue
  41. def __repr__(self):
  42. return "%s(%r)" % (self.__class__.__name__, self.attrs)
  43. class DefaultStatement(P_AttrList):
  44. def __init__(self, default_type, attrs):
  45. self.default_type = default_type
  46. self.attrs = attrs
  47. def __repr__(self):
  48. return "%s(%s, %r)" % (self.__class__.__name__,
  49. self.default_type, self.attrs)
  50. top_graphs = list()
  51. def push_top_graph_stmt(str, loc, toks):
  52. attrs = {}
  53. g = None
  54. for element in toks:
  55. if (isinstance(element, (ParseResults, tuple, list)) and
  56. len(element) == 1 and
  57. isinstance(element[0], str_type)):
  58. element = element[0]
  59. if element == 'strict':
  60. attrs['strict'] = True
  61. elif element in ['graph', 'digraph']:
  62. attrs = {}
  63. g = pydot.Dot(graph_type=element, **attrs)
  64. attrs['type'] = element
  65. top_graphs.append( g )
  66. elif isinstance( element, str_type):
  67. g.set_name( element )
  68. elif isinstance(element, pydot.Subgraph):
  69. g.obj_dict['attributes'].update( element.obj_dict['attributes'] )
  70. g.obj_dict['edges'].update( element.obj_dict['edges'] )
  71. g.obj_dict['nodes'].update( element.obj_dict['nodes'] )
  72. g.obj_dict['subgraphs'].update( element.obj_dict['subgraphs'] )
  73. g.set_parent_graph(g)
  74. elif isinstance(element, P_AttrList):
  75. attrs.update(element.attrs)
  76. elif isinstance(element, (ParseResults, list)):
  77. add_elements(g, element)
  78. else:
  79. raise ValueError(
  80. 'Unknown element statement: {s}'.format(s=element))
  81. for g in top_graphs:
  82. update_parent_graph_hierarchy(g)
  83. if len( top_graphs ) == 1:
  84. return top_graphs[0]
  85. return top_graphs
def update_parent_graph_hierarchy(g, parent_graph=None, level=0):
    """Re-point parent-graph references found in the edges of `g`.

    @param g: a graph object exposing ``obj_dict``, or a `pydot.frozendict`
        that is used directly as the item dictionary.
    @param parent_graph: graph to install as parent; defaults to `g`.
    @param level: not used in this function.
    """

    if parent_graph is None:
        parent_graph = g

    # Only the 'edges' entry is inspected.
    for key_name in ('edges',):

        if isinstance(g, pydot.frozendict):
            item_dict = g
        else:
            item_dict = g.obj_dict

        if key_name not in item_dict:
            continue

        for key, objs in item_dict[key_name].items():
            for obj in objs:
                # Re-parent the object's recorded parent graph when that
                # graph currently hangs below `g` and is not `g` itself.
                if ('parent_graph' in obj and
                        obj['parent_graph'].get_parent_graph()==g):
                    if obj['parent_graph'] is g:
                        pass
                    else:
                        obj['parent_graph'].set_parent_graph(parent_graph)

                # Entries stored under a two-element key: also visit the
                # edge's end points (`idx` is not used).
                if key_name == 'edges' and len(key) == 2:
                    for idx, vertex in enumerate( obj['points'] ):
                        # End point that is itself a graph object.
                        if isinstance( vertex,
                                      (pydot.Graph,
                                       pydot.Subgraph, pydot.Cluster)):
                            vertex.set_parent_graph(parent_graph)
                        # End point stored as a frozen dictionary:
                        # re-parent the graph it records unless that
                        # graph is `g`.
                        if isinstance( vertex, pydot.frozendict):
                            if vertex['parent_graph'] is g:
                                pass
                            else:
                                vertex['parent_graph'].set_parent_graph(
                                    parent_graph)
  116. def add_defaults(element, defaults):
  117. d = element.__dict__
  118. for key, value in defaults.items():
  119. if not d.get(key):
  120. d[key] = value
  121. def add_elements(g, toks, defaults_graph=None,
  122. defaults_node=None, defaults_edge=None):
  123. if defaults_graph is None:
  124. defaults_graph = {}
  125. if defaults_node is None:
  126. defaults_node = {}
  127. if defaults_edge is None:
  128. defaults_edge = {}
  129. for elm_idx, element in enumerate(toks):
  130. if isinstance(element, (pydot.Subgraph, pydot.Cluster)):
  131. add_defaults(element, defaults_graph)
  132. g.add_subgraph(element)
  133. elif isinstance(element, pydot.Node):
  134. add_defaults(element, defaults_node)
  135. g.add_node(element)
  136. elif isinstance(element, pydot.Edge):
  137. add_defaults(element, defaults_edge)
  138. g.add_edge(element)
  139. elif isinstance(element, ParseResults):
  140. for e in element:
  141. add_elements(g, [e], defaults_graph,
  142. defaults_node, defaults_edge)
  143. elif isinstance(element, DefaultStatement):
  144. if element.default_type == 'graph':
  145. default_graph_attrs = pydot.Node('graph', **element.attrs)
  146. g.add_node(default_graph_attrs)
  147. elif element.default_type == 'node':
  148. default_node_attrs = pydot.Node('node', **element.attrs)
  149. g.add_node(default_node_attrs)
  150. elif element.default_type == 'edge':
  151. default_edge_attrs = pydot.Node('edge', **element.attrs)
  152. g.add_node(default_edge_attrs)
  153. defaults_edge.update(element.attrs)
  154. else:
  155. raise ValueError(
  156. 'Unknown DefaultStatement: {s}'.format(
  157. s=element.default_type))
  158. elif isinstance(element, P_AttrList):
  159. g.obj_dict['attributes'].update(element.attrs)
  160. else:
  161. raise ValueError(
  162. 'Unknown element statement: {s}'.format(s=element))
  163. def push_graph_stmt(str, loc, toks):
  164. g = pydot.Subgraph('')
  165. add_elements(g, toks)
  166. return g
  167. def push_subgraph_stmt(str, loc, toks):
  168. g = pydot.Subgraph('')
  169. for e in toks:
  170. if len(e)==3:
  171. e[2].set_name(e[1])
  172. if e[0] == 'subgraph':
  173. e[2].obj_dict['show_keyword'] = True
  174. return e[2]
  175. else:
  176. if e[0] == 'subgraph':
  177. e[1].obj_dict['show_keyword'] = True
  178. return e[1]
  179. return g
  180. def push_default_stmt(str, loc, toks):
  181. # The pydot class instances should be marked as
  182. # default statements to be inherited by actual
  183. # graphs, nodes and edges.
  184. #
  185. default_type = toks[0][0]
  186. if len(toks) > 1:
  187. attrs = toks[1].attrs
  188. else:
  189. attrs = {}
  190. if default_type in ['graph', 'node', 'edge']:
  191. return DefaultStatement(default_type, attrs)
  192. else:
  193. raise ValueError(
  194. 'Unknown default statement: {s}'.format(s=toks))
  195. def push_attr_list(str, loc, toks):
  196. p = P_AttrList(toks)
  197. return p
  198. def get_port(node):
  199. if len(node)>1:
  200. if isinstance(node[1], ParseResults):
  201. if len(node[1][0])==2:
  202. if node[1][0][0]==':':
  203. return node[1][0][1]
  204. return None
  205. def do_node_ports(node):
  206. node_port = ''
  207. if len(node) > 1:
  208. node_port = ''.join( [str(a)+str(b) for a,b in node[1] ] )
  209. return node_port
  210. def push_edge_stmt(str, loc, toks):
  211. tok_attrs = [a for a in toks if isinstance(a, P_AttrList)]
  212. attrs = {}
  213. for a in tok_attrs:
  214. attrs.update(a.attrs)
  215. e = []
  216. if isinstance(toks[0][0], pydot.Graph):
  217. n_prev = pydot.frozendict(toks[0][0].obj_dict)
  218. else:
  219. n_prev = toks[0][0] + do_node_ports( toks[0] )
  220. if isinstance(toks[2][0], ParseResults):
  221. n_next_list = [[n.get_name(),] for n in toks[2][0] ]
  222. for n_next in [n for n in n_next_list]:
  223. n_next_port = do_node_ports(n_next)
  224. e.append(pydot.Edge(n_prev, n_next[0]+n_next_port, **attrs))
  225. elif isinstance(toks[2][0], pydot.Graph):
  226. e.append(pydot.Edge(n_prev,
  227. pydot.frozendict(toks[2][0].obj_dict),
  228. **attrs))
  229. elif isinstance(toks[2][0], pydot.Node):
  230. node = toks[2][0]
  231. if node.get_port() is not None:
  232. name_port = node.get_name() + ":" + node.get_port()
  233. else:
  234. name_port = node.get_name()
  235. e.append(pydot.Edge(n_prev, name_port, **attrs))
  236. # if the target of this edge is the name of a node
  237. elif isinstance(toks[2][0], str_type):
  238. for n_next in [n for n in tuple(toks)[2::2]]:
  239. if (isinstance(n_next, P_AttrList) or
  240. not isinstance(n_next[0], str_type)):
  241. continue
  242. n_next_port = do_node_ports( n_next )
  243. e.append(pydot.Edge(n_prev, n_next[0]+n_next_port, **attrs))
  244. n_prev = n_next[0]+n_next_port
  245. else:
  246. raise Exception(
  247. 'Edge target {r} with type {s} unsupported.'.format(
  248. r=toks[2][0], s=type(toks[2][0])))
  249. return e
  250. def push_node_stmt(s, loc, toks):
  251. if len(toks) == 2:
  252. attrs = toks[1].attrs
  253. else:
  254. attrs = {}
  255. node_name = toks[0]
  256. if isinstance(node_name, list) or isinstance(node_name, tuple):
  257. if len(node_name)>0:
  258. node_name = node_name[0]
  259. n = pydot.Node(str(node_name), **attrs)
  260. return n
# Cached grammar object; built on the first call to `graph_definition`.
graphparser = None


def graph_definition():
    """Return the pyparsing grammar for DOT input, building it on first use.

    The grammar is stored in the module-level `graphparser` and reused on
    later calls. Parse actions defined in this module are attached to the
    grammar elements at the end of the construction.
    """
    global graphparser

    if not graphparser:

        # punctuation
        # (`dot`, `slash`, `bslash` and `star` are not referenced below.)
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # keywords
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # token definitions
        identifier = Word(alphanums + "_." ).setName("identifier")

        # Quoted strings keep their surrounding quotes
        # (unquoteResults=False).
        double_quoted_string = QuotedString(
            '"', multiline=True, unquoteResults=False, escChar='\\') # dblQuotedString

        # Runs of characters that are neither a space nor one of
        # pyparsing's `printables` other than ','.
        # NOTE(review): presumably meant for non-ASCII identifiers - confirm.
        noncomma = "".join([c for c in printables if c != ","])
        alphastring_ = OneOrMore(CharsNotIn(noncomma + ' '))

        def parse_html(s, loc, toks):
            # Re-wrap the matched content in angle brackets.
            return '<%s>' % ''.join(toks[0])

        opener = '<'
        closer = '>'
        html_text = nestedExpr( opener, closer,
            ( CharsNotIn( opener + closer ) )
                ).setParseAction(parse_html).leaveWhitespace()

        ID = ( identifier | html_text |
            double_quoted_string | #.setParseAction(strip_quotes) |
            alphastring_ ).setName("ID")

        float_number = Combine(Optional(minus) +
            OneOrMore(Word(nums + "."))).setName("float_number")

        righthand_id = (float_number | ID ).setName("righthand_id")

        port_angle = (at + ID).setName("port_angle")

        port_location = (OneOrMore(Group(colon + ID)) |
            Group(colon + lparen +
                  ID + comma + ID + rparen)).setName("port_location")

        port = (Group(port_location + Optional(port_angle)) |
            Group(port_angle + Optional(port_location))).setName("port")

        node_id = (ID + Optional(port))
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
            Optional(comma.suppress())).setName("a_list")

        # One or more bracket groups; the brackets themselves are dropped.
        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) +
            rbrack.suppress()).setName("attr_list")

        attr_stmt = (Group(graph_ | node_ | edge_) +
                     attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        # Forward declarations for the recursive parts of the grammar;
        # they are filled in with `<<` further down.
        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) +
            rbrace.suppress() +
            Optional(semi.suppress())).setName("graph_stmt")

        edge_point = Forward()

        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = Group(
            subgraph_ + Optional(ID) + graph_stmt).setName("subgraph")

        edge_point << Group(
            subgraph | graph_stmt | node_id).setName('edge_point')

        node_stmt = (
            node_id + Optional(attr_list) +
            Optional(semi.suppress())).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")
        # Alternatives are tried in the order listed here.
        stmt = (assignment | edge_stmt | attr_stmt |
                subgraph | graph_stmt | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        # Top-level rule: one or more graphs.
        graphparser = OneOrMore(
            (Optional(strict_) + Group((graph_ | digraph_)) +
             Optional(ID) + graph_stmt).setResultsName("graph"))

        singleLineComment = Group(
            "//" + restOfLine) | Group("#" + restOfLine)

        # actions

        # Skip `//` and `#` line comments as well as C-style comments.
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)

        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
  357. def parse_dot_data(s):
  358. """Parse DOT description in (unicode) string `s`.
  359. @return: Graphs that result from parsing.
  360. @rtype: `list` of `pydot.Dot`
  361. """
  362. global top_graphs
  363. top_graphs = list()
  364. try:
  365. graphparser = graph_definition()
  366. graphparser.parseWithTabs()
  367. tokens = graphparser.parseString(s)
  368. return list(tokens)
  369. except ParseException as err:
  370. print(err.line)
  371. print(" " * (err.column - 1) + "^")
  372. print(err)
  373. return None