metadata.php 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753
  1. <?php
  2. /**
  3. * The MetaData Renderer
  4. *
  5. * Metadata is additional information about a DokuWiki page that gets extracted mainly from the page's content
  6. * but also its own filesystem data (like the creation time). All metadata is stored in the fields $meta and
  7. * $persistent.
  8. *
  9. * Some simplified rendering to $doc is done to gather the page's (text-only) abstract.
  10. *
  11. * @author Esther Brunner <wikidesign@gmail.com>
  12. */
  13. class Doku_Renderer_metadata extends Doku_Renderer
  14. {
  15. /** the approximate byte lenght to capture for the abstract */
  16. const ABSTRACT_LEN = 250;
  17. /** the maximum UTF8 character length for the abstract */
  18. const ABSTRACT_MAX = 500;
  19. /** @var array transient meta data, will be reset on each rendering */
  20. public $meta = array();
  21. /** @var array persistent meta data, will be kept until explicitly deleted */
  22. public $persistent = array();
  23. /** @var array the list of headers used to create unique link ids */
  24. protected $headers = array();
  25. /** @var string temporary $doc store */
  26. protected $store = '';
  27. /** @var string keeps the first image reference */
  28. protected $firstimage = '';
  29. /** @var bool whether or not data is being captured for the abstract, public to be accessible by plugins */
  30. public $capturing = true;
  31. /** @var bool determines if enough data for the abstract was collected, yet */
  32. public $capture = true;
  33. /** @var int number of bytes captured for abstract */
  34. protected $captured = 0;
  35. /**
  36. * Returns the format produced by this renderer.
  37. *
  38. * @return string always 'metadata'
  39. */
  40. public function getFormat()
  41. {
  42. return 'metadata';
  43. }
  44. /**
  45. * Initialize the document
  46. *
  47. * Sets up some of the persistent info about the page if it doesn't exist, yet.
  48. */
  49. public function document_start()
  50. {
  51. global $ID;
  52. $this->headers = array();
  53. // external pages are missing create date
  54. if (!isset($this->persistent['date']['created']) || !$this->persistent['date']['created']) {
  55. $this->persistent['date']['created'] = filectime(wikiFN($ID));
  56. }
  57. if (!isset($this->persistent['user'])) {
  58. $this->persistent['user'] = '';
  59. }
  60. if (!isset($this->persistent['creator'])) {
  61. $this->persistent['creator'] = '';
  62. }
  63. // reset metadata to persistent values
  64. $this->meta = $this->persistent;
  65. }
  66. /**
  67. * Finalize the document
  68. *
  69. * Stores collected data in the metadata
  70. */
  71. public function document_end()
  72. {
  73. global $ID;
  74. // store internal info in metadata (notoc,nocache)
  75. $this->meta['internal'] = $this->info;
  76. if (!isset($this->meta['description']['abstract'])) {
  77. // cut off too long abstracts
  78. $this->doc = trim($this->doc);
  79. if (strlen($this->doc) > self::ABSTRACT_MAX) {
  80. $this->doc = \dokuwiki\Utf8\PhpString::substr($this->doc, 0, self::ABSTRACT_MAX).'…';
  81. }
  82. $this->meta['description']['abstract'] = $this->doc;
  83. }
  84. $this->meta['relation']['firstimage'] = $this->firstimage;
  85. if (!isset($this->meta['date']['modified'])) {
  86. $this->meta['date']['modified'] = filemtime(wikiFN($ID));
  87. }
  88. }
  89. /**
  90. * Render plain text data
  91. *
  92. * This function takes care of the amount captured data and will stop capturing when
  93. * enough abstract data is available
  94. *
  95. * @param $text
  96. */
  97. public function cdata($text)
  98. {
  99. if (!$this->capture || !$this->capturing) {
  100. return;
  101. }
  102. $this->doc .= $text;
  103. $this->captured += strlen($text);
  104. if ($this->captured > self::ABSTRACT_LEN) {
  105. $this->capture = false;
  106. }
  107. }
  108. /**
  109. * Add an item to the TOC
  110. *
  111. * @param string $id the hash link
  112. * @param string $text the text to display
  113. * @param int $level the nesting level
  114. */
  115. public function toc_additem($id, $text, $level)
  116. {
  117. global $conf;
  118. //only add items within configured levels
  119. if ($level >= $conf['toptoclevel'] && $level <= $conf['maxtoclevel']) {
  120. // the TOC is one of our standard ul list arrays ;-)
  121. $this->meta['description']['tableofcontents'][] = array(
  122. 'hid' => $id,
  123. 'title' => $text,
  124. 'type' => 'ul',
  125. 'level' => $level - $conf['toptoclevel'] + 1
  126. );
  127. }
  128. }
  129. /**
  130. * Render a heading
  131. *
  132. * @param string $text the text to display
  133. * @param int $level header level
  134. * @param int $pos byte position in the original source
  135. */
  136. public function header($text, $level, $pos)
  137. {
  138. if (!isset($this->meta['title'])) {
  139. $this->meta['title'] = $text;
  140. }
  141. // add the header to the TOC
  142. $hid = $this->_headerToLink($text, true);
  143. $this->toc_additem($hid, $text, $level);
  144. // add to summary
  145. $this->cdata(DOKU_LF.$text.DOKU_LF);
  146. }
  147. /**
  148. * Open a paragraph
  149. */
  150. public function p_open()
  151. {
  152. $this->cdata(DOKU_LF);
  153. }
  154. /**
  155. * Close a paragraph
  156. */
  157. public function p_close()
  158. {
  159. $this->cdata(DOKU_LF);
  160. }
  161. /**
  162. * Create a line break
  163. */
  164. public function linebreak()
  165. {
  166. $this->cdata(DOKU_LF);
  167. }
  168. /**
  169. * Create a horizontal line
  170. */
  171. public function hr()
  172. {
  173. $this->cdata(DOKU_LF.'----------'.DOKU_LF);
  174. }
  175. /**
  176. * Callback for footnote start syntax
  177. *
  178. * All following content will go to the footnote instead of
  179. * the document. To achieve this the previous rendered content
  180. * is moved to $store and $doc is cleared
  181. *
  182. * @author Andreas Gohr <andi@splitbrain.org>
  183. */
  184. public function footnote_open()
  185. {
  186. if ($this->capture) {
  187. // move current content to store
  188. // this is required to ensure safe behaviour of plugins accessed within footnotes
  189. $this->store = $this->doc;
  190. $this->doc = '';
  191. // disable capturing
  192. $this->capturing = false;
  193. }
  194. }
  195. /**
  196. * Callback for footnote end syntax
  197. *
  198. * All content rendered whilst within footnote syntax mode is discarded,
  199. * the previously rendered content is restored and capturing is re-enabled.
  200. *
  201. * @author Andreas Gohr
  202. */
  203. public function footnote_close()
  204. {
  205. if ($this->capture) {
  206. // re-enable capturing
  207. $this->capturing = true;
  208. // restore previously rendered content
  209. $this->doc = $this->store;
  210. $this->store = '';
  211. }
  212. }
  213. /**
  214. * Open an unordered list
  215. */
  216. public function listu_open()
  217. {
  218. $this->cdata(DOKU_LF);
  219. }
  220. /**
  221. * Open an ordered list
  222. */
  223. public function listo_open()
  224. {
  225. $this->cdata(DOKU_LF);
  226. }
  227. /**
  228. * Open a list item
  229. *
  230. * @param int $level the nesting level
  231. * @param bool $node true when a node; false when a leaf
  232. */
  233. public function listitem_open($level, $node=false)
  234. {
  235. $this->cdata(str_repeat(DOKU_TAB, $level).'* ');
  236. }
  237. /**
  238. * Close a list item
  239. */
  240. public function listitem_close()
  241. {
  242. $this->cdata(DOKU_LF);
  243. }
  244. /**
  245. * Output preformatted text
  246. *
  247. * @param string $text
  248. */
  249. public function preformatted($text)
  250. {
  251. $this->cdata($text);
  252. }
  253. /**
  254. * Start a block quote
  255. */
  256. public function quote_open()
  257. {
  258. $this->cdata(DOKU_LF.DOKU_TAB.'"');
  259. }
  260. /**
  261. * Stop a block quote
  262. */
  263. public function quote_close()
  264. {
  265. $this->cdata('"'.DOKU_LF);
  266. }
  267. /**
  268. * Display text as file content, optionally syntax highlighted
  269. *
  270. * @param string $text text to show
  271. * @param string $lang programming language to use for syntax highlighting
  272. * @param string $file file path label
  273. */
  274. public function file($text, $lang = null, $file = null)
  275. {
  276. $this->cdata(DOKU_LF.$text.DOKU_LF);
  277. }
  278. /**
  279. * Display text as code content, optionally syntax highlighted
  280. *
  281. * @param string $text text to show
  282. * @param string $language programming language to use for syntax highlighting
  283. * @param string $file file path label
  284. */
  285. public function code($text, $language = null, $file = null)
  286. {
  287. $this->cdata(DOKU_LF.$text.DOKU_LF);
  288. }
  289. /**
  290. * Format an acronym
  291. *
  292. * Uses $this->acronyms
  293. *
  294. * @param string $acronym
  295. */
  296. public function acronym($acronym)
  297. {
  298. $this->cdata($acronym);
  299. }
  300. /**
  301. * Format a smiley
  302. *
  303. * Uses $this->smiley
  304. *
  305. * @param string $smiley
  306. */
  307. public function smiley($smiley)
  308. {
  309. $this->cdata($smiley);
  310. }
  311. /**
  312. * Format an entity
  313. *
  314. * Entities are basically small text replacements
  315. *
  316. * Uses $this->entities
  317. *
  318. * @param string $entity
  319. */
  320. public function entity($entity)
  321. {
  322. $this->cdata($entity);
  323. }
  324. /**
  325. * Typographically format a multiply sign
  326. *
  327. * Example: ($x=640, $y=480) should result in "640×480"
  328. *
  329. * @param string|int $x first value
  330. * @param string|int $y second value
  331. */
  332. public function multiplyentity($x, $y)
  333. {
  334. $this->cdata($x.'×'.$y);
  335. }
  336. /**
  337. * Render an opening single quote char (language specific)
  338. */
  339. public function singlequoteopening()
  340. {
  341. global $lang;
  342. $this->cdata($lang['singlequoteopening']);
  343. }
  344. /**
  345. * Render a closing single quote char (language specific)
  346. */
  347. public function singlequoteclosing()
  348. {
  349. global $lang;
  350. $this->cdata($lang['singlequoteclosing']);
  351. }
  352. /**
  353. * Render an apostrophe char (language specific)
  354. */
  355. public function apostrophe()
  356. {
  357. global $lang;
  358. $this->cdata($lang['apostrophe']);
  359. }
  360. /**
  361. * Render an opening double quote char (language specific)
  362. */
  363. public function doublequoteopening()
  364. {
  365. global $lang;
  366. $this->cdata($lang['doublequoteopening']);
  367. }
  368. /**
  369. * Render an closinging double quote char (language specific)
  370. */
  371. public function doublequoteclosing()
  372. {
  373. global $lang;
  374. $this->cdata($lang['doublequoteclosing']);
  375. }
  376. /**
  377. * Render a CamelCase link
  378. *
  379. * @param string $link The link name
  380. * @see http://en.wikipedia.org/wiki/CamelCase
  381. */
  382. public function camelcaselink($link)
  383. {
  384. $this->internallink($link, $link);
  385. }
  386. /**
  387. * Render a page local link
  388. *
  389. * @param string $hash hash link identifier
  390. * @param string $name name for the link
  391. */
  392. public function locallink($hash, $name = null)
  393. {
  394. if (is_array($name)) {
  395. $this->_firstimage($name['src']);
  396. if ($name['type'] == 'internalmedia') {
  397. $this->_recordMediaUsage($name['src']);
  398. }
  399. }
  400. }
  401. /**
  402. * keep track of internal links in $this->meta['relation']['references']
  403. *
  404. * @param string $id page ID to link to. eg. 'wiki:syntax'
  405. * @param string|array|null $name name for the link, array for media file
  406. */
  407. public function internallink($id, $name = null)
  408. {
  409. global $ID;
  410. if (is_array($name)) {
  411. $this->_firstimage($name['src']);
  412. if ($name['type'] == 'internalmedia') {
  413. $this->_recordMediaUsage($name['src']);
  414. }
  415. }
  416. $parts = explode('?', $id, 2);
  417. if (count($parts) === 2) {
  418. $id = $parts[0];
  419. }
  420. $default = $this->_simpleTitle($id);
  421. // first resolve and clean up the $id
  422. $resolver = new \dokuwiki\File\PageResolver($ID);
  423. $id = $resolver->resolveId($id);
  424. list($page) = sexplode('#', $id, 2);
  425. // set metadata
  426. $this->meta['relation']['references'][$page] = page_exists($page);
  427. // $data = array('relation' => array('isreferencedby' => array($ID => true)));
  428. // p_set_metadata($id, $data);
  429. // add link title to summary
  430. if ($this->capture) {
  431. $name = $this->_getLinkTitle($name, $default, $id);
  432. $this->doc .= $name;
  433. }
  434. }
  435. /**
  436. * Render an external link
  437. *
  438. * @param string $url full URL with scheme
  439. * @param string|array|null $name name for the link, array for media file
  440. */
  441. public function externallink($url, $name = null)
  442. {
  443. if (is_array($name)) {
  444. $this->_firstimage($name['src']);
  445. if ($name['type'] == 'internalmedia') {
  446. $this->_recordMediaUsage($name['src']);
  447. }
  448. }
  449. if ($this->capture) {
  450. $this->doc .= $this->_getLinkTitle($name, '<'.$url.'>');
  451. }
  452. }
  453. /**
  454. * Render an interwiki link
  455. *
  456. * You may want to use $this->_resolveInterWiki() here
  457. *
  458. * @param string $match original link - probably not much use
  459. * @param string|array $name name for the link, array for media file
  460. * @param string $wikiName indentifier (shortcut) for the remote wiki
  461. * @param string $wikiUri the fragment parsed from the original link
  462. */
  463. public function interwikilink($match, $name, $wikiName, $wikiUri)
  464. {
  465. if (is_array($name)) {
  466. $this->_firstimage($name['src']);
  467. if ($name['type'] == 'internalmedia') {
  468. $this->_recordMediaUsage($name['src']);
  469. }
  470. }
  471. if ($this->capture) {
  472. list($wikiUri) = explode('#', $wikiUri, 2);
  473. $name = $this->_getLinkTitle($name, $wikiUri);
  474. $this->doc .= $name;
  475. }
  476. }
  477. /**
  478. * Link to windows share
  479. *
  480. * @param string $url the link
  481. * @param string|array $name name for the link, array for media file
  482. */
  483. public function windowssharelink($url, $name = null)
  484. {
  485. if (is_array($name)) {
  486. $this->_firstimage($name['src']);
  487. if ($name['type'] == 'internalmedia') {
  488. $this->_recordMediaUsage($name['src']);
  489. }
  490. }
  491. if ($this->capture) {
  492. if ($name) {
  493. $this->doc .= $name;
  494. } else {
  495. $this->doc .= '<'.$url.'>';
  496. }
  497. }
  498. }
  499. /**
  500. * Render a linked E-Mail Address
  501. *
  502. * Should honor $conf['mailguard'] setting
  503. *
  504. * @param string $address Email-Address
  505. * @param string|array $name name for the link, array for media file
  506. */
  507. public function emaillink($address, $name = null)
  508. {
  509. if (is_array($name)) {
  510. $this->_firstimage($name['src']);
  511. if ($name['type'] == 'internalmedia') {
  512. $this->_recordMediaUsage($name['src']);
  513. }
  514. }
  515. if ($this->capture) {
  516. if ($name) {
  517. $this->doc .= $name;
  518. } else {
  519. $this->doc .= '<'.$address.'>';
  520. }
  521. }
  522. }
  523. /**
  524. * Render an internal media file
  525. *
  526. * @param string $src media ID
  527. * @param string $title descriptive text
  528. * @param string $align left|center|right
  529. * @param int $width width of media in pixel
  530. * @param int $height height of media in pixel
  531. * @param string $cache cache|recache|nocache
  532. * @param string $linking linkonly|detail|nolink
  533. */
  534. public function internalmedia($src, $title = null, $align = null, $width = null,
  535. $height = null, $cache = null, $linking = null)
  536. {
  537. if ($this->capture && $title) {
  538. $this->doc .= '['.$title.']';
  539. }
  540. $this->_firstimage($src);
  541. $this->_recordMediaUsage($src);
  542. }
  543. /**
  544. * Render an external media file
  545. *
  546. * @param string $src full media URL
  547. * @param string $title descriptive text
  548. * @param string $align left|center|right
  549. * @param int $width width of media in pixel
  550. * @param int $height height of media in pixel
  551. * @param string $cache cache|recache|nocache
  552. * @param string $linking linkonly|detail|nolink
  553. */
  554. public function externalmedia($src, $title = null, $align = null, $width = null,
  555. $height = null, $cache = null, $linking = null)
  556. {
  557. if ($this->capture && $title) {
  558. $this->doc .= '['.$title.']';
  559. }
  560. $this->_firstimage($src);
  561. }
  562. /**
  563. * Render the output of an RSS feed
  564. *
  565. * @param string $url URL of the feed
  566. * @param array $params Finetuning of the output
  567. */
  568. public function rss($url, $params)
  569. {
  570. $this->meta['relation']['haspart'][$url] = true;
  571. $this->meta['date']['valid']['age'] =
  572. isset($this->meta['date']['valid']['age']) ?
  573. min($this->meta['date']['valid']['age'], $params['refresh']) :
  574. $params['refresh'];
  575. }
  576. #region Utils
  577. /**
  578. * Removes any Namespace from the given name but keeps
  579. * casing and special chars
  580. *
  581. * @author Andreas Gohr <andi@splitbrain.org>
  582. *
  583. * @param string $name
  584. *
  585. * @return mixed|string
  586. */
  587. public function _simpleTitle($name)
  588. {
  589. global $conf;
  590. if (is_array($name)) {
  591. return '';
  592. }
  593. if ($conf['useslash']) {
  594. $nssep = '[:;/]';
  595. } else {
  596. $nssep = '[:;]';
  597. }
  598. $name = preg_replace('!.*'.$nssep.'!', '', $name);
  599. //if there is a hash we use the anchor name only
  600. $name = preg_replace('!.*#!', '', $name);
  601. return $name;
  602. }
  603. /**
  604. * Construct a title and handle images in titles
  605. *
  606. * @author Harry Fuecks <hfuecks@gmail.com>
  607. * @param string|array|null $title either string title or media array
  608. * @param string $default default title if nothing else is found
  609. * @param null|string $id linked page id (used to extract title from first heading)
  610. * @return string title text
  611. */
  612. public function _getLinkTitle($title, $default, $id = null)
  613. {
  614. if (is_array($title)) {
  615. if ($title['title']) {
  616. return '['.$title['title'].']';
  617. } else {
  618. return $default;
  619. }
  620. } elseif (is_null($title) || trim($title) == '') {
  621. if (useHeading('content') && $id) {
  622. $heading = p_get_first_heading($id, METADATA_DONT_RENDER);
  623. if ($heading) {
  624. return $heading;
  625. }
  626. }
  627. return $default;
  628. } else {
  629. return $title;
  630. }
  631. }
  632. /**
  633. * Remember first image
  634. *
  635. * @param string $src image URL or ID
  636. */
  637. protected function _firstimage($src)
  638. {
  639. global $ID;
  640. if ($this->firstimage) {
  641. return;
  642. }
  643. list($src) = explode('#', $src, 2);
  644. if (!media_isexternal($src)) {
  645. $src = (new \dokuwiki\File\MediaResolver($ID))->resolveId($src);
  646. }
  647. if (preg_match('/.(jpe?g|gif|png)$/i', $src)) {
  648. $this->firstimage = $src;
  649. }
  650. }
  651. /**
  652. * Store list of used media files in metadata
  653. *
  654. * @param string $src media ID
  655. */
  656. protected function _recordMediaUsage($src)
  657. {
  658. global $ID;
  659. list ($src) = explode('#', $src, 2);
  660. if (media_isexternal($src)) {
  661. return;
  662. }
  663. $src = (new \dokuwiki\File\MediaResolver($ID))->resolveId($src);
  664. $file = mediaFN($src);
  665. $this->meta['relation']['media'][$src] = file_exists($file);
  666. }
  667. #endregion
  668. }
  669. //Setup VIM: ex: et ts=4 :