wantedpages.php 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #!/usr/bin/env php
  2. <?php
  3. use dokuwiki\Utf8\Sort;
  4. use dokuwiki\File\PageResolver;
  5. use splitbrain\phpcli\CLI;
  6. use splitbrain\phpcli\Options;
  // Locate the DokuWiki installation one directory above this script, unless DOKU_INC was already defined
  if(!defined('DOKU_INC')) define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
  // NOSESSION has to be defined before the bootstrap below is loaded
  // NOTE(review): presumably inc/init.php checks it to skip session handling for CLI runs - confirm
  define('NOSESSION', 1);
  require_once(DOKU_INC . 'inc/init.php');
  /**
   * Find wanted pages
   *
   * Command line tool that walks the page files below a namespace directory,
   * parses each page into instructions and reports internal links whose target
   * page does not exist.
   */
  class WantedPagesCLI extends CLI {

      /** dirFilter() verdict: ignore this directory entry */
      const DIR_CONTINUE = 1;
      /** dirFilter() verdict: entry is a sub directory that should be descended into */
      const DIR_NS = 2;
      /** dirFilter() verdict: entry is a page file (*.txt) */
      const DIR_PAGE = 3;

      /** @var bool when truthy only the first column of the result is printed */
      private $skip = false;
      /** @var string 'origin' keys the result by linking page, anything else by wanted page */
      private $sort = 'wanted';
      /** @var array map of first column id => list of second column ids */
      private $result = [];

      /**
       * Register options and arguments on the given $options object
       *
       * @param Options $options
       * @return void
       */
      protected function setup(Options $options) {
          $options->setHelp(
              'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
              ' (the pages that are linking to these missing pages).'
          );
          $options->registerArgument(
              'namespace',
              'The namespace to lookup. Defaults to root namespace',
              false
          );
          $options->registerOption(
              'sort',
              'Sort by wanted or origin page',
              's',
              '(wanted|origin)'
          );
          $options->registerOption(
              'skip',
              'Do not show the second dimension',
              'k'
          );
      }

      /**
       * Collect the wanted pages below the requested namespace and print them
       *
       * Arguments and options have been parsed when this is run
       *
       * @param Options $options
       * @return void
       */
      protected function main(Options $options) {
          $args = $options->getArgs();
          // the dummy page id 'xxx' is only used to obtain the directory of the namespace
          $startdir = $args
              ? dirname(wikiFN($args[0] . ':xxx'))
              : dirname(wikiFN('xxx'));

          $this->skip = $options->getOpt('skip');
          // keep the declared default instead of overwriting it with false when --sort is absent
          $this->sort = $options->getOpt('sort', 'wanted');

          $this->info("searching $startdir");

          foreach($this->getPages($startdir) as $page) {
              $this->internalLinks($page);
          }

          Sort::ksort($this->result);
          foreach($this->result as $main => $subs) {
              if($this->skip) {
                  print "$main\n";
                  continue;
              }

              // the same link may occur several times on one page
              $subs = array_unique($subs);
              Sort::sort($subs);
              foreach($subs as $sub) {
                  printf("%-40s %s\n", $main, $sub);
              }
          }
      }

      /**
       * Determine directions of the search loop
       *
       * @param string $entry name of the directory entry
       * @param string $basepath directory containing the entry
       * @return int one of the DIR_* constants
       */
      protected function dirFilter($entry, $basepath) {
          if($entry === '.' || $entry === '..') {
              return self::DIR_CONTINUE;
          }

          if(is_dir($basepath . '/' . $entry)) {
              // directories starting with an underscore are not descended into
              return strpos($entry, '_') === 0 ? self::DIR_CONTINUE : self::DIR_NS;
          }

          if(preg_match('/\.txt$/', $entry)) {
              return self::DIR_PAGE;
          }

          return self::DIR_CONTINUE;
      }

      /**
       * Collects recursively the pages in a namespace
       *
       * NOTE(review): DokuCLI_Exception is neither imported nor defined in this file;
       * confirm it is still made available by inc/init.php.
       *
       * @param string $dir directory to scan
       * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
       * @throws DokuCLI_Exception when $dir is not a directory or cannot be opened
       */
      protected function getPages($dir) {
          static $trunclen = null;
          if($trunclen === null) {
              global $conf;
              // length of the data directory plus one more character
              // (presumably the path separator that follows it - confirm)
              $trunclen = strlen($conf['datadir'] . ':');
          }

          if(!is_dir($dir)) {
              throw new DokuCLI_Exception("Unable to read directory $dir");
          }

          // opendir() returns false on failure (e.g. missing permissions); readdir(false) must be avoided
          $dh = opendir($dir);
          if($dh === false) {
              throw new DokuCLI_Exception("Unable to read directory $dir");
          }

          $pages = [];
          try {
              while(false !== ($entry = readdir($dh))) {
                  $status = $this->dirFilter($entry, $dir);
                  if($status === self::DIR_CONTINUE) {
                      continue;
                  }

                  $path = $dir . '/' . $entry;
                  if($status === self::DIR_NS) {
                      // append instead of array_merge() to avoid re-copying the whole list per namespace
                      foreach($this->getPages($path) as $page) {
                          $pages[] = $page;
                      }
                  } else {
                      $pages[] = [
                          'id' => pathID(substr($path, $trunclen)),
                          'file' => $path,
                      ];
                  }
              }
          } finally {
              // release the handle even when a nested directory could not be read
              closedir($dh);
          }

          return $pages;
      }

      /**
       * Parse instructions and add the non-existing links to the result array
       *
       * Pages whose file cannot be read are reported via error() and skipped.
       *
       * @param array $page array with page id and file path
       */
      protected function internalLinks($page) {
          global $conf;

          $text = file_get_contents($page['file']);
          if($text === false) {
              $this->error('Unable to read page file ' . $page['file']);
              return;
          }

          $pid = $page['id'];
          $resolver = new PageResolver($pid);
          foreach(p_get_instructions($text) as $ins) {
              // camel case links only count when the feature is enabled in the configuration
              $isLink = $ins[0] === 'internallink'
                  || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
              if(!$isLink) {
                  continue;
              }

              $mid = $resolver->resolveId($ins[1][0]);
              if(page_exists($mid)) {
                  continue;
              }

              list($mid) = explode('#', $mid); // record pages without hashes
              if($this->sort === 'origin') {
                  $this->result[$pid][] = $mid;
              } else {
                  $this->result[$mid][] = $pid;
              }
          }
      }
  }
  // Main: instantiate the command line tool and start it via run()
  $cli = new WantedPagesCLI();
  $cli->run();