TaskRunner.php 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. <?php
  2. namespace dokuwiki;
  3. use dokuwiki\Extension\Event;
  4. use dokuwiki\Sitemap\Mapper;
  5. use dokuwiki\Subscriptions\BulkSubscriptionSender;
  6. use dokuwiki\ChangeLog\ChangeLog;
  /**
   * Class TaskRunner
   *
   * Runs one asynchronous background task per request: page indexing, sitemap
   * generation, subscription digests or changelog trimming.
   */
  class TaskRunner
  {
      /**
       * Run the next task
       *
       * Unless debug output was requested (and is allowed), a 1x1 GIF is sent to
       * the client and everything the tasks print is buffered and discarded.
       * The built-in tasks are tried in a fixed order and the first one that
       * reports having done work ends the chain.
       *
       * @todo refactor to remove dependencies on globals
       * @triggers INDEXER_TASKS_RUN
       */
      public function run()
      {
          global $INPUT, $conf, $ID;

          // keep running after browser closes connection
          @ignore_user_abort(true);

          // check if user abort worked, if yes send output early
          $defer = !@ignore_user_abort() || $conf['broken_iua'];
          $output = $INPUT->has('debug') && $conf['allowdebug'];
          if (!$defer && !$output) {
              $this->sendGIF();
          }

          $ID = cleanID($INPUT->str('id'));

          // Catch any possible output (e.g. errors)
          if ($output) {
              header('Content-Type: text/plain');
          } else {
              ob_start();
          }

          // run one of the jobs
          $tmp = []; // No event data
          $evt = new Event('INDEXER_TASKS_RUN', $tmp);
          if ($evt->advise_before()) {
              // short-circuit: stop at the first task that reports work done
              $taskDidWork = $this->runIndexer()
                  || $this->runSitemapper()
                  || $this->sendDigest()
                  || $this->runTrimRecentChanges()
                  || $this->runTrimRecentChanges(true);

              // the AFTER handlers are only advised when no built-in task did any work
              if (!$taskDidWork) {
                  $evt->advise_after();
              }
          }

          if (!$output) {
              ob_end_clean();
              if ($defer) {
                  // the image could not be sent up front, so send it now
                  $this->sendGIF();
              }
          }
      }

      /**
       * Just send a 1x1 pixel blank gif to the browser
       *
       * @author Andreas Gohr <andi@splitbrain.org>
       * @author Harry Fuecks <fuecks@gmail.com>
       */
      protected function sendGIF()
      {
          $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
          header('Content-Type: image/gif');
          header('Content-Length: ' . strlen($img));
          header('Connection: Close');
          echo $img;
          tpl_flush();
          // Browser should drop connection after this
          // Thinks it's got the whole image
      }

      /**
       * Trims the recent changes cache (or imports the old changelog) as needed.
       *
       * The cache is trimmed to the entries of the last $conf['recent_days'] days
       * or to $conf['recent'] entries, whichever is larger. Trimming is attempted
       * at most once a day, tracked through the mtime of a "<changelog>.trimmed"
       * marker file.
       *
       * @param bool $media_changes If the media changelog shall be trimmed instead of
       *                            the page changelog
       *
       * @return bool true if the changelog was rewritten, false if nothing was done
       * @triggers TASK_RECENTCHANGES_TRIM
       * @author Ben Coburn <btcoburn@silicodon.net>
       */
      protected function runTrimRecentChanges($media_changes = false)
      {
          global $conf;

          // prefix shared by all status messages of this task
          $tag = "runTrimRecentChanges($media_changes)";
          echo $tag . ': started' . NL;

          $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);

          // only trim an existing changelog, once a day, and never while a
          // temporary file from another trim run is still around
          $trimIsDue = file_exists($fn) &&
              (@filemtime($fn . '.trimmed') + 86400) < time() &&
              !file_exists($fn . '_tmp');
          if (!$trimIsDue) {
              // nothing done
              echo $tag . ': finished' . NL;
              return false;
          }

          @touch($fn . '.trimmed');
          io_lock($fn);
          $lines = file($fn);
          // file() returns false when the changelog cannot be read; bail out here
          // so the lock is released instead of failing inside count()
          if ($lines === false || count($lines) <= $conf['recent']) {
              // nothing to trim
              io_unlock($fn);
              echo $tag . ': finished' . NL;
              return false;
          }

          io_saveFile($fn . '_tmp', ''); // presave tmp as 2nd lock
          $trim_time = time() - $conf['recent_days'] * 86400;
          $out_lines = [];
          $old_lines = [];
          foreach ($lines as $i => $line) {
              $log = ChangeLog::parseLogLine($line);
              if ($log === false) {
                  continue; // discard junk
              }

              // append .$i to prevent key collisions between entries of the same date
              $key = $log['date'] . ".$i";
              if ($log['date'] < $trim_time) {
                  // keep old lines for now
                  $old_lines[$key] = $line;
              } else {
                  // definitely keep these lines
                  $out_lines[$key] = $line;
              }
          }

          if (count($lines) == count($out_lines)) {
              // nothing to trim
              @unlink($fn . '_tmp');
              io_unlock($fn);
              echo $tag . ': finished' . NL;
              return false;
          }

          // sort the final result, it shouldn't be necessary,
          // however the extra robustness in making the changelog cache self-correcting is worth it
          ksort($out_lines);
          $extra = $conf['recent'] - count($out_lines); // do we need extra lines to bring us up to minimum
          if ($extra > 0) {
              ksort($old_lines);
              $out_lines = array_merge(array_slice($old_lines, -$extra), $out_lines);
          }

          $eventData = [
              'isMedia' => $media_changes,
              'trimmedChangelogLines' => $out_lines,
              'removedChangelogLines' => $extra > 0 ? array_slice($old_lines, 0, -$extra) : $old_lines,
          ];
          Event::createAndTrigger('TASK_RECENTCHANGES_TRIM', $eventData);
          // the list of kept lines is read back from the event data after the event
          $out_lines = $eventData['trimmedChangelogLines'];

          // save trimmed changelog
          io_saveFile($fn . '_tmp', implode('', $out_lines));
          @unlink($fn);
          if (rename($fn . '_tmp', $fn)) {
              io_unlock($fn);
          } else {
              // rename failed so try another way...
              io_unlock($fn);
              io_saveFile($fn, implode('', $out_lines));
              @unlink($fn . '_tmp');
          }

          echo $tag . ': finished' . NL;
          return true;
      }

      /**
       * Runs the indexer for the current page
       *
       * @return bool false if no page ID is set, otherwise the result of idx_addPage()
       * @author Andreas Gohr <andi@splitbrain.org>
       */
      protected function runIndexer()
      {
          global $ID;

          echo 'runIndexer(): started' . NL;

          if ((string) $ID === '') {
              return false;
          }

          // do the work
          return idx_addPage($ID, true);
      }

      /**
       * Builds a Google Sitemap of all public pages known to the indexer
       *
       * The map is placed in the root directory named sitemap.xml.gz - This
       * file needs to be writable!
       *
       * Search engines are only pinged when the sitemap generation succeeded.
       *
       * @return bool true if the sitemap was generated and the ping succeeded
       * @author Andreas Gohr
       * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
       */
      protected function runSitemapper()
      {
          echo 'runSitemapper(): started' . NL;
          $result = Mapper::generate() && Mapper::pingSearchEngines();
          echo 'runSitemapper(): finished' . NL;
          return $result;
      }

      /**
       * Send digest and list mails for all subscriptions which are in effect for the
       * current page
       *
       * @return bool true if at least one mail was sent, false otherwise or when
       *              the subscribe action is disabled
       * @author Adrian Lang <lang@cosmocode.de>
       */
      protected function sendDigest()
      {
          global $ID;

          echo 'sendDigest(): started' . NL;
          if (!actionOK('subscribe')) {
              echo 'sendDigest(): disabled' . NL;
              return false;
          }

          $sub = new BulkSubscriptionSender();
          $sent = $sub->sendBulk($ID);

          echo "sendDigest(): sent $sent mails" . NL;
          echo 'sendDigest(): finished' . NL;
          return (bool)$sent;
      }
  }