search.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. <?php
  2. /**
  3. * DokuWiki search functions
  4. *
  5. * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
  6. * @author Andreas Gohr <andi@splitbrain.org>
  7. */
  8. use dokuwiki\Utf8\Sort;
  /**
   * Walk a directory tree and hand every entry to a callback
   *
   * Entries of $base/$dir whose name starts with a dot or an underscore are
   * skipped. Directories are passed to the callback first (type 'd'); when the
   * callback returns a true value the directory is descended into with an
   * incremented level. Afterwards all plain files are passed (type 'f'), their
   * return value is ignored.
   *
   * @param   array    &$data The results of the search are stored here
   * @param   string   $base  Where to start the search
   * @param   callback $func  Callback (function name or array with object,method)
   * @param   array    $opts  option array will be given to the Callback
   * @param   string   $dir   Current directory beyond $base
   * @param   int      $lvl   Recursion Level
   * @param   mixed    $sort  'natural' to use natural order sorting (default);
   *                          'date' to sort by filemtime; leave empty to skip sorting.
   * @throws  RuntimeException when $base is empty or '/'
   * @author  Andreas Gohr <andi@splitbrain.org>
   */
  function search(&$data,$base,$func,$opts,$dir='',$lvl=1,$sort='natural'){
      // safeguard against runaways #1452
      if($base == '' || $base == '/') {
          throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
      }

      // an unreadable directory is silently ignored
      $current = $base.'/'.$dir;
      $handle = @opendir($current);
      if(!$handle) return;

      // collect the directory content, separated into sub directories and files
      $subdirs = array();
      $entries = array();
      $entrypaths = array();
      while(($name = readdir($handle)) !== false){
          if(preg_match('/^[\._]/',$name)) continue; //skip hidden files and upper dirs

          $relative = $dir.'/'.$name;
          $absolute = $current.'/'.$name;
          if(is_dir($absolute)){
              $subdirs[] = $relative;
          }else{
              $entries[] = $relative;
              $entrypaths[] = $absolute;
          }
      }
      closedir($handle);

      if (!empty($sort)) {
          if ($sort == 'date') {
              // newest files first; kept as one expression so @ also covers filemtime
              @array_multisort(array_map('filemtime', $entrypaths), SORT_NUMERIC, SORT_DESC, $entries);
          } else /* natural */ {
              Sort::asortFN($entries);
          }
          Sort::asortFN($subdirs);
      }

      // directories first: the callback decides whether we descend
      foreach($subdirs as $subdir){
          $descend = call_user_func_array($func, array(&$data,$base,$subdir,'d',$lvl,$opts));
          if ($descend){
              search($data,$base,$func,$opts,$subdir,$lvl+1,$sort);
          }
      }

      // then the files of this directory
      foreach($entries as $entry){
          call_user_func_array($func, array(&$data,$base,$entry,'f',$lvl,$opts));
      }
  }
  /**
   * The following functions are user functions to use with the search()
   * function above. Such a function is called for every found file or
   * directory. When a directory is given to the function, it has to
   * decide if this directory should be traversed (true) or not (false).
   * The function has to accept the following parameters:
   *
   * array &$data - Reference to the result data structure
   * string $base - Base usually $conf['datadir']
   * string $file - current file or directory relative to $base
   * string $type - Type either 'd' for directory or 'f' for file
   * int $lvl - Current recursion depth
   * array $opts - option array as given to search()
   *
   * Return values for files are ignored.
   *
   * All functions should check the ACL for document READ rights.
   * Namespaces (directories) are NOT checked (when sneaky_index is 0) as this
   * would break the recursion (you can have a nonreadable dir over a readable
   * one deeper nested). Also make sure to check the file type (for example
   * in case of lockfiles).
   */
  /**
   * Searches for pages beginning with the given query
   *
   * Thin wrapper around search_universal(): lists page files only and keeps
   * those whose ID matches the query either at the very start or directly
   * after a namespace separator.
   *
   * $opts['query'] is the search query; a missing query is treated as an
   * empty string.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   *
   * @param array   $data
   * @param string  $base
   * @param string  $file
   * @param string  $type
   * @param integer $lvl
   * @param array   $opts
   *
   * @return bool
   */
  function search_qsearch(&$data,$base,$file,$type,$lvl,$opts){
      $query = isset($opts['query']) ? $opts['query'] : '';

      $opts = array(
          // search_universal() wraps this expression in '/' delimiters itself, so it
          // must not carry a delimiter of its own: a trailing '/' here produced the
          // pattern '/(^|:)query//' which PCRE rejects ("Unknown modifier '/'"),
          // making every lookup come back empty.
          'idmatch'   => '(^|:)'.preg_quote($query,'/'),
          'listfiles' => true,
          'pagesonly' => true,
      );
      return search_universal($data,$base,$file,$type,$lvl,$opts);
  }
  /**
   * Build the browsable index of pages
   *
   * $opts['ns'] is the currently viewed namespace
   * $opts['nofiles'] suppresses the listing of files when set
   *
   * Delegates to search_universal(). Directories are always listed; the
   * 'depth' option handed on is 0 when $file matches the beginning of
   * '/'.$opts['ns'] and -1 otherwise.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   *
   * @param array   $data
   * @param string  $base
   * @param string  $file
   * @param string  $type
   * @param integer $lvl
   * @param array   $opts
   *
   * @return bool
   */
  function search_index(&$data,$base,$file,$type,$lvl,$opts){
      global $conf;

      $viewed = '';
      if(isset($opts['ns'])) $viewed = $opts['ns'];

      $universal = array(
          'pagesonly' => true,
          'listdirs'  => true,
          'listfiles' => empty($opts['nofiles']),
          'sneakyacl' => $conf['sneaky_index'],
      );

      // Hacky, should rather use recmatch
      $onpath = preg_match('#^'.preg_quote($file, '#').'(/|$)#','/'.$viewed);
      $universal['depth'] = $onpath ? 0 : -1;

      return search_universal($data, $base, $file, $type, $lvl, $universal);
  }
  /**
   * List all namespaces
   *
   * Calls search_universal() with directory listing enabled and nothing
   * else; the options given by the caller are not passed on.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   *
   * @param array   $data
   * @param string  $base
   * @param string  $file
   * @param string  $type
   * @param integer $lvl
   * @param array   $opts
   *
   * @return bool
   */
  function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
      $dirsonly = array('listdirs' => true);
      return search_universal($data,$base,$file,$type,$lvl,$dirsonly);
  }
  /**
   * List all mediafiles in a namespace
   *
   *   $opts['depth']   recursion level, 0 for all
   *   $opts['showmsg'] shows message if invalid media id is used
   *   $opts['skipacl'] skip acl checking
   *   $opts['pattern'] check given pattern
   *   $opts['hash']    add hashes to result list
   *
   * Directories are never added to the result; for them the return value only
   * tells search() whether to descend. For files the return value is always
   * false and accepted files are appended to $data as info arrays.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   *
   * @param array   $data
   * @param string  $base
   * @param string  $file
   * @param string  $type
   * @param integer $lvl
   * @param array   $opts
   *
   * @return bool
   */
  function search_media(&$data,$base,$file,$type,$lvl,$opts){
      // directories: only decide about recursion
      if($type == 'd') {
          // no limit set means recurse forever, otherwise stop once the depth is reached
          return empty($opts['depth']) || !(substr_count($file,'/') >= $opts['depth']);
      }

      $mediaid = pathID($file,true);
      if($mediaid != cleanID($mediaid)){
          if(!empty($opts['showmsg'])){
              msg(hsc($mediaid).' is not a valid file name for DokuWiki - skipped',-1);
          }
          return false; // skip non-valid files
      }

      //check ACL for namespace (we have no ACL for mediafiles)
      $perm = auth_quickaclcheck(getNS($mediaid).':*');
      if(empty($opts['skipacl']) && $perm < AUTH_READ) return false;

      //check pattern filter
      if(!empty($opts['pattern']) && !@preg_match($opts['pattern'], $mediaid)) return false;

      $fullpath = $base.'/'.$file;
      $info = array(
          'id'       => $mediaid,
          'perm'     => $perm,
          'file'     => \dokuwiki\Utf8\PhpString::basename($file),
          'size'     => filesize($fullpath),
          'mtime'    => filemtime($fullpath),
          'writable' => is_writable($fullpath),
      );

      // files with an image extension additionally get a JpegMeta object
      if(preg_match("/\.(jpe?g|gif|png)$/",$file)){
          $info['isimg'] = true;
          $info['meta'] = new JpegMeta($fullpath);
      }else{
          $info['isimg'] = false;
      }

      if(!empty($opts['hash'])){
          $info['hash'] = md5(io_readFile(mediaFN($mediaid),false));
      }

      $data[] = $info;
      return false;
  }
  /**
   * List all mediafiles in a namespace as MediaFile objects
   *
   *   $opts['depth']   recursion level, 0 for all
   *   $opts['showmsg'] shows message if invalid media id is used
   *   $opts['skipacl'] skip acl checking
   *   $opts['pattern'] check given pattern
   *
   * Directories are never added to the result; for them the return value only
   * tells search() whether to descend. For files the return value is always
   * false and every accepted file is appended to $data as a
   * \dokuwiki\File\MediaFile instance.
   *
   * @todo This is a temporary copy of search_media returning a list of MediaFile instances
   *
   * @param array   $data
   * @param string  $base
   * @param string  $file
   * @param string  $type
   * @param integer $lvl
   * @param array   $opts
   *
   * @return bool
   */
  function search_mediafiles(&$data,$base,$file,$type,$lvl,$opts){
      //we do nothing with directories
      if($type == 'd') {
          if(empty($opts['depth'])) return true; // recurse forever
          $depth = substr_count($file,'/');
          if($depth >= $opts['depth']) return false; // depth reached
          return true;
      }

      $id = pathID($file,true);
      if($id != cleanID($id)){
          // 'showmsg' is optional: test with empty() like search_media() does,
          // instead of reading a possibly undefined array key
          if(!empty($opts['showmsg'])){
              msg(hsc($id).' is not a valid file name for DokuWiki - skipped',-1);
          }
          return false; // skip non-valid files
      }

      //check ACL for namespace (we have no ACL for mediafiles)
      // plain local variable; the previous code wrote into an undeclared $info array
      $perm = auth_quickaclcheck(getNS($id).':*');
      if(empty($opts['skipacl']) && $perm < AUTH_READ){
          return false;
      }

      //check pattern filter
      if(!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)){
          return false;
      }

      $data[] = new \dokuwiki\File\MediaFile($id);
      return false;
  }
  /**
   * This function just lists documents (for RSS namespace export)
   *
   * Only '.txt' files the current user may read are added to $data (as
   * array('id' => ...)). The function always returns false, so search()
   * never descends into sub directories.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   *
   * @param array   $data
   * @param string  $base
   * @param string  $file
   * @param string  $type
   * @param integer $lvl
   * @param array   $opts
   *
   * @return bool
   */
  function search_list(&$data,$base,$file,$type,$lvl,$opts){
      // nothing to do for directories and for anything that is not a txt file
      if($type == 'd' || substr($file,-4) != '.txt') return false;

      // add the page only when the ACL grants at least read access
      $id = pathID($file);
      if(!(auth_quickaclcheck($id) < AUTH_READ)){
          $data[] = array('id' => $id);
      }
      return false;
  }
  /**
   * Quicksearch for searching matching pagenames
   *
   * $opts['query'] is the search query
   *
   * A '.txt' file is added to $data (as array('id' => ...)) when its path
   * contains the query string and the user may read the page. Without a
   * query nothing is added.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   *
   * @param array   $data
   * @param string  $base
   * @param string  $file
   * @param string  $type
   * @param integer $lvl
   * @param array   $opts
   *
   * @return bool false only when a matching page is not readable, true otherwise
   */
  function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
      // always descend into directories
      if($type == 'd') return true;

      // only txt files are of interest
      if(substr($file,-4) != '.txt') return true;

      // simple string matching, needs a non-empty query
      if(empty($opts['query'])) return true;
      if(strpos($file,$opts['query']) === false) return true;

      // check ACL
      $id = pathID($file);
      if(auth_quickaclcheck($id) < AUTH_READ){
          return false;
      }

      $data[] = array('id' => $id);
      return true;
  }
  /**
   * Just lists all documents
   *
   *   $opts['depth']   recursion level, 0 for all
   *   $opts['hash']    do md5 sum of content?
   *   $opts['skipacl'] list everything regardless of ACL
   *
   * Every accepted '.txt' file is appended to $data with the keys 'id',
   * 'rev', 'mtime', 'size' and, when requested, 'hash'.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   *
   * @param array   $data
   * @param string  $base
   * @param string  $file
   * @param string  $type
   * @param integer $lvl
   * @param array   $opts
   *
   * @return bool
   */
  function search_allpages(&$data,$base,$file,$type,$lvl,$opts){
      $isdir = ($type == 'd');

      // enforce the depth limit: a directory sitting on the limit is not entered,
      // a file is only refused when it lies below the limit
      if(!empty($opts['depth'])){
          $segments = count(explode('/',ltrim($file,'/')));
          if($isdir){
              $toodeep = ($segments >= $opts['depth']);
          }else{
              $toodeep = ($segments > $opts['depth']);
          }
          if($toodeep) return false; // depth reached
      }

      // directories are never listed, just traversed
      if($isdir) return true;

      // only search txt files
      if(substr($file,-4) != '.txt') return true;

      $id = pathID($file);
      if(empty($opts['skipacl']) && auth_quickaclcheck($id) < AUTH_READ){
          return false;
      }

      // the modification time doubles as revision
      $modified = filemtime($base.'/'.$file);
      $item = array(
          'id'    => $id,
          'rev'   => $modified,
          'mtime' => $modified,
          'size'  => filesize($base.'/'.$file),
      );
      if(!empty($opts['hash'])){
          $item['hash'] = md5(trim(rawWiki($id)));
      }

      $data[] = $item;
      return true;
  }
  370. /* ------------- helper functions below -------------- */
  /**
   * fulltext sort
   *
   * Comparison callback for usort() working on the data structure created by
   * search_fulltext. Entries with the higher 'count' come first; entries with
   * the same count are ordered by their 'id' using Sort::strcmp().
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   *
   * @param array $a
   * @param array $b
   *
   * @return int
   */
  function sort_search_fulltext($a,$b){
      $left  = $a['count'];
      $right = $b['count'];

      if($left > $right) return -1;
      if($left < $right) return 1;

      // same count, fall back to comparing the IDs
      return Sort::strcmp($a['id'],$b['id']);
  }
  /**
   * translates a document path to an ID
   *
   * The path is run through utf8_decodeFN(), slashes become colons, a
   * trailing '.txt' is removed unless $keeptxt is set, and colons at both
   * ends are stripped.
   *
   * @author Andreas Gohr <andi@splitbrain.org>
   * @todo   move to pageutils
   *
   * @param string $path
   * @param bool   $keeptxt
   *
   * @return mixed|string
   */
  function pathID($path,$keeptxt=false){
      $id = str_replace('/',':',utf8_decodeFN($path));
      if(!$keeptxt){
          $id = preg_replace('#\.txt$#','',$id);
      }
      return trim($id, ':');
  }
  /**
   * This is a very universal callback for the search() function, replacing
   * many of the former individual functions at the cost of a more complex
   * setup.
   *
   * How the function behaves depends on the options passed in the $opts
   * array, where the following settings can be used.
   *
   * depth      int     recursion depth. 0 for unlimited                       (default: 0)
   * keeptxt    bool    keep .txt extension for IDs                            (default: false)
   * listfiles  bool    include files in listing                               (default: false)
   * listdirs   bool    include namespaces in listing                          (default: false)
   * pagesonly  bool    restrict files to pages                                (default: false)
   * skipacl    bool    do not check for READ permission                       (default: false)
   * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
   * hash       bool    create MD5 hash for files                              (default: false)
   * meta       bool    return file metadata                                   (default: false)
   * filematch  string  match files against this regexp                        (default: '', so accept everything)
   * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
   * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
   * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
   * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
   * showmsg    bool    warn about non-ID files                                (default: false)
   * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
   * firsthead  bool    return first heading for pages                         (default: false)
   *
   * The regular expressions are given without delimiters; they are wrapped
   * in '/' here.
   *
   * @param array  &$data - Reference to the result data structure
   * @param string $base  - Base usually $conf['datadir']
   * @param string $file  - current file or directory relative to $base
   * @param string $type  - Type either 'd' for directory or 'f' for file
   * @param int    $lvl   - Current recursion depth
   * @param array  $opts  - option array as given to search()
   * @return bool if this directory should be traversed (true) or not (false)
   *              return value is ignored for files
   *
   * @author Andreas Gohr <gohr@cosmocode.de>
   */
  function search_universal(&$data,$base,$file,$type,$lvl,$opts){
      $isdir   = ($type == 'd');
      $skipacl = !empty($opts['skipacl']);

      // get ID and check if it is a valid one
      $id = pathID($file,($isdir || !empty($opts['keeptxt'])));
      if($id != cleanID($id)){
          if(!empty($opts['showmsg'])){
              msg(hsc($id).' is not a valid file name for DokuWiki - skipped',-1);
          }
          return false; // skip non-valid files
      }

      $item = array(
          'id' => $id,
          'ns' => getNS($id),
      );

      // decide if recursion into this directory is wanted
      $recurse = true;
      if($isdir){
          if(!empty($opts['depth'])){
              // a limit is set: stop as soon as the path is as deep as the limit
              $recurse = !(substr_count($file,'/') >= $opts['depth']);
          }
          if($recurse && !empty($opts['recmatch']) && !preg_match('/'.$opts['recmatch'].'/',$file)){
              return false; // doesn't match
          }
      }

      // check ACL
      if($skipacl){
          $item['perm'] = AUTH_DELETE;
      }elseif($isdir){
          $item['perm'] = auth_quickaclcheck($id.':*');
      }else{
          $item['perm'] = auth_quickaclcheck($id); //FIXME check namespace for media files
      }

      // are we done here maybe?
      if($isdir){
          if(empty($opts['listdirs'])){
              return $recurse;
          }
          //neither list nor recurse forbidden items:
          if(!$skipacl && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ){
              return false;
          }
          if(!empty($opts['dirmatch']) && !preg_match('/'.$opts['dirmatch'].'/',$file)){
              return $recurse;
          }
          if(!empty($opts['nsmatch']) && !preg_match('/'.$opts['nsmatch'].'/',$item['ns'])){
              return $recurse;
          }
      }else{
          if(empty($opts['listfiles'])){
              return $recurse;
          }
          if(!$skipacl && $item['perm'] < AUTH_READ){
              return $recurse;
          }
          if(!empty($opts['pagesonly']) && (substr($file,-4) != '.txt')){
              return $recurse;
          }
          if(empty($opts['showhidden']) && isHiddenPage($id)){
              return $recurse;
          }
          if(!empty($opts['filematch']) && !preg_match('/'.$opts['filematch'].'/',$file)){
              return $recurse;
          }
          if(!empty($opts['idmatch']) && !preg_match('/'.$opts['idmatch'].'/',$id)){
              return $recurse;
          }
      }

      // still here? prepare the item
      $item['type']  = $type;
      $item['level'] = $lvl;
      $item['open']  = $recurse;

      if(!empty($opts['meta'])){
          $fullpath = $base.'/'.$file;
          $item['file']       = \dokuwiki\Utf8\PhpString::basename($file);
          $item['size']       = filesize($fullpath);
          $item['mtime']      = filemtime($fullpath);
          $item['rev']        = $item['mtime'];
          $item['writable']   = is_writable($fullpath);
          $item['executable'] = is_executable($fullpath);
      }

      // hash and first heading are only added for type 'f'
      if($type == 'f'){
          if(!empty($opts['hash'])){
              $item['hash'] = md5(io_readFile($base.'/'.$file,false));
          }
          if(!empty($opts['firsthead'])){
              $item['title'] = p_get_first_heading($id,METADATA_DONT_RENDER);
          }
      }

      // finally add the item
      $data[] = $item;
      return $recurse;
  }
  524. //Setup VIM: ex: et ts=4 :