search.module

  1. 7.x drupal/modules/search/search.module
  2. 5.x drupal/modules/search/search.module
  3. 6.x drupal/modules/search/search.module
  4. 8.x drupal/core/modules/search/search.module

Enables site-wide keyword searching.

Functions

Namesort descending Description
do_search Do a query on the full-text search index for a word or words.
search_admin_settings Menu callback; displays the search module settings page.
search_admin_settings_validate Validate callback.
search_block Implementation of hook_block().
search_box Output a search form for the search block and the theme's search box.
search_box_form_submit Process a block search form submission.
search_cron Implementation of hook_cron().
search_data Perform a standard search on the given keys, and return the formatted results.
search_dirty Marks a word as dirty (or retrieves the list of dirty words). This is used during indexing (cron). Words which are dirty have outdated total counts in the search_total table, and need to be recounted.
search_excerpt Returns snippets from a piece of text, with certain keywords highlighted. Used for formatting search results.
search_expand_cjk Basic CJK tokenizer. Simply splits a string into consecutive, overlapping sequences of characters ('minimum_word_size' long).
search_form Render a search form.
search_forms
search_form_submit Process a search form submission.
search_form_validate As the search form collates keys from other modules hooked in via hook_form_alter, the validation takes place in _submit. search_form_validate() is used solely to set the 'processed_keys' form value for the basic search form.
search_get_keys Helper function for grabbing search keys.
search_help Implementation of hook_help().
search_index Update the full-text search index for a particular item.
search_index_split Splits a string into tokens for indexing.
search_menu Implementation of hook_menu().
search_parse_query Parse a search query into SQL conditions.
search_perm Implementation of hook_perm().
search_preprocess Invokes hook_search_preprocess() in modules.
search_query_extract Extract a module-specific search option from a search query. e.g. 'type:book'
search_query_insert Return a query with the given module-specific search option inserted in. e.g. 'type:book'.
search_simplify Simplifies a string according to indexing rules.
search_update_totals This function is called on shutdown to ensure that search_total is always up to date (even if cron times out or otherwise fails).
search_view Menu callback; presents the search form and/or search results.
search_wipe Wipes a part of or the entire search index.
search_wipe_confirm Menu callback: confirm wiping of the index.
search_wipe_confirm_submit Handler for wipe confirmation
theme_search_block_form Theme the block search form.
theme_search_item Format a single result entry of a search query. This function is normally called by theme_search_page() or hook_search_page().
theme_search_page Format the result page of a search query.
theme_search_theme_form Theme the theme search form.
_search_excerpt_replace Helper function for array_walk in search_excerpt.
_search_index_truncate Helper function for array_walk in search_index_split.
_search_parse_query Helper function for search_parse_query();

Constants

Namesort descending Description
PREG_CLASS_CJK Matches all CJK characters that are candidates for auto-splitting (Chinese, Japanese, Korean). Contains kana and BMP ideographs.
PREG_CLASS_NUMBERS Matches all 'N' Unicode character classes (numbers)
PREG_CLASS_PUNCTUATION Matches all 'P' Unicode character classes (punctuation)
PREG_CLASS_SEARCH_EXCLUDE Matches Unicode character classes to exclude from the search index.

File

drupal/modules/search/search.module
View source
  1. <?php
  2. /**
  3. * @file
  4. * Enables site-wide keyword searching.
  5. */
  6. /**
  7. * Matches Unicode character classes to exclude from the search index.
  8. *
  9. * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
  10. *
  11. * The index only contains the following character classes:
  12. * Lu Letter, Uppercase
  13. * Ll Letter, Lowercase
  14. * Lt Letter, Titlecase
  15. * Lo Letter, Other
  16. * Nd Number, Decimal Digit
  17. * No Number, Other. The value is a PCRE character-class body without the enclosing brackets: search_simplify() wraps it in [...]+ with the /u modifier and replaces every matching run with a single space.
  18. */
  19. define('PREG_CLASS_SEARCH_EXCLUDE',
  20. '\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
  21. '\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
  22. '\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
  23. '\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
  24. '\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
  25. '\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
  26. '\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
  27. '\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
  28. '\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
  29. '\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
  30. '\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
  31. '\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
  32. '\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
  33. '\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
  34. '\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
  35. '\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
  36. '\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
  37. '\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
  38. '\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
  39. '\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
  40. '\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
  41. '\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
  42. '\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
  43. '\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
  44. '\x{a80b}\x{a823}-\x{a82b}\x{d800}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
  45. '\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
  46. '\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
  47. /**
  48. * Matches all 'N' Unicode character classes (numbers). The value is a PCRE character-class body without the enclosing brackets; search_simplify() uses it, inside [...] with the /u modifier, to join groups of digits that are separated only by punctuation.
  49. */
  50. define('PREG_CLASS_NUMBERS',
  51. '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'.
  52. '\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'.
  53. '\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'.
  54. '\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'.
  55. '\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'.
  56. '\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'.
  57. '\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'.
  58. '\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'.
  59. '\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}');
  60. /**
  61. * Matches all 'P' Unicode character classes (punctuation). The value is a PCRE character-class body without the enclosing brackets; search_simplify() uses it, inside [...] with the /u modifier, to drop punctuation that sits between two groups of digits.
  62. */
  63. define('PREG_CLASS_PUNCTUATION',
  64. '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'.
  65. '\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'.
  66. '\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'.
  67. '\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'.
  68. '\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'.
  69. '\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'.
  70. '\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'.
  71. '\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'.
  72. '\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'.
  73. '\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'.
  74. '\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'.
  75. '\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'.
  76. '\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'.
  77. '\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'.
  78. '\x{ff65}');
  79. /**
  80. * Matches all CJK characters that are candidates for auto-splitting
  81. * (Chinese, Japanese, Korean).
  82. * Contains kana and BMP ideographs. The value is a PCRE character-class body without the enclosing brackets: when the 'overlap_cjk' variable is enabled, search_simplify() wraps it in [...]+ with the /u modifier and passes each matched run to search_expand_cjk().
  83. */
  84. define('PREG_CLASS_CJK', '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}'.
  85. '\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}');
  86. /**
  87. * Implementation of hook_help().
  88. */
  89. function search_help($section) {
  90. switch ($section) {
  91. case 'admin/help#search':
  92. $output = '<p>'. t('The search module adds the ability to search for content by keywords. Search is often the only practical way to find content on a large site. Search is useful for finding users and posts by searching on keywords.') .'</p>';
  93. $output .= '<p>'. t('The search engine works by maintaining an index of the words in your site\'s content. It indexes the posts and users. You can adjust the settings to tweak the indexing behaviour. Note that the search requires cron to be set up correctly. The index percentage sets the maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.') .'</p>';
  94. $output .= '<p>'. t('For more information please read the configuration and customization handbook <a href="@search">Search page</a>.', array('@search' => 'http://drupal.org/handbook/modules/search/')) .'</p>';
  95. return $output;
  96. case 'admin/settings/search':
  97. return '<p>'. t('The search engine works by maintaining an index of the words in your site\'s content. You can adjust the settings below to tweak the indexing behaviour. Note that the search requires cron to be set up correctly.') .'</p>';
  98. case 'search#noresults':
  99. return t('<ul>
  100. <li>Check if your spelling is correct.</li>
  101. <li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
  102. <li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
  103. </ul>');
  104. }
  105. }
  106. /**
  107. * Implementation of hook_perm().
  108. */
  109. function search_perm() {
  110. return array('search content', 'use advanced search', 'administer search');
  111. }
  112. /**
  113. * Implementation of hook_block().
  114. */
  115. function search_block($op = 'list', $delta = 0) {
  116. if ($op == 'list') {
  117. $blocks[0]['info'] = t('Search form');
  118. return $blocks;
  119. }
  120. else if ($op == 'view' && user_access('search content')) {
  121. $block['content'] = drupal_get_form('search_block_form');
  122. $block['subject'] = t('Search');
  123. return $block;
  124. }
  125. }
  126. /**
  127. * Implementation of hook_menu().
  128. */
  129. function search_menu($may_cache) {
  130. $items = array();
  131. if ($may_cache) {
  132. $items[] = array('path' => 'search',
  133. 'title' => t('Search'),
  134. 'callback' => 'search_view',
  135. 'access' => user_access('search content'),
  136. 'type' => MENU_SUGGESTED_ITEM);
  137. $items[] = array('path' => 'admin/settings/search',
  138. 'title' => t('Search settings'),
  139. 'description' => t('Configure relevance settings for search and other indexing options'),
  140. 'callback' => 'drupal_get_form',
  141. 'callback arguments' => array('search_admin_settings'),
  142. 'access' => user_access('administer search'),
  143. 'type' => MENU_NORMAL_ITEM);
  144. $items[] = array('path' => 'admin/settings/search/wipe',
  145. 'title' => t('Clear index'),
  146. 'callback' => 'drupal_get_form',
  147. 'callback arguments' => array('search_wipe_confirm'),
  148. 'access' => user_access('administer search'),
  149. 'type' => MENU_CALLBACK);
  150. $items[] = array('path' => 'admin/logs/search', 'title' => t('Top search phrases'),
  151. 'description' => t('View most popular search phrases.'),
  152. 'callback' => 'watchdog_top',
  153. 'callback arguments' => array('search'));
  154. }
  155. else if (arg(0) == 'search') {
  156. // To remember the user's search keywords when switching across tabs,
  157. // we dynamically add the keywords to the search tabs' paths.
  158. $keys = search_get_keys();
  159. $keys = strlen($keys) ? '/'. $keys : '';
  160. foreach (module_list() as $name) {
  161. if (module_hook($name, 'search') && $title = module_invoke($name, 'search', 'name')) {
  162. $items[] = array('path' => 'search/'. $name . $keys, 'title' => $title,
  163. 'callback' => 'search_view',
  164. 'access' => user_access('search content'),
  165. 'type' => MENU_LOCAL_TASK);
  166. }
  167. }
  168. }
  169. return $items;
  170. }
  171. /**
  172. * Validate callback.
  173. */
  174. function search_admin_settings_validate($form_id, $form_values) {
  175. if ($form_values['op'] == t('Re-index site')) {
  176. drupal_goto('admin/settings/search/wipe');
  177. }
  178. // If these settings change, the index needs to be rebuilt.
  179. if ((variable_get('minimum_word_size', 3) != $form_values['minimum_word_size']) ||
  180. (variable_get('overlap_cjk', TRUE) != $form_values['overlap_cjk'])) {
  181. drupal_set_message(t('The index will be rebuilt.'));
  182. search_wipe();
  183. }
  184. }
  185. /**
  186. * Menu callback; displays the search module settings page.
  187. */
  188. function search_admin_settings() {
  189. // Collect some stats
  190. $remaining = 0;
  191. $total = 0;
  192. foreach (module_list() as $module) {
  193. if (module_hook($module, 'search')) {
  194. $status = module_invoke($module, 'search', 'status');
  195. $remaining += $status['remaining'];
  196. $total += $status['total'];
  197. }
  198. }
  199. $count = format_plural($remaining, 'There is 1 item left to index.', 'There are @count items left to index.');
  200. $percentage = ((int)min(100, 100 * ($total - $remaining) / max(1, $total))) .'%';
  201. $status = '<p><strong>'. t('%percentage of the site has been indexed.', array('%percentage' => $percentage)) .' '. $count .'</strong></p>';
  202. $form['status'] = array('#type' => 'fieldset', '#title' => t('Indexing status'));
  203. $form['status']['status'] = array('#value' => $status);
  204. $form['status']['wipe'] = array('#type' => 'submit', '#value' => t('Re-index site'));
  205. $items = drupal_map_assoc(array(10, 20, 50, 100, 200, 500));
  206. // Indexing throttle:
  207. $form['indexing_throttle'] = array('#type' => 'fieldset', '#title' => t('Indexing throttle'));
  208. $form['indexing_throttle']['search_cron_limit'] = array('#type' => 'select', '#title' => t('Items to index per cron run'), '#default_value' => variable_get('search_cron_limit', 100), '#options' => $items, '#description' => t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'));
  209. // Indexing settings:
  210. $form['indexing_settings'] = array('#type' => 'fieldset', '#title' => t('Indexing settings'));
  211. $form['indexing_settings']['info'] = array('#value' => '<em>'. t('<p>Changing the settings below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>');
  212. $form['indexing_settings']['minimum_word_size'] = array('#type' => 'textfield', '#title' => t('Minimum word length to index'), '#default_value' => variable_get('minimum_word_size', 3), '#size' => 5, '#maxlength' => 3, '#description' => t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'));
  213. $form['indexing_settings']['overlap_cjk'] = array('#type' => 'checkbox', '#title' => t('Simple CJK handling'), '#default_value' => variable_get('overlap_cjk', TRUE), '#description' => t('Whether to apply a simple Chinese/Japanese/Korean tokenizer based on overlapping sequences. Turn this off if you want to use an external preprocessor for this instead. Does not affect other languages.'));
  214. // Per module settings
  215. $form = array_merge($form, module_invoke_all('search', 'admin'));
  216. return system_settings_form($form);
  217. }
  218. /**
  219. * Menu callback: confirm wiping of the index.
  220. */
  221. function search_wipe_confirm() {
  222. return confirm_form(array(), t('Are you sure you want to re-index the site?'),
  223. 'admin/settings/search', t(' The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed. This action cannot be undone.'), t('Re-index site'), t('Cancel'));
  224. }
  225. /**
  226. * Handler for wipe confirmation
  227. */
  228. function search_wipe_confirm_submit($form_id, &$form) {
  229. if ($form['confirm']) {
  230. search_wipe();
  231. drupal_set_message(t('The index will be rebuilt.'));
  232. return 'admin/settings/search';
  233. }
  234. }
  235. /**
  236. * Wipes a part of or the entire search index.
  237. *
  238. * @param $sid
  239. * (optional) The SID of the item to wipe. If specified, $type must be passed
  240. * too.
  241. * @param $type
  242. * (optional) The type of item to wipe.
  243. */
  244. function search_wipe($sid = NULL, $type = NULL, $reindex = FALSE) {
  245. if ($type == NULL && $sid == NULL) {
  246. module_invoke_all('search', 'reset');
  247. }
  248. else {
  249. db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
  250. db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
  251. // When re-indexing, keep link references
  252. db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'". ($reindex ? " AND fromsid = 0" : ''), $sid, $type);
  253. }
  254. }
  255. /**
  256. * Marks a word as dirty (or retrieves the list of dirty words). This is used
  257. * during indexing (cron). Words which are dirty have outdated total counts in
  258. * the search_total table, and need to be recounted.
  259. */
  260. function search_dirty($word = NULL) {
  261. static $dirty = array();
  262. if ($word !== NULL) {
  263. $dirty[$word] = TRUE;
  264. }
  265. else {
  266. return $dirty;
  267. }
  268. }
  269. /**
  270. * Implementation of hook_cron().
  271. *
  272. * Fires hook_update_index() in all modules and cleans up dirty words (see
  273. * search_dirty).
  274. */
  275. function search_cron() {
  276. // We register a shutdown function to ensure that search_total is always up
  277. // to date.
  278. register_shutdown_function('search_update_totals');
  279. // Update word index
  280. foreach (module_list() as $module) {
  281. module_invoke($module, 'update_index');
  282. }
  283. }
  284. /**
  285. * This function is called on shutdown to ensure that search_total is always
  286. * up to date (even if cron times out or otherwise fails).
  287. */
  288. function search_update_totals() {
  289. // Update word IDF (Inverse Document Frequency) counts for new/changed words
  290. foreach (search_dirty() as $word => $dummy) {
  291. // Get total count
  292. $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
  293. // Apply Zipf's law to equalize the probability distribution
  294. $total = log10(1 + 1/(max(1, $total)));
  295. db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $total, $word);
  296. if (!db_affected_rows()) {
  297. db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $word, $total);
  298. }
  299. }
  300. // Find words that were deleted from search_index, but are still in
  301. // search_total. We use a LEFT JOIN between the two tables and keep only the
  302. // rows which fail to join.
  303. $result = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
  304. while ($word = db_fetch_object($result)) {
  305. db_query("DELETE FROM {search_total} WHERE word = '%s'", $word->realword);
  306. }
  307. }
  308. /**
  309. * Simplifies a string according to indexing rules.
  310. */
  311. function search_simplify($text) {
  312. // Decode entities to UTF-8
  313. $text = decode_entities($text);
  314. // Lowercase
  315. $text = drupal_strtolower($text);
  316. // Call an external processor for word handling.
  317. search_preprocess($text);
  318. // Simple CJK handling
  319. if (variable_get('overlap_cjk', TRUE)) {
  320. $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
  321. }
  322. // To improve searching for numerical data such as dates, IP addresses
  323. // or version numbers, we consider a group of numerical characters
  324. // separated only by punctuation characters to be one piece.
  325. // This also means that searching for e.g. '20/03/1984' also returns
  326. // results with '20-03-1984' in them.
  327. // Readable regexp: ([number]+)[punctuation]+(?=[number])
  328. $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);
  329. // The dot, underscore and dash are simply removed. This allows meaningful
  330. // search behaviour with acronyms and URLs.
  331. $text = preg_replace('/[._-]+/', '', $text);
  332. // With the exception of the rules above, we consider all punctuation,
  333. // marks, spacers, etc, to be a word boundary.
  334. $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u', ' ', $text);
  335. return $text;
  336. }
  337. /**
  338. * Basic CJK tokenizer. Simply splits a string into consecutive, overlapping
  339. * sequences of characters ('minimum_word_size' long).
  340. */
  341. function search_expand_cjk($matches) {
  342. $min = variable_get('minimum_word_size', 3);
  343. $str = $matches[0];
  344. $l = drupal_strlen($str);
  345. // Passthrough short words
  346. if ($l <= $min) {
  347. return ' '. $str .' ';
  348. }
  349. $tokens = ' ';
  350. // FIFO queue of characters
  351. $chars = array();
  352. // Begin loop
  353. for ($i = 0; $i < $l; ++$i) {
  354. // Grab next character
  355. $current = drupal_substr($str, 0, 1);
  356. $str = substr($str, strlen($current));
  357. $chars[] = $current;
  358. if ($i >= $min - 1) {
  359. $tokens .= implode('', $chars) .' ';
  360. array_shift($chars);
  361. }
  362. }
  363. return $tokens;
  364. }
  365. /**
  366. * Splits a string into tokens for indexing.
  367. */
  368. function search_index_split($text) {
  369. static $last = NULL;
  370. static $lastsplit = NULL;
  371. if ($last == $text) {
  372. return $lastsplit;
  373. }
  374. // Process words
  375. $text = search_simplify($text);
  376. $words = explode(' ', $text);
  377. array_walk($words, '_search_index_truncate');
  378. // Save last keyword result
  379. $last = $text;
  380. $lastsplit = $words;
  381. return $words;
  382. }
  383. /**
  384. * Helper function for array_walk in search_index_split.
  385. */
  386. function _search_index_truncate(&$text) {
  387. $text = truncate_utf8($text, 50);
  388. }
  389. /**
  390. * Invokes hook_search_preprocess() in modules.
  391. */
  392. function search_preprocess(&$text) {
  393. foreach (module_implements('search_preprocess') as $module) {
  394. $text = module_invoke($module, 'search_preprocess', $text);
  395. }
  396. }
  397. /**
  398. * Update the full-text search index for a particular item.
  399. *
  400. * @param $sid
  401. * A number identifying this particular item (e.g. node id).
  402. *
  403. * @param $type
  404. * A string defining this type of item (e.g. 'node')
  405. *
  406. * @param $text
  407. * The content of this item. Must be a piece of HTML text.
  408. *
  409. * @ingroup search
  410. */
  411. function search_index($sid, $type, $text) {
  412. $minimum_word_size = variable_get('minimum_word_size', 3);
  413. // Link matching
  414. global $base_url;
  415. $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/|'. preg_quote(base_path(), '@') .')(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';
  416. // Multipliers for scores of words inside certain HTML tags.
  417. // Note: 'a' must be included for link ranking to work.
  418. $tags = array('h1' => 25,
  419. 'h2' => 18,
  420. 'h3' => 15,
  421. 'h4' => 12,
  422. 'h5' => 9,
  423. 'h6' => 6,
  424. 'u' => 3,
  425. 'b' => 3,
  426. 'i' => 3,
  427. 'strong' => 3,
  428. 'em' => 3,
  429. 'a' => 10);
  430. // Strip off all ignored tags to speed up processing, but insert space before/after
  431. // them to keep word boundaries.
  432. $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  433. $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');
  434. // Split HTML tags from plain text.
  435. $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  436. // Note: PHP ensures the array consists of alternating delimiters and literals
  437. // and begins and ends with a literal (inserting $null as required).
  438. $tag = FALSE; // Odd/even counter. Tag or no tag.
  439. $link = FALSE; // State variable for link analyser
  440. $score = 1; // Starting score per word
  441. $accum = ' '; // Accumulator for cleaned up data
  442. $tagstack = array(); // Stack with open tags
  443. $tagwords = 0; // Counter for consecutive words
  444. $focus = 1; // Focus state
  445. $results = array(0 => array()); // Accumulator for words for index
  446. foreach ($split as $value) {
  447. if ($tag) {
  448. // Increase or decrease score per word based on tag
  449. list($tagname) = explode(' ', $value, 2);
  450. $tagname = drupal_strtolower($tagname);
  451. // Closing or opening tag?
  452. if ($tagname[0] == '/') {
  453. $tagname = substr($tagname, 1);
  454. // If we encounter unexpected tags, reset score to avoid incorrect boosting.
  455. if (!count($tagstack) || $tagstack[0] != $tagname) {
  456. $tagstack = array();
  457. $score = 1;
  458. }
  459. else {
  460. // Remove from tag stack and decrement score
  461. $score = max(1, $score - $tags[array_shift($tagstack)]);
  462. }
  463. if ($tagname == 'a') {
  464. $link = FALSE;
  465. }
  466. }
  467. else {
  468. if ($tagstack[0] == $tagname) {
  469. // None of the tags we look for make sense when nested identically.
  470. // If they are, it's probably broken HTML.
  471. $tagstack = array();
  472. $score = 1;
  473. }
  474. else {
  475. // Add to open tag stack and increment score
  476. array_unshift($tagstack, $tagname);
  477. $score += $tags[$tagname];
  478. }
  479. if ($tagname == 'a') {
  480. // Check if link points to a node on this site
  481. if (preg_match($node_regexp, $value, $match)) {
  482. $path = drupal_get_normal_path($match[1]);
  483. if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
  484. $linknid = $match[1];
  485. if ($linknid > 0) {
  486. // Note: ignore links to uncachable nodes to avoid redirect bugs.
  487. $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
  488. if (filter_format_allowcache($node->format)) {
  489. $link = TRUE;
  490. $linktitle = $node->title;
  491. }
  492. }
  493. }
  494. }
  495. }
  496. }
  497. // A tag change occurred, reset counter.
  498. $tagwords = 0;
  499. }
  500. else {
  501. // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
  502. if ($value != '') {
  503. if ($link) {
  504. // Check to see if the node link text is its URL. If so, we use the target node title instead.
  505. if (preg_match('!^https?://!i', $value)) {
  506. $value = $linktitle;
  507. }
  508. }
  509. $words = search_index_split($value);
  510. foreach ($words as $word) {
  511. // Add word to accumulator
  512. $accum .= $word .' ';
  513. $num = is_numeric($word);
  514. // Check wordlength
  515. if ($num || drupal_strlen($word) >= $minimum_word_size) {
  516. // Normalize numbers
  517. if ($num) {
  518. $word = (int)ltrim($word, '-0');
  519. }
  520. if ($link) {
  521. if (!isset($results[$linknid])) {
  522. $results[$linknid] = array();
  523. }
  524. $results[$linknid][$word] += $score * $focus;
  525. }
  526. else {
  527. $results[0][$word] += $score * $focus;
  528. // Focus is a decaying value in terms of the amount of unique words up to this point.
  529. // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
  530. $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
  531. }
  532. }
  533. $tagwords++;
  534. // Too many words inside a single tag probably mean a tag was accidentally left open.
  535. if (count($tagstack) && $tagwords >= 15) {
  536. $tagstack = array();
  537. $score = 1;
  538. }
  539. }
  540. }
  541. }
  542. $tag = !$tag;
  543. }
  544. search_wipe($sid, $type, TRUE);
  545. // Insert cleaned up data into dataset
  546. db_query("INSERT INTO {search_dataset} (sid, type, data) VALUES (%d, '%s', '%s')", $sid, $type, $accum);
  547. // Insert results into search index
  548. foreach ($results[0] as $word => $score) {
  549. db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %f)", $word, $sid, $type, $score);
  550. search_dirty($word);
  551. }
  552. unset($results[0]);
  553. // Now insert links to nodes
  554. foreach ($results as $nid => $words) {
  555. foreach ($words as $word => $score) {
  556. db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %f)", $word, $nid, 'node', $sid, $type, $score);
  557. search_dirty($word);
  558. }
  559. }
  560. }
  561. /**
  562. * Extract a module-specific search option from a search query. e.g. 'type:book'
  563. */
  564. function search_query_extract($keys, $option) {
  565. if (preg_match('/(^| )'. $option .':([^ ]*)( |$)/i', $keys, $matches)) {
  566. return $matches[2];
  567. }
  568. }
  569. /**
  570. * Return a query with the given module-specific search option inserted in.
  571. * e.g. 'type:book'.
  572. */
  573. function search_query_insert($keys, $option, $value = '') {
  574. if (search_query_extract($keys, $option)) {
  575. $keys = trim(preg_replace('/(^| )'. $option .':[^ ]*/i', '', $keys));
  576. }
  577. if ($value != '') {
  578. $keys .= ' '. $option .':'. $value;
  579. }
  580. return $keys;
  581. }
  /**
   * Parse a search query into SQL conditions.
   *
   * We build a query that matches the dataset bodies.
   *
   * The query text is split into space-separated tokens. A token may be a
   * double-quoted phrase, may be prefixed with "-" to negate it, and the bare
   * token OR joins the keyword before it with the keyword after it into a
   * group of alternatives.
   *
   * @param $text
   *   The search keywords as a single string.
   *
   * @return
   *   NULL if $text contains no tokens. Otherwise an array with, in order:
   *   - The per-keyword "d.data [NOT] LIKE" conditions joined with ' AND '.
   *   - The arguments (keywords/phrases) belonging to those conditions.
   *   - A string of "i.word = '%s'" conditions joined with ' OR ', one per
   *     collected index word.
   *   - The index words collected by _search_parse_query().
   *   - The number of index-word matches an item needs at least.
   */
  function search_parse_query($text) {
    $keys = array('positive' => array(), 'negative' => array());

    // Tokenize query string
    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);

    if (count($matches) < 1) {
      return NULL;
    }

    // Classify tokens
    $or = FALSE;
    foreach ($matches as $match) {
      $phrase = FALSE;
      // Strip off phrase quotes. The tokenizer pattern guarantees a non-empty
      // token, so offset 0 always exists. Square brackets are used because the
      // curly-brace offset syntax is a parse error on PHP 8.
      if ($match[2][0] == '"') {
        $match[2] = substr($match[2], 1, -1);
        $phrase = TRUE;
      }
      // Simplify keyword according to indexing rules and external preprocessors
      $words = search_simplify($match[2]);
      // Re-explode in case simplification added more words, except when matching a phrase
      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
      // Negative matches
      if ($match[1] == '-') {
        $keys['negative'] = array_merge($keys['negative'], $words);
      }
      // OR operator: instead of a single keyword, we store an array of all
      // OR'd keywords.
      elseif ($match[2] == 'OR' && count($keys['positive'])) {
        $last = array_pop($keys['positive']);
        // Starting a new OR?
        if (!is_array($last)) {
          $last = array($last);
        }
        $keys['positive'][] = $last;
        $or = TRUE;
        // Skip the reset below so the next keyword joins this OR group.
        continue;
      }
      // Plain keyword
      else {
        if ($or) {
          // Add to last element (which is an array)
          $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words);
        }
        else {
          $keys['positive'] = array_merge($keys['positive'], $words);
        }
      }
      $or = FALSE;
    }

    // Convert keywords into SQL statements.
    $query = array();
    $arguments = array();
    $arguments2 = array();
    $matches = 0;
    // Positive matches
    foreach ($keys['positive'] as $key) {
      // Group of ORed terms
      if (is_array($key) && count($key)) {
        $queryor = array();
        $any = FALSE;
        foreach ($key as $or) {
          list($q, $count) = _search_parse_query($or, $arguments2);
          $any |= $count;
          if ($q) {
            $queryor[] = $q;
            $arguments[] = $or;
          }
        }
        if (count($queryor)) {
          $query[] = '('. implode(' OR ', $queryor) .')';
          // A group of OR keywords only needs to match once
          $matches += ($any > 0);
        }
      }
      // Single ANDed term
      else {
        list($q, $count) = _search_parse_query($key, $arguments2);
        if ($q) {
          $query[] = $q;
          $arguments[] = $key;
          // Each AND keyword needs to match at least once
          $matches += $count;
        }
      }
    }
    // Negative matches
    foreach ($keys['negative'] as $key) {
      list($q) = _search_parse_query($key, $arguments2, TRUE);
      if ($q) {
        $query[] = $q;
        $arguments[] = $key;
      }
    }
    $query = implode(' AND ', $query);

    // Build word-index conditions for the first pass. The substr() drops the
    // trailing ' OR ' left behind by str_repeat().
    $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4);

    return array($query, $arguments, $query2, $arguments2, $matches);
  }
  /**
   * Helper function for search_parse_query().
   *
   * Builds the LIKE condition for one keyword or phrase and, for positive
   * terms, records the words that should be looked up in the word index.
   *
   * @param $word
   *   The keyword or phrase; a phrase is split on single spaces.
   * @param $scores
   *   Array of index words collected so far, keyed by the word itself. New
   *   words are added to it in place.
   * @param $not
   *   TRUE for a negative term. Negative terms add no index words.
   *
   * @return
   *   An array holding the SQL condition snippet and the number of words that
   *   were newly added to $scores.
   */
  function _search_parse_query(&$word, &$scores, $not = FALSE) {
    $added = 0;

    // Determine the scorewords of this word/phrase
    if (!$not) {
      foreach (explode(' ', $word) as $part) {
        $numeric = is_numeric($part);
        // Non-numeric words shorter than the minimum word size are ignored.
        if (!$numeric && drupal_strlen($part) < variable_get('minimum_word_size', 3)) {
          continue;
        }
        if ($numeric) {
          // Numbers are reduced to an integer without sign or leading zeros.
          $part = (int)ltrim($part, '-0');
        }
        if (!isset($scores[$part])) {
          $scores[$part] = $part;
          $added++;
        }
      }
    }

    // Return matching snippet and number of added words
    $negation = $not ? 'NOT ' : '';
    return array("d.data ". $negation ."LIKE '%% %s %%'", $added);
  }
  /**
   * Do a query on the full-text search index for a word or words.
   *
   * This function is normally only called by each module that supports the
   * indexed search (and thus, implements hook_update_index()).
   *
   * Two queries are performed which can be extended by the caller.
   *
   * The first query selects a set of possible matches based on the search index
   * and any extra given restrictions. This is the classic "OR" search.
   *
   * SELECT i.type, i.sid, SUM(i.score*t.count) AS relevance
   * FROM {search_index} i
   * INNER JOIN {search_total} t ON i.word = t.word
   * $join1
   * WHERE $where1 AND (...)
   * GROUP BY i.type, i.sid
   *
   * The second query further refines this set by verifying advanced text
   * conditions (such as AND, negative or phrase matches), and orders the results
   * on the column or expression 'score':
   *
   * SELECT i.type, i.sid, $select2
   * FROM temp_search_sids i
   * INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type
   * $join2
   * WHERE (...)
   * ORDER BY score DESC
   *
   * @param $keywords
   *   A search string as entered by the user.
   *
   * @param $type
   *   A string identifying the calling module.
   *
   * @param $join1
   *   (optional) Inserted into the JOIN part of the first SQL query.
   *   For example "INNER JOIN {node} n ON n.nid = i.sid".
   *
   * @param $where1
   *   (optional) Inserted into the WHERE part of the first SQL query.
   *   For example "(n.status > %d)".
   *
   * @param $arguments1
   *   (optional) Extra SQL arguments belonging to the first query.
   *
   * @param $select2
   *   (optional) Inserted into the SELECT part of the second query. Must contain
   *   a column selected as 'score'.
   *   Defaults to 'i.relevance AS score'.
   *
   * @param $join2
   *   (optional) Inserted into the JOIN part of the second SQL query.
   *   For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid"
   *
   * @param $arguments2
   *   (optional) Extra SQL arguments belonging to the second query.
   *
   * @param $sort_parameters
   *   (optional) SQL arguments for sorting the final results.
   *   Default: 'ORDER BY score DESC'
   *
   * @return
   *   An array of result row objects for the current pager page, as fetched
   *   from the temporary results table. Empty if nothing matched or the
   *   keywords were unusable.
   *
   * @ingroup search
   */
  function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $select2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') {
    $query = search_parse_query($keywords);

    // search_parse_query() returns NULL when the keywords contain no tokens at
    // all. Handle that the same way as a query without usable index words,
    // without reading array offsets from NULL.
    $no_index_words = ($query === NULL || $query[2] == '');
    if ($no_index_words) {
      form_set_error('keys', t('You must include at least one positive keyword with @count characters or more.', array('@count' => variable_get('minimum_word_size', 3))));
    }
    if ($no_index_words || $query[0] == '') {
      return array();
    }

    // First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
    // 'matches' is used to reject those items that cannot possibly match the query.
    $conditions = $where1 .' AND ('. $query[2] .") AND i.type = '%s'";
    // Argument order follows the placeholders: caller conditions, index words,
    // the type, and finally the HAVING threshold.
    $arguments = array_merge($arguments1, $query[3], array($type, $query[4]));
    db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d", $arguments, 'temp_search_sids');

    // Calculate maximum relevance, to normalize it
    $normalize = db_result(db_query('SELECT MAX(relevance) FROM temp_search_sids'));
    if (!$normalize) {
      return array();
    }
    $select2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * i.relevance)', $select2);

    // Second pass: only keep items that match the complicated keywords conditions (phrase search, negative keywords, ...)
    $conditions = '('. $query[0] .')';
    $arguments = array_merge($arguments2, $query[1]);
    db_query_temporary("SELECT i.type, i.sid, $select2 FROM temp_search_sids i INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type $join2 WHERE $conditions $sort_parameters", $arguments, 'temp_search_results');
    if (($count = db_result(db_query('SELECT COUNT(*) FROM temp_search_results'))) == 0) {
      return array();
    }
    // The total is already known, so hand the pager a constant count query.
    $count_query = "SELECT $count";

    // Do actual search query
    $result = pager_query("SELECT * FROM temp_search_results", 10, 0, $count_query);
    $results = array();
    while ($item = db_fetch_object($result)) {
      $results[] = $item;
    }
    return $results;
  }
  /**
   * Helper function for grabbing search keys.
   *
   * @return
   *   The search keys: everything after the second slash of the current path
   *   ($_GET['q']) if present, otherwise the 'keys' request parameter, or an
   *   empty string when neither is available.
   */
  function search_get_keys() {
    // Extract keys as remainder of path
    // Note: support old GET format of searches for existing links.
    $path = explode('/', isset($_GET['q']) ? $_GET['q'] : '', 3);
    if (count($path) == 3) {
      return $path[2];
    }
    // The legacy parameter is optional; do not read it when it was not sent.
    return isset($_REQUEST['keys']) ? $_REQUEST['keys'] : '';
  }
  /**
   * Menu callback; presents the search form and/or search results.
   *
   * @return
   *   The rendered search form, followed by the themed results box when a
   *   search was performed.
   */
  function search_view() {
    $type = arg(1);

    // Defaults so that neither variable is read while undefined: $keys is not
    // set when a form is being submitted, and $results is not set when the
    // keys are blank.
    $keys = '';
    $results = '';

    // Search form submits with POST but redirects to GET. This way we can keep
    // the search query URL clean as a whistle:
    // search/type/keyword+keyword
    if (!isset($_POST['form_id'])) {
      if ($type == '') {
        // Note: search/node can not be a default tab because it would take on the
        // path of its parent (search). It would prevent remembering keywords when
        // switching tabs. This is why we drupal_goto to it from the parent instead.
        drupal_goto('search/node');
      }

      $keys = search_get_keys();
      // Only perform search if there is non-whitespace search term:
      if (trim($keys)) {
        // Log the search keys:
        watchdog('search', t('%keys (@type).', array('%keys' => $keys, '@type' => module_invoke($type, 'search', 'name'))), WATCHDOG_NOTICE, l(t('results'), 'search/'. $type .'/'. $keys));

        // Collect the search results:
        $results = search_data($keys, $type);

        if ($results) {
          $results = theme('box', t('Search results'), $results);
        }
        else {
          $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
        }
      }

      // Construct the search form.
      $output = drupal_get_form('search_form', NULL, $keys, $type);
      $output .= $results;

      return $output;
    }

    // A form submission is in progress: only build the form.
    return drupal_get_form('search_form', NULL, $keys, $type);
  }
  854. /**
  855. * @defgroup search Search interface
  856. * @{
  857. * The Drupal search interface manages a global search mechanism.
  858. *
  859. * Modules may plug into this system to provide searches of different types of
  860. * data. Most of the system is handled by search.module, so this must be enabled
  861. * for all of the search features to work.
  862. *
  863. * There are three ways to interact with the search system:
  864. * - Specifically for searching nodes, you can implement nodeapi('update index')
  865. * and nodeapi('search result'). However, note that the search system already
  866. * indexes all visible output of a node, i.e. everything displayed normally
  867. * by hook_view() and hook_nodeapi('view'). This is usually sufficient.
  868. * You should only use this mechanism if you want additional, non-visible data
  869. * to be indexed.
  870. * - Implement hook_search(). This will create a search tab for your module on
  871. * the /search page with a simple keyword search form. You may optionally
  872. * implement hook_search_item() to customize the display of your results.
  873. * - Implement hook_update_index(). This allows your module to use Drupal's
  874. * HTML indexing mechanism for searching full text efficiently.
  875. *
  876. * If your module needs to provide a more complicated search form, then you need
  877. * to implement it yourself without hook_search(). In that case, you should
  878. * define it as a local task (tab) under the /search page (e.g. /search/mymodule)
  879. * so that users can easily find it.
  880. */
  /**
   * Build the search form.
   *
   * @param $action
   *   Form action. When empty, the URL of "search/$type" is used.
   * @param $keys
   *   The search string entered by the user, containing keywords for the search.
   * @param $type
   *   The type of search the form is for; stored in the form as the 'module'
   *   value. NOTE(review): presumably the name of a module implementing
   *   hook_search() - confirm against callers.
   * @param $prompt
   *   A piece of text to put before the form (e.g. "Enter your keywords").
   *   NULL selects the default prompt.
   * @return
   *   A form array describing the search form.
   */
  function search_form($action = '', $keys = '', $type = NULL, $prompt = NULL) {
    // Add CSS
    $stylesheet = drupal_get_path('module', 'search') .'/search.css';
    drupal_add_css($stylesheet, 'module', 'all', FALSE);

    if (!$action) {
      $action = url('search/'. $type);
    }
    if ($prompt === NULL) {
      $prompt = t('Enter your keywords');
    }

    $form = array();
    $form['#action'] = $action;
    $form['#attributes'] = array('class' => 'search-form');
    $form['module'] = array('#type' => 'value', '#value' => $type);

    // Keyword field and submit button share one inline container.
    $inline = array(
      '#prefix' => '<div class="container-inline">',
      '#suffix' => '</div>',
    );
    $inline['keys'] = array(
      '#type' => 'textfield',
      '#title' => '',
      '#default_value' => $keys,
      // An empty prompt yields the narrower field.
      '#size' => $prompt ? 40 : 20,
      '#maxlength' => 255,
    );
    // processed_keys is used to coordinate keyword passing between other forms
    // that hook into the basic search form.
    $inline['processed_keys'] = array('#type' => 'value', '#value' => array());
    $inline['submit'] = array('#type' => 'submit', '#value' => t('Search'));

    $form['basic'] = array('#type' => 'item', '#title' => $prompt);
    $form['basic']['inline'] = $inline;

    return $form;
  }
  /**
   * Validation handler for the search form.
   *
   * As the search form collates keys from other modules hooked in via
   * hook_form_alter, the actual validation takes place in _submit. This
   * handler only stores the trimmed 'keys' value as the form's
   * 'processed_keys' value.
   */
  function search_form_validate($form_id, $form_values, $form) {
    $trimmed_keys = trim($form_values['keys']);
    $target = $form['basic']['inline']['processed_keys'];
    form_set_value($target, $trimmed_keys);
  }
  /**
   * Process a search form submission.
   *
   * @return
   *   The path "search/<type>/<keys>", where the type falls back to 'node'
   *   when the form carries no module value.
   */
  function search_form_submit($form_id, $form_values) {
    $keys = $form_values['processed_keys'];
    if ($keys == '') {
      form_set_error('keys', t('Please enter some keywords.'));
      // Fall through to the drupal_goto() call.
    }

    if ($form_values['module']) {
      $type = $form_values['module'];
    }
    else {
      $type = 'node';
    }

    return implode('/', array('search', $type, $keys));
  }
  /**
   * Output a search form for the search block and the theme's search box.
   *
   * @param $form_id
   *   The ID of the form being built; used as prefix for the keyword field.
   *
   * @return
   *   A form array with a keyword text field and a submit button.
   */
  function search_box($form_id) {
    // The keyword field is named after the form ("<form_id>_keys") rather
    // than plain "keys", to avoid ID conflicts with the search block.
    $keys_field = $form_id .'_keys';

    return array(
      $keys_field => array(
        '#type' => 'textfield',
        '#size' => 15,
        '#default_value' => '',
        '#attributes' => array('title' => t('Enter the terms you wish to search for.')),
      ),
      'submit' => array('#type' => 'submit', '#value' => t('Search')),
      '#base' => 'search_box_form',
    );
  }
  /**
   * Process a block search form submission.
   *
   * @return
   *   The path "search/node/<keys>", built from the trimmed value of the
   *   form's "<form_id>_keys" field.
   */
  function search_box_form_submit($form_id, $form_values) {
    // The search form relies on control of the redirect destination for its
    // functionality, so we override any static destination set in the request,
    // for example by drupal_access_denied() or drupal_not_found()
    // (see http://drupal.org/node/292565).
    if (isset($_REQUEST['destination'])) {
      unset($_REQUEST['destination']);
    }
    if (isset($_REQUEST['edit']['destination'])) {
      unset($_REQUEST['edit']['destination']);
    }

    $keys_field = $form_id .'_keys';
    $search_terms = trim($form_values[$keys_field]);

    return 'search/node/'. $search_terms;
  }
  /**
   * Theme the theme search form.
   *
   * Wraps the rendered form in an inline container with the ID "search".
   */
  function theme_search_theme_form($form) {
    $rendered = drupal_render($form);
    return '<div id="search" class="container-inline">'. $rendered .'</div>';
  }
  /**
   * Theme the block search form.
   *
   * Wraps the rendered form in an inline container.
   */
  function theme_search_block_form($form) {
    $rendered = drupal_render($form);
    return '<div class="container-inline">'. $rendered .'</div>';
  }
  /**
   * Perform a standard search on the given keys, and return the formatted results.
   *
   * @param $keys
   *   The search keys. Nothing is done when this is NULL.
   * @param $type
   *   The module whose search hook is invoked.
   *
   * @return
   *   The formatted results, produced by the module's search_page hook if it
   *   has one and by theme('search_page') otherwise. NULL when there are no
   *   keys, the module has no search hook, or the search gave no results.
   */
  function search_data($keys = NULL, $type = 'node') {
    if (!isset($keys) || !module_hook($type, 'search')) {
      return;
    }

    $results = module_invoke($type, 'search', 'search', $keys);
    if (!is_array($results) || count($results) == 0) {
      return;
    }

    if (module_hook($type, 'search_page')) {
      return module_invoke($type, 'search_page', $results);
    }
    return theme('search_page', $results, $type);
  }
  /**
   * Returns snippets from a piece of text, with certain keywords highlighted.
   * Used for formatting search results.
   *
   * Fragments of text around keyword occurrences are collected until roughly
   * 256 bytes have been gathered, overlapping fragments are merged, and the
   * keywords in the result are wrapped in <strong> tags. If no keyword is
   * found, the start of the text is returned instead.
   *
   * @param $keys
   *   A string containing a search query.
   *
   * @param $text
   *   The text to extract fragments from. Tags are stripped from it.
   *
   * @return
   *   A string containing HTML for the excerpt.
   */
  function search_excerpt($keys, $text) {
    // We highlight around non-indexable or CJK characters.
    // $boundary is a zero-width assertion: the position must be preceded or
    // followed by a character from one of those two classes.
    $boundary = '(?:(?<=['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .'])|(?=['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .']))';

    // Extract positive keywords and phrases
    // Group 2 captures the contents of quoted phrases, group 3 bare words that
    // do not start with "OR".
    // NOTE(review): a leading "-" is not treated specially here, so negative
    // keywords appear to be captured as bare words as well - confirm.
    preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' '. $keys, $matches);
    // Merging both groups leaves empty entries for tokens that matched the
    // other alternative; those are dropped inside the loop below.
    $keys = array_merge($matches[2], $matches[3]);

    // Prepare text
    // Pad tags with spaces before stripping them so that words separated only
    // by markup do not run together, and surround the text with spaces.
    $text = ' '. strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text)) .' ';
    // Escape each key for use inside a regular expression.
    array_walk($keys, '_search_excerpt_replace');
    // $workkeys holds the keys still worth searching for; $keys keeps every
    // non-empty key for the final highlighting step.
    $workkeys = $keys;

    // Extract a fragment per keyword for at most 4 keywords.
    // First we collect ranges of text around each keyword, starting/ending
    // at spaces.
    // If the sum of all fragments is too short, we look for second occurrences.
    // NOTE(review): the loops below stop on the 256 length budget or when no
    // keys remain; no explicit limit of 4 keywords is visible here.
    $ranges = array();
    $included = array();
    $length = 0;
    while ($length < 256 && count($workkeys)) {
      foreach ($workkeys as $k => $key) {
        if (strlen($key) == 0) {
          unset($workkeys[$k]);
          unset($keys[$k]);
          continue;
        }
        if ($length >= 256) {
          break;
        }
        // Remember occurrence of key so we can skip over it if more occurrences
        // are desired.
        if (!isset($included[$key])) {
          $included[$key] = 0;
        }
        // Locate a keyword (position $p), then locate a space in front (position
        // $q) and behind it (position $s)
        if (preg_match('/'. $boundary . $key . $boundary .'/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
          $p = $match[0][1];
          // $q: first space at or after 60 bytes before the keyword.
          if (($q = strpos($text, ' ', max(0, $p - 60))) !== FALSE) {
            // $s: last space within the 80 bytes starting at the keyword,
            // relative to $p.
            $end = substr($text, $p, 80);
            if (($s = strrpos($end, ' ')) !== FALSE) {
              // Ranges are keyed by start offset and hold the end offset.
              $ranges[$q] = $p + $s;
              $length += $p + $s - $q;
              // Resume the next search for this key just past this match.
              $included[$key] = $p + 1;
            }
            else {
              unset($workkeys[$k]);
            }
          }
          else {
            unset($workkeys[$k]);
          }
        }
        else {
          // No (further) occurrence: stop searching for this key.
          unset($workkeys[$k]);
        }
      }
    }

    // If we didn't find anything, return the beginning.
    if (count($ranges) == 0) {
      return truncate_utf8($text, 256) .' ...';
    }

    // Sort the text ranges by starting position.
    ksort($ranges);

    // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
    // $from1/$to1 track the range currently being grown; it is flushed to
    // $newranges whenever the next range starts beyond its end.
    $newranges = array();
    foreach ($ranges as $from2 => $to2) {
      if (!isset($from1)) {
        $from1 = $from2;
        $to1 = $to2;
        continue;
      }
      if ($from2 <= $to1) {
        $to1 = max($to1, $to2);
      }
      else {
        $newranges[$from1] = $to1;
        $from1 = $from2;
        $to1 = $to2;
      }
    }
    // Flush the last pending range.
    $newranges[$from1] = $to1;

    // Fetch text
    $out = array();
    foreach ($newranges as $from => $to) {
      $out[] = substr($text, $from, $to - $from);
    }
    // Only prepend an ellipsis when the excerpt does not start at offset 0.
    $text = (isset($newranges[0]) ? '' : '... '). implode(' ... ', $out) .' ...';

    // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
    $text = preg_replace('/'. $boundary .'('. implode('|', $keys) .')'. $boundary .'/iu', '<strong>\0</strong>', $text);
    return $text;
  }
  1110. /**
  1111. * @} End of "defgroup search".
  1112. */
  /**
   * Helper function for array_walk in search_excerpt.
   *
   * Escapes the given string in place for use inside a regular expression
   * that is delimited by "/".
   */
  function _search_excerpt_replace(&$text) {
    $escaped = preg_quote($text, '/');
    $text = $escaped;
  }
  /**
   * Format a single result entry of a search query. This function is normally
   * called by theme_search_page() or hook_search_page().
   *
   * @param $item
   *   A single search result as returned by hook_search(). The result should be
   *   an array with keys "link", "title", "type", "user", "date", and "snippet".
   *   Optionally, "extra" can be an array of extra info to show along with the
   *   result. Apart from "link" and "title", missing or empty keys are simply
   *   left out of the output.
   * @param $type
   *   The type of item found, such as "user" or "node".
   *
   * @return
   *   The HTML for the entry: a <dt> holding the linked title and a <dd>
   *   holding the snippet and the info line.
   *
   * @ingroup themeable
   */
  function theme_search_item($item, $type) {
    $output = ' <dt class="title"><a href="'. check_url($item['link']) .'">'. check_plain($item['title']) .'</a></dt>';
    $info = array();
    // !empty() keeps the original truthiness checks, but does not read array
    // keys that the result does not provide.
    if (!empty($item['type'])) {
      $info[] = check_plain($item['type']);
    }
    if (!empty($item['user'])) {
      // Added as is, without escaping.
      // NOTE(review): presumably already-themed HTML - confirm with callers.
      $info[] = $item['user'];
    }
    if (!empty($item['date'])) {
      $info[] = format_date($item['date'], 'small');
    }
    if (isset($item['extra']) && is_array($item['extra'])) {
      $info = array_merge($info, $item['extra']);
    }
    $snippet = !empty($item['snippet']) ? '<p>'. $item['snippet'] .'</p>' : '';
    $output .= ' <dd>'. $snippet .'<p class="search-info">'. implode(' - ', $info) .'</p></dd>';
    return $output;
  }
  /**
   * Format the result page of a search query.
   *
   * Modules may implement hook_search_page() in order to override this default
   * function to display search results. In that case it is expected they provide
   * their own themeable functions.
   *
   * @param $results
   *   All search results as returned by hook_search().
   * @param $type
   *   The type of item found, such as "user" or "node".
   *
   * @return
   *   The themed entries wrapped in a definition list, followed by the pager.
   *
   * @ingroup themeable
   */
  function theme_search_page($results, $type) {
    $entries = '';
    foreach ($results as $entry) {
      $entries .= theme('search_item', $entry, $type);
    }

    $list = '<dl class="search-results">'. $entries .'</dl>';
    return $list . theme('pager', NULL, 10, 0);
  }
  /**
   * Map the theme and block search form IDs to their shared builder.
   *
   * NOTE(review): presumably an implementation of hook_forms() - confirm.
   *
   * @return
   *   An array keyed by form ID. Each entry names search_box() as the callback
   *   and passes the form ID as its only callback argument.
   */
  function search_forms() {
    $forms = array();
    foreach (array('search_theme_form', 'search_block_form') as $form_id) {
      $forms[$form_id] = array(
        'callback' => 'search_box',
        'callback arguments' => array($form_id),
      );
    }
    return $forms;
  }