search_index
modules/search/search.module, строка 419
- Версии
- 5 – 6
search_index($sid, $type, $text)
Обновляет полнотекстовый поисковый индекс для указанного элемента.
Параметры
$sid
Идентификатор элемента (например, nid ноды).
$type
Строка, определяющая тип элемента (например, 'node'
)
$text
Содержимое элемента в виде HTML-строки.
Связанные темы
Код
<?php
function search_index($sid, $type, $text) {
$minimum_word_size = variable_get('minimum_word_size', 3);
// Link matching
global $base_url;
$node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/|'. preg_quote(base_path(), '@') .')(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';
// Multipliers for scores of words inside certain HTML tags.
// Note: 'a' must be included for link ranking to work.
$tags = array('h1' => 25,
'h2' => 18,
'h3' => 15,
'h4' => 12,
'h5' => 9,
'h6' => 6,
'u' => 3,
'b' => 3,
'i' => 3,
'strong' => 3,
'em' => 3,
'a' => 10);
// Strip off all ignored tags to speed up processing, but insert space before/after
// them to keep word boundaries.
$text = str_replace(array('<', '>'), array(' <', '> '), $text);
$text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');
// Split HTML tags from plain text.
$split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
// Note: PHP ensures the array consists of alternating delimiters and literals
// and begins and ends with a literal (inserting $null as required).
$tag = FALSE; // Odd/even counter. Tag or no tag.
$link = FALSE; // State variable for link analyser
$score = 1; // Starting score per word
$accum = ' '; // Accumulator for cleaned up data
$tagstack = array(); // Stack with open tags
$tagwords = 0; // Counter for consecutive words
$focus = 1; // Focus state
$results = array(0 => array()); // Accumulator for words for index
foreach ($split as $value) {
if ($tag) {
// Increase or decrease score per word based on tag
list($tagname) = explode(' ', $value, 2);
$tagname = drupal_strtolower($tagname);
// Closing or opening tag?
if ($tagname[0] == '/') {
$tagname = substr($tagname, 1);
// If we encounter unexpected tags, reset score to avoid incorrect boosting.
if (!count($tagstack) || $tagstack[0] != $tagname) {
$tagstack = array();
$score = 1;
}
else {
// Remove from tag stack and decrement score
$score = max(1, $score - $tags[array_shift($tagstack)]);
}
if ($tagname == 'a') {
$link = FALSE;
}
}
else {
if (isset($tagstack[0]) && $tagstack[0] == $tagname) {
// None of the tags we look for make sense when nested identically.
// If they are, it's probably broken HTML.
$tagstack = array();
$score = 1;
}
else {
// Add to open tag stack and increment score
array_unshift($tagstack, $tagname);
$score += $tags[$tagname];
}
if ($tagname == 'a') {
// Check if link points to a node on this site
if (preg_match($node_regexp, $value, $match)) {
$path = drupal_get_normal_path($match[1]);
if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
$linknid = $match[1];
if ($linknid > 0) {
// Note: ignore links to uncachable nodes to avoid redirect bugs.
$node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
if (filter_format_allowcache($node->format)) {
$link = TRUE;
$linktitle = $node->title;
}
}
}
}
}
}
// A tag change occurred, reset counter.
$tagwords = 0;
}
else {
// Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
if ($value != '') {
if ($link) {
// Check to see if the node link text is its URL. If so, we use the target node title instead.
if (preg_match('!^https?://!i', $value)) {
$value = $linktitle;
}
}
$words = search_index_split($value);
foreach ($words as $word) {
// Add word to accumulator
$accum .= $word .' ';
$num = is_numeric($word);
// Check wordlength
if ($num || drupal_strlen($word) >= $minimum_word_size) {
// Normalize numbers
if ($num) {
$word = (int)ltrim($word, '-0');
}
// Links score mainly for the target.
if ($link) {
if (!isset($results[$linknid])) {
$results[$linknid] = array();
}
$results[$linknid][] = $word;
// Reduce score of the link caption in the source.
$focus *= 0.2;
}
// Fall-through
if (!isset($results[0][$word])) {
$results[0][$word] = 0;
}
$results[0][$word] += $score * $focus;
// Focus is a decaying value in terms of the amount of unique words up to this point.
// From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
$focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
}
$tagwords++;
// Too many words inside a single tag probably mean a tag was accidentally left open.
if (count($tagstack) && $tagwords >= 15) {
$tagstack = array();
$score = 1;
}
}
}
}
$tag = !$tag;
}
search_wipe($sid, $type, TRUE);
// Insert cleaned up data into dataset
db_query("INSERT INTO {search_dataset} (sid, type, data, reindex) VALUES (%d, '%s', '%s', %d)", $sid, $type, $accum, 0);
// Insert results into search index
foreach ($results[0] as $word => $score) {
// Try inserting first because this will succeed most times, but because
// the database collates similar words (accented and non-accented), the
// insert can fail, in which case we need to add the word scores together.
@db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %f)", $word, $sid, $type, $score);
if (!db_affected_rows()) {
db_query("UPDATE {search_index} SET score = score + %f WHERE word = '%s' AND sid = %d AND type = '%s'", $score, $word, $sid, $type);
}
search_dirty($word);
}
unset($results[0]);
// Get all previous links from this item.
$result = db_query("SELECT nid, caption FROM {search_node_links} WHERE sid = %d AND type = '%s'", $sid, $type);
$links = array();
while ($link = db_fetch_object($result)) {
$links[$link->nid] = $link->caption;
}
// Now store links to nodes.
foreach ($results as $nid => $words) {
$caption = implode(' ', $words);
if (isset($links[$nid])) {
if ($links[$nid] != $caption) {
// Update the existing link and mark the node for reindexing.
db_query("UPDATE {search_node_links} SET caption = '%s' WHERE sid = %d AND type = '%s' AND nid = %d", $caption, $sid, $type, $nid);
search_touch_node($nid);
}
// Unset the link to mark it as processed.
unset($links[$nid]);
}
else {
// Insert the existing link and mark the node for reindexing.
db_query("INSERT INTO {search_node_links} (caption, sid, type, nid) VALUES ('%s', %d, '%s', %d)", $caption, $sid, $type, $nid);
search_touch_node($nid);
}
}
// Any left-over links in $links no longer exist. Delete them and mark the nodes for reindexing.
foreach ($links as $nid => $caption) {
db_query("DELETE FROM {search_node_links} WHERE sid = %d AND type = '%s' AND nid = %d", $sid, $type, $nid);
search_touch_node($nid);
}
}
?>
Войдите или зарегистрируйтесь, чтобы получить возможность отправлять комментарии