Multi-Bytes Language full text search engine Hack.. [message #8615] |
Tue, 11 February 2003 09:40 |
hightman
Messages: 4 Registered: February 2003 Location: China
Karma: 0
|
Junior Member |
|
|
This code hack makes it possible to do a full-text search with support for multi-byte languages such as Chinese.
You can try it by visiting http://forum.hightman.net/
The code follows. (Please rebuild the theme and rebuild the search index after modifying the PHP code.)
-----------------------------------
/path/to/data/src/isearch.inc.t
-----------------------------------
1. Add two functions before 'function index_text($subj, $body, $msg_id)':
[code]
/*
 * Decide whether a byte value may be part of an indexable token.
 *
 * Accepted values are the ASCII lowercase letters (97-122), the ASCII
 * uppercase letters (65-90), the ASCII digits (48-57), and any value
 * that has the 0x80 bit set.
 *
 * @param int $ch Byte value, as produced by ord().
 * @return bool True when the value is accepted, false otherwise.
 */
function valid_chr($ch)
{
    // a-z
    if ($ch >= 97 && $ch <= 122) {
        return true;
    }

    // A-Z
    if ($ch >= 65 && $ch <= 90) {
        return true;
    }

    // 0-9
    if ($ch >= 48 && $ch <= 57) {
        return true;
    }

    // Anything else is accepted only when its high bit (0x80) is set.
    return (bool) ($ch & 0x80);
}
/*
 * Break a byte string into tokens for indexing or searching.
 *
 * The string is scanned one byte at a time:
 *  - a run of consecutive bytes accepted by valid_chr() that do not have
 *    the high bit set is collected into a single token;
 *  - a byte with the high bit set is emitted as its own token together
 *    with the byte that follows it (a fixed width of two bytes is used);
 *  - a byte rejected by valid_chr() only ends the current token and is
 *    itself discarded.
 *
 * A high-bit byte that is the very last byte of the string is emitted
 * alone, because substr() has only one byte left to return.
 *
 * NOTE(review): the fixed two-byte width presumably targets a double-byte
 * charset such as GB2312/GBK; text in an encoding that uses longer
 * sequences (e.g. UTF-8) would be cut mid-character -- confirm the
 * forum's charset before relying on this.
 *
 * @param string $str Raw text to tokenise.
 * @return array Zero-based list of tokens in order of appearance.
 */
function split_word($str)
{
    $arr = array();
    $len = strlen($str);

    // The word in progress lives in a local buffer. Appending straight to
    // $arr[$j] with ".=" touched an array slot that did not exist yet for
    // the first byte of every word, which raised an undefined-offset
    // notice (an undefined-array-key warning on PHP 8).
    $word = '';

    for ($i = 0; $i < $len; $i++) {
        $ia = ord($str[$i]);

        if (!valid_chr($ia)) {
            // Separator byte: close the word in progress, drop the byte.
            if ($word !== '') {
                $arr[] = $word;
                $word = '';
            }
        } elseif ($ia & 0x80) {
            // High-bit byte: close any word in progress first.
            if ($word !== '') {
                $arr[] = $word;
                $word = '';
            }
            // Emit this byte and the next one as a single token, then
            // skip the byte that was just consumed.
            $arr[] = substr($str, $i, 2);
            $i++;
        } else {
            // Plain letter or digit: extend the current word.
            $word .= $str[$i];
        }
    }

    // A word that runs up to the end of the string is still pending.
    // Compare against '' rather than using empty(), so a word consisting
    // of the single character "0" is not lost.
    if ($word !== '') {
        $arr[] = $word;
    }

    return $arr;
}
[/code]
2. change function index_text();
/* build full text index */
//$w = explode(' ', trim($subj.' '.$body));
$w = split_word(trim($subj.' '.$body));
... ...
for ( $i=0; $i<$a; $i++ ) {
//if ( strlen($w[$i]) > 50 || strlen($w[$i])<3 ) continue;
if ( strlen($w[$i]) > 50 || strlen($w[$i])<2 ) continue;
... ...
/* build subject only index */
//$w = explode(' ', $subj);
$w = split_word($subj);
for ( $i=0; $i<count($w); $i++ ) {
//if ( strlen($w[$i]) > 50 || strlen($w[$i])<3 ) continue;
if ( strlen($w[$i]) > 50 || strlen($w[$i])<2 ) continue;
... ...
3. change function search()
function search($str, $fld, $start, $count, $forum_limiter='', &$total)
{
//$w = explode(" ", $str);
$w = split_word($str);
... ...
|
|
|
|