fix word boundary matching on broken platforms FS#2440

Matching \b on Unicode strings seems to be unreliable across different
platforms (e.g. Debian). Using Unicode character class lookaheads/lookbehinds
seems to work, though.
This commit is contained in:
Andreas Gohr 2012-07-28 16:57:15 +02:00
parent 0483f97f57
commit 84e581a6fc

View file

@ -394,19 +394,24 @@ function ft_snippet_re_preprocess($term) {
return $term;
}
// unicode word boundaries
// see http://stackoverflow.com/a/2449017/172068
$BL = '(?<!\pL)';
$BR = '(?!\pL)';
if(substr($term,0,2) == '\\*'){
$term = substr($term,2);
}else{
$term = '\b'.$term;
$term = $BL.$term;
}
if(substr($term,-2,2) == '\\*'){
$term = substr($term,0,-2);
}else{
$term = $term.'\b';
$term = $term.$BR;
}
if($term == '\b' || $term == '\b\b') $term = '';
if($term == $BL || $term == $BR || $term == $BL.$BR) $term = '';
return $term;
}