fix word boundary matching on broken platforms FS#2440
It seems that matching \b on Unicode strings is unreliable across different platforms (Debian). Using Unicode-class lookaheads/lookbehinds seems to work, though.
This commit is contained in:
parent
0483f97f57
commit
84e581a6fc
1 changed files with 8 additions and 3 deletions
|
@ -394,19 +394,24 @@ function ft_snippet_re_preprocess($term) {
|
|||
return $term;
|
||||
}
|
||||
|
||||
// unicode word boundaries
|
||||
// see http://stackoverflow.com/a/2449017/172068
|
||||
$BL = '(?<!\pL)';
|
||||
$BR = '(?!\pL)';
|
||||
|
||||
if(substr($term,0,2) == '\\*'){
|
||||
$term = substr($term,2);
|
||||
}else{
|
||||
$term = '\b'.$term;
|
||||
$term = $BL.$term;
|
||||
}
|
||||
|
||||
if(substr($term,-2,2) == '\\*'){
|
||||
$term = substr($term,0,-2);
|
||||
}else{
|
||||
$term = $term.'\b';
|
||||
$term = $term.$BR;
|
||||
}
|
||||
|
||||
if($term == '\b' || $term == '\b\b') $term = '';
|
||||
if($term == $BL || $term == $BR || $term == $BL.$BR) $term = '';
|
||||
return $term;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue