Fix word detection with non-ASCII characters.

Use the Unicode character property \pL together with negative look-behind
and look-ahead assertions rather than '\b' escape sequences (which do not
work with non-ASCII characters).
This commit is contained in:
Daniel Kraus
2014-08-28 15:57:49 +02:00
parent 10f528e265
commit c800862990

View File

@ -104,8 +104,13 @@
global $wgLinkTitlesSmartMode;
global $wgCapitalLinks;
( $wgLinkTitlesWordStartOnly ) ? $wordStartDelim = '\b' : $wordStartDelim = '';
( $wgLinkTitlesWordEndOnly ) ? $wordEndDelim = '\b' : $wordEndDelim = '';
// Use Unicode character properties rather than \b escape sequences
// so that whole words containing non-ASCII characters are detected as well.
// Note that this requires the '/u' pattern modifier, and PHP must be
// built against a PCRE library that was compiled with
// --enable-unicode-properties.
( $wgLinkTitlesWordStartOnly ) ? $wordStartDelim = '(?<!\pL)' : $wordStartDelim = '';
( $wgLinkTitlesWordEndOnly ) ? $wordEndDelim = '(?!\pL)' : $wordEndDelim = '';
( $wgLinkTitlesPreferShortTitles ) ? $sort_order = 'ASC' : $sort_order = 'DESC';
( $wgLinkTitlesFirstOnly ) ? $limit = 1 : $limit = -1;
@ -209,7 +214,7 @@
for ( $i = 0; $i < count( $arr ); $i+=2 ) {
// even indexes will point to text that is not enclosed by brackets
$arr[$i] = preg_replace_callback( '/(?<![\:\.\@\/\?\&])' .
$wordStartDelim . $searchTerm . $wordEndDelim . '/',
$wordStartDelim . $searchTerm . $wordEndDelim . '/u',
array('LinkTitles', 'simpleModeCallback'), $arr[$i], $limit, $count );
if (( $limit >= 0 ) && ( $count > 0 )) {
break;
@ -227,7 +232,7 @@
// even indexes will point to text that is not enclosed by brackets
$arr[$i] = preg_replace_callback( '/(?<![\:\.\@\/\?\&])' .
$wordStartDelim . '(' . $quotedTitle . ')' .
$wordEndDelim . '/i', array('LinkTitles', 'smartModeCallback'),
$wordEndDelim . '/iu', array('LinkTitles', 'smartModeCallback'),
$arr[$i], $limit, $count );
if (( $limit >= 0 ) && ( $count > 0 )) {
break;