Add linker tests, fix firstOnly behavior.

- Fix: The firstOnly option now also works if a page already contains a link to a given target page that was not added by the extension during the current edit, i.e. a link that existed prior to the edit or that was added manually.

Closes #12.
This commit is contained in:
Daniel Kraus
2017-08-29 05:42:49 +02:00
parent 33df917af1
commit 04aa26d6a2
3 changed files with 79 additions and 17 deletions

View File

@ -86,6 +86,14 @@ class Linker {
continue;
}
// Dealing with existing links if the firstOnly option is set:
// A link to the current page should only be recognized if it appears in
// clear text, i.e. we do not count piped links as existing links.
// (Similarly, by design, redirections should not be counted as existing links.)
// The square brackets must be escaped: an unescaped '[[...]' would be parsed
// by PCRE as a character class rather than as the literal wiki link markup,
// and the check would then fire on nearly any text that contains a ']'.
if ( $limit == 1 && preg_match( '/\[\[' . $target->getCaseSensitiveLinkValueRegex() . '\]\]/' , $text ) ) {
	continue;
}
// Split the page content by non-linkable sections.
// Credits to inhan @ StackOverflow for suggesting preg_split.
// See http://stackoverflow.com/questions/10672286
@ -161,7 +169,7 @@ class Linker {
// we need to ignore the first letter of the page titles, as
// it does not matter for linking.
if ( strcmp( substr( $this->targetTitleText, 1 ), substr( $matches[ 0 ], 1) ) == 0 ) {
// Case-sensitive match: no need to bulid piped link.
// Case-sensitive match: no need to build piped link.
return '[[' . $matches[ 0 ] . ']]';
} else {
// Case-insensitive match: build piped link.
@ -171,7 +179,7 @@ class Linker {
// If $wgCapitalLinks is false, we can use the simple variant
// of the callback function.
if ( strcmp( $this->targetTitleText, $matches[ 0 ] ) == 0 ) {
// Case-sensitive match: no need to bulid piped link.
// Case-sensitive match: no need to build piped link.
return '[[' . $matches[ 0 ] . ']]';
} else {
// Case-insensitive match: build piped link.

View File

@ -60,6 +60,8 @@ class Target {
*/
private $config;
private $caseSensitiveLinkValueRegex;
/**
* Constructs a new Target object
*
@ -103,20 +105,7 @@ class Target {
* @return String regular expression for this title.
*/
public function getCaseSensitiveRegex() {
	// The search term (strictly case-sensitive, or with a 'fuzzy' first
	// letter, depending on $config->capitalLinks) is built and cached by
	// getCaseSensitiveLinkValueRegex(). Reusing it here avoids keeping a
	// second copy of the same logic; the term only needs to be passed
	// through buildRegex().
	return $this->buildRegex( $this->getCaseSensitiveLinkValueRegex() );
}
/**
@ -138,6 +127,27 @@ class Target {
return '/(?<![\:\.\@\/\?\&])' . $this->wordStart . $searchTerm . $this->wordEnd . '/S';
}
/**
 * Gets the (cached) regex for the link value.
 *
 * Depending on the $config->capitalLinks setting, the title has to be
 * searched for either in a strictly case-sensitive way, or in a 'fuzzy' way
 * where the first letter of the title may be either case.
 *
 * The fuzzy variant is only built if the regex-safe title starts with an
 * ASCII letter. In all other cases (empty title, leading backslash of an
 * escaped character, digit, punctuation, first byte of a multibyte
 * character) the title is used as is: toggling case-insensitivity for such
 * a character would not change what is matched, and cutting the string
 * after its first byte could split an escape sequence or a multibyte
 * character in two.
 *
 * The pattern is computed on the first call and stored in
 * $this->caseSensitiveLinkValueRegex; subsequent calls return the stored
 * value.
 *
 * @return String regular expression pattern for the link value.
 */
public function getCaseSensitiveLinkValueRegex() {
	if ( $this->caseSensitiveLinkValueRegex === null ) {
		$regexSafeTitle = $this->getRegexSafeTitle();
		// Anchored test on the whole string instead of $regexSafeTitle[0],
		// so that an empty title does not trigger an
		// "Uninitialized string offset" warning.
		$startsWithAsciiLetter = preg_match( '/^[a-zA-Z]/', $regexSafeTitle ) === 1;
		if ( $this->config->capitalLinks && $startsWithAsciiLetter ) {
			// Only the first letter is matched case-insensitively; (?-i)
			// switches back to case-sensitive matching for the remainder.
			$this->caseSensitiveLinkValueRegex = '((?i)' . $regexSafeTitle[0] . '(?-i)' . substr( $regexSafeTitle, 1 ) . ')';
		} else {
			$this->caseSensitiveLinkValueRegex = '(' . $regexSafeTitle . ')';
		}
	}
	return $this->caseSensitiveLinkValueRegex;
}
/**
* Returns the \Content of the target page.
*