From 04aa26d6a2ea25c31b777d1cd2208b414905d2a8 Mon Sep 17 00:00:00 2001 From: Daniel Kraus Date: Tue, 29 Aug 2017 05:42:49 +0200 Subject: [PATCH] Add linker tests, fix firstOnly behavior. - Fix: The firstOnly option finally also works if a page contains a link to a given other page that was not currently added by the extension, i.e. that existed prior to an edit or that was manually added. Closes #12. --- includes/Linker.php | 12 ++++++++-- includes/Target.php | 38 ++++++++++++++++++----------- tests/phpunit/LinkerTest.php | 46 +++++++++++++++++++++++++++++++++++- 3 files changed, 79 insertions(+), 17 deletions(-) diff --git a/includes/Linker.php b/includes/Linker.php index aae6570..4c7a350 100644 --- a/includes/Linker.php +++ b/includes/Linker.php @@ -86,6 +86,14 @@ class Linker { continue; } + // Dealing with existing links if the firstOnly option is set: + // A link to the current page should only be recognized if it appears in + // clear text as '[[Title]]' (brackets escaped so the regex matches them literally), i.e. we do not count piped links as existing links. + // (Similarly, by design, redirections should not be counted as existing links.) + if ( $limit == 1 && preg_match( '/\[\[' . $target->getCaseSensitiveLinkValueRegex() . '\]\]/' , $text ) ) { + continue; + } + // Split the page content by non-linkable sections. // Credits to inhan @ StackOverflow for suggesting preg_split. // See http://stackoverflow.com/questions/10672286 @@ -161,7 +169,7 @@ class Linker { // we need to ignore the first letter of the page titles, as // it does not matter for linking. if ( strcmp( substr( $this->targetTitleText, 1 ), substr( $matches[ 0 ], 1) ) == 0 ) { - // Case-sensitive match: no need to bulid piped link. + // Case-sensitive match: no need to build piped link. return '[[' . $matches[ 0 ] . ']]'; } else { // Case-insensitive match: build piped link. @@ -171,7 +179,7 @@ class Linker { // If $wgCapitalLinks is false, we can use the simple variant // of the callback function. 
if ( strcmp( $this->targetTitleText, $matches[ 0 ] ) == 0 ) { - // Case-sensitive match: no need to bulid piped link. + // Case-sensitive match: no need to build piped link. return '[[' . $matches[ 0 ] . ']]'; } else { // Case-insensitive match: build piped link. diff --git a/includes/Target.php b/includes/Target.php index fda299d..f793f2b 100644 --- a/includes/Target.php +++ b/includes/Target.php @@ -60,6 +60,8 @@ class Target { */ private $config; + private $caseSensitiveLinkValueRegex; + /** * Constructs a new Target object * @@ -103,20 +105,7 @@ class Target { * @return String regular expression for this title. */ public function getCaseSensitiveRegex() { - $regexSafeTitle = $this->getRegexSafeTitle(); - - // Depending on the $config->capitalLinks setting, - // the title has to be searched for either in a strictly case-sensitive - // way, or in a 'fuzzy' way where the first letter of the title may - // be either case. - // - if ( $this->config->capitalLinks && ( $regexSafeTitle[0] != '\\' )) { - $searchTerm = '((?i)' . $regexSafeTitle[0] . '(?-i)' . substr($regexSafeTitle, 1) . ')'; - } else { - $searchTerm = '(' . $regexSafeTitle . ')'; - } - - return $this->buildRegex( $searchTerm ); + return $this->buildRegex( $this->getCaseSensitiveLinkValueRegex() ); } /** @@ -138,6 +127,27 @@ class Target { return '/(?wordStart . $searchTerm . $this->wordEnd . '/S'; } + /** + * Gets the (cached) regex for the link value. + * + * Depending on the $config->capitalLinks setting, the title has to be + * searched for either in a strictly case-sensitive way, or in a 'fuzzy' way + * where the first letter of the title may be either case. + * + * @return String regular expression pattern for the link value. 
+ */ + public function getCaseSensitiveLinkValueRegex() { + if ( $this->caseSensitiveLinkValueRegex === null ) { + $regexSafeTitle = $this->getRegexSafeTitle(); + if ( $this->config->capitalLinks && ( $regexSafeTitle[0] != '\\' )) { + $this->caseSensitiveLinkValueRegex = '((?i)' . $regexSafeTitle[0] . '(?-i)' . substr($regexSafeTitle, 1) . ')'; + } else { + $this->caseSensitiveLinkValueRegex = '(' . $regexSafeTitle . ')'; + } + } + return $this->caseSensitiveLinkValueRegex; + } + /** * Returns the \Content of the target page. * diff --git a/tests/phpunit/LinkerTest.php b/tests/phpunit/LinkerTest.php index b47a86f..108e669 100644 --- a/tests/phpunit/LinkerTest.php +++ b/tests/phpunit/LinkerTest.php @@ -1,5 +1,13 @@ setMwGlobals( 'wgCapitalLinks', $capitalLinks ); $config = new LinkTitles\Config(); + $config->firstOnly = false; $config->smartMode = $smartMode; $linker = new LinkTitles\Linker( $config ); $this->assertSame( $expectedOutput, $linker->linkContent( $this->title, $input )); } - public static function provideLinkContentSmartModeData() { + public function provideLinkContentSmartModeData() { return [ [ true, // wgCapitalLinks @@ -86,4 +95,39 @@ class LinkTitlesLinkerTest extends LinkTitles\TestCase { ], ]; } + + /** + * @dataProvider provideLinkContentFirstOnlyData + */ + public function testLinkContentFirstOnly( $firstOnly, $input, $expectedOutput ) { + $config = new LinkTitles\Config(); + $config->firstOnly = $firstOnly; + $linker = new LinkTitles\Linker( $config ); + $this->assertSame( $expectedOutput, $linker->linkContent( $this->title, $input )); + } + + public function provideLinkContentFirstOnlyData() { + return [ + [ + false, // firstOnly + 'With firstOnly = false, link target is a link target multiple times', + 'With firstOnly = false, [[link target]] is a [[link target]] multiple times' + ], + [ + false, // firstOnly + 'With firstOnly = false, [[link target]] is a link target multiple times', + 'With firstOnly = false, [[link target]] is a [[link 
target]] multiple times' + ], + [ + true, // firstOnly + 'With firstOnly = true, link target is a link target only once', + 'With firstOnly = true, [[link target]] is a link target only once' + ], + [ + true, // firstOnly + 'With firstOnly = true, [[link target]] is a link target only once', + 'With firstOnly = true, [[link target]] is a link target only once' + ], + ]; + } }