Add Source class, refactor.

This commit is contained in:
Daniel Kraus
2017-08-31 20:37:49 +02:00
parent 4a92321895
commit 256feb50d9
8 changed files with 392 additions and 110 deletions

View File

@ -29,13 +29,13 @@
"LinkTitlesWordStartOnly": true,
"LinkTitlesWordEndOnly": true,
"LinkTitlesSpecialPageReloadAfter": 1,
"LinkTitlesNamespaces": [
0
]
"LinkTitlesSourceNamespaces": [],
"LinkTitlesTargetNamespaces": []
},
"AutoloadClasses": {
"LinkTitles\\Extension": "includes/Extension.php",
"LinkTitles\\Linker": "includes/Linker.php",
"LinkTitles\\Source": "includes/Source.php",
"LinkTitles\\Target": "includes/Target.php",
"LinkTitles\\Targets": "includes/Targets.php",
"LinkTitles\\Splitter": "includes/Splitter.php",

View File

@ -69,10 +69,18 @@ class Config {
public $blackList;
/**
* Array of those name spaces (integer constants) whose pages may be linked.
* @var Array $namespaces
* Array of those namespaces (integer constants) whose pages may be linked
* when edited.
* @var Array $sourceNamespaces
*/
public $namespaces;
public $sourceNamespaces;
/**
* Array of those namespaces (integer constants) whose pages may be linked
* to a source page.
* @var Array $targetNamespaces
*/
public $targetNamespaces;
/**
* Indicates whether to add a link to the first occurrence of a page title
@ -170,7 +178,8 @@ class Config {
global $wgLinkTitlesPreferShortTitles;
global $wgLinkTitlesMinimumTitleLength;
global $wgLinkTitlesBlackList;
global $wgLinkTitlesNamespaces;
global $wgLinkTitlesSourceNamespaces;
global $wgLinkTitlesTargetNamespaces;
global $wgLinkTitlesFirstOnly;
global $wgLinkTitlesSmartMode;
global $wgCapitalLinks;
@ -186,7 +195,8 @@ class Config {
$this->preferShortTitles = $wgLinkTitlesPreferShortTitles;
$this->minimumTitleLength = $wgLinkTitlesMinimumTitleLength;
$this->blackList = $wgLinkTitlesBlackList;
$this->namespaces = $wgLinkTitlesNamespaces;
$this->sourceNamespaces = $wgLinkTitlesSourceNamespaces ? $wgLinkTitlesSourceNamespaces : [ NS_MAIN ];
$this->targetNamespaces = $wgLinkTitlesTargetNamespaces;
$this->firstOnly = $wgLinkTitlesFirstOnly;
$this->smartMode = $wgLinkTitlesSmartMode;
$this->capitalLinks = $wgCapitalLinks; // MediaWiki global variable

View File

@ -28,69 +28,60 @@ namespace LinkTitles;
*/
class Extension {
/// Event handler that is hooked to the PageContentSave event.
/**
* Event handler for the PageContentSave hook.
*
* This handler is used if the parseOnEdit configuration option is set.
*/
public static function onPageContentSave( &$wikiPage, &$user, &$content, &$summary,
$isMinor, $isWatch, $section, &$flags, &$status ) {
$config = new Config();
if ( !$config->parseOnEdit || $isMinor ) return true;
$title = $wikiPage->getTitle();
// Only process if page is in one of our namespaces we want to link
// Fixes ugly autolinking of sidebar pages
if ( in_array( $title->getNamespace(), $config->namespaces )) {
$text = $content->getContentHandler()->serializeContent( $content );
if ( !\MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) ) {
$linker = new Linker( $config );
$newText = $linker->linkContent( $title, $text );
if ( $newText != $text ) {
$content = $content->getContentHandler()->unserializeContent( $newText );
}
}
};
return true;
}
/*
* Event handler that is hooked to the InternalParseBeforeLinks event.
* @param Parser $parser Parser that raised the event.
* @param $text Preprocessed text of the page
*/
public static function onInternalParseBeforeLinks( \Parser &$parser, &$text ) {
$config = new Config();
if ( !$config->parseOnRender ) return true;
$title = $parser->getTitle();
// If the page contains the magic word '__NOAUTOLINKS__', do not parse it.
// Only process if page is in one of our namespaces we want to link
if ( !\MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) &&
in_array( $title->getNamespace(), $config->namespaces ) ) {
$linker = new Linker( $config );
$text = $linker->linkContent( $title, $text );
$source = Source::createFromPageandContent( $wikiPage, $content, $config );
$linker = new Linker( $config );
$result = $linker->linkContent( $source );
if ( $result ) {
$content = $source->setText( $result );
}
return true;
}
/*
* Automatically processes a single page, given a $title Title object.
* This function is called by the SpecialLinkTitles class and the
* LinkTitlesJob class.
* @param Title $title Title object.
* @param RequestContext $context Current request context. If in doubt, call MediaWiki's `RequestContext::getMain()` to obtain such an object.
* @returns bool True if the page exists, false if the page does not exist
* Event handler for the InternalParseBeforeLinks hook.
*
* This handler is used if the parseOnRender configuration option is set.
*/
public static function onInternalParseBeforeLinks( \Parser &$parser, &$text ) {
$config = new Config();
if ( !$config->parseOnRender ) return true;
$title = $parser->getTitle();
$source = Source::createFromParserAndText( $parser, $text, $config );
$linker = new Linker( $config );
$result = $linker->linkContent( $source );
if ( $result ) {
$text = $result;
}
return true;
}
/**
* Adds links to a single page.
*
* Entry point for the SpecialLinkTitles class and the LinkTitlesJob class.
*
* @param \Title $title Title object.
* @param \RequestContext $context Current request context. If in doubt, call MediaWiki's `RequestContext::getMain()` to obtain such an object.
* @return bool True if the page exists, false if the page does not exist
*/
public static function processPage( \Title $title, \RequestContext $context ) {
$page = \WikiPage::factory( $title );
$content = $page->getContent();
if ( $content != null ) {
$text = $content->getContentHandler()->serializeContent( $content );
$config = new Config();
$config = new Config();
$source = Source::createFromTitle( $title, $config );
if ( $source->hasContent ) {
$linker = new Linker( $config );
$newText = $linker->linkContent( $title, $text );
if ( $text != $newText ) {
$content = $content->getContentHandler()->unserializeContent( $newText );
$page->doEditContent(
$result = $linker->linkContent( $source );
if ( $result ) {
$content = $source->getContent()->getContentHandler()->unserializeContent( $result );
$source->getPage()->doEditContent(
$content,
"Links to existing pages added by LinkTitles bot.", // TODO: i18n
EDIT_MINOR | EDIT_FORCE_BOT,
@ -105,39 +96,53 @@ class Extension {
}
}
/// Adds the two magic words defined by this extension to the list of
/// 'double-underscore' terms that are automatically removed before a
/// page is displayed.
/// @param $doubleUnderscoreIDs Array of magic word IDs.
/// @return true
/*
* Adds the two magic words defined by this extension to the list of
* 'double-underscore' terms that are automatically removed before a
* page is displayed.
*
* @param Array $doubleUnderscoreIDs Array of magic word IDs.
* @return true
*/
public static function onGetDoubleUnderscoreIDs( array &$doubleUnderscoreIDs ) {
$doubleUnderscoreIDs[] = 'MAG_LINKTITLES_NOTARGET';
$doubleUnderscoreIDs[] = 'MAG_LINKTITLES_NOAUTOLINKS';
return true;
}
/**
* Handles the ParserFirstCallInit hook and registers the <autolinks> and
* <noautolinks> tags.
*/
public static function onParserFirstCallInit( \Parser $parser ) {
$parser->setHook( 'noautolinks', 'LinkTitles\Extension::doNoautolinksTag' );
$parser->setHook( 'autolinks', 'LinkTitles\Extension::doAutolinksTag' );
}
/// Removes the extra tag that this extension provides (<noautolinks>)
/// by simply returning the text between the tags (if any).
/// See https://www.mediawiki.org/wiki/Manual:Tag_extensions#Example
/*
* Removes the extra tag that this extension provides (<noautolinks>)
* by simply returning the text between the tags (if any).
* See https://www.mediawiki.org/wiki/Manual:Tag_extensions#Example
*/
public static function doNoautolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
return htmlspecialchars( $input );
}
/// Removes the extra tag that this extension provides (<noautolinks>)
/// by simply returning the text between the tags (if any).
/// See https://www.mediawiki.org/wiki/Manual:Tag_extensions#How_do_I_render_wikitext_in_my_extension.3F
/*
* Handles the extra <autolinks> tag that this extension provides: runs the
* Linker and returns the recursively parsed result (or the recursively parsed,
* unchanged tag content if no links were added).
* See https://www.mediawiki.org/wiki/Manual:Tag_extensions#How_do_I_render_wikitext_in_my_extension.3F
*/
public static function doAutolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
$config = new Config();
$linker = new Linker( $config );
$title = $parser->getTitle();
$withLinks = $linker->linkContent( $title, $input );
$output = $parser->recursiveTagParse( $withLinks, $frame );
return $output;
$source = Source::createFromParser( $parser, $config );
$result = $linker->linkContent( $source );
if ( $result ) {
return $parser->recursiveTagParse( $result, $frame );
} else {
return $parser->recursiveTagParse( $input, $frame );
}
}
}

View File

@ -65,16 +65,19 @@ class Linker {
*
* @param \Title &$title Title object for the current page.
* @param String $text String that holds the article content
* @return String with links to target pages
* @return String|null Source page text with links to target pages, or null if no links were added
*/
public function linkContent( \Title &$title, &$text ) {
public function linkContent( Source $source ) {
if ( !$source->canBeLinked() ) {
return;
}
( $this->config->firstOnly ) ? $limit = 1 : $limit = -1;
$limitReached = false;
$newText = $text;
$newLinks = false; // whether or not new links were added
$newText = $source->getText();
$splitter = Splitter::default( $this->config );
$targets = Targets::default( $title, $this->config );
$targets = Targets::default( $source->getTitle(), $this->config );
// Iterate through the target page titles
foreach( $targets->queryResult as $row ) {
@ -83,7 +86,7 @@ class Linker {
// Don't link current page and don't link if the target page redirects
// to the current page or has the __NOAUTOLINKTARGET__ magic word
// (as required by the actual LinkTitles configuration).
if ( $target->isSameTitle( $title ) || !$target->mayLinkTo( $title ) ) {
if ( $target->isSameTitle( $source ) || !$target->mayLinkTo( $source ) ) {
continue;
}
@ -91,7 +94,7 @@ class Linker {
// A link to the current page should only be recognized if it appears in
// clear text, i.e. we do not count piped links as existing links.
// (Similarly, by design, redirections should not be counted as existing links.)
if ( $limit == 1 && preg_match( '/[[' . $target->getCaseSensitiveLinkValueRegex() . ']]/' , $text ) ) {
if ( $limit == 1 && preg_match( '/[[' . $target->getCaseSensitiveLinkValueRegex() . ']]/' , $source->getText() ) ) {
continue;
}
@ -114,7 +117,10 @@ class Linker {
break;
};
};
$newText = implode( '', $arr );
if ( $count > 0 ) {
$newLinks = true;
$newText = implode( '', $arr );
}
// If smart mode is turned on, the extension will perform a second
// pass on the page and add links with aliases where the case does
@ -134,11 +140,16 @@ class Linker {
break;
};
};
$newText = implode( '', $arr );
if ( $count > 0 ) {
$newLinks = true;
$newText = implode( '', $arr );
}
} // $wgLinkTitlesSmartMode
}; // foreach $res as $row
return $newText;
if ( $newLinks ) {
return $newText;
}
}
/**

245
includes/Source.php Normal file
View File

@ -0,0 +1,245 @@
<?php
/**
* The LinkTitles\Source represents a Wiki page to which links may be added.
*
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*
* @author Daniel Kraus <bovender@bovender.de>
*/
namespace LinkTitles;
/**
 * Represents a wiki page to which links may be added (the link source).
 *
 * A Source is built by one of the static factories from whatever the caller
 * has at hand (Title, WikiPage, Content, Parser, plain text). Anything that was
 * not supplied is derived on first access and cached afterwards.
 */
class Source {
	/**
	 * The LinkTitles configuration for this Source.
	 *
	 * @var Config $config
	 */
	public $config;

	/**
	 * Title of the source page; derived from $page if not supplied.
	 *
	 * @var \Title|null $title
	 */
	private $title;

	/**
	 * Serialized page text; derived from $content if not supplied.
	 *
	 * @var String|null $text
	 */
	private $text;

	/**
	 * WikiPage of the source page; derived from $title if not supplied.
	 *
	 * @var \WikiPage|null $page
	 */
	private $page;

	/**
	 * Content of the source page; fetched from the page if not supplied.
	 *
	 * @var \Content|null $content
	 */
	private $content;

	/**
	 * Creates a Source object from a \Title.
	 *
	 * @param \Title $title Title object from which to create the Source.
	 * @param Config $config LinkTitles configuration
	 * @return Source Source object created from the title.
	 */
	public static function createFromTitle( \Title $title, Config $config ) {
		$source = new Source( $config );
		$source->title = $title;
		return $source;
	}

	/**
	 * Creates a Source object with a given Title and a text.
	 *
	 * Use this factory when the page text is already known, so that it does
	 * not have to be loaded from the page.
	 *
	 * @param \Title $title Title of the source page
	 * @param String $text String representation of the page content
	 * @param Config $config LinkTitles configuration
	 * @return Source Source object created from the title and the text
	 */
	public static function createFromTitleAndText( \Title $title, $text, Config $config ) {
		$source = Source::createFromTitle( $title, $config );
		$source->text = $text;
		return $source;
	}

	/**
	 * Creates a Source object with a given WikiPage and a Content.
	 *
	 * This factory can be called e.g. from an onPageContentSave event handler
	 * which knows both these parameters.
	 *
	 * @param \WikiPage $page WikiPage to link from
	 * @param \Content $content Page content
	 * @param Config $config LinkTitles configuration
	 * @return Source Source object created from the page and the content
	 */
	public static function createFromPageandContent( \WikiPage $page, \Content $content, Config $config ) {
		$source = new Source( $config );
		$source->page = $page;
		$source->content = $content;
		return $source;
	}

	/**
	 * Creates a Source object with a given Parser.
	 *
	 * Only the parser's title is taken over; the text is loaded from the page
	 * when it is first requested.
	 *
	 * @param \Parser $parser Parser object from which to create the Source.
	 * @param Config $config LinkTitles configuration
	 * @return Source Source object created from the parser.
	 */
	public static function createFromParser( \Parser $parser, Config $config ) {
		$source = new Source( $config );
		$source->title = $parser->getTitle();
		return $source;
	}

	/**
	 * Creates a Source object with a given Parser and text.
	 *
	 * This factory can be called e.g. from an onInternalParseBeforeLinks event
	 * handler which knows these parameters.
	 *
	 * @param \Parser $parser Parser object from which to create the Source.
	 * @param String $text String representation of the page content.
	 * @param Config $config LinkTitles configuration
	 * @return Source Source object created from the parser and the text.
	 */
	public static function createFromParserAndText( \Parser $parser, $text, Config $config ) {
		$source = Source::createFromParser( $parser, $config );
		$source->text = $text;
		return $source;
	}

	/**
	 * Private constructor. Use one of the factories to create a Source object.
	 *
	 * @param Config $config LinkTitles configuration
	 */
	private function __construct( Config $config ) {
		$this->config = $config;
	}

	/**
	 * Determines whether links may be added to this page.
	 *
	 * @return boolean True if the page is in one of the configured source
	 * namespaces and does not contain the __NOAUTOLINKS__ magic word.
	 */
	public function canBeLinked() {
		return $this->hasDesiredNamespace() && !$this->hasNoAutolinksMagicWord();
	}

	/**
	 * Determines whether the Source is in a desired namespace, i.e. a namespace
	 * that is listed in the sourceNamespaces config setting.
	 *
	 * @return boolean True if the Source is in a 'good' namespace.
	 */
	public function hasDesiredNamespace() {
		// Deliberately a non-strict comparison: namespace ids that were
		// configured as numeric strings must still match.
		return in_array( $this->getNamespace(), $this->config->sourceNamespaces );
	}

	/**
	 * Determines whether the source page contains the __NOAUTOLINKS__ magic word.
	 *
	 * @return boolean True if the page contains the __NOAUTOLINKS__ magic word.
	 */
	public function hasNoAutolinksMagicWord() {
		return \MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $this->getText() );
	}

	/**
	 * Gets the title.
	 *
	 * @return \Title Title of the source page.
	 * @throws \Exception If neither a title nor a page is known.
	 */
	public function getTitle() {
		if ( $this->title === null ) {
			// Use the property rather than getPage() so that the two lazy
			// getters never end up calling each other.
			if ( $this->page === null ) {
				// Fully qualified: an unqualified 'Exception' would resolve to
				// the non-existent class LinkTitles\Exception.
				throw new \Exception( 'Unable to create Title for this Source because Page is null.' );
			}
			$this->title = $this->page->getTitle();
		}
		return $this->title;
	}

	/**
	 * Gets the namespace of the source Title.
	 *
	 * @return integer namespace index.
	 */
	public function getNamespace() {
		return $this->getTitle()->getNamespace();
	}

	/**
	 * Gets the Content object for the source page.
	 *
	 * The value is cached once it is non-null.
	 *
	 * @return \Content|null Content object, or null if the page has none.
	 */
	public function getContent() {
		if ( $this->content === null ) {
			$this->content = $this->getPage()->getContent();
		}
		return $this->content;
	}

	/**
	 * Determines whether the source page has content.
	 *
	 * The content is loaded on demand, so this also works for Sources that
	 * were created from a Title or a Parser only.
	 *
	 * @return boolean True if the source page has content.
	 */
	public function hasContent() {
		return $this->getContent() !== null;
	}

	/**
	 * Gets the text of the corresponding Wiki page.
	 *
	 * The value is cached.
	 *
	 * @return String Text of the Wiki page; an empty string if the page has
	 * no content.
	 */
	public function getText() {
		if ( $this->text === null ) {
			$content = $this->getContent();
			if ( $content === null ) {
				// Nothing to serialize. Do not cache, so that content which
				// becomes available later is still picked up.
				return '';
			}
			$this->text = $content->getContentHandler()->serializeContent( $content );
		}
		return $this->text;
	}

	/**
	 * Unserializes text to the page's content.
	 *
	 * @param String $text Text to unserialize.
	 * @return \Content The source's updated content object.
	 * @throws \Exception If the Source has no content whose handler could
	 * unserialize the text.
	 */
	public function setText( $text ) {
		// Load the content on demand; the Source may have been created
		// without a Content object.
		$content = $this->getContent();
		if ( $content === null ) {
			throw new \Exception( 'Unable to set text for this Source because it has no Content.' );
		}
		$this->content = $content->getContentHandler()->unserializeContent( $text );
		$this->text = $text;
		return $this->content;
	}

	/**
	 * Returns the source page object.
	 *
	 * @return \WikiPage WikiPage for the source title.
	 * @throws \Exception If neither a page nor a title is known.
	 */
	public function getPage() {
		if ( $this->page === null ) {
			// Use the property rather than getTitle() so that the two lazy
			// getters never end up calling each other.
			if ( $this->title === null ) {
				throw new \Exception( 'Unable to create Page for this Source because Title is null.' );
			}
			$this->page = \WikiPage::factory( $this->title );
		}
		return $this->page;
	}
}

View File

@ -197,17 +197,16 @@ class Target {
* and whether the target page is a redirect or contains the
* __NOAUTOLINKTARGET__ magic word.
*
* @param \Title $fromTitle
*
* @param Source $source Source page that would link to this target.
* @return boolean
*/
public function mayLinkTo( \Title $fromTitle ) {
public function mayLinkTo( Source $source ) {
// If checking for redirects is enabled and the target page does
// indeed redirect to the current page, return the page title as-is
// (unlinked).
if ( $this->config->checkRedirect ) {
$redirectTitle = $this->getContent()->getUltimateRedirectTarget();
if ( $redirectTitle && $redirectTitle->equals( $fromTitle ) ) {
if ( $redirectTitle && $redirectTitle->equals( $source->getTitle() ) ) {
return false;
}
};
@ -224,10 +223,10 @@ class Target {
/**
* Determines if the Target's title is the same as another title.
* @param Title $otherTitle Other title
* @return boolean True if the $otherTitle is the same, false if not.
* @param Source $source Source object.
* @return boolean True if the Source's title is the same as the Target's title, false if not.
*/
public function isSameTitle( \Title $otherTitle) {
return $this->title->equals( $otherTitle );
public function isSameTitle( Source $source) {
return $this->title->equals( $source->getTitle() );
}
}

View File

@ -33,15 +33,15 @@ class Targets {
* Singleton factory that returns a (cached) database query results with
* potential target page titles.
*
* The subset of pages that may serve as target pages depends on the
* name space of the source page. Therefore, if the $namespace differs from
* the cached name space, the database is queried again.
* The subset of pages that may serve as target pages depends on the namespace
* of the source page. Therefore, if the $sourceNamespace differs from the
* cached namespace, the database is queried again.
*
* @param String $namespace The namespace of the current page.
* @param \Title $title Title of the current (source) page; its namespace is compared with the cached source namespace.
* @param Config $config LinkTitles configuration.
*/
public static function default( \Title $title, Config $config ) {
if ( ( self::$instance === null ) || ( self::$instance->namespace != $title->getNamespace() ) ) {
if ( ( self::$instance === null ) || ( self::$instance->sourceNamespace != $title->getNamespace() ) ) {
self::$instance = new Targets( $title, $config );
}
return self::$instance;
@ -66,10 +66,11 @@ class Targets {
public $queryResult;
/**
* Holds the name space (integer) for which the list of target pages was built.
* @var Int $namespace
* Holds the source page's namespace (integer) for which the list of target
* pages was built.
* @var Int $sourceNamespace
*/
public $namespace;
public $sourceNamespace;
private $config;
@ -79,7 +80,7 @@ class Targets {
*/
private function __construct( \Title $title, Config $config) {
$this->config = $config;
$this->namespace = $title->getNamespace();
$this->sourceNamespace = $title->getNamespace();
$this->fetch();
}
@ -101,8 +102,8 @@ class Targets {
}
// Build our weight list. Make sure current namespace is first element
$namespaces = array_diff( $this->config->namespaces, [ $this->namespace ] );
array_unshift( $namespaces, $this->namespace );
$namespaces = array_diff( $this->config->targetNamespaces, [ $this->sourceNamespace ] );
array_unshift( $namespaces, $this->sourceNamespace );
// No need for a sanity check. We are sure that we have at least one element in the array
$weightSelect = "CASE page_namespace ";

View File

@ -38,17 +38,16 @@ class LinkTitlesLinkerTest extends LinkTitles\TestCase {
protected $title;
protected function setUp() {
$this->title = $this->insertPage( 'source page', 'This page is the test page' )['title'];
parent::setUp(); // call last to have the Targets object invalidated after inserting the page
}
public function addDBDataOnce() {
public function addDBData() {
$this->title = $this->insertPage( 'source page', 'This page is the test page' )['title'];
$this->insertPage( 'link target', 'This page serves as a link target' );
parent::addDBDataOnce(); // call parent after adding page to have targets invalidated
}
/**
* @dataProvider provideLinkContentTemplatesData
*/
@ -57,8 +56,11 @@ class LinkTitlesLinkerTest extends LinkTitles\TestCase {
$config->firstOnly = false;
$config->skipTemplates = $skipTemplates;
LinkTitles\Splitter::invalidate();
$source = LinkTitles\Source::createFromTitleAndText( $this->title, $input, $config );
$linker = new LinkTitles\Linker( $config );
$this->assertSame( $expectedOutput, $linker->linkContent( $this->title, $input ));
$result = $linker->linkContent( $source );
if ( !$result ) { $result = $input; }
$this->assertSame( $expectedOutput, $result );
}
public function provideLinkContentTemplatesData() {
@ -91,7 +93,10 @@ class LinkTitlesLinkerTest extends LinkTitles\TestCase {
$config->firstOnly = false;
$config->smartMode = $smartMode;
$linker = new LinkTitles\Linker( $config );
$this->assertSame( $expectedOutput, $linker->linkContent( $this->title, $input ));
$source = LinkTitles\Source::createFromTitleAndText( $this->title, $input, $config );
$result = $linker->linkContent( $source );
if ( !$result ) { $result = $input; }
$this->assertSame( $expectedOutput, $result );
}
public function provideLinkContentSmartModeData() {
@ -166,7 +171,10 @@ class LinkTitlesLinkerTest extends LinkTitles\TestCase {
$config = new LinkTitles\Config();
$config->firstOnly = $firstOnly;
$linker = new LinkTitles\Linker( $config );
$this->assertSame( $expectedOutput, $linker->linkContent( $this->title, $input ));
$source = LinkTitles\Source::createFromTitleAndText( $this->title, $input, $config );
$result = $linker->linkContent( $source );
if ( !$result ) { $result = $input; }
$this->assertSame( $expectedOutput, $result );
}
public function provideLinkContentFirstOnlyData() {
@ -200,7 +208,10 @@ class LinkTitlesLinkerTest extends LinkTitles\TestCase {
LinkTitles\Targets::invalidate();
$linker = new LinkTitles\Linker( $config );
$text = 'If the link target is blacklisted, it should not be linked';
$this->assertSame( $text, $linker->linkContent( $this->title, $text ) );
$source = LinkTitles\Source::createFromTitleAndText( $this->title, $text, $config );
$result = $linker->linkContent( $source );
if ( !$result ) { $result = $text; }
$this->assertSame( $text, $result );
}
// Tests for namespace handling are commented out until I find a way to add