mirror of
https://github.com/diocloid/LinkTitles.git
synced 2025-07-13 09:49:31 +02:00
Merge branch 'unit-tests' into develop
This commit is contained in:
1
.atomignore
Normal file
1
.atomignore
Normal file
@ -0,0 +1 @@
|
||||
gh-pages/
|
63
README.md
63
README.md
@ -31,3 +31,66 @@ Contributors
|
||||
- Daniel Kraus (@bovender), main developer
|
||||
- Ulrich Strauss (@c0nnex), namespaces
|
||||
- Brent Laabs (@labster), code review and bug fixes
|
||||
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
||||
Starting from version 4.2.0, LinkTitles finally comes with phpunit tests.
|
||||
|
||||
Here's how I set up the testing environment. This may not be the canonical way
|
||||
to do it. Basic information on testing MediaWiki can be found [here](https://www.mediawiki.org/wiki/Manual:PHP_unit_testing).
|
||||
|
||||
The following assumes that you have an instance of MediaWiki running locally
|
||||
on your development machine and that you are running Linux (I personally
|
||||
use Ubuntu).
|
||||
|
||||
1. Pull the MediaWiki repository:
|
||||
|
||||
cd ~/Code
|
||||
git clone --depth 1 https://phabricator.wikimedia.org/source/mediawiki.git
|
||||
|
||||
2. Install [composer](https://getcomposer.org) locally and fetch the
|
||||
dependencies (including development dependencies):
|
||||
|
||||
Follow the instructions on the [composer download page](https://getcomposer.org/download),
|
||||
but instead of running `php composer-setup.php`, run:
|
||||
|
||||
php composer-setup.php --install-dir=bin --filename=composer
|
||||
bin/composer install
|
||||
|
||||
3. Install phpunit (it was already installed on my Ubuntu system when I began
|
||||
testing LinkTitles, so I leave it up to you to figure out how to do it).
|
||||
|
||||
4. Copy your `LocalSettings.php` over from your local MediaWiki installation
|
||||
and remove (or comment out) any lines that reference extensions or skins that
|
||||
you are not going to install in your test environment. For the purposes of
|
||||
testing the LinkTitles extension, leave the following line in place:
|
||||
|
||||
wfLoadExtensions( array( 'LinkTitles' ));
|
||||
|
||||
And ensure the settings file contains the following:
|
||||
|
||||
$wgShowDBErrorBacktrace = true;
|
||||
|
||||
5. Create a symbolic link to your copy of the LinkTitles repository:
|
||||
|
||||
cd ~/Code/mediawiki/extensions
|
||||
ln -s ~/Code/LinkTitles
|
||||
|
||||
6. Make sure your local MediaWiki instance is up to date. Otherwise phpunit may
|
||||
fail and tell you about database problems.
|
||||
|
||||
This is because the local database is used as a template for the unit tests.
|
||||
For example, I initially had MW 1.26 installed on my laptop, but the cloned
|
||||
repository was MW 1.29.1. It's probably also possible to clone the repository
|
||||
with a specific version tag which matches your local installation.
|
||||
|
||||
7. Run the tests:
|
||||
|
||||
cd ~/Code/mediawiki/tests/phpunit
|
||||
php phpunit.php --group bovender
|
||||
|
||||
This will run all tests from the 'bovender' group, i.e. tests for my extensions.
|
||||
If you linked just the LinkTitles extension in step 5, only this extension
|
||||
will be tested.
|
||||
|
@ -33,8 +33,14 @@
|
||||
]
|
||||
},
|
||||
"AutoloadClasses": {
|
||||
"LinkTitles\\Extension": "includes/LinkTitles_Extension.php",
|
||||
"LinkTitles\\Special": "includes/LinkTitles_Special.php"
|
||||
"LinkTitles\\Extension": "includes/Extension.php",
|
||||
"LinkTitles\\Linker": "includes/Linker.php",
|
||||
"LinkTitles\\Target": "includes/Target.php",
|
||||
"LinkTitles\\Targets": "includes/Targets.php",
|
||||
"LinkTitles\\Splitter": "includes/Splitter.php",
|
||||
"LinkTitles\\Config": "includes/Config.php",
|
||||
"LinkTitles\\Special": "includes/Special.php",
|
||||
"LinkTitles\\TestCase": "tests/phpunit/TestCase.php"
|
||||
},
|
||||
"SpecialPages": {
|
||||
"LinkTitles": "LinkTitles\\Special"
|
||||
@ -61,9 +67,8 @@
|
||||
"LinkTitles\\Extension::onParserFirstCallInit"
|
||||
]
|
||||
},
|
||||
"callback": "LinkTitles\\Extension::setup",
|
||||
"ExtensionMessagesFiles": {
|
||||
"LinkTitlesMagic": "includes/LinkTitles_Magic.php"
|
||||
"LinkTitlesMagic": "includes/Magic.php"
|
||||
},
|
||||
"MessagesDirs": {
|
||||
"LinkTitles": [
|
||||
|
196
includes/Config.php
Normal file
196
includes/Config.php
Normal file
@ -0,0 +1,196 @@
|
||||
<?php
|
||||
/**
|
||||
* The LinkTitles\Config class holds configuration for the LinkTitles extension.
|
||||
*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*
|
||||
* @author Daniel Kraus <bovender@bovender.de>
|
||||
*/
|
||||
namespace LinkTitles;
|
||||
|
||||
/**
|
||||
* Holds LinkTitles configuration.
|
||||
*
|
||||
* This class encapsulates the global configuration variables so we do not have
|
||||
* to pull those globals into scope in the individual LinkTitles classes.
|
||||
*
|
||||
* Using a dedicated configuration class also facilitates overriding certain
|
||||
* options, i.e. in a maintenance script that is invoked with flags from the
|
||||
* command line.
|
||||
*
|
||||
* @since 5.0.0
|
||||
*/
|
||||
class Config {
	/**
	 * Whether to add links to a page when the page is edited/saved.
	 * @var bool
	 */
	public $parseOnEdit;

	/**
	 * Whether to add links to a page when the page is rendered.
	 * @var bool
	 */
	public $parseOnRender;

	/**
	 * Indicates whether to prioritize short over long titles.
	 * @var bool
	 */
	public $preferShortTitles;

	/**
	 * Minimum length of a page title for it to qualify as a potential link target.
	 * @var int
	 */
	public $minimumTitleLength;

	/**
	 * Array of page titles that must never be link targets.
	 *
	 * This may be useful to exclude common abbreviations or acronyms from
	 * automatic linking.
	 * @var array
	 */
	public $blackList;

	/**
	 * Array of those name spaces (integer constants) whose pages may be linked.
	 * @var array
	 */
	public $nameSpaces;

	/**
	 * Indicates whether to add a link to the first occurrence of a page title
	 * only (true), or add links to all occurrences on the source page (false).
	 * @var bool
	 */
	public $firstOnly;

	/**
	 * Indicates whether to operate in smart mode, i.e. link to pages even if the
	 * case does not match. Without smart mode, pages are linked to only if the
	 * exact title appears on the source page.
	 * @var bool
	 */
	public $smartMode;

	/**
	 * Mirrors the global MediaWiki variable $wgCapitalLinks that indicates
	 * whether or not page titles are fully case sensitive.
	 * @var bool
	 */
	public $capitalLinks;

	/**
	 * Whether or not to link to pages only if the page title appears at the
	 * start of a word in the text being processed (i.e., link 'MediaWiki' to a
	 * page 'Media', but not to a page 'Wiki').
	 *
	 * Set both $wordStartOnly and $wordEndOnly to true to enforce matching
	 * whole titles.
	 *
	 * @var bool
	 */
	public $wordStartOnly;

	/**
	 * Whether or not to link to pages only if the page title appears at the
	 * end of a word in the text being processed (i.e., link 'MediaWiki' to a
	 * page 'Wiki', but not to a page 'Media').
	 *
	 * Set both $wordStartOnly and $wordEndOnly to true to enforce matching
	 * whole titles.
	 *
	 * @var bool
	 */
	public $wordEndOnly;

	/**
	 * Whether or not to skip templates. If set to true, text inside transclusions
	 * will not be linked.
	 * @var bool
	 */
	public $skipTemplates;

	/**
	 * Whether or not to parse headings.
	 * @var bool
	 */
	public $parseHeadings;

	/**
	 * Whether to check if a potential target page links back to the source page.
	 * Set this to true to avoid indirect linkbacks.
	 *
	 * @var bool
	 */
	public $checkRedirect;

	/**
	 * Whether to enable the __NOAUTOLINKTARGET__ magic word which prevents
	 * a potential target page from being linked to.
	 *
	 * @var bool
	 */
	public $enableNoTargetMagicWord;

	/**
	 * Console output switch. Not backed by a global variable; the constructor
	 * always initializes it to false.
	 * NOTE(review): presumably switched on by command-line callers -- confirm.
	 * @var bool
	 */
	public $enableConsoleOutput;

	/**
	 * Debug console output switch. Not backed by a global variable; the
	 * constructor always initializes it to false.
	 * NOTE(review): presumably switched on by command-line callers -- confirm.
	 * @var bool
	 */
	public $enableDebugConsoleOutput;

	/**
	 * Constructs a new Config object.
	 *
	 * The object's member variables will automatically be set with the values
	 * from the corresponding global variables. A global that has not been
	 * defined yields null for the corresponding member. The two console output
	 * switches have no global counterpart and start out as false.
	 */
	public function __construct() {
		global $wgLinkTitlesParseOnEdit;
		global $wgLinkTitlesParseOnRender;
		global $wgLinkTitlesPreferShortTitles;
		global $wgLinkTitlesMinimumTitleLength;
		global $wgLinkTitlesBlackList;
		global $wgLinkTitlesNamespaces;
		global $wgLinkTitlesFirstOnly;
		global $wgLinkTitlesSmartMode;
		global $wgCapitalLinks;
		global $wgLinkTitlesWordStartOnly;
		global $wgLinkTitlesWordEndOnly;
		global $wgLinkTitlesSkipTemplates;
		global $wgLinkTitlesParseHeadings;
		global $wgLinkTitlesEnableNoTargetMagicWord;
		global $wgLinkTitlesCheckRedirect;

		$this->parseOnEdit = $wgLinkTitlesParseOnEdit;
		$this->parseOnRender = $wgLinkTitlesParseOnRender;
		$this->preferShortTitles = $wgLinkTitlesPreferShortTitles;
		$this->minimumTitleLength = $wgLinkTitlesMinimumTitleLength;
		$this->blackList = $wgLinkTitlesBlackList;
		$this->nameSpaces = $wgLinkTitlesNamespaces;
		$this->firstOnly = $wgLinkTitlesFirstOnly;
		$this->smartMode = $wgLinkTitlesSmartMode;
		$this->capitalLinks = $wgCapitalLinks; // MediaWiki global variable
		$this->wordStartOnly = $wgLinkTitlesWordStartOnly;
		$this->wordEndOnly = $wgLinkTitlesWordEndOnly;
		$this->skipTemplates = $wgLinkTitlesSkipTemplates;
		$this->parseHeadings = $wgLinkTitlesParseHeadings;
		$this->enableNoTargetMagicWord = $wgLinkTitlesEnableNoTargetMagicWord;
		$this->checkRedirect = $wgLinkTitlesCheckRedirect;

		// These two have no corresponding global; they always start disabled.
		$this->enableConsoleOutput = false;
		$this->enableDebugConsoleOutput = false;
	}

}
|
147
includes/Extension.php
Normal file
147
includes/Extension.php
Normal file
@ -0,0 +1,147 @@
|
||||
<?php
|
||||
/**
|
||||
* The LinkTitles\Extension class provides event handlers and entry points for the extension.
|
||||
*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*
|
||||
* @author Daniel Kraus <bovender@bovender.de>
|
||||
*/
|
||||
namespace LinkTitles;
|
||||
|
||||
/**
|
||||
* Provides event handlers and entry points for the extension.
|
||||
*/
|
||||
class Extension {

	/**
	 * Event handler that is hooked to the PageContentSave event.
	 *
	 * Adds links to the content that is about to be saved. Nothing is changed
	 * if parsing on edit is disabled in the configuration, if the edit is
	 * minor, if the page's namespace is not among the configured namespaces,
	 * or if the serialized content matches the MAG_LINKTITLES_NOAUTOLINKS
	 * magic word.
	 *
	 * @param \WikiPage &$wikiPage Page being saved; only its title is read.
	 * @param mixed &$user Unused.
	 * @param \Content &$content Content being saved; replaced when links were added.
	 * @param mixed &$summary Unused.
	 * @param bool $isMinor Whether the edit is marked as minor.
	 * @param mixed $isWatch Unused.
	 * @param mixed $section Unused.
	 * @param mixed &$flags Unused.
	 * @param mixed &$status Unused.
	 * @return bool Always true.
	 */
	public static function onPageContentSave( &$wikiPage, &$user, &$content, &$summary,
		$isMinor, $isWatch, $section, &$flags, &$status ) {
		// Read the settings through Config, like the other handlers of this
		// class, instead of pulling the raw globals into scope.
		$config = new Config();
		if ( !$config->parseOnEdit || $isMinor ) {
			return true;
		}

		$title = $wikiPage->getTitle();

		// Only process if page is in one of our namespaces we want to link
		// Fixes ugly autolinking of sidebar pages
		if ( !in_array( $title->getNamespace(), $config->nameSpaces ) ) {
			return true;
		}

		$text = $content->getContentHandler()->serializeContent( $content );
		if ( \MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) ) {
			return true;
		}

		$linker = new Linker( $config );
		$newText = $linker->linkContent( $title, $text );
		if ( $newText != $text ) {
			$content = $content->getContentHandler()->unserializeContent( $newText );
		}
		return true;
	}

	/**
	 * Event handler that is hooked to the InternalParseBeforeLinks event.
	 *
	 * @param \Parser &$parser Parser that raised the event.
	 * @param string &$text Preprocessed text of the page; replaced with the
	 *   linked text when the page qualifies.
	 * @return bool Always true.
	 */
	public static function onInternalParseBeforeLinks( \Parser &$parser, &$text ) {
		$config = new Config();
		if ( !$config->parseOnRender ) {
			return true;
		}
		$title = $parser->getTitle();

		// If the page contains the magic word '__NOAUTOLINKS__', do not parse it.
		// Only process if page is in one of our namespaces we want to link
		if ( !\MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) &&
			in_array( $title->getNamespace(), $config->nameSpaces ) ) {
			$linker = new Linker( $config );
			$text = $linker->linkContent( $title, $text );
		}
		return true;
	}

	/**
	 * Automatically processes a single page, given a $title Title object.
	 *
	 * If linking changes the page text, the page is saved as a minor bot edit
	 * on behalf of the user of the given request context.
	 *
	 * @param \Title $title Title object.
	 * @param \RequestContext $context Current request context. If in doubt,
	 *   call MediaWiki's `RequestContext::getMain()` to obtain such an object.
	 * @return bool True if the page exists, false if the page does not exist
	 */
	public static function processPage( \Title $title, \RequestContext $context ) {
		$page = \WikiPage::factory( $title );
		$content = $page->getContent();
		if ( $content === null ) {
			return false;
		}

		$text = $content->getContentHandler()->serializeContent( $content );
		$config = new Config();
		$linker = new Linker( $config );
		$newText = $linker->linkContent( $title, $text );
		if ( $text != $newText ) {
			$content = $content->getContentHandler()->unserializeContent( $newText );
			$page->doEditContent(
				$content,
				"Links to existing pages added by LinkTitles bot.", // TODO: i18n
				EDIT_MINOR | EDIT_FORCE_BOT,
				false, // baseRevId
				$context->getUser()
			);
		}
		return true;
	}

	/**
	 * Adds the two magic words defined by this extension to the list of
	 * 'double-underscore' terms that are automatically removed before a
	 * page is displayed.
	 *
	 * @param array &$doubleUnderscoreIDs Array of magic word IDs.
	 * @return bool Always true.
	 */
	public static function onGetDoubleUnderscoreIDs( array &$doubleUnderscoreIDs ) {
		$doubleUnderscoreIDs[] = 'MAG_LINKTITLES_NOTARGET';
		$doubleUnderscoreIDs[] = 'MAG_LINKTITLES_NOAUTOLINKS';
		return true;
	}

	/**
	 * Registers the handlers for the <noautolinks> and <autolinks> tags.
	 *
	 * @param \Parser $parser Parser to register the tag hooks with.
	 * @return bool Always true, like the other hook handlers of this class.
	 */
	public static function onParserFirstCallInit( \Parser $parser ) {
		$parser->setHook( 'noautolinks', 'LinkTitles\Extension::doNoautolinksTag' );
		$parser->setHook( 'autolinks', 'LinkTitles\Extension::doAutolinksTag' );
		return true;
	}

	/**
	 * Removes the extra tag that this extension provides (<noautolinks>)
	 * by simply returning the HTML-escaped text between the tags (if any).
	 * See https://www.mediawiki.org/wiki/Manual:Tag_extensions#Example
	 *
	 * @param string|null $input Text between the tags; null for a
	 *   self-closing tag.
	 * @param array $args Tag attributes (unused).
	 * @param \Parser $parser Unused.
	 * @param \PPFrame $frame Unused.
	 * @return string Escaped text; empty string when there is no input.
	 */
	public static function doNoautolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
		// A self-closing tag has no content, so $input is null; passing null
		// to htmlspecialchars() is deprecated as of PHP 8.1.
		return htmlspecialchars( $input === null ? '' : $input );
	}

	/**
	 * Handles the <autolinks> tag: adds links to the text between the tags and
	 * returns the result of parsing that text as wikitext.
	 * See https://www.mediawiki.org/wiki/Manual:Tag_extensions#How_do_I_render_wikitext_in_my_extension.3F
	 *
	 * @param string|null $input Text between the tags.
	 * @param array $args Tag attributes (unused).
	 * @param \Parser $parser Parser that encountered the tag.
	 * @param \PPFrame $frame Frame handed on to the recursive parse.
	 * @return string Output of the parser's recursiveTagParse().
	 */
	public static function doAutolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
		$config = new Config();
		$linker = new Linker( $config );
		$title = $parser->getTitle();
		$withLinks = $linker->linkContent( $title, $input );
		$output = $parser->recursiveTagParse( $withLinks, $frame );
		return $output;
	}
}
|
||||
|
||||
// vim: ts=2:sw=2:noet:comments^=\:///
|
@ -1,527 +0,0 @@
|
||||
<?php
|
||||
/*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
/// @file
|
||||
namespace LinkTitles;
|
||||
|
||||
/// Debugging aid: writes a human-readable rendering of a value to stderr.
/// @param $var Any variable. Its print_r() representation, followed by a
/// newline, is appended to php://stderr.
/// @return Nothing.
function dump($var) {
	$rendered = print_r($var, TRUE);
	// Message type 3 makes error_log() append to the given destination.
	error_log($rendered . "\n", 3, 'php://stderr');
}
|
||||
|
||||
/// Central class of the extension. Sets up parser hooks.
|
||||
/// This class contains only static functions; do not instantiate.
|
||||
class Extension {
|
||||
/// Caching variable for page titles that are fetched from the DB.
|
||||
private static $pageTitles;
|
||||
|
||||
/// Caching variable for the current namespace.
|
||||
/// This is needed because the sort order of the page titles that
|
||||
/// are cached in self::$pageTitles depends on the namespace of
|
||||
/// the page currently being processed.
|
||||
private static $currentNamespace;
|
||||
|
||||
/// A Title object for the page that is being parsed.
|
||||
private static $currentTitle;
|
||||
|
||||
/// A Title object for the target page currently being examined.
|
||||
private static $targetTitle;
|
||||
|
||||
// The TitleValue object of the target page
|
||||
private static $targetTitleValue;
|
||||
|
||||
/// The content object for the currently processed target page.
|
||||
/// This variable is necessary to be able to prevent loading the target
|
||||
/// content twice.
|
||||
private static $targetContent;
|
||||
|
||||
/// Holds the page title of the currently processed target page
|
||||
/// as a string.
|
||||
private static $targetTitleText;
|
||||
|
||||
/// Delimiter used in a regexp split operation to separate those parts
|
||||
/// of the page that should be parsed from those that should not be
|
||||
/// parsed (e.g. inside pre-existing links etc.).
|
||||
private static $delimiter;
|
||||
|
||||
private static $wordStartDelim;
|
||||
private static $wordEndDelim;
|
||||
|
||||
public static $ltConsoleOutput;
|
||||
public static $ltConsoleOutputDebug;
|
||||
|
||||
/// Setup entry point of the extension.
/// Its only action is to call self::BuildDelimiters().
public static function setup() {
	self::BuildDelimiters();
}
|
||||
|
||||
/// Handler for the PageContentSave event: adds links to the content that is
/// about to be saved.
/// The content is left alone if $wgLinkTitlesParseOnEdit is off, if the edit
/// is minor, if the page's namespace is not listed in
/// $wgLinkTitlesNamespaces, or if the serialized content matches the
/// MAG_LINKTITLES_NOAUTOLINKS magic word.
/// @param $wikiPage Page being saved; only its title is read.
/// @param $content Content being saved; replaced if links were added.
/// @param $isMinor Whether the edit is a minor edit.
/// @return true (always)
public static function onPageContentSave( &$wikiPage, &$user, &$content, &$summary,
	$isMinor, $isWatch, $section, &$flags, &$status ) {
	global $wgLinkTitlesParseOnEdit;
	global $wgLinkTitlesNamespaces;

	if ( !$wgLinkTitlesParseOnEdit || $isMinor ) {
		return true;
	}

	$pageTitle = $wikiPage->getTitle();

	// Restricting to the configured namespaces keeps e.g. sidebar pages
	// from being auto-linked.
	if ( !in_array( $pageTitle->getNamespace(), $wgLinkTitlesNamespaces ) ) {
		return true;
	}

	$oldText = $content->getContentHandler()->serializeContent( $content );
	if ( \MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $oldText ) ) {
		return true;
	}

	$linkedText = self::parseContent( $pageTitle, $oldText );
	if ( $linkedText != $oldText ) {
		$content = $content->getContentHandler()->unserializeContent( $linkedText );
	}
	return true;
}
|
||||
|
||||
/// Handler for the InternalParseBeforeLinks event: adds links while a page
/// is being rendered.
/// @param Parser $parser Parser that raised the event.
/// @param $text Preprocessed text of the page; replaced with the linked
/// text if the page qualifies.
/// @return true (always)
public static function onInternalParseBeforeLinks( \Parser &$parser, &$text ) {
	global $wgLinkTitlesParseOnRender;
	if ( !$wgLinkTitlesParseOnRender ) {
		return true;
	}
	global $wgLinkTitlesNamespaces;
	$pageTitle = $parser->getTitle();

	// Pages carrying the magic word '__NOAUTOLINKS__' are not parsed.
	if ( \MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) ) {
		return true;
	}

	// Only pages in one of the configured namespaces are processed.
	if ( in_array( $pageTitle->getNamespace(), $wgLinkTitlesNamespaces ) ) {
		$text = self::parseContent( $pageTitle, $text );
	}
	return true;
}
|
||||
|
||||
/// Core function of the extension, performs the actual parsing of the content.
/// For every candidate page title, the text is split by self::$delimiter
/// and links are added only to the even-indexed parts of the split (the
/// parts that are not delimiter captures).
/// @param $title Title object of the page that is being processed.
/// @param $text String that holds the article content (not modified).
/// @returns string: parsed text with links added if needed
private static function parseContent( $title, &$text ) {

	// Configuration variables need to be defined here as globals.
	global $wgLinkTitlesFirstOnly;
	global $wgLinkTitlesSmartMode;
	global $wgCapitalLinks;

	$limit = $wgLinkTitlesFirstOnly ? 1 : -1;
	self::$currentTitle = $title;
	$currentNamespace = $title->getNamespace();
	$newText = $text;

	// The cached title list depends on the namespace of the current page,
	// so it is fetched again whenever that namespace changes.
	if ( !isset( self::$pageTitles ) || ( $currentNamespace != self::$currentNamespace ) ) {
		self::$currentNamespace = $currentNamespace;
		self::$pageTitles = self::fetchPageTitles( $currentNamespace );
	}

	// Iterate through the page titles
	foreach ( self::$pageTitles as $row ) {
		self::newTarget( $row->page_namespace, $row->page_title );

		// Don't link current page
		if ( self::$targetTitle->equals( self::$currentTitle ) ) {
			continue;
		}

		// The first-only limit applies per target title, so the flag must
		// start out false for every target. It used to be initialized only
		// once before the loop, which disabled the smart mode pass for all
		// targets following the first one that reached its limit.
		$limitReached = false;

		// split the page content by [[...]] groups
		// credits to inhan @ StackOverflow for suggesting preg_split
		// see http://stackoverflow.com/questions/10672286
		$arr = preg_split( self::$delimiter, $newText, -1, PREG_SPLIT_DELIM_CAPTURE );
		$partCount = count( $arr );

		// Escape certain special characters in the page title to prevent
		// regexp compilation errors
		self::$targetTitleText = self::$targetTitle->getText();
		$quotedTitle = preg_quote( self::$targetTitleText, '/' );

		self::ltDebugLog( 'TargetTitle='. self::$targetTitleText, 'private' );
		self::ltDebugLog( 'TargetTitleQuoted='. $quotedTitle, 'private' );

		// Depending on the global configuration setting $wgCapitalLinks,
		// the title has to be searched for either in a strictly case-sensitive
		// way, or in a 'fuzzy' way where the first letter of the title may
		// be either case.
		if ( $wgCapitalLinks && ( $quotedTitle[0] != '\\' ) ) {
			$searchTerm = '((?i)' . $quotedTitle[0] . '(?-i)' .
				substr( $quotedTitle, 1 ) . ')';
		} else {
			$searchTerm = '(' . $quotedTitle . ')';
		}

		$regex = '/(?<![\:\.\@\/\?\&])' . self::$wordStartDelim .
			$searchTerm . self::$wordEndDelim . '/S';
		for ( $i = 0; $i < $partCount; $i += 2 ) {
			// even indexes will point to text that is not enclosed by brackets
			$arr[$i] = preg_replace_callback( $regex,
				'LinkTitles\Extension::simpleModeCallback', $arr[$i], $limit, $count );
			if ( $wgLinkTitlesFirstOnly && ( $count > 0 ) ) {
				$limitReached = true;
				break;
			}
		}
		$newText = implode( '', $arr );

		// If smart mode is turned on, the extension will perform a second
		// pass on the page and add links with aliases where the case does
		// not match.
		if ( $wgLinkTitlesSmartMode && !$limitReached ) {
			$arr = preg_split( self::$delimiter, $newText, -1, PREG_SPLIT_DELIM_CAPTURE );
			$partCount = count( $arr );
			$smartRegex = '/(?<![\:\.\@\/\?\&])' .
				self::$wordStartDelim . '(' . $quotedTitle . ')' .
				self::$wordEndDelim . '/iS';

			for ( $i = 0; $i < $partCount; $i += 2 ) {
				// even indexes will point to text that is not enclosed by brackets
				$arr[$i] = preg_replace_callback( $smartRegex,
					'LinkTitles\Extension::smartModeCallback',
					$arr[$i], $limit, $count );
				if ( $wgLinkTitlesFirstOnly && ( $count > 0 ) ) {
					break;
				}
			}
			$newText = implode( '', $arr );
		} // $wgLinkTitlesSmartMode
	} // foreach self::$pageTitles as $row
	return $newText;
}
|
||||
|
||||
/// Adds links to a single page, given its Title object, and saves the page
/// if its text changed.
/// This function is called by the SpecialLinkTitles class and the
/// LinkTitlesJob class.
/// @param Title $title Title object.
/// @param RequestContext $context Current request context; its user is
///                                recorded as the author of the edit.
///                                If in doubt, call MediaWiki's
///                                `RequestContext::getMain()` to obtain
///                                such an object.
/// @returns boolean True if the page exists, false if the page does not exist
public static function processPage( \Title $title, \RequestContext $context ) {
	self::ltLog('Processing '. $title->getPrefixedText());
	$wikiPage = \WikiPage::factory($title);
	$content = $wikiPage->getContent();

	// No content means that there is no such page.
	if ( $content == null ) {
		return false;
	}

	$originalText = $content->getContentHandler()->serializeContent($content);
	$linkedText = self::parseContent($title, $originalText);
	if ( $originalText != $linkedText ) {
		$content = $content->getContentHandler()->unserializeContent( $linkedText );
		$summary = "Links to existing pages added by LinkTitles bot."; // TODO: i18n
		$wikiPage->doEditContent(
			$content,
			$summary,
			EDIT_MINOR | EDIT_FORCE_BOT,
			false, // baseRevId
			$context->getUser()
		);
	}
	return true;
}
|
||||
|
||||
/// Registers the two magic words of this extension as 'double-underscore'
/// terms, which are automatically removed before a page is displayed.
/// @param $doubleUnderscoreIDs Array of magic word IDs; the two IDs are
/// appended to it.
/// @return true
public static function onGetDoubleUnderscoreIDs( array &$doubleUnderscoreIDs ) {
	array_push(
		$doubleUnderscoreIDs,
		'MAG_LINKTITLES_NOTARGET',
		'MAG_LINKTITLES_NOAUTOLINKS'
	);
	return true;
}
|
||||
|
||||
/// Registers the handlers for the <noautolinks> and <autolinks> tags
/// with the parser.
/// @param Parser $parser Parser to register the tag hooks with.
public static function onParserFirstCallInit( \Parser $parser ) {
	$tagHandlers = array(
		'noautolinks' => 'LinkTitles\Extension::doNoautolinksTag',
		'autolinks' => 'LinkTitles\Extension::doAutolinksTag',
	);
	foreach ( $tagHandlers as $tagName => $handler ) {
		$parser->setHook( $tagName, $handler );
	}
}
|
||||
|
||||
/// Removes the extra tag that this extension provides (<noautolinks>)
/// by simply returning the HTML-escaped text between the tags (if any).
/// See https://www.mediawiki.org/wiki/Manual:Tag_extensions#Example
/// @param $input Text between the tags, or null for a self-closing tag.
/// @param $args Tag attributes (unused).
/// @param Parser $parser Unused.
/// @param PPFrame $frame Unused.
/// @return string Escaped text; empty string if there is no input.
public static function doNoautolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
	// A self-closing tag has no content, so $input is null; passing null
	// to htmlspecialchars() is deprecated as of PHP 8.1.
	return htmlspecialchars( $input === null ? '' : $input );
}
|
||||
|
||||
/// Handles the <autolinks> tag: adds links to the text between the tags
/// and returns the result of parsing that text as wikitext.
/// See https://www.mediawiki.org/wiki/Manual:Tag_extensions#How_do_I_render_wikitext_in_my_extension.3F
/// @param $input Text between the tags.
/// @param $args Tag attributes (unused).
/// @param Parser $parser Parser that encountered the tag.
/// @param PPFrame $frame Frame handed on to the recursive parse.
/// @return Output of the parser's recursiveTagParse().
public static function doAutolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
	$pageTitle = $parser->getTitle();
	$linkedWikitext = self::parseContent( $pageTitle, $input );
	return $parser->recursiveTagParse( $linkedWikitext, $frame );
}
|
||||
|
||||
/// Fetches the page titles from the database.
/// Only pages in the namespaces listed in $wgLinkTitlesNamespaces (plus
/// the current namespace) are returned, excluding blacklisted titles and
/// titles shorter than $wgLinkTitlesMinimumTitleLength. Results are ordered
/// by namespace weight (current namespace first) and then by title length.
/// @param $currentNamespace Namespace of the page currently being processed.
/// @return Result of the database select.
private static function fetchPageTitles( $currentNamespace ) {
	global $wgLinkTitlesPreferShortTitles;
	global $wgLinkTitlesMinimumTitleLength;
	global $wgLinkTitlesBlackList;
	global $wgLinkTitlesNamespaces;

	$sortOrder = $wgLinkTitlesPreferShortTitles ? 'ASC' : 'DESC';

	// Build a blacklist of pages that are not supposed to be link
	// targets.
	// NOTE(review): the titles are spliced into the SQL unescaped; a
	// blacklisted title that contains a double quote breaks the query.
	$blackList = str_replace( ' ', '_', '("' . implode( '","', $wgLinkTitlesBlackList ) . '")' );

	// Build our weight list. Make sure current namespace is first element
	$namespaces = array_diff( $wgLinkTitlesNamespaces, [ $currentNamespace ] );
	array_unshift( $namespaces, $currentNamespace );

	// No need for a sanity check: we are sure that we have at least one
	// element in the array.
	$weightSelect = "CASE page_namespace ";
	$currentWeight = 0;
	// Iterate by value; the previous by-reference loop variable was never
	// unset and left a dangling reference to the last array element.
	foreach ( $namespaces as $namespaceValue ) {
		$currentWeight = $currentWeight + 100;
		$weightSelect = $weightSelect . " WHEN " . $namespaceValue . " THEN " . $currentWeight . PHP_EOL;
	}
	$weightSelect = $weightSelect . " END ";
	$namespacesClause = '(' . implode( ', ', $namespaces ) . ')';

	$dbr = wfGetDB( DB_SLAVE );
	// Captured here because __METHOD__ evaluates differently inside a closure.
	$fname = __METHOD__;

	// Runs the title query, measuring the title length with the given SQL
	// function.
	$selectTitles = function ( $lengthFunction ) use (
		$dbr, $fname, $weightSelect, $namespacesClause, $blackList,
		$sortOrder, $wgLinkTitlesMinimumTitleLength
	) {
		return $dbr->select(
			'page',
			array( 'page_title', 'page_namespace', "weight" => $weightSelect ),
			array(
				'page_namespace IN ' . $namespacesClause,
				$lengthFunction . '(page_title) >= ' . $wgLinkTitlesMinimumTitleLength,
				'page_title NOT IN ' . $blackList,
			),
			$fname,
			array( 'ORDER BY' => 'weight ASC, ' . $lengthFunction . '(page_title) ' . $sortOrder )
		);
	};

	// Since the db may be sqlite, we need a try..catch structure because
	// sqlite does not support the CHAR_LENGTH function. The exception class
	// must be fully qualified: inside this namespace a bare 'Exception'
	// refers to LinkTitles\Exception, so the fallback could never run.
	try {
		$res = $selectTitles( 'CHAR_LENGTH' );
	} catch ( \Exception $e ) {
		$res = $selectTitles( 'LENGTH' );
	}

	return $res;
}
|
||||
|
||||
// Build an anonymous callback function to be used in simple mode.
|
||||
private static function simpleModeCallback( array $matches ) {
	// Callback for preg_replace_callback in simple mode: wraps the matched
	// text in link brackets, unless the current target page may not be
	// linked, in which case the match is returned unchanged.
	if ( !self::checkTargetPage() ) {
		return $matches[0];
	}

	self::ltLog( "Linking '$matches[0]' to '" . self::$targetTitle . "'" );
	return '[[' . $matches[0] . ']]';
}
|
||||
|
||||
// Callback function for use with preg_replace_callback.
|
||||
// This essentially performs a case-sensitive comparison of the
|
||||
// current page title and the occurrence found on the page; if
|
||||
// the cases do not match, it builds an aliased (piped) link.
|
||||
// If $wgCapitalLinks is set to true, the case of the first
|
||||
// letter is ignored by MediaWiki and we don't need to build a
|
||||
// piped link if only the case of the first letter is different.
|
||||
private static function smartModeCallback( array $matches ) {
	// Callback for preg_replace_callback in smart mode.
	//
	// @param $matches Matches provided by preg_replace_callback; only
	//        $matches[0] (the text found on the page) is used.
	// @return The match unchanged if the target page may not be linked;
	//         a plain link if the match has the same case as the target
	//         title; a piped link otherwise.
	global $wgCapitalLinks;

	if ( !self::checkTargetPage() ) {
		return $matches[0];
	}
	self::ltLog( "Linking (smart) '$matches[0]' to '" . self::$targetTitle . "'" );

	if ( $wgCapitalLinks ) {
		// With $wgCapitalLinks set to true, the case of the first letter does
		// not matter for linking, so it is excluded from the comparison.
		// mb_substr skips the first *character*; byte-wise substr would cut a
		// multibyte first letter (e.g. 'Ä') in half and report a mismatch.
		$sameCase = strcmp(
			mb_substr( self::$targetTitleText, 1, null, 'UTF-8' ),
			mb_substr( $matches[0], 1, null, 'UTF-8' )
		) === 0;
	} else {
		$sameCase = strcmp( self::$targetTitleText, $matches[0] ) === 0;
	}

	if ( $sameCase ) {
		// Case-sensitive match: no need to build a piped link.
		return '[[' . $matches[0] . ']]';
	}
	// Case-insensitive match: build a piped link.
	return '[[' . self::$targetTitleText . '|' . $matches[0] . ']]';
}
|
||||
|
||||
/// Sets member variables for the current target page.
|
||||
private static function newTarget( $ns, $title ) {
	// Build the title object for the new target and remember it.
	$titleObject = \Title::makeTitleSafe( $ns, $title );
	self::$targetTitle = $titleObject;
	self::ltDebugLog( 'newtarget='. $titleObject->getText(), "private" );

	// Remember the corresponding title value as well.
	$titleValue = $titleObject->getTitleValue();
	self::$targetTitleValue = $titleValue;
	self::ltDebugLog( 'altTarget='. $titleValue->getText(), "private" );

	// Discard the content of the previous target.
	self::$targetContent = null;
}
|
||||
|
||||
/// Returns the content of the current target page.
|
||||
/// This function serves to be used in preg_replace_callback callback
|
||||
/// functions, in order to load the target page content from the
|
||||
/// database only when needed.
|
||||
/// @note It is absolutely necessary that the newTarget()
|
||||
/// function is called for every new page.
|
||||
private static function getTargetContent() {
	// Returns the content of the current target page, loading it only on the
	// first call after newTarget() has reset the cache to null.
	//
	// The check must look at the static member: a local $targetContent is
	// never set, which would reload the page on every single call.
	if ( ! isset( self::$targetContent ) ) {
		self::$targetContent = \WikiPage::factory( self::$targetTitle )->getContent();
	}
	return self::$targetContent;
}
|
||||
|
||||
/// Examines the current target page. Returns true if it may be linked;
|
||||
/// false if not. This depends on the settings
|
||||
/// $wgLinkTitlesCheckRedirect and $wgLinkTitlesEnableNoTargetMagicWord
|
||||
/// and whether the target page is a redirect or contains the
|
||||
/// __NOAUTOLINKTARGET__ magic word.
|
||||
/// @returns boolean
|
||||
private static function checkTargetPage() {
	// Decides whether the current target page may be linked.
	//
	// @return false if redirect checking is enabled and the target page
	//         redirects to the current page, or if the no-target magic word
	//         is enabled and present on the target page; true otherwise.
	//         A target page without content is treated as linkable.
	global $wgLinkTitlesEnableNoTargetMagicWord;
	global $wgLinkTitlesCheckRedirect;

	// If checking for redirects is enabled and the target page does
	// indeed redirect to the current page, do not link.
	if ( $wgLinkTitlesCheckRedirect ) {
		$content = self::getTargetContent();
		// The content may be null; calling a method on it would be fatal.
		if ( $content ) {
			$redirectTitle = $content->getUltimateRedirectTarget();
			if ( $redirectTitle && $redirectTitle->equals( self::$currentTitle ) ) {
				return false;
			}
		}
	}

	// If the magic word __NOAUTOLINKTARGET__ is enabled and the target
	// page does indeed contain this magic word, do not link.
	if ( $wgLinkTitlesEnableNoTargetMagicWord ) {
		$content = self::getTargetContent();
		if ( $content && $content->matchMagicWord(
			\MagicWord::get('MAG_LINKTITLES_NOTARGET') ) ) {
			return false;
		}
	}

	return true;
}
|
||||
|
||||
/// Builds the delimiter that is used in a regexp to separate
|
||||
/// text that should be parsed from text that should not be
|
||||
/// parsed (e.g. inside existing links etc.)
|
||||
private static function BuildDelimiters() {
	// Configuration variables need to be declared as globals here.
	global $wgLinkTitlesParseHeadings;
	global $wgLinkTitlesSkipTemplates;
	global $wgLinkTitlesWordStartOnly;
	global $wgLinkTitlesWordEndOnly;

	// Word boundaries are expressed with unicode character properties rather
	// than \b, so that whole words containing non-ASCII characters are
	// detected as well. Note that this requires a PCRE library that was
	// compiled with --enable-unicode-properties.
	self::$wordStartDelim = $wgLinkTitlesWordStartOnly ? '(?<!\pL)' : '';
	self::$wordEndDelim = $wgLinkTitlesWordEndOnly ? '(?!\pL)' : '';

	if ( $wgLinkTitlesSkipTemplates ) {
		// Recursive regex that balances curly braces;
		// see http://www.regular-expressions.info/recurse.html
		$templatesDelimiter = '{{(?>[^{}]|(?R))*}}|';
	} else {
		// Match template names (ignoring any piped [[]] links in them)
		// along with the trailing pipe and parameter name or closing
		// braces; also match sequences of '|wordcharacters=' (without
		// spaces in them) that usually only occur as parameter names in
		// transclusions (but could also occur as wiki table cell contents).
		// TODO: Find a way to match parameter names in transclusions, but
		// not in table cells or other sequences involving a pipe character
		// and equal sign.
		$templatesDelimiter = '{{[^|]*?(?:(?:\[\[[^]]+]])?)[^|]*?(?:\|(?:\w+=)?|(?:}}))|\|\w+=|';
	}

	// Headings are only excluded from linking if the user does not want
	// them to be parsed.
	$headingsDelimiter = $wgLinkTitlesParseHeadings ? '' : '=+.+?=+|';
	$url = '[a-z]+?\:\/\/(?:\S+\.)+\S+(?:\/.*)?';

	// Assemble the alternatives that are excluded from linking. For
	// preg_split() it is important that the final expression has only one
	// capturing subpattern (which precludes conditional subpatterns).
	$excluded  = '\[\[.*?\]\]|';                              // links
	$excluded .= $headingsDelimiter;                          // titles (if requested)
	$excluded .= $templatesDelimiter;                         // templates (if requested)
	$excluded .= '^ .+?\n|\n .+?\n|\n .+?$|^ .+?$|';          // preformatted text
	$excluded .= '<nowiki>.*?<.nowiki>|<code>.*?<\/code>|';   // nowiki/code
	$excluded .= '<pre>.*?<\/pre>|<html>.*?<\/html>|';        // pre/html
	$excluded .= '<script>.*?<\/script>|';                    // script
	$excluded .= '<gallery>.*?<\/gallery>|';                  // gallery
	$excluded .= '<div.+?>|<\/div>|';                         // attributes of div elements
	$excluded .= '<span.+?>|<\/span>|';                       // attributes of span elements
	$excluded .= '<file>[^<]*<\/file>|';                      // stuff inside file elements
	$excluded .= 'style=".+?"|class=".+?"|';                  // styles and classes (e.g. of wikitables)
	$excluded .= '<noautolinks>.*?<\/noautolinks>|';          // custom tag 'noautolinks'
	$excluded .= '\[' . $url . '\s.+?\]|'. $url . '(?=\s|$)|'; // urls
	$excluded .= '(?<=\b)\S+\@(?:\S+\.)+\S+(?=\b)';           // email addresses

	self::$delimiter = '/(' . $excluded . ')/ismS';
}
|
||||
|
||||
/// Local Debugging output function which can send output to console as well
|
||||
public static function ltDebugLog($text) {
	// Mirror the message on the console if debug console output is enabled.
	if ( self::$ltConsoleOutputDebug ) {
		echo $text . "\n";
	}
	// Always write the message to the 'LinkTitles' debug log group.
	wfDebugLog( 'LinkTitles', $text , 'private' );
}
|
||||
|
||||
/// Local Logging output function which can send output to console as well
|
||||
public static function ltLog($text) {
	// Mirror the message on the console if console output is enabled.
	if ( self::$ltConsoleOutput ) {
		echo $text . "\n";
	}
	// Always write the message to the 'LinkTitles' debug log group.
	wfDebugLog( 'LinkTitles', $text , 'private' );
}
|
||||
}
|
||||
|
||||
// vim: ts=2:sw=2:noet:comments^=\:///
|
184
includes/Linker.php
Normal file
184
includes/Linker.php
Normal file
@ -0,0 +1,184 @@
|
||||
<?php
|
||||
/**
|
||||
* The LinkTitles\Linker class does the heavy linking for the extension.
|
||||
*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*
|
||||
* @author Daniel Kraus <bovender@bovender.de>
|
||||
*/
|
||||
namespace LinkTitles;
|
||||
|
||||
/**
|
||||
* Performs the actual linking of content to existing pages.
|
||||
*/
|
||||
class Linker {
	/**
	 * LinkTitles configuration.
	 *
	 * @var Config $config
	 */
	public $config;

	/**
	 * The title text of the potential target page that is currently being
	 * processed.
	 *
	 * This is an instance variable (rather than a local method variable) so it
	 * can be accessed in the preg_replace_callback callbacks.
	 *
	 * @var String $targetTitleText
	 */
	private $targetTitleText;

	/**
	 * Constructs a new instance of the Linker class.
	 *
	 * @param Config $config LinkTitles configuration object.
	 */
	public function __construct( Config &$config ) {
		$this->config = $config;
	}

	/**
	 * Core function of the extension, performs the actual parsing of the content.
	 *
	 * This method receives a Title object and the string representation of the
	 * source page. It does not work on a WikiPage object directly because the
	 * callbacks in the Extension class do not always get a WikiPage object in the
	 * first place.
	 *
	 * @param \Title &$title Title object for the current page.
	 * @param String $text String that holds the article content
	 * @return String with links to target pages
	 */
	public function linkContent( \Title &$title, &$text ) {
		$limit = $this->config->firstOnly ? 1 : -1;
		$newText = $text;

		$splitter = Splitter::default( $this->config );
		$targets = Targets::default( $title, $this->config );

		// Iterate through the target page titles
		foreach ( $targets->queryResult as $row ) {
			$target = new Target( $row->page_namespace, $row->page_title, $this->config );

			// Don't link current page and don't link if the target page redirects
			// to the current page or has the __NOAUTOLINKTARGET__ magic word
			// (as required by the actual LinkTitles configuration).
			if ( $target->isSameTitle( $title ) || !$target->mayLinkTo( $title ) ) {
				continue;
			}

			// Split the page content by non-linkable sections.
			// Credits to inhan @ StackOverflow for suggesting preg_split.
			// See http://stackoverflow.com/questions/10672286
			$arr = $splitter->split( $newText );

			// Both values are tracked per target: the first-only limit applies
			// to each target title individually, so a hit for one target must
			// not disable the smart pass for all following targets.
			$count = 0;
			$limitReached = false;

			// Cache the target title text for the regex callbacks
			$this->targetTitleText = $target->getTitleText();

			// Even indexes will point to sections of the text that may be linked
			for ( $i = 0; $i < count( $arr ); $i += 2 ) {
				$arr[$i] = preg_replace_callback( $target->getCaseSensitiveRegex(),
					array( $this, 'simpleModeCallback' ),
					$arr[$i], $limit, $replacements );
				// preg_replace_callback reports the replacements of this call
				// only, so the total for all sections is accumulated here.
				$count += $replacements;
				if ( $this->config->firstOnly && ( $count > 0 ) ) {
					$limitReached = true;
					break;
				}
			}
			$newText = implode( '', $arr );

			// If smart mode is turned on, the extension will perform a second
			// pass on the page and add links with aliases where the case does
			// not match.
			if ( $this->config->smartMode && !$limitReached ) {
				if ( $count > 0 ) {
					// Split the text again because it was changed in the first
					// pass; otherwise the links that were just inserted would
					// be treated as linkable text.
					$arr = $splitter->split( $newText );
				}

				for ( $i = 0; $i < count( $arr ); $i += 2 ) {
					// even indexes will point to text that is not enclosed by brackets
					$arr[$i] = preg_replace_callback( $target->getCaseInsensitiveRegex(),
						array( $this, 'smartModeCallback' ),
						$arr[$i], $limit, $replacements );
					if ( $this->config->firstOnly && ( $replacements > 0 ) ) {
						break;
					}
				}
				$newText = implode( '', $arr );
			} // smart mode
		} // foreach target row

		return $newText;
	}

	/**
	 * Callback for preg_replace_callback in simple mode.
	 *
	 * @param array $matches Matches provided by preg_replace_callback
	 * @return string Target page title with link markup
	 */
	private function simpleModeCallback( array $matches ) {
		return '[[' . $matches[0] . ']]';
	}

	/**
	 * Callback function for use with preg_replace_callback.
	 * This essentially performs a case-sensitive comparison of the
	 * current page title and the occurrence found on the page; if
	 * the cases do not match, it builds an aliased (piped) link.
	 * If the capitalLinks configuration is set to true, the case of the
	 * first letter is ignored by MediaWiki and we don't need to build a
	 * piped link if only the case of the first letter is different.
	 *
	 * @param array $matches Matches provided by preg_replace_callback
	 * @return string Target page title with link markup
	 */
	private function smartModeCallback( array $matches ) {
		if ( $this->config->capitalLinks ) {
			// Ignore the first letter of the page titles, as it does not
			// matter for linking. mb_substr skips the first *character*;
			// byte-wise substr would cut a multibyte first letter (e.g. 'Ä')
			// in half and report a mismatch.
			$sameCase = strcmp(
				mb_substr( $this->targetTitleText, 1, null, 'UTF-8' ),
				mb_substr( $matches[0], 1, null, 'UTF-8' )
			) === 0;
		} else {
			$sameCase = strcmp( $this->targetTitleText, $matches[0] ) === 0;
		}

		if ( $sameCase ) {
			// Case-sensitive match: no need to build a piped link.
			return '[[' . $matches[0] . ']]';
		}
		// Case-insensitive match: build a piped link.
		return '[[' . $this->targetTitleText . '|' . $matches[0] . ']]';
	}
}
|
||||
|
||||
// vim: ts=2:sw=2:noet:comments^=\:///
|
@ -1,21 +1,25 @@
|
||||
<?php
|
||||
/*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
/**
|
||||
* Provides a special page for the LinkTitles extension.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*
|
||||
* @author Daniel Kraus <bovender@bovender.de>
|
||||
*/
|
||||
namespace LinkTitles;
|
||||
/// @defgroup batch Batch processing
|
||||
@ -25,17 +29,21 @@ if ( !defined( 'MEDIAWIKI' ) ) {
|
||||
die( 'Not an entry point.' );
|
||||
}
|
||||
/// @endcond
|
||||
|
||||
/// Provides a special page that can be used to batch-process all pages in
|
||||
/// the wiki. By default, this can only be performed by sysops.
|
||||
/// @ingroup batch
|
||||
|
||||
/**
|
||||
* Provides a special page that can be used to batch-process all pages in
|
||||
* the wiki. By default, this can only be performed by sysops.
|
||||
* @ingroup batch
|
||||
*
|
||||
*/
|
||||
class Special extends \SpecialPage {
|
||||
|
||||
/// Constructor. Announces the special page title and required user right
|
||||
/// to the parent constructor.
|
||||
/**
|
||||
* Constructor. Announces the special page title and required user right to the parent constructor.
|
||||
*/
|
||||
function __construct() {
|
||||
// the second parameter in the following function call ensures that only
|
||||
// users who have the 'linktitles-batch' right get to see this page (by
|
||||
// the second parameter in the following function call ensures that only
|
||||
// users who have the 'linktitles-batch' right get to see this page (by
|
||||
// default, these are all sysop users).
|
||||
parent::__construct( 'LinkTitles', 'linktitles-batch' );
|
||||
}
|
||||
@ -44,9 +52,11 @@ class Special extends \SpecialPage {
|
||||
return 'pagetools';
|
||||
}
|
||||
|
||||
/// Entry function of the special page class. Will abort if the user does
|
||||
/// not have appropriate permissions ('linktitles-batch').
|
||||
/// @return undefined
|
||||
|
||||
/**
|
||||
* Entry function of the special page class. Will abort if the user does not have appropriate permissions ('linktitles-batch').
|
||||
* @param $par Additional parameters (required by interface; currently not used)
|
||||
*/
|
||||
function execute($par) {
|
||||
// Prevent non-authorized users from executing the batch processing.
|
||||
if ( !$this->userCanExecute( $this->getUser() ) ) {
|
||||
@ -76,18 +86,19 @@ class Special extends \SpecialPage {
|
||||
}
|
||||
}
|
||||
|
||||
/// Processes wiki articles, starting at the page indicated by
|
||||
/// $startTitle. If $wgLinkTitlesTimeLimit is reached before all pages are
|
||||
/// processed, returns the title of the next page that needs processing.
|
||||
/// @param WebRequest $request WebRequest object that is associated with the special
|
||||
/// page.
|
||||
/// @param OutputPage $output Output page for the special page.
|
||||
/**
|
||||
* Processes wiki articles, starting at the page indicated by
|
||||
* $startTitle. If $wgLinkTitlesTimeLimit is reached before all pages are
|
||||
* processed, returns the title of the next page that needs processing.
|
||||
* @param WebRequest $request WebRequest object that is associated with the special page.
|
||||
* @param OutputPage $output Output page for the special page.
|
||||
*/
|
||||
private function process( \WebRequest &$request, \OutputPage &$output) {
|
||||
global $wgLinkTitlesTimeLimit;
|
||||
global $wgLinkTitlesNamespaces;
|
||||
global $wgLinkTitlesNamespaces;
|
||||
|
||||
// get our Namespaces
|
||||
$namespacesClause = str_replace( '_', ' ','(' . implode( ', ',$wgLinkTitlesNamespaces ) . ')' );
|
||||
// get our Namespaces
|
||||
$namespacesClause = str_replace( '_', ' ','(' . implode( ', ',$wgLinkTitlesNamespaces ) . ')' );
|
||||
|
||||
// Start the stopwatch
|
||||
$startTime = microtime(true);
|
||||
@ -95,7 +106,7 @@ class Special extends \SpecialPage {
|
||||
// Connect to the database
|
||||
$dbr = wfGetDB( DB_SLAVE );
|
||||
|
||||
// Fetch the start index and max number of records from the POST
|
||||
// Fetch the start index and max number of records from the POST
|
||||
// request.
|
||||
$postValues = $request->getValues();
|
||||
|
||||
@ -107,26 +118,24 @@ class Special extends \SpecialPage {
|
||||
if ( array_key_exists('e', $postValues) ) {
|
||||
$end = intval($postValues['e']);
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
// No end index was given. Therefore, count pages now.
|
||||
$end = $this->countPages($dbr, $namespacesClause );
|
||||
};
|
||||
|
||||
array_key_exists('r', $postValues) ?
|
||||
$reloads = $postValues['r'] :
|
||||
$reloads = 0;
|
||||
array_key_exists('r', $postValues) ? $reloads = $postValues['r'] : $reloads = 0;
|
||||
|
||||
// Retrieve page names from the database.
|
||||
$res = $dbr->select(
|
||||
$res = $dbr->select(
|
||||
'page',
|
||||
array('page_title', 'page_namespace'),
|
||||
array(
|
||||
'page_namespace IN ' . $namespacesClause,
|
||||
),
|
||||
__METHOD__,
|
||||
array(
|
||||
'LIMIT' => 999999999,
|
||||
'page_namespace IN ' . $namespacesClause,
|
||||
),
|
||||
__METHOD__,
|
||||
array(
|
||||
'LIMIT' => 999999999,
|
||||
'OFFSET' => $start
|
||||
)
|
||||
);
|
||||
@ -136,7 +145,7 @@ class Special extends \SpecialPage {
|
||||
$curTitle = \Title::makeTitleSafe( $row->page_namespace, $row->page_title);
|
||||
Extension::processPage($curTitle, $this->getContext());
|
||||
$start += 1;
|
||||
|
||||
|
||||
// Check if the time limit is exceeded
|
||||
if ( microtime(true)-$startTime > $wgLinkTitlesTimeLimit )
|
||||
{
|
||||
@ -149,11 +158,11 @@ class Special extends \SpecialPage {
|
||||
// If we have not reached the last page yet, produce code to reload
|
||||
// the extension's special page.
|
||||
if ( $start < $end )
|
||||
{
|
||||
{
|
||||
$reloads += 1;
|
||||
// Build a form with hidden values and output JavaScript code that
|
||||
// Build a form with hidden values and output JavaScript code that
|
||||
// immediately submits the form in order to continue the process.
|
||||
$output->addHTML($this->getReloaderForm($request->getRequestURL(),
|
||||
$output->addHTML($this->getReloaderForm($request->getRequestURL(),
|
||||
$start, $end, $reloads));
|
||||
}
|
||||
else // Last page has been processed
|
||||
@ -162,8 +171,10 @@ class Special extends \SpecialPage {
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds WikiText to the output containing information about the extension
|
||||
/// and a form and button to start linking.
|
||||
/**
|
||||
* Adds WikiText to the output containing information about the extension
|
||||
* and a form and button to start linking.
|
||||
*/
|
||||
private function buildInfoPage( &$request, &$output ) {
|
||||
$url = $request->getRequestURL();
|
||||
|
||||
@ -176,8 +187,8 @@ Source code: http://github.com/bovender/LinkTitles
|
||||
|
||||
== Batch Linking ==
|
||||
You can start a batch linking process by clicking on the button below.
|
||||
This will go through every page in the normal namespace of your Wiki and
|
||||
insert links automatically. This page will repeatedly reload itself, in
|
||||
This will go through every page in the normal namespace of your Wiki and
|
||||
insert links automatically. This page will repeatedly reload itself, in
|
||||
order to prevent blocking the server. To interrupt the process, simply
|
||||
close this page.
|
||||
EOF
|
||||
@ -192,12 +203,13 @@ EOF
|
||||
);
|
||||
}
|
||||
|
||||
/// Produces informative output in WikiText format to show while working.
|
||||
/// @param $output Output object.
|
||||
/// @param $curTitle Title of the currently processed page.
|
||||
/// @param $index Index of the currently processed page.
|
||||
/// @param $end Last index that will be processed (i.e., number of
|
||||
/// pages).
|
||||
/**
|
||||
* Produces informative output in WikiText format to show while working.
|
||||
* @param $output Output object.
|
||||
* @param $curTitle Title of the currently processed page.
|
||||
* @param $index Index of the currently processed page.
|
||||
* @param $end Last index that will be processed (i.e., number of pages).
|
||||
*/
|
||||
private function addProgressInfo( &$output, $curTitle, $index, $end ) {
|
||||
$progress = $index / $end * 100;
|
||||
$percent = sprintf("%01.1f", $progress);
|
||||
@ -205,8 +217,8 @@ EOF
|
||||
$output->addWikiText(
|
||||
<<<EOF
|
||||
== Processing pages... ==
|
||||
The [http://www.mediawiki.org/wiki/Extension:LinkTitles LinkTitles]
|
||||
extension is currently going through every page of your wiki, adding links to
|
||||
The [http://www.mediawiki.org/wiki/Extension:LinkTitles LinkTitles]
|
||||
extension is currently going through every page of your wiki, adding links to
|
||||
existing pages as appropriate.
|
||||
|
||||
=== Current page: $curTitle ===
|
||||
@ -232,14 +244,15 @@ EOF
|
||||
);
|
||||
}
|
||||
|
||||
/// Generates an HTML form and JavaScript to automatically submit the
|
||||
/// form.
|
||||
/// @param $url URL to reload with a POST request.
|
||||
/// @param $start Index of the next page that shall be processed.
|
||||
/// @param $end Index of the last page to be processed.
|
||||
/// @param $reloads Counter that holds the number of reloads so far.
|
||||
/// @returns String that holds the HTML for a form and a
|
||||
/// JavaScript command.
|
||||
/**
|
||||
* Generates an HTML form and JavaScript to automatically submit the
|
||||
* form.
|
||||
* @param $url URL to reload with a POST request.
|
||||
* @param $start Index of the next page that shall be processed.
|
||||
* @param $end Index of the last page to be processed.
|
||||
* @param $reloads Counter that holds the number of reloads so far.
|
||||
* @returns String that holds the HTML for a form and a JavaScript command.
|
||||
*/
|
||||
private function getReloaderForm( $url, $start, $end, $reloads ) {
|
||||
return
|
||||
<<<EOF
|
||||
@ -255,12 +268,14 @@ EOF
|
||||
;
|
||||
}
|
||||
|
||||
/// Adds statistics to the page when all processing is done.
|
||||
/// @param $output Output object
|
||||
/// @param $start Index of the first page that was processed.
|
||||
/// @param $end Index of the last processed page.
|
||||
/// @param $reloads Number of reloads of the page.
|
||||
/// @returns undefined
|
||||
/**
|
||||
* Adds statistics to the page when all processing is done.
|
||||
* @param $output Output object
|
||||
* @param $start Index of the first page that was processed.
|
||||
* @param $end Index of the last processed page.
|
||||
* @param $reloads Number of reloads of the page.
|
||||
* @returns undefined
|
||||
*/
|
||||
private function addCompletedInfo( &$output, $start, $end, $reloads ) {
|
||||
global $wgLinkTitlesTimeLimit;
|
||||
$pagesPerReload = sprintf('%0.1f', $end / $reloads);
|
||||
@ -281,19 +296,21 @@ EOF
|
||||
);
|
||||
}
|
||||
|
||||
/// Counts the number of pages in a read-access wiki database ($dbr).
|
||||
/// @param $dbr Read-only `Database` object.
|
||||
/// @returns Number of pages in the default namespace (0) of the wiki.
|
||||
/**
|
||||
* Counts the number of pages in a read-access wiki database ($dbr).
|
||||
* @param $dbr Read-only `Database` object.
|
||||
* @returns Number of pages in the namespaces selected by $namespacesClause.
|
||||
*/
|
||||
private function countPages(&$dbr, $namespacesClause) {
|
||||
$res = $dbr->select(
|
||||
'page',
|
||||
array('pagecount' => "COUNT(page_id)"),
|
||||
array(
|
||||
'page_namespace IN ' . $namespacesClause,
|
||||
),
|
||||
__METHOD__
|
||||
array(
|
||||
'page_namespace IN ' . $namespacesClause,
|
||||
),
|
||||
__METHOD__
|
||||
);
|
||||
|
||||
|
||||
return $res->current()->pagecount;
|
||||
}
|
||||
}
|
140
includes/Splitter.php
Normal file
140
includes/Splitter.php
Normal file
@ -0,0 +1,140 @@
|
||||
<?php
|
||||
/**
|
||||
* The Splitter class caches a regular expression that delimits text to be parsed.
|
||||
*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*
|
||||
* @author Daniel Kraus <bovender@bovender.de>
|
||||
*/
|
||||
namespace LinkTitles;
|
||||
|
||||
/**
|
||||
* Caches a regular expression that delimits text to be parsed.
|
||||
*/
|
||||
class Splitter {
	/**
	 * The splitting expression that separates text to be parsed from text that
	 * must not be parsed.
	 * @var String $splitter
	 */
	public $splitter;

	/**
	 * The LinkTitles configuration for this Splitter instance.
	 * @var Config $config
	 */
	public $config;

	/**
	 * The cached singleton, or null if none has been built (or after
	 * invalidate() was called).
	 * @var Splitter|null $instance
	 */
	private static $instance;

	/**
	 * Gets the Splitter singleton; may build one with the given config or the
	 * default config if none is given.
	 *
	 * If the instance was already created, it does not matter what Config this
	 * method is called with. To re-create an instance with a different Config,
	 * call Splitter::invalidate() first.
	 *
	 * @param Config|null $config LinkTitles configuration.
	 * @return Splitter The shared instance.
	 */
	public static function default( Config &$config = null ) {
		if ( self::$instance === null ) {
			if ( $config === null ) {
				$config = new Config();
			}
			self::$instance = new Splitter( $config );
		}
		return self::$instance;
	}

	/**
	 * Invalidates the singleton instance.
	 *
	 * Used for unit testing.
	 */
	public static function invalidate() {
		self::$instance = null;
	}

	/**
	 * Stores the configuration and builds the splitting expression once.
	 *
	 * @param Config $config LinkTitles configuration.
	 */
	protected function __construct( Config $config) {
		$this->config = $config;
		$this->buildSplitter();
	}

	/**
	 * Splits a text into sections that may be linked and sections that may not
	 * be linked (e.g., because they already are a link, or a template, etc.).
	 *
	 * @param String &$text Text to split.
	 * @return Array of strings where even indexes point to linkable sections.
	 */
	public function split( &$text ) {
		return preg_split( $this->splitter, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	}

	/**
	 * Builds the delimiter that is used in a regexp to separate
	 * text that should be parsed from text that should not be
	 * parsed (e.g. inside existing links etc.) and stores it in
	 * $this->splitter.
	 */
	private function buildSplitter() {
		if ( $this->config->skipTemplates ) {
			// Use recursive regex to balance curly braces;
			// see http://www.regular-expressions.info/recurse.html
			$templatesDelimiter = '{{(?>[^{}]|(?R))*}}|';
		} else {
			// Match template names (ignoring any piped [[]] links in them)
			// along with the trailing pipe and parameter name or closing
			// braces; also match sequences of '|wordcharacters=' (without
			// spaces in them) that usually only occur as parameter names in
			// transclusions (but could also occur as wiki table cell contents).
			// TODO: Find a way to match parameter names in transclusions, but
			// not in table cells or other sequences involving a pipe character
			// and equal sign.
			$templatesDelimiter = '{{[^|]*?(?:(?:\[\[[^]]+]])?)[^|]*?(?:\|(?:\w+=)?|(?:}}))|\|\w+=|';
		}

		// Build a regular expression that will capture existing wiki links ("[[...]]"),
		// wiki headings ("= ... =", "== ... ==" etc.),
		// urls ("http://example.com", "[http://example.com]", "[http://example.com Description]",
		// and email addresses ("mail@example.com").
		// Since there is a user option to skip headings, we make this part of the expression
		// optional. Note that in order to use preg_split(), it is important to have only one
		// capturing subpattern (which precludes the use of conditional subpatterns).
		$delimiter = $this->config->parseHeadings ? '' : '=+.+?=+|';
		$urlPattern = '[a-z]+?\:\/\/(?:\S+\.)+\S+(?:\/.*)?';
		$this->splitter = '/(' .                     // exclude from linking:
			'\[\[.*?\]\]|' .                            // links
			$delimiter .                                // titles (if requested)
			$templatesDelimiter .                       // templates (if requested)
			'^ .+?\n|\n .+?\n|\n .+?$|^ .+?$|' .        // preformatted text
			// The closing tag is matched with an escaped slash so that only a
			// real </nowiki> terminates the protected span.
			'<nowiki>.*?<\/nowiki>|<code>.*?<\/code>|' . // nowiki/code
			'<pre>.*?<\/pre>|<html>.*?<\/html>|' .      // pre/html
			'<script>.*?<\/script>|' .                  // script
			'<gallery>.*?<\/gallery>|' .                // gallery
			'<div.+?>|<\/div>|' .                       // attributes of div elements
			'<span.+?>|<\/span>|' .                     // attributes of span elements
			'<file>[^<]*<\/file>|' .                    // stuff inside file elements
			'style=".+?"|class=".+?"|' .                // styles and classes (e.g. of wikitables)
			'<noautolinks>.*?<\/noautolinks>|' .        // custom tag 'noautolinks'
			'\[' . $urlPattern . '\s.+?\]|'. $urlPattern . '(?=\s|$)|' . // urls
			'(?<=\b)\S+\@(?:\S+\.)+\S+(?=\b)' .         // email addresses
			')/ismS';
	}
}
|
194
includes/Target.php
Normal file
194
includes/Target.php
Normal file
@ -0,0 +1,194 @@
|
||||
<?php
|
||||
/**
|
||||
* The LinkTitles\Target represents a Wiki page that is a potential link target.
|
||||
*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*
|
||||
* @author Daniel Kraus <bovender@bovender.de>
|
||||
*/
|
||||
namespace LinkTitles;
|
||||
|
||||
/**
|
||||
* Represents a page that is a potential link target.
|
||||
*/
|
||||
class Target {
	/**
	 * A Title object for the target page currently being examined.
	 * @var \Title $title
	 */
	private $title;

	/**
	 * The value returned by the title's getTitleValue(), stored by the
	 * constructor. Declared explicitly so that it is no longer created as a
	 * dynamic property; kept public because a dynamic property was public.
	 * @var mixed $titleValue
	 */
	public $titleValue;

	/**
	 * Caches the target page content as a \Content object.
	 *
	 * @var \Content $content
	 */
	private $content;

	/**
	 * Regex that matches the start of a word; this expression depends on the
	 * setting of LinkTitles\Config->wordStartOnly;
	 * @var String $wordStart
	 */
	public $wordStart;

	/**
	 * Regex that matches the end of a word; this expression depends on the
	 * setting of LinkTitles\Config->wordEndOnly;
	 * @var String $wordEnd
	 */
	public $wordEnd;

	/**
	 * LinkTitles configuration.
	 * @var Config $config
	 */
	private $config;

	/**
	 * Constructs a new Target object
	 *
	 * The parameters may be taken from database rows, for example.
	 *
	 * @param Int $nameSpace Name space of the target page
	 * @param String $title Title of the target page
	 * @param Config &$config LinkTitles configuration
	 */
	public function __construct( $nameSpace, $title, Config &$config ) {
		$this->title = \Title::makeTitleSafe( $nameSpace, $title );
		$this->titleValue = $this->title->getTitleValue();
		$this->config = $config;

		// Use unicode character properties rather than \b escape sequences
		// to detect whole words containing non-ASCII characters as well.
		// Note that this requires a PCRE library that was compiled with
		// --enable-unicode-properties
		$this->wordStart = $config->wordStartOnly ? '(?<!\pL)' : '';
		$this->wordEnd = $config->wordEndOnly ? '(?!\pL)' : '';
	}

	/**
	 * Gets the string representation of the target title.
	 * @return String title text
	 */
	public function getTitleText() {
		return $this->title->getText();
	}

	/**
	 * Gets the title string with certain characters escaped that may interfere
	 * with regular expressions.
	 * @return String representation of the title, regex-safe
	 */
	public function getRegexSafeTitle() {
		return preg_quote( $this->title->getText(), '/' );
	}

	/**
	 * Builds a regular expression of the title
	 * @return String regular expression for this title.
	 */
	public function getCaseSensitiveRegex() {
		$regexSafeTitle = $this->getRegexSafeTitle();

		// Depending on the $config->capitalLinks setting,
		// the title has to be searched for either in a strictly case-sensitive
		// way, or in a 'fuzzy' way where the first letter of the title may
		// be either case. The fuzzy form is skipped when the quoted title
		// starts with a backslash, because the first character then belongs
		// to an escape sequence and cannot be split off on its own.
		if ( $this->config->capitalLinks && ( $regexSafeTitle[0] != '\\' )) {
			$searchTerm = '((?i)' . $regexSafeTitle[0] . '(?-i)' . substr($regexSafeTitle, 1) . ')';
		} else {
			$searchTerm = '(' . $regexSafeTitle . ')';
		}

		return $this->buildRegex( $searchTerm );
	}

	/**
	 * Builds a regular expression pattern for the title in a case-insensitive
	 * way.
	 * @return String case-insensitive regular expression pattern for the title
	 */
	public function getCaseInsensitiveRegex() {
		return $this->buildRegex( $this->getRegexSafeTitle() ) . 'i';
	}

	/**
	 * Builds the basic regex that is used to match target page titles in a source
	 * text.
	 * @param String $searchTerm Target page title (special characters must be quoted)
	 * @return String regular expression pattern
	 */
	private function buildRegex( $searchTerm ) {
		return '/(?<![\:\.\@\/\?\&])' . $this->wordStart . $searchTerm . $this->wordEnd . '/S';
	}

	/**
	 * Returns the \Content of the target page.
	 *
	 * The value is cached.
	 * @return \Content|null Content of the Target page, as returned by
	 *   \WikiPage::getContent().
	 */
	public function getContent() {
		if ( $this->content === null ) {
			$this->content = \WikiPage::factory( $this->title )->getContent();
		}
		return $this->content;
	}

	/**
	 * Examines the current target page. Returns true if it may be linked;
	 * false if not. This depends on two settings:
	 * $wgLinkTitlesCheckRedirect and $wgLinkTitlesEnableNoTargetMagicWord
	 * and whether the target page is a redirect or contains the
	 * __NOAUTOLINKTARGET__ magic word.
	 *
	 * A target without retrievable content can neither redirect nor carry
	 * the magic word, so both checks are skipped for it.
	 *
	 * @param \Title $fromTitle
	 *
	 * @return boolean
	 */
	public function mayLinkTo( \Title $fromTitle ) {
		$checkRedirect = $this->config->checkRedirect;
		$checkMagicWord = $this->config->enableNoTargetMagicWord;
		if ( !$checkRedirect && !$checkMagicWord ) {
			return true;
		}

		$content = $this->getContent();
		if ( $content === null ) {
			return true;
		}

		// If checking for redirects is enabled and the target page does
		// indeed redirect to the current page, return the page title as-is
		// (unlinked).
		if ( $checkRedirect ) {
			$redirectTitle = $content->getUltimateRedirectTarget();
			if ( $redirectTitle && $redirectTitle->equals( $fromTitle ) ) {
				return false;
			}
		}

		// If the magic word __NOAUTOLINKTARGET__ is enabled and the target
		// page does indeed contain this magic word, return the page title
		// as-is (unlinked).
		if ( $checkMagicWord ) {
			if ( $content->matchMagicWord( \MagicWord::get('MAG_LINKTITLES_NOTARGET') ) ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Determines if the Target's title is the same as another title.
	 * @param \Title $otherTitle Other title
	 * @return boolean True if the $otherTitle is the same, false if not.
	 */
	public function isSameTitle( \Title $otherTitle) {
		return $this->title->equals( $otherTitle );
	}
}
|
142
includes/Targets.php
Normal file
142
includes/Targets.php
Normal file
@ -0,0 +1,142 @@
|
||||
<?php
|
||||
/**
|
||||
* The LinkTitles\Targets class.
|
||||
*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*
|
||||
* @author Daniel Kraus <bovender@bovender.de>
|
||||
*/
|
||||
namespace LinkTitles;
|
||||
|
||||
/**
|
||||
* Fetches potential target page titles from the database.
|
||||
*/
|
||||
class Targets {
	/**
	 * The cached instance, or null if none exists yet (or after invalidate()).
	 * @var Targets|null $instance
	 */
	private static $instance;

	/**
	 * Singleton factory that returns a (cached) database query results with
	 * potential target page titles.
	 *
	 * The subset of pages that may serve as target pages depends on the
	 * name space of the source page. Therefore, if the name space of $title
	 * differs from the cached name space, the database is queried again.
	 *
	 * @param \Title $title Title of the current (source) page; only its
	 *   namespace is used.
	 * @param Config $config LinkTitles configuration.
	 * @return Targets The cached or freshly built instance.
	 */
	public static function default( \Title $title, Config $config ) {
		if ( ( self::$instance === null ) || ( self::$instance->nameSpace != $title->getNamespace() ) ) {
			self::$instance = new Targets( $title, $config );
		}
		return self::$instance;
	}

	/**
	 * Invalidates the cache; the next call of Targets::default() will trigger
	 * a database query.
	 *
	 * Use this in unit tests which are performed in a single request cycle so that
	 * changes to the pages list may not be picked up by the cached Targets instance.
	 */
	public static function invalidate() {
		self::$instance = null;
	}

	/**
	 * Holds the results of a database query for target page titles, filtered
	 * and sorted.
	 * @var IResultWrapper $queryResult
	 */
	public $queryResult;

	/**
	 * Holds the name space (integer) for which the list of target pages was built.
	 * @var Int $nameSpace
	 */
	public $nameSpace;

	/**
	 * LinkTitles configuration used to build the query.
	 * @var Config $config
	 */
	private $config;

	/**
	 * The constructor is private to enforce using the singleton pattern.
	 * @param \Title $title Title whose namespace determines the target list.
	 * @param Config $config LinkTitles configuration.
	 */
	private function __construct( \Title $title, Config $config) {
		$this->config = $config;
		$this->nameSpace = $title->getNamespace();
		$this->fetch();
	}

	/**
	 * Fetches the page titles from the database.
	 */
	private function fetch() {
		$sortOrder = $this->config->preferShortTitles ? 'ASC' : 'DESC';

		// Build a blacklist of pages that are not supposed to be link
		// targets. The titles come from the configuration and are inserted
		// into the SQL condition as-is (spaces replaced by underscores).
		$blackList = str_replace( ' ', '_', '("' . implode( '","',$this->config->blackList ) . '")' );

		// Build our weight list. Make sure current namespace is first element
		$nameSpaces = array_diff( $this->config->nameSpaces, [ $this->nameSpace ] );
		array_unshift( $nameSpaces, $this->nameSpace );

		// No need for a sanity check: we are sure that we have at least one
		// element in the array (the current namespace was just prepended).
		$weightSelect = "CASE page_namespace ";
		$currentWeight = 0;
		foreach ( $nameSpaces as $nameSpaceValue ) {
			$currentWeight = $currentWeight + 100;
			$weightSelect = $weightSelect . " WHEN " . $nameSpaceValue . " THEN " . $currentWeight . PHP_EOL;
		}
		$weightSelect = $weightSelect . " END ";
		$nameSpacesClause = '(' . implode( ', ', $nameSpaces ) . ')';

		// Build an SQL query and fetch all page titles from the selected
		// namespaces, ordered by namespace weight and then by title length.
		// Since the db may be sqlite, we need a try..catch structure because
		// sqlite does not support the CHAR_LENGTH function.
		$dbr = wfGetDB( DB_SLAVE );
		try {
			$this->queryResult = $dbr->select(
				'page',
				array( 'page_title', 'page_namespace' , "weight" => $weightSelect),
				array(
					'page_namespace IN ' . $nameSpacesClause,
					'CHAR_LENGTH(page_title) >= ' . $this->config->minimumTitleLength,
					'page_title NOT IN ' . $blackList,
				),
				__METHOD__,
				array( 'ORDER BY' => 'weight ASC, CHAR_LENGTH(page_title) ' . $sortOrder )
			);
		} catch ( \Exception $e ) {
			// The exception class must be fully qualified: inside this
			// namespace a bare "Exception" would name LinkTitles\Exception
			// and the fallback below would never be reached.
			$this->queryResult = $dbr->select(
				'page',
				array( 'page_title', 'page_namespace' , "weight" => $weightSelect ),
				array(
					'page_namespace IN ' . $nameSpacesClause,
					'LENGTH(page_title) >= ' . $this->config->minimumTitleLength,
					'page_title NOT IN ' . $blackList,
				),
				__METHOD__,
				array( 'ORDER BY' => 'weight ASC, LENGTH(page_title) ' . $sortOrder )
			);
		}
	}
}
|
@ -1,21 +1,23 @@
|
||||
<?php
|
||||
/*
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> @bovender
|
||||
/**
|
||||
* LinkTitles command line interface (CLI)/maintenance script
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
* Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> @bovender
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
namespace LinkTitles;
|
||||
|
||||
@ -44,17 +46,21 @@ else
|
||||
}
|
||||
};
|
||||
|
||||
require_once( __DIR__ . "/includes/LinkTitles_Extension.php" );
|
||||
require_once( __DIR__ . "/includes/Extension.php" );
|
||||
|
||||
/// Core class of the maintenance script.
|
||||
/// @note Note that the execution of maintenance scripts is prohibited for
|
||||
/// an Apache web server due to a `.htaccess` file that declares `deny from
|
||||
/// all`. Other webservers may exhibit different behavior. Be aware that
|
||||
/// anybody who is able to execute this script may place a high load on the
|
||||
/// server.
|
||||
/// @ingroup batch
|
||||
/**
|
||||
* Core class of the maintenance script.
|
||||
* @note Note that the execution of maintenance scripts is prohibited for
|
||||
* an Apache web server due to a `.htaccess` file that declares `deny from
|
||||
* all`. Other webservers may exhibit different behavior. Be aware that
|
||||
* anybody who is able to execute this script may place a high load on the
|
||||
* server.
|
||||
* @ingroup batch
|
||||
*/
|
||||
class Cli extends \Maintenance {
|
||||
/// The constructor adds a description and one option.
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
public function __construct() {
|
||||
parent::__construct();
|
||||
$this->addDescription("Iterates over wiki pages and automatically adds links to other pages.");
|
||||
@ -65,41 +71,45 @@ class Cli extends \Maintenance {
|
||||
true, // requires argument
|
||||
"s"
|
||||
);
|
||||
$this->addOption(
|
||||
$this->addOption(
|
||||
"page",
|
||||
"page name to process",
|
||||
false, // not required
|
||||
true, // requires argument
|
||||
"p"
|
||||
);
|
||||
$this->addOption(
|
||||
"log",
|
||||
"enables logging to console",
|
||||
false, // not required
|
||||
false, // requires no argument
|
||||
"l"
|
||||
);
|
||||
$this->addOption(
|
||||
"debug",
|
||||
"enables debug logging to console",
|
||||
false, // not required
|
||||
false // requires no argument
|
||||
);
|
||||
// TODO: Add back logging options.
|
||||
// TODO: Add configuration options.
|
||||
// $this->addOption(
|
||||
// "log",
|
||||
// "enables logging to console",
|
||||
// false, // not required
|
||||
// false, // requires no argument
|
||||
// "l"
|
||||
// );
|
||||
// $this->addOption(
|
||||
// "debug",
|
||||
// "enables debug logging to console",
|
||||
// false, // not required
|
||||
// false // requires no argument
|
||||
// );
|
||||
}
|
||||
|
||||
/// Main function of the maintenance script.
|
||||
/// Will iterate over all pages in the wiki (starting at a certain index,
|
||||
/// if the `--start` option is given) and call LinkTitles::processPage() for
|
||||
/// each page.
|
||||
/*
|
||||
* Main function of the maintenance script.
|
||||
* Will iterate over all pages in the wiki (starting at a certain index,
|
||||
* if the `--start` option is given) and call LinkTitles::processPage() for
|
||||
* each page.
|
||||
*/
|
||||
public function execute() {
|
||||
if ($this->hasOption('log'))
|
||||
{
|
||||
Extension::$ltConsoleOutput = true;
|
||||
}
|
||||
if ($this->hasOption('debug'))
|
||||
{
|
||||
Extension::$ltConsoleOutputDebug = true;
|
||||
}
|
||||
// if ($this->hasOption('log'))
|
||||
// {
|
||||
// Extension::$ltConsoleOutput = true;
|
||||
// }
|
||||
// if ($this->hasOption('debug'))
|
||||
// {
|
||||
// Extension::$ltConsoleOutputDebug = true;
|
||||
// }
|
||||
if ( $this->hasOption('page') ) {
|
||||
if ( !$this->hasOption( 'start' ) ) {
|
||||
$this->singlePage();
|
||||
@ -113,10 +123,14 @@ class Cli extends \Maintenance {
|
||||
if ( $startIndex < 0 ) {
|
||||
$this->error( 'FATAL: Start index must be 0 or greater.', 1 );
|
||||
};
|
||||
$this->allPages( $startIndex);
|
||||
$this->allPages( $startIndex );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a single page.
|
||||
* @return bool True on success, false on failure.
|
||||
*/
|
||||
private function singlePage() {
|
||||
$pageName = strval( $this->getOption( 'page' ) );
|
||||
$this->output( "Processing single page: '$pageName'\n" );
|
||||
@ -131,17 +145,22 @@ class Cli extends \Maintenance {
|
||||
return $success;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process all pages in the Wiki.
|
||||
* @param integer $index Index of the start page.
|
||||
* @return bool True on success, false on failure.
|
||||
*/
|
||||
private function allPages( $index = 0 ) {
|
||||
global $wgLinkTitlesNamespaces;
|
||||
$config = new Config();
|
||||
|
||||
// Retrieve page names from the database.
|
||||
$dbr = $this->getDB( DB_SLAVE );
|
||||
$namespacesClause = str_replace( '_', ' ','(' . implode( ', ', $wgLinkTitlesNamespaces ) . ')' );
|
||||
$nameSpacesClause = str_replace( '_', ' ','(' . implode( ', ', $config->nameSpaces ) . ')' );
|
||||
$res = $dbr->select(
|
||||
'page',
|
||||
array( 'page_title', 'page_namespace' ),
|
||||
array(
|
||||
'page_namespace IN ' . $namespacesClause,
|
||||
'page_namespace IN ' . $nameSpacesClause,
|
||||
),
|
||||
__METHOD__,
|
||||
array(
|
20
tests/phpunit/ConfigTest.php
Normal file
20
tests/phpunit/ConfigTest.php
Normal file
@ -0,0 +1,20 @@
|
||||
<?php
|
||||
/**
|
||||
* Tests the LinkTitles\Config class.
|
||||
*
|
||||
* This single unit test basically serves to ensure the Config class is working.
|
||||
* @group bovender
|
||||
* @group Database
|
||||
*/
|
||||
class ConfigTest extends LinkTitles\TestCase {

	/**
	 * A freshly constructed Config must expose the current value of the
	 * $wgLinkTitlesParseOnEdit global as its parseOnEdit property.
	 */
	public function testParseOnEdit() {
		$this->setMwGlobals( [
			'wgLinkTitlesParseOnEdit' => true,
			'wgLinkTitlesParseOnRender' => false
		] );
		$config = new LinkTitles\Config();
		global $wgLinkTitlesParseOnEdit;
		// Expected value first, actual value second, so that a failure
		// message reports the two the right way round.
		$this->assertSame( $wgLinkTitlesParseOnEdit, $config->parseOnEdit );
	}
}
|
27
tests/phpunit/ParseOnEditTest.php
Normal file
27
tests/phpunit/ParseOnEditTest.php
Normal file
@ -0,0 +1,27 @@
|
||||
<?php
|
||||
/**
|
||||
* @group bovender
|
||||
* @group Database
|
||||
*/
|
||||
class ParseOnEditTest extends LinkTitles\TestCase {

	/**
	 * With parse-on-edit enabled, saving a page is expected to turn the
	 * occurrence of an existing page title into a link, but not the
	 * page's own title.
	 */
	public function testParseOnEdit() {
		$this->setMwGlobals( [
			'wgLinkTitlesParseOnEdit' => true,
			'wgLinkTitlesParseOnRender' => false
		] );
		$pageId = $this->insertPage( 'test page', 'This page should link to the link target but not to test page' )['id'];
		$page = WikiPage::newFromId( $pageId );
		// getPageText() is an instance method, so it is called on $this.
		$this->assertSame( 'This page should link to the [[link target]] but not to test page', $this->getPageText( $page ) );
	}

	/**
	 * With parse-on-edit disabled, the saved text is expected to stay
	 * exactly as it was entered.
	 */
	public function testDoNotParseOnEdit() {
		$this->setMwGlobals( [
			'wgLinkTitlesParseOnEdit' => false,
			'wgLinkTitlesParseOnRender' => false
		] );
		$pageId = $this->insertPage( 'test page', 'This page should not link to the link target' )['id'];
		$page = WikiPage::newFromId( $pageId );
		$this->assertSame( 'This page should not link to the link target', $this->getPageText( $page ) );
	}
}
|
31
tests/phpunit/SplitterTest.php
Normal file
31
tests/phpunit/SplitterTest.php
Normal file
@ -0,0 +1,31 @@
|
||||
<?php
|
||||
/**
|
||||
* @group bovender
|
||||
*/
|
||||
class SplitterTest extends MediaWikiTestCase {
	/**
	 * Splits each sample text and compares the parts with the expectation.
	 *
	 * The Splitter is a singleton that keeps the Config it was first built
	 * with, so the cached instance is discarded before and after the test.
	 * Template skipping is switched on explicitly because the nested-template
	 * sample is only captured as one delimiter when whole templates are
	 * skipped.
	 *
	 * @dataProvider provideSplitData
	 */
	public function testSplit( $input, $expectedOutput ) {
		$config = new LinkTitles\Config();
		$config->skipTemplates = true;
		LinkTitles\Splitter::invalidate();
		$splitter = LinkTitles\Splitter::default( $config );
		$actualOutput = $splitter->split( $input );
		// Do not leave a specially configured singleton behind for other tests.
		LinkTitles\Splitter::invalidate();
		$this->assertSame( $expectedOutput, $actualOutput );
	}

	// TODO: Add more examples.
	/**
	 * Supplies pairs of input text and the expected list of split parts.
	 */
	public static function provideSplitData() {
		return [
			[
				'this may be linked [[this may not be linked]]',
				[ 'this may be linked ', '[[this may not be linked]]', '' ]
			],
			[
				'this may be linked <gallery>this may not be linked</gallery>',
				[ 'this may be linked ', '<gallery>this may not be linked</gallery>', '' ]
			],
			[
				'this may be linked {{mytemplate|param={{transcluded}}}}',
				[ 'this may be linked ', '{{mytemplate|param={{transcluded}}}}', '' ]
			],
		];
	}
}
|
40
tests/phpunit/TargetTest.php
Normal file
40
tests/phpunit/TargetTest.php
Normal file
@ -0,0 +1,40 @@
|
||||
<?php
|
||||
/**
|
||||
* @group bovender
|
||||
*/
|
||||
class TargetTest extends MediaWikiTestCase {

	/**
	 * The word-start expression of a Target must follow the wordStartOnly
	 * setting of the Config it was built with.
	 *
	 * @dataProvider provideStartOnly
	 */
	public function testTargetWordStartOnly( $enabled, $delimiter ) {
		$config = new LinkTitles\Config();
		$config->wordStartOnly = $enabled;
		// The namespace is spelled exactly as declared ("LinkTitles") so the
		// class can be found by a case-sensitive autoloader lookup.
		$target = new LinkTitles\Target( NS_MAIN, 'test page', $config );
		$this->assertSame( $delimiter, $target->wordStart );
	}

	/**
	 * Pairs of wordStartOnly setting and expected word-start expression.
	 */
	public static function provideStartOnly() {
		return [
			[ true, '(?<!\pL)' ],
			[ false, '' ]
		];
	}

	/**
	 * The word-end expression of a Target must follow the wordEndOnly
	 * setting of the Config it was built with.
	 *
	 * @dataProvider provideEndOnly
	 */
	public function testTargetWordEndOnly( $enabled, $delimiter ) {
		$config = new LinkTitles\Config();
		$config->wordEndOnly = $enabled;
		$target = new LinkTitles\Target( NS_MAIN, 'test page', $config );
		$this->assertSame( $delimiter, $target->wordEnd );
	}

	/**
	 * Pairs of wordEndOnly setting and expected word-end expression.
	 */
	public static function provideEndOnly() {
		return [
			[ true, '(?!\pL)' ],
			[ false, '' ]
		];
	}
}
|
26
tests/phpunit/TargetsTest.php
Normal file
26
tests/phpunit/TargetsTest.php
Normal file
@ -0,0 +1,26 @@
|
||||
<?php
|
||||
/**
|
||||
* Tests the LinkTitles\Targets class.
|
||||
*
|
||||
* @group bovender
|
||||
* @group Database
|
||||
*/
|
||||
class TargetsTest extends LinkTitles\TestCase {

	/**
	 * Asserts that the number of potential link targets returned by
	 * LinkTitles\Targets equals the number of pages reported by
	 * SiteStatsInit::pages().
	 */
	public function testTargets() {
		$title = \Title::newFromText( 'link target' );
		$targets = LinkTitles\Targets::default( $title, new LinkTitles\Config() );

		// Count number of articles: Inspired by updateArticleCount.php maintenance
		// script: https://doc.wikimedia.org/mediawiki-core/master/php/updateArticleCount_8php_source.html
		$dbr = wfGetDB( DB_SLAVE );
		$counter = new SiteStatsInit( $dbr );
		$count = $counter->pages();

		// Expected value (the page count) first, actual value second.
		$this->assertEquals( $count, $targets->queryResult->numRows() );
	}
}
|
19
tests/phpunit/TestCase.php
Normal file
19
tests/phpunit/TestCase.php
Normal file
@ -0,0 +1,19 @@
|
||||
<?php
|
||||
namespace LinkTitles;
|
||||
|
||||
abstract class TestCase extends \MediaWikiTestCase {
	/**
	 * Creates the 'link target' page every test relies on and drops the
	 * cached Targets instance so the pages table is queried again.
	 */
	protected function setUp() {
		parent::setUp();

		$this->insertPage( 'link target', 'This page serves as a link target' );

		// force re-querying the pages table
		Targets::invalidate();
	}

	/**
	 * Delegates to the parent implementation.
	 */
	protected function tearDown() {
		parent::tearDown();
	}

	/**
	 * Returns the page's content serialized by the page's content handler.
	 *
	 * @param \WikiPage $page Page to read.
	 * @return mixed Result of serializeContent() for the page content.
	 */
	protected function getPageText( \WikiPage $page ) {
		$pageContent = $page->getContent();
		$handler = $page->getContentHandler();

		return $handler->serializeContent( $pageContent );
	}
}
|
Reference in New Issue
Block a user