mirror of
				https://github.com/diocloid/LinkTitles.git
				synced 2025-10-22 05:42:32 +02:00 
			
		
		
		
	Merge branch 'unit-tests' into develop
This commit is contained in:
		
							
								
								
									
										1
									
								
								.atomignore
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.atomignore
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1 @@
 | 
			
		||||
gh-pages/
 | 
			
		||||
							
								
								
									
										63
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										63
									
								
								README.md
									
									
									
									
									
								
							@@ -31,3 +31,66 @@ Contributors
 | 
			
		||||
- Daniel Kraus (@bovender), main developer
 | 
			
		||||
- Ulrich Strauss (@c0nnex), namespaces
 | 
			
		||||
- Brent Laabs (@labster), code review and bug fixes
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Testing
 | 
			
		||||
-------
 | 
			
		||||
 | 
			
		||||
Starting from version 4.2.0, LinkTitles finally comes with phpunit tests.
 | 
			
		||||
 | 
			
		||||
Here's how I set up the testing environment. This may not be the canonical way
 | 
			
		||||
to do it. Basic information on testing MediaWiki can be found [here](https://www.mediawiki.org/wiki/Manual:PHP_unit_testing).
 | 
			
		||||
 | 
			
		||||
The following assumes that you have an instance of MediaWiki running locally
 | 
			
		||||
on your development machine. This assumes that you are running Linux (I personally
 | 
			
		||||
use Ubuntu).
 | 
			
		||||
 | 
			
		||||
1. Pull the MediaWiki repository:
 | 
			
		||||
 | 
			
		||||
        cd ~/Code
 | 
			
		||||
        git clone --depth 1 https://phabricator.wikimedia.org/source/mediawiki.git
 | 
			
		||||
 | 
			
		||||
2. Install [composer](https://getcomposer.org) locally and fetch the
 | 
			
		||||
  dependencies (including development dependencies):
 | 
			
		||||
 | 
			
		||||
  Follow the instructions on the [composer download page](https://getcomposer.org/download),
 | 
			
		||||
  but instead of running `php composer-setup.php`, run:
 | 
			
		||||
 | 
			
		||||
        php composer-setup.php --install-dir=bin --filename=composer
 | 
			
		||||
        bin/composer install
 | 
			
		||||
 | 
			
		||||
3. Install phpunit (it was already installed on my Ubuntu system when I began
 | 
			
		||||
  testing LinkTitles, so I leave it up to you to figure out how to do it).
 | 
			
		||||
 | 
			
		||||
4. Copy your `LocalSettings.php` over from your local MediaWiki installation
 | 
			
		||||
  and remove (or comment out) any lines that reference extensions or skins that
 | 
			
		||||
  you are not going to install in your test environment. For the purposes of
 | 
			
		||||
  testing the LinkTitles extension, leave the following line in place:
 | 
			
		||||
 | 
			
		||||
        wfLoadExtensions( array( 'LinkTitles' ));
 | 
			
		||||
 | 
			
		||||
  And ensure the settings file contains the following:
 | 
			
		||||
 | 
			
		||||
        $wgShowDBErrorBacktrace = true;
 | 
			
		||||
 | 
			
		||||
5. Create a symbolic link to your copy of the LinkTitles repository:
 | 
			
		||||
 | 
			
		||||
        cd ~/Code/mediawiki/extensions
 | 
			
		||||
        ln -s ~/Code/LinkTitles
 | 
			
		||||
 | 
			
		||||
6. Make sure your local MediaWiki instance is up to date. Otherwise phpunit may
 | 
			
		||||
  fail and tell you about database problems.
 | 
			
		||||
 | 
			
		||||
  This is because the local database is used as a template for the unit tests.
 | 
			
		||||
  For example, I initially had MW 1.26 installed on my laptop, but the cloned
 | 
			
		||||
  repository was MW 1.29.1. It's probably also possible to clone the repository
 | 
			
		||||
  with a specific version tag which matches your local installation.
 | 
			
		||||
 | 
			
		||||
7. Run the tests:
 | 
			
		||||
 | 
			
		||||
        cd ~/Code/mediawiki/tests/phpunit
 | 
			
		||||
        php phpunit.php --group bovender
 | 
			
		||||
 | 
			
		||||
  This will run all tests from the 'bovender' group, i.e. tests for my extensions.
 | 
			
		||||
  If you linked just the LinkTitles extension in step 5, only this extension
 | 
			
		||||
  will be tested.
 | 
			
		||||
 
 | 
			
		||||
@@ -33,8 +33,14 @@
 | 
			
		||||
                ]
 | 
			
		||||
        },
 | 
			
		||||
        "AutoloadClasses": {
 | 
			
		||||
                "LinkTitles\\Extension": "includes/LinkTitles_Extension.php",
 | 
			
		||||
                "LinkTitles\\Special": "includes/LinkTitles_Special.php"
 | 
			
		||||
                "LinkTitles\\Extension": "includes/Extension.php",
 | 
			
		||||
                "LinkTitles\\Linker": "includes/Linker.php",
 | 
			
		||||
                "LinkTitles\\Target": "includes/Target.php",
 | 
			
		||||
                "LinkTitles\\Targets": "includes/Targets.php",
 | 
			
		||||
                "LinkTitles\\Splitter": "includes/Splitter.php",
 | 
			
		||||
                "LinkTitles\\Config": "includes/Config.php",
 | 
			
		||||
                "LinkTitles\\Special": "includes/Special.php",
 | 
			
		||||
                "LinkTitles\\TestCase": "tests/phpunit/TestCase.php"
 | 
			
		||||
        },
 | 
			
		||||
        "SpecialPages": {
 | 
			
		||||
                "LinkTitles": "LinkTitles\\Special"
 | 
			
		||||
@@ -61,9 +67,8 @@
 | 
			
		||||
                        "LinkTitles\\Extension::onParserFirstCallInit"
 | 
			
		||||
                ]
 | 
			
		||||
        },
 | 
			
		||||
        "callback": "LinkTitles\\Extension::setup",
 | 
			
		||||
        "ExtensionMessagesFiles": {
 | 
			
		||||
                "LinkTitlesMagic": "includes/LinkTitles_Magic.php"
 | 
			
		||||
                "LinkTitlesMagic": "includes/Magic.php"
 | 
			
		||||
        },
 | 
			
		||||
        "MessagesDirs": {
 | 
			
		||||
                "LinkTitles": [
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										196
									
								
								includes/Config.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										196
									
								
								includes/Config.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,196 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * The LinkTitles\Config class holds configuration for the LinkTitles extension.
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 * it under the terms of the GNU General Public License as published by
 | 
			
		||||
 * the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 * (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 * GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU General Public License
 | 
			
		||||
 * along with this program; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 * MA 02110-1301, USA.
 | 
			
		||||
 *
 | 
			
		||||
 * @author Daniel Kraus <bovender@bovender.de>
 | 
			
		||||
 */
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
/**
 * Holds LinkTitles configuration.
 *
 * This class encapsulates the global configuration variables so we do not have
 * to pull those globals into scope in the individual LinkTitles classes.
 *
 * Using a dedicated configuration class also facilitates overriding certain
 * options, e.g. in a maintenance script that is invoked with flags from the
 * command line.
 *
 * @since 5.0.0
 */
class Config {
	/**
	 * Whether to add links to a page when the page is edited/saved.
	 * @var bool $parseOnEdit
	 */
	public $parseOnEdit;

	/**
	 * Whether to add links to a page when the page is rendered.
	 * @var bool $parseOnRender
	 */
	public $parseOnRender;

	/**
	 * Indicates whether to prioritize short over long titles.
	 * @var bool $preferShortTitles
	 */
	public $preferShortTitles;

	/**
	 * Minimum length of a page title for it to qualify as a potential link target.
	 * @var int $minimumTitleLength
	 */
	public $minimumTitleLength;

	/**
	 * Array of page titles that must never be link targets.
	 *
	 * This may be useful to exclude common abbreviations or acronyms from
	 * automatic linking.
	 * @var array $blackList
	 */
	public $blackList;

	/**
	 * Array of those name spaces (integer constants) whose pages may be linked.
	 * @var array $nameSpaces
	 */
	public $nameSpaces;

	/**
	 * Indicates whether to add a link to the first occurrence of a page title
	 * only (true), or add links to all occurrences on the source page (false).
	 * @var bool $firstOnly
	 */
	public $firstOnly;

	/**
	 * Indicates whether to operate in smart mode, i.e. link to pages even if the
	 * case does not match. Without smart mode, pages are linked to only if the
	 * exact title appears on the source page.
	 * @var bool $smartMode
	 */
	public $smartMode;

	/**
	 * Mirrors the global MediaWiki variable $wgCapitalLinks that indicates
	 * whether or not page titles are fully case sensitive.
	 * @var bool $capitalLinks
	 */
	public $capitalLinks;

	/**
	 * Whether or not to link to pages only if the page title appears at the
	 * start of a word on the target page (i.e., link 'MediaWiki' to a page
	 * 'Media', but not to a page 'Wiki').
	 *
	 * Set both $wordStartOnly and $wordEndOnly to true to enforce matching
	 * whole titles.
	 *
	 * @var bool $wordStartOnly
	 */
	public $wordStartOnly;

	/**
	 * Whether or not to link to pages only if the page title appears at the
	 * end of a word on the target page (i.e., link 'MediaWiki' to a page
	 * 'Wiki', but not to a page 'Media').
	 *
	 * Set both $wordStartOnly and $wordEndOnly to true to enforce matching
	 * whole titles.
	 *
	 * @var bool $wordEndOnly
	 */
	public $wordEndOnly;

	/**
	 * Whether or not to skip templates. If set to true, text inside transclusions
	 * will not be linked.
	 * @var bool $skipTemplates
	 */
	public $skipTemplates;

	/**
	 * Whether or not to parse headings.
	 * @var bool $parseHeadings
	 */
	public $parseHeadings;

	/**
	 * Whether to check if a potential target page links back to the source page.
	 * Set this to true to avoid indirect linkbacks.
	 *
	 * @var bool $checkRedirect
	 */
	public $checkRedirect;

	/**
	 * Whether to enable the __NOAUTOLINKTARGET__ magic word which prevents
	 * a potential target page from being linked to.
	 *
	 * @var bool $enableNoTargetMagicWord
	 */
	public $enableNoTargetMagicWord;

	/**
	 * Not backed by a global variable; the constructor always initialises it
	 * to false. Presumably switches on console output when the extension is
	 * driven from the command line -- confirm against the code that sets it.
	 *
	 * @var bool $enableConsoleOutput
	 */
	public $enableConsoleOutput;

	/**
	 * Not backed by a global variable; the constructor always initialises it
	 * to false. Presumably switches on additional debugging output on the
	 * console -- confirm against the code that sets it.
	 *
	 * @var bool $enableDebugConsoleOutput
	 */
	public $enableDebugConsoleOutput;

	/**
	 * Constructs a new Config object.
	 *
	 * The object's member variables will automatically be set with the values
	 * from the corresponding global variables. The two console output flags
	 * have no corresponding global and start out as false.
	 */
	public function __construct() {
		global $wgLinkTitlesParseOnEdit,
			$wgLinkTitlesParseOnRender,
			$wgLinkTitlesPreferShortTitles,
			$wgLinkTitlesMinimumTitleLength,
			$wgLinkTitlesBlackList,
			$wgLinkTitlesNamespaces,
			$wgLinkTitlesFirstOnly,
			$wgLinkTitlesSmartMode,
			$wgCapitalLinks,
			$wgLinkTitlesWordStartOnly,
			$wgLinkTitlesWordEndOnly,
			$wgLinkTitlesSkipTemplates,
			$wgLinkTitlesParseHeadings,
			$wgLinkTitlesEnableNoTargetMagicWord,
			$wgLinkTitlesCheckRedirect;

		$this->parseOnEdit = $wgLinkTitlesParseOnEdit;
		$this->parseOnRender = $wgLinkTitlesParseOnRender;
		$this->preferShortTitles = $wgLinkTitlesPreferShortTitles;
		$this->minimumTitleLength = $wgLinkTitlesMinimumTitleLength;
		$this->blackList = $wgLinkTitlesBlackList;
		$this->nameSpaces = $wgLinkTitlesNamespaces;
		$this->firstOnly = $wgLinkTitlesFirstOnly;
		$this->smartMode = $wgLinkTitlesSmartMode;
		$this->capitalLinks = $wgCapitalLinks; // MediaWiki global variable
		$this->wordStartOnly = $wgLinkTitlesWordStartOnly;
		$this->wordEndOnly = $wgLinkTitlesWordEndOnly;
		$this->skipTemplates = $wgLinkTitlesSkipTemplates;
		$this->parseHeadings = $wgLinkTitlesParseHeadings;
		$this->enableNoTargetMagicWord = $wgLinkTitlesEnableNoTargetMagicWord;
		$this->checkRedirect = $wgLinkTitlesCheckRedirect;

		// These two options are runtime switches only, not site configuration.
		$this->enableConsoleOutput = false;
		$this->enableDebugConsoleOutput = false;
	}
}
 | 
			
		||||
							
								
								
									
										147
									
								
								includes/Extension.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								includes/Extension.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,147 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * The LinkTitles\Extension class provides event handlers and entry points for the extension.
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
 *
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 * it under the terms of the GNU General Public License as published by
 | 
			
		||||
 * the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 * (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 * GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU General Public License
 | 
			
		||||
 * along with this program; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 * MA 02110-1301, USA.
 | 
			
		||||
 *
 | 
			
		||||
 * @author Daniel Kraus <bovender@bovender.de>
 | 
			
		||||
 */
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
/**
 * Provides event handlers and entry points for the extension.
 */
class Extension {

	/**
	 * Event handler that is hooked to the PageContentSave event.
	 *
	 * Adds links to the content that is about to be saved. Nothing is done if
	 * parsing on edit is disabled, if the edit is a minor edit, if the page is
	 * not in one of the configured namespaces, or if the page text matches the
	 * MAG_LINKTITLES_NOAUTOLINKS magic word.
	 *
	 * Only $wikiPage, $content and $isMinor are used by this handler; the
	 * remaining parameters are part of the hook signature.
	 *
	 * @param $wikiPage Page that is being saved; must provide getTitle().
	 * @param $user     Unused.
	 * @param $content  Content being saved; replaced with the linked content
	 *                  if linking changed the text.
	 * @param $summary  Unused.
	 * @param $isMinor  Whether the edit is a minor edit.
	 * @param $isWatch  Unused.
	 * @param $section  Unused.
	 * @param $flags    Unused.
	 * @param $status   Unused.
	 * @return bool Always true, so that saving proceeds.
	 */
	public static function onPageContentSave( &$wikiPage, &$user, &$content, &$summary,
			$isMinor, $isWatch, $section, &$flags, &$status ) {
		// Use the Config object (which mirrors the configuration globals) just
		// like the other handlers do, rather than reading the globals here.
		$config = new Config();
		if ( !$config->parseOnEdit || $isMinor ) {
			return true;
		}

		$title = $wikiPage->getTitle();

		// Only process if page is in one of our namespaces we want to link
		// Fixes ugly autolinking of sidebar pages
		if ( !in_array( $title->getNamespace(), $config->nameSpaces ) ) {
			return true;
		}

		$text = $content->getContentHandler()->serializeContent( $content );
		if ( \MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) ) {
			return true;
		}

		$linker = new Linker( $config );
		$newText = $linker->linkContent( $title, $text );
		// Strict comparison: a loose comparison would regard distinct numeric
		// strings (e.g. '1e3' and '1000') as equal and drop the change.
		if ( $newText !== $text ) {
			$content = $content->getContentHandler()->unserializeContent( $newText );
		}
		return true;
	}

	/**
	 * Event handler that is hooked to the InternalParseBeforeLinks event.
	 *
	 * @param Parser $parser Parser that raised the event.
	 * @param $text          Preprocessed text of the page; replaced with the
	 *                       linked text if the page qualifies for linking.
	 * @return bool Always true.
	 */
	public static function onInternalParseBeforeLinks( \Parser &$parser, &$text ) {
		$config = new Config();
		if ( !$config->parseOnRender ) {
			return true;
		}
		$title = $parser->getTitle();

		// If the page contains the magic word '__NOAUTOLINKS__', do not parse it.
		// Only process if page is in one of our namespaces we want to link
		if ( !\MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) &&
				in_array( $title->getNamespace(), $config->nameSpaces ) ) {
			$linker = new Linker( $config );
			$text = $linker->linkContent( $title, $text );
		}
		return true;
	}

	/**
	 * Automatically processes a single page, given a $title Title object.
	 * This function is called by the SpecialLinkTitles class and the
	 * LinkTitlesJob class.
	 *
	 * If linking changes the page text, the page is saved as a minor bot edit
	 * on behalf of the user of the given request context.
	 *
	 * @param Title $title Title object.
	 * @param RequestContext $context Current request context. If in doubt, call
	 *   MediaWiki's `RequestContext::getMain()` to obtain such an object.
	 * @return bool True if the page exists, false if the page does not exist
	 */
	public static function processPage( \Title $title, \RequestContext $context ) {
		$page = \WikiPage::factory( $title );
		$content = $page->getContent();
		if ( $content === null ) {
			// No content: the page does not exist.
			return false;
		}

		$text = $content->getContentHandler()->serializeContent( $content );
		$config = new Config();
		$linker = new Linker( $config );
		$newText = $linker->linkContent( $title, $text );
		// Strict comparison: a loose comparison would regard distinct numeric
		// strings (e.g. '1e3' and '1000') as equal and skip the edit.
		if ( $text !== $newText ) {
			$content = $content->getContentHandler()->unserializeContent( $newText );
			$page->doEditContent(
				$content,
				"Links to existing pages added by LinkTitles bot.", // TODO: i18n
				EDIT_MINOR | EDIT_FORCE_BOT,
				false, // baseRevId
				$context->getUser()
			);
		}
		return true;
	}

	/**
	 * Adds the two magic words defined by this extension to the list of
	 * 'double-underscore' terms that are automatically removed before a
	 * page is displayed.
	 *
	 * @param array $doubleUnderscoreIDs Array of magic word IDs; the two IDs
	 *   are appended to it.
	 * @return bool Always true.
	 */
	public static function onGetDoubleUnderscoreIDs( array &$doubleUnderscoreIDs ) {
		$doubleUnderscoreIDs[] = 'MAG_LINKTITLES_NOTARGET';
		$doubleUnderscoreIDs[] = 'MAG_LINKTITLES_NOAUTOLINKS';
		return true;
	}

	/**
	 * Registers the handlers for the two tags that this extension provides,
	 * <noautolinks> and <autolinks>, with the parser.
	 *
	 * @param Parser $parser Parser to register the tag hooks with.
	 * @return bool Always true, like the other hook handlers of this class.
	 */
	public static function onParserFirstCallInit( \Parser $parser ) {
		$parser->setHook( 'noautolinks', 'LinkTitles\Extension::doNoautolinksTag' );
		$parser->setHook( 'autolinks', 'LinkTitles\Extension::doAutolinksTag' );
		return true;
	}

	/**
	 * Removes the extra tag that this extension provides (<noautolinks>)
	 * by returning the text between the tags (if any), with HTML special
	 * characters escaped.
	 * See https://www.mediawiki.org/wiki/Manual:Tag_extensions#Example
	 *
	 * @param $input         Text between the opening and the closing tag.
	 * @param array $args    Tag attributes (unused).
	 * @param Parser $parser Parser that encountered the tag (unused).
	 * @param PPFrame $frame Current preprocessor frame (unused).
	 * @return string HTML-escaped input.
	 */
	public static function doNoautolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
		return htmlspecialchars( $input );
	}

	/**
	 * Handles the <autolinks> tag: adds links to the text between the tags
	 * and then has the parser process the resulting wikitext.
	 * See https://www.mediawiki.org/wiki/Manual:Tag_extensions#How_do_I_render_wikitext_in_my_extension.3F
	 *
	 * @param $input         Text between the opening and the closing tag.
	 * @param array $args    Tag attributes (unused).
	 * @param Parser $parser Parser that encountered the tag.
	 * @param PPFrame $frame Current preprocessor frame.
	 * @return string Result of Parser::recursiveTagParse() for the linked text.
	 */
	public static function doAutolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
		$config = new Config();
		$linker = new Linker( $config );
		$title = $parser->getTitle();
		$withLinks = $linker->linkContent( $title, $input );
		$output = $parser->recursiveTagParse( $withLinks, $frame );
		return $output;
	}
}
 | 
			
		||||
 | 
			
		||||
// vim: ts=2:sw=2:noet:comments^=\:///
 | 
			
		||||
@@ -1,527 +0,0 @@
 | 
			
		||||
<?php
 | 
			
		||||
/*
 | 
			
		||||
 *      Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
 *
 | 
			
		||||
 *      This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 *      it under the terms of the GNU General Public License as published by
 | 
			
		||||
 *      the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 *      (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 *      This program is distributed in the hope that it will be useful,
 | 
			
		||||
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 *      GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 *      You should have received a copy of the GNU General Public License
 | 
			
		||||
 *      along with this program; if not, write to the Free Software
 | 
			
		||||
 *      Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 *      MA 02110-1301, USA.
 | 
			
		||||
 */
 | 
			
		||||
/// @file
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
/// Development and debugging aid: writes a human-readable rendering of a
/// value, followed by a newline, to the standard error stream.
/// @param $var Any variable; its print_r() representation is what gets written.
/// @return void
function dump($var) {
	$rendered = print_r($var, true) . "\n";
	// Message type 3 makes error_log() append to the given destination.
	error_log($rendered, 3, 'php://stderr');
}
 | 
			
		||||
 | 
			
		||||
/// Central class of the extension. Sets up parser hooks.
 | 
			
		||||
/// This class contains only static functions; do not instantiate.
 | 
			
		||||
class Extension {
 | 
			
		||||
	/// Caching variable for page titles that are fetched from the DB.
 | 
			
		||||
	private static $pageTitles;
 | 
			
		||||
 | 
			
		||||
	/// Caching variable for the current namespace.
 | 
			
		||||
	/// This is needed because the sort order of the page titles that
 | 
			
		||||
	/// are cached in self::$pageTitles depends on the namespace of
 | 
			
		||||
	/// the page currently being processed.
 | 
			
		||||
	private static $currentNamespace;
 | 
			
		||||
 | 
			
		||||
	/// A Title object for the page that is being parsed.
 | 
			
		||||
	private static $currentTitle;
 | 
			
		||||
 | 
			
		||||
	/// A Title object for the target page currently being examined.
 | 
			
		||||
	private static $targetTitle;
 | 
			
		||||
 | 
			
		||||
	// The TitleValue object of the target page
 | 
			
		||||
	private static $targetTitleValue;
 | 
			
		||||
 | 
			
		||||
	/// The content object for the currently processed target page.
 | 
			
		||||
	/// This variable is necessary to be able to prevent loading the target
 | 
			
		||||
	/// content twice.
 | 
			
		||||
	private static $targetContent;
 | 
			
		||||
 | 
			
		||||
	/// Holds the page title of the currently processed target page
 | 
			
		||||
	/// as a string.
 | 
			
		||||
	private static $targetTitleText;
 | 
			
		||||
 | 
			
		||||
	/// Delimiter used in a regexp split operation to separate those parts
 | 
			
		||||
	/// of the page that should be parsed from those that should not be
 | 
			
		||||
	/// parsed (e.g. inside pre-existing links etc.).
 | 
			
		||||
	private static $delimiter;
 | 
			
		||||
 | 
			
		||||
	private static $wordStartDelim;
 | 
			
		||||
	private static $wordEndDelim;
 | 
			
		||||
 | 
			
		||||
	public static $ltConsoleOutput;
 | 
			
		||||
	public static $ltConsoleOutputDebug;
 | 
			
		||||
 | 
			
		||||
	/// Setup method
 | 
			
		||||
	public static function setup() {
		// Precompute the regular expression fragments (split delimiter and
		// word boundary assertions) that are stored in static members.
		self::BuildDelimiters();
	}
 | 
			
		||||
 | 
			
		||||
	/// Event handler that is hooked to the PageContentSave event.
 | 
			
		||||
	public static function onPageContentSave( &$wikiPage, &$user, &$content, &$summary,
			$isMinor, $isWatch, $section, &$flags, &$status ) {
		global $wgLinkTitlesParseOnEdit, $wgLinkTitlesNamespaces;

		// Nothing to do if parsing on edit is switched off, or for minor edits.
		if ( !$wgLinkTitlesParseOnEdit || $isMinor ) {
			return true;
		}

		// Only process pages in one of the configured namespaces; this
		// avoids ugly autolinking of sidebar pages.
		$title = $wikiPage->getTitle();
		if ( !in_array( $title->getNamespace(), $wgLinkTitlesNamespaces ) ) {
			return true;
		}

		$handler = $content->getContentHandler();
		$text = $handler->serializeContent( $content );

		// Pages carrying the no-autolinks magic word are left untouched.
		if ( \MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) ) {
			return true;
		}

		// Replace the content object only if linking actually changed the text.
		$newText = self::parseContent( $title, $text );
		if ( $newText != $text ) {
			$content = $handler->unserializeContent( $newText );
		}
		return true;
	}
 | 
			
		||||
 | 
			
		||||
	/// Event handler that is hooked to the InternalParseBeforeLinks event.
 | 
			
		||||
	/// @param Parser $parser Parser that raised the event.
 | 
			
		||||
	/// @param $text          Preprocessed text of the page.
 | 
			
		||||
	public static function onInternalParseBeforeLinks( \Parser &$parser, &$text ) {
		global $wgLinkTitlesParseOnRender, $wgLinkTitlesNamespaces;

		// Bail out early when parsing on render is disabled.
		if ( !$wgLinkTitlesParseOnRender ) {
			return true;
		}

		$title = $parser->getTitle();

		// If the page contains the no-autolinks magic word, do not parse it.
		if ( \MagicWord::get( 'MAG_LINKTITLES_NOAUTOLINKS' )->match( $text ) ) {
			return true;
		}

		// Only process if the page is in one of the namespaces we want to link.
		if ( in_array( $title->getNamespace(), $wgLinkTitlesNamespaces ) ) {
			$text = self::parseContent( $title, $text );
		}
		return true;
	}
 | 
			
		||||
 | 
			
		||||
	/// Core function of the extension, performs the actual parsing of the content.
 | 
			
		||||
	/// @param Title $title   Title object for the page that is being parsed
 | 
			
		||||
	/// @param $text          String that holds the article content
 | 
			
		||||
	/// @returns string: parsed text with links added if needed
 | 
			
		||||
	private static function parseContent( $title, &$text ) {
		// Adds wiki links to occurrences of existing page titles in $text and
		// returns the resulting text. $title is the Title object of the page
		// being processed; $text itself is not modified.

		// Configuration variables need to be defined here as globals.
		global $wgLinkTitlesFirstOnly;
		global $wgLinkTitlesSmartMode;
		global $wgCapitalLinks;

		// With "first only" enabled at most one occurrence per target title
		// is replaced; -1 tells preg_replace_callback to apply no limit.
		$limit = $wgLinkTitlesFirstOnly ? 1 : -1;
		self::$currentTitle = $title;
		$currentNamespace = $title->getNamespace();
		$newText = $text;

		// The cached title list is ordered relative to the current namespace,
		// so it must be fetched again whenever the namespace changes.
		if ( !isset( self::$pageTitles ) || ( $currentNamespace != self::$currentNamespace ) ) {
			self::$currentNamespace = $currentNamespace;
			self::$pageTitles = self::fetchPageTitles( $currentNamespace );
		}

		// Iterate through the page titles
		foreach ( self::$pageTitles as $row ) {
			self::newTarget( $row->page_namespace, $row->page_title );

			// Don't link current page
			if ( self::$targetTitle->equals( self::$currentTitle ) ) {
				continue;
			}

			// The "first only" limit applies to each target title separately,
			// so the flag has to be cleared for every target. Initializing it
			// only once before the loop would suppress the smart mode pass for
			// all targets following the first one that reached its limit.
			$limitReached = false;

			// split the page content by [[...]] groups
			// credits to inhan @ StackOverflow for suggesting preg_split
			// see http://stackoverflow.com/questions/10672286
			$arr = preg_split( self::$delimiter, $newText, -1, PREG_SPLIT_DELIM_CAPTURE );

			// Escape certain special characters in the page title to prevent
			// regexp compilation errors
			self::$targetTitleText = self::$targetTitle->getText();
			$quotedTitle = preg_quote( self::$targetTitleText, '/' );

			self::ltDebugLog( 'TargetTitle='. self::$targetTitleText, 'private' );
			self::ltDebugLog( 'TargetTitleQuoted='. $quotedTitle, 'private' );

			// Depending on the global configuration setting $wgCapitalLinks,
			// the title has to be searched for either in a strictly case-sensitive
			// way, or in a 'fuzzy' way where the first letter of the title may
			// be either case. The fuzzy variant is skipped if the quoted title
			// starts with an escape sequence.
			if ( $wgCapitalLinks && ( $quotedTitle[0] != '\\' ) ) {
				$searchTerm = '((?i)' . $quotedTitle[0] . '(?-i)' .
					substr( $quotedTitle, 1 ) . ')';
			} else {
				$searchTerm = '(' . $quotedTitle . ')';
			}

			$regex = '/(?<![\:\.\@\/\?\&])' . self::$wordStartDelim .
				$searchTerm . self::$wordEndDelim . '/S';
			$segmentCount = count( $arr );
			for ( $i = 0; $i < $segmentCount; $i += 2 ) {
				// even indexes will point to text that is not enclosed by brackets
				$arr[$i] = preg_replace_callback( $regex,
					'LinkTitles\Extension::simpleModeCallback', $arr[$i], $limit, $count );
				if ( $wgLinkTitlesFirstOnly && ( $count > 0 ) ) {
					$limitReached = true;
					break;
				}
			}
			$newText = implode( '', $arr );

			// If smart mode is turned on, the extension will perform a second
			// pass on the page and add links with aliases where the case does
			// not match.
			if ( $wgLinkTitlesSmartMode && !$limitReached ) {
				// Split again: the first pass may have added new [[...]] groups.
				$arr = preg_split( self::$delimiter, $newText, -1, PREG_SPLIT_DELIM_CAPTURE );
				$smartRegex = '/(?<![\:\.\@\/\?\&])' .
					self::$wordStartDelim . '(' . $quotedTitle . ')' .
					self::$wordEndDelim . '/iS';

				$segmentCount = count( $arr );
				for ( $i = 0; $i < $segmentCount; $i += 2 ) {
					// even indexes will point to text that is not enclosed by brackets
					$arr[$i] = preg_replace_callback( $smartRegex,
						'LinkTitles\Extension::smartModeCallback',
						$arr[$i], $limit, $count );
					if ( $wgLinkTitlesFirstOnly && ( $count > 0 ) ) {
						break;
					}
				}
				$newText = implode( '', $arr );
			} // $wgLinkTitlesSmartMode
		} // foreach self::$pageTitles as $row
		return $newText;
	}
 | 
			
		||||
 | 
			
		||||
	/// Automatically processes a single page, given a $title Title object.
 | 
			
		||||
	/// This function is called by the SpecialLinkTitles class and the
 | 
			
		||||
	/// LinkTitlesJob class.
 | 
			
		||||
	/// @param Title 					$title            Title object.
 | 
			
		||||
	/// @param RequestContext $context					Current request context.
 | 
			
		||||
	///                  If in doubt, call MediaWiki's `RequestContext::getMain()`
 | 
			
		||||
	///                  to obtain such an object.
 | 
			
		||||
	/// @returns boolean True if the page exists, false if the page does not exist
 | 
			
		||||
	public static function processPage( \Title $title, \RequestContext $context ) {
		self::ltLog('Processing '. $title->getPrefixedText());
		$page = \WikiPage::factory($title);
		$content = $page->getContent();

		// Without content there is nothing to process.
		if ( $content == null ) {
			return false;
		}

		$handler = $content->getContentHandler();
		$text = $handler->serializeContent($content);
		$newText = self::parseContent($title, $text);

		// Save a new revision only if linking changed the text.
		if ( $text != $newText ) {
			$page->doEditContent(
				$handler->unserializeContent( $newText ),
				"Links to existing pages added by LinkTitles bot.", // TODO: i18n
				EDIT_MINOR | EDIT_FORCE_BOT,
				false, // baseRevId
				$context->getUser()
			);
		}
		return true;
	}
 | 
			
		||||
 | 
			
		||||
	/// Adds the two magic words defined by this extension to the list of
 | 
			
		||||
	/// 'double-underscore' terms that are automatically removed before a
 | 
			
		||||
	/// page is displayed.
 | 
			
		||||
	/// @param $doubleUnderscoreIDs Array of magic word IDs.
 | 
			
		||||
	/// @return true
 | 
			
		||||
	public static function onGetDoubleUnderscoreIDs( array &$doubleUnderscoreIDs ) {
		// Append both magic word IDs of this extension in one call.
		array_push(
			$doubleUnderscoreIDs,
			'MAG_LINKTITLES_NOTARGET',
			'MAG_LINKTITLES_NOAUTOLINKS'
		);
		return true;
	}
 | 
			
		||||
 | 
			
		||||
	public static function onParserFirstCallInit( \Parser $parser ) {
		// Map each custom tag name to the static method that handles it.
		$tagHandlers = [
			'noautolinks' => 'LinkTitles\Extension::doNoautolinksTag',
			'autolinks' => 'LinkTitles\Extension::doAutolinksTag',
		];
		foreach ( $tagHandlers as $tagName => $handler ) {
			$parser->setHook( $tagName, $handler );
		}
	}
 | 
			
		||||
 | 
			
		||||
	///	Removes the extra tag that this extension provides (<noautolinks>)
 | 
			
		||||
	///	by simply returning the HTML-escaped text between the tags (if any).
 | 
			
		||||
	///	See https://www.mediawiki.org/wiki/Manual:Tag_extensions#Example
 | 
			
		||||
	public static function doNoautolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
		// The enclosed text is not linked; it is only HTML-escaped.
		$escaped = htmlspecialchars( $input );
		return $escaped;
	}
 | 
			
		||||
 | 
			
		||||
	///	Handles the extra tag <autolinks> that this extension provides: adds
	///	links to the text between the tags and lets the parser process the result.
 | 
			
		||||
	///	See https://www.mediawiki.org/wiki/Manual:Tag_extensions#How_do_I_render_wikitext_in_my_extension.3F
 | 
			
		||||
	public static function doAutolinksTag( $input, array $args, \Parser $parser, \PPFrame $frame ) {
		// First add links to the enclosed text, then hand the resulting
		// wikitext to the parser.
		$linkedText = self::parseContent( $parser->getTitle(), $input );
		return $parser->recursiveTagParse( $linkedText, $frame );
	}
 | 
			
		||||
 | 
			
		||||
	// Fetches the page titles from the database.
 | 
			
		||||
	// @param $currentNamespace String holding the namespace of the page currently being processed.
 | 
			
		||||
	private static function fetchPageTitles( $currentNamespace ) {
		// Queries the 'page' table for all titles in the configured namespaces
		// that have the configured minimum length and are not blacklisted.
		// Rows are ordered by namespace weight (current namespace first) and
		// then by title length. Returns the result of the database query.
		global $wgLinkTitlesPreferShortTitles;
		global $wgLinkTitlesMinimumTitleLength;
		global $wgLinkTitlesBlackList;
		global $wgLinkTitlesNamespaces;

		$sort_order = $wgLinkTitlesPreferShortTitles ? 'ASC' : 'DESC';

		// Build a blacklist of pages that are not supposed to be link
		// targets. Spaces are replaced by underscores to match the form in
		// which titles are stored in the page_title column.
		$blackList = str_replace( ' ', '_', '("' . implode( '","', $wgLinkTitlesBlackList ) . '")' );

		// Build our weight list. Make sure current namespace is first element
		$namespaces = array_diff( $wgLinkTitlesNamespaces, [ $currentNamespace ] );
		array_unshift( $namespaces, $currentNamespace );

		// No need for sanity check. We are sure that we have at least one
		// element in the array. Iterating by value (not by reference) avoids
		// leaving a dangling reference to the last array element.
		$weightSelect = "CASE page_namespace ";
		$currentWeight = 0;
		foreach ( $namespaces as $namespaceValue ) {
			$currentWeight = $currentWeight + 100;
			$weightSelect = $weightSelect . " WHEN " . $namespaceValue . " THEN " . $currentWeight . PHP_EOL;
		}
		$weightSelect = $weightSelect . " END ";
		$namespacesClause = '(' . implode( ', ', $namespaces ) . ')';

		$dbr = wfGetDB( DB_SLAVE );
		// __METHOD__ is captured here because inside the closure it would
		// name the closure instead of this method.
		$caller = __METHOD__;

		// Runs the title query using the given SQL function to measure the
		// length of the page title.
		$selectTitles = function ( $lengthFunction ) use (
			$dbr, $weightSelect, $namespacesClause, $blackList, $sort_order,
			$wgLinkTitlesMinimumTitleLength, $caller
		) {
			return $dbr->select(
				'page',
				array( 'page_title', 'page_namespace', "weight" => $weightSelect ),
				array(
					'page_namespace IN ' . $namespacesClause,
					$lengthFunction . '(page_title) >= ' . $wgLinkTitlesMinimumTitleLength,
					'page_title NOT IN ' . $blackList,
				),
				$caller,
				array( 'ORDER BY' => 'weight ASC, ' . $lengthFunction . '(page_title) ' . $sort_order )
			);
		};

		// Since the db may be sqlite, we need a try..catch structure because
		// sqlite does not support the CHAR_LENGTH function. The exception
		// class must be fully qualified: inside the LinkTitles namespace an
		// unqualified 'Exception' refers to LinkTitles\Exception, which would
		// never match, so the LENGTH() fallback would never be used.
		try {
			$res = $selectTitles( 'CHAR_LENGTH' );
		} catch ( \Exception $e ) {
			$res = $selectTitles( 'LENGTH' );
		}

		return $res;
	}
 | 
			
		||||
 | 
			
		||||
	// Callback function for use with preg_replace_callback in simple mode.
 | 
			
		||||
	private static function simpleModeCallback( array $matches ) {
		// Leave the matched text untouched if the target may not be linked.
		if ( !self::checkTargetPage() ) {
			return $matches[0];
		}
		self::ltLog( "Linking '$matches[0]' to '" . self::$targetTitle . "'" );
		// Wrap the occurrence in double brackets to turn it into a wiki link.
		return '[[' . $matches[0] . ']]';
	}
 | 
			
		||||
 | 
			
		||||
	// Callback function for use with preg_replace_callback.
 | 
			
		||||
	// This essentially performs a case-sensitive comparison of the
 | 
			
		||||
	// current page title and the occurrence found on the page; if
 | 
			
		||||
	// the cases do not match, it builds an aliased (piped) link.
 | 
			
		||||
	// If $wgCapitalLinks is set to true, the case of the first
 | 
			
		||||
	// letter is ignored by MediaWiki and we don't need to build a
 | 
			
		||||
	// piped link if only the case of the first letter is different.
 | 
			
		||||
	private static function smartModeCallback( array $matches ) {
		global $wgCapitalLinks;

		// Leave the matched text untouched if the target may not be linked.
		if ( !self::checkTargetPage() ) {
			return $matches[0];
		}
		self::ltLog( "Linking (smart) '$matches[0]' to '" . self::$targetTitle . "'" );

		if ( $wgCapitalLinks ) {
			// With $wgCapitalLinks set to true, the first letter of the page
			// title does not matter for linking, so it is left out of the
			// comparison.
			$difference = strcmp( substr( self::$targetTitleText, 1 ), substr( $matches[0], 1 ) );
		} else {
			// Otherwise the complete title has to match case-sensitively.
			$difference = strcmp( self::$targetTitleText, $matches[0] );
		}

		if ( $difference == 0 ) {
			// Case-sensitive match: no need to build piped link.
			return '[[' . $matches[0] . ']]';
		}
		// Case-insensitive match: build piped link.
		return '[[' . self::$targetTitleText . '|' . $matches[0] . ']]';
	}
 | 
			
		||||
 | 
			
		||||
	/// Sets member variables for the current target page.
 | 
			
		||||
	private static function newTarget( $ns, $title ) {
		// Build the Title object for the new target and remember it.
		$target = \Title::makeTitleSafe( $ns, $title );
		self::$targetTitle = $target;
		self::ltDebugLog( 'newtarget='.  $target->getText(), "private" );

		// Keep the corresponding TitleValue object as well.
		$titleValue = $target->getTitleValue();
		self::$targetTitleValue = $titleValue;
		self::ltDebugLog( 'altTarget='. $titleValue->getText(), "private" );

		// Discard the content cached for the previous target.
		self::$targetContent = null;
	}
 | 
			
		||||
 | 
			
		||||
	/// Returns the content of the current target page.
 | 
			
		||||
	/// This function serves to be used in preg_replace_callback callback
 | 
			
		||||
	/// functions, in order to load the target page content from the
 | 
			
		||||
	/// database only when needed.
 | 
			
		||||
	/// @note It is absolutely necessary that the newTarget()
 | 
			
		||||
	/// function is called for every new page.
 | 
			
		||||
	private static function getTargetContent() {
		// Load the content of the target page only once per target. The check
		// must test the static member: testing a local variable of the same
		// name is always false for isset(), which would reload the content on
		// every call. newTarget() resets the member to null for each target.
		if ( !isset( self::$targetContent ) ) {
			self::$targetContent = \WikiPage::factory(
				self::$targetTitle )->getContent();
		}
		return self::$targetContent;
	}
 | 
			
		||||
 | 
			
		||||
	/// Examines the current target page. Returns true if it may be linked;
 | 
			
		||||
	/// false if not. This depends on the settings
 | 
			
		||||
	/// $wgLinkTitlesCheckRedirect and $wgLinkTitlesEnableNoTargetMagicWord
 | 
			
		||||
	/// and whether the target page is a redirect or contains the
 | 
			
		||||
	/// __NOAUTOLINKTARGET__ magic word.
 | 
			
		||||
	/// @returns boolean
 | 
			
		||||
	private static function checkTargetPage() {
		global $wgLinkTitlesEnableNoTargetMagicWord;
		global $wgLinkTitlesCheckRedirect;

		// If checking for redirects is enabled and the target page does
		// indeed redirect to the current page, it must not be linked.
		if ( $wgLinkTitlesCheckRedirect ) {
			$redirectTitle = self::getTargetContent()->getUltimateRedirectTarget();
			$redirectsToCurrent = $redirectTitle && $redirectTitle->equals( self::$currentTitle );
			if ( $redirectsToCurrent ) {
				return false;
			}
		}

		// If the magic word __NOAUTOLINKTARGET__ is enabled and the target
		// page does indeed contain this magic word, it must not be linked.
		if ( $wgLinkTitlesEnableNoTargetMagicWord ) {
			$targetContent = self::getTargetContent();
			$noTargetWord = \MagicWord::get('MAG_LINKTITLES_NOTARGET');
			if ( $targetContent->matchMagicWord( $noTargetWord ) ) {
				return false;
			}
		}

		return true;
	}
 | 
			
		||||
 | 
			
		||||
	/// Builds the delimiter that is used in a regexp to separate
 | 
			
		||||
	/// text that should be parsed from text that should not be
 | 
			
		||||
	/// parsed (e.g. inside existing links etc.)
 | 
			
		||||
	private static function BuildDelimiters() {
		// Configuration variables need to be defined here as globals.
		global $wgLinkTitlesParseHeadings, $wgLinkTitlesSkipTemplates;
		global $wgLinkTitlesWordStartOnly, $wgLinkTitlesWordEndOnly;

		// Use unicode character properties rather than \b escape sequences
		// to detect whole words containing non-ASCII characters as well.
		// Note that this requires a PCRE library that was compiled with
		// --enable-unicode-properties
		self::$wordStartDelim = $wgLinkTitlesWordStartOnly ? '(?<!\pL)' : '';
		self::$wordEndDelim = $wgLinkTitlesWordEndOnly ? '(?!\pL)' : '';

		if ( $wgLinkTitlesSkipTemplates ) {
			// Use recursive regex to balance curly braces;
			// see http://www.regular-expressions.info/recurse.html
			$templatesDelimiter = '{{(?>[^{}]|(?R))*}}|';
		} else {
			// Match template names (ignoring any piped [[]] links in them)
			// along with the trailing pipe and parameter name or closing
			// braces; also match sequences of '|wordcharacters=' (without
			// spaces in them) that usually only occur as parameter names in
			// transclusions (but could also occur as wiki table cell contents).
			// TODO: Find a way to match parameter names in transclusions, but
			// not in table cells or other sequences involving a pipe character
			// and equal sign.
			$templatesDelimiter = '{{[^|]*?(?:(?:\[\[[^]]+]])?)[^|]*?(?:\|(?:\w+=)?|(?:}}))|\|\w+=|';
		}

		// Headings are excluded from linking unless parsing them is requested.
		$headingsDelimiter = $wgLinkTitlesParseHeadings ? '' : '=+.+?=+|';
		$urlPattern = '[a-z]+?\:\/\/(?:\S+\.)+\S+(?:\/.*)?';

		// Assemble a regular expression that captures everything that must
		// not be linked. Note that in order to use preg_split(), it is
		// important to have only one capturing subpattern (which precludes
		// the use of conditional subpatterns).
		$fragments = [
			'/(',                                       // exclude from linking:
			'\[\[.*?\]\]|',                             // links
			$headingsDelimiter,                         // titles (if requested)
			$templatesDelimiter,                        // templates (if requested)
			'^ .+?\n|\n .+?\n|\n .+?$|^ .+?$|',         // preformatted text
			'<nowiki>.*?<.nowiki>|<code>.*?<\/code>|',  // nowiki/code
			'<pre>.*?<\/pre>|<html>.*?<\/html>|',       // pre/html
			'<script>.*?<\/script>|',                   // script
			'<gallery>.*?<\/gallery>|',                 // gallery
			'<div.+?>|<\/div>|',                        // attributes of div elements
			'<span.+?>|<\/span>|',                      // attributes of span elements
			'<file>[^<]*<\/file>|',                     // stuff inside file elements
			'style=".+?"|class=".+?"|',                 // styles and classes (e.g. of wikitables)
			'<noautolinks>.*?<\/noautolinks>|',         // custom tag 'noautolinks'
			'\[' . $urlPattern . '\s.+?\]|',            // urls in brackets
			$urlPattern . '(?=\s|$)|',                  // plain urls
			'(?<=\b)\S+\@(?:\S+\.)+\S+(?=\b)',          // email addresses
			')/ismS',
		];
		self::$delimiter = implode( '', $fragments );
	}
 | 
			
		||||
 | 
			
		||||
	/// Local Debugging output function which can send output to console as well
 | 
			
		||||
	public static function ltDebugLog($text) {
		// Echo the message to the console if debug console output is enabled.
		if ( self::$ltConsoleOutputDebug ) {
			echo $text . "\n";
		}
		// The message always goes to the 'LinkTitles' debug log as well.
		wfDebugLog( 'LinkTitles', $text , 'private' );
	}
 | 
			
		||||
 | 
			
		||||
	/// Local Logging output function which can send output to console as well
 | 
			
		||||
	public static function ltLog($text) {
		// Echo the message to the console if console output is enabled.
		if ( self::$ltConsoleOutput ) {
			echo $text . "\n";
		}
		// The message always goes to the 'LinkTitles' debug log as well.
		wfDebugLog( 'LinkTitles', $text , 'private' );
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// vim: ts=2:sw=2:noet:comments^=\:///
 | 
			
		||||
							
								
								
									
										184
									
								
								includes/Linker.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										184
									
								
								includes/Linker.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,184 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * The LinkTitles\Linker class does the heavy linking for the extension.
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
 *
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 * it under the terms of the GNU General Public License as published by
 | 
			
		||||
 * the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 * (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 * GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU General Public License
 | 
			
		||||
 * along with this program; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 * MA 02110-1301, USA.
 | 
			
		||||
 *
 | 
			
		||||
 * @author Daniel Kraus <bovender@bovender.de>
 | 
			
		||||
 */
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Performs the actual linking of content to existing pages.
 | 
			
		||||
 */
 | 
			
		||||
class Linker {
	/**
	 * LinkTitles configuration.
	 *
	 * @var Config $config
	 */
	public $config;

	/**
	 * The title text of the potential target page that is currently being
	 * processed.
	 *
	 * This is an instance variable (rather than a local method variable) so it
	 * can be accessed in the preg_replace_callback callbacks.
	 *
	 * @var String $targetTitleText
	 */
	private $targetTitleText;

	/**
	 * Constructs a new instance of the Linker class.
	 *
	 * @param Config $config LinkTitles configuration object.
	 */
	public function __construct( Config &$config ) {
		$this->config = $config;
	}

	/**
	 * Core function of the extension, performs the actual parsing of the content.
	 *
	 * This method receives a Title object and the string representation of the
	 * source page. It does not work on a WikiPage object directly because the
	 * callbacks in the Extension class do not always get a WikiPage object in the
	 * first place.
	 *
	 * For every target row, the text is split into linkable and non-linkable
	 * sections; the linkable sections are first searched with the target's
	 * case-sensitive regex and, if smart mode is on and the firstOnly limit was
	 * not already used up for this target, with its case-insensitive regex.
	 *
	 * @param \Title &$title Title object for the current page.
	 * @param String $text String that holds the article content
	 * @return String with links to target pages
	 */
	public function linkContent( \Title &$title, &$text ) {
		$limit = $this->config->firstOnly ? 1 : -1;
		$newText = $text;

		$splitter = Splitter::default( $this->config );
		$targets = Targets::default( $title, $this->config );

		// Iterate through the target page titles
		foreach ( $targets->queryResult as $row ) {
			$target = new Target( $row->page_namespace, $row->page_title, $this->config );

			// Don't link current page and don't link if the target page redirects
			// to the current page or has the __NOAUTOLINKTARGET__ magic word
			// (as required by the actual LinkTitles configuration).
			if ( $target->isSameTitle( $title ) || !$target->mayLinkTo( $title ) ) {
				continue;
			}

			// Cache the target title text for the regex callbacks
			$this->targetTitleText = $target->getTitleText();

			// Split the page content by non-linkable sections.
			// Credits to inhan @ StackOverflow for suggesting preg_split.
			// See http://stackoverflow.com/questions/10672286
			$arr = $splitter->split( $newText );
			if ( !is_array( $arr ) ) {
				// The splitter did not produce sections (e.g. a regex error);
				// leave the text untouched for this target rather than
				// corrupting it.
				continue;
			}

			// First pass: case-sensitive matches become plain links.
			$count = $this->linkSegments(
				$arr, $target->getCaseSensitiveRegex(), 'simpleModeCallback', $limit
			);
			$newText = implode( '', $arr );

			// The firstOnly limit applies per target, so this flag must be
			// computed afresh for every target and not carried over from a
			// previous one.
			$limitReached = $this->config->firstOnly && ( $count > 0 );

			// If smart mode is turned on, the extension will perform a second
			// pass on the page and add links with aliases where the case does
			// not match.
			if ( $this->config->smartMode && !$limitReached ) {
				if ( $count > 0 ) {
					// Split the text again because it was changed in the first
					// pass; otherwise the links that were just inserted would be
					// searched (and linked) again.
					$arr = $splitter->split( $newText );
					if ( !is_array( $arr ) ) {
						continue;
					}
				}
				$this->linkSegments(
					$arr, $target->getCaseInsensitiveRegex(), 'smartModeCallback', $limit
				);
				$newText = implode( '', $arr );
			}
		}

		return $newText;
	}

	/**
	 * Runs one linking pass over the linkable sections of a split text.
	 *
	 * Even indexes of $segments point to sections that may be linked; odd
	 * indexes hold the delimiters captured by the splitter and are left alone.
	 *
	 * @param array &$segments Split text; linkable sections are replaced in place.
	 * @param String $regex Regular expression that matches the target title.
	 * @param String $callback Name of the callback method of this class.
	 * @param Int $limit Maximum number of replacements per section (-1: no limit).
	 * @return Int Total number of replacements made in all visited sections.
	 */
	private function linkSegments( array &$segments, $regex, $callback, $limit ) {
		$total = 0;
		$segmentCount = count( $segments );
		for ( $i = 0; $i < $segmentCount; $i += 2 ) {
			$replacements = 0;
			$linked = preg_replace_callback(
				$regex, array( $this, $callback ), $segments[$i], $limit, $replacements
			);
			if ( $linked === null ) {
				// preg_replace_callback signals an error with null; keep the
				// original section instead of replacing it with nothing.
				continue;
			}
			$segments[$i] = $linked;
			// preg_replace_callback reports the count for this call only, so
			// the total has to be accumulated here.
			$total += $replacements;
			if ( $this->config->firstOnly && ( $total > 0 ) ) {
				break;
			}
		}
		return $total;
	}

	/**
	 * Callback for preg_replace_callback in simple mode.
	 *
	 * @param array $matches Matches provided by preg_replace_callback
	 * @return string The matched text wrapped in link markup
	 */
	private function simpleModeCallback( array $matches ) {
		return '[[' . $matches[0] . ']]';
	}

	/**
	 * Callback function for use with preg_replace_callback.
	 * This essentially performs a case-sensitive comparison of the
	 * current target title and the occurrence found on the page; if
	 * the cases do not match, it builds an aliased (piped) link.
	 * If $wgCapitalLinks is set to true, the case of the first
	 * letter is ignored by MediaWiki and we don't need to build a
	 * piped link if only the case of the first letter is different.
	 *
	 * @param array $matches Matches provided by preg_replace_callback
	 * @return string Matched text as a plain or piped link
	 */
	private function smartModeCallback( array $matches ) {
		$found = $matches[0];
		if ( $this->config->capitalLinks ) {
			// Ignore the first letter of both strings. The first letter may be a
			// multibyte character, so it must be removed character-wise, not
			// byte-wise.
			$sameCase = strcmp(
				mb_substr( $this->targetTitleText, 1, null, 'UTF-8' ),
				mb_substr( $found, 1, null, 'UTF-8' )
			) === 0;
		} else {
			$sameCase = strcmp( $this->targetTitleText, $found ) === 0;
		}

		if ( $sameCase ) {
			// Case-sensitive match: no need to build piped link.
			return '[[' . $found . ']]';
		}
		// Case-insensitive match: build piped link.
		return '[[' . $this->targetTitleText . '|' . $found . ']]';
	}
}
 | 
			
		||||
 | 
			
		||||
// vim: ts=2:sw=2:noet:comments^=\:///
 | 
			
		||||
@@ -1,21 +1,25 @@
 | 
			
		||||
<?php
 | 
			
		||||
/*
 | 
			
		||||
 *      Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
/**
 | 
			
		||||
 * Provides a special page for the LinkTitles extension.
 | 
			
		||||
 *
 | 
			
		||||
 *      This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 *      it under the terms of the GNU General Public License as published by
 | 
			
		||||
 *      the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 *      (at your option) any later version.
 | 
			
		||||
 * Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
 *
 | 
			
		||||
 *      This program is distributed in the hope that it will be useful,
 | 
			
		||||
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 *      GNU General Public License for more details.
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 * it under the terms of the GNU General Public License as published by
 | 
			
		||||
 * the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 * (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 *      You should have received a copy of the GNU General Public License
 | 
			
		||||
 *      along with this program; if not, write to the Free Software
 | 
			
		||||
 *      Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 *      MA 02110-1301, USA.
 | 
			
		||||
 * This program is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 * GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU General Public License
 | 
			
		||||
 * along with this program; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 * MA 02110-1301, USA.
 | 
			
		||||
 *
 | 
			
		||||
 * @author Daniel Kraus <bovender@bovender.de>
 | 
			
		||||
 */
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
/// @defgroup batch Batch processing
 | 
			
		||||
@@ -25,17 +29,21 @@ if ( !defined( 'MEDIAWIKI' ) ) {
 | 
			
		||||
	die( 'Not an entry point.' );
 | 
			
		||||
}
 | 
			
		||||
/// @endcond
 | 
			
		||||
 
 | 
			
		||||
/// Provides a special page that can be used to batch-process all pages in 
 | 
			
		||||
/// the wiki. By default, this can only be performed by sysops.
 | 
			
		||||
/// @ingroup batch
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Provides a special page that can be used to batch-process all pages in
 | 
			
		||||
 * the wiki. By default, this can only be performed by sysops.
 | 
			
		||||
 * @ingroup batch
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
class Special extends \SpecialPage {
 | 
			
		||||
 | 
			
		||||
	/// Constructor. Announces the special page title and required user right 
 | 
			
		||||
	/// to the parent constructor.
 | 
			
		||||
	/**
 | 
			
		||||
	 * Constructor. Announces the special page title and required user right to the parent constructor.
 | 
			
		||||
	 */
 | 
			
		||||
	function __construct() {
 | 
			
		||||
		// the second parameter in the following function call ensures that only 
 | 
			
		||||
		// users who have the 'linktitles-batch' right get to see this page (by 
 | 
			
		||||
		// the second parameter in the following function call ensures that only
 | 
			
		||||
		// users who have the 'linktitles-batch' right get to see this page (by
 | 
			
		||||
		// default, this are all sysop users).
 | 
			
		||||
		parent::__construct( 'LinkTitles', 'linktitles-batch' );
 | 
			
		||||
	}
 | 
			
		||||
@@ -44,9 +52,11 @@ class Special extends \SpecialPage {
 | 
			
		||||
		return 'pagetools';
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/// Entry function of the special page class. Will abort if the user does 
 | 
			
		||||
	/// not have appropriate permissions ('linktitles-batch').
 | 
			
		||||
	/// @return undefined
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Entry function of the special page class. Will abort if the user does not have appropriate permissions ('linktitles-batch').
 | 
			
		||||
	 * @param  $par Additional parameters (required by interface; currently not used)
 | 
			
		||||
	 */
 | 
			
		||||
	function execute($par) {
 | 
			
		||||
		// Prevent non-authorized users from executing the batch processing.
 | 
			
		||||
		if ( !$this->userCanExecute( $this->getUser() ) ) {
 | 
			
		||||
@@ -76,18 +86,19 @@ class Special extends \SpecialPage {
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/// Processes wiki articles, starting at the page indicated by 
 | 
			
		||||
	/// $startTitle. If $wgLinkTitlesTimeLimit is reached before all pages are 
 | 
			
		||||
	/// processed, returns the title of the next page that needs processing.
 | 
			
		||||
	/// @param WebRequest $request WebRequest object that is associated with the special 
 | 
			
		||||
	///                            page.
 | 
			
		||||
	/// @param OutputPage $output  Output page for the special page.
 | 
			
		||||
	/**
 | 
			
		||||
	 * Processes wiki articles, starting at the page indicated by
 | 
			
		||||
	 * $startTitle. If $wgLinkTitlesTimeLimit is reached before all pages are
 | 
			
		||||
	 * processed, returns the title of the next page that needs processing.
 | 
			
		||||
	 * @param WebRequest $request WebRequest object that is associated with the special page.
 | 
			
		||||
	 * @param OutputPage $output  Output page for the special page.
 | 
			
		||||
	 */
 | 
			
		||||
	private function process( \WebRequest &$request, \OutputPage &$output) {
 | 
			
		||||
		global $wgLinkTitlesTimeLimit;
 | 
			
		||||
        global $wgLinkTitlesNamespaces;
 | 
			
		||||
		global $wgLinkTitlesNamespaces;
 | 
			
		||||
 | 
			
		||||
        // get our Namespaces
 | 
			
		||||
        $namespacesClause = str_replace( '_', ' ','(' . implode( ', ',$wgLinkTitlesNamespaces ) . ')' );
 | 
			
		||||
		// get our Namespaces
 | 
			
		||||
		$namespacesClause = str_replace( '_', ' ','(' . implode( ', ',$wgLinkTitlesNamespaces ) . ')' );
 | 
			
		||||
 | 
			
		||||
		// Start the stopwatch
 | 
			
		||||
		$startTime = microtime(true);
 | 
			
		||||
@@ -95,7 +106,7 @@ class Special extends \SpecialPage {
 | 
			
		||||
		// Connect to the database
 | 
			
		||||
		$dbr = wfGetDB( DB_SLAVE );
 | 
			
		||||
 | 
			
		||||
		// Fetch the start index and max number of records from the POST 
 | 
			
		||||
		// Fetch the start index and max number of records from the POST
 | 
			
		||||
		// request.
 | 
			
		||||
		$postValues = $request->getValues();
 | 
			
		||||
 | 
			
		||||
@@ -107,26 +118,24 @@ class Special extends \SpecialPage {
 | 
			
		||||
		if ( array_key_exists('e', $postValues) ) {
 | 
			
		||||
			$end = intval($postValues['e']);
 | 
			
		||||
		}
 | 
			
		||||
		else 
 | 
			
		||||
		else
 | 
			
		||||
		{
 | 
			
		||||
			// No end index was given. Therefore, count pages now.
 | 
			
		||||
			$end = $this->countPages($dbr, $namespacesClause );
 | 
			
		||||
		};
 | 
			
		||||
 | 
			
		||||
		array_key_exists('r', $postValues) ?
 | 
			
		||||
				$reloads = $postValues['r'] :
 | 
			
		||||
				$reloads = 0;
 | 
			
		||||
		array_key_exists('r', $postValues) ? $reloads = $postValues['r'] : $reloads = 0;
 | 
			
		||||
 | 
			
		||||
		// Retrieve page names from the database.
 | 
			
		||||
		$res = $dbr->select( 
 | 
			
		||||
		$res = $dbr->select(
 | 
			
		||||
			'page',
 | 
			
		||||
			array('page_title', 'page_namespace'),
 | 
			
		||||
			array( 
 | 
			
		||||
				'page_namespace IN ' . $namespacesClause, 
 | 
			
		||||
			), 
 | 
			
		||||
			__METHOD__, 
 | 
			
		||||
			array(
 | 
			
		||||
		 		'LIMIT' => 999999999,
 | 
			
		||||
				'page_namespace IN ' . $namespacesClause,
 | 
			
		||||
			),
 | 
			
		||||
			__METHOD__,
 | 
			
		||||
			array(
 | 
			
		||||
				'LIMIT' => 999999999,
 | 
			
		||||
				'OFFSET' => $start
 | 
			
		||||
			)
 | 
			
		||||
		);
 | 
			
		||||
@@ -136,7 +145,7 @@ class Special extends \SpecialPage {
 | 
			
		||||
			$curTitle = \Title::makeTitleSafe( $row->page_namespace, $row->page_title);
 | 
			
		||||
			Extension::processPage($curTitle, $this->getContext());
 | 
			
		||||
			$start += 1;
 | 
			
		||||
			
 | 
			
		||||
 | 
			
		||||
			// Check if the time limit is exceeded
 | 
			
		||||
			if ( microtime(true)-$startTime > $wgLinkTitlesTimeLimit )
 | 
			
		||||
			{
 | 
			
		||||
@@ -149,11 +158,11 @@ class Special extends \SpecialPage {
 | 
			
		||||
		// If we have not reached the last page yet, produce code to reload
 | 
			
		||||
		// the extension's special page.
 | 
			
		||||
		if ( $start < $end )
 | 
			
		||||
	 	{
 | 
			
		||||
		{
 | 
			
		||||
			$reloads += 1;
 | 
			
		||||
			// Build a form with hidden values and output JavaScript code that 
 | 
			
		||||
			// Build a form with hidden values and output JavaScript code that
 | 
			
		||||
			// immediately submits the form in order to continue the process.
 | 
			
		||||
			$output->addHTML($this->getReloaderForm($request->getRequestURL(), 
 | 
			
		||||
			$output->addHTML($this->getReloaderForm($request->getRequestURL(),
 | 
			
		||||
				$start, $end, $reloads));
 | 
			
		||||
		}
 | 
			
		||||
		else // Last page has been processed
 | 
			
		||||
@@ -162,8 +171,10 @@ class Special extends \SpecialPage {
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/// Adds WikiText to the output containing information about the extension 
 | 
			
		||||
	/// and a form and button to start linking.
 | 
			
		||||
	/*
 | 
			
		||||
	 * Adds WikiText to the output containing information about the extension
 | 
			
		||||
	 * and a form and button to start linking.
 | 
			
		||||
	 */
 | 
			
		||||
	private function buildInfoPage( &$request, &$output ) {
 | 
			
		||||
		$url = $request->getRequestURL();
 | 
			
		||||
 | 
			
		||||
@@ -176,8 +187,8 @@ Source code: http://github.com/bovender/LinkTitles
 | 
			
		||||
 | 
			
		||||
== Batch Linking ==
 | 
			
		||||
You can start a batch linking process by clicking on the button below.
 | 
			
		||||
This will go through every page in the normal namespace of your Wiki and 
 | 
			
		||||
insert links automatically. This page will repeatedly reload itself, in 
 | 
			
		||||
This will go through every page in the normal namespace of your Wiki and
 | 
			
		||||
insert links automatically. This page will repeatedly reload itself, in
 | 
			
		||||
order to prevent blocking the server. To interrupt the process, simply
 | 
			
		||||
close this page.
 | 
			
		||||
EOF
 | 
			
		||||
@@ -192,12 +203,13 @@ EOF
 | 
			
		||||
		);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/// Produces informative output in WikiText format to show while working.
 | 
			
		||||
	/// @param $output    Output object.
 | 
			
		||||
	/// @param $curTitle  Title of the currently processed page.
 | 
			
		||||
	/// @param $index     Index of the currently processed page.     
 | 
			
		||||
	/// @param $end       Last index that will be processed (i.e., number of 
 | 
			
		||||
	///                   pages).
 | 
			
		||||
  /*
 | 
			
		||||
	 * Produces informative output in WikiText format to show while working.
 | 
			
		||||
	 * @param $output    Output object.
 | 
			
		||||
	 * @param $curTitle  Title of the currently processed page.
 | 
			
		||||
	 * @param $index     Index of the currently processed page.
 | 
			
		||||
	 * @param $end       Last index that will be processed (i.e., number of pages).
 | 
			
		||||
	 */
 | 
			
		||||
	private function addProgressInfo( &$output, $curTitle, $index, $end ) {
 | 
			
		||||
		$progress = $index / $end * 100;
 | 
			
		||||
		$percent = sprintf("%01.1f", $progress);
 | 
			
		||||
@@ -205,8 +217,8 @@ EOF
 | 
			
		||||
		$output->addWikiText(
 | 
			
		||||
<<<EOF
 | 
			
		||||
== Processing pages... ==
 | 
			
		||||
The [http://www.mediawiki.org/wiki/Extension:LinkTitles LinkTitles] 
 | 
			
		||||
extension is currently going through every page of your wiki, adding links to 
 | 
			
		||||
The [http://www.mediawiki.org/wiki/Extension:LinkTitles LinkTitles]
 | 
			
		||||
extension is currently going through every page of your wiki, adding links to
 | 
			
		||||
existing pages as appropriate.
 | 
			
		||||
 | 
			
		||||
=== Current page: $curTitle ===
 | 
			
		||||
@@ -232,14 +244,15 @@ EOF
 | 
			
		||||
		);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/// Generates an HTML form and JavaScript to automatically submit the 
 | 
			
		||||
	/// form.
 | 
			
		||||
	/// @param $url     URL to reload with a POST request.
 | 
			
		||||
	/// @param $start   Index of the next page that shall be processed.
 | 
			
		||||
	/// @param $end     Index of the last page to be processed.
 | 
			
		||||
	/// @param $reloads Counter that holds the number of reloads so far.
 | 
			
		||||
	/// @returns        String that holds the HTML for a form and a
 | 
			
		||||
	///                 JavaScript command.
 | 
			
		||||
	/*
 | 
			
		||||
	 * Generates an HTML form and JavaScript to automatically submit the
 | 
			
		||||
	 * form.
 | 
			
		||||
	 * @param $url     URL to reload with a POST request.
 | 
			
		||||
	 * @param $start   Index of the next page that shall be processed.
 | 
			
		||||
	 * @param $end     Index of the last page to be processed.
 | 
			
		||||
	 * @param $reloads Counter that holds the number of reloads so far.
 | 
			
		||||
	 * @returns        String that holds the HTML for a form and a JavaScript command.
 | 
			
		||||
	 */
 | 
			
		||||
	private function getReloaderForm( $url, $start, $end, $reloads ) {
 | 
			
		||||
		return
 | 
			
		||||
<<<EOF
 | 
			
		||||
@@ -255,12 +268,14 @@ EOF
 | 
			
		||||
		;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/// Adds statistics to the page when all processing is done.
 | 
			
		||||
	/// @param $output  Output object
 | 
			
		||||
	/// @param $start   Index of the first page that was processed.
 | 
			
		||||
	/// @param $end     Index of the last processed page.
 | 
			
		||||
	/// @param $reloads Number of reloads of the page.
 | 
			
		||||
	/// @returns undefined
 | 
			
		||||
  /*
 | 
			
		||||
	 * Adds statistics to the page when all processing is done.
 | 
			
		||||
	 * @param $output  Output object
 | 
			
		||||
	 * @param $start   Index of the first page that was processed.
 | 
			
		||||
	 * @param $end     Index of the last processed page.
 | 
			
		||||
	 * @param $reloads Number of reloads of the page.
 | 
			
		||||
	 * @returns undefined
 | 
			
		||||
	 */
 | 
			
		||||
	private function addCompletedInfo( &$output, $start, $end, $reloads ) {
 | 
			
		||||
		global $wgLinkTitlesTimeLimit;
 | 
			
		||||
		$pagesPerReload = sprintf('%0.1f', $end / $reloads);
 | 
			
		||||
@@ -281,19 +296,21 @@ EOF
 | 
			
		||||
			);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/// Counts the number of pages in a read-access wiki database ($dbr).
 | 
			
		||||
	/// @param $dbr Read-only `Database` object.
 | 
			
		||||
	/// @returns Number of pages in the default namespace (0) of the wiki.
 | 
			
		||||
	/*
 | 
			
		||||
	 * Counts the number of pages in a read-access wiki database ($dbr).
 | 
			
		||||
	 * @param $dbr Read-only `Database` object.
 | 
			
		||||
	 * @returns Number of pages in the default namespace (0) of the wiki.
 | 
			
		||||
	 */
 | 
			
		||||
	private function countPages(&$dbr, $namespacesClause) {
 | 
			
		||||
		$res = $dbr->select(
 | 
			
		||||
			'page',
 | 
			
		||||
			array('pagecount' => "COUNT(page_id)"),
 | 
			
		||||
			array( 
 | 
			
		||||
				'page_namespace IN ' . $namespacesClause, 
 | 
			
		||||
			), 
 | 
			
		||||
			__METHOD__ 
 | 
			
		||||
			array(
 | 
			
		||||
				'page_namespace IN ' . $namespacesClause,
 | 
			
		||||
			),
 | 
			
		||||
			__METHOD__
 | 
			
		||||
		);
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
		return $res->current()->pagecount;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										140
									
								
								includes/Splitter.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								includes/Splitter.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,140 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * The Splitter class caches a regular expression that delimits text to be parsed.
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
 *
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 * it under the terms of the GNU General Public License as published by
 | 
			
		||||
 * the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 * (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 * GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU General Public License
 | 
			
		||||
 * along with this program; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 * MA 02110-1301, USA.
 | 
			
		||||
 *
 | 
			
		||||
 * @author Daniel Kraus <bovender@bovender.de>
 | 
			
		||||
 */
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Caches a regular expression that delimits text to be parsed.
 | 
			
		||||
 */
 | 
			
		||||
class Splitter {
	/**
	 * Regular expression used by split() to separate text that may be linked
	 * from text that must be left alone.
	 * @var String $splitter
	 */
	public $splitter;

	/**
	 * The LinkTitles configuration this Splitter was built with.
	 * @var Config $config
	 */
	public $config;

	/**
	 * The cached singleton instance (null until default() creates one).
	 */
	private static $instance;

	/**
	 * Returns the Splitter singleton, creating it on first use.
	 *
	 * A new instance is only built when none is cached; it is built with the
	 * given Config, or with a fresh Config if none was given. Once an instance
	 * exists, the argument is ignored. Call Splitter::invalidate() first to
	 * have an instance built with a different Config.
	 *
	 * @param  Config|null $config LinkTitles configuration.
	 */
	public static function default( Config &$config = null ) {
		if ( self::$instance !== null ) {
			return self::$instance;
		}
		if ( $config === null ) {
			$config = new Config();
		}
		self::$instance = new self( $config );
		return self::$instance;
	}

	/**
	 * Discards the cached singleton instance.
	 *
	 * Used for unit testing.
	 */
	public static function invalidate() {
		self::$instance = null;
	}

	/**
	 * Stores the configuration and builds the splitting expression from it.
	 *
	 * @param Config $config LinkTitles configuration.
	 */
	protected function __construct( Config $config) {
		$this->config = $config;
		$this->buildSplitter();
	}

	/**
	 * Splits a text into sections that may be linked and sections that may not
	 * be linked (e.g., because they already are a link, or a template, etc.).
	 *
	 * @param  String &$text Text to split.
	 * @return Array of strings where even indexes point to linkable sections.
	 */
	public function split( &$text ) {
		$sections = preg_split( $this->splitter, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		return $sections;
	}

	/**
	 * Builds the delimiter that is used in a regexp to separate
	 * text that should be parsed from text that should not be
	 * parsed (e.g. inside existing links etc.)
	 */
	private function buildSplitter() {
		if ( $this->config->skipTemplates ) {
			// Use recursive regex to balance curly braces;
			// see http://www.regular-expressions.info/recurse.html
			$templatesDelimiter = '{{(?>[^{}]|(?R))*}}|';
		} else {
			// Match template names (ignoring any piped [[]] links in them)
			// along with the trailing pipe and parameter name or closing
			// braces; also match sequences of '|wordcharacters=' (without
			// spaces in them) that usually only occur as parameter names in
			// transclusions (but could also occur as wiki table cell contents).
			// TODO: Find a way to match parameter names in transclusions, but
			// not in table cells or other sequences involving a pipe character
			// and equal sign.
			$templatesDelimiter = '{{[^|]*?(?:(?:\[\[[^]]+]])?)[^|]*?(?:\|(?:\w+=)?|(?:}}))|\|\w+=|';
		}

		// Headings are only excluded from linking if the configuration does
		// not ask for them to be parsed.
		$delimiter = $this->config->parseHeadings ? '' : '=+.+?=+|';

		$urlPattern = '[a-z]+?\:\/\/(?:\S+\.)+\S+(?:\/.*)?';

		// Alternatives that are excluded from linking: existing wiki links
		// ("[[...]]"), wiki headings ("= ... =", "== ... ==" etc.), urls
		// ("http://example.com", "[http://example.com]",
		// "[http://example.com Description]"), email addresses
		// ("mail@example.com") and more. For preg_split() to work as intended
		// there must be only one capturing subpattern (which precludes the use
		// of conditional subpatterns), so all alternatives are wrapped in a
		// single group.
		$excluded = array(
			'\[\[.*?\]\]|',                            // links
			$delimiter,                                // titles (if requested)
			$templatesDelimiter,                       // templates (if requested)
			'^ .+?\n|\n .+?\n|\n .+?$|^ .+?$|',        // preformatted text
			'<nowiki>.*?<.nowiki>|<code>.*?<\/code>|', // nowiki/code
			'<pre>.*?<\/pre>|<html>.*?<\/html>|',      // pre/html
			'<script>.*?<\/script>|',                  // script
			'<gallery>.*?<\/gallery>|',                // gallery
			'<div.+?>|<\/div>|',                       // attributes of div elements
			'<span.+?>|<\/span>|',                     // attributes of span elements
			'<file>[^<]*<\/file>|',                    // stuff inside file elements
			'style=".+?"|class=".+?"|',                // styles and classes (e.g. of wikitables)
			'<noautolinks>.*?<\/noautolinks>|',        // custom tag 'noautolinks'
			'\[' . $urlPattern . '\s.+?\]|'. $urlPattern .  '(?=\s|$)|', // urls
			'(?<=\b)\S+\@(?:\S+\.)+\S+(?=\b)',         // email addresses
		);

		$this->splitter = '/(' . implode( '', $excluded ) . ')/ismS';
	}
}
 | 
			
		||||
							
								
								
									
										194
									
								
								includes/Target.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										194
									
								
								includes/Target.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,194 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * The LinkTitles\Target represents a Wiki page that is a potential link target.
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
 *
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 * it under the terms of the GNU General Public License as published by
 | 
			
		||||
 * the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 * (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 * GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU General Public License
 | 
			
		||||
 * along with this program; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 * MA 02110-1301, USA.
 | 
			
		||||
 *
 | 
			
		||||
 * @author Daniel Kraus <bovender@bovender.de>
 | 
			
		||||
 */
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Represents a page that is a potential link target.
 | 
			
		||||
 */
 | 
			
		||||
class Target {
	/**
	 * A Title object for the target page currently being examined.
	 * @var \Title $title
	 */
	private $title;

	/**
	 * The value returned by \Title::getTitleValue() for the target title.
	 *
	 * The constructor has always assigned this member; it is declared here
	 * explicitly (and kept public, matching the visibility the implicit
	 * property had) so that it is no longer created as a dynamic property.
	 *
	 * @var \TitleValue|null $titleValue
	 */
	public $titleValue;

	/**
	 * Caches the target page content as a \Content object.
	 *
	 * @var \Content $content
	 */
	private $content;

	/**
	 * Regex that matches the start of a word; this expression depends on the
	 * setting of LinkTitles\Config->wordStartOnly;
	 * @var String $wordStart
	 */
	public $wordStart;

	/**
	 * Regex that matches the end of a word; this expression depends on the
	 * setting of LinkTitles\Config->wordEndOnly;
	 * @var String $wordEnd
	 */
	public $wordEnd;

	/**
	 * LinkTitles configuration.
	 * @var Config $config
	 */
	private $config;

	/**
	 * Constructs a new Target object
	 *
	 * The parameters may be taken from database rows, for example.
	 *
	 * @param Int $nameSpace Name space of the target page
	 * @param String $title Title of the target page
	 * @param Config $config LinkTitles configuration
	 * @throws \InvalidArgumentException if no valid title can be built from
	 *         $nameSpace and $title.
	 */
	public function __construct( $nameSpace, $title, Config &$config ) {
		$titleObject = \Title::makeTitleSafe( $nameSpace, $title );
		if ( !( $titleObject instanceof \Title ) ) {
			// makeTitleSafe() did not produce a Title; fail with a clear message
			// instead of a fatal "member function on null" error further down.
			throw new \InvalidArgumentException(
				"Cannot create a valid title from namespace '$nameSpace' and text '$title'."
			);
		}
		$this->title = $titleObject;
		$this->titleValue = $this->title->getTitleValue();
		$this->config = $config;

		// Use unicode character properties rather than \b escape sequences
		// to detect whole words containing non-ASCII characters as well.
		// Note that this requires a PCRE library that was compiled with
		// --enable-unicode-properties
		$this->wordStart = $config->wordStartOnly ? '(?<!\pL)' : '';
		$this->wordEnd = $config->wordEndOnly ? '(?!\pL)' : '';
	}

	/**
	 * Gets the string representation of the target title.
	 * @return String title text
	 */
	public function getTitleText() {
		return $this->title->getText();
	}

	/**
	 * Gets the title string with certain characters escaped that may interfere
	 * with regular expressions.
	 *
	 * The text is quoted with preg_quote() using '/' as the delimiter.
	 * @return String representation of the title, regex-safe
	 */
	public function getRegexSafeTitle() {
		return preg_quote( $this->title->getText(), '/' );
	}

	/**
	 * Builds a regular expression of the title
	 *
	 * Depending on the $config->capitalLinks setting, the title has to be
	 * searched for either in a strictly case-sensitive way, or in a 'fuzzy' way
	 * where the first letter of the title may be either case.
	 * @return String regular expression for this title.
	 */
	public function getCaseSensitiveRegex() {
		$regexSafeTitle = $this->getRegexSafeTitle();

		// The first character is only made case-insensitive if it is not the
		// backslash of an escape sequence produced by preg_quote(). Note that
		// the string offset addresses the first byte, not the first character.
		if ( $this->config->capitalLinks && ( $regexSafeTitle[0] != '\\' ) ) {
			$searchTerm = '((?i)' . $regexSafeTitle[0] . '(?-i)' . substr( $regexSafeTitle, 1 ) . ')';
		} else {
			$searchTerm = '(' . $regexSafeTitle . ')';
		}

		return $this->buildRegex( $searchTerm );
	}

	/**
	 * Builds a regular expression pattern for the title in a case-insensitive
	 * way.
	 * @return String case-insensitive regular expression pattern for the title
	 */
	public function getCaseInsensitiveRegex() {
		// buildRegex() ends the pattern with its modifiers, so the 'i' modifier
		// can simply be appended.
		return $this->buildRegex( $this->getRegexSafeTitle() ) . 'i';
	}

	/**
	 * Builds the basic regex that is used to match target page titles in a source
	 * text.
	 *
	 * The search term is preceded by a negative lookbehind for the characters
	 * : . @ / ? & and wrapped in the word start/end expressions.
	 * @param  String $searchTerm Target page title (special characters must be quoted)
	 * @return String regular expression pattern
	 */
	private function buildRegex( $searchTerm ) {
		return '/(?<![\:\.\@\/\?\&])' . $this->wordStart . $searchTerm . $this->wordEnd . '/S';
	}

	/**
	 * Returns the \Content of the target page.
	 *
	 * The value is cached once a non-null content has been obtained.
	 * @return \Content|null Content of the Target page, or null if the page
	 *         object did not return any content.
	 */
	public function getContent() {
		if ( $this->content === null ) {
			$this->content = \WikiPage::factory( $this->title )->getContent();
		}
		return $this->content;
	}

	/**
	 * Examines the current target page. Returns true if it may be linked;
	 * false if not. This depends on two settings:
	 * $wgLinkTitlesCheckRedirect and $wgLinkTitlesEnableNoTargetMagicWord
	 * and whether the target page is a redirect or contains the
	 * __NOAUTOLINKTARGET__ magic word.
	 *
	 * If the content of the target page cannot be obtained, neither condition
	 * can apply, and the method returns true.
	 *
	 * @param \Title $fromTitle
	 *
	 * @return boolean
	 */
	public function mayLinkTo( \Title $fromTitle ) {
		$checkRedirect = $this->config->checkRedirect;
		$checkMagicWord = $this->config->enableNoTargetMagicWord;

		// The page content is only loaded if at least one check is enabled.
		if ( !$checkRedirect && !$checkMagicWord ) {
			return true;
		}

		$content = $this->getContent();
		if ( $content === null ) {
			// Nothing to inspect; avoid calling methods on null.
			return true;
		}

		// If checking for redirects is enabled and the target page does
		// indeed redirect to the current page, the title must stay unlinked.
		if ( $checkRedirect ) {
			$redirectTitle = $content->getUltimateRedirectTarget();
			if ( $redirectTitle && $redirectTitle->equals( $fromTitle ) ) {
				return false;
			}
		}

		// If the magic word __NOAUTOLINKTARGET__ is enabled and the target
		// page does indeed contain this magic word, the title must stay
		// unlinked.
		if ( $checkMagicWord ) {
			if ( $content->matchMagicWord( \MagicWord::get( 'MAG_LINKTITLES_NOTARGET' ) ) ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Determines if the Target's title is the same as another title.
	 * @param  \Title  $otherTitle Other title
	 * @return boolean             True if the $otherTitle is the same, false if not.
	 */
	public function isSameTitle( \Title $otherTitle ) {
		return $this->title->equals( $otherTitle );
	}
}
 | 
			
		||||
							
								
								
									
										142
									
								
								includes/Targets.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										142
									
								
								includes/Targets.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,142 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * The LinkTitles\Targets class.
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> ('bovender')
 | 
			
		||||
 *
 | 
			
		||||
 * This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 * it under the terms of the GNU General Public License as published by
 | 
			
		||||
 * the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 * (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 * This program is distributed in the hope that it will be useful,
 | 
			
		||||
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 * GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 * You should have received a copy of the GNU General Public License
 | 
			
		||||
 * along with this program; if not, write to the Free Software
 | 
			
		||||
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 * MA 02110-1301, USA.
 | 
			
		||||
 *
 | 
			
		||||
 * @author Daniel Kraus <bovender@bovender.de>
 | 
			
		||||
 */
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Fetches potential target page titles from the database.
 | 
			
		||||
 */
 | 
			
		||||
class Targets {
	private static $instance;

	/**
	 * Singleton factory that returns a (cached) database query results with
	 * potential target page titles.
	 *
	 * The subset of pages that may serve as target pages depends on the
	 * name space of the source page. Therefore, if the name space of $title
	 * differs from the cached name space, the database is queried again.
	 *
	 * @param  \Title $title  Title of the current (source) page; only its
	 *                        namespace is used.
	 * @param  Config $config LinkTitles configuration.
	 * @return Targets        The cached or newly created instance.
	 */
	public static function default( \Title $title, Config $config ) {
		if ( ( self::$instance === null ) || ( self::$instance->nameSpace != $title->getNamespace() ) ) {
			self::$instance = new Targets( $title, $config );
		}
		return self::$instance;
	}

	/**
	 * Invalidates the cache; the next call of Targets::default() will trigger
	 * a database query.
	 *
	 * Use this in unit tests which are performed in a single request cycle so that
	 * changes to the pages list may not be picked up by the cached Targets instance.
	 */
	public static function invalidate() {
		self::$instance = null;
	}

	/**
	 * Holds the results of a database query for target page titles, filtered
	 * and sorted.
	 * @var IResultWrapper $queryResult
	 */
	public $queryResult;

	/**
	 * Holds the name space (integer) for which the list of target pages was built.
	 * @var Int $nameSpace
	 */
	public $nameSpace;

	/**
	 * LinkTitles configuration used to build the query.
	 * @var Config $config
	 */
	private $config;

	/**
	 * The constructor is private to enforce using the singleton pattern.
	 *
	 * Stores the configuration and the namespace of $title, then queries the
	 * database.
	 * @param  \Title $title
	 * @param  Config $config
	 */
	private function __construct( \Title $title, Config $config ) {
		$this->config = $config;
		$this->nameSpace = $title->getNameSpace();
		$this->fetch();
	}

	/**
	 * Fetches the page titles from the database.
	 *
	 * The result is stored in $this->queryResult. Titles are ordered first by
	 * namespace weight (the current namespace comes first) and then by title
	 * length, ascending or descending depending on the preferShortTitles
	 * setting.
	 */
	private function fetch() {
		$sortOrder = $this->config->preferShortTitles ? 'ASC' : 'DESC';

		$dbr = wfGetDB( DB_SLAVE );

		// Build a blacklist of pages that are not supposed to be link
		// targets. Spaces are replaced with underscores as in the original
		// query; every entry is quoted by the database layer rather than being
		// spliced into the SQL by hand.
		$quotedBlackList = [];
		foreach ( $this->config->blackList as $blackListedTitle ) {
			$quotedBlackList[] = $dbr->addQuotes( str_replace( ' ', '_', $blackListedTitle ) );
		}

		// Build our weight list. Make sure current namespace is first element
		$nameSpaces = array_diff( $this->config->nameSpaces, [ $this->nameSpace ] );
		array_unshift( $nameSpaces, $this->nameSpace );
		$nameSpaces = array_map( 'intval', $nameSpaces );

		// No need for sanity check. We are sure that we have at least one
		// element in the array.
		$weightSelect = "CASE page_namespace ";
		$currentWeight = 0;
		foreach ( $nameSpaces as $nameSpaceValue ) {
			$currentWeight = $currentWeight + 100;
			$weightSelect = $weightSelect . " WHEN " . $nameSpaceValue . " THEN " . $currentWeight . PHP_EOL;
		}
		$weightSelect = $weightSelect . " END ";
		$nameSpacesClause = '(' . implode( ', ', $nameSpaces ) . ')';

		// Since the db may be sqlite, we need a try..catch structure because
		// sqlite does not support the CHAR_LENGTH function. The exception class
		// must be fully qualified: inside this namespace, a bare 'Exception'
		// refers to LinkTitles\Exception and would never match.
		try {
			$this->queryResult = $this->queryTitles(
				$dbr, 'CHAR_LENGTH', $weightSelect, $nameSpacesClause, $quotedBlackList, $sortOrder, __METHOD__
			);
		} catch ( \Exception $e ) {
			$this->queryResult = $this->queryTitles(
				$dbr, 'LENGTH', $weightSelect, $nameSpacesClause, $quotedBlackList, $sortOrder, __METHOD__
			);
		}
	}

	/**
	 * Runs the SELECT query for target page titles.
	 *
	 * @param  object $dbr              Database connection returned by wfGetDB().
	 * @param  String $lengthFunction   SQL function used to measure the title
	 *                                  length ('CHAR_LENGTH' or 'LENGTH').
	 * @param  String $weightSelect     SQL CASE expression for the namespace weight.
	 * @param  String $nameSpacesClause Parenthesized list of namespace numbers.
	 * @param  array  $quotedBlackList  Blacklisted titles, already quoted for SQL.
	 * @param  String $sortOrder        'ASC' or 'DESC'.
	 * @param  String $caller           Name of the calling method, passed on to
	 *                                  the database layer.
	 * @return mixed  Whatever $dbr->select() returns.
	 */
	private function queryTitles(
		$dbr, $lengthFunction, $weightSelect, $nameSpacesClause, array $quotedBlackList, $sortOrder, $caller
	) {
		$titleLength = $lengthFunction . '(page_title)';
		$conditions = [
			'page_namespace IN ' . $nameSpacesClause,
			$titleLength . ' >= ' . (int)$this->config->minimumTitleLength,
		];
		// An empty "NOT IN ()" list is not valid SQL, so the condition is only
		// added if there is something to exclude.
		if ( count( $quotedBlackList ) > 0 ) {
			$conditions[] = 'page_title NOT IN (' . implode( ',', $quotedBlackList ) . ')';
		}

		return $dbr->select(
			'page',
			[ 'page_title', 'page_namespace', 'weight' => $weightSelect ],
			$conditions,
			$caller,
			[ 'ORDER BY' => 'weight ASC, ' . $titleLength . ' ' . $sortOrder ]
		);
	}
}
 | 
			
		||||
@@ -1,21 +1,23 @@
 | 
			
		||||
<?php
 | 
			
		||||
/*
 | 
			
		||||
 *      Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> @bovender
 | 
			
		||||
/**
 | 
			
		||||
 * LinkTitles command line interface (CLI)/maintenance script
 | 
			
		||||
 *
 | 
			
		||||
 *      This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 *      it under the terms of the GNU General Public License as published by
 | 
			
		||||
 *      the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 *      (at your option) any later version.
 | 
			
		||||
 *  Copyright 2012-2017 Daniel Kraus <bovender@bovender.de> @bovender
 | 
			
		||||
 *
 | 
			
		||||
 *      This program is distributed in the hope that it will be useful,
 | 
			
		||||
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 *      GNU General Public License for more details.
 | 
			
		||||
 *  This program is free software; you can redistribute it and/or modify
 | 
			
		||||
 *  it under the terms of the GNU General Public License as published by
 | 
			
		||||
 *  the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
 *  (at your option) any later version.
 | 
			
		||||
 *
 | 
			
		||||
 *      You should have received a copy of the GNU General Public License
 | 
			
		||||
 *      along with this program; if not, write to the Free Software
 | 
			
		||||
 *      Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 *      MA 02110-1301, USA.
 | 
			
		||||
 *  This program is distributed in the hope that it will be useful,
 | 
			
		||||
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
 *  GNU General Public License for more details.
 | 
			
		||||
 *
 | 
			
		||||
 *  You should have received a copy of the GNU General Public License
 | 
			
		||||
 *  along with this program; if not, write to the Free Software
 | 
			
		||||
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 | 
			
		||||
 *  MA 02110-1301, USA.
 | 
			
		||||
 */
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
@@ -44,17 +46,21 @@ else
 | 
			
		||||
	}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
require_once( __DIR__ . "/includes/LinkTitles_Extension.php" );
 | 
			
		||||
require_once( __DIR__ . "/includes/Extension.php" );
 | 
			
		||||
 | 
			
		||||
/// Core class of the maintanance script.
 | 
			
		||||
/// @note Note that the execution of maintenance scripts is prohibited for
 | 
			
		||||
/// an Apache web server due to a `.htaccess` file that declares `deny from
 | 
			
		||||
/// all`. Other webservers may exhibit different behavior. Be aware that
 | 
			
		||||
/// anybody who is able to execute this script may place a high load on the
 | 
			
		||||
/// server.
 | 
			
		||||
/// @ingroup batch
 | 
			
		||||
/**
 | 
			
		||||
 * Core class of the maintenance script.
 | 
			
		||||
 * @note Note that the execution of maintenance scripts is prohibited for
 | 
			
		||||
 * an Apache web server due to a `.htaccess` file that declares `deny from
 | 
			
		||||
 * all`. Other webservers may exhibit different behavior. Be aware that
 | 
			
		||||
 * anybody who is able to execute this script may place a high load on the
 | 
			
		||||
 * server.
 | 
			
		||||
 * @ingroup batch
 | 
			
		||||
 */
 | 
			
		||||
class Cli extends \Maintenance {
 | 
			
		||||
	/// The constructor adds a description and one option.
 | 
			
		||||
	/**
 | 
			
		||||
	 * Constructor.
 | 
			
		||||
	 */
 | 
			
		||||
	public function __construct() {
 | 
			
		||||
		parent::__construct();
 | 
			
		||||
		$this->addDescription("Iterates over wiki pages and automatically adds links to other pages.");
 | 
			
		||||
@@ -65,41 +71,45 @@ class Cli extends \Maintenance {
 | 
			
		||||
			true,  // requires argument
 | 
			
		||||
			"s"
 | 
			
		||||
		);
 | 
			
		||||
				$this->addOption(
 | 
			
		||||
		$this->addOption(
 | 
			
		||||
			"page",
 | 
			
		||||
			"page name to process",
 | 
			
		||||
			false, // not required
 | 
			
		||||
			true,  // requires argument
 | 
			
		||||
			"p"
 | 
			
		||||
		);
 | 
			
		||||
				$this->addOption(
 | 
			
		||||
			"log",
 | 
			
		||||
			"enables logging to console",
 | 
			
		||||
			false, // not required
 | 
			
		||||
			false,  // requires no argument
 | 
			
		||||
			"l"
 | 
			
		||||
		);
 | 
			
		||||
				$this->addOption(
 | 
			
		||||
			"debug",
 | 
			
		||||
			"enables debug logging to console",
 | 
			
		||||
			false, // not required
 | 
			
		||||
			false  // requires no argument
 | 
			
		||||
		);
 | 
			
		||||
		// TODO: Add back logging options.
 | 
			
		||||
		// TODO: Add configuration options.
 | 
			
		||||
		// $this->addOption(
 | 
			
		||||
		// 	"log",
 | 
			
		||||
		// 	"enables logging to console",
 | 
			
		||||
		// 	false, // not required
 | 
			
		||||
		// 	false,  // requires no argument
 | 
			
		||||
		// 	"l"
 | 
			
		||||
		// );
 | 
			
		||||
		// $this->addOption(
 | 
			
		||||
		// 	"debug",
 | 
			
		||||
		// 	"enables debug logging to console",
 | 
			
		||||
		// 	false, // not required
 | 
			
		||||
		// 	false  // requires no argument
 | 
			
		||||
		// );
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/// Main function of the maintenance script.
 | 
			
		||||
	/// Will iterate over all pages in the wiki (starting at a certain index,
 | 
			
		||||
	/// if the `--start` option is given) and call LinkTitles::processPage() for
 | 
			
		||||
	/// each page.
 | 
			
		||||
	/*
 | 
			
		||||
	 * Main function of the maintenance script.
 | 
			
		||||
	 * Will iterate over all pages in the wiki (starting at a certain index,
 | 
			
		||||
	 * if the `--start` option is given) and call LinkTitles::processPage() for
 | 
			
		||||
	 * each page.
 | 
			
		||||
	 */
 | 
			
		||||
	public function execute() {
 | 
			
		||||
		if ($this->hasOption('log'))
 | 
			
		||||
		{
 | 
			
		||||
				Extension::$ltConsoleOutput = true;
 | 
			
		||||
		}
 | 
			
		||||
		if ($this->hasOption('debug'))
 | 
			
		||||
		{
 | 
			
		||||
				Extension::$ltConsoleOutputDebug = true;
 | 
			
		||||
		}
 | 
			
		||||
		// if ($this->hasOption('log'))
 | 
			
		||||
		// {
 | 
			
		||||
		// 	Extension::$ltConsoleOutput = true;
 | 
			
		||||
		// }
 | 
			
		||||
		// if ($this->hasOption('debug'))
 | 
			
		||||
		// {
 | 
			
		||||
		// 	Extension::$ltConsoleOutputDebug = true;
 | 
			
		||||
		// }
 | 
			
		||||
		if ( $this->hasOption('page') ) {
 | 
			
		||||
			if ( !$this->hasOption( 'start' ) ) {
 | 
			
		||||
				$this->singlePage();
 | 
			
		||||
@@ -113,10 +123,14 @@ class Cli extends \Maintenance {
 | 
			
		||||
			if ( $startIndex < 0 ) {
 | 
			
		||||
				$this->error( 'FATAL: Start index must be 0 or greater.', 1 );
 | 
			
		||||
			};
 | 
			
		||||
			$this->allPages( $startIndex);
 | 
			
		||||
			$this->allPages( $startIndex );
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Processes a single page.
 | 
			
		||||
	 * @return bool True on success, false on failure.
 | 
			
		||||
	 */
 | 
			
		||||
	private function singlePage() {
 | 
			
		||||
		$pageName = strval( $this->getOption( 'page' ) );
 | 
			
		||||
		$this->output( "Processing single page: '$pageName'\n" );
 | 
			
		||||
@@ -131,17 +145,22 @@ class Cli extends \Maintenance {
 | 
			
		||||
		return $success;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/**
 | 
			
		||||
	 * Process all pages in the Wiki.
 | 
			
		||||
	 * @param  integer $index Index of the start page.
 | 
			
		||||
	 * @return bool           True on success, false on failure.
 | 
			
		||||
	 */
 | 
			
		||||
	private function allPages( $index = 0 ) {
 | 
			
		||||
		global $wgLinkTitlesNamespaces;
 | 
			
		||||
		$config = new Config();
 | 
			
		||||
 | 
			
		||||
		// Retrieve page names from the database.
 | 
			
		||||
		$dbr = $this->getDB( DB_SLAVE );
 | 
			
		||||
		$namespacesClause = str_replace( '_', ' ','(' . implode( ', ', $wgLinkTitlesNamespaces ) . ')' );
 | 
			
		||||
		$nameSpacesClause = str_replace( '_', ' ','(' . implode( ', ', $config->nameSpaces ) . ')' );
 | 
			
		||||
		$res = $dbr->select(
 | 
			
		||||
			'page',
 | 
			
		||||
			array( 'page_title', 'page_namespace' ),
 | 
			
		||||
			array(
 | 
			
		||||
				'page_namespace IN ' . $namespacesClause,
 | 
			
		||||
				'page_namespace IN ' . $nameSpacesClause,
 | 
			
		||||
			),
 | 
			
		||||
			__METHOD__,
 | 
			
		||||
			array(
 | 
			
		||||
							
								
								
									
										20
									
								
								tests/phpunit/ConfigTest.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								tests/phpunit/ConfigTest.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,20 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * Tests the LinkTitles\Config class.
 | 
			
		||||
 *
 | 
			
		||||
 * This single unit test basically serves to ensure the Config class is working.
 | 
			
		||||
 * @group bovender
 | 
			
		||||
 * @group Database
 | 
			
		||||
 */
 | 
			
		||||
class ConfigTest extends LinkTitles\TestCase {

  /**
   * Asserts that a newly constructed Config object exposes the value of the
   * $wgLinkTitlesParseOnEdit global as its parseOnEdit property.
   */
  public function testParseOnEdit() {
    $this->setMwGlobals( [
      'wgLinkTitlesParseOnEdit' => true,
      'wgLinkTitlesParseOnRender' => false
    ] );
    $config = new LinkTitles\Config();
    global $wgLinkTitlesParseOnEdit;
    // PHPUnit expects the expected value first and the actual value second.
    $this->assertSame( $wgLinkTitlesParseOnEdit, $config->parseOnEdit );
  }
}
 | 
			
		||||
							
								
								
									
										27
									
								
								tests/phpunit/ParseOnEditTest.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								tests/phpunit/ParseOnEditTest.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,27 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * @group bovender
 | 
			
		||||
 * @group Database
 | 
			
		||||
 */
 | 
			
		||||
class ParseOnEditTest extends LinkTitles\TestCase {

  /**
   * With parsing on edit enabled, the text of a newly inserted page is
   * expected to contain a link to the 'link target' page, but no link to the
   * page itself.
   */
  public function testParseOnEdit() {
    $globals = [
      'wgLinkTitlesParseOnEdit' => true,
      'wgLinkTitlesParseOnRender' => false
    ];
    $this->setMwGlobals( $globals );

    $inserted = $this->insertPage( 'test page', 'This page should link to the link target but not to test page' );
    $wikiPage = WikiPage::newFromId( $inserted['id'] );

    $this->assertSame(
      'This page should link to the [[link target]] but not to test page',
      $this->getPageText( $wikiPage )
    );
  }

  /**
   * With parsing on edit disabled, the text of a newly inserted page is
   * expected to be stored unchanged.
   */
  public function testDoNotParseOnEdit() {
    $globals = [
      'wgLinkTitlesParseOnEdit' => false,
      'wgLinkTitlesParseOnRender' => false
    ];
    $this->setMwGlobals( $globals );

    $inserted = $this->insertPage( 'test page', 'This page should not link to the link target' );
    $wikiPage = WikiPage::newFromId( $inserted['id'] );

    $this->assertSame(
      'This page should not link to the link target',
      $this->getPageText( $wikiPage )
    );
  }
}
 | 
			
		||||
							
								
								
									
										31
									
								
								tests/phpunit/SplitterTest.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								tests/phpunit/SplitterTest.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,31 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * @group bovender
 | 
			
		||||
 */
 | 
			
		||||
class SplitterTest extends MediaWikiTestCase {
	/**
	 * Splits the input with the default splitter and compares the resulting
	 * array of fragments with the expected one.
	 *
	 * @dataProvider provideSplitData
	 */
	public function testSplit( $input, $expectedOutput ) {
		$actualOutput = LinkTitles\Splitter::default()->split( $input );
		$this->assertSame( $expectedOutput, $actualOutput );
	}

	/**
	 * Provides pairs of an input text and the fragments it is expected to be
	 * split into.
	 */
	// TODO: Add more examples.
	public static function provideSplitData() {
		$existingLink = [
			'this may be linked [[this may not be linked]]',
			[ 'this may be linked ', '[[this may not be linked]]', '' ]
		];
		$galleryTag = [
			'this may be linked <gallery>this may not be linked</gallery>',
			[ 'this may be linked ', '<gallery>this may not be linked</gallery>', '' ]
		];
		$nestedTemplate = [
			'this may be linked {{mytemplate|param={{transcluded}}}}',
			[ 'this may be linked ', '{{mytemplate|param={{transcluded}}}}', '' ]
		];

		return [ $existingLink, $galleryTag, $nestedTemplate ];
	}
}
 | 
			
		||||
							
								
								
									
										40
									
								
								tests/phpunit/TargetTest.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								tests/phpunit/TargetTest.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,40 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 | 
			
		||||
 * @group bovender
 | 
			
		||||
 */
 | 
			
		||||
class TargetTest extends MediaWikiTestCase {

	/**
	 * Asserts that the word start expression of a Target follows the
	 * wordStartOnly setting.
	 *
	 * @dataProvider provideStartOnly
	 */
	public function testTargetWordStartOnly( $enabled, $delimiter ) {
		$config = new LinkTitles\Config();
		$config->wordStartOnly = $enabled;
		$target = new LinkTitles\Target( NS_MAIN, 'test page', $config );
		$this->assertSame( $delimiter, $target->wordStart );
	}

	/**
	 * Provides pairs of a wordStartOnly setting and the expected word start
	 * expression.
	 */
	public static function provideStartOnly() {
		return [
			[ true, '(?<!\pL)' ],
			[ false, '' ]
		];
	}

	/**
	 * Asserts that the word end expression of a Target follows the
	 * wordEndOnly setting.
	 *
	 * @dataProvider provideEndOnly
	 */
	public function testTargetWordEndOnly( $enabled, $delimiter ) {
		$config = new LinkTitles\Config();
		$config->wordEndOnly = $enabled;
		$target = new LinkTitles\Target( NS_MAIN, 'test page', $config );
		$this->assertSame( $delimiter, $target->wordEnd );
	}

	/**
	 * Provides pairs of a wordEndOnly setting and the expected word end
	 * expression.
	 */
	public static function provideEndOnly() {
		return [
			[ true, '(?!\pL)' ],
			[ false, '' ]
		];
	}
}
 | 
			
		||||
							
								
								
									
										26
									
								
								tests/phpunit/TargetsTest.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								tests/phpunit/TargetsTest.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,26 @@
 | 
			
		||||
<?php
 | 
			
		||||
/**
 * Tests the LinkTitles\Targets class.
 *
 * @group bovender
 * @group Database
 */
class TargetsTest extends LinkTitles\TestCase {

	/**
	 * Asserts that the number of rows in the targets query result equals the
	 * number of pages reported by SiteStatsInit for the same database.
	 */
	public function testTargets() {
		$title = \Title::newFromText( 'link target' );
		$targets = LinkTitles\Targets::default( $title, new LinkTitles\Config() );

		// Count number of articles: Inspired by updateArticleCount.php maintenance
		// script: https://doc.wikimedia.org/mediawiki-core/master/php/updateArticleCount_8php_source.html
		// NOTE(review): DB_SLAVE was superseded by DB_REPLICA in later MediaWiki
		// releases; switch once the minimum supported version is confirmed.
		$dbr = wfGetDB( DB_SLAVE );
		$counter = new SiteStatsInit( $dbr );
		$count = $counter->pages();

		// PHPUnit expects ( $expected, $actual ); the page count is the reference
		// value, the query result is the value under test.
		$this->assertEquals( $count, $targets->queryResult->numRows() );
	}
}
 | 
			
		||||
							
								
								
									
										19
									
								
								tests/phpunit/TestCase.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								tests/phpunit/TestCase.php
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,19 @@
 | 
			
		||||
<?php
 | 
			
		||||
namespace LinkTitles;
 | 
			
		||||
 | 
			
		||||
/**
 * Common base class for the LinkTitles PHPUnit tests.
 *
 * Provides a fixture page and a helper to read a page's serialized text.
 */
abstract class TestCase extends \MediaWikiTestCase {

	/**
	 * Runs the parent set-up, inserts the 'link target' fixture page and then
	 * invalidates Targets so that the pages table is queried again.
	 */
	protected function setUp() {
		parent::setUp();
		$this->insertPage( 'link target', 'This page serves as a link target' );
		// force re-querying the pages table
		Targets::invalidate();
	}

	/**
	 * Delegates to the parent tear-down; no additional clean-up is done here.
	 */
	protected function tearDown() {
		parent::tearDown();
	}

	/**
	 * Returns the content of a page, serialized by the page's content handler.
	 *
	 * @param \WikiPage $page Page whose content is read.
	 * @return mixed Result of the content handler's serializeContent() call.
	 */
	protected function getPageText( \WikiPage $page ) {
		$currentContent = $page->getContent();
		$handler = $page->getContentHandler();

		return $handler->serializeContent( $currentContent );
	}
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user