From 124e92aafdc3d597f630a6a63644901b3ea692c8 Mon Sep 17 00:00:00 2001 From: c0nnex Date: Fri, 2 Oct 2015 02:35:47 +0200 Subject: [PATCH] * Fixed linking in Namespace pages * added parameters to cli interface --page={pagename} only process that page --log : show some output about the process --debug : show debug output in cli * changed pagecount on SpecialPage to use COUNT instead of selecting all rows --- LinkTitles.body.php | 78 +++++++++++++++++++++++++++---------------- LinkTitles.cli.php | 40 +++++++++++++++++++++- SpecialLinkTitles.php | 13 ++++---- 3 files changed, 95 insertions(+), 36 deletions(-) diff --git a/LinkTitles.body.php b/LinkTitles.body.php index 16d0895..c01748c 100755 --- a/LinkTitles.body.php +++ b/LinkTitles.body.php @@ -18,7 +18,7 @@ * MA 02110-1301, USA. */ /// @file - + /// Helper function for development and debugging. /// @param $var Any variable. Raw content will be dumped to stderr. /// @return undefined @@ -35,6 +35,9 @@ /// A Title object for the target page currently being examined. private static $targetTitle; + // The TitleValue object of the target page + private static $targetTitleValue; + /// The content object for the currently processed target page. /// This variable is necessary to be able to prevent loading the target /// content twice. @@ -52,6 +55,9 @@ private static $wordStartDelim; private static $wordEndDelim; + public static $ltConsoleOutput; + public static $ltConsoleOutputDebug; + /// Setup function, hooks the extension's functions to MediaWiki events. public static function setup() { global $wgLinkTitlesParseOnEdit; @@ -91,6 +97,24 @@ return true; } + /// Local Debugging output function which can send output to console as well + public static function ltDebugLog($text) { + if (LinkTitles::$ltConsoleOutputDebug) + { + print $text . 
"\n"; + } + wfDebugLog('LinkTitles', $text , 'private'); + } + + /// Local Logging output function which can send output to console as well + public static function ltLog($text) { + if (LinkTitles::$ltConsoleOutput) + { + print $text . "\n"; + } + wfDebugLog('LinkTitles', $text , 'private'); + } + /// Core function of the extension, performs the actual parsing of the content. /// @param Title $title Title of the page being parsed /// @param $text String that holds the article content @@ -116,6 +140,8 @@ LinkTitles::$currentTitle = $title; $newText = $text; + + // Build a blacklist of pages that are not supposed to be link // targets. This includes the current page. $blackList = str_replace( '_', ' ', @@ -123,14 +149,12 @@ LinkTitles::$currentTitle->getDbKey() . '")' ); $currentNamespace[] = $title->getNamespace(); - wfDebugLog("LinkTitles",'$wgLinkTitlesNamespaces = '. print_r($wgLinkTitlesNamespaces,true),'private'); - wfDebugLog("LinkTitles",'$currentNamespace = '. print_r($currentNamespace,true),'private'); - + + // Build our weight list. Make sure current namespace is first element $namespaces = array_diff($wgLinkTitlesNamespaces, $currentNamespace); array_unshift($namespaces, $currentNamespace[0] ); - wfDebugLog("LinkTitles",'$namespaces = '. print_r($namespaces,true),'private'); - + // No need for sanitiy check. we are sure that we have at least one element in the array $weightSelect = "CASE page_namespace "; $currentWeight = 0; @@ -186,9 +210,11 @@ // Escape certain special characters in the page title to prevent // regexp compilation errors LinkTitles::$targetTitleText = LinkTitles::$targetTitle->getPrefixedText(); // containes Namespace ! - $quotedTitle = preg_quote( LinkTitles::$targetTitle->getTitleValue()->getText(), '/'); - wfDebugLog("LinkTitles",'TargetTitle='. LinkTitles::$targetTitleText,"private"); - wfDebugLog("LinkTitles",'TargetTitleQuoted='. 
$quotedTitle,"private"); + $quotedTitle = preg_quote( LinkTitles::$targetTitleValue->getText(), '/'); + + LinkTitles::ltDebugLog('TargetTitle='. LinkTitles::$targetTitleText,"private"); + LinkTitles::ltDebugLog('TargetTitleQuoted='. $quotedTitle,"private"); + // Depending on the global configuration setting $wgCapitalLinks, // the title has to be searched for either in a strictly case-sensitive // way, or in a 'fuzzy' way where the first letter of the title may @@ -242,7 +268,9 @@ /// to obtain such an object. /// @returns undefined public static function processPage(Title $title, RequestContext $context) { - // TODO: make this namespace-aware + + LinkTitles::ltLog('Processing '. $title->getPrefixedText()); + $page = WikiPage::factory($title); $content = $page->getContent(); $text = $content->getContentHandler()->serializeContent($content); @@ -270,7 +298,9 @@ // Build an anonymous callback function to be used in simple mode. private static function simpleModeCallback( array $matches ) { + if ( LinkTitles::checkTargetPage() ) { + LinkTitles::ltLog("Linking '$matches[0]' to '" . LinkTitles::$targetTitle . "'"); return '[[' . LinkTitles::$targetTitle . "|" . $matches[0] . ']]'; } else @@ -288,20 +318,15 @@ // piped link if only the case of the first letter is different. private static function smartModeCallback( array $matches ) { global $wgCapitalLinks; - + if ( $wgCapitalLinks ) { // With $wgCapitalLinks set to true we have a slightly more // complicated version of the callback than if it were false; // we need to ignore the first letter of the page titles, as // it does not matter for linking. if ( LinkTitles::checkTargetPage() ) { - if ( strcmp(substr(LinkTitles::$targetTitleText, 1), substr($matches[0], 1)) == 0 ) { - // Case-sensitive match: no need to bulid piped link. - return '[[' . LinkTitles::$targetTitle . "|" . $matches[0] . ']]'; - } else { - // Case-insensitive match: build piped link. - return '[[' . LinkTitles::$targetTitleText . '|' . $matches[0] . 
']]'; - } + LinkTitles::ltLog("Linking (smart) '$matches[0]' to '" . LinkTitles::$targetTitle . "'"); + return '[[' . LinkTitles::$targetTitle . "|" . $matches[0] . ']]'; } else { @@ -311,13 +336,8 @@ // If $wgCapitalLinks is false, we can use the simple variant // of the callback function. if ( LinkTitles::checkTargetPage() ) { - if ( strcmp(LinkTitles::$targetTitleText, $matches[0]) == 0 ) { - // Case-sensitive match: no need to bulid piped link. - return '[[' . $matches[0] . ']]'; - } else { - // Case-insensitive match: build piped link. - return '[[' . LinkTitles::$targetTitleText . '|' . $matches[0] . ']]'; - } + LinkTitles::ltLog("Linking (smart) '$matches[0]' to '" . LinkTitles::$targetTitle . "'"); + return '[[' . LinkTitles::$targetTitle . '|' . $matches[0] . ']]'; } else { @@ -329,10 +349,10 @@ /// Sets member variables for the current target page. private static function newTarget($ns, $title ) { // @todo Make this wiki namespace aware. - LinkTitles::$targetTitle = Title::newFromText( $title , $ns ); - wfDebugLog("LinkTitles",'newtarget='. print_r( LinkTitles::$targetTitle, true ) ,"private"); - - wfDebugLog("LinkTitles",'altTarget='. print_r( LinkTitles::$targetTitle->getTitleValue(), true ) ,"private"); + LinkTitles::$targetTitle = Title::makeTitleSafe($ns,$title); + LinkTitles::ltDebugLog('newtarget='. LinkTitles::$targetTitle->getText() ,"private"); + LinkTitles::$targetTitleValue = LinkTitles::$targetTitle->getTitleValue(); + LinkTitles::ltDebugLog('altTarget='. 
LinkTitles::$targetTitleValue->getText() ,"private"); LinkTitles::$targetContent = null; } diff --git a/LinkTitles.cli.php b/LinkTitles.cli.php index bf734fc..fc198c4 100755 --- a/LinkTitles.cli.php +++ b/LinkTitles.cli.php @@ -64,6 +64,26 @@ class LinkTitlesCli extends Maintenance { true, // requires argument "s" ); + $this->addOption( + "page", + "page to process", + false, // not required + true, // requires argument + "p" + ); + $this->addOption( + "log", + "enables logging to console", + false, // not required + false, // requires no argument + "l" + ); + $this->addOption( + "debug", + "enables debug logging to console", + false, // not required + false // requires no argument + ); } /// Main function of the maintenance script. @@ -78,6 +98,24 @@ class LinkTitlesCli extends Maintenance { $this->error('FATAL: Start index must be 0 or greater.', 1); }; + if ($this->hasOption('log')) + { + LinkTitles::$ltConsoleOutput = true; + } + if ($this->hasOption('debug')) + { + LinkTitles::$ltConsoleOutputDebug = true; + } + + $pagename = strval($this->getOption('page')); + if ($pagename != null) + { + + $curTitle = Title::newFromDBkey( $pagename ); + LinkTitles::processPage($curTitle,RequestContext::getMain() ); + $this->output("\nFinished parsing.\n"); + return; + } // get our Namespaces $namespacesClause = str_replace( '_', ' ','(' . implode( ', ',$wgLinkTitlesNamespaces ) . ')' ); @@ -104,7 +142,7 @@ class LinkTitlesCli extends Maintenance { // Iterate through the pages; break if a time limit is exceeded. 
foreach ( $res as $row ) { $index += 1; - $curTitle = Title::makeTitle( $row->page_namespace, $row->page_title); + $curTitle = Title::makeTitleSafe( $row->page_namespace, $row->page_title); $this->output( sprintf("\rPage #%d (%02.0f%%)", $index, $index / $numPages * 100) ); diff --git a/SpecialLinkTitles.php b/SpecialLinkTitles.php index 5d205f3..e5b4949 100644 --- a/SpecialLinkTitles.php +++ b/SpecialLinkTitles.php @@ -105,7 +105,7 @@ class SpecialLinkTitles extends SpecialPage { else { // No end index was given. Therefore, count pages now. - $end = $this->countPages($dbr); + $end = $this->countPages($dbr, $namespacesClause ); }; array_key_exists('r', $postValues) ? @@ -128,7 +128,7 @@ class SpecialLinkTitles extends SpecialPage { // Iterate through the pages; break if a time limit is exceeded. foreach ( $res as $row ) { - $curTitle = Title::makeTitle( $row->page_namespace, $row->page_title); + $curTitle = Title::makeTitleSafe( $row->page_namespace, $row->page_title); LinkTitles::processPage($curTitle, $this->getContext()); $start += 1; @@ -279,16 +279,17 @@ EOF /// Counts the number of pages in a read-access wiki database ($dbr). /// @param $dbr Read-only `Database` object. /// @returns Number of pages in the default namespace (0) of the wiki. - private function countPages(&$dbr) { + private function countPages(&$dbr, $namespacesClause) { $res = $dbr->select( 'page', - 'page_id', + array('pagecount' => "COUNT(page_id)"), array( - 'page_namespace = 0', + 'page_namespace IN ' . $namespacesClause, ), __METHOD__ ); - return $res->numRows(); + + return $res->current()->pagecount; } }