diff --git a/NEWS b/NEWS index 74e833d..0e1ed8e 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,15 @@ +Version 4.0.7 (2017-01-02) +------------------------------------------------------------------------ + +- Improvement: Increase performance of special page and maintenance script. + +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + + Version 4.0.6 (2016-12-28) ------------------------------------------------------------------------ +- Fix: Bug fixes. - Fix: Custom namespace weights were not respected. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * diff --git a/README.md b/README.md index abc3451..3dcafe3 100644 --- a/README.md +++ b/README.md @@ -28,4 +28,4 @@ Contributors - Daniel Kraus (@bovender), main developer - Ulrich Strauss (@c0nnex), namespaces -- Brent Laabs (@labster), bug fixes +- Brent Laabs (@labster), code review and bug fixes diff --git a/extension.json b/extension.json index 52f25fe..da708e9 100644 --- a/extension.json +++ b/extension.json @@ -7,7 +7,7 @@ ], "type": "parserhook", "url": "https://www.mediawiki.org/wiki/Extension:LinkTitles", - "version": "4.0.6", + "version": "4.0.7", "license-name": "GPL-2.0+", "descriptionmsg": "linktitles-desc", "requires": { diff --git a/gh-pages b/gh-pages index 4735662..933a842 160000 --- a/gh-pages +++ b/gh-pages @@ -1 +1 @@ -Subproject commit 47356620d1eafbadd1f77ea89c24b0141519eddc +Subproject commit 933a842d56a8c4a7f956751ebd31deaff57361de diff --git a/includes/LinkTitles_Extension.php b/includes/LinkTitles_Extension.php index d983dd3..b95c77b 100644 --- a/includes/LinkTitles_Extension.php +++ b/includes/LinkTitles_Extension.php @@ -30,6 +30,15 @@ function dump($var) { /// Central class of the extension. Sets up parser hooks. /// This class contains only static functions; do not instantiate. class Extension { + /// Caching variable for page titles that are fetched from the DB. + private static $pageTitles; + + /// Caching variable for the current namespace. 
+ /// This is needed because the sort order of the page titles that + /// are cached in self::$pageTitles depends on the namespace of + /// the page currently being processed. + private static $currentNamespace; + /// A Title object for the page that is being parsed. private static $currentTitle; @@ -113,78 +122,28 @@ class Extension { private static function parseContent( $title, &$text ) { // Configuration variables need to be defined here as globals. - global $wgLinkTitlesPreferShortTitles; - global $wgLinkTitlesMinimumTitleLength; - global $wgLinkTitlesBlackList; global $wgLinkTitlesFirstOnly; global $wgLinkTitlesSmartMode; global $wgCapitalLinks; - global $wgLinkTitlesNamespaces; - ( $wgLinkTitlesPreferShortTitles ) ? $sort_order = 'ASC' : $sort_order = 'DESC'; ( $wgLinkTitlesFirstOnly ) ? $limit = 1 : $limit = -1; $limitReached = false; - self::$currentTitle = $title; + $currentNamespace = $title->getNamespace(); $newText = $text; - // Build a blacklist of pages that are not supposed to be link - // targets. This includes the current page. - $blackList = str_replace( ' ', '_', - '("' . implode( '","',$wgLinkTitlesBlackList ) . '","' . - addslashes( self::$currentTitle->getDbKey() ) . '")' ); - - $currentNamespace[] = $title->getNamespace(); - - // Build our weight list. Make sure current namespace is first element - $namespaces = array_diff($wgLinkTitlesNamespaces, $currentNamespace); - array_unshift($namespaces, $currentNamespace[0] ); - - // No need for sanitiy check. we are sure that we have at least one element in the array - $weightSelect = "CASE page_namespace "; - $currentWeight = 0; - foreach ($namespaces as &$namspacevalue) { - $currentWeight = $currentWeight + 100; - $weightSelect = $weightSelect . " WHEN " . $namspacevalue . " THEN " . $currentWeight . PHP_EOL; - } - $weightSelect = $weightSelect . " END "; - $namespacesClause = '(' . implode( ', ', $namespaces ) . 
')'; - - // Build an SQL query and fetch all page titles ordered by length from - // shortest to longest. Only titles from 'normal' pages (namespace uid - // = 0) are returned. Since the db may be sqlite, we need a try..catch - // structure because sqlite does not support the CHAR_LENGTH function. - $dbr = wfGetDB( DB_SLAVE ); - try { - $res = $dbr->select( - 'page', - array( 'page_title', 'page_namespace' , "weight" => $weightSelect), - array( - 'page_namespace IN ' . $namespacesClause, - 'CHAR_LENGTH(page_title) >= ' . $wgLinkTitlesMinimumTitleLength, - 'page_title NOT IN ' . $blackList, - ), - __METHOD__, - array( 'ORDER BY' => 'weight ASC, CHAR_LENGTH(page_title) ' . $sort_order ) - ); - } catch (Exception $e) { - $res = $dbr->select( - 'page', - array( 'page_title', 'page_namespace' , "weight" => $weightSelect ), - array( - 'page_namespace IN ' . $namespacesClause, - 'LENGTH(page_title) >= ' . $wgLinkTitlesMinimumTitleLength, - 'page_title NOT IN ' . $blackList, - ), - __METHOD__, - array( 'ORDER BY' => 'weight ASC, LENGTH(page_title) ' . $sort_order ) - ); + if ( !isset( self::$pageTitles ) || ( $currentNamespace != self::$currentNamespace ) ) { + self::$currentNamespace = $currentNamespace; + self::$pageTitles = self::fetchPageTitles( $currentNamespace ); } // Iterate through the page titles - foreach( $res as $row ) { + foreach( self::$pageTitles as $row ) { self::newTarget( $row->page_namespace, $row->page_title ); + // Don't link current page + if ( self::$targetTitle->equals( self::$currentTitle ) ) { continue; } + // split the page content by [[...]] groups // credits to inhan @ StackOverflow for suggesting preg_split // see http://stackoverflow.com/questions/10672286 @@ -286,6 +245,67 @@ class Extension { return true; } + // Fetches the page titles from the database. + // @param $currentNamespace String holding the namespace of the page currently being processed. 
+ private static function fetchPageTitles( $currentNamespace ) { + global $wgLinkTitlesPreferShortTitles; + global $wgLinkTitlesMinimumTitleLength; + global $wgLinkTitlesBlackList; + global $wgLinkTitlesNamespaces; + + ( $wgLinkTitlesPreferShortTitles ) ? $sort_order = 'ASC' : $sort_order = 'DESC'; + // Build a blacklist of pages that are not supposed to be link + // targets. It is built from $wgLinkTitlesBlackList only (spaces become underscores). + $blackList = str_replace( ' ', '_', '("' . implode( '","',$wgLinkTitlesBlackList ) . '")' ); + + // Build our weight list. Make sure current namespace is first element + $namespaces = array_diff( $wgLinkTitlesNamespaces, [ $currentNamespace ] ); + array_unshift( $namespaces, $currentNamespace ); + + // No need for a sanity check: we are sure that we have at least one element in the array + $weightSelect = "CASE page_namespace "; + $currentWeight = 0; + foreach ($namespaces as &$namspacevalue) { + $currentWeight = $currentWeight + 100; + $weightSelect = $weightSelect . " WHEN " . $namspacevalue . " THEN " . $currentWeight . PHP_EOL; + } + $weightSelect = $weightSelect . " END "; + $namespacesClause = '(' . implode( ', ', $namespaces ) . ')'; + + // Build an SQL query and fetch all page titles, ordered first by namespace + // weight and then by title length (direction set by $sort_order). Only titles + // from the namespaces in $namespacesClause are returned. Since the db may be sqlite, + // we need a try..catch structure because sqlite does not support the CHAR_LENGTH function. + $dbr = wfGetDB( DB_SLAVE ); + try { + $res = $dbr->select( + 'page', + array( 'page_title', 'page_namespace' , "weight" => $weightSelect), + array( + 'page_namespace IN ' . $namespacesClause, + 'CHAR_LENGTH(page_title) >= ' . $wgLinkTitlesMinimumTitleLength, + 'page_title NOT IN ' . $blackList, + ), + __METHOD__, + array( 'ORDER BY' => 'weight ASC, CHAR_LENGTH(page_title) ' . 
$sort_order ) + ); + } catch (Exception $e) { + $res = $dbr->select( + 'page', + array( 'page_title', 'page_namespace' , "weight" => $weightSelect ), + array( + 'page_namespace IN ' . $namespacesClause, + 'LENGTH(page_title) >= ' . $wgLinkTitlesMinimumTitleLength, + 'page_title NOT IN ' . $blackList, + ), + __METHOD__, + array( 'ORDER BY' => 'weight ASC, LENGTH(page_title) ' . $sort_order ) + ); + } + + return $res; + } + // Build an anonymous callback function to be used in simple mode. private static function simpleModeCallback( array $matches ) { if ( self::checkTargetPage() ) {