diff --git a/includes/LinkTitles_Extension.php b/includes/LinkTitles_Extension.php index d983dd3..b95c77b 100644 --- a/includes/LinkTitles_Extension.php +++ b/includes/LinkTitles_Extension.php @@ -30,6 +30,15 @@ function dump($var) { /// Central class of the extension. Sets up parser hooks. /// This class contains only static functions; do not instantiate. class Extension { + /// Caching variable for page titles that are fetched from the DB. + private static $pageTitles; + + /// Caching variable for the current namespace. + /// This is needed because the sort order of the page titles that + /// are cached in self::$pageTitles depends on the namespace of + /// the page currently being processed. + private static $currentNamespace; + /// A Title object for the page that is being parsed. private static $currentTitle; @@ -113,78 +122,28 @@ class Extension { private static function parseContent( $title, &$text ) { // Configuration variables need to be defined here as globals. - global $wgLinkTitlesPreferShortTitles; - global $wgLinkTitlesMinimumTitleLength; - global $wgLinkTitlesBlackList; global $wgLinkTitlesFirstOnly; global $wgLinkTitlesSmartMode; global $wgCapitalLinks; - global $wgLinkTitlesNamespaces; - ( $wgLinkTitlesPreferShortTitles ) ? $sort_order = 'ASC' : $sort_order = 'DESC'; ( $wgLinkTitlesFirstOnly ) ? $limit = 1 : $limit = -1; $limitReached = false; - self::$currentTitle = $title; + $currentNamespace = $title->getNamespace(); $newText = $text; - // Build a blacklist of pages that are not supposed to be link - // targets. This includes the current page. - $blackList = str_replace( ' ', '_', - '("' . implode( '","',$wgLinkTitlesBlackList ) . '","' . - addslashes( self::$currentTitle->getDbKey() ) . '")' ); - - $currentNamespace[] = $title->getNamespace(); - - // Build our weight list. Make sure current namespace is first element - $namespaces = array_diff($wgLinkTitlesNamespaces, $currentNamespace); - array_unshift($namespaces, $currentNamespace[0] ); - - // No need for sanitiy check. we are sure that we have at least one element in the array - $weightSelect = "CASE page_namespace "; - $currentWeight = 0; - foreach ($namespaces as &$namspacevalue) { - $currentWeight = $currentWeight + 100; - $weightSelect = $weightSelect . " WHEN " . $namspacevalue . " THEN " . $currentWeight . PHP_EOL; - } - $weightSelect = $weightSelect . " END "; - $namespacesClause = '(' . implode( ', ', $namespaces ) . ')'; - - // Build an SQL query and fetch all page titles ordered by length from - // shortest to longest. Only titles from 'normal' pages (namespace uid - // = 0) are returned. Since the db may be sqlite, we need a try..catch - // structure because sqlite does not support the CHAR_LENGTH function. - $dbr = wfGetDB( DB_SLAVE ); - try { - $res = $dbr->select( - 'page', - array( 'page_title', 'page_namespace' , "weight" => $weightSelect), - array( - 'page_namespace IN ' . $namespacesClause, - 'CHAR_LENGTH(page_title) >= ' . $wgLinkTitlesMinimumTitleLength, - 'page_title NOT IN ' . $blackList, - ), - __METHOD__, - array( 'ORDER BY' => 'weight ASC, CHAR_LENGTH(page_title) ' . $sort_order ) - ); - } catch (Exception $e) { - $res = $dbr->select( - 'page', - array( 'page_title', 'page_namespace' , "weight" => $weightSelect ), - array( - 'page_namespace IN ' . $namespacesClause, - 'LENGTH(page_title) >= ' . $wgLinkTitlesMinimumTitleLength, - 'page_title NOT IN ' . $blackList, - ), - __METHOD__, - array( 'ORDER BY' => 'weight ASC, LENGTH(page_title) ' . $sort_order ) - ); + if ( !isset( self::$pageTitles ) || ( $currentNamespace != self::$currentNamespace ) ) { + self::$currentNamespace = $currentNamespace; + self::$pageTitles = self::fetchPageTitles( $currentNamespace ); } // Iterate through the page titles - foreach( $res as $row ) { + foreach( self::$pageTitles as $row ) { self::newTarget( $row->page_namespace, $row->page_title ); + // Don't link current page + if ( self::$targetTitle->equals( self::$currentTitle ) ) { continue; } + // split the page content by [[...]] groups // credits to inhan @ StackOverflow for suggesting preg_split // see http://stackoverflow.com/questions/10672286 @@ -286,6 +245,67 @@ class Extension { return true; } + // Fetches the page titles from the database. + // @param $currentNamespace String holding the namespace of the page currently being processed. + private static function fetchPageTitles( $currentNamespace ) { + global $wgLinkTitlesPreferShortTitles; + global $wgLinkTitlesMinimumTitleLength; + global $wgLinkTitlesBlackList; + global $wgLinkTitlesNamespaces; + + ( $wgLinkTitlesPreferShortTitles ) ? $sort_order = 'ASC' : $sort_order = 'DESC'; + // Build a blacklist of pages that are not supposed to be link + // targets. This includes the current page. + $blackList = str_replace( ' ', '_', '("' . implode( '","',$wgLinkTitlesBlackList ) . '")' ); + + // Build our weight list. Make sure current namespace is first element + $namespaces = array_diff( $wgLinkTitlesNamespaces, [ $currentNamespace ] ); + array_unshift( $namespaces, $currentNamespace ); + + // No need for sanitiy check. we are sure that we have at least one element in the array + $weightSelect = "CASE page_namespace "; + $currentWeight = 0; + foreach ($namespaces as &$namspacevalue) { + $currentWeight = $currentWeight + 100; + $weightSelect = $weightSelect . " WHEN " . $namspacevalue . " THEN " . $currentWeight . PHP_EOL; + } + $weightSelect = $weightSelect . " END "; + $namespacesClause = '(' . implode( ', ', $namespaces ) . ')'; + + // Build an SQL query and fetch all page titles ordered by length from + // shortest to longest. Only titles from 'normal' pages (namespace uid + // = 0) are returned. Since the db may be sqlite, we need a try..catch + // structure because sqlite does not support the CHAR_LENGTH function. + $dbr = wfGetDB( DB_SLAVE ); + try { + $res = $dbr->select( + 'page', + array( 'page_title', 'page_namespace' , "weight" => $weightSelect), + array( + 'page_namespace IN ' . $namespacesClause, + 'CHAR_LENGTH(page_title) >= ' . $wgLinkTitlesMinimumTitleLength, + 'page_title NOT IN ' . $blackList, + ), + __METHOD__, + array( 'ORDER BY' => 'weight ASC, CHAR_LENGTH(page_title) ' . $sort_order ) + ); + } catch (Exception $e) { + $res = $dbr->select( + 'page', + array( 'page_title', 'page_namespace' , "weight" => $weightSelect ), + array( + 'page_namespace IN ' . $namespacesClause, + 'LENGTH(page_title) >= ' . $wgLinkTitlesMinimumTitleLength, + 'page_title NOT IN ' . $blackList, + ), + __METHOD__, + array( 'ORDER BY' => 'weight ASC, LENGTH(page_title) ' . $sort_order ) + ); + } + + return $res; + } + // Build an anonymous callback function to be used in simple mode. private static function simpleModeCallback( array $matches ) { if ( self::checkTargetPage() ) {