@@ -302,8 +302,7 @@ public function addFootnotes(\DOMElement $articleContent): void
302302 $ articleLinks = $ articleContent ->getElementsByTagName ('a ' );
303303 $ linkCount = 0 ;
304304
305- for ($ i = 0 ; $ i < $ articleLinks ->length ; ++$ i ) {
306- $ articleLink = $ articleLinks ->item ($ i );
305+ foreach ($ articleLinks as $ articleLink ) {
307306 $ footnoteLink = $ articleLink ->cloneNode (true );
308307 $ refLink = $ this ->dom ->createElement ('a ' );
309308 $ footnote = $ this ->dom ->createElement ('li ' );
@@ -383,8 +382,8 @@ public function prepArticle(\DOMNode $articleContent): void
383382
384383 // Remove service data-candidate attribute.
385384 $ elems = $ xpath ->query ('.//*[@data-candidate] ' , $ articleContent );
386- for ($ i = $ elems-> length - 1 ; $ i >= 0 ; -- $ i ) {
387- $ elems -> item ( $ i ) ->removeAttribute ('data-candidate ' );
385+ foreach ($ elems as $ elem ) {
386+ $ elem ->removeAttribute ('data-candidate ' );
388387 }
389388
390389 // Clean out junk from the article content.
@@ -520,11 +519,12 @@ public function getLinkDensity(\DOMElement $e, bool $excludeExternal = false): f
520519 $ textLength = mb_strlen ($ this ->getInnerText ($ e , true , true ));
521520 $ linkLength = 0 ;
522521
523- for ($ dRe = $ this ->domainRegExp , $ i = 0 , $ il = $ links ->length ; $ i < $ il ; ++$ i ) {
524- if ($ excludeExternal && $ dRe && !preg_match ($ dRe , $ links ->item ($ i )->getAttribute ('href ' ))) {
522+ $ dRe = $ this ->domainRegExp ;
523+ foreach ($ links as $ link ) {
524+ if ($ excludeExternal && $ dRe && !preg_match ($ dRe , $ link ->getAttribute ('href ' ))) {
525525 continue ;
526526 }
527- $ linkLength += mb_strlen ($ this ->getInnerText ($ links -> item ( $ i ) ));
527+ $ linkLength += mb_strlen ($ this ->getInnerText ($ link ));
528528 }
529529
530530 if ($ textLength > 0 && $ linkLength > 0 ) {
@@ -586,7 +586,7 @@ public function clean(\DOMElement $e, string $tag): void
586586 }
587587
588588 // Then check the elements inside this element for the same.
589- if (preg_match ($ this ->regexps ['media ' ], $ targetList -> item ( $ y ) ->getInnerHTML ())) {
589+ if (preg_match ($ this ->regexps ['media ' ], $ currentItem ->getInnerHTML ())) {
590590 continue ;
591591 }
592592 }
@@ -640,15 +640,15 @@ public function cleanConditionally(\DOMElement $e, string $tag): void
640640 $ embedCount = 0 ;
641641 $ embeds = $ node ->getElementsByTagName ('embed ' );
642642
643- for ($ ei = 0 , $ il = $ embeds-> length ; $ ei < $ il ; ++ $ ei ) {
644- if (preg_match ($ this ->regexps ['media ' ], $ embeds -> item ( $ ei ) ->getAttribute ('src ' ))) {
643+ foreach ($ embeds as $ embed ) {
644+ if (preg_match ($ this ->regexps ['media ' ], $ embed ->getAttribute ('src ' ))) {
645645 ++$ embedCount ;
646646 }
647647 }
648648
649649 $ embeds = $ node ->getElementsByTagName ('iframe ' );
650- for ($ ei = 0 , $ il = $ embeds-> length ; $ ei < $ il ; ++ $ ei ) {
651- if (preg_match ($ this ->regexps ['media ' ], $ embeds -> item ( $ ei ) ->getAttribute ('src ' ))) {
650+ foreach ($ embeds as $ embed ) {
651+ if (preg_match ($ this ->regexps ['media ' ], $ embed ->getAttribute ('src ' ))) {
652652 ++$ embedCount ;
653653 }
654654 }
@@ -719,8 +719,9 @@ public function cleanHeaders(\DOMElement $e): void
719719 $ headers = $ e ->getElementsByTagName ('h ' . $ headerIndex );
720720
721721 for ($ i = $ headers ->length - 1 ; $ i >= 0 ; --$ i ) {
722- if ($ this ->getWeight ($ headers ->item ($ i )) < 0 || $ this ->getLinkDensity ($ headers ->item ($ i )) > 0.33 ) {
723- $ headers ->item ($ i )->parentNode ->removeChild ($ headers ->item ($ i ));
722+ $ header = $ headers ->item ($ i );
723+ if ($ this ->getWeight ($ header ) < 0 || $ this ->getLinkDensity ($ header ) > 0.33 ) {
724+ $ header ->parentNode ->removeChild ($ header );
724725 }
725726 }
726727 }
@@ -812,12 +813,14 @@ protected function prepDocument(): void
812813 // Remove all style tags in head.
813814 $ styleTags = $ this ->dom ->getElementsByTagName ('style ' );
814815 for ($ i = $ styleTags ->length - 1 ; $ i >= 0 ; --$ i ) {
815- $ styleTags ->item ($ i )->parentNode ->removeChild ($ styleTags ->item ($ i ));
816+ $ styleTag = $ styleTags ->item ($ i );
817+ $ styleTag ->parentNode ->removeChild ($ styleTag );
816818 }
817819
818820 $ linkTags = $ this ->dom ->getElementsByTagName ('link ' );
819821 for ($ i = $ linkTags ->length - 1 ; $ i >= 0 ; --$ i ) {
820- $ linkTags ->item ($ i )->parentNode ->removeChild ($ linkTags ->item ($ i ));
822+ $ linkTag = $ linkTags ->item ($ i );
823+ $ linkTag ->parentNode ->removeChild ($ linkTag );
821824 }
822825 }
823826
@@ -1015,15 +1018,15 @@ protected function grabArticle(?\DOMElement $page = null)
10151018 * A score is determined by things like number of commas, class names, etc.
10161019 * Maybe eventually link density.
10171020 */
1018- for ($ pt = 0 , $ scored = \count ( $ nodesToScore); $ pt < $ scored ; ++ $ pt ) {
1019- $ ancestors = $ this ->getAncestors ($ nodesToScore [ $ pt ] , 5 );
1021+ foreach ($ nodesToScore as $ nodeToScore ) {
1022+ $ ancestors = $ this ->getAncestors ($ nodeToScore , 5 );
10201023
10211024 // No parent node? Move on...
10221025 if (0 === \count ($ ancestors )) {
10231026 continue ;
10241027 }
10251028
1026- $ innerText = $ this ->getInnerText ($ nodesToScore [ $ pt ] );
1029+ $ innerText = $ this ->getInnerText ($ nodeToScore );
10271030
10281031 // If this paragraph is less than MIN_PARAGRAPH_LENGTH (default:20) characters, don't even count it.
10291032 if (mb_strlen ($ innerText ) < self ::MIN_PARAGRAPH_LENGTH ) {
@@ -1076,11 +1079,6 @@ protected function grabArticle(?\DOMElement $page = null)
10761079 }
10771080 }
10781081
1079- $ candidates = $ xpath ->query ('.//*[not(self::body) and (@class or @id or @style) and ((number(@readability) < 40) or not(@readability))] ' , $ page ->documentElement );
1080-
1081- for ($ c = $ candidates ->length - 1 ; $ c >= 0 ; --$ c ) {
1082- $ node = $ candidates ->item ($ c );
1083- }
10841082 unset($ candidates );
10851083 }
10861084
@@ -1231,11 +1229,6 @@ protected function grabArticle(?\DOMElement $page = null)
12311229 $ parentOfTopCandidate = $ topCandidate ->parentNode ;
12321230 $ siblingNodes = $ parentOfTopCandidate ->childNodes ;
12331231
1234- if (0 === $ siblingNodes ->length ) {
1235- $ siblingNodes = new \stdClass ();
1236- $ siblingNodes ->length = 0 ;
1237- }
1238-
12391232 for ($ s = 0 , $ sl = $ siblingNodes ->length ; $ s < $ sl ; ++$ s ) {
12401233 $ siblingNode = $ siblingNodes ->item ($ s );
12411234 $ siblingNodeName = $ siblingNode ->nodeName ;
0 commit comments