Skip to content

Commit acea848

Browse files
committed
added a configuration to allow external links #133
1 parent b8f292f commit acea848

File tree

1 file changed

+25
-4
lines changed

1 file changed

+25
-4
lines changed

src/Providers/Html.php

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use Embed\Bag;
66
use Embed\Utils;
7+
use Embed\Url;
78

89
/**
910
* Generic html provider.
@@ -14,6 +15,7 @@ class Html extends Provider implements ProviderInterface
1415
{
1516
protected $config = [
1617
'maxImages' => -1,
18+
'externalImages' => false
1719
];
1820

1921
/**
@@ -271,28 +273,30 @@ protected static function extractFromMeta(\DOMDocument $html, Bag $bag)
271273
*/
272274
protected function extractImages(\DOMElement $html)
273275
{
274-
$domain = $this->request->getDomain();
275-
276276
foreach ($html->getElementsByTagName('img') as $img) {
277277
if ($img->hasAttribute('src')) {
278278
$src = $this->request->createUrl($img->getAttribute('src'));
279279

280280
//Avoid external images
281-
if ($src->getContent() === null && $src->getDomain() !== $domain) {
281+
if (!$this->imageIsValid($src)) {
282282
continue;
283283
}
284284

285285
$parent = $img->parentNode;
286286

287+
//The image is in a link
287288
while ($parent && isset($parent->tagName)) {
288289
if ($parent->tagName === 'a') {
290+
//The link is external
289291
if ($parent->hasAttribute('href')) {
290292
$href = $this->request->createUrl($parent->getAttribute('href'));
291293

292-
if ($href->getDomain() !== $domain) {
294+
if (!$this->imageIsValid($href)) {
293295
continue 2;
294296
}
295297
}
298+
299+
//The link has rel=nofollow
296300
if ($parent->hasAttribute('rel') && (string) $parent->getAttribute('rel') === 'nofollow') {
297301
continue 2;
298302
}
@@ -308,6 +312,23 @@ protected function extractImages(\DOMElement $html)
308312
}
309313
}
310314

315+
/**
 * Decide whether a url may be used by the image extraction.
 *
 * A url is accepted right away when it carries its own content (base64)
 * or when its domain equals the domain of the current request. For every
 * other url the decision is delegated to the "externalImages" config value.
 *
 * @param Url $url
 *
 * @return bool
 */
protected function imageIsValid(Url $url)
{
    //Embedded content (base64) is always accepted
    if ($url->getContent() !== null) {
        return true;
    }

    //So is any url on the same domain as the request
    if ($url->getDomain() === $this->request->getDomain()) {
        return true;
    }

    $external = $this->config['externalImages'];

    //A boolean is returned as is: it accepts or rejects every external url
    if (is_bool($external)) {
        return $external;
    }

    //Any other value is handed to Url::match() to decide
    return $url->match($external);
}
331+
311332
/**
312333
* Returns the main element of the document.
313334
*

0 commit comments

Comments
 (0)