Skip to content

Commit 56d5a6c

Browse files
committed
encoding fixes
1 parent 5576cf6 commit 56d5a6c

File tree

1 file changed

+34
-14
lines changed

1 file changed

+34
-14
lines changed

src/Http/Response.php

Lines changed: 34 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -61,20 +61,40 @@ public function getHtmlContent()
6161
$entities = libxml_disable_entity_loader(true);
6262

6363
$this->htmlContent = new DOMDocument();
64-
65-
if (mb_detect_encoding($content, 'UTF-8', true) === 'UTF-8') {
66-
$content = mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8');
67-
$content = preg_replace(
68-
'/<head[^>]*>/',
69-
'<head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">',
70-
$content
71-
);
72-
} elseif (mb_detect_encoding($content, 'SJIS', true) === 'SJIS') {
73-
$content = mb_convert_encoding($content, 'HTML-ENTITIES', 'SJIS');
74-
$content = preg_replace('/<head[^>]*>/', '<head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=shift_jis">', $content);
75-
} elseif (mb_detect_encoding($content, 'ISO-8859-1', true) === 'ISO-8859-1') {
76-
$content = mb_convert_encoding($content, 'HTML-ENTITIES', 'ISO-8859-1');
77-
$content = preg_replace('/<head[^>]*>/', '<head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">', $content);
64+
if (stripos($content, '<meta charset=') === false) {
65+
$encodings = [
66+
'ASCII' => 'ascii',
67+
'UTF-8' => 'utf-8',
68+
'SJIS' => 'shift_jis',
69+
'Windows-1251' => 'windows-1251',
70+
'Windows-1252' => 'windows-1252',
71+
'Windows-1254' => 'windows-1254',
72+
'ISO-8859-16' => 'iso-8859-16',
73+
'ISO-8859-15' => 'iso-8859-15',
74+
'ISO-8859-14' => 'iso-8859-14',
75+
'ISO-8859-13' => 'iso-8859-13',
76+
'ISO-8859-10' => 'iso-8859-10',
77+
'ISO-8859-9' => 'iso-8859-9',
78+
'ISO-8859-8' => 'iso-8859-8',
79+
'ISO-8859-7' => 'iso-8859-7',
80+
'ISO-8859-6' => 'iso-8859-6',
81+
'ISO-8859-5' => 'iso-8859-5',
82+
'ISO-8859-4' => 'iso-8859-4',
83+
'ISO-8859-3' => 'iso-8859-3',
84+
'ISO-8859-2' => 'iso-8859-2',
85+
'ISO-8859-1' => 'iso-8859-1',
86+
];
87+
88+
$detected = mb_detect_encoding($content, implode(',', array_keys($encodings)), true);
89+
90+
if ($detected && !empty($encodings[$detected])) {
91+
$content = mb_convert_encoding($content, 'HTML-ENTITIES', $detected);
92+
$content = preg_replace(
93+
'/<head[^>]*>/',
94+
'<head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset='.$encodings[$detected].'">',
95+
$content
96+
);
97+
}
7898
}
7999

80100
$this->htmlContent->loadHTML(trim($content));

0 commit comments

Comments
 (0)