// Demo script: strip every HTML comment from a document with DOMDocument and
// DOMXPath, then remove the whitespace-only lines left in the serialised markup.

// Sample markup: two well-formed comments, one comment padded with surplus
// dashes, and element/text content that has to survive the clean-up.
$html = '<html>
<body>
<!--a comment-->
<div>some content</div><br>
Hello.
<p>Hello in paragraph.</p>
<p>Lorem Ipsum paragraph.</p>
<!--another comment-->
<!--------------another comment with wrong dashes------------>
</body>
</html>';

// Parse the markup. LIBXML_HTML_NODEFDTD keeps libxml from adding a default
// doctype when the input has none.
$dom = new DOMDocument();
$dom->preserveWhiteSpace = false;
$dom->formatOutput = false;
$dom->loadHTML($html, LIBXML_HTML_NODEFDTD);

$xpath = new DOMXPath($dom);

// Detach every comment node matched anywhere in the document.
foreach ($xpath->query('//comment()') as $comment) {
    $comment->parentNode->removeChild($comment);
}

// Serialise the whole document, but only when a <body> element was found;
// otherwise fall back to a fixed failure message.
$body = $xpath->query('//body')->item(0);
$newHtml = $body instanceof DOMNode ? $dom->saveHTML() : 'something failed';

// Normalise all line endings to "\n", split into lines and keep only the
// lines that still contain something after trimming. array_filter preserves
// the original keys, exactly like unsetting entries inside a loop would.
$htmlExp = array_filter(
    explode("\n", str_replace(["\r\n", "\r"], "\n", $newHtml)),
    static function ($htmlLine) {
        return trim($htmlLine) !== '';
    }
);
$newHtml = implode("\n", $htmlExp);

var_dump($newHtml);