Simple parsing of HTML documents
// Parses an inline HTML document with Web.Page and walks the nested
// Children tables down to the <title> row and the text of the three <div>s.
let
    // The HTML literal is passed verbatim (including its leading line break).
    Source = Web.Page("
<html>
<head>
<title>Test</title>
</head>
<body>
<div>First div</div>
<div>Second div</div>
<div>Third div</div>
</body>
</html>"
    ),
    // First entry of the Data column: the table describing the document root.
    DocumentTable = Source[Data]{0},
    // First entry of that table's Children column; its rows are indexed
    // below as 0 (used as the head row) and 1 (used as the body row).
    HtmlChildRows = DocumentTable[Children]{0},
    HeadRow = HtmlChildRows{0},
    BodyRow = HtmlChildRows{1},
    // First row nested under the head row - the <title> element.
    TitleRow = HeadRow[Children]{0},
    // Rows nested under the body row - one per <div>.
    DivRows = BodyRow[Children],
    // Reads the Text field of the first child nested under an element row.
    FirstChildText = (elementRow) => elementRow[Children]{0}[Text]
in
    [
        // The whole <title> row is returned rather than a text value;
        // the original author noted that no text was found here.
        title = TitleRow,
        div1 = FirstChildText(DivRows{0}),
        div2 = FirstChildText(DivRows{1}),
        div3 = FirstChildText(DivRows{2})
    ]
Evaluating JavaScript and returning values
A `<script>` element that writes text using `document.write` can be included in the web page. The written text can then be read from the Power Query environment as shown below. This program returns `Hello world`:
// Runs a small script through Web.Page and reads back the text that the
// script emitted with document.write.
let
    // The script literal is passed verbatim; it writes three fragments.
    Source = Web.Page(
"<script>
document.write('Hello');
document.write(' ');
document.write('world');
</script>"
    ),
    // First entry of the Data column: the table describing the document root.
    DocumentTable = Source[Data]{0},
    // First entry of the root table's Children column.
    RootChildRows = DocumentTable[Children]{0},
    // One nested Children table per row of RootChildRows.
    NestedChildTables = RootChildRows[Children],
    // The second nested table holds the written text
    // (presumably the body's children - confirm against Web.Page output).
    WrittenText = NestedChildTables{1}[Text]{0}
in
    WrittenText