Мне нужно сделать парсер веб-данных.
- Мне нужно войти на сайт: пользователь, пароль, нажмите кнопку входа
- Нажмите вторую кнопку
- Подождите, пока страница загрузится, вот таблица, о которой идет речь. Таблица представляет собой журнал вызовов и динамически добавляет новый контент, поэтому он всегда обновляется.
- Я хочу исключить форму из содержимого таблицы и ограничить количество строк, вставляемых в Excel.
Я заставляю его работать с кодом InternetExplorer.Application, но мне нужно переключиться на код MSXML2.XMLHTTP, потому что он очень медленный.
Рабочая версия InternetExplorer.Application:
Sub extractTablesData()
'we define the essential variables
Dim IE As Object, obj As Object
Dim r As Integer, c As Integer, t As Integer
Dim elemCollection As Object
'add the "Microsoft Internet Controls" reference in your VBA Project indirectly
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Silent = True
.Visible = True
.navigate ("https://www.clickphone.ro")
' we ensure that the web page downloads completely before we fill the form automatically
While IE.readyState <> 4
DoEvents
Wend
Application.Wait Now + TimeValue("00:00:03")
Set HTMLDoc = IE.document
HTMLDoc.all.user.Value = "user or email" 'Enter your email/user id here
HTMLDoc.all.pass.Value = "xXXxXXXxxXXXxx" 'Enter your password here
'Login Button Click
With IE.document
Set elems = .getElementsByTagName("a")
For Each e In elems
If (e.getAttribute("class") = "orange_button") Then
e.Click
Exit For
End If
Next e
End With
'Needed Table page Button Click https://www.clickphone.ro/account/istoric_apel_in.html
While IE.readyState <> 4
DoEvents
Wend
Set iedoc = IE.document
Set elems = iedoc.getElementsByClassName("black")(12)
elems.Click
' again ensuring that the web page loads completely before we start scraping data
While IE.readyState <> 4
DoEvents
Wend
Application.Wait Now + TimeValue("00:00:05")
Set iedoc = IE.document
'Clearing any unnecessary or old data in Sheet1
ThisWorkbook.Sheets("Sheet1").Range("A1:K1000").ClearContents
'Scrapping Data and past to Sheet1
Set elemCollection = IE.document.getElementsByTagName("table")
For t = 0 To (elemCollection.Length - 1)
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ThisWorkbook.Worksheets(1).Cells(r + 1, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
Next t
End With
' cleaning up memory
Set IE = Nothing
End Sub
Это моя попытка MSXMLHTTP:
Option Explicit
'reference to Microsoft Internet Controls
'reference to Microsoft HTML Object Library
Sub Web_Table_Option_One()
Dim xml As Object
Dim html As Object
Dim objTable As Object
Dim result As String
Dim lRow As Long
Dim lngTable As Long
Dim lngRow As Long
Dim lngCol As Long
Dim ActRw As Long
Set xml = CreateObject("MSXML2.XMLHTTP.6.0")
Set html = CreateObject("htmlFile")
With xml
.Open "POST", "https://www.clickphone.ro/login.html", False
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send "userName=USER&password=XXXXxxxxXxxxxXXX"
.Open "GET", "https://www.clickphone.ro/account/istoric_apel_in.html", False
.setRequestHeader "Content-type", "text/xml"
.send
End With
html.body.innerHTML = xml.responseText
Set objTable = html.getElementsByTagName("table")
For lngTable = 0 To objTable.Length - 1
For lngRow = 0 To objTable(lngTable).Rows.Length - 1
For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
ThisWorkbook.Sheets("Sheet2").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
ActRw = ActRw + objTable(lngTable).Rows.Length + 1
Next lngTable
End Sub
Исходный код HTML:
Для пользователя, пройти, кнопка входа:
<form action="/login.html" id="toploginform" name="toploginform" method="post">
<script>
function processLoginForm(){
with (document.toploginform) {
if (user.value=="Email"){alert('Email/Parola incorecte!'); return false}
document.getElementById('toploginform').submit();
}
}
</script>
<fieldset>
<input name="userlogin" type="hidden" id="userlogin" value="true" />
<span class="text">
<input name="user" type="text" onFocus="if(this.value=='Email'){this.value=''}" onBlur="if(this.value==''){this.value='Email'}" value="Email">
</span> <span class="text">
<input name="pass" type="password" onFocus="if(this.value=='Password'){this.value=''}" onBlur="if(this.value==''){this.value='Password'}" value="Password">
</span>
<input name="authcode" type="hidden" id="authcode" value="false" />
<span><a href="#" class="orange_button" onClick="return processLoginForm()">Login</a></span>
<span class="links"><a href="/login~parola.html">Am uitat parola</a><br/>
<input class="css-checkbox" id="checkbox2" type="checkbox" name="rememberpass" value="da" />
<label for="checkbox2" name="checkbox2_lbl" class="css-label lite-orange-check">Retin datele?</label>
</span>
</fieldset>
</form>
Кнопка страницы таблицы:
<br /> <img src="/images/sageata_orange.gif" width="7" height="8" /> <a class="black" href="/account/istoric_apel_in.html">Apeluri primite</a>
Исходный код таблицы:
<table class="TabelDate" cellspacing="0">
<thead>
<tr>
<th width="130">Data</th>
<th>Sursa</th>
<th>Destinatie</th>
<th>Durata</th>
<th class="ultima">Status</th>
</tr>
</thead>
<tr class="u"> <td class="prima">19-03-2017 17:31:16</td><td><font color="green"><form name="form24-1489937476.41719" method="post" action="">0720145931 <a class="TipFB" href="#"><span class="tip"><span class="tipTitle">Suna inapoi la 0720145931</span><span class="tipBody">Click si serverul te va suna gratuit pe numarul tau 0371780434.<br />Dupa ce raspunzi se formeaza automat numarul 0720145931.</span><span class="tipArrow"></span></span><input type="image" name="btn_opentextbox" src="/images/phone_small.gif" value="Submit" /></a> <input name="numartel" type="hidden" id="numartel" value="0720145931" /></form></font></td><td align="center"><font color="green">0371780444</font></td><td align="center"><font color="green">00:00:07</font></td>
<td class="ultima" align="center"><font color="green">Apel preluat</font></td></tr> <tr class="gri"> <td class="prima">19-03-2017 17:30:48</td><td><font color="green"><form name="form24-1489937448.41715" method="post" action="">0728409617 <a class="TipFB" href="#"><span class="tip"><span class="tipTitle">Suna inapoi la 0728409617</span><span class="tipBody">Click si serverul te va suna gratuit pe numarul tau 0371780434.<br />Dupa ce raspunzi se formeaza automat numarul 0728409617.</span><span class="tipArrow"></span></span><input type="image" name="btn_opentextbox" src="/images/phone_small.gif" value="Submit" /></a> <input name="numartel" type="hidden" id="numartel" value="0728409617" /></form></font></td><td align="center"><font color="green">0371780655</font></td><td align="center"><font color="green">00:00:07</font></td>