当前位置:首页 -> 综合频道 -> 站长之家

vb webbrowser 保存网页源码的各种方法

我在网上找到使用rft控件保存webbrowse文本  txtHtml是RichTextBox
txtHTML.Text = WebBrowser1.document.body.innerText
昀氀愀最 :rsftext 保存为txt文件,strtmp文件路径
txtHTML.saveFile strtmp, rtfText


将其name属性设置为web

Private Sub Command1_Click()
   web.Navigate ""
End Sub

Private Sub web_DocumentComplete(ByVal pDisp As Object, URL As Variant)
Set doc = web.Document
For Each i In doc.All
   msgbox typename(i)
   Text1.Text = Text1.text & vbclrf & i.innertext
Next
End sub


===========================================================================================
转载

引用 Microsoft HTML Object Library


   Dim oDoc As HTMLDocument
   Dim oElement As Object
   Dim oTxtRgn As Object
   Dim sSelectedText As String
 
   Set oDoc = WebBrowser1.Document获得文档对象
   Set oElement = oDoc.getElementById("T1")获得ID="T1"的对象
   Set oTxtRgn = oDoc.selection.createRange获得文档当前正选择的区域对象
 
   sSelectedText = oTxtRgn.Text选择区域文本赋值

   oElement.Focus"T1"对象获得焦点

   oElement.Select栀选对象"T1"

   Debug.Print "你选择了文本:" & sSelectedText


上面这段儿还附送了其他功能,呵呵。精简一下是这样:
   Dim oDoc As Object
   Dim oTxtRgn As Object
   Dim sSelectedHTML As String
 
   Set oDoc = WebBrowser1.Document 获得文档对象
   Set oTxtRgn = oDoc.selection.createRange 获得文档当前正选择的区域对象
 
   sSelectedHTML = oTxtRgn.htmlText 选择区域文本赋值


   Text1.Text=sSelectedHTML 蜀本框显示抓取得HTML源码
   ......或者继续分析源码


==================================================================================================

我用WebBrowser取得网页源码,直接运行正常,但在编译后出错
Private Sub Command1_Click()
WebBrowser1.Navigate ""
End Sub

Private Sub WebBrowser1_DownloadComplete()
甀折下载完毕
Dim doc, objhtml
Set doc = WebBrowser1.Document

Set objhtml = doc.body.createtextrange()
If Not IsNull(objhtml) Then
Text1.Text = objhtml.htmltext
End If


End Sub

我用WebBrowser取得网页源码,直接运行正常,但在编译后出错


提示:实时错误“91”    Object 变量或 with 块变量没有设置
可能是没有下载完所致,

Private Sub WebBrowser1_DownloadComplete()
if webbrowser.busy=false then
Dim doc, objhtml
Set doc = WebBrowser1.Document


Set objhtml = doc.body.createtextrange()
If Not IsNull(objhtml) Then
Text1.Text = objhtml.htmltext
End If
end if
End Sub

你要得网页源码用 xmlhttp比较好


先引用 msxml

Dim x As New MSXML2.XMLHTTP
x.open "get", "", False
x.send

MsgBox StrConv(x.responseBody, vbUnicode)

===============================================================================================
我在网上找到使用rft控件保存webbrowse文本  txtHtml是RichTextBox
txtHTML.Text = WebBrowser1.document.body.innerText
昀氀愀最 :rsftext 保存为txt文件,strtmp文件路径
txtHTML.saveFile strtmp, rtfText


=====================================================================================


Private Sub WebBrowser1_DownloadComplete()
   Dim objHtml As Object
   下载完成时状态栏显示“Link Finished”
   Set objHtml = Me.WebBrowser1.Document.Body.Createtextrange()
   If Not IsNull(objHtml) Then
       Text1.Text = objHtml.htmltext
   End If
End Sub
使用inet控件
Source1 = Inet1.OpenURL("")
If Source1 <> "" Then
RichTextBox1.Text = Source1
Me.Inet1.Cancel
Else
Source = MsgBox("Source code is not available.", vbInformation, "Source Code")
End If

Private Sub Command1_Click()
   Text1.Text = WebBrowser1.Document.body.innerHTML
End Sub


==================================================================================
加入timer,commandbutton,text
private sub command1_click()
webbrowser1.navigate
timer1.enabled=true
end sub


private sub timer1_timer()
dim doc,objhtml as object
dim i as integer
dim strhtml as string

if not webbrowser1.busy then
set doc=webbrowser1.document
i=0
set objhtml=doc.body.createtextrange()
if not isnull(objhtml) then
text1.text=objhtml.htmltext
end if
timer1.enabled=false
end if
end sub


Dim doc, objhtml As Object
If Not webbrowser1.Busy Then
        Set doc = webbrowser1.Document
        Set objhtml = doc.body.createtextrange()
        If Not IsNull(objhtml) Then
           text1.text=objhtml.htmltext
        End If
        Set doc = Nothing
        Set objhtml = Nothing

End If

===================================================================================================
或者试试用InternetReadFile,效果也可以:
Option Explicit

Private Declare Function InternetOpen Lib "wininet.dll" Alias "InternetOpenA" ( _
                   ByVal sAgent As String, ByVal lAclearcase/" target="_blank" >ccessType As Long, _
                   ByVal sProxyName As String, ByVal sProxyBypass As String, _
                   ByVal lFlags As Long) As Long
Private Declare Function InternetOpenUrl Lib "wininet.dll" Alias "InternetOpenUrlA" ( _
                   ByVal hInternetSession As Long, ByVal sUrl As String, _

                   ByVal sHeaders As String, ByVal lHeadersLength As Long, _
                   ByVal lFlags As Long, ByVal lContext As Long) As Long
Private Declare Function InternetReadFile Lib "wininet.dll" ( _
                   ByVal hFile As Long, ByVal sBuffer As String, _
                   ByVal lNumBytesToRead As Long, _
                   lNumberOfBytesRead As Long) As Integer
Private Declare Function InternetCloseHandle Lib "wininet.dll" ( _
                   ByVal hInet As Long) As Integer

Private Const INTERNET_FLAG_NO_CACHE_WRITE = &H4000000
Dim s

Private Function GetUrlFile(stUrl As String) As String
   Dim lgInternet As Long, lgSession As Long
   Dim stBuf As String * 1024
   Dim inRes As Integer
   Dim lgRet As Long
   Dim stTotal As String
   stTotal = vbNullString
   lgSession = InternetOpen("VBTagEdit", 1, vbNullString, vbNullString, 0)
   If lgSession Then
       lgInternet = InternetOpenUrl(lgSession, stUrl, vbNullString, _
                                    0, INTERNET_FLAG_NO_CACHE_WRITE, 0)
       If lgInternet Then
           Do

               inRes = InternetReadFile(lgInternet, stBuf, 1024, lgRet)
               stTotal = stTotal & Mid$(stBuf, 1, lgRet)
           Loop While (lgRet <> 0)
       End If
       inRes = InternetCloseHandle(lgInternet)
   End If
   GetUrlFile = stTotal
End Function


Private Sub Command1_Click()
   Text1.Text = GetUrlFile("")
End Sub

=====================================================================================================

Set vDoc = WebBrowser1.Document
获取网页的源码
For Each o In vDoc.All
  DoEvents
  htmlpage = htmlpage & o.innerHTML
Next
然后用写二进制文件的方法将htmlpage的内容写入到.html文件中如果这个网页中含有框架那么要对框加进行处理。


猜你喜欢