Delphi提取网页中的图片

无意中,在csdn论坛中看到关于“提取网页中的图片资源”的帖子,特摘抄之。
simonhehe提供相关代码如下: 
// Copies the <img> element whose id is 'pic' from the document loaded in
// wb into image1, going through the Windows clipboard.
//
// wb: browser control whose current document is searched.
//
// This is a best-effort operation: if the document, its body or the image
// is missing, or the copy fails, image1 is left untouched and no exception
// escapes. NOTE: a successful copy overwrites the current clipboard content.
procedure TfrmMain.DomImg2Image(wb:TWebBrowser);
const
  ImageId = 'pic';  // id of the image element to extract
var
  doc: IHTMLDocument2;
  imgDisp: IDispatch;
  rang: IHTMLControlRange;
begin
  try
    if (not Assigned(wb)) or (not Assigned(wb.Document)) then
      Exit;
    // "as" performs a real QueryInterface; a hard cast would only
    // reinterpret the IDispatch pointer.
    doc := wb.Document as IHTMLDocument2;
    if not Assigned(doc.body) then
      Exit;
    // Look the element up once and reuse it.
    imgDisp := doc.images.item(ImageId, EmptyParam);
    if not Assigned(imgDisp) then
      Exit;
    rang := ((doc.body as HTMLBody).createControlRange) as IHTMLControlRange;
    rang.add(imgDisp as IHTMLControlElement);
    // Only read the clipboard when the Copy command reports success;
    // otherwise stale clipboard content would end up in image1.
    if rang.execCommand('Copy', False, 0) then
      image1.Picture.Assign(Clipboard);
  except
    // Stay best-effort, but leave a trace instead of swallowing silently.
    on E: Exception do
      OutputDebugString(PChar('DomImg2Image: ' + E.Message));
  end;
end;

其中,网页中图片元素的ID为“pic”;代码先用ControlRange对该图片执行“Copy”命令,把图片放入剪贴板,再从剪贴板赋给image1控件。注意:此操作会覆盖剪贴板中原有的内容。

以下是Delphi XE的单元文件:

unit Unit11;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, OleCtrls, SHDocVw, ExtCtrls;

type
  // Demo form: loads a page into an embedded browser, lists the page's
  // images, and copies one image (selected by id) into a TImage.
  TForm11 = class(TForm)
    btn1: TButton;      // its click handler lists id/src of every image into mmo1
    img1: TImage;       // receives the picture copied from the page
    wb1: TWebBrowser;   // embedded browser; navigated in FormCreate
    btn2: TButton;      // its click handler copies the image named in edt1 to img1
    mmo1: TMemo;        // output area for the image id/src listing
    edt1: TEdit;        // id of the image to extract
    procedure FormCreate(Sender: TObject);
    procedure btn2Click(Sender: TObject);
    procedure btn1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form11: TForm11;

implementation
uses ActiveX, wininet, mshtml, Clipbrd;
{$R *.dfm}

// Copies one image of the document loaded in wb into img, going through
// the Windows clipboard.
//
// id : value passed to the document's images collection to select the image.
// wb : browser control whose current document is searched.
// img: image control that receives the copied picture.
//
// Raises Exception with a descriptive message when no document is loaded,
// when the lookup does not yield a single image element, or when the Copy
// command fails. NOTE: a successful call overwrites the clipboard content.
procedure DomImg2Image(id:string; wb:TWebBrowser; img:TImage );
var
  doc: IHTMLDocument2;
  element: IHTMLControlElement;
  rang: IHTMLControlRange;
begin
  // Supports() does a real QueryInterface and also copes with a nil
  // Document; a hard cast would only reinterpret the pointer.
  if (wb = nil) or (not Supports(wb.Document, IHTMLDocument2, doc)) or
     (not Assigned(doc.body)) then
    raise Exception.Create('DomImg2Image: no HTML document is loaded');

  // Look the element up once. The result is nil when nothing matches and
  // is not a control element when the lookup is ambiguous.
  if not Supports(doc.images.item(id, EmptyParam), IHTMLControlElement, element) then
    raise Exception.CreateFmt('DomImg2Image: image "%s" not found', [id]);

  rang := ((doc.body as HTMLBody).createControlRange) as IHTMLControlRange;
  rang.add(element);

  // Do not read the clipboard unless the copy really happened; otherwise
  // stale clipboard content would be assigned to img.
  if not rang.execCommand('Copy', False, 0) then
    raise Exception.CreateFmt('DomImg2Image: could not copy image "%s"', [id]);

  img.Picture.Assign(Clipboard);
end;

// Lists every image of the document loaded in wb1: for each image two
// lines are appended to mmo1, first its 'id' attribute, then its 'src'
// attribute. An attribute that comes back as Null is shown as an empty
// line. If no HTML document is loaded the memo is simply left empty.
procedure TForm11.btn1Click(Sender: TObject);
var
  i: Integer;
  doc: IHTMLDocument2;
  images: IHTMLElementCollection;
  element: IHTMLElement;
begin
  mmo1.Clear;

  // Real QueryInterface instead of a hard cast; also handles a nil Document.
  if not Supports(wb1.Document, IHTMLDocument2, doc) then
    Exit;

  // Fetch the collection once instead of on every loop iteration.
  images := doc.images;

  mmo1.Lines.BeginUpdate;
  try
    for i := 0 to images.length - 1 do
    begin
      element := images.item(i, EmptyParam) as IHTMLElement;
      // VarToStr maps a Null variant to '' instead of raising a
      // variant conversion error.
      mmo1.Lines.Add(VarToStr(element.getAttribute('id', 0)));
      mmo1.Lines.Add(VarToStr(element.getAttribute('src', 0)));
    end;
  finally
    mmo1.Lines.EndUpdate;
  end;
end;

// Copies the page image whose id is typed into edt1 from wb1 into img1.
procedure TForm11.btn2Click(Sender: TObject);
var
  imageId: string;
begin
  // Arguments: id of the <img>, the source browser, the target TImage.
  imageId := edt1.Text;
  DomImg2Image(imageId, wb1, img1);
end;

// Loads the sample page into the embedded browser when the form is created.
procedure TForm11.FormCreate(Sender: TObject);
const
  SamplePage = 'c:\aa.html';  // local test page
begin
  wb1.Navigate(SamplePage);
end;

end.

以下是网页文件:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<title>无标题文档</title>
</head>

<body>
                <!-- Image with an id attribute: this id is the value to use when selecting an image by id. -->
                <img id='signup_join_beanfun_signup_samplecaptcha_CaptchaImage' src='http://www.baidu.com/img/shouye_b5486898c692066bd2cbaeda86d74448.gif' alt='CAPTCHA code image' />


            		<!-- Image without an id attribute: it only has src and alt. -->
            		<img src='http://info-database.csdn.net/Upload/2012-10-08/amd-475-60-1008.jpg' alt='重新取得' />

</body>
</html>


由此,我们也可从网页中提取图片之外的资源。

猜你喜欢

转载自blog.csdn.net/wangsj741/article/details/9526615