whats new ¦  programming tips ¦  indy articles ¦  intraweb articles ¦  informations ¦  links ¦  interviews
 misc ¦  tutorials ¦  Add&Win Game

Tips (1541)

Database (90)
Files (137)
Forms (107)
Graphic (114)
IDE (21)
Indy (5)
Internet / LAN (130)
IntraWeb (0)
Math (76)
Misc (126)
Multimedia (45)
Objects/
ActiveX (51)

OpenTools API (3)
Printing (35)
Strings (83)
System (266)
VCL (242)

Top15

Tips sort by
component


Search Tip

Add new Tip

Add&Win Game

Advertising

41 Visitors Online


 
...retrieve all image links from an HTML document?
Autor: mrbaseball34
[ Print tip ]  

Tip Rating (31):  
     


uses mshtml, ActiveX, COMObj, IdHTTP, idURI;

{ .... }

procedure GetImageLinks(AURL: string; AList: TStrings);
var
  
IDoc: IHTMLDocument2;
  strHTML: string;
  v: Variant;
  x: Integer;
  ovLinks: OleVariant;
  DocURL: string;
  URI: TidURI;
  ImgURL: string;
  idHTTP: TidHTTP;
begin
  
AList.Clear;
  URI := TidURI.Create(AURL);
  try
    
DocURL := 'http://' + URI.Host;
    if URI.Path <> '/' then
      
DocURL := DocURL + URI.Path;
  finally
    
URI.Free;
  end;
  Idoc := CreateComObject(Class_HTMLDocument) as IHTMLDocument2;
  try
    
IDoc.designMode := 'on';
    while IDoc.readyState <> 'complete' do
      
Application.ProcessMessages;
    v      := VarArrayCreate([0, 0], VarVariant);
    idHTTP := TidHTTP.Create(nil);
    try
      
strHTML := idHTTP.Get(AURL);
    finally
      
idHTTP.Free;
    end;
    v[0] := strHTML;
    IDoc.Write(PSafeArray(System.TVarData(v).VArray));
    IDoc.designMode := 'off';
    while IDoc.readyState <> 'complete' do
      
Application.ProcessMessages;
    ovLinks := IDoc.all.tags('IMG');
    if ovLinks.Length > 0 then
    begin
      for 
x := 0 to ovLinks.Length - 1 do
      begin
        
ImgURL := ovLinks.Item(x).src;
        // The stuff below will probably need a little tweaking
        // Deteriming and turning realtive URLs into absolute URLs
        // is not that difficult but this is all I could come up with
        // in such a short notice.
        
if (ImgURL[1] = '/') then
        begin
          
// more than likely a relative URL so
          // append the DocURL
          
ImgURL := DocURL + ImgUrl;
        end
        else
        begin
          if 
(Copy(ImgURL, 1, 11) = 'about:blank') then
          begin
            
ImgURL := DocURL + Copy(ImgUrl, 12, Length(ImgURL));
          end;
        end;
        AList.Add(ImgURL);
      end;
    end;
  finally
    
IDoc := nil;
  end;
end;


// Beispiel:
// Example:
procedure TForm1.Button1Click(Sender: TObject);
begin
  
GetImageLinks('http://www.swissdelphicenter.ch', Memo1.Lines);
end;


 

Rate this tip:

poor
very good


Copyright © by SwissDelphiCenter.ch
All trademarks are the sole property of their respective owners