program GoogleProductsSearch;

// Sample script to search and extract product
// information from Google Products.
//
// Flow (driven by the host calling OnDocumentComplete after each page load):
//   1. The main block loads the input matrix and opens the products page.
//   2. On the products landing page the next search term is submitted.
//   3. On a result page every listed product is appended to the output file,
//      then the script follows "Next", moves on to the next search term, or
//      finishes.

const
  MaxRecords       = 5000;
  cPrefix          = 'allintitle:';
  cTablePos        = 5;
  cInputFile       = 'C:\InputFile.csv';
  cOutputFile      = 'C:\OutputFile.csv';
  // Number of table rows inspected on each result page.
  cResultsPerPage  = 10;
  // URLs used for navigation and for recognising which page has loaded.
  cStartUrl        = 'http://www.google.com/products';
  cResultUrlPrefix = 'http://www.google.com/products?q=';
  cNextSearchUrl   = 'http://froogle.google.com/';

type
  ProductRecord = record
    sProductName  : string;
    sProductUrl   : string;
    sRetailerName : string;
    sRetailerUrl  : string;
    sPrice        : string;
  end;

var
  nProductIndex, nCount : integer;
  ProductArray : array[1..MaxRecords] of ProductRecord;

// Returns str prepared for use as one CSV field: embedded double quotes are
// doubled, and the field is wrapped in double quotes when it contains a
// double quote, a comma, or a line break (#13 or #10). Any other string is
// returned unchanged.
function ProcessDblQuote(str: string): string;
var
  bFlag : boolean;
begin
  bFlag := False;
  if (Pos('"', str) > 0) then
  begin
    str := StringReplace(str, '"', '""', true, true);
    bFlag := True;
  end;
  // A bare line feed splits a CSV row just like a carriage return does.
  if (Pos(#13, str) > 0) or (Pos(#10, str) > 0) or (Pos(',', str) > 0) then
    bFlag := True;
  if (bFlag) then
    result := '"' + str + '"'
  else
    result := str;
end;

// Extracts the target of the first 'href="/product_url?q=...' link found in
// str, decodes %3F, %3D and %26, and cuts the result at the first '&'.
// Returns '' when no such link is present.
function GetProductUrl(str : string): string;
var
  sTmp : string;
  nPos : integer;
begin
  result := '';
  sTmp := Trim(ExtractSubStr(str, 'href="/product_url?q=', '">'));
  if (sTmp <> '') then
  begin
    sTmp := StringReplace(sTmp, '%3F', '?', True, True);
    sTmp := StringReplace(sTmp, '%3D', '=', True, True);
    sTmp := StringReplace(sTmp, '%26', '&', True, True);
    // Keep only the part before the first '&'.
    nPos := Pos('&', sTmp);
    if (nPos > 0) then
    begin
      sTmp := Trim(Copy(sTmp, 1, nPos-1));
    end;
    result := sTmp;
  end;
end;

// Returns the text of column 2 of result row nIdx up to (not including) the
// first carriage return, or the whole cell text when it has none.
function GetRetailerName(nIdx : integer): string;
var
  sTmp : string;
  nPos : integer;
begin
  sTmp := GetTableCell(cTablePos, nIdx, 2);
  nPos := Pos(#13, sTmp);
  result := sTmp;
  if (nPos > 0) then
  begin
    result := Copy(sTmp, 1, nPos-1);
  end;
end;

// Returns str cut after the first '.com' (case-insensitive match), or str
// unchanged when it contains no '.com'.
function GetRetailerUrl(str: string): string;
var
  nPos : integer;
begin
  result := str;
  nPos := Pos('.COM', UpperCase(str));
  if (nPos > 0) then
  begin
    // NOTE(review): nPos+4 keeps one character after '.com' - presumably the
    // trailing '/'. Confirm this is intended; nPos+3 would stop at '.com'.
    result := Copy(str, 1, nPos+4);
  end;
end;

// Collects the details of result row nIdx, stores them in ProductArray while
// there is room, and appends one CSV line to cOutputFile.
//   str  - text containing the product link of the row (see GetProductUrl)
//   nIdx - row number inside the result table
procedure GetProductDetails(str : string; nIdx : integer);
var
  sName, sUrl, sRetailer, sRetailerSite, sPrice : string;
begin
  nCount := nCount + 1;

  sName         := ProcessDblQuote(GetTableCell(cTablePos, nIdx, 1));
  sUrl          := GetProductUrl(str);
  sRetailer     := GetRetailerName(nIdx);
  sRetailerSite := GetRetailerUrl(sUrl);
  sPrice        := ProcessDblQuote(GetTableCell(cTablePos, nIdx, 3));

  // The array has a fixed size; once it is full the record is still written
  // to the output file but no longer stored, instead of indexing past the end.
  if (nCount <= MaxRecords) then
  begin
    ProductArray[nCount].sProductName  := sName;
    ProductArray[nCount].sProductUrl   := sUrl;
    ProductArray[nCount].sRetailerName := sRetailer;
    ProductArray[nCount].sRetailerUrl  := sRetailerSite;
    ProductArray[nCount].sPrice        := sPrice;
  end;

  // sName and sPrice are already CSV-escaped above; the remaining fields are
  // escaped here so a comma or quote in any of them cannot break the row.
  WriteToLog(cOutputFile, '',
    ProcessDblQuote(MatrixGetVal(nProductIndex, 1)) + ',' +
    sName + ',' +
    ProcessDblQuote(sUrl) + ',' +
    ProcessDblQuote(sRetailer) + ',' +
    ProcessDblQuote(sRetailerSite) + ',' +
    sPrice);
end;

// Reports completion to the user and ends the script.
procedure FinishScript;
begin
  ShowMessage('Data extraction completed. '+cOutputFile);
  NewbieScriptEnd;
end;

// Opens the search page for the next input row, or finishes the script when
// the current row is the last one.
procedure GotoNextRecordOrFinish;
begin
  if (nProductIndex < MatrixMaxRow) then
    GotoURL(cNextSearchUrl)
  else
    FinishScript;
end;

// Page-load handler. URL is the address of the page that has just loaded.
procedure OnDocumentComplete(URL : string);
var
  nRow : integer;
begin
  if IsPartOf('did not match any products', GetHTMLBody) then
  begin
    WriteToLog(cOutputFile, '',
      ProcessDblQuote(MatrixGetVal(nProductIndex, 1)) + ',not found');
    // Also stop here when the term without results was the last input row.
    GotoNextRecordOrFinish;
  end
  else if IsPartOf(cResultUrlPrefix, URL) then
  begin
    for nRow := 1 to cResultsPerPage do
    begin
      // An empty first column marks the end of the listing on this page.
      if (Trim(GetTableCell(cTablePos, nRow, 1)) = '') then
      begin
        break;
      end;
      GetProductDetails(GetTableCellUrls(cTablePos, nRow, 1), nRow);
    end;

    // next page
    if (HyperLinkExists('Next')) then
      ClickHyperLink('Next')
    // goto next record, or finish when there is none
    else
      GotoNextRecordOrFinish;
  end
  else if IsPartOf(cStartUrl, URL) then
  begin
    // Never read past the last input row (covers an empty input file too).
    if (nProductIndex >= MatrixMaxRow) then
      FinishScript
    else
    begin
      nProductIndex := nProductIndex + 1;
      Fill('q', cPrefix + MatrixGetVal(nProductIndex, 1));
      ClickButton('btnG');
    end;
  end;
end;

begin
  nCount := 0;
  nProductIndex := 0;
  DeleteFile(cOutputFile);
  MatrixReadFile(cInputFile);
  Navigate(cStartUrl);
end.