//Test01
// Sample script to search and extract product details.
//
// Flow (driven by the host calling OnDocumentComplete after each page load):
//   1. The main block loads the input matrix and opens the product search page.
//   2. On the search page, the next search term is typed in and submitted.
//   3. On a results page, up to cRowsPerPage rows are extracted and appended
//      to cOutputFile as CSV; then either the 'Next' link is followed, the
//      next search term is started, or the script ends.
const
  MaxRecords   = 1000;
  cPrefix      = 'allintitle:';
  cTablePos    = 4;
  cRowsPerPage = 10;   // number of result rows examined on each results page
  //cInputFile = 'C:\Program Files\Newbie\Data\admin\scripts_newbie\InputFile.csv';
  //cOutputFile = 'C:\Program Files\Newbie\Data\admin\scripts_newbie\OutputFile.csv';
  cInputFile   = 'C:\InputFile.csv';
  cOutputFile  = 'C:\OutputFile.csv';

type
  // One extracted result row. All fields are stored already CSV-escaped.
  ProductRecord = record
    sProductName  : string;
    sProductUrl   : string;
    sRetailerName : string;
    sRetailerUrl  : string;
    sPrice        : string;
  end;

var
  I, nProductIndex, nCount : integer;
  ProductArray : array[1..MaxRecords] of ProductRecord;

// Escapes a value for use as a single CSV field.
// Embedded double quotes are doubled, and the value is wrapped in double
// quotes when it contains a double quote, a comma, or a line break (#13/#10).
// Values needing no escaping are returned unchanged.
function ProcessDblQuote(str: string): string;
var
  bFlag : boolean;
begin
  result := str;
  bFlag := False;
  if (Pos('"', str) > 0) then
  begin
    str := StringReplace(str, '"', '""', true, true);
    bFlag := True;
  end;
  // A bare line feed breaks a CSV row just like a carriage return does.
  if (Pos(#13, str) > 0) or (Pos(#10, str) > 0) or (Pos(',', str) > 0) then
    bFlag := True;
  if (bFlag) then
    result := '"' + str + '"';
end;

// Decodes the percent-escapes (%3F, %3D, %26) and the HTML entity for '&'
// that appear in hrefs scraped from the page.
function DecodeUrlText(str: string): string;
begin
  str := StringReplace(str, '%3F', '?', True, True);
  str := StringReplace(str, '%3D', '=', True, True);
  str := StringReplace(str, '%26', '&', True, True);
  // The original replaced '&' with '&' (a no-op); decoding the '&amp;'
  // entity is the evident intent.
  str := StringReplace(str, '&amp;', '&', True, True);
  result := str;
end;

// Returns the absolute product URL built from the first href found in str,
// or '' when str contains no href.
function GetProductUrl(str : string): string;
var
  sTmp : string;
begin
  result := '';
  sTmp := Trim(ExtractSubStr(str, 'href="', '">'));
  if (sTmp <> '') then
    result := 'http://www.google.com' + DecodeUrlText(sTmp);
end;

// Returns the text that follows the merchant link marker in str.
// NOTE(review): the end delimiter is an empty string; it may originally have
// been a closing tag that was lost when this file was copied - confirm
// against the host's ExtractSubStr behaviour.
function GetRetailerName(str: string): string;
begin
  result := ExtractSubStr(str, 'sa=merchant">', '');
end;

// Returns the decoded retailer URL taken from the 'q' parameter of the
// /product_url redirect link found in str.
function GetRetailerUrl(str: string): string;
begin
  result := DecodeUrlText(ExtractSubStr(str, 'href="/product_url?q=', '&'));
end;

// Returns the first line of the cell text (everything before the first #13),
// or '' when the text contains no #13.
function GetProductName(str: string): string;
var
  nPos : integer;
begin
  result := '';
  nPos := Pos(#13, str);
  if (nPos > 0) then
    result := Copy(str, 1, nPos-1);
end;

// Returns the first line of the cell text (everything before the first #13),
// or '' when the text contains no #13.
function GetProductPrice(str: string): string;
var
  nPos : integer;
begin
  // Initialise the result so it is defined when no #13 is present.
  result := '';
  nPos := Pos(#13, str);
  if (nPos > 0) then
    result := Copy(str, 1, nPos-1);
end;

// Extracts one result row and appends it to cOutputFile as a CSV line:
//   search term, product name, product URL, retailer name, retailer URL, price
//   str  - link markup of the row's first cell (source of the product URL)
//   nIdx - row index inside table cTablePos
procedure GetProductDetails(str : string; nIdx : integer);
var
  nSlot : integer;
begin
  nCount := nCount + 1;
  // ProductArray holds only MaxRecords entries; reuse slots cyclically so a
  // long run cannot index past the end of the array. nCount keeps the total.
  nSlot := ((nCount - 1) mod MaxRecords) + 1;

  ProductArray[nSlot].sProductName  := ProcessDblQuote(GetProductName(GetTableCell(cTablePos, nIdx, 1)));
  // URLs may contain commas, so they are escaped like every other field.
  ProductArray[nSlot].sProductUrl   := ProcessDblQuote(GetProductUrl(str));
  ProductArray[nSlot].sRetailerName := ProcessDblQuote(GetRetailerName(GetTableCellUrls(cTablePos, nIdx, 3)));
  ProductArray[nSlot].sRetailerUrl  := ProcessDblQuote(GetRetailerUrl(GetTableCellUrls(cTablePos, nIdx, 3)));
  ProductArray[nSlot].sPrice        := ProcessDblQuote(GetProductPrice(GetTableCell(cTablePos, nIdx, 3)));

  WriteToLog(cOutputFile, '',
    ProcessDblQuote(MatrixGetVal(nProductIndex, 1)) + ',' +
    ProductArray[nSlot].sProductName + ',' +
    ProductArray[nSlot].sProductUrl + ',' +
    ProductArray[nSlot].sRetailerName + ',' +
    ProductArray[nSlot].sRetailerUrl + ',' +
    ProductArray[nSlot].sPrice);
end;

// Tells the user where the output was written and stops the script.
procedure FinishScript;
begin
  ShowMessage('Data extraction completed. ' + cOutputFile);
  NewbieScriptEnd;
end;

// Returns to the search page when input rows remain, otherwise ends the script.
procedure GotoNextRecordOrFinish;
begin
  if (nProductIndex < MatrixMaxRow) then
    GotoURL('http://www.google.com/products')
  else
    FinishScript;
end;

// Page-load handler: decides what to do based on the page that just loaded.
procedure OnDocumentComplete(URL : string);
begin
  if IsPartOf('did not match any products', GetHTMLBody) then
  begin
    WriteToLog(cOutputFile, '', ProcessDblQuote(MatrixGetVal(nProductIndex, 1)) + ',not found');
    // Without this bounds check a "not found" on the last input row would
    // send the script back to the search page forever.
    GotoNextRecordOrFinish;
  end
  else if IsPartOf('http://www.google.com/products?q=', URL) then
  begin
    for I := 1 to cRowsPerPage do
    begin
      // An empty first cell marks the end of the results on this page.
      if (Trim(GetTableCell(cTablePos, I, 1)) = '') then
        break;
      GetProductDetails(GetTableCellUrls(cTablePos, I, 1), I);
    end;

    // next page
    if (HyperLinkExists('Next')) then
      ClickHyperLink('Next')
    // goto next record, or finish when the input is exhausted
    else
      GotoNextRecordOrFinish;
  end
  else if IsPartOf('http://www.google.com/products', URL) then
  begin
    // Guard against an empty input file or a stray reload of the search page.
    if (nProductIndex < MatrixMaxRow) then
    begin
      nProductIndex := nProductIndex + 1;
      Fill('q', cPrefix + MatrixGetVal(nProductIndex, 1));
      ClickButton('btnG');
    end
    else
      FinishScript;
  end;
end;

begin
  nCount := 0;
  nProductIndex := 0;
  DeleteFile(cOutputFile);
  MatrixReadFile(cInputFile);
  Navigate('http://www.google.com/products');
end.