/****************************
*
* Fetch information from other websites
*
* laytin_banh($url_array)
*
****************************/
/**
 * Scrape every URL in $url_array and save the collected entries as JSON.
 *
 * Each page is loaded through $this->curl_get(). For every element matching
 * ".normal" one entry is built with up to three keys:
 *   - "price": innertext of the last ".price" match,
 *   - "title": innertext of the last ".info>a" match,
 *   - "src":   src attribute of the last ".picture img" match.
 * Entries are numbered consecutively across all pages and written to
 * "banhbong_lan_new.json" (relative path). Progress is echoed as HTML while
 * scraping. Pages that cannot be loaded are skipped with an E_USER_WARNING.
 *
 * @param array $url_array Page URLs to scrape.
 * @return void
 */
function laytin_banh($url_array){
    print_r($url_array);

    // Entry key => array(selector, element property to read).
    $fields = array(
        "price" => array(".price", "innertext"),
        "title" => array(".info>a", "innertext"),
        "src"   => array(".picture img", "src"),
    );

    $data = array();
    foreach ($url_array as $url) {
        $html = $this->curl_get($url);
        if (!is_object($html)) {
            // curl_get() hands back a non-object when the page could not be
            // fetched or parsed; calling find() on it would be a fatal error.
            trigger_error("laytin_banh: could not load " . $url, E_USER_WARNING);
            continue;
        }

        foreach ($html->find(".normal") as $link) {
            $j = count($data);
            echo "<h1>" . $j . "</h1>";

            // Start from an empty entry so that elements without any matching
            // child still occupy their index and $data stays a plain list.
            $item = array();
            foreach ($fields as $key => $spec) {
                list($selector, $property) = $spec;
                // Search the element's descendants directly instead of
                // re-parsing its innertext; the last match wins.
                foreach ($link->find($selector) as $node) {
                    $item[$key] = $node->$property;
                }
            }

            echo "<pre>";
            print_r($item);
            echo "</pre>";

            $data[] = $item;
        }
        $html->clear();
    }

    $json = json_encode($data);
    if ($json === false) {
        // Do not overwrite the output file with an empty string when the
        // scraped text cannot be encoded (for example invalid UTF-8).
        trigger_error("laytin_banh: json_encode failed (error code " . json_last_error() . ")", E_USER_WARNING);
        return;
    }
    file_put_contents("banhbong_lan_new.json", $json);
}
/******************************
* Fetch HTML using cURL (very fast and efficient)
*
* curl_get($url)
*
*******************************/
/**
 * Download $url with cURL and parse the response with str_get_html().
 *
 * The request sends a desktop Chrome user agent, follows redirects and keeps
 * cookies in a temporary file for the duration of this single call.
 *
 * @param string $url Address of the page to download.
 * @return mixed Whatever str_get_html() returns for the response body
 *               (presumably a Simple HTML DOM object - confirm against the
 *               library version in use), or false when the cURL handle could
 *               not be created or the transfer failed.
 */
function curl_get($url){
    // CURLOPT_COOKIEFILE / CURLOPT_COOKIEJAR take a file NAME. tmpfile()
    // returns a stream resource, which would be cast to a string such as
    // "Resource id #5" and leave a junk cookie file in the working directory.
    $cookie = tempnam(sys_get_temp_dir(), 'curl_cookie_');
    $userAgent = 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31' ;

    $ch = curl_init($url);
    if ($ch === false) {
        if ($cookie !== false && is_file($cookie)) {
            unlink($cookie);
        }
        return false;
    }

    $options = array(
        CURLOPT_CONNECTTIMEOUT => 20 ,
        CURLOPT_USERAGENT => $userAgent,
        CURLOPT_AUTOREFERER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_RETURNTRANSFER => true,
        // NOTE(review): certificate and host verification are switched off,
        // so HTTPS responses are not authenticated. Kept as in the original
        // because enabling them may break existing targets - confirm.
        CURLOPT_SSL_VERIFYPEER => 0 ,
        CURLOPT_SSL_VERIFYHOST => 0
    );
    if ($cookie !== false) {
        $options[CURLOPT_COOKIEFILE] = $cookie;
        $options[CURLOPT_COOKIEJAR] = $cookie;
    }
    curl_setopt_array($ch, $options);

    $kl = curl_exec($ch);
    curl_close($ch);
    // Drop the last reference so the handle is really released (and the
    // cookie jar flushed) before the temporary file is removed.
    unset($ch);
    if ($cookie !== false && is_file($cookie)) {
        unlink($cookie);
    }

    if (!is_string($kl)) {
        // curl_exec() returns false when the transfer fails.
        return false;
    }
    return str_get_html($kl);
}
// (Blog-page footer captured with the snippet, not part of the code: "No comments:" / "Post a comment")