分享一个强大的HTTP访问类(可做采集)
做采集的时候,可以使用file_get_contents()去获取网页源代码,但是使用file_get_contents采集速度慢,而且超时时间不好控制:如果采集的页面不存在,需要等待很长时间。一般来说,curl的速度最快,其次是socket,最后是file_get_contents。
现在跟大家分享一个很强大的采集类,会根据你的服务器当前的配置,自动选择最快的方式。已经封装了curl和socket,file_get_contents
用法很简单:
1、采用get方法请求
Http::doGet(网址);//超时时间可忽略,默认是5秒
Http::doGet(网址,超时时间);
如echo Http::doGet('http://www.baidu.com');
2、采用post方法请求
Http::doPost(网址,数据,超时时间);
如
$url='http://www.canphp.com/test.php';
$data['name']='单骑';
$data['email']='admin@canphp.com';
Http::doPost($url,$data,10);
test.php页面接收数据
$_POST['name'];
$_POST['email'];
这个http类不仅可以用来采集,还有一个很强大的作用,模拟php异步多进程。
比如有index.php和a.php, b.php, c.php
在index.php中
Http::doGet('http://www.canphp.com/a.php',1);
Http::doGet('http://www.canphp.com/b.php',1);
Http::doGet('http://www.canphp.com/c.php',1);
a.php, b.php, c.php程序分别在头部加上ignore_user_abort(true);
那么就可以实现多进程了。
原理:
通过curl或socket发送请求给a.php, b.php, c.php,由于超时时间比较短,只是触发了a.php, b.php, c.php三个页面,不需要等待数据返回,连接已中断,但是a.php, b.php, c.php程序中加上了ignore_user_abort(true);忽略客户端连接,还会继续执行。
<?php
// Data collection helper: doGet, doPost and file download.
/**
 * Static HTTP helper that fetches pages through curl, a raw socket or
 * file_get_contents(), and can stream a local file as a browser download.
 *
 * All request methods return the response body as a string on success and
 * false on failure.
 */
class Http {
    /**
     * Forced transport: 1 = curl, 2 = socket, 3 = file_get_contents.
     * Any other value means "pick automatically" (see getSupport()).
     */
    public static $way = 0;

    /**
     * Manually select the transport used by doGet() and doPost().
     *
     * @param int $way 1 (curl), 2 (socket) or 3 (file_get_contents)
     */
    public static function setWay($way) {
        self::$way = intval($way);
    }

    /**
     * Resolve the transport to use.
     *
     * @return int the forced transport if one of 1/2/3 was set, otherwise the
     *             first available of curl (1), fsockopen (2),
     *             file_get_contents (3); 0 when none is available
     */
    public static function getSupport() {
        // Honour an explicitly selected transport first.
        if (is_numeric(self::$way)) {
            $forced = (int) self::$way;
            if ($forced >= 1 && $forced <= 3) {
                return $forced;
            }
        }
        // Otherwise auto-detect, in order of preference.
        if (function_exists('curl_init')) {
            return 1;
        }
        if (function_exists('fsockopen')) {
            return 2;
        }
        if (function_exists('file_get_contents')) {
            return 3;
        }
        return 0;
    }

    /**
     * Fetch a URL with GET using the transport chosen by getSupport().
     *
     * @param string $url     target URL; "http://" is prepended when no
     *                        http(s) scheme is present
     * @param int    $timeout timeout in seconds (must be non-empty)
     * @param string $header  extra request headers separated by CRLF; the
     *                        built-in browser-like headers are used when empty
     * @return string|false response body, or false on failure / bad arguments
     */
    public static function doGet($url, $timeout = 5, $header = "") {
        if (empty($url) || empty($timeout)) {
            return false;
        }
        $url = self::normalizeUrl($url);
        switch (self::getSupport()) {
            case 1:
                return self::curlGet($url, $timeout, $header);
            case 2:
                return self::socketGet($url, $timeout, $header);
            case 3:
                return self::phpGet($url, $timeout, $header);
            default:
                return false;
        }
    }

    /**
     * Send form data with POST using the transport chosen by getSupport().
     *
     * @param string $url       target URL; "http://" is prepended when no
     *                          http(s) scheme is present
     * @param array  $post_data fields to send (must be non-empty)
     * @param int    $timeout   timeout in seconds (must be non-empty)
     * @param string $header    extra request headers separated by CRLF
     * @return string|false response body, or false on failure / bad arguments
     */
    public static function doPost($url, $post_data = array(), $timeout = 5, $header = "") {
        if (empty($url) || empty($post_data) || empty($timeout)) {
            return false;
        }
        $url = self::normalizeUrl($url);
        switch (self::getSupport()) {
            case 1:
                return self::curlPost($url, $post_data, $timeout, $header);
            case 2:
                return self::socketPost($url, $post_data, $timeout, $header);
            case 3:
                return self::phpPost($url, $post_data, $timeout, $header);
            default:
                return false;
        }
    }

    /**
     * GET through curl.
     *
     * @return string|false response body as returned by curl_exec()
     */
    public static function curlGet($url, $timeout = 5, $header = "") {
        return self::curlRequest($url, $timeout, $header, null);
    }

    /**
     * POST through curl. The data is encoded with http_build_query().
     *
     * @return string|false response body as returned by curl_exec()
     */
    public static function curlPost($url, $post_data = array(), $timeout = 5, $header = "") {
        return self::curlRequest($url, $timeout, $header, http_build_query($post_data));
    }

    /**
     * GET through a raw socket (HTTP/1.0).
     *
     * @return string|false response body for a 2xx response, false otherwise
     */
    public static function socketGet($url, $timeout = 5, $header = "") {
        return self::socketRequest('GET', $url, $timeout, $header, null);
    }

    /**
     * POST through a raw socket (HTTP/1.0, urlencoded form body).
     *
     * @return string|false response body for a 2xx response, false otherwise
     */
    public static function socketPost($url, $post_data = array(), $timeout = 5, $header = "") {
        return self::socketRequest('POST', $url, $timeout, $header, http_build_query($post_data));
    }

    /**
     * GET through file_get_contents() with an HTTP stream context.
     *
     * @return string|false response body, or false on failure
     */
    public static function phpGet($url, $timeout = 5, $header = "") {
        $opts = array(
            'http' => array(
                'protocol_version' => '1.0',
                'method' => "GET",
                'timeout' => $timeout,
                'header' => self::headerBlock($header),
            ),
        );
        $context = stream_context_create($opts);
        return @file_get_contents($url, false, $context);
    }

    /**
     * POST through file_get_contents() with an HTTP stream context.
     *
     * @return string|false response body, or false on failure
     */
    public static function phpPost($url, $post_data = array(), $timeout = 5, $header = "") {
        $post_string = http_build_query($post_data);
        // headerBlock() guarantees a trailing CRLF, so the entity headers
        // below can never be glued onto the last caller-supplied header.
        $headers = self::headerBlock($header);
        $headers .= "Content-type: application/x-www-form-urlencoded\r\n";
        $headers .= "Content-length: " . strlen($post_string) . "\r\n";
        $opts = array(
            'http' => array(
                'protocol_version' => '1.0',
                'method' => "POST",
                'timeout' => $timeout,
                'header' => $headers,
                'content' => $post_string,
            ),
        );
        $context = stream_context_create($opts);
        return @file_get_contents($url, false, $context);
    }

    /**
     * Prepend "http://" unless the URL already starts with http:// or https://.
     */
    private static function normalizeUrl($url) {
        if (!preg_match('#^https?://#i', $url)) {
            $url = "http://" . $url;
        }
        return $url;
    }

    /**
     * Turn a header argument (CRLF-separated string or array) into a list of
     * individual header lines; falls back to defaultHeader() when empty.
     */
    private static function headerLines($header) {
        if (empty($header)) {
            $header = self::defaultHeader();
        }
        if (is_array($header)) {
            $header = implode("\r\n", $header);
        }
        $lines = array();
        foreach (preg_split('/\r\n|\r|\n/', (string) $header) as $line) {
            $line = trim($line);
            if ($line !== '') {
                $lines[] = $line;
            }
        }
        return $lines;
    }

    /**
     * Same as headerLines() but joined into one string in which every header
     * line, including the last, is terminated by CRLF.
     */
    private static function headerBlock($header) {
        $lines = self::headerLines($header);
        return $lines ? implode("\r\n", $lines) . "\r\n" : '';
    }

    /**
     * Shared curl implementation for curlGet() and curlPost().
     *
     * @param string|null $post_string urlencoded body, or null for GET
     */
    private static function curlRequest($url, $timeout, $header, $post_string) {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }
        if ($post_string !== null) {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $post_string);
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        // CURLOPT_HTTPHEADER expects one array element per header line.
        curl_setopt($ch, CURLOPT_HTTPHEADER, self::headerLines($header));
        $result = curl_exec($ch);
        // curl handles are objects from PHP 8.0 on and need no explicit close.
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }
        return $result;
    }

    /**
     * Shared socket implementation for socketGet() and socketPost().
     *
     * @param string      $method "GET" or "POST"
     * @param string|null $body   urlencoded body, or null when there is none
     * @return string|false response body for a 2xx response, false otherwise
     */
    private static function socketRequest($method, $url, $timeout, $header, $body) {
        $parts = parse_url($url);
        if ($parts === false || empty($parts['host'])) {
            return false;
        }
        $secure = isset($parts['scheme']) && strtolower($parts['scheme']) === 'https';
        $defaultPort = $secure ? 443 : 80;
        $port = isset($parts['port']) ? (int) $parts['port'] : $defaultPort;
        $path = isset($parts['path']) ? $parts['path'] : '/';
        $query = isset($parts['query']) ? '?' . $parts['query'] : '';

        // https needs the ssl:// transport and the host name (not the IP);
        // plain http keeps resolving the address up front.
        if ($secure) {
            $target = 'ssl://' . $parts['host'];
        } else {
            $target = @gethostbyname($parts['host']);
        }

        // fsockopen() reports failure by returning false.
        $fsock = @fsockopen($target, $port, $errno, $errstr, (float) $timeout);
        if ($fsock === false) {
            return false;
        }
        // The fsockopen() timeout only covers connecting; bound reads as well.
        stream_set_timeout($fsock, max(1, (int) ceil((float) $timeout)));

        $lines = self::headerLines($header);
        $hasHost = false;
        foreach ($lines as $line) {
            if (stripos($line, 'Host:') === 0) {
                $hasHost = true;
                break;
            }
        }

        $in = $method . " " . $path . $query . " HTTP/1.0\r\n";
        if (!$hasHost) {
            $hostHeader = $parts['host'];
            if ($port !== $defaultPort) {
                $hostHeader .= ':' . $port;
            }
            $in .= "Host: " . $hostHeader . "\r\n";
        }
        foreach ($lines as $line) {
            $in .= $line . "\r\n";
        }
        if ($body !== null) {
            $in .= "Content-type: application/x-www-form-urlencoded\r\n";
            $in .= "Content-Length: " . strlen($body) . "\r\n";
        }
        $in .= "Connection: Close\r\n\r\n";
        if ($body !== null) {
            // Exactly Content-Length bytes: no trailing CRLFs after the body.
            $in .= $body;
        }

        if (!@fwrite($fsock, $in, strlen($in))) {
            @fclose($fsock);
            return false;
        }
        return self::GetHttpContent($fsock);
    }

    /**
     * Browser-like request headers used when the caller supplies none.
     */
    private static function defaultHeader() {
        $header = "User-Agent:Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12\r\n";
        $header .= "Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n";
        $header .= "Accept-language: zh-cn,zh;q=0.5\r\n";
        $header .= "Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7\r\n";
        return $header;
    }

    /**
     * Read a whole HTTP response from an open stream, close the stream and
     * return the body.
     *
     * @param resource $fsock stream positioned at the start of the response
     * @return string|false body (possibly empty) for a 2xx status, false for
     *                      any other status or an unparsable response
     */
    private static function GetHttpContent($fsock = null) {
        if (!is_resource($fsock)) {
            return false;
        }
        $out = '';
        // Compare against false so a chunk such as "0" does not end the loop.
        while (($buff = @fgets($fsock, 2048)) !== false) {
            $out .= $buff;
        }
        fclose($fsock);

        $pos = strpos($out, "\r\n\r\n");
        if ($pos === false) {
            return false;
        }
        $head = substr($out, 0, $pos);
        // A response may consist of the status line only.
        $eol = strpos($head, "\r\n");
        $status = ($eol === false) ? $head : substr($head, 0, $eol);
        $body = (string) substr($out, $pos + 4);

        if (!preg_match('/^HTTP\/\d(?:\.\d)?\s+(\d{3})(?:\s|$)/', $status, $matches)) {
            return false;
        }
        // Accept the whole 2xx class, not only 200.
        $code = intval($matches[1]);
        if ($code >= 200 && $code < 300) {
            return $body;
        }
        return false;
    }

    /**
     * Send a local file to the browser as a download.
     *
     * Terminates the script with die() when the file does not exist.
     *
     * @param string $filename path of the file to send
     * @param string $showname file name presented to the user; defaults to
     *                         the base name of $filename
     * @param int    $expire   browser cache lifetime in seconds
     * @return bool true after the file has been sent
     */
    public static function download($filename, $showname = '', $expire = 1800) {
        if (file_exists($filename) && is_file($filename)) {
            $length = filesize($filename);
        } else {
            die('下载文件不存在!');
        }
        if ($showname === '' || $showname === null) {
            $showname = basename($filename);
        }
        // Drop line breaks and escape quotes/backslashes so the name is a
        // valid quoted-string (which also allows spaces in the name).
        $safeName = addcslashes(str_replace(array("\r", "\n"), '', $showname), "\"\\");
        $expire = (int) $expire;
        $type = mime_content_type($filename);
        $now = time();

        header("Pragma: public");
        header("Cache-control: max-age=" . $expire);
        // HTTP dates need a space before the "GMT" zone designator.
        header("Expires: " . gmdate("D, d M Y H:i:s", $now + $expire) . " GMT");
        header("Last-Modified: " . gmdate("D, d M Y H:i:s", $now) . " GMT");
        header("Content-Disposition: attachment; filename=\"" . $safeName . "\"");
        header("Content-Length: " . $length);
        header("Content-type: " . $type);
        header('Content-Encoding: none');
        header("Content-Transfer-Encoding: binary");
        readfile($filename);
        return true;
    }
}
if (!function_exists('mime_content_type')) {
    /**
     * Fallback used when PHP does not provide mime_content_type().
     *
     * Looks up the MIME type from the file name's extension (the text after
     * the last dot, lower-cased); the file itself is never opened.
     *
     * @param string $filename file name or path
     * @return string the mapped MIME type, or 'application/octet-stream'
     *                when the extension is not in the table
     */
    function mime_content_type($filename) {
        static $mimeByExtension = array(
            'ai' => 'application/postscript',
            'aif' => 'audio/x-aiff',
            'aifc' => 'audio/x-aiff',
            'aiff' => 'audio/x-aiff',
            'asc' => 'application/pgp', // changed by skwashd - was text/plain
            'asf' => 'video/x-ms-asf',
            'asx' => 'video/x-ms-asf',
            'au' => 'audio/basic',
            'avi' => 'video/x-msvideo',
            'bcpio' => 'application/x-bcpio',
            'bin' => 'application/octet-stream',
            'bmp' => 'image/bmp',
            'c' => 'text/plain', // or 'text/x-csrc', //added by skwashd
            'cc' => 'text/plain', // or 'text/x-c++src', //added by skwashd
            'cs' => 'text/plain', // added by skwashd - for C# src
            'cpp' => 'text/x-c++src', // added by skwashd
            'cxx' => 'text/x-c++src', // added by skwashd
            'cdf' => 'application/x-netcdf',
            'class' => 'application/octet-stream', // secure but application/java-class is correct
            'com' => 'application/octet-stream', // added by skwashd
            'cpio' => 'application/x-cpio',
            'cpt' => 'application/mac-compactpro',
            'csh' => 'application/x-csh',
            'css' => 'text/css',
            'csv' => 'text/comma-separated-values', // added by skwashd
            'dcr' => 'application/x-director',
            'diff' => 'text/diff',
            'dir' => 'application/x-director',
            'dll' => 'application/octet-stream',
            'dms' => 'application/octet-stream',
            'doc' => 'application/msword',
            'dot' => 'application/msword', // added by skwashd
            'dvi' => 'application/x-dvi',
            'dxr' => 'application/x-director',
            'eps' => 'application/postscript',
            'etx' => 'text/x-setext',
            'exe' => 'application/octet-stream',
            'ez' => 'application/andrew-inset',
            'gif' => 'image/gif',
            'gtar' => 'application/x-gtar',
            'gz' => 'application/x-gzip',
            'h' => 'text/plain', // or 'text/x-chdr',//added by skwashd
            'h++' => 'text/plain', // or 'text/x-c++hdr', //added by skwashd
            'hh' => 'text/plain', // or 'text/x-c++hdr', //added by skwashd
            'hpp' => 'text/plain', // or 'text/x-c++hdr', //added by skwashd
            'hxx' => 'text/plain', // or 'text/x-c++hdr', //added by skwashd
            'hdf' => 'application/x-hdf',
            'hqx' => 'application/mac-binhex40',
            'htm' => 'text/html',
            'html' => 'text/html',
            'ice' => 'x-conference/x-cooltalk',
            'ics' => 'text/calendar',
            'ief' => 'image/ief',
            'ifb' => 'text/calendar',
            'iges' => 'model/iges',
            'igs' => 'model/iges',
            'jar' => 'application/x-jar', // added by skwashd - alternative mime type
            'java' => 'text/x-java-source', // added by skwashd
            'jpe' => 'image/jpeg',
            'jpeg' => 'image/jpeg',
            'jpg' => 'image/jpeg',
            'js' => 'application/x-javascript',
            'kar' => 'audio/midi',
            'latex' => 'application/x-latex',
            'lha' => 'application/octet-stream',
            'log' => 'text/plain',
            'lzh' => 'application/octet-stream',
            'm3u' => 'audio/x-mpegurl',
            'man' => 'application/x-troff-man',
            'me' => 'application/x-troff-me',
            'mesh' => 'model/mesh',
            'mid' => 'audio/midi',
            'midi' => 'audio/midi',
            'mif' => 'application/vnd.mif',
            'mov' => 'video/quicktime',
            'movie' => 'video/x-sgi-movie',
            'mp2' => 'audio/mpeg',
            'mp3' => 'audio/mpeg',
            'mpe' => 'video/mpeg',
            'mpeg' => 'video/mpeg',
            'mpg' => 'video/mpeg',
            'mpga' => 'audio/mpeg',
            'ms' => 'application/x-troff-ms',
            'msh' => 'model/mesh',
            'mxu' => 'video/vnd.mpegurl',
            'nc' => 'application/x-netcdf',
            'oda' => 'application/oda',
            'patch' => 'text/diff',
            'pbm' => 'image/x-portable-bitmap',
            'pdb' => 'chemical/x-pdb',
            'pdf' => 'application/pdf',
            'pgm' => 'image/x-portable-graymap',
            'pgn' => 'application/x-chess-pgn',
            'pgp' => 'application/pgp', // added by skwashd
            'php' => 'application/x-httpd-php',
            'php3' => 'application/x-httpd-php3',
            'pl' => 'application/x-perl',
            'pm' => 'application/x-perl',
            'png' => 'image/png',
            'pnm' => 'image/x-portable-anymap',
            'po' => 'text/plain',
            'ppm' => 'image/x-portable-pixmap',
            'ppt' => 'application/vnd.ms-powerpoint',
            'ps' => 'application/postscript',
            'qt' => 'video/quicktime',
            'ra' => 'audio/x-realaudio',
            'rar' => 'application/octet-stream',
            'ram' => 'audio/x-pn-realaudio',
            'ras' => 'image/x-cmu-raster',
            'rgb' => 'image/x-rgb',
            'rm' => 'audio/x-pn-realaudio',
            'roff' => 'application/x-troff',
            'rpm' => 'audio/x-pn-realaudio-plugin',
            'rtf' => 'text/rtf',
            'rtx' => 'text/richtext',
            'sgm' => 'text/sgml',
            'sgml' => 'text/sgml',
            'sh' => 'application/x-sh',
            'shar' => 'application/x-shar',
            'shtml' => 'text/html',
            'silo' => 'model/mesh',
            'sit' => 'application/x-stuffit',
            'skd' => 'application/x-koan',
            'skm' => 'application/x-koan',
            'skp' => 'application/x-koan',
            'skt' => 'application/x-koan',
            'smi' => 'application/smil',
            'smil' => 'application/smil',
            'snd' => 'audio/basic',
            'so' => 'application/octet-stream',
            'spl' => 'application/x-futuresplash',
            'src' => 'application/x-wais-source',
            'stc' => 'application/vnd.sun.xml.calc.template',
            'std' => 'application/vnd.sun.xml.draw.template',
            'sti' => 'application/vnd.sun.xml.impress.template',
            'stw' => 'application/vnd.sun.xml.writer.template',
            'sv4cpio' => 'application/x-sv4cpio',
            'sv4crc' => 'application/x-sv4crc',
            'swf' => 'application/x-shockwave-flash',
            'sxc' => 'application/vnd.sun.xml.calc',
            'sxd' => 'application/vnd.sun.xml.draw',
            'sxg' => 'application/vnd.sun.xml.writer.global',
            'sxi' => 'application/vnd.sun.xml.impress',
            'sxm' => 'application/vnd.sun.xml.math',
            'sxw' => 'application/vnd.sun.xml.writer',
            't' => 'application/x-troff',
            'tar' => 'application/x-tar',
            'tcl' => 'application/x-tcl',
            'tex' => 'application/x-tex',
            'texi' => 'application/x-texinfo',
            'texinfo' => 'application/x-texinfo',
            'tgz' => 'application/x-gtar',
            'tif' => 'image/tiff',
            'tiff' => 'image/tiff',
            'tr' => 'application/x-troff',
            'tsv' => 'text/tab-separated-values',
            'txt' => 'text/plain',
            'ustar' => 'application/x-ustar',
            'vbs' => 'text/plain', // added by skwashd - for obvious reasons
            'vcd' => 'application/x-cdlink',
            'vcf' => 'text/x-vcard',
            'vcs' => 'text/calendar',
            'vfb' => 'text/calendar',
            'vrml' => 'model/vrml',
            'vsd' => 'application/vnd.visio',
            'wav' => 'audio/x-wav',
            'wax' => 'audio/x-ms-wax',
            'wbmp' => 'image/vnd.wap.wbmp',
            'wbxml' => 'application/vnd.wap.wbxml',
            'wm' => 'video/x-ms-wm',
            'wma' => 'audio/x-ms-wma',
            'wmd' => 'application/x-ms-wmd',
            'wml' => 'text/vnd.wap.wml',
            'wmlc' => 'application/vnd.wap.wmlc',
            'wmls' => 'text/vnd.wap.wmlscript',
            'wmlsc' => 'application/vnd.wap.wmlscriptc',
            'wmv' => 'video/x-ms-wmv',
            'wmx' => 'video/x-ms-wmx',
            'wmz' => 'application/x-ms-wmz',
            'wrl' => 'model/vrml',
            'wvx' => 'video/x-ms-wvx',
            'xbm' => 'image/x-xbitmap',
            'xht' => 'application/xhtml+xml',
            'xhtml' => 'application/xhtml+xml',
            'xls' => 'application/vnd.ms-excel',
            'xlt' => 'application/vnd.ms-excel',
            'xml' => 'application/xml',
            'xpm' => 'image/x-xpixmap',
            'xsl' => 'text/xml',
            'xwd' => 'image/x-xwindowdump',
            'xyz' => 'chemical/x-xyz',
            'z' => 'application/x-compress',
            'zip' => 'application/zip',
        );
        // Everything from the last dot onwards, then drop the dot itself.
        $suffix = strrchr($filename, '.');
        $extension = strtolower(substr($suffix, 1));
        return isset($mimeByExtension[$extension])
            ? $mimeByExtension[$extension]
            : 'application/octet-stream';
    }
}
if (!function_exists('image_type_to_extension')) {
    /**
     * Fallback used when PHP does not provide image_type_to_extension().
     *
     * Accepts the same two arguments as the native function so that callers
     * asking for the extension without the leading dot get what they expect.
     *
     * @param int  $imagetype   one of the IMAGETYPE_* constants
     * @param bool $include_dot whether to prefix the extension with "."
     * @return string|false the extension, or false for an empty or unknown type
     */
    function image_type_to_extension($imagetype, $include_dot = true) {
        if (empty($imagetype)) {
            return false;
        }
        switch ($imagetype) {
            case IMAGETYPE_GIF:
                $extension = 'gif';
                break;
            case IMAGETYPE_JPEG:
                $extension = 'jpg';
                break;
            case IMAGETYPE_PNG:
                $extension = 'png';
                break;
            case IMAGETYPE_SWF:
                $extension = 'swf';
                break;
            case IMAGETYPE_PSD:
                $extension = 'psd';
                break;
            case IMAGETYPE_BMP:
                $extension = 'bmp';
                break;
            case IMAGETYPE_TIFF_II:
            case IMAGETYPE_TIFF_MM:
                $extension = 'tiff';
                break;
            case IMAGETYPE_JPC:
                $extension = 'jpc';
                break;
            case IMAGETYPE_JP2:
                $extension = 'jp2';
                break;
            case IMAGETYPE_JPX:
                $extension = 'jpf';
                break;
            case IMAGETYPE_JB2:
                $extension = 'jb2';
                break;
            case IMAGETYPE_SWC:
                $extension = 'swc';
                break;
            case IMAGETYPE_IFF:
                $extension = 'aiff';
                break;
            case IMAGETYPE_WBMP:
                $extension = 'wbmp';
                break;
            case IMAGETYPE_XBM:
                $extension = 'xbm';
                break;
            default:
                return false;
        }
        return $include_dot ? '.' . $extension : $extension;
    }
}
?>
方法:download($filename, $showname='',$expire=1800)
说明:用于下载文件
参数:
$filename,包含路径的文件名
$showname,下载显示的文件名,需要自行转成gbk编码,如果带空格,需要自行替换成其他字符
$expire,下载内容浏览器缓存时间
使用方法:
$showname='最新资料.zip';
$showname=auto_charset($showname,'utf-8','gbk');//utf-8编码转成gbk编码
Http::download('upload/123.zip',$showname);
方法:doGet($url,$timeout=5,$header='')
说明:采用get方法请求页面,会自动使用最快的访问方式,获取数据
参数:
$url,网址
$timeout,超时时间
$header,http请求头,用于发送cookie等信息
使用方法:
echo Http::doGet('http://www.baidu.com');
方法:doPost($url,$data,$timeout=5,$header='')
说明:采用post方法请求页面,会自动使用最快的访问方式,获取数据
参数:
$url,网址
$data,待发送的数据,类型数组。
$timeout,超时时间
$header,http请求头,用于发送cookie等信息
使用方法:
$url='http://www.canphp.com/test.php';
$data['name']='单骑';
$data['email']='admin@canphp.com';
Http::doPost($url,$data,10);
//test.php页面接收数据
$_POST['name'];
$_POST['email'];
方法:setWay($way)
说明:手动设置doGet()和doPost()访问方式
参数:
$way:参数可以是1、2或3
参数1时:采用curl
参数2时:采用socket
参数3时:采用file_get_contents()函数模拟
若不设置访问方式,会自动获取当前环境的支持方式,选择最佳的方式去访问,优先度curl > socket > file_get_contents
使用方法:
Http::setWay(3);
echo Http::doGet('http://www.baidu.com');//将采用file_get_contents()方式获取内容
-
下一篇:php调用dll经验小结