<?php
/*
当前位置:Linux教程 - Php - url地址合法性检查

url地址合法性检查

这个类可以用来检查URL地址的合法性,目前支持ftp,file,http,https,gopher,news,nntp,telnet,wais.
功能:
检查URL的合法性
技术资料:
Uniform Resource Locators (URL)
http://info.internet.isi.edu/in-notes/rfc/files/rfc1738.txt
Regex edition 0.12a
http://www.cs.utah.edu/dept/old/texinfo/regex/regex_toc.html
*/
/**
 * Validates URLs against the grammar of RFC 1738.
 *
 * Supported schemes: ftp, file, http, https, gopher, news, nntp, telnet, wais.
 *
 * Every public property below (except $regex) holds an un-anchored,
 * un-delimited regular-expression fragment for one grammar rule of RFC 1738
 * section 5. The fragments are assembled bottom-up in the constructor and are
 * written so that they are valid PCRE; wrap them yourself when matching, e.g.
 * preg_match('/^' . $checker->digits . '$/D', $subject). Pick a delimiter that
 * does not occur in the fragment ($national contains "~", $punctuation
 * contains "#", every URL-level fragment contains "/").
 */
class urlcheck
{
    /**
     * Scheme name (keys MUST be lower case) => name of the property that
     * holds the pattern for that scheme.
     *
     * For backward compatibility check() also accepts the historical value
     * format '$this->name'.
     */
    public $regex = array(
        'ftp' => 'ftpurl',
        'file' => 'fileurl',
        'http' => 'httpurl',
        'https' => 'httpurl',
        'gopher' => 'gopherurl',
        'news' => 'newsurl',
        'nntp' => 'nntpurl',
        'telnet' => 'telneturl',
        'wais' => 'waisurl'
    );

    // Character classes.
    public $lowalpha;
    public $hialpha;
    public $alpha;
    public $digit;
    public $safe;
    public $extra;
    public $national;
    public $punctuation;
    public $reserved;
    public $hex;
    public $escape;
    public $unreserved;
    public $uchar;
    public $xchar;
    public $digits;

    // Parts shared by the IP based schemes.
    public $urlpath;
    public $password;
    public $user;
    public $port;
    public $hostnumber;
    public $alphadigit;
    public $toplabel;
    public $domainlabel;
    public $hostname;
    public $host;
    public $hostport;
    public $login;

    // ftp
    public $ftptype;
    public $fsegment;
    public $fpath;
    public $ftpurl;

    // file
    public $fileurl;

    // http, https
    public $search;
    public $hsegment;
    public $hpath;
    public $httpurl;

    // gopher
    public $gopher_string;
    public $selector;
    public $gtype;
    public $gopherurl;

    // news
    public $article;
    public $group;
    public $grouppart;
    public $newsurl;

    // nntp
    public $nntpurl;

    // telnet
    public $telneturl;

    // wais
    public $wpath;
    public $wtype;
    public $database;
    public $waisdoc;
    public $waisindex;
    public $waisdatabase;
    public $waisurl;

    /**
     * Builds every pattern fragment from the RFC 1738 grammar.
     */
    public function __construct()
    {
        $this->lowalpha = '[a-z]';
        $this->hialpha = '[A-Z]';
        $this->alpha = '(' . $this->lowalpha . '|' . $this->hialpha . ')';
        $this->digit = '[0-9]';
        $this->safe = '[$.+_-]';
        $this->extra = '[*()\'!,]';
        // The doubled backslash keeps "\" inside the class under PCRE (where
        // "\^" would otherwise mean just "^") and is harmless under POSIX ERE.
        $this->national = '([{}|\\\\^~`]|\\[|\\])';
        $this->punctuation = '[<>#%"]';
        // A literal "&" is required here: an HTML-escaped "&amp;" would
        // silently add ";" (and a, m, p) to the character class.
        $this->reserved = '[?;/:@&=]';
        $this->hex = '(' . $this->digit . '|[a-fA-F])';
        $this->escape = '(%' . $this->hex . '{2})';
        $this->unreserved = '(' . $this->alpha . '|' . $this->digit . '|' . $this->safe . '|' . $this->extra . ')';
        $this->uchar = '(' . $this->unreserved . '|' . $this->escape . ')';
        $this->xchar = '(' . $this->unreserved . '|' . $this->reserved . '|' . $this->escape . ')';
        $this->digits = '(' . $this->digit . '+)';

        $this->urlpath = '(' . $this->xchar . '*)';
        $this->password = '((' . $this->uchar . '|[?;&=]' . ')*)';
        $this->user = '((' . $this->uchar . '|[?;&=]' . ')*)';
        $this->port = $this->digits;
        // The dots are escaped so that only a real "." separates the groups.
        $this->hostnumber = '(' . $this->digits . '\\.' . $this->digits . '\\.' . $this->digits . '\\.' . $this->digits . ')';
        $this->alphadigit = '(' . $this->alpha . '|' . $this->digit . ')';
        $this->toplabel = '(' . $this->alpha . '|(' . $this->alpha . '(' . $this->alphadigit . '|-)*' . $this->alphadigit . '))';
        $this->domainlabel = '(' . $this->alphadigit . '|(' . $this->alphadigit . '(' . $this->alphadigit . '|-)*' . $this->alphadigit . '))';
        $this->hostname = '((' . $this->domainlabel . '\\.)*' . $this->toplabel . ')';
        $this->host = '(' . $this->hostname . '|' . $this->hostnumber . ')';
        $this->hostport = '(' . $this->host . '(:' . $this->port . ')?)';
        $this->login = '((' . $this->user . '(:' . $this->password . ')?@)?' . $this->hostport . ')';

        $this->ftptype = '[aidAID]';
        // ";" must NOT be part of an ftp segment, it introduces ";type=".
        $this->fsegment = '((' . $this->uchar . '|[?:@&=])*)';
        $this->fpath = '(' . $this->fsegment . '(/' . $this->fsegment . ')*)';
        $this->ftpurl = '([fF][tT][pP]://' . $this->login . '(/' . $this->fpath . '(;[tT][yY][pP][eE]=' . $this->ftptype . ')?)?)';

        $this->fileurl = '([fF][iI][lL][eE]://(' . $this->host . '|[lL][oO][cC][aA][lL][hH][oO][sS][tT])?/' . $this->fpath . ')';

        $this->search = '((' . $this->uchar . '|[;:@&=])*)';
        $this->hsegment = '((' . $this->uchar . '|[;:@&=])*)';
        $this->hpath = '(' . $this->hsegment . '(/' . $this->hsegment . ')*)';
        $this->httpurl = '([hH][tT][tT][pP][sS]?://' . $this->hostport . '(/' . $this->hpath . '([?]' . $this->search . ')?)?)';

        $this->gopher_string = '(' . $this->xchar . '*)';
        $this->selector = '(' . $this->xchar . '*)';
        $this->gtype = $this->xchar;
        $this->gopherurl = '([gG][oO][pP][hH][eE][rR]://' . $this->hostport . '(/(' . $this->gtype . '(' . $this->selector . '(%09' . $this->search . '(%09' . $this->gopher_string . ')?)?)?)?)?)';

        $this->article = '((' . $this->uchar . '|[;/?:&=])+@' . $this->host . ')';
        $this->group = '(' . $this->alpha . '(' . $this->alpha . '|' . $this->digit . '|[-.+_])*)';
        $this->grouppart = '([*]|' . $this->group . '|' . $this->article . ')';
        $this->newsurl = '([nN][eE][wW][sS]:' . $this->grouppart . ')';

        $this->nntpurl = '([nN][nN][tT][pP]://' . $this->hostport . '/' . $this->group . '(/' . $this->digits . ')?)';

        $this->telneturl = '([tT][eE][lL][nN][eE][tT]://' . $this->login . '/?)';

        $this->wpath = '(' . $this->uchar . '*)';
        $this->wtype = '(' . $this->uchar . '*)';
        $this->database = '(' . $this->uchar . '*)';
        $this->waisdoc = '([wW][aA][iI][sS]://' . $this->hostport . '/' . $this->database . '/' . $this->wtype . '/' . $this->wpath . ')';
        $this->waisindex = '([wW][aA][iI][sS]://' . $this->hostport . '/' . $this->database . '[?]' . $this->search . ')';
        $this->waisdatabase = '([wW][aA][iI][sS]://' . $this->hostport . '/' . $this->database . ')';
        $this->waisurl = '(' . $this->waisdatabase . '|' . $this->waisindex . '|' . $this->waisdoc . ')';
    }

    /**
     * Former PHP 4-style constructor.
     *
     * PHP 8 no longer treats a method named after the class as a constructor,
     * so the real work lives in __construct(); this method only remains so
     * that explicit legacy calls keep working.
     */
    public function urlcheck()
    {
        $this->__construct();
    }

    /**
     * Tells whether $url is a syntactically valid URL of a supported scheme.
     *
     * @param string $url The complete URL to validate.
     * @return bool True when the whole string matches the pattern registered
     *              for its scheme; false for unknown schemes, non-string
     *              input, or any mismatch.
     */
    public function check($url)
    {
        if (!is_string($url)) {
            return false;
        }

        // A scheme needs at least one character in front of the colon.
        $colon = strpos($url, ':');
        if ($colon === false || $colon < 1) {
            return false;
        }

        // Scheme names are case-insensitive; the map keys are lower case.
        $scheme = strtolower(substr($url, 0, $colon));
        if (!isset($this->regex[$scheme])) {
            return false;
        }

        $property = $this->regex[$scheme];
        // Tolerate map entries written in the historical '$this->name' form.
        if (strncmp($property, '$this->', 7) === 0) {
            $property = substr($property, 7);
        }
        if (!isset($this->$property)) {
            return false;
        }

        // "~" is a safe delimiter because no URL-level fragment contains it.
        // The D modifier stops "$" from matching before a trailing newline.
        return preg_match('~^' . $this->$property . '$~D', $url) === 1;
    }
}

/*测试
$urlcheck = new urlcheck();

$url = array(
'ftp://localhost/test/a',
'file://localhost/test/a',
'http://localhost/test/a',
'https://localhost/test/a',
'gopher://localhost/test/a',
'news:comp.os.linux',
'nntp://localhost/test/12',
'telnet://localhost/',
'wais://localhost/db/type/path',
'error'
);
$count = count($url);
for($i=0;$i<$count;$i++){
if($urlcheck->check($url[$i])) echo $url[$i].' ==> 合法
';
else echo $url[$i].' ==> 非法
';
}

$waisurl = array(
'wais://localhost/',
'wais://localhost/db',
'wais://localhost/db?a=b&c=d',
'wais://localhost/db/type/path',
'WAIS://localhost/db',
'error',
'\\',
'wai://localhost/db',
'wais://localhost/db/',
'wais://localhost/db/type/path/ext'
);
echo '
'.$urlcheck->waisurl.' 测试...
--------------------------------------------------------------------------------
';
$count = count($waisurl);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->waisurl.'$',$waisurl[$i])) echo $waisurl[$i].' ==> 合法
';
else echo $waisurl[$i].' ==> 非法
';
}

$telneturl = array(
'telnet://localhost',
'telnet://localhost/',
'TELNET://localhost',
'telnet://guest:guest@localhost',
'error',
'\\',
'telne://localhost'
);
echo '
'.$urlcheck->telneturl.' 测试...
--------------------------------------------------------------------------------
';
$count = count($telneturl);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->telneturl.'$',$telneturl[$i])) echo $telneturl[$i].' ==> 合法
';
else echo $telneturl[$i].' ==> 非法
';
}

$nntpurl = array(
'nntp://localhost/php/12',
'NNTP://localhost/php/12',
'nntp://localhost:119/php/12',
'nntp://localhost/php',
'error',
'\\',
'nnt://localhost/php',
'nntp://localhost/php/'
);
echo '
'.$urlcheck->nntpurl.' 测试...
--------------------------------------------------------------------------------
';
$count = count($nntpurl);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->nntpurl.'$',$nntpurl[$i])) echo $nntpurl[$i].' ==> 合法
';
else echo $nntpurl[$i].' ==> 非法
';
}

$newsurl = array(
'news:123@localhost',
'NEWS:123@localhost',
'news:comp.os.linux',
'news:*',
'error',
'\\',
'news:http://www.phpchina.com/',
'new:comp.os.linux',
'news:'
);
echo '
'.$urlcheck->newsurl.' 测试...
--------------------------------------------------------------------------------
';
$count = count($newsurl);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->newsurl.'$',$newsurl[$i])) echo $newsurl[$i].' ==> 合法
';
else echo $newsurl[$i].' ==> 非法
';
}

$gopherurl = array(
'gopher://phpchina.com',
'gopher://phpchina.com/',
'gopher://phpchina.com/atest%09search%09string',
'GOPHER://phpcina.com/',
'error',
'\\',
'goph://phpchina.com'
);
echo '
'.$urlcheck->gopherurl.' 测试...
--------------------------------------------------------------------------------
';
$count = count($gopherurl);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->gopherurl.'$',$gopherurl[$i])) echo $gopherurl[$i].' ==> 合法
';
else echo $gopherurl[$i].' ==> 非法
';
}

$httpurl = array(
'http://www.phpchina.com',
'http://www.phpchina.com/',
'http://www.phpchina.com/default.php',
'https://www.phpchina.com/',
'HTTPS://www.phpchina.com/',
'http://www.phpchina.com/?a=b&c=d',
'error',
'\\',
'htt://www.phpchina.com/'
);
echo '
'.$urlcheck->httpurl.' 测试...
--------------------------------------------------------------------------------
';
$count = count($httpurl);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->httpurl.'$',$httpurl[$i])) echo $httpurl[$i].' ==> 合法
';
else echo $httpurl[$i].' ==> 非法
';
}

$fileurl = array(
'file://localhost/',
'FILE://localhost/tmp/',
'file://ahost/test/',
'error',
'\\',
'file://localhost'
);
echo '
'.$urlcheck->fileurl.' 测试...
--------------------------------------------------------------------------------
';
$count = count($fileurl);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->fileurl.'$',$fileurl[$i])) echo $fileurl[$i].' ==> 合法
';
else echo $fileurl[$i].' ==> 非法
';
}

$ftpurl = array(
'ftp://www.phpchina.com',
'ftp://www.phpchina.com/',
'ftp://www.phpchina.com/test',
'ftp://www.phpchina.com/test/',
'ftp://www.phpchina.com/test/abc/aa',
'ftp://www.phpchina.com:21',
'ftp://www.phpchina.com:21/',
'ftp://guest@www.phpchina.com/',
'ftp://@www.phpchina.com/',
'ftp://:@www.phpchina.com/',
'ftp://guest:@www.phpchina.com/',
'ftp://guest:guest@www.phpchina.com/',
'ftp://guest:guest@www.phpchina.com:21',
'Ftp://www.phpchina.com/',
'ftp://www.phpchina.com/test;type=a',
'ftp://www.phpchina.com/;type=a',
'FTP://www.phpchina.com/;TYPE=A',
'error',
'\\',
'ftp://',
'www.phpchina.com',
'ff://www.phpchina.com',
'ftp://www.phpchina.com;type=a',
'ftp://www.phpchina.com/;type=b'
);
echo '
'.$urlcheck->ftpurl.' 测试...
';
$count = count($ftpurl);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->ftpurl.'$',$ftpurl[$i])) echo $ftpurl[$i].' ==> 合法
';
else echo $ftpurl[$i].' ==> 非法
';
}

$login = array(
'www.chinaasp.com',
'www.phpchina.com',
'202.101.17.181',
'www.phpchina.com:80',
'202.101.17.181:80',
'guest@www.phpchina.com:80',
'guest@202.101.17.181:80',
'guest:guest@www.phpchina.com:80',
'guest:guest@202.101.17.181:80',
'guest:@www.phpchina.com:80',
'guest:@202.101.17.181:80',
'@www.phpchina.com:80',
'@202.101.17.181:80',
'error',
'\\',
'@',
'guest@'
);
echo '
'.$urlcheck->login.' 测试...
--------------------------------------------------------------------------------
';
$count = count($login);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->login.'$',$login[$i])) echo $login[$i].' ==> 合法
';
else echo $login[$i].' ==> 非法
';
}

$hostport = array(
'www.chinaasp.com',
'www.phpchina.com',
'202.101.17.181',
'www.phpchina.com:80',
'202.101.17.181:80',
'error',
'\\',
'www.phpchina.com:abc'
);
echo '
'.$urlcheck->hostport.' 测试...
--------------------------------------------------------------------------------
';
$count = count($hostport);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->hostport.'$',$hostport[$i])) echo $hostport[$i].' ==> 合法
';
else echo $hostport[$i].' ==> 非法
';
}

$host = array(
'www.chinaasp.com',
'www.phpchina.com',
'202.101.17.181',
'error',
'\\',
'www.phpchina.com:80'
);
echo '
'.$urlcheck->host.' 测试...
--------------------------------------------------------------------------------
';
$count = count($host);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->host.'$',$host[$i])) echo $host[$i].' ==> 合法
';
else echo $host[$i].' ==> 非法
';
}

$hostname = array(
'www.chinaasp.com',
'www.phpchina.com',
'1.a',
'1.abc',
'1.a-c',
'1.ab-c',
'1.a8-9',
'1-2.a',
'1-2.abc',
'1-2.a-c',
'1-2.ab-c',
'1-2.a8-9',
'1-2.1-2.a',
'1-2.1-2.abc',
'1-2.1-2.a-c',
'1-2.1-2.ab-c',
'1-2.1-2.a8-9',
'a',
'e',
'z',
'A',
'E',
'Z',
'abc',
'a-c',
'ab-c',
'ab-9',
'a8-9',
'error',
'\\',
'1',
'123',
'1a3',
'1-3',
'abc*',
'!safa'
);
echo '
'.$urlcheck->hostname.' 测试
--------------------------------------------------------------------------------
';
$count = count($hostname);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->hostname.'$',$hostname[$i])) echo $hostname[$i].' ==> 合法
';
else echo $hostname[$i].' ==> 非法
';
}

$hostnumber = array(
'202.101.17.181',
'127.0.0.1',
'error',
'\\',
'202.101.17.',
'202.101..',
'202...',
'....',
'ab.cd.ef.gh'
);
echo '
'.$urlcheck->hostnumber.' 测试...
--------------------------------------------------------------------------------
';
$count = count($hostnumber);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->hostnumber.'$',$hostnumber[$i])) echo $hostnumber[$i].' ==> 合法
';
else echo $hostnumber[$i].' ==> 非法
';
}

$lowalpha = array(
'a',
'e',
'z',
'error',
'\\',
'A',
'0',
'ae'
);
echo '
'.$urlcheck->lowalpha.' 测试...
--------------------------------------------------------------------------------
';
$count = count($lowalpha);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->lowalpha.'$',$lowalpha[$i])) echo $lowalpha[$i].' ==> 合法
';
else echo $lowalpha[$i].' ==> 非法
';
}

$hialpha = array(
'A',
'E',
'Z',
'error',
'\\',
'a',
'2',
'AB'
);
echo '
'.$urlcheck->hialpha.' 测试...
--------------------------------------------------------------------------------
';
$count = count($hialpha);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->hialpha.'$',$hialpha[$i])) echo $hialpha[$i].' ==> 合法
';
else echo $hialpha[$i].' ==> 非法
';
}

$alpha = array(
'a',
'e',
'z',
'A',
'E',
'Z',
'error',
'\\',
'ab',
'AB',
'1',
'a1'
);
echo '
'.$urlcheck->alpha.' 测试...
--------------------------------------------------------------------------------
';
$count = count($alpha);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->alpha.'$',$alpha[$i])) echo $alpha[$i].' ==> 合法
';
else echo $alpha[$i].' ==> 非法
';
}

$digit = array(
'0',
'4',
'9',
'error',
'\\',
'12',
'ab'
);
echo '
'.$urlcheck->digit.' 测试...
--------------------------------------------------------------------------------
';
$count = count($digit);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->digit.'$',$digit[$i])) echo $digit[$i].' ==> 合法
';
else echo $digit[$i].' ==> 非法
';
}

$hex = array(
'0',
'4',
'9',
'a',
'e',
'f',
'A',
'E',
'F',
'error',
'\\',
'01',
'G',
'g',
'ab'
);
echo '
'.$urlcheck->hex.' 测试...
--------------------------------------------------------------------------------
';
$count = count($hex);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->hex.'$',$hex[$i])) echo $hex[$i].' ==> 合法
';
else echo $hex[$i].' ==> 非法
';
}

$digits = array(
'0',
'4',
'9',
'12',
'124',
'error',
'\\',
'',
'a',
'afasdf'
);
echo '
'.$urlcheck->digits.' 测试...
--------------------------------------------------------------------------------
';
$count = count($digits);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->digits.'$',$digits[$i])) echo $digits[$i].' ==> 合法
';
else echo $digits[$i].' ==> 非法
';
}

$xchar = array(
'a',
'e',
'z',
'A',
'E',
'Z',
'0',
'5',
'9',
'$',
'.',
'+',
'_',
'-',
'*',
'(',
')',
'\'',
'!',
',',
'?',
';',
'/',
':',
'@',
'&',
'=',
'%02',
'%a0',
'%af',
'error',
'\\',
'ae',
'AE',
'$-',
'*)',
'?/',
'%0z',
'%az',
'%abc',
'%012'
);
echo '
'.$urlcheck->xchar.' 测试...
--------------------------------------------------------------------------------
';
$count = count($xchar);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->xchar.'$',$xchar[$i])) echo $xchar[$i].' ==> 合法
';
else echo $xchar[$i].' ==> 非法
';
}

$uchar = array(
'a',
'e',
'z',
'A',
'E',
'Z',
'0',
'5',
'9',
'$',
'.',
'+',
'_',
'-',
'*',
'(',
')',
'\'',
'!',
'%02',
'%a0',
'%Af',
'error',
'\\',
'ab',
'Aa',
'01',
'$+',
'(!',
'%124',
'%abc',
'%az'
);
echo '
'.$urlcheck->uchar.' 测试...
--------------------------------------------------------------------------------
';
$count = count($uchar);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->uchar.'$',$uchar[$i])) echo $uchar[$i].' ==> 合法
';
else echo $uchar[$i].' ==> 非法
';
}

$unreserved = array(
'a',
'e',
'z',
'A',
'E',
'Z',
'0',
'5',
'9',
'$',
'.',
'+',
'_',
'-',
'*',
'(',
')',
'\'',
'!',
',',
'error',
'\\',
'ab',
'Ab',
'01',
'$.',
'*('
);
echo '
'.$urlcheck->unreserved.' 测试...
--------------------------------------------------------------------------------
';
$count = count($unreserved);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->unreserved.'$',$unreserved[$i])) echo $unreserved[$i].' ==> 合法
';
else echo $unreserved[$i].' ==> 非法
';
}

$reserved = array(
';',
'/',
'?',
':',
'@',
'&',
'=',
'error',
'\\',
'a',
'=='
);
echo '
'.$urlcheck->reserved.' 测试...
--------------------------------------------------------------------------------
';
$count = count($reserved);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->reserved.'$',$reserved[$i])) echo $reserved[$i].' ==> 合法
';
else echo $reserved[$i].' ==> 非法
';
}

$punctuation = array(
'<',
'>',
'#',
'%',
'"',
'error',
'\\',
'<<',
'>>',
'a'
);
echo '
'.$urlcheck->punctuation.' 测试...
--------------------------------------------------------------------------------
';
$count = count($punctuation);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->punctuation.'$',$punctuation[$i])) echo $punctuation[$i].' ==> 合法
';
else echo $punctuation[$i].' ==> 非法
';
}

$national = array(
'{',
'}',
'|',
'\\',
'^',
'~',
'[',
']',
'`',
'error',
'``',
'^^'
);
echo '
'.$urlcheck->national.' 测试...
--------------------------------------------------------------------------------
';
$count = count($national);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->national.'$',$national[$i])) echo $national[$i].' ==> 合法
';
else echo $national[$i].' ==> 非法
';
}

$safe = array(
'$',
'-',
'_',
'.',
'+',
'error',
'\\',
'1',
'$$'
);
echo '
'.$urlcheck->safe.' 测试...
--------------------------------------------------------------------------------
';
$count = count($safe);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->safe.'$',$safe[$i])) echo $safe[$i].' ==> 合法
';
else echo $safe[$i].' ==> 非法
';
}

$extra = array(
'!',
'*',
'\'',
'(',
')',
',',
'error',
'\\',
'a'
);
echo '
'.$urlcheck->extra.'测试...
--------------------------------------------------------------------------------
';
$count = count($extra);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->extra.'$',$extra[$i])) echo $extra[$i].' ==> 合法
';
else echo $extra[$i].' ==> 非法
';
}
$escape = array(
'%12',
'%1a',
'%A1',
'error',
'\\',
'%az',
'%123',
'%abc'
);
echo '
'.$urlcheck->escape.' 测试...
--------------------------------------------------------------------------------
';
$count = count($escape);
for($i=0;$i<$count;$i++){
if(ereg('^'.$urlcheck->escape.'$',$escape[$i])) echo $escape[$i].' ==> 合法
';
else echo $escape[$i].' ==> 非法
';
}
*/
?>