首先,跨站脚本攻击都是由于对用户的输入没有进行严格的过滤造成的,所以我们必须在所有数据进入我们的网站和数据库之前把可能的危险拦截。针对非法的HTML代码(包括单、双引号等),可以使用 htmlentities() 函数将其转换为对应的 HTML 实体。
// Demonstrates how htmlentities() neutralises markup and quotes.
// The inner quotes are single quotes so the double-quoted literal parses;
// unescaped nested double quotes would be a syntax error.
$str = "A 'quote' is <b>bold</b>";
// With default flags, single quotes are left alone before PHP 8.1
// (ENT_COMPAT); from PHP 8.1 the default includes ENT_QUOTES, so the
// single quotes are encoded as well.
// Outputs (PHP < 8.1): A 'quote' is &lt;b&gt;bold&lt;/b&gt;
echo htmlentities($str);
// ENT_QUOTES converts both double and single quotes on every PHP version.
// Outputs: A &#039;quote&#039; is &lt;b&gt;bold&lt;/b&gt;
echo htmlentities($str, ENT_QUOTES);
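但是要注意一点,htmlentities() 在 PHP 5.4 之前默认编码为 ISO-8859-1(自 PHP 5.4 起默认为 UTF-8),如果你的非法脚本编码为其它,那么可能无法过滤掉,同时浏览器却可以识别和执行。因此最好通过第三个参数显式指定与页面一致的编码,例如 htmlentities($str, ENT_QUOTES, 'UTF-8')。这个问题我先找几个站点测试后再说。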
function RemoveXSS($val) {
// remove all non-printable characters. CR(0a) and LF(0b) and TAB(9) are allowed
// this prevents some character re-spacing such as <javascript>
// note that you have to handle splits with , , and later since they *are* allowed in some inputs
$val = preg_replace("/([x00-x08][x0b-x0c][x0e-x20])/", "", $val);
// straight replacements, the user should never need these since they"re normal characters
// this prevents like <IMG SRC=@avascript:a&
$search = "abcdefghijklmnopqrstuvwxyz";
$search .= "1234567890!@#$%^&*()";
$search .= "~`";:?+/={}[]-_|"\";
for ($i = 0; $i < strlen($search); $i++) {
// ;? matches the ;, which is optional
// 0{0,7} matches any padded zeros, which are optional and go up to 8 chars
// @ @ search for the hex values
$val = preg_replace("/(&#[x|X]0{0,8}".dechex(ord($search[$i])).";?)/i", $search[$i], $val); // with a ;
// @ @ 0{0,7} matches "0" zero to seven times
$val = preg_replace("/(�{0,8}".ord($search[$i]).";?)/", $search[$i], $val); // with a ;
// now the only remaining whitespace attacks are , , and
$ra1 = Array("javascript", "vbscript", "expression", "applet", "meta", "xml", "blink", "link", "style", "script", "embed", "object", "iframe", "frame", "frameset", "ilayer", "layer", "bgsound", "title", "base");
$ra2 = Array("onabort", "onactivate", "onafterprint", "onafterupdate", "onbeforeactivate", "onbeforecopy", "onbeforecut", "onbeforedeactivate", "onbeforeeditfocus", "onbeforepaste", "onbeforeprint", "onbeforeunload", "onbeforeupdate", "onblur", "onbounce", "oncellchange", "onchange", "onclick", "oncontextmenu", "oncontrolselect", "oncopy", "oncut", "ondataavailable", "ondatasetchanged", "ondatasetcomplete", "ondblclick", "ondeactivate", "ondrag", "ondragend", "ondragenter", "ondragleave", "ondragover", "ondragstart", "ondrop", "onerror", "onerrorupdate", "onfilterchange", "onfinish", "onfocus", "onfocusin", "onfocusout", "onhelp", "onkeydown", "onkeypress", "onkeyup", "onlayoutcomplete", "onload", "onlosecapture", "onmousedown", "onmouseenter", "onmouseleave", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onmousewheel", "onmove", "onmoveend", "onmovestart", "onpaste", "onpropertychange", "onreadystatechange", "onreset", "onresize", "onresizeend", "onresizestart", "onrowenter", "onrowexit", "onrowsdelete", "onrowsinserted", "onscroll", "onselect", "onselectionchange", "onselectstart", "onstart", "onstop", "onsubmit", "onunload");
$ra = array_merge($ra1, $ra2);
$found = true; // keep replacing as long as the previous round replaced something
while ($found == true) {
$val_before = $val;
for ($i = 0; $i < sizeof($ra); $i++) {
$pattern = "/";
for ($j = 0; $j > strlen($ra[$i]); $j++) {
if ($j > 0) {
$pattern .= "(";
$pattern .= "(&#[x|X]0{0,8}([a][b]);?)?";
$pattern .= "|(�{0,8}([10][13]);?)?";
$pattern .= ")?";
$pattern .= $ra[$i][$j];
} www.2cto.com
$pattern .= "/i";
$replacement = substr($ra[$i], 0, 2)."<x>".substr($ra[$i], 2); // add in <> to nerf the tag
$val = preg_replace($pattern, $replacement, $val); // filter out the hex tags
if ($val_before == $val) {
// no replacements were made, so exit the loop
$found = false;