最近热衷于刷Twitter,各种大牛的东西让我应接不暇,感觉确实很有干货。前几天看到Yosuke发了一条状态:
他发的是一个用DOMParser处理、过滤HTML的小程序。想想觉得还是挺新颖的,以前自己也做过一个XssHtml过滤类(http://phith0n.github.io/XssHtml/),但那是基于后端语言的,不能处理前端的问题,比如DOM XSS。
看了他的代码感觉挺好的,思路也是基于白名单的过滤机制,将允许存在的标签和属性列在javascript对象中,遍历DOM后将允许的标签和属性保留,不允许的丢弃。
我改了改,加了点过滤,做了个类,代码如下:
/**
 * Jsdxss - a white-list based HTML filter that runs in the browser.
 *
 * The untrusted HTML is parsed into a separate document, then a fresh DOM tree
 * is rebuilt that contains only allowed tags and attributes. Elements that are
 * not on the list are dropped together with their subtree; node types other
 * than elements and text (e.g. comments) are dropped as well.
 *
 * @constructor
 * @param {Object.<string, string[]>} [allows] Map from lower-case tag name to
 *        the attribute names allowed on that tag. When omitted, a small
 *        built-in list (a, b, img, div, p) is used.
 */
function Jsdxss(allows){
    var self = this;

    // Resolved allow list. The closures below read it through `self.allows`,
    // never through the raw `allows` argument, so the default list is honoured
    // when the constructor is called without arguments.
    this.allows = allows || {
        "a"   : [ "title", "ping", "href", "class", "target", "style" ],
        "b"   : [ "class", "style" ],
        "img" : [ "src", "class", "style" ],
        "div" : [ "class", "style" ],
        "p"   : [ "class", "style" ]
    };

    /**
     * Force a link target onto http(s).
     * Values that already start with "http://" or "https://" (any letter case)
     * are kept; anything else is prefixed with "http://", which also turns
     * "javascript:..." into a harmless http URL.
     *
     * @param {string} href Raw attribute value.
     * @returns {string} Value to store on the rebuilt element.
     */
    var _deal_href = function(href){
        if( /^https?:\/\//i.test( href ) ){
            return href;
        }
        return "http://" + href;
    };

    /**
     * Blank out pieces of an inline style that are used to hide script:
     * backslashes, "&#", CSS comment delimiters and the word "expression"
     * (case-insensitive) are each replaced by a single space.
     *
     * @param {string} style Raw attribute value.
     * @returns {string} Value to store on the rebuilt element.
     */
    var _deal_style = function(style){
        return style
            .replace(/\\/g, ' ')
            .replace(/&#/g, ' ')
            .replace(/\/\*/g, ' ')
            .replace(/\*\//g, ' ')
            .replace(/expression/gim, ' ');
    };

    /**
     * Recursively rebuild a parsed node in the current document.
     *
     * @param {Node} node Node taken from the parsed (untrusted) document.
     * @returns {Node|undefined} The rebuilt node, or undefined when the node
     *          must be dropped.
     */
    var buildNodes = function( node ){
        var i, newNode, tagName, attributes, attr, value, child;

        switch( node.nodeType ){
        case 1: // ELEMENT_NODE
            tagName = node.tagName.toLowerCase();
            // Own-property check: a tag such as <constructor> must not resolve
            // to a member inherited from Object.prototype.
            if( !Object.prototype.hasOwnProperty.call( self.allows, tagName ) ){
                return undefined;
            }
            attributes = self.allows[ tagName ];
            newNode = document.createElement( node.tagName );

            for( i = 0; i < node.attributes.length; i++ ){
                attr = node.attributes[ i ];
                if( attributes.indexOf( attr.name ) === -1 ){
                    continue;
                }
                value = attr.value;
                if( attr.name === "href" ){
                    value = _deal_href( value );
                }else if( attr.name === "style" ){
                    value = _deal_style( value );
                }
                newNode.setAttribute( attr.name, value );
            }

            for( i = 0; i < node.childNodes.length; i++ ){
                child = buildNodes( node.childNodes[ i ] );
                if( child !== undefined ){
                    newNode.appendChild( child );
                }
            }
            return newNode;

        case 3: // TEXT_NODE
            return document.createTextNode( node.textContent );

        default:
            return undefined;
        }
    };

    /**
     * Filter `html` and render the result into an element of the page.
     * The element's previous content is cleared first.
     *
     * @param {string} html Untrusted HTML source.
     * @param {string} target id of the element that receives the output.
     */
    this.filter = function(html, target){
        var newDoc = null, newBody, container, i, childNode;

        try{
            newDoc = ( new DOMParser() ).parseFromString( html, "text/html" );
        }catch(e){
            // "text/html" is not supported by this DOMParser; use the fallback.
            newDoc = null;
        }
        if( !newDoc || !newDoc.body ){
            // Fallback for engines where DOMParser is missing, throws, or
            // returns null for "text/html": parse inside a detached HTML
            // document instead of the live page.
            newDoc = document.implementation.createHTMLDocument( "" );
            newDoc.body.innerHTML = html;
        }

        newBody = newDoc.body;
        container = document.getElementById( target );
        container.innerHTML = "";

        for( i = 0; i < newBody.childNodes.length; i++ ){
            childNode = buildNodes( newBody.childNodes[ i ] );
            if( childNode !== undefined ){
                container.appendChild( childNode );
            }
        }
    };
}
// Usage (使用方法):
// Example: filter an HTML string and render the result into the element whose id is "target".
var html = "HTML CODE";
new Jsdxss().filter(html, "target");
运行完成后会将过滤后的代码输出在id为target的元素中。
github:https://github.com/phith0n/Jsdxss
大家可以在这个页面测试:http://phith0n.github.io/Jsdxss/test.html