后端应用经常接收各种信息参数,例如评论、回复等文本内容。除了某些场景下允许保留的特定富文本标签和属性(如:b, ul, li, h1, h2, h3…)之外,需要过滤掉危险的字符和标签,防止XSS攻击。
一、什么是XSS?
XSS(Cross-Site Scripting,跨站脚本攻击)是指攻击者把恶意脚本注入到页面中,使其在其他用户的浏览器里执行。看完这个,应该有一个大致的概念。
二、准则
- 永远不要相信用户的输入和请求的参数(包括文字、上传等一切内容)
- 参考第1条
三、实现做法
结合具体业务场景,对相应内容进行过滤,这里使用Jsoup。
jsoup是一款Java的HTML解析器。可以使用Jsoup提供的**Whitelist(白名单)**对文本内容进行过滤,过滤掉危险的字符、标签和属性,但是又保留必要的富文本格式。
例如,白名单中允许b标签存在(并且不允许b标签带有其他属性),那么一段Html内容在过滤之后,会变成:
过滤前:
<b style="xxx" onclick="<script>alert(0);</script>">abc</>
过滤后:
<b>abc</b>
Whitelist主要方法说明
四、例子
基于springboot
pom.xml依赖
<dependencies>
<!-- Spring MVC web starter; no version is given here, so it is presumably managed by a Spring Boot parent/BOM that is not shown (TODO confirm) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- jsoup: HTML parser whose Whitelist based cleaner is used by HtmlFilter. -->
<!-- NOTE(review): the code imports org.jsoup.safety.Whitelist; check the jsoup changelog before bumping this version, as later releases rename that class (TODO confirm) -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
<!-- commons-lang3: StringUtils is used by XssFilter and XssHttpServletRequestWrapper; version presumably managed elsewhere (TODO confirm) -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<!-- commons-io: IOUtils is used by XssHttpServletRequestWrapper to read the request body -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
</dependencies>
HtmlFilter过滤类
package net.lofish.xpra.xss;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Whitelist;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.List;
/**
 * Small wrapper around jsoup's {@link Whitelist}-based HTML cleaner.
 *
 * <p>An instance holds the whitelist of permitted tags/attributes and the output settings,
 * and exposes {@link #clean(String)} to strip everything that is not whitelisted.
 * Instances are obtained through the static {@code create(...)} factories.
 *
 * @author 撸小鱼
 * Created by lofish@foxmail.com on 2020-04-12
 */
public class HtmlFilter {

    /**
     * Whitelist applied by {@link #clean(String)}. Defaults to {@link Whitelist#relaxed()}.
     *
     * <p>Per the original author's note, relaxed() allows the tags: a, b, blockquote, br,
     * caption, cite, code, col, colgroup, dd, dl, dt, em, h1, h2, h3, h4, h5, h6, i, img, li,
     * ol, p, pre, q, small, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr,
     * u, ul. Links do not get an enforced rel=nofollow; add it manually if needed.
     */
    private Whitelist whiteList;

    /**
     * Output settings used when serializing the cleaned HTML (controls pretty-printing).
     */
    private Document.OutputSettings outputSettings;

    private HtmlFilter() {
    }

    /**
     * Creates a filter from an explicit whitelist.
     *
     * @param whiteList whitelist to apply; {@code null} selects {@link Whitelist#relaxed()}
     * @param pretty    whether the cleaned output should be pretty-printed
     * @return a configured HtmlFilter
     */
    public static HtmlFilter create(Whitelist whiteList, boolean pretty) {
        HtmlFilter filter = new HtmlFilter();
        // Fix: a caller-supplied whitelist used to be silently dropped, leaving the field null.
        filter.whiteList = whiteList != null ? whiteList : Whitelist.relaxed();
        filter.outputSettings = new Document.OutputSettings().prettyPrint(pretty);
        return filter;
    }

    /**
     * Creates a filter with the relaxed whitelist and no pretty-printing.
     *
     * @return a configured HtmlFilter
     */
    public static HtmlFilter create() {
        return create(null, false);
    }

    /**
     * Creates a filter from an explicit whitelist, without pretty-printing.
     *
     * @param whiteList whitelist to apply; {@code null} selects {@link Whitelist#relaxed()}
     * @return a configured HtmlFilter
     */
    public static HtmlFilter create(Whitelist whiteList) {
        return create(whiteList, false);
    }

    /**
     * Creates a filter based on the relaxed whitelist, adjusted by two tag lists.
     *
     * @param excludeTags tags exempt from filtering, i.e. added to the whitelist; may be null
     * @param includeTags tags that must be filtered, i.e. removed from the whitelist; may be null
     * @param pretty      whether the cleaned output should be pretty-printed
     * @return a configured HtmlFilter
     */
    public static HtmlFilter create(List<String> excludeTags, List<String> includeTags, boolean pretty) {
        HtmlFilter filter = create(null, pretty);
        // Tags to strip: removed first, so a tag present in both lists ends up allowed.
        if (includeTags != null && !includeTags.isEmpty()) {
            filter.whiteList.removeTags(includeTags.toArray(new String[0]));
        }
        // Exempt tags: explicitly allowed.
        if (excludeTags != null && !excludeTags.isEmpty()) {
            filter.whiteList.addTags(excludeTags.toArray(new String[0]));
        }
        return filter;
    }

    /**
     * Creates a filter based on the relaxed whitelist, adjusted by two tag lists,
     * without pretty-printing.
     *
     * @param excludeTags tags exempt from filtering, i.e. added to the whitelist; may be null
     * @param includeTags tags that must be filtered, i.e. removed from the whitelist; may be null
     * @return a configured HtmlFilter
     */
    public static HtmlFilter create(List<String> excludeTags, List<String> includeTags) {
        // Fix: the two lists used to be forwarded in swapped order.
        return create(excludeTags, includeTags, false);
    }

    /**
     * Cleans the given HTML fragment against this filter's whitelist.
     *
     * @param content content to clean; may be null
     * @return the cleaned string, or {@code null} when {@code content} is null
     */
    public String clean(String content) {
        if (content == null) {
            return null;
        }
        return Jsoup.clean(content, "", this.whiteList, this.outputSettings);
    }

    /**
     * Demo entry point: prints the cleaned form of a sample payload.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String text = "<a href=\"http://www.baidu.com/a\" onclick=\"alert(1);\"></a><script>alert(0);</script><b style=\"xxx\" onclick=\"<script>alert(0);</script>\">abc</>";
        System.out.println(HtmlFilter.create().clean(text));
    }
}
XssFilter过滤器
package net.lofish.xpra.xss;
import org.apache.commons.lang3.StringUtils;
import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Servlet filter that wraps incoming HTTP requests in {@link XssHttpServletRequestWrapper}
 * so that parameters, headers and JSON bodies are cleaned before reaching downstream code.
 *
 * <p>Init parameters read by {@link #init(FilterConfig)}:
 * <ul>
 *   <li>{@code enabled} - "true" to activate filtering (default: disabled)</li>
 *   <li>{@code excludeUrls} - comma-separated regex prefixes of servlet paths to skip</li>
 *   <li>{@code excludeTagStr} - comma-separated tags exempt from filtering</li>
 *   <li>{@code includeTagStr} - comma-separated tags that must be filtered</li>
 *   <li>{@code encoding} - request body encoding (default: UTF-8)</li>
 * </ul>
 *
 * @author 撸小鱼
 * Created by lofish@foxmail.com on 2020-04-12
 */
public class XssFilter implements Filter {

    /**
     * Raw exclusion patterns (regex source), as configured.
     */
    private List<String> excludeUrls = new ArrayList<>();

    /**
     * Compiled form of {@link #excludeUrls}; each is anchored with "^" at the start.
     */
    private List<Pattern> excludeUrlPatterns = new ArrayList<>();

    /**
     * Tags exempt from filtering.
     */
    private List<String> excludeTags = new ArrayList<>();

    /**
     * Tags that must be filtered.
     */
    private List<String> includeTags = new ArrayList<>();

    /**
     * Master switch; when false every request passes through untouched.
     */
    public boolean enabled = false;

    /**
     * Encoding used to decode and re-encode request bodies.
     */
    private String encoding = "UTF-8";

    /**
     * Reads the filter configuration and pre-compiles the URL exclusion patterns.
     *
     * @param filterConfig container-supplied configuration
     * @throws ServletException if an exclusion pattern is not a valid regular expression
     */
    @Override
    public void init(FilterConfig filterConfig) throws ServletException {
        String enabledStr = filterConfig.getInitParameter("enabled");
        String encodingStr = filterConfig.getInitParameter("encoding");

        addCsv(this.excludeUrls, filterConfig.getInitParameter("excludeUrls"));
        addCsv(this.excludeTags, filterConfig.getInitParameter("excludeTagStr"));
        // Fix: this list used to be populated from "excludeTagStr" as well.
        addCsv(this.includeTags, filterConfig.getInitParameter("includeTagStr"));

        // Compile once here instead of on every request.
        List<Pattern> compiled = new ArrayList<>();
        for (String url : this.excludeUrls) {
            try {
                compiled.add(Pattern.compile("^" + url));
            } catch (IllegalArgumentException e) {
                throw new ServletException("Invalid excludeUrls pattern: " + url, e);
            }
        }
        this.excludeUrlPatterns = compiled;

        if (StringUtils.isNotEmpty(enabledStr)) {
            this.enabled = Boolean.parseBoolean(enabledStr);
        }
        if (StringUtils.isNotEmpty(encodingStr)) {
            this.encoding = encodingStr;
        }
    }

    /**
     * Passes the request down the chain, wrapped for XSS cleaning unless the filter is
     * disabled, the request is not an HTTP request, or its servlet path is excluded.
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException {
        if (!(request instanceof HttpServletRequest) || handleExcludeUrls((HttpServletRequest) request)) {
            chain.doFilter(request, response);
            return;
        }
        XssHttpServletRequestWrapper xssRequest =
                new XssHttpServletRequestWrapper((HttpServletRequest) request, encoding, excludeTags, includeTags);
        chain.doFilter(xssRequest, response);
    }

    /**
     * Decides whether the request should bypass XSS wrapping.
     *
     * @param request current HTTP request
     * @return true when the filter is disabled or the servlet path matches an exclusion pattern
     */
    private boolean handleExcludeUrls(HttpServletRequest request) {
        if (!enabled) {
            return true;
        }
        if (excludeUrlPatterns.isEmpty()) {
            return false;
        }
        String url = request.getServletPath();
        for (Pattern pattern : excludeUrlPatterns) {
            if (pattern.matcher(url).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits a comma-separated value and appends its non-empty, trimmed entries to target.
     */
    private static void addCsv(List<String> target, String csv) {
        if (StringUtils.isEmpty(csv)) {
            return;
        }
        for (String part : csv.split(",")) {
            String trimmed = part.trim();
            if (!trimmed.isEmpty()) {
                target.add(trimmed);
            }
        }
    }
}
一般情况下,我们都是通过request的parameter来传递参数。
但是,如果在某些场景下面,通过requestBody体(json等),来传递相应参数应该怎么办?
这就需要我们对request的inputStream进行过滤处理了。
有个地方需要注意一下的:
servlet中inputStream只能一次读取,后续不能再次读取inputStream。Xss过滤器中读取了stream之后,后续如果其他逻辑涉及到inputStream读取,会抛出异常。那我们就需要想办法把已经读取的stream,重新放回到请求中。
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import javax.servlet.ReadListener;
import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Request wrapper that runs parameter values, header values and JSON request bodies
 * through an {@link HtmlFilter} before handing them to downstream code.
 *
 * <p>NOTE(review): the JSON body is cleaned as one HTML fragment rather than per field,
 * which may alter JSON syntax characters for some payloads - verify against real traffic.
 *
 * @author 撸小鱼
 * Created by lofish@foxmail.com
 */
public class XssHttpServletRequestWrapper extends HttpServletRequestWrapper {

    HttpServletRequest orgRequest;
    String encoding;
    HtmlFilter htmlFilter;

    private final static String JSON_CONTENT_TYPE = "application/json";
    private final static String CONTENT_TYPE = "Content-Type";

    /**
     * Body bytes of a JSON request (cleaned unless blank), filled on the first
     * {@link #getInputStream()} call so later calls can replay the same content.
     */
    private byte[] cachedBody;

    /**
     * @param request     HttpServletRequest to wrap
     * @param encoding    encoding used to decode and re-encode the request body
     * @param excludeTags tags exempt from filtering
     * @param includeTags tags that must be filtered
     */
    public XssHttpServletRequestWrapper(HttpServletRequest request, String encoding, List<String> excludeTags, List<String> includeTags) {
        super(request);
        orgRequest = request;
        this.encoding = encoding;
        this.htmlFilter = HtmlFilter.create(excludeTags, includeTags);
    }

    /**
     * @param request  HttpServletRequest to wrap
     * @param encoding encoding used to decode and re-encode the request body
     */
    public XssHttpServletRequestWrapper(HttpServletRequest request, String encoding) {
        this(request, encoding, null, null);
    }

    /**
     * Cleans a single value; null is passed through unchanged.
     */
    private String xssFilter(String input) {
        return input == null ? null : htmlFilter.clean(input);
    }

    /**
     * Returns a new array holding the cleaned form of each element; the input array,
     * which may belong to the container, is left untouched. Null yields null.
     */
    private String[] xssFilterAll(String[] values) {
        if (values == null) {
            return null;
        }
        String[] cleaned = new String[values.length];
        for (int i = 0; i < values.length; i++) {
            cleaned[i] = xssFilter(values[i]);
        }
        return cleaned;
    }

    /**
     * Tells whether the request's media type is application/json, ignoring case and any
     * parameters such as {@code ;charset=UTF-8}.
     */
    private boolean isJsonRequest() {
        String contentType = super.getHeader(CONTENT_TYPE);
        String mediaType = StringUtils.trim(StringUtils.substringBefore(contentType, ";"));
        return JSON_CONTENT_TYPE.equalsIgnoreCase(mediaType);
    }

    /**
     * For JSON requests, returns a replayable stream over the cleaned body; for any other
     * content type the container's stream is returned as is.
     *
     * @throws IOException if the underlying body cannot be read or the encoding is unsupported
     */
    @Override
    public ServletInputStream getInputStream() throws IOException {
        if (!isJsonRequest()) {
            return super.getInputStream();
        }
        if (cachedBody == null) {
            String body;
            // The container stream can be consumed only once, so read it fully and cache.
            try (InputStream in = super.getInputStream()) {
                body = IOUtils.toString(in, encoding);
            }
            if (StringUtils.isNotBlank(body)) {
                body = xssFilter(body);
            }
            cachedBody = body.getBytes(encoding);
        }
        return new RequestCachingInputStream(cachedBody);
    }

    /**
     * @return the cleaned value of the named parameter, or the raw value when it is null or blank
     */
    @Override
    public String getParameter(String name) {
        // The name comes from application code; cleaning it could only break the lookup.
        String value = super.getParameter(name);
        if (StringUtils.isNotBlank(value)) {
            value = xssFilter(value);
        }
        return value;
    }

    /**
     * @return cleaned copies of all values of the named parameter, or null when there are none
     */
    @Override
    public String[] getParameterValues(String name) {
        String[] parameters = super.getParameterValues(name);
        if (parameters == null || parameters.length == 0) {
            return null;
        }
        return xssFilterAll(parameters);
    }

    /**
     * @return a new insertion-ordered map of parameter names to cleaned copies of their values
     */
    @Override
    public Map<String, String[]> getParameterMap() {
        Map<String, String[]> map = new LinkedHashMap<>();
        for (Map.Entry<String, String[]> entry : super.getParameterMap().entrySet()) {
            // Copy instead of overwriting in place: cleaning is not idempotent
            // (e.g. "&" is escaped again on each pass).
            map.put(entry.getKey(), xssFilterAll(entry.getValue()));
        }
        return map;
    }

    /**
     * @return the cleaned value of the named header, or the raw value when it is null or blank
     */
    @Override
    public String getHeader(String name) {
        String value = super.getHeader(name);
        if (StringUtils.isNotBlank(value)) {
            value = xssFilter(value);
        }
        return value;
    }

    /**
     * Returns the original, unwrapped request.
     */
    public HttpServletRequest getOrgRequest() {
        return orgRequest;
    }

    /**
     * Returns the original request behind the given one.
     *
     * @param request HttpServletRequest, possibly an XssHttpServletRequestWrapper
     * @return the wrapped original when {@code request} is an XssHttpServletRequestWrapper,
     *         otherwise {@code request} itself
     */
    public static HttpServletRequest getOrgRequest(HttpServletRequest request) {
        if (request instanceof XssHttpServletRequestWrapper) {
            return ((XssHttpServletRequestWrapper) request).getOrgRequest();
        }
        return request;
    }

    /**
     * ServletInputStream backed by an in-memory byte array. The servlet input stream can be
     * read only once, so after the body has been cleaned it is served from this stream.
     */
    private static class RequestCachingInputStream extends ServletInputStream {

        private final ByteArrayInputStream inputStream;

        public RequestCachingInputStream(byte[] bytes) {
            inputStream = new ByteArrayInputStream(bytes);
        }

        @Override
        public int read() throws IOException {
            return inputStream.read();
        }

        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            // Bulk read straight from the backing array instead of byte-by-byte.
            return inputStream.read(b, off, len);
        }

        @Override
        public boolean isFinished() {
            return inputStream.available() == 0;
        }

        @Override
        public boolean isReady() {
            return true;
        }

        @Override
        public void setReadListener(ReadListener readListener) {
            // Data is already in memory; no listener callbacks are issued.
        }
    }
}
springboot2.2.4.RELEASE中注册Filter
/**
 * Spring configuration that registers {@link XssFilter} as a servlet filter.
 *
 * <p>Properties (with defaults): {@code xss.enabled} (true), {@code xss.excludes} (empty),
 * {@code xss.includes} (empty), {@code xss.urlPatterns} (/*).
 */
@Configuration
public class XssFilterConfig {

    @Value("${xss.enabled:true}")
    private String enabled;

    @Value("${xss.excludes:}")
    private String excludes;

    // Fix: the placeholder used to be "${xss.includes$:}", which looks up the wrong property name.
    @Value("${xss.includes:}")
    private String includes;

    @Value("${xss.urlPatterns:/*}")
    private String urlPatterns;

    /**
     * Builds the filter registration: REQUEST dispatches only, the configured URL patterns,
     * order Integer.MAX_VALUE, and the xss.* properties passed as init parameters.
     *
     * @return the registration bean for XssFilter
     */
    @Bean
    public FilterRegistrationBean<XssFilter> xssFilterRegistrationBean() {
        FilterRegistrationBean<XssFilter> registration = new FilterRegistrationBean<>();
        registration.setDispatcherTypes(DispatcherType.REQUEST);
        registration.setFilter(new XssFilter());
        registration.addUrlPatterns(urlPatterns.split(","));
        registration.setName("XssFilter");
        registration.setOrder(Integer.MAX_VALUE);

        // NOTE(review): XssFilter.init looks up keys such as "excludeUrls" and "excludeTagStr",
        // not "excludes"/"includes", so these two values are presumably never picked up.
        // Confirm the intended mapping (URLs vs. tags) and align the key names.
        Map<String, String> initParameters = new HashMap<>();
        initParameters.put("excludes", excludes);
        // Fix: "includes" used to be populated with the excludes value.
        initParameters.put("includes", includes);
        initParameters.put("enabled", enabled);
        registration.setInitParameters(initParameters);
        return registration;
    }
}
测试
http://localhost:8080/demo/th/xss?abc=%3Ca%20href=%22http://www.baidu.com/a%22%20onclick=%22alert(1);%22%3Eabc%3C/a%3E%3Cscript%3Ealert(0);%3C/script%3E&abc=%3Cb%20style=%22xxx%22%20onclick=%22%3Cscript%3Ealert(0);%3C/script%3E%22%3Eabc%3C/%3E