过滤器的操作符
LESS <;LESS_OR_EQUAL <=;EQUAL =;NOT_EQUAL <>;GREATER_OR_EQUAL >=;GREATER >;NO_OP 无操作(no operation)
比较器
BinaryComparator:按字节索引顺序比较指定字节数组,采用Bytes.compareTo(byte[]);BinaryPrefixComparator:跟前面相同,只是只比较左端(前缀)的数据是否相同;NullComparator:判断给定的值是否为空;BitComparator:按位比较,通过 BitwiseOp 指定做与(AND)、或(OR)、异或(XOR)操作;RegexStringComparator:提供一个正则的比较器,仅支持 EQUAL 和 NOT_EQUAL;SubstringComparator:判断提供的子串是否出现在table的value中。
Hbase的过滤器分类
1.比较过滤器 Comparison Filters
1.1 RowFilter
构造函数:
public RowFilter(org.apache.hadoop.hbase.filter.CompareFilter.CompareOp rowCompareOp, org.apache.hadoop.hbase.filter.WritableByteArrayComparable rowComparator) {} //选择比较RowKey来确认返回讯息
示例代码:
public class RowFilterExample { public static void main(String[] args) throws IOException { Configuration conf = HBaseConfiguration.create(); HBaseHelper helper = HBaseHelper.getHelper(conf); helper.dropTable("testtable"); helper.createTable("testtable", "colfam1", "colfam2"); System.out.println("Adding rows to table..."); helper.fillTable("testtable", 1, 100, 100, "colfam1", "colfam2"); HTable table = new HTable(conf, "testtable"); // vv RowFilterExample Scan scan = new Scan(); scan.addColumn(Bytes.toBytes("colfam1"), Bytes.toBytes("col-0")); Filter filter1 = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, // co RowFilterExample-1-Filter1 Create filter, while specifying the comparison operator and comparator. Here an exact match is needed. new BinaryComparator(Bytes.toBytes("row-22"))); scan.setFilter(filter1); ResultScanner scanner1 = table.getScanner(scan); // ^^ RowFilterExample System.out.println("Scanning table #1..."); // vv RowFilterExample for (Result res : scanner1) { System.out.println(res); } scanner1.close(); Filter filter2 = new RowFilter(CompareFilter.CompareOp.EQUAL, // co RowFilterExample-2-Filter2 Another filter, this time using a regular expression to match the row keys. new RegexStringComparator(".*-.5")); scan.setFilter(filter2); ResultScanner scanner2 = table.getScanner(scan); // ^^ RowFilterExample System.out.println("Scanning table #2..."); // vv RowFilterExample for (Result res : scanner2) { System.out.println(res); } scanner2.close(); Filter filter3 = new RowFilter(CompareFilter.CompareOp.EQUAL, // co RowFilterExample-3-Filter3 The third filter uses a substring match approach. new SubstringComparator("-5")); scan.setFilter(filter3); ResultScanner scanner3 = table.getScanner(scan); // ^^ RowFilterExample System.out.println("Scanning table #3..."); // vv RowFilterExample for (Result res : scanner3) { System.out.println(res); } scanner3.close(); // ^^ RowFilterExample } }
1.2 FamilyFilter
构造函数
public FamilyFilter(CompareOp familyCompareOp, WritableByteArrayComparable familyComparator) {}
1
示例代码
//通过对比FamilyKey去获取数据public class FamilyFilterExample { public static void main(String[] args) throws IOException { Configuration conf = HBaseConfiguration.create(); HBaseHelper helper = HBaseHelper.getHelper(conf); helper.dropTable("testtable"); helper.createTable("testtable", "colfam1", "colfam2", "colfam3", "colfam4"); System.out.println("Adding rows to table..."); helper.fillTable("testtable", 1, 10, 2, "colfam1", "colfam2", "colfam3", "colfam4"); HTable table = new HTable(conf, "testtable"); // vv FamilyFilterExample Filter filter1 = new FamilyFilter(CompareFilter.CompareOp.LESS, // co FamilyFilterExample-1-Filter Create filter, while specifying the comparison operator and comparator. new BinaryComparator(Bytes.toBytes("colfam3"))); Scan scan = new Scan(); scan.setFilter(filter1); ResultScanner scanner = table.getScanner(scan); // co FamilyFilterExample-2-Scan Scan over table while applying the filter. // ^^ FamilyFilterExample System.out.println("Scanning table... "); // vv FamilyFilterExample for (Result result : scanner) { System.out.println(result); } scanner.close(); Get get1 = new Get(Bytes.toBytes("row-5")); get1.setFilter(filter1); Result result1 = table.get(get1); // co FamilyFilterExample-3-Get Get a row while applying the same filter. System.out.println("Result of get(): " + result1); Filter filter2 = new FamilyFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("colfam3"))); Get get2 = new Get(Bytes.toBytes("row-5")); // co FamilyFilterExample-4-Mismatch Create a filter on one column family while trying to retrieve another. get2.addFamily(Bytes.toBytes("colfam1")); get2.setFilter(filter2); Result result2 = table.get(get2); // co FamilyFilterExample-5-Get2 Get the same row while applying the new filter, this will return "NONE". System.out.println("Result of get(): " + result2); // ^^ FamilyFilterExample } }
1.3 QualifierFilter
构造函数
public QualifierFilter(CompareOp qualifierCompareOp, WritableByteArrayComparable qualifierComparator) { }
示例代码:
//通过和列名比较,返回为真的数据// vv QualifierFilterExample Filter filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("col-2"))); Scan scan = new Scan(); scan.setFilter(filter); ResultScanner scanner = table.getScanner(scan); // ^^ QualifierFilterExample System.out.println("Scanning table... "); // vv QualifierFilterExample for (Result result : scanner) { System.out.println(result); } scanner.close(); Get get = new Get(Bytes.toBytes("row-5")); get.setFilter(filter); Result result = table.get(get); System.out.println("Result of get(): " + result);
1.4 ValueFilter
public ValueFilter(CompareOp valueCompareOp, WritableByteArrayComparable valueComparator) { }
//对比列值获取返回数据 Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL, // co ValueFilterExample-1-Filter Create filter, while specifying the comparison operator and comparator. new SubstringComparator(".4") ); Scan scan = new Scan(); scan.setFilter(filter); // co ValueFilterExample-2-SetFilter Set filter for the scan. ResultScanner scanner = table.getScanner(scan); // ^^ ValueFilterExample System.out.println("Results of scan:"); // vv ValueFilterExample for (Result result : scanner) { for (KeyValue kv : result.raw()) { System.out.println("KV: " + kv + ", Value: " + // co ValueFilterExample-3-Print1 Print out value to check that filter works. Bytes.toString(kv.getValue())); } } scanner.close(); Get get = new Get(Bytes.toBytes("row-5")); get.setFilter(filter); // co ValueFilterExample-4-SetFilter2 Assign same filter to Get instance. Result result = table.get(get); // ^^ ValueFilterExample System.out.println("Result of get: "); // vv ValueFilterExample for (KeyValue kv : result.raw()) { System.out.println("KV: " + kv + ", Value: " + Bytes.toString(kv.getValue())); }
1.5 DependentColumnFilter
该过滤器有两个参数 —— 列族和列修饰。 尝试找到该列所在的每一行,并返回该行具有相同时间戳的全部键值对。如果某一行不包含指定的列,则该行的任何键值对都不返回。 该过滤器还可以有一个可选布尔参数 —— dropDependentColumn. 如果为true, 从属的列不返回。 该过滤器还可以有两个可选参数 —— 一个比较操作符和一个值比较器,用于列族和修饰的进一步检查。如果从属的列找到,其值还必须通过值检查,然后就是时间戳必须考虑。
示例代码
package filters; // cc DependentColumnFilterExample Example using a filter to include only specific column families import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.BinaryPrefixComparator; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.DependentColumnFilter; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.WritableByteArrayComparable; import org.apache.hadoop.hbase.util.Bytes; import util.HBaseHelper; import java.io.IOException; public class DependentColumnFilterExample { private static HTable table = null; // vv DependentColumnFilterExample private static void filter(boolean drop, CompareFilter.CompareOp operator, WritableByteArrayComparable comparator) throws IOException { Filter filter; if (comparator != null) { filter = new DependentColumnFilter(Bytes.toBytes("colfam1"), // co DependentColumnFilterExample-1-CreateFilter Create the filter with various options. 
Bytes.toBytes("col-5"), drop, operator, comparator); } else { filter = new DependentColumnFilter(Bytes.toBytes("colfam1"), Bytes.toBytes("col-5"), drop); } Scan scan = new Scan(); scan.setFilter(filter); ResultScanner scanner = table.getScanner(scan); // ^^ DependentColumnFilterExample System.out.println("Results of scan:"); // vv DependentColumnFilterExample for (Result result : scanner) { for (KeyValue kv : result.raw()) { System.out.println("KV: " + kv + ", Value: " + Bytes.toString(kv.getValue())); } } scanner.close(); Get get = new Get(Bytes.toBytes("row-5")); get.setFilter(filter); Result result = table.get(get); // ^^ DependentColumnFilterExample System.out.println("Result of get: "); // vv DependentColumnFilterExample for (KeyValue kv : result.raw()) { System.out.println("KV: " + kv + ", Value: " + Bytes.toString(kv.getValue())); } // ^^ DependentColumnFilterExample System.out.println(""); // vv DependentColumnFilterExample } public static void main(String[] args) throws IOException { // ^^ DependentColumnFilterExample Configuration conf = HBaseConfiguration.create(); HBaseHelper helper = HBaseHelper.getHelper(conf); helper.dropTable("testtable"); helper.createTable("testtable", "colfam1", "colfam2"); System.out.println("Adding rows to table..."); helper.fillTable("testtable", 1, 10, 10, true, "colfam1", "colfam2"); table = new HTable(conf, "testtable"); // vv DependentColumnFilterExample filter(true, CompareFilter.CompareOp.NO_OP, null); filter(false, CompareFilter.CompareOp.NO_OP, null); // co DependentColumnFilterExample-2-Filter Call filter method with various options. 
filter(true, CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("val-5"))); filter(false, CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("val-5"))); filter(true, CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*\\.5")); filter(false, CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*\\.5")); } // ^^ DependentColumnFilterExample}
2. Dedicated Filter
2.1 SingleColumnValueFilter
//选定列簇和某一列,然后与列的value相比,正确的返回全部的row,注意如果某一行不含有该列,同样返回,除非通过filterIfColumnMissing 设置成真。//第一个构造函数相当于构建了一个BinaryComparator的实例。其他的跟CompareFilter的参数含义一样。 SingleColumnValueFilter(byte[] family, byte[] qualifier, CompareOp compareOp, byte[] value){} SingleColumnValueFilter(byte[] family, byte[] qualifier, CompareOp compareOp, WritableByteArrayComparable comparator){ }//第一个构造函数相当于构建了一个BinaryComparator的实例。其他的跟CompareFilter的参数含义一样。 boolean getFilterIfMissing() void setFilterIfMissing(boolean filterIfMissing) boolean getLatestVersionOnly() void setLatestVersionOnly(boolean latestVersionOnly) //如果 filterIfColumnMissing 标志设为真,如果该行没有指定的列,那么该行的所有列将不发出。缺省值为假。//如果 setLatestVersionOnly 标志设为假,将检查此前的版本。缺省值为真。实例如下:// vv SingleColumnValueFilterExample SingleColumnValueFilter filter = new SingleColumnValueFilter( Bytes.toBytes("colfam1"), Bytes.toBytes("col-5"), CompareFilter.CompareOp.NOT_EQUAL, new SubstringComparator("val-5")); filter.setFilterIfMissing(true); Scan scan = new Scan(); scan.setFilter(filter); ResultScanner scanner = table.getScanner(scan); // ^^ SingleColumnValueFilterExample System.out.println("Results of scan:"); // vv SingleColumnValueFilterExample for (Result result : scanner) { for (KeyValue kv : result.raw()) { System.out.println("KV: " + kv + ", Value: " + Bytes.toString(kv.getValue())); } } scanner.close(); Get get = new Get(Bytes.toBytes("row-6")); get.setFilter(filter); Result result = table.get(get); System.out.println("Result of get: "); for (KeyValue kv : result.raw()) { System.out.println("KV: " + kv + ", Value: " + Bytes.toString(kv.getValue())); }
2.2 SingleColumnValueExcludeFilter
过滤逻辑与SingleColumnValueFilter相同,区别在于返回的结果中不包含用来做比较的那一列(即参考列本身被排除)。
2.3 PrefixFilter
所有的row的实例匹配prefix的时候返回结果集合
// Return every row whose key starts with the given prefix.
// `table` is an HTable opened by the surrounding example.
byte[] rowPrefix = Bytes.toBytes("row1");
Filter prefixFilter = new PrefixFilter(rowPrefix);

Scan filteredScan = new Scan();
filteredScan.setFilter(prefixFilter);
ResultScanner rows = table.getScanner(filteredScan);
for (Result row : rows) {
  KeyValue[] cells = row.raw();
  for (KeyValue cell : cells) {
    String cellValue = Bytes.toString(cell.getValue());
    System.out.println("KV:" + cell + ", Value:" + cellValue);
  }
}
rows.close();

// The same filter applied to a Get of one specific row.
Get singleRow = new Get(Bytes.toBytes("row-5"));
singleRow.setFilter(prefixFilter);
Result fetched = table.get(singleRow);
KeyValue[] fetchedCells = fetched.raw();
for (KeyValue cell : fetchedCells) {
  String cellValue = Bytes.toString(cell.getValue());
  System.out.println("KV:" + cell + ", Value:" + cellValue);
}
2.4 PageFilter
//页过滤 //通过设置pagesize可以设置返回每一页的page大小 //客户端需要记录上一次返回的row的Key值 package hbaseTest; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.PageFilter; import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException; /** * Hello world! */ public class PageFilterExample { public static void main(String[] args) throws IOException { Configuration config = HBaseConfiguration.create(); config.set("hbase.zookeeper.quorum", "QT-H-0038"); String tableName = "testTable"; String cfName = "colfam1"; final byte[] POSTFIX = new byte[] { 0x00 }; HTable table = new HTable(config, tableName); Filter filter = new PageFilter(15); byte[] lastRow = null; int totalRows = 0; while (true) { Scan scan = new Scan(); scan.setFilter(filter); if(lastRow != null){ //注意这里添加了POSTFIX操作,不然死循环了 byte[] startRow = Bytes.add(lastRow,POSTFIX); scan.setStartRow(startRow); } ResultScanner scanner = table.getScanner(scan); int localRows = 0; Result result; while((result = scanner.next()) != null){ System.out.println(localRows++ + ":" + result); totalRows ++; lastRow = result.getRow(); } scanner.close(); if(localRows == 0) break; } System.out.println("total rows:" + totalRows); } } //因为hbase的row是字典序列排列的,因此上一次的lastrow需要添加额外的0(0x00)表示新的开始。另外startKey的那一行是包含在scan里面的。 // final byte[] POSTFIX = new byte[] { 0x00 };
2.5 KeyOnlyFilter
有些应用只需要每个KeyValue的key部分,而不需要实际的val,这时可以使用这个过滤器。该过滤器通过如下构造函数创建:
KeyOnlyFilter(boolean lenAsVal) //lenAsVal默认为假,表示不把val的长度作为val。否则 val的长度将作为val输出。 final byte[] POSTFIX = new byte[] { 0x00 }; HTable table = new HTable(config, tableName); Filter filter = new KeyOnlyFilter(false); byte[] lastRow = null; int totalRows = 0; Scan scan = new Scan(); scan.setFilter(filter); ResultScanner scanner = table.getScanner(scan); for(Result result: scanner){ for(KeyValue kv: result.raw()){ System.out.println(kv + ":" + Bytes.toString(kv.getValue())); } }
2.6 FirstKeyOnlyFilter
//在对hbase的表进行扫描的时候,如果指定了FirstKeyOnlyFilter过滤条件则仅仅会返回相同key的第一条kv。//当对hbase中的表进行count,sum操作等集合操作的时候,使用FirstKeyOnlyFilter会带来性能上的提升。public class KeyOnlyFilterExample { public static void main(String[] args) throws IOException { Configuration config = HBaseConfiguration.create(); config.set("hbase.zookeeper.quorum", "QT-H-0038"); String tableName = "testTable"; String cfName = "colfam1"; final byte[] POSTFIX = new byte[] { 0x00 }; HTable table = new HTable(config, tableName); Filter filter = new FirstKeyOnlyFilter(); byte[] lastRow = null; int totalRows = 0; Scan scan = new Scan(); scan.setFilter(filter); ResultScanner scanner = table.getScanner(scan); for(Result result: scanner){ for(KeyValue kv: result.raw()){ System.out.println(kv + ":" + Bytes.toString(kv.getValue())); } } } }
返回的结果是 row-5/colfam1:qual1/1354673733503/Put/vlen=4:row1 row1/colfam1:qual1/1354432930568/Put/vlen=4:val1 row2/colfam1:qual2/1354432930568/Put/vlen=4:val3 如果注释掉过滤器的返回的结果是: row-5/colfam1:qual1/1354673733503/Put/vlen=4:row1 row1/colfam1:qual1/1354432930568/Put/vlen=4:val1 row1/colfam1:qual2/1354435819120/Put/vlen=4:val2 row2/colfam1:qual2/1354432930568/Put/vlen=4:val3
2.7 InclusiveStopFilter
//因为hbase的scan包含start-row不包含stop-row 如果使用这个过滤器我们可以包含stop-row HTable table = new HTable(config, tableName); Filter filter = new InclusiveStopFilter(Bytes.toBytes("row1")); Scan scan = new Scan(); scan.setFilter(filter); scan.setStartRow(Bytes.toBytes("row-5")); ResultScanner scanner = table.getScanner(scan); for(Result result: scanner){ System.out.println(result); } //会看到row1包含在结果中了。
2.8 TimestampsFilter
当访问某个Timestamp的新闻的时候,我们需要如下的代码: TimestampsFilter(List<Long> timestamps) 接受的参数的list参数,该Filter也可以和scan.setTimeRange混合使用。例如:// vv TimestampFilterExample List<Long> ts = new ArrayList<Long>(); ts.add(new Long(5)); ts.add(new Long(10)); // co TimestampFilterExample-1-AddTS Add timestamps to the list. ts.add(new Long(15)); Filter filter = new TimestampsFilter(ts); Scan scan1 = new Scan(); scan1.setFilter(filter); // co TimestampFilterExample-2-AddFilter Add the filter to an otherwise default Scan instance. ResultScanner scanner1 = table.getScanner(scan1); // ^^ TimestampFilterExample System.out.println("Results of scan #1:"); // vv TimestampFilterExample for (Result result : scanner1) { System.out.println(result); } scanner1.close(); Scan scan2 = new Scan(); scan2.setFilter(filter); scan2.setTimeRange(8, 12); // co TimestampFilterExample-3-AddTSRange Also add a time range to verify how it affects the filter ResultScanner scanner2 = table.getScanner(scan2); // ^^ TimestampFilterExample System.out.println("Results of scan #2:"); // vv TimestampFilterExample for (Result result : scanner2) { System.out.println(result); } scanner2.close();
2.9 ColumnCountGetFilter
//该过滤器限制每行最多返回的列数,是为Get操作设计的;一旦达到列数上限就会终止整个扫描,因此在scan时是无用的
2.10 ColumnPaginationFilter(下来用到的时候再仔细研究下)
/**
 * A filter, based on the ColumnCountGetFilter, that takes two arguments:
 * limit and offset. It can be used for row-based indexing, where references
 * to other tables are stored across many columns, in order to give end users
 * efficient lookups and paginated results.
 */
// `table` is an HTable opened by the surrounding example.
int limit = 5;
int offset = 15;
Filter paginationFilter = new ColumnPaginationFilter(limit, offset);

Scan filteredScan = new Scan();
filteredScan.setFilter(paginationFilter);
ResultScanner rows = table.getScanner(filteredScan);
System.out.println("Results of scan:");
for (Result row : rows) {
  System.out.println(row);
}
rows.close();
2.11 ColumnPrefixFilter
// 跟prefxiFilter相似,只是改成了Column,实例如下:// vv ColumnPaginationFilterExample Filter filter = new ColumnPrefixFilter(Bytes.toBytes("qual2")); Scan scan = new Scan(); scan.setFilter(filter); ResultScanner scanner = table.getScanner(scan); // ^^ ColumnPaginationFilterExample System.out.println("Results of scan:"); // vv ColumnPaginationFilterExample for (Result result : scanner) { System.out.println(result); } scanner.close(); // 值scan到与列值与前面匹配的数据。例如qual2匹配qual21。
2.12 RandomRowFilter
// 随机地返回row的数据,构造函数为RandomRowFilter(float chance) // chance取值为0到1.0,如果<0则不返回任何行,如果>1则包含所有的行。
3. Decorating Filters
3.1 SkipFilter
//这个过滤器只作用到keyValueFilter上。KeyValueFilter会返回所有满足条件的row及对应的列。而加上SkipFilter以后。会发现如果某一行的某一列不符合条件,则这一行全部不返回了。public static void main(String[] args) throws IOException { Configuration conf = HBaseConfiguration.create(); HBaseHelper helper = HBaseHelper.getHelper(conf); helper.dropTable("testtable"); helper.createTable("testtable", "colfam1"); System.out.println("Adding rows to table..."); helper.fillTable("testtable", 1, 30, 5, 2, true, true, "colfam1"); HTable table = new HTable(conf, "testtable"); // vv SkipFilterExample Filter filter1 = new ValueFilter(CompareFilter.CompareOp.NOT_EQUAL, new BinaryComparator(Bytes.toBytes("val-0"))); Scan scan = new Scan(); scan.setFilter(filter1); // co SkipFilterExample-1-AddFilter1 Only add the ValueFilter to the first scan. ResultScanner scanner1 = table.getScanner(scan); // ^^ SkipFilterExample System.out.println("Results of scan #1:"); int n = 0; // vv SkipFilterExample for (Result result : scanner1) { for (KeyValue kv : result.raw()) { System.out.println("KV: " + kv + ", Value: " + Bytes.toString(kv.getValue())); // ^^ SkipFilterExample n++; // vv SkipFilterExample } } scanner1.close(); Filter filter2 = new SkipFilter(filter1); scan.setFilter(filter2); // co SkipFilterExample-2-AddFilter2 Add the decorating skip filter for the second scan. ResultScanner scanner2 = table.getScanner(scan); // ^^ SkipFilterExample System.out.println("Total KeyValue count for scan #1: " + n); n = 0; System.out.println("Results of scan #2:"); // vv SkipFilterExample for (Result result : scanner2) { for (KeyValue kv : result.raw()) { System.out.println("KV: " + kv + ", Value: " + Bytes.toString(kv.getValue())); // ^^ SkipFilterExample n++; // vv SkipFilterExample } } scanner2.close(); // ^^ SkipFilterExample System.out.println("Total KeyValue count for scan #2: " + n); }
3.2 WhileMatchFilter
相当于while循环:一直执行,直到遇到第一个不match的数据就break,整个扫描随即结束并返回。