Можно удалить именно строку
in001 : group : name -> employee
с помощью операции compact (уплотнения)
и настраиваемого фильтра, который при уплотнении исключает именно это значение. (Не проверено, но должно работать.) Используйте:
// Configure the exclusion filter at iterator priority 10.
IteratorSetting config = new IteratorSetting(10, "excludeTermFilter", ExcludeTermFilter.class);
// setTermToExclude is a static helper on ExcludeTermFilter that writes the
// options into the setting; IteratorSetting itself has no such method.
ExcludeTermFilter.setTermToExclude(config, "group", "name", "employee");
List<IteratorSetting> filterList = new ArrayList<>();
filterList.add(config);
// Compact only the given row range with the filter applied.
// NOTE(review): the trailing booleans are expected to be (flush, wait) - confirm against the Accumulo version in use.
connector.tableOperations().compact(tableName, startRow, endRow, filterList, true, false);
с соответствующими значениями и этим настраиваемым фильтром (на основе GrepIterator):
/**
 * Filter that rejects the entries whose column family, column qualifier and
 * value are all byte-for-byte equal to the configured ones, and accepts every
 * other entry.
 *
 * <p>Matching is exact rather than substring based: when the filter is applied
 * during a compaction a rejected entry is presumably removed for good (per the
 * Accumulo {@code Filter} contract - confirm), so a configuration such as
 * group/name/employee must not also hit "subgroup", "username" or "employees".
 *
 * <p>The configured strings are encoded as UTF-8. Use
 * {@link #setTermToExclude(IteratorSetting, String, String, String)} to
 * populate the options read by {@link #init}.
 */
public class ExcludeTermFilter extends Filter {

  private static final String FAMILY_OPTION = "etf.family";
  private static final String QUALIFIER_OPTION = "etf.qualifier";
  private static final String TERM_OPTION = "etf.term";

  private byte[] termToExclude;
  private byte[] columnFamily;
  private byte[] columnQualifier;

  /**
   * Decides whether an entry passes the filter.
   *
   * @param k key of the entry under inspection
   * @param v value of the entry under inspection
   * @return {@code false} only when family, qualifier and value all equal the
   *         configured bytes; {@code true} otherwise
   */
  @Override
  public boolean accept(Key k, Value v) {
    boolean isExcludedEntry = Arrays.equals(v.get(), termToExclude)
        && sameBytes(k.getColumnFamilyData(), columnFamily)
        && sameBytes(k.getColumnQualifierData(), columnQualifier);
    return !isExcludedEntry;
  }

  /**
   * Compares the content of a byte sequence with an expected byte array.
   * Assumes the sequence is array backed, as the previous implementation did.
   */
  private static boolean sameBytes(ByteSequence bs, byte[] expected) {
    if (bs.length() != expected.length) {
      return false;
    }
    byte[] backing = bs.getBackingArray();
    int offset = bs.offset();
    for (int i = 0; i < expected.length; i++) {
      if (backing[offset + i] != expected[i]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Creates a copy of this filter that owns its own copies of the configured
   * byte arrays.
   *
   * @param env iterator environment handed to the parent implementation
   * @return the copied filter
   */
  @Override
  public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
    // The parent is expected to instantiate this same class, hence the cast
    // to ExcludeTermFilter (not GrepIterator).
    ExcludeTermFilter copy = (ExcludeTermFilter) super.deepCopy(env);
    // Each array is copied with its own length.
    copy.termToExclude = Arrays.copyOf(termToExclude, termToExclude.length);
    copy.columnFamily = Arrays.copyOf(columnFamily, columnFamily.length);
    copy.columnQualifier = Arrays.copyOf(columnQualifier, columnQualifier.length);
    return copy;
  }

  /**
   * Initializes the filter from the options "etf.term", "etf.family" and
   * "etf.qualifier".
   *
   * @param source underlying iterator, passed to the parent implementation
   * @param options iterator options; all three "etf.*" keys are required
   * @param env iterator environment, passed to the parent implementation
   * @throws IOException if the parent initialization fails
   * @throws IllegalArgumentException if a required option is missing
   */
  @Override
  public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
    super.init(source, options, env);
    termToExclude = requiredOption(options, TERM_OPTION);
    columnFamily = requiredOption(options, FAMILY_OPTION);
    columnQualifier = requiredOption(options, QUALIFIER_OPTION);
  }

  /** Returns the UTF-8 bytes of a mandatory option, failing with a clear message when it is absent. */
  private static byte[] requiredOption(Map<String,String> options, String name) {
    String value = options == null ? null : options.get(name);
    if (value == null) {
      throw new IllegalArgumentException("Missing required option: " + name);
    }
    return value.getBytes(UTF_8);
  }

  /**
   * Encodes the family, qualifier and value to exclude as options on the
   * given iterator setting.
   *
   * @param cfg iterator setting that receives the options
   * @param family column family of the entry to exclude
   * @param qualifier column qualifier of the entry to exclude
   * @param termToExclude value of the entry to exclude
   */
  public static void setTermToExclude(IteratorSetting cfg, String family, String qualifier, String termToExclude) {
    cfg.addOption(FAMILY_OPTION, family);
    cfg.addOption(QUALIFIER_OPTION, qualifier);
    cfg.addOption(TERM_OPTION, termToExclude);
  }
}
person
Martin Grimmer
schedule
19.08.2015