/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.MapReduceExtendedCell;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A tool to replay WAL files as a M/R job. The WAL can be replayed for a set of tables or all
 * tables, and a time range can be provided (in milliseconds). The WAL is filtered to the passed set
 * of tables and the output can optionally be mapped to another set of tables. WAL replay can also
 * generate HFiles for later bulk importing, in that case the WAL is replayed for a single table
 * only.
 */
@InterfaceAudience.Public
public class WALPlayer extends Configured implements Tool {
  private static final Logger LOG = LoggerFactory.getLogger(WALPlayer.class);
  final static String NAME = "WALPlayer";
  public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output";
  public final static String TABLES_KEY = "wal.input.tables";
  public final static String TABLE_MAP_KEY = "wal.input.tablesmap";
  public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator";
  public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files";

  private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name";

  public WALPlayer() {
  }

  protected WALPlayer(final Configuration c) {
    super(c);
  }

  /**
   * A mapper that just writes out KeyValues. This one can be used together with
   * {@link KeyValueSortReducer}
   * @deprecated Use {@link WALCellMapper}. Will be removed from 3.0 onwards
   */
  @Deprecated
  static class WALKeyValueMapper extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> {
    // The single table whose edits are emitted; all other tables are skipped.
    private byte[] table;

    @Override
    public void map(WALKey key, WALEdit value, Context context) throws IOException {
      try {
        // skip all other tables
        if (Bytes.equals(table, key.getTableName().getName())) {
          for (Cell cell : value.getCells()) {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // WAL meta marker entries are bookkeeping, not user data; do not replay them.
            if (WALEdit.isMetaEditFamily(kv)) {
              continue;
            }
            context.write(new ImmutableBytesWritable(CellUtil.cloneRow(kv)), kv);
          }
        }
      } catch (InterruptedException e) {
        // Restore the interrupt status and fail the task rather than silently
        // dropping the remainder of this edit.
        Thread.currentThread().interrupt();
        throw (InterruptedIOException) new InterruptedIOException(
          "Interrupted while emitting KeyValue").initCause(e);
      }
    }

    @Override
    public void setup(Context context) throws IOException {
      // only a single table is supported when HFiles are generated with HFileOutputFormat
      String[] tables = context.getConfiguration().getStrings(TABLES_KEY);
      if (tables == null || tables.length != 1) {
        // this can only happen when WALMapper is used directly by a class other than WALPlayer
        throw new IOException("Exactly one table must be specified for bulk HFile case.");
      }
      table = Bytes.toBytes(tables[0]);
    }
  }

  /**
   * A mapper that just writes out Cells. This one can be used together with {@link CellSortReducer}
   */
  static class WALCellMapper extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Cell> {
    // The single table whose edits are emitted; all other tables are skipped.
    private byte[] table;

    @Override
    public void map(WALKey key, WALEdit value, Context context) throws IOException {
      try {
        // skip all other tables
        if (Bytes.equals(table, key.getTableName().getName())) {
          for (Cell cell : value.getCells()) {
            // WAL meta marker entries are bookkeeping, not user data; do not replay them.
            if (WALEdit.isMetaEditFamily(cell)) {
              continue;
            }

            // Set sequenceId from WALKey, since it is not included by WALCellCodec. The sequenceId
            // on WALKey is the same value that was on the cells in the WALEdit. This enables
            // CellSortReducer to use sequenceId to disambiguate duplicate cell timestamps.
            // See HBASE-27649
            PrivateCellUtil.setSequenceId(cell, key.getSequenceId());

            context.write(new ImmutableBytesWritable(CellUtil.cloneRow(cell)),
              new MapReduceExtendedCell(cell));
          }
        }
      } catch (InterruptedException e) {
        // Restore the interrupt status and fail the task rather than silently
        // dropping the remainder of this edit.
        Thread.currentThread().interrupt();
        throw (InterruptedIOException) new InterruptedIOException(
          "Interrupted while emitting Cell").initCause(e);
      }
    }

    @Override
    public void setup(Context context) throws IOException {
      // only a single table is supported when HFiles are generated with HFileOutputFormat
      String[] tables = context.getConfiguration().getStrings(TABLES_KEY);
      if (tables == null || tables.length != 1) {
        // this can only happen when WALMapper is used directly by a class other than WALPlayer
        throw new IOException("Exactly one table must be specified for bulk HFile case.");
      }
      table = Bytes.toBytes(tables[0]);
    }
  }

  /**
   * Enum for map metrics. Keep it out here rather than inside in the Map inner-class so we can find
   * associated properties.
   */
  protected enum Counter {
    /** Number of aggregated writes */
    PUTS,
    /** Number of aggregated deletes */
    DELETES,
    /** Cells read from the WAL, before filtering */
    CELLS_READ,
    /** Cells that survived filtering and were written out */
    CELLS_WRITTEN,
    /** WALEdit entries seen by the mapper */
    WALEDITS
  }

  /**
   * A mapper that writes out {@link Mutation} to be directly applied to a running HBase instance.
   */
  protected static class WALMapper
    extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> {
    /** Source table -> target table mapping; empty means "replay all tables unmapped". */
    private Map<TableName, TableName> tables = new TreeMap<>();

    @Override
    public void map(WALKey key, WALEdit value, Context context) throws IOException {
      context.getCounter(Counter.WALEDITS).increment(1);
      try {
        if (tables.isEmpty() || tables.containsKey(key.getTableName())) {
          TableName targetTable =
            tables.isEmpty() ? key.getTableName() : tables.get(key.getTableName());
          ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName());
          Put put = null;
          Delete del = null;
          Cell lastCell = null;
          for (Cell cell : value.getCells()) {
            context.getCounter(Counter.CELLS_READ).increment(1);
            // Filtering WAL meta marker entries.
            if (WALEdit.isMetaEditFamily(cell)) {
              continue;
            }
            // Allow a subclass filter out this cell.
            if (filter(context, cell)) {
              // A WALEdit may contain multiple operations (HBASE-3584) and/or
              // multiple rows (HBASE-5229).
              // Aggregate as much as possible into a single Put/Delete
              // operation before writing to the context.
              if (
                lastCell == null || lastCell.getTypeByte() != cell.getTypeByte()
                  || !CellUtil.matchingRows(lastCell, cell)
              ) {
                // Row or type changed: flush the aggregates built so far. Clear each
                // aggregate after writing it, otherwise a stale Put/Delete would be
                // written again at the next flush or at end-of-edit.
                if (put != null) {
                  context.write(tableOut, put);
                  context.getCounter(Counter.PUTS).increment(1);
                  put = null;
                }
                if (del != null) {
                  context.write(tableOut, del);
                  context.getCounter(Counter.DELETES).increment(1);
                  del = null;
                }
                if (CellUtil.isDelete(cell)) {
                  del = new Delete(CellUtil.cloneRow(cell));
                } else {
                  put = new Put(CellUtil.cloneRow(cell));
                }
              }
              if (CellUtil.isDelete(cell)) {
                del.add(cell);
              } else {
                put.add(cell);
              }
              context.getCounter(Counter.CELLS_WRITTEN).increment(1);
            }
            lastCell = cell;
          }
          // write residual KVs
          if (put != null) {
            context.write(tableOut, put);
            context.getCounter(Counter.PUTS).increment(1);
          }
          if (del != null) {
            context.getCounter(Counter.DELETES).increment(1);
            context.write(tableOut, del);
          }
        }
      } catch (InterruptedException e) {
        // Restore the interrupt status and fail the task rather than silently
        // dropping the remainder of this edit.
        Thread.currentThread().interrupt();
        throw (InterruptedIOException) new InterruptedIOException(
          "Interrupted while writing Mutation").initCause(e);
      }
    }

    /**
     * Subclass hook: return false to drop a cell from the replay. Default keeps everything.
     */
    protected boolean filter(Context context, final Cell cell) {
      return true;
    }

    @Override
    protected void
      cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context)
        throws IOException, InterruptedException {
      super.cleanup(context);
    }

    @Override
    public void setup(Context context) throws IOException {
      String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY);
      String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY);
      if (tableMap == null) {
        // No explicit mapping: map each table to itself.
        tableMap = tablesToUse;
      }
      if (tablesToUse == null) {
        // Then user wants all tables; leave the mapping empty.
        return;
      }
      if (tablesToUse.length != tableMap.length) {
        // this can only happen when WALMapper is used directly by a class other than WALPlayer
        throw new IOException("Incorrect table mapping specified .");
      }
      int i = 0;
      for (String table : tablesToUse) {
        tables.put(TableName.valueOf(table), TableName.valueOf(tableMap[i++]));
      }
    }
  }

  /**
   * Normalizes a time option in {@code conf} to milliseconds-since-epoch: the value may be given
   * either as a human friendly date ({@code yyyy-MM-dd'T'HH:mm:ss.SS}) or directly as a number of
   * milliseconds. A missing option is left untouched.
   * @throws IOException if the value is in neither format
   */
  void setupTime(Configuration conf, String option) throws IOException {
    String val = conf.get(option);
    if (null == val) {
      return;
    }
    long ms;
    try {
      // first try to parse in user friendly form
      ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime();
    } catch (ParseException pe) {
      try {
        // then see if just a number of ms's was specified
        ms = Long.parseLong(val);
      } catch (NumberFormatException nfe) {
        throw new IOException(
          option + " must be specified either in the form 2001-02-20T16:35:06.99 "
            + "or as number of milliseconds");
      }
    }
    conf.setLong(option, ms);
  }

  /**
   * Sets up the actual job.
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
  public Job createSubmittableJob(String[] args) throws IOException {
    Configuration conf = getConf();
    setupTime(conf, WALInputFormat.START_TIME_KEY);
    setupTime(conf, WALInputFormat.END_TIME_KEY);
    String inputDirs = args[0];
    String[] tables = args.length == 1 ? new String[] {} : args[1].split(",");
    String[] tableMap;
    if (args.length > 2) {
      tableMap = args[2].split(",");
      if (tableMap.length != tables.length) {
        throw new IOException("The same number of tables and mapping must be provided.");
      }
    } else {
      // if no mapping is specified, map each table to itself
      tableMap = tables;
    }
    conf.setStrings(TABLES_KEY, tables);
    conf.setStrings(TABLE_MAP_KEY, tableMap);
    conf.set(FileInputFormat.INPUT_DIR, inputDirs);
    Job job = Job.getInstance(conf,
      conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
    job.setJarByClass(WALPlayer.class);

    job.setInputFormatClass(WALInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
      LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);

      // the bulk HFile case
      if (tables.length != 1) {
        throw new IOException("Exactly one table must be specified for the bulk export option");
      }
      TableName tableName = TableName.valueOf(tables[0]);

      // WALPlayer needs ExtendedCellSerialization so that sequenceId can be propagated when
      // sorting cells in CellSortReducer
      job.getConfiguration().setBoolean(HFileOutputFormat2.EXTENDED_CELL_SERIALIZATION_ENABLED_KEY,
        true);

      job.setMapperClass(WALCellMapper.class);
      job.setReducerClass(CellSortReducer.class);
      Path outputDir = new Path(hfileOutPath);
      FileOutputFormat.setOutputPath(job, outputDir);
      job.setMapOutputValueClass(MapReduceExtendedCell.class);
      try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
        HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
      }
      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
        org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
    } else {
      // output to live cluster
      job.setMapperClass(WALMapper.class);
      job.setOutputFormatClass(MultiTableOutputFormat.class);
      TableMapReduceUtil.addDependencyJars(job);
      TableMapReduceUtil.initCredentials(job);
      // No reducers.
      job.setNumReduceTasks(0);
    }
    // Ship the configured WAL codec class so the input format can decode the WALs.
    String codecCls = WALCellCodec.getWALCellCodecClass(conf).getName();
    try {
      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
        Class.forName(codecCls));
    } catch (Exception e) {
      throw new IOException("Cannot determine wal codec class " + codecCls, e);
    }
    return job;
  }

  /**
   * Print usage
   * @param errorMsg Error message. Can be null.
   */
  private void usage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: " + NAME + " [options] <WAL inputdir> [<tables> <tableMappings>]");
    System.err.println(" <WAL inputdir> directory of WALs to replay.");
    System.err.println(" <tables> comma separated list of tables. If no tables specified,");
    System.err.println(" all are imported (even hbase:meta if present).");
    System.err.println(" <tableMappings> WAL entries can be mapped to a new set of tables by passing");
    System.err.println(" <tableMappings>, a comma separated list of target tables.");
    System.err.println(" If specified, each table in <tables> must have a mapping.");
    System.err.println("To generate HFiles to bulk load instead of loading HBase directly, pass:");
    System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output");
    System.err.println(" Only one table can be specified, and no mapping allowed!");
    System.err.println("To specify a time range, pass:");
    System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]");
    System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]");
    System.err.println(" The start and the end date of timerange (inclusive). The dates can be");
    System.err.println(" expressed in milliseconds-since-epoch or yyyy-MM-dd'T'HH:mm:ss.SS format.");
    System.err.println(" E.g. 1234567890120 or 2009-02-13T23:32:30.12");
    System.err.println("Other options:");
    System.err.println(" -D" + JOB_NAME_CONF_KEY + "=jobName");
    System.err.println(" Use the specified mapreduce job name for the wal player");
    System.err.println(" -Dwal.input.separator=' '");
    System.err.println(" Change WAL filename separator (WAL dir names use default ','.)");
    System.err.println("For performance also consider the following options:\n"
      + " -Dmapreduce.map.speculative=false\n" + " -Dmapreduce.reduce.speculative=false");
  }

  /**
   * Main entry point.
   * @param args The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args);
    System.exit(ret);
  }

  @Override
  public int run(String[] args) throws Exception {
    if (args.length < 1) {
      usage("Wrong number of arguments: " + args.length);
      // Return the error code instead of calling System.exit so the tool stays
      // usable when embedded; main() still exits with this value via ToolRunner.
      return -1;
    }
    Job job = createSubmittableJob(args);
    return job.waitForCompletion(true) ? 0 : 1;
  }
}