001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import java.io.IOException; 021import java.lang.reflect.Method; 022import java.util.Map; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.hbase.DoNotRetryIOException; 025import org.apache.hadoop.hbase.client.Result; 026import org.apache.hadoop.hbase.client.ResultScanner; 027import org.apache.hadoop.hbase.client.Scan; 028import org.apache.hadoop.hbase.client.Table; 029import org.apache.hadoop.hbase.client.metrics.ScanMetrics; 030import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 031import org.apache.hadoop.hbase.util.Bytes; 032import org.apache.hadoop.mapreduce.Counter; 033import org.apache.hadoop.mapreduce.InputSplit; 034import org.apache.hadoop.mapreduce.TaskAttemptContext; 035import org.apache.hadoop.util.StringUtils; 036import org.apache.yetus.audience.InterfaceAudience; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040/** 041 * Iterate over an HBase table data, return (ImmutableBytesWritable, Result) pairs. 042 */ 043@InterfaceAudience.Public 044public class TableRecordReaderImpl { 045 public static final String LOG_PER_ROW_COUNT = "hbase.mapreduce.log.scanner.rowcount"; 046 047 private static final Logger LOG = LoggerFactory.getLogger(TableRecordReaderImpl.class); 048 049 // HBASE_COUNTER_GROUP_NAME is the name of mapreduce counter group for HBase 050 @InterfaceAudience.Private 051 static final String HBASE_COUNTER_GROUP_NAME = "HBaseCounters"; 052 053 private ResultScanner scanner = null; 054 private Scan scan = null; 055 private Scan currentScan = null; 056 private Table htable = null; 057 private byte[] lastSuccessfulRow = null; 058 private ImmutableBytesWritable key = null; 059 private Result value = null; 060 private TaskAttemptContext context = null; 061 private long numRestarts = 0; 062 private long numStale = 0; 063 private long timestamp; 064 private int rowcount; 065 private boolean logScannerActivity = false; 066 private int logPerRowCount = 100; 067 068 /** 069 * Restart from survivable exceptions by creating a new scanner. 070 * @param firstRow The first row to start at. 071 * @throws IOException When restarting fails. 072 */ 073 public void restart(byte[] firstRow) throws IOException { 074 // Update counter metrics based on current scan before reinitializing it 075 if (currentScan != null) { 076 updateCounters(); 077 } 078 currentScan = new Scan(scan); 079 currentScan.withStartRow(firstRow); 080 currentScan.setScanMetricsEnabled(true); 081 if (this.scanner != null) { 082 if (logScannerActivity) { 083 LOG.info("Closing the previously opened scanner object."); 084 } 085 this.scanner.close(); 086 } 087 this.scanner = this.htable.getScanner(currentScan); 088 if (logScannerActivity) { 089 LOG.info("Current scan=" + currentScan.toString()); 090 timestamp = System.currentTimeMillis(); 091 rowcount = 0; 092 } 093 } 094 095 /** 096 * In new mapreduce APIs, TaskAttemptContext has two getCounter methods Check if 097 * getCounter(String, String) method is available. 098 * @return The getCounter method or null if not available. 099 * @deprecated since 2.4.0 and 2.3.2, will be removed in 4.0.0 100 */ 101 @Deprecated 102 protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context) 103 throws IOException { 104 Method m = null; 105 try { 106 m = context.getClass().getMethod("getCounter", new Class[] { String.class, String.class }); 107 } catch (SecurityException e) { 108 throw new IOException("Failed test for getCounter", e); 109 } catch (NoSuchMethodException e) { 110 // Ignore 111 } 112 return m; 113 } 114 115 /** 116 * Sets the HBase table. 117 * @param htable The {@link org.apache.hadoop.hbase.HTableDescriptor} to scan. 118 */ 119 public void setHTable(Table htable) { 120 Configuration conf = htable.getConfiguration(); 121 logScannerActivity = conf.getBoolean( 122 "hbase.client.log.scanner.activity" /* ScannerCallable.LOG_SCANNER_ACTIVITY */, false); 123 logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100); 124 this.htable = htable; 125 } 126 127 /** 128 * Sets the scan defining the actual details like columns etc. 129 * @param scan The scan to set. 130 */ 131 public void setScan(Scan scan) { 132 this.scan = scan; 133 } 134 135 /** 136 * Build the scanner. Not done in constructor to allow for extension. 137 */ 138 public void initialize(InputSplit inputsplit, TaskAttemptContext context) 139 throws IOException, InterruptedException { 140 if (context != null) { 141 this.context = context; 142 } 143 restart(scan.getStartRow()); 144 } 145 146 /** 147 * Closes the split. 148 */ 149 public void close() { 150 if (this.scanner != null) { 151 this.scanner.close(); 152 } 153 try { 154 this.htable.close(); 155 } catch (IOException ioe) { 156 LOG.warn("Error closing table", ioe); 157 } 158 } 159 160 /** 161 * Returns the current key. 162 * @return The current key. 163 * @throws InterruptedException When the job is aborted. 164 */ 165 public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException { 166 return key; 167 } 168 169 /** 170 * Returns the current value. 171 * @return The current value. 172 * @throws IOException When the value is faulty. 173 * @throws InterruptedException When the job is aborted. 174 */ 175 public Result getCurrentValue() throws IOException, InterruptedException { 176 return value; 177 } 178 179 /** 180 * Positions the record reader to the next record. 181 * @return <code>true</code> if there was another record. 182 * @throws IOException When reading the record failed. 183 * @throws InterruptedException When the job was aborted. 184 */ 185 public boolean nextKeyValue() throws IOException, InterruptedException { 186 if (key == null) { 187 key = new ImmutableBytesWritable(); 188 } 189 if (value == null) { 190 value = new Result(); 191 } 192 try { 193 try { 194 value = this.scanner.next(); 195 if (value != null && value.isStale()) { 196 numStale++; 197 } 198 if (logScannerActivity) { 199 rowcount++; 200 if (rowcount >= logPerRowCount) { 201 long now = System.currentTimeMillis(); 202 LOG.info("Mapper took {}ms to process {} rows", (now - timestamp), rowcount); 203 timestamp = now; 204 rowcount = 0; 205 } 206 } 207 } catch (IOException e) { 208 // do not retry if the exception tells us not to do so 209 if (e instanceof DoNotRetryIOException) { 210 updateCounters(); 211 throw e; 212 } 213 // try to handle all other IOExceptions by restarting 214 // the scanner, if the second call fails, it will be rethrown 215 LOG.info("recovered from " + StringUtils.stringifyException(e)); 216 if (lastSuccessfulRow == null) { 217 LOG.warn("We are restarting the first next() invocation," 218 + " if your mapper has restarted a few other times like this" 219 + " then you should consider killing this job and investigate" 220 + " why it's taking so long."); 221 } 222 if (lastSuccessfulRow == null) { 223 restart(scan.getStartRow()); 224 } else { 225 restart(lastSuccessfulRow); 226 scanner.next(); // skip presumed already mapped row 227 } 228 value = scanner.next(); 229 if (value != null && value.isStale()) { 230 numStale++; 231 } 232 numRestarts++; 233 } 234 235 if (value != null && value.size() > 0) { 236 key.set(value.getRow()); 237 lastSuccessfulRow = key.get(); 238 return true; 239 } 240 241 // Need handle cursor result 242 if (value != null && value.isCursor()) { 243 key.set(value.getCursor().getRow()); 244 lastSuccessfulRow = key.get(); 245 return true; 246 } 247 248 updateCounters(); 249 return false; 250 } catch (IOException ioe) { 251 updateCounters(); 252 if (logScannerActivity) { 253 long now = System.currentTimeMillis(); 254 LOG.info("Mapper took {}ms to process {} rows", (now - timestamp), rowcount); 255 LOG.info(ioe.toString(), ioe); 256 String lastRow = 257 lastSuccessfulRow == null ? "null" : Bytes.toStringBinary(lastSuccessfulRow); 258 LOG.info("lastSuccessfulRow=" + lastRow); 259 } 260 throw ioe; 261 } 262 } 263 264 /** 265 * If hbase runs on new version of mapreduce, RecordReader has access to counters thus can update 266 * counters based on scanMetrics. If hbase runs on old version of mapreduce, it won't be able to 267 * get access to counters and TableRecorderReader can't update counter values. 268 */ 269 private void updateCounters() { 270 ScanMetrics scanMetrics = scanner.getScanMetrics(); 271 if (scanMetrics == null) { 272 return; 273 } 274 275 updateCounters(scanMetrics, numRestarts, context, numStale); 276 } 277 278 /** 279 * @deprecated since 2.4.0 and 2.3.2, will be removed in 4.0.0 Use 280 * {@link #updateCounters(ScanMetrics, long, TaskAttemptContext, long)} instead. 281 */ 282 @Deprecated 283 protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts, 284 Method getCounter, TaskAttemptContext context, long numStale) { 285 updateCounters(scanMetrics, numScannerRestarts, context, numStale); 286 } 287 288 protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts, 289 TaskAttemptContext context, long numStale) { 290 // we can get access to counters only if hbase uses new mapreduce APIs 291 if (context == null) { 292 return; 293 } 294 295 for (Map.Entry<String, Long> entry : scanMetrics.getMetricsMap().entrySet()) { 296 Counter counter = context.getCounter(HBASE_COUNTER_GROUP_NAME, entry.getKey()); 297 if (counter != null) { 298 counter.increment(entry.getValue()); 299 } 300 } 301 if (numScannerRestarts != 0L) { 302 Counter counter = context.getCounter(HBASE_COUNTER_GROUP_NAME, "NUM_SCANNER_RESTARTS"); 303 if (counter != null) { 304 counter.increment(numScannerRestarts); 305 } 306 } 307 if (numStale != 0L) { 308 Counter counter = context.getCounter(HBASE_COUNTER_GROUP_NAME, "NUM_SCAN_RESULTS_STALE"); 309 if (counter != null) { 310 counter.increment(numStale); 311 } 312 } 313 } 314 315 /** 316 * The current progress of the record reader through its data. 317 * @return A number between 0.0 and 1.0, the fraction of the data read. 318 */ 319 public float getProgress() { 320 // Depends on the total number of tuples 321 return 0; 322 } 323 324}