“This is the 33rd day of my participation in the November Writing Challenge. See details: The Final Writing Challenge of 2021”.
1. HBase API
1.1. Environment Preparation
Add dependencies to pom.xml after creating a new project:
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.3.1</version>
</dependency>
1.2, HBase API
1.2.1. Obtain the Configuration object
private Configuration conf;
@Before
public void init(a) {
// instantiate using a singleton method of HBaseConfiguration
conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum"."hadoop102,hadoop103,hadoop104");
conf.set("hbase.zookeeper.property.clientPort"."2181");
}
Copy the code
1.2.2. Determine whether the table exists
@Test
public void isTableExist(a) throws Exception {
System.out.println(isTableExist("student"));
}
/**
 * Checks whether a table exists in HBase.
 *
 * @param tableName name of the table to look up
 * @return {@code true} if the table exists, {@code false} otherwise
 * @throws Exception if the admin client cannot be created, queried, or closed
 */
private boolean isTableExist(String tableName) throws Exception {
    // An HBaseAdmin object is needed to manage and inspect tables.
    // Connection-based alternative to the constructor used below:
    //   Connection connection = ConnectionFactory.createConnection(conf);
    //   HBaseAdmin admin = (HBaseAdmin) connection.getAdmin();
    // try-with-resources closes the admin client so it is not leaked.
    try (HBaseAdmin admin = new HBaseAdmin(conf)) {
        return admin.tableExists(tableName);
    }
}
Copy the code
1.2.3 Create a table
@Test
public void createTable(a) throws Exception {
createTable("student"."info");
}
private void createTable(String tableName, String... columnFamily) throws Exception {
HBaseAdmin admin = new HBaseAdmin(conf);
// Check whether the table exists
if (isTableExist(tableName)) {
System.out.println("Table" + tableName + "Pre-existing");
//System.exit(0);
} else {
// Create a table property object
HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName));
// Create multiple column families
for (String cf : columnFamily) {
descriptor.addFamily(new HColumnDescriptor(cf));
}
// Create a table based on the configuration of the table
admin.createTable(descriptor);
System.out.println("Table" + tableName + "Created successfully!"); }}Copy the code
1.2.4 drop a table
@Test
public void dropTable(a) throws Exception {
dropTable("student");
}
private void dropTable(String tableName) throws Exception {
HBaseAdmin admin = new HBaseAdmin(conf);
if (isTableExist(tableName)) {
admin.disableTable(tableName);
admin.deleteTable(tableName);
System.out.println("Table" + tableName + "Delete successful!");
} else {
System.out.println("Table" + tableName + "It doesn't exist!); }}Copy the code
1.2.5. Insert data into a table
@Test
public void addRowData(a) throws Exception {
addRowData("student"."1001"."info"."name"."moe");
}
/**
 * Writes a single cell value into a table.
 *
 * @param tableName    table to write to
 * @param rowKey       row key of the target row
 * @param columnFamily column family of the target cell
 * @param column       column qualifier of the target cell
 * @param value        value to store
 * @throws Exception if the table cannot be opened, written to, or closed
 */
private void addRowData(String tableName, String rowKey, String columnFamily, String column, String value) throws Exception {
    // Assemble the cell into a Put keyed by the row key.
    Put put = new Put(Bytes.toBytes(rowKey));
    put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));

    // try-with-resources closes the table even when the write throws.
    try (HTable hTable = new HTable(conf, tableName)) {
        hTable.put(put);
    }
    System.out.println("Data inserted successfully");
}
Copy the code
1.2.6. Obtain all data
@Test
public void getAllRows(a) throws Exception {
getAllRows("student");
}
private void getAllRows(String tableName) throws Exception {
HTable hTable = new HTable(conf, tableName);
// Get the object used to scan region
Scan scan = new Scan();
// Use HTable to get the object of the resultScanner implementation class
ResultScanner resultScanner = hTable.getScanner(scan);
for (Result result : resultScanner) {
Cell[] cells = result.rawCells();
for (Cell cell : cells) {
/ / get rowkey
System.out.println("Certainly" button: + Bytes.toString(CellUtil.cloneRow(cell)));
// Get the column family
System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println("Values."+ Bytes.toString(CellUtil.cloneValue(cell))); }}}Copy the code
1.2.7. Get a row of data
@Test
public void getRow(a) throws Exception {
getRow("student"."1001");
}
private void getRow(String tableName, String rowKey) throws Exception {
HTable table = new HTable(conf, tableName);
Get get = new Get(Bytes.toBytes(rowKey));
//get.setMaxVersions(); Show all versions
//get.setTimeStamp(); Displays the version of the specified timestamp
Result result = table.get(get);
for (Cell cell : result.rawCells()) {
System.out.println("Certainly" button: + Bytes.toString(result.getRow()));
System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println("Values." + Bytes.toString(CellUtil.cloneValue(cell)));
System.out.println("Timestamp :"+ cell.getTimestamp()); }}Copy the code
1.2.8 Get the data of a row specified as “column family: column”
@Test
public void getRowQualifier(a) throws Exception {
getRowQualifier("student"."1001"."info"."name");
}
private void getRowQualifier(String tableName, String rowKey, String family, String qualifier) throws Exception {
HTable table = new HTable(conf, tableName);
Get get = new Get(Bytes.toBytes(rowKey));
get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
Result result = table.get(get);
for (Cell cell : result.rawCells()) {
System.out.println("Certainly" button: + Bytes.toString(result.getRow()));
System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println("Values." + Bytes.toString(CellUtil.cloneValue(cell)));
1.}}Copy the code
1.2.9 Delete multi-row data
@Test
public void deleteMultiRow(a) throws Exception {
deleteMultiRow("student"."1001");
}
/**
 * Deletes one or more rows from a table in a single batch call.
 *
 * @param tableName table to delete from
 * @param rows      row keys of the rows to delete
 * @throws Exception if the table cannot be opened, modified, or closed
 */
private void deleteMultiRow(String tableName, String... rows) throws Exception {
    // Build one Delete per row key.
    List<Delete> deleteList = new ArrayList<>();
    for (String row : rows) {
        deleteList.add(new Delete(Bytes.toBytes(row)));
    }

    // try-with-resources closes the table even when the delete throws.
    try (HTable hTable = new HTable(conf, tableName)) {
        hTable.delete(deleteList);
    }
}
Copy the code
1.3. MapReduce
Using the Java apis of HBase, you can implement MapReduce processes associated with HBase operations. For example, MapReduce is used to import data from a local file system to HBase tables. For example, we read some raw data from HBase and use MapReduce for data analysis.
1.3.1 official HBase-MapReduce
-
View the execution of HBase MapReduce jobs
bin/hbase mapredcp Copy the code
-
Import of environment variables
-
Perform the import of environment variables (temporarily, on the command line)
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
-
Permanent: Set it in /etc/profile.d/my_env.sh
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-3.1.3
-
Run the following command in hadoop-env.sh:
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/* Copy the code
-
-
Run an official MapReduce job
-
Example 1: Count how many rows are in the Student table
[moe@hadoop102 hbase]$ /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter student
-
Case 2: Import local data to HBase using MapReduce
-
Create a TSV file locally: fruit.tsv
1001 Apple Red 1002 Pear Yellow 1003 Pineapple Yellow Copy the code
-
Create Hbase table
hbase(main):001:0> create 'fruit','info' Copy the code
-
Upload the fruit.tsv file to HDFS
[moe@hadoop102 hbase]$ hadoop dfs -put fruit.tsv / Copy the code
-
Execute MapReduce to the HBase fruit table
/opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
hdfs://hadoop102:8020/fruit.tsv
-
Run the scan command to view the import result
hbase(main):002:0> scan 'fruit' Copy the code
-
-
1.3.2. Custom HBase-MapReduce 1
Objective: Use MapReduce to migrate the data in the HDFS file fruit.tsv into the HBase table fruit1.
-
Build FruitMapper class for reading fruit.tsv data
public class FruitMapper extends Mapper<LongWritable.Text.LongWritable.Text> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { context.write(key, value); }}Copy the code
-
Build the FruitReducer class, which writes every row of data read from fruit.tsv into the HBase fruit1 table
public class FruitReducer extends TableReducer<LongWritable.Text.NullWritable> { @Override protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text value : values) { // 1001 Apple Red String line = value.toString(); String[] fields = line.split("\t"); Put put = new Put(Bytes.toBytes(fields[0])); put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1])); put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(fields[2])); context.write(NullWritable.get(), put); }}}Copy the code
-
Build the FruitDriver class to assemble the Job task
public class FruitDriver implements Tool { private Configuration conf = null; @Override public int run(String[] args) throws Exception { 1. Obtain the Job object Job job = Job.getInstance(conf); // 2. Set the driver class path job.setJarByClass(FruitDriver.class); // 3. Set the KV type of mapper and mapper output job.setMapperClass(FruitMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); // 4. Set Reduce TableMapReduceUtil.initTableReducerJob(args[1], FruitReducer.class, job); // 5. Set input parameters FileInputFormat.setInputPaths(job, new Path(args[0])); // 6. Submit tasks boolean result = job.waitForCompletion(true); return result ? 0 : 1; } @Override public void setConf(Configuration configuration) { this.conf = configuration; } @Override public Configuration getConf(a) { return conf; } public static void main(String[] args) { try { Configuration conf = new Configuration(); int run = ToolRunner.run(conf, new FruitDriver(), args); System.exit(run); } catch(Exception e) { e.printStackTrace(); }}}Copy the code
-
The main function is called to run the Job
public static void main(String[] args) { try { Configuration conf = new Configuration(); int run = ToolRunner.run(conf, new FruitDriver(), args); System.exit(run); } catch(Exception e) { e.printStackTrace(); }}Copy the code
-
Package run task
[moe@hadoop102 test]$ yarn jar hbase-1.0-snapshot.jar com.moe.mr1.FruitDriver /fruit.tsv fruit1
Warning: If the table to be imported does not exist, create the table in advance.
1.3.3. Custom HBase-MapReduce 2
Objective: Import the data in the name column of the HBase fruit1 table into the fruit2 table.
-
Build the Fruit2Mapper class, which reads the data of the fruit1 table
public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable.Put> { @Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { // Build the PUT object Put put = new Put(key.get()); for (Cell cell : value.rawCells()) { if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) { put.add(cell); } } context.write(key, put); }}Copy the code
-
Build Fruit2Reduce and write the data to fruit2
public class Fruit2Reduce extends TableReducer<ImmutableBytesWritable.Put.NullWritable> { @Override protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException { for(Put put : values) { context.write(NullWritable.get(), put); }}}Copy the code
-
Build Fruit2Driver to assemble Job tasks
public class Fruit2Driver implements Tool { private Configuration conf = null; @Override public int run(String[] args) throws Exception { Job job = Job.getInstance(conf); job.setJarByClass(Fruit2Driver.class); // args[0] : table 1 TableMapReduceUtil.initTableMapperJob("fruit1".new Scan(), Fruit2Mapper.class, ImmutableBytesWritable.class, Put.class, job); // args[1] : table 2 TableMapReduceUtil.initTableReducerJob("fruit2", Fruit2Reduce.class, job); boolean result = job.waitForCompletion(true); return result ? 0 : 1; } @Override public void setConf(Configuration configuration) { this.conf = configuration; } @Override public Configuration getConf(a) { return conf; } public static void main(String[] args) { try { // Configuration conf = new Configuration(); // Use a package that needs to be thrown into the cluster for testing // The hbase-site. XML configuration file is required to connect to HBase locally for testing Configuration conf = HBaseConfiguration.create(); int run = ToolRunner.run(conf, new Fruit2Driver(), args); System.exit(run); } catch(Exception e) { e.printStackTrace(); }}}Copy the code
-
The main function is called to run the Job
public static void main(String[] args) { try { // Configuration conf = new Configuration(); // Use a package that needs to be thrown into the cluster for testing // The hbase-site. XML configuration file is required to connect to HBase locally for testing Configuration conf = HBaseConfiguration.create(); int run = ToolRunner.run(conf, new Fruit2Driver(), args); System.exit(run); } catch(Exception e) { e.printStackTrace(); }}Copy the code
2. Related links
Big data HBase Learning Journey 1