“This is the 33rd day of my participation in the November Gwen Challenge. See details: The Last Gwen Challenge 2021”.

1. HBase API

1.1. Environment Preparation

Add dependencies to pom.xml after creating a new project:

<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.3.1</version>
</dependency>

1.2, HBase API

1.2.1. Obtain the Configuration object

private Configuration conf;

@Before
public void init(a) {
    // instantiate using a singleton method of HBaseConfiguration
    conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum"."hadoop102,hadoop103,hadoop104");
    conf.set("hbase.zookeeper.property.clientPort"."2181");
}
Copy the code

1.2.2. Determine whether the table exists

@Test
public void isTableExist(a) throws Exception {
    System.out.println(isTableExist("student"));
}

private boolean isTableExist(String tableName) throws Exception {
    // Create an HBaseAdmin object before you manage and access tables in HBase
    //Connection connection =
    ConnectionFactory.createConnection(conf);
    //HBaseAdmin admin = (HBaseAdmin) connection.getAdmin();
    HBaseAdmin admin = new HBaseAdmin(conf);
    return admin.tableExists(tableName);
}
Copy the code

1.2.3 Create a table

@Test
public void createTable(a) throws Exception {
    createTable("student"."info");
}

private void createTable(String tableName, String... columnFamily) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    // Check whether the table exists
    if (isTableExist(tableName)) {
        System.out.println("Table" + tableName + "Pre-existing");
        //System.exit(0);
    } else {
        // Create a table property object
        HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName));
        // Create multiple column families
        for (String cf : columnFamily) {
            descriptor.addFamily(new HColumnDescriptor(cf));
        }
        // Create a table based on the configuration of the table
        admin.createTable(descriptor);
        System.out.println("Table" + tableName + "Created successfully!"); }}Copy the code

1.2.4 drop a table

@Test
public void dropTable(a) throws Exception {
    dropTable("student");
}

private void dropTable(String tableName) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    if (isTableExist(tableName)) {
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
        System.out.println("Table" + tableName + "Delete successful!");
    } else {
        System.out.println("Table" + tableName + "It doesn't exist!); }}Copy the code

1.2.5. Insert data into a table

@Test
public void addRowData(a) throws Exception {
    addRowData("student"."1001"."info"."name"."moe");
}

private void addRowData(String tableName, String rowKey, String columnFamily, String column, String value) throws Exception {
    // Create an HTable object
    HTable hTable = new HTable(conf, tableName);
    // Insert data into the table
    Put put = new Put(Bytes.toBytes(rowKey));
    // Assembles data into the Put object
    put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
    hTable.put(put);
    hTable.close();
    System.out.println("Data inserted successfully");
}
Copy the code

1.2.6. Obtain all data

@Test
public void getAllRows(a) throws Exception {
    getAllRows("student");
}

private void getAllRows(String tableName) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    // Get the object used to scan region
    Scan scan = new Scan();
    // Use HTable to get the object of the resultScanner implementation class
    ResultScanner resultScanner = hTable.getScanner(scan);
    for (Result result : resultScanner) {
        Cell[] cells = result.rawCells();
        for (Cell cell : cells) {
            / / get rowkey
            System.out.println("Certainly" button: + Bytes.toString(CellUtil.cloneRow(cell)));
            // Get the column family
            System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("Values."+ Bytes.toString(CellUtil.cloneValue(cell))); }}}Copy the code

1.2.7. Get a row of data

@Test
public void getRow(a) throws Exception {
    getRow("student"."1001");
}

private void getRow(String tableName, String rowKey) throws Exception {
    HTable table = new HTable(conf, tableName);
    Get get = new Get(Bytes.toBytes(rowKey));
    //get.setMaxVersions(); Show all versions
    //get.setTimeStamp(); Displays the version of the specified timestamp
    Result result = table.get(get);
    for (Cell cell : result.rawCells()) {
        System.out.println("Certainly" button: + Bytes.toString(result.getRow()));
        System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
        System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
        System.out.println("Values." + Bytes.toString(CellUtil.cloneValue(cell)));
        System.out.println("Timestamp :"+ cell.getTimestamp()); }}Copy the code

1.2.8 Get the data of a row specified as “column family: column”

@Test
public void getRowQualifier(a) throws Exception {
    getRowQualifier("student"."1001"."info"."name");
}

private void getRowQualifier(String tableName, String rowKey, String family, String qualifier) throws Exception {
    HTable table = new HTable(conf, tableName);
    Get get = new Get(Bytes.toBytes(rowKey));
    get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
    Result result = table.get(get);
    for (Cell cell : result.rawCells()) {
        System.out.println("Certainly" button: + Bytes.toString(result.getRow()));
        System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
        System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
        System.out.println("Values." + Bytes.toString(CellUtil.cloneValue(cell)));
1.}}Copy the code

1.2.9 Delete multi-row data

@Test
public void deleteMultiRow(a) throws Exception {
    deleteMultiRow("student"."1001");
}

private void deleteMultiRow(String tableName, String... rows) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    List<Delete> deleteList = new ArrayList<>();
    for (String row : rows) {
        Delete delete = new Delete(Bytes.toBytes(row));
        deleteList.add(delete);
    }
    hTable.delete(deleteList);
    hTable.close();
}
Copy the code

1.3. MapReduce

Using the Java apis of HBase, you can implement MapReduce processes associated with HBase operations. For example, MapReduce is used to import data from a local file system to HBase tables. For example, we read some raw data from HBase and use MapReduce for data analysis.

1.3.1 official HBase-MapReduce

  1. View the execution of HBase MapReduce jobs

    bin/hbase mapredcp
    Copy the code
  2. Import of environment variables

    • Perform the import of environment variables (temporarily, on the command line)

      export HBASE_HOME=/opt/module/hbase
      export HADOOP_HOME=/opt/module/hadoop-3.1.3
      export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
    • Permanent: Set it in /etc/profile.d/my_env.sh

      export HBASE_HOME=/opt/module/hbase
      export HADOOP_HOME=/opt/module/hadoop-3.1.3
    • Run the following command in hadoop-env.sh:

      export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
      Copy the code
  3. Run an official MapReduce job

    • Example 1: Count how many rows are in the Student table

      [moe@hadoop102 hbase]$ /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter student

    • Case 2: Import local data to HBase using MapReduce

      • Create a TSV file locally: fruit.tsv

        1001	Apple	Red
        1002	Pear	Yellow
        1003	Pineapple	Yellow
        Copy the code
      • Create Hbase table

        hbase(main):001:0> create 'fruit','info'
        Copy the code
      • Upload fruit. TSV file in HDFS

        [moe@hadoop102 hbase]$ hadoop dfs -put fruit.tsv /
        Copy the code
      • Execute MapReduce to the HBase fruit table

        /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv \
        -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
        hdfs://hadoop102:8020/fruit.tsv
        Copy the code
      • Run the scan command to view the import result

        hbase(main):002:0> scan 'fruit'
        Copy the code

1.3.2. Customize hbase-MapReduce1

Objective: Migrate data from HDFS fruit.tsV into hbase fruit1 table by MR.

  1. Build FruitMapper class for reading fruit.tsv data

    public class FruitMapper extends Mapper<LongWritable.Text.LongWritable.Text> {
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { context.write(key, value); }}Copy the code
  2. Construct the FruitReduce class, which is used to write every row of data in fruit.tsV read into the hbase Fruit1 table

    public class FruitReducer extends TableReducer<LongWritable.Text.NullWritable> {
    
        @Override
        protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            for (Text value : values) {
                // 1001 Apple Red
                String line = value.toString();
                String[] fields = line.split("\t");
                Put put = new Put(Bytes.toBytes(fields[0]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(fields[2])); context.write(NullWritable.get(), put); }}}Copy the code
  3. Build the FruitDriver class to assemble the Job task

    public class FruitDriver implements Tool {
    
        private Configuration conf = null;
    
        @Override
        public int run(String[] args) throws Exception {
            1. Obtain the Job object
            Job job = Job.getInstance(conf);
            // 2. Set the driver class path
            job.setJarByClass(FruitDriver.class);
            // 3. Set the KV type of mapper and mapper output
            job.setMapperClass(FruitMapper.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(Text.class);
            // 4. Set Reduce
            TableMapReduceUtil.initTableReducerJob(args[1], FruitReducer.class, job);
            // 5. Set input parameters
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            // 6. Submit tasks
            boolean result = job.waitForCompletion(true);
            return result ? 0 : 1;
        }
    
        @Override
        public void setConf(Configuration configuration) {
            this.conf = configuration;
        }
    
        @Override
        public Configuration getConf(a) {
            return conf;
        }
    
        public static void main(String[] args) {
            try {
                Configuration conf = new Configuration();
                int run = ToolRunner.run(conf, new FruitDriver(), args);
                System.exit(run);
            } catch(Exception e) { e.printStackTrace(); }}}Copy the code
  4. The main function is called to run the Job

    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            int run = ToolRunner.run(conf, new FruitDriver(), args);
            System.exit(run);
        } catch(Exception e) { e.printStackTrace(); }}Copy the code
  5. Package run task

    [moe@hadoop102 test]$ yarn jar hbase-1.0-SNAPSHOT.jar com.moe.mr1.FruitDriver /fruit.tsv fruit1

    Warning: If the table to be imported does not exist, create the table in advance.

1.3.3. Customize hbase-Mapreduce2

Objective: Import data in the Name column of the fruit1 table in HBase to the Fruit2 table.

  1. Build the Fruit2Mapper class for reading data from the fruit1 table

    public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable.Put> {
    
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            // Build the PUT object
            Put put = new Put(key.get());
            for (Cell cell : value.rawCells()) {
                if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) { put.add(cell); } } context.write(key, put); }}Copy the code
  2. Build Fruit2Reduce and write the data to fruit2

    public class Fruit2Reduce extends TableReducer<ImmutableBytesWritable.Put.NullWritable> {
    
        @Override
        protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
            for(Put put : values) { context.write(NullWritable.get(), put); }}}Copy the code
  3. Build Fruit2Driver to assemble Job tasks

    public class Fruit2Driver implements Tool {
    
        private Configuration conf = null;
    
        @Override
        public int run(String[] args) throws Exception {
            Job job = Job.getInstance(conf);
            job.setJarByClass(Fruit2Driver.class);
            // args[0] : table 1
            TableMapReduceUtil.initTableMapperJob("fruit1".new Scan(),
                    Fruit2Mapper.class,
                    ImmutableBytesWritable.class,
                    Put.class, job);
            // args[1] : table 2
            TableMapReduceUtil.initTableReducerJob("fruit2", Fruit2Reduce.class, job);
            boolean result = job.waitForCompletion(true);
            return result ? 0 : 1;
        }
    
        @Override
        public void setConf(Configuration configuration) {
            this.conf = configuration;
        }
    
        @Override
        public Configuration getConf(a) {
            return conf;
        }
    
        public static void main(String[] args) {
            try {
                // Configuration conf = new Configuration(); // Use a package that needs to be thrown into the cluster for testing
                // The hbase-site. XML configuration file is required to connect to HBase locally for testing
                Configuration conf = HBaseConfiguration.create();
                int run = ToolRunner.run(conf, new Fruit2Driver(), args);
                System.exit(run);
            } catch(Exception e) { e.printStackTrace(); }}}Copy the code
  4. The main function is called to run the Job

    public static void main(String[] args) {
        try {
            // Configuration conf = new Configuration(); // Use a package that needs to be thrown into the cluster for testing
            // The hbase-site. XML configuration file is required to connect to HBase locally for testing
            Configuration conf = HBaseConfiguration.create();
            int run = ToolRunner.run(conf, new Fruit2Driver(), args);
            System.exit(run);
        } catch(Exception e) { e.printStackTrace(); }}Copy the code

Two, friendship links

Big data HBase Learning Journey 1