"This is the 33rd day of my participation in the November Writing Challenge. For details, see: The Final Writing Challenge of 2021."

1. HBase API

1.1. Environment Preparation

Add dependencies to pom.xml after creating a new project:

<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.3.1</version>
</dependency>

1.2. HBase API

1.2.1. Obtain the Configuration object

private Configuration conf;

@Before
public void init(a) {
    // instantiate using a singleton method of HBaseConfiguration
    conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum"."hadoop102,hadoop103,hadoop104");
    conf.set("hbase.zookeeper.property.clientPort"."2181");
}
Copy the code

1.2.2. Determine whether the table exists

@Test
public void isTableExist(a) throws Exception {
    System.out.println(isTableExist("student"));
}

private boolean isTableExist(String tableName) throws Exception {
    // Create an HBaseAdmin object before you manage and access tables in HBase
    //Connection connection =
    ConnectionFactory.createConnection(conf);
    //HBaseAdmin admin = (HBaseAdmin) connection.getAdmin();
    HBaseAdmin admin = new HBaseAdmin(conf);
    return admin.tableExists(tableName);
}
Copy the code

1.2.3 Create a table

@Test
public void createTable(a) throws Exception {
    createTable("student"."info");
}

private void createTable(String tableName, String... columnFamily) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    // Check whether the table exists
    if (isTableExist(tableName)) {
        System.out.println("Table" + tableName + "Pre-existing");
        //System.exit(0);
    } else {
        // Create a table property object
        HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName));
        // Create multiple column families
        for (String cf : columnFamily) {
            descriptor.addFamily(new HColumnDescriptor(cf));
        }
        // Create a table based on the configuration of the table
        admin.createTable(descriptor);
        System.out.println("Table" + tableName + "Created successfully!"); }}Copy the code

1.2.4. Drop a table

@Test
public void dropTable(a) throws Exception {
    dropTable("student");
}

private void dropTable(String tableName) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(conf);
    if (isTableExist(tableName)) {
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
        System.out.println("Table" + tableName + "Delete successful!");
    } else {
        System.out.println("Table" + tableName + "It doesn't exist!); }}Copy the code

1.2.5. Insert data into a table

@Test
public void addRowData(a) throws Exception {
    addRowData("student"."1001"."info"."name"."moe");
}

private void addRowData(String tableName, String rowKey, String columnFamily, String column, String value) throws Exception {
    // Create an HTable object
    HTable hTable = new HTable(conf, tableName);
    // Insert data into the table
    Put put = new Put(Bytes.toBytes(rowKey));
    // Assembles data into the Put object
    put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value));
    hTable.put(put);
    hTable.close();
    System.out.println("Data inserted successfully");
}
Copy the code

1.2.6. Obtain all data

@Test
public void getAllRows(a) throws Exception {
    getAllRows("student");
}

private void getAllRows(String tableName) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    // Get the object used to scan region
    Scan scan = new Scan();
    // Use HTable to get the object of the resultScanner implementation class
    ResultScanner resultScanner = hTable.getScanner(scan);
    for (Result result : resultScanner) {
        Cell[] cells = result.rawCells();
        for (Cell cell : cells) {
            / / get rowkey
            System.out.println("Certainly" button: + Bytes.toString(CellUtil.cloneRow(cell)));
            // Get the column family
            System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
            System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
            System.out.println("Values."+ Bytes.toString(CellUtil.cloneValue(cell))); }}}Copy the code

1.2.7. Get a row of data

@Test
public void getRow(a) throws Exception {
    getRow("student"."1001");
}

private void getRow(String tableName, String rowKey) throws Exception {
    HTable table = new HTable(conf, tableName);
    Get get = new Get(Bytes.toBytes(rowKey));
    //get.setMaxVersions(); Show all versions
    //get.setTimeStamp(); Displays the version of the specified timestamp
    Result result = table.get(get);
    for (Cell cell : result.rawCells()) {
        System.out.println("Certainly" button: + Bytes.toString(result.getRow()));
        System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
        System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
        System.out.println("Values." + Bytes.toString(CellUtil.cloneValue(cell)));
        System.out.println("Timestamp :"+ cell.getTimestamp()); }}Copy the code

1.2.8 Get the data of a row specified as “column family: column”

@Test
public void getRowQualifier(a) throws Exception {
    getRowQualifier("student"."1001"."info"."name");
}

private void getRowQualifier(String tableName, String rowKey, String family, String qualifier) throws Exception {
    HTable table = new HTable(conf, tableName);
    Get get = new Get(Bytes.toBytes(rowKey));
    get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
    Result result = table.get(get);
    for (Cell cell : result.rawCells()) {
        System.out.println("Certainly" button: + Bytes.toString(result.getRow()));
        System.out.println("Column family" + Bytes.toString(CellUtil.cloneFamily(cell)));
        System.out.println("Column." + Bytes.toString(CellUtil.cloneQualifier(cell)));
        System.out.println("Values." + Bytes.toString(CellUtil.cloneValue(cell)));
1.}}Copy the code

1.2.9 Delete multi-row data

@Test
public void deleteMultiRow(a) throws Exception {
    deleteMultiRow("student"."1001");
}

private void deleteMultiRow(String tableName, String... rows) throws Exception {
    HTable hTable = new HTable(conf, tableName);
    List<Delete> deleteList = new ArrayList<>();
    for (String row : rows) {
        Delete delete = new Delete(Bytes.toBytes(row));
        deleteList.add(delete);
    }
    hTable.delete(deleteList);
    hTable.close();
}
Copy the code

1.3. MapReduce

With the HBase Java API you can write MapReduce jobs that operate on HBase. For example, MapReduce can be used to import data from the local file system into an HBase table, or to read raw data out of HBase and analyze it.

1.3.1 official HBase-MapReduce

View the classpath that HBase MapReduce jobs need in order to run:
```
bin/hbase mapredcp
Copy the code
```

Import of environment variables

Perform the import of environment variables (temporarily, on the command line)

# Temporary: applies only to the current shell session
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-3.1.3
# Put the JARs reported by `hbase mapredcp` on Hadoop's classpath
export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`

Permanent: Set it in /etc/profile.d/my_env.sh

# Permanent: place these lines in /etc/profile.d/my_env.sh
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-3.1.3

Add the following line to hadoop-env.sh:

export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase/lib/*
Copy the code

Run an official MapReduce job

Example 1: Count how many rows are in the Student table

[moe@hadoop102 hbase]$ /opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter student

Example 2: Import local data into HBase using MapReduce

Create a TSV file locally: fruit.tsv

1001	Apple	Red
1002	Pear	Yellow
1003	Pineapple	Yellow
Copy the code

Create Hbase table

hbase(main):001:0> create 'fruit','info'
Copy the code

Upload the fruit.tsv file to HDFS

[moe@hadoop102 hbase]$ hadoop fs -put fruit.tsv /
Copy the code

Execute MapReduce to the HBase fruit table

/opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv \
-Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit \
hdfs://hadoop102:8020/fruit.tsv
Copy the code

Run the scan command to view the import result

hbase(main):002:0> scan 'fruit'
Copy the code

1.3.2. Customize hbase-MapReduce1

Objective: Use a MapReduce job to migrate the data in the HDFS file fruit.tsv into the HBase table fruit1.

Build FruitMapper class for reading fruit.tsv data

public class FruitMapper extends Mapper<LongWritable.Text.LongWritable.Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { context.write(key, value); }}Copy the code

Build the FruitReducer class, which writes every line read from fruit.tsv into the HBase table fruit1

public class FruitReducer extends TableReducer<LongWritable.Text.NullWritable> {

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Text value : values) {
            // 1001 Apple Red
            String line = value.toString();
            String[] fields = line.split("\t");
            Put put = new Put(Bytes.toBytes(fields[0]));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(fields[2])); context.write(NullWritable.get(), put); }}}Copy the code

Build the FruitDriver class to assemble the Job task

public class FruitDriver implements Tool {

    private Configuration conf = null;

    @Override
    public int run(String[] args) throws Exception {
        1. Obtain the Job object
        Job job = Job.getInstance(conf);
        // 2. Set the driver class path
        job.setJarByClass(FruitDriver.class);
        // 3. Set the KV type of mapper and mapper output
        job.setMapperClass(FruitMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // 4. Set Reduce
        TableMapReduceUtil.initTableReducerJob(args[1], FruitReducer.class, job);
        // 5. Set input parameters
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // 6. Submit tasks
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    @Override
    public Configuration getConf(a) {
        return conf;
    }

    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            int run = ToolRunner.run(conf, new FruitDriver(), args);
            System.exit(run);
        } catch(Exception e) { e.printStackTrace(); }}}Copy the code

The main function is called to run the Job

public static void main(String[] args) {
    try {
        Configuration conf = new Configuration();
        int run = ToolRunner.run(conf, new FruitDriver(), args);
        System.exit(run);
    } catch(Exception e) { e.printStackTrace(); }}Copy the code

Package run task

[moe@hadoop102 test]$ yarn jar hbase-1.0-snapshot.jar com.moe.mr1.FruitDriver /fruit.tsv fruit1

Note: The target table must exist before the job runs; if it does not, create it in advance.

1.3.3. Customize hbase-Mapreduce2

Objective: Import the data in the name column of the HBase table fruit1 into the table fruit2.

Build the Fruit2Mapper class, which reads the data from the fruit1 table

public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable.Put> {

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // Build the PUT object
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) { put.add(cell); } } context.write(key, put); }}Copy the code

Build the Fruit2Reduce class, which writes the data into the fruit2 table

public class Fruit2Reduce extends TableReducer<ImmutableBytesWritable.Put.NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        for(Put put : values) { context.write(NullWritable.get(), put); }}}Copy the code

Build Fruit2Driver to assemble Job tasks

public class Fruit2Driver implements Tool {

    private Configuration conf = null;

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(conf);
        job.setJarByClass(Fruit2Driver.class);
        // args[0] : table 1
        TableMapReduceUtil.initTableMapperJob("fruit1".new Scan(),
                Fruit2Mapper.class,
                ImmutableBytesWritable.class,
                Put.class, job);
        // args[1] : table 2
        TableMapReduceUtil.initTableReducerJob("fruit2", Fruit2Reduce.class, job);
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    @Override
    public Configuration getConf(a) {
        return conf;
    }

    public static void main(String[] args) {
        try {
            // Configuration conf = new Configuration(); // Use a package that needs to be thrown into the cluster for testing
            // The hbase-site. XML configuration file is required to connect to HBase locally for testing
            Configuration conf = HBaseConfiguration.create();
            int run = ToolRunner.run(conf, new Fruit2Driver(), args);
            System.exit(run);
        } catch(Exception e) { e.printStackTrace(); }}}Copy the code

The main function is called to run the Job

public static void main(String[] args) {
    try {
        // Configuration conf = new Configuration(); // Use a package that needs to be thrown into the cluster for testing
        // The hbase-site. XML configuration file is required to connect to HBase locally for testing
        Configuration conf = HBaseConfiguration.create();
        int run = ToolRunner.run(conf, new Fruit2Driver(), args);
        System.exit(run);
    } catch(Exception e) { e.printStackTrace(); }}Copy the code

2. Related links

Big data HBase Learning Journey 1

mo4tech.com (Moment For Technology) is a global community where thousands of techies from across the globe hang out! Passionate technologists, be they gadget freaks, tech enthusiasts, coders, technopreneurs, or CIOs — you will find them all here.

Big data HBase Learning Journey Part 2

A, HBase API

1.1. Environment Preparation

1.2, HBase API

1.2.1. Obtain the Configuration object

1.2.2. Determine whether the table exists

1.2.3 Create a table

1.2.4 drop a table

Insert data into table 1.2.5

1.2.6. Obtain all data

1.2.7. Get a row of data

1.2.8 Get the data of a row specified as “column family: column”

1.2.9 Delete multi-row data

1.3. MapReduce

1.3.1 official HBase-MapReduce

1.3.2. Customize hbase-MapReduce1

1.3.3. Customize hbase-Mapreduce2

Two, friendship links

Big data HBase Learning Journey Part 2

A, HBase API

1.1. Environment Preparation

1.2, HBase API

1.2.1. Obtain the Configuration object

1.2.2. Determine whether the table exists

1.2.3 Create a table

1.2.4 drop a table

Insert data into table 1.2.5

1.2.6. Obtain all data

1.2.7. Get a row of data

1.2.8 Get the data of a row specified as “column family: column”

1.2.9 Delete multi-row data

1.3. MapReduce

1.3.1 official HBase-MapReduce

1.3.2. Customize hbase-MapReduce1

1.3.3. Customize hbase-Mapreduce2

Two, friendship links

Related Posts

The prototype pattern

HTTP network requests in Python, that’s the way to go

Concurrency issues: Why does Volatile solve the singleton null-pointer exception problem