preface

In the interview, it is easy for the interviewer to ask about b-tree data structure. It is still difficult to master this data structure. In order to improve my technical depth, I specially spent some time to sort out and summarize all the concepts and operations of B-tree.

The idea of B trees

B tree is a kind of multi-way balanced search tree, which is different from binary balanced tree, it has not only two branches, but multiple branches. A balanced B tree of Order M is a balanced M-way search tree. B tree is used for disk addressing, and it is an efficient search algorithm.

B tree properties

  1. The root node has at least two children
  2. The number of keywords x on each non-root node meets the following requirements: ⌈m/2⌉−1⩽x⩽m−1\ LCeil M /2 \ Rceil-1 \leqslant x \ LeQslant m-1 disk M /2 oceanstor −1 disk x ⌉ M −1
  3. All the leaf nodes are in the same layer
  4. The degree of all nodes except the root node (excluding the leaf node) is exactly the total number of keywords plus 1. Therefore, the number of internal subtrees k meets the following requirement: ⌈m/2⌉⩽k⩽m\lceil m/2 \rceil \leqslant k \leqslant m⌈m/2⌉⩽k⩽m \lceil m/2 \rceil \leqslant k \leqslant m⌈m/2⌉⩽k⩽m \leqslant m

B tree data structure description

A B-tree node contains the following key information:

  • Number of keywords
  • Key word group pointer
  • Child array pointer
  • Parent pointer

To make writing code easier, you also need to include the following information:

  • Order of the tree
  • Number of children

Expressed in code as follows:

typedef struct Node {
    int level; // The order of the tree
    int keyNum; // Number of keywords
    int childNum; // Number of children
    int* keys; // Key word count group
    struct Node* parent; // Parent pointer
    struct Node** children; // Child pointer array
} Node;
Copy the code

B tree insertion

The operation of inserting a B tree can only be performed on leaf nodes. The number of keywords on leaf nodes must strictly meet the properties of the B tree: ⌈ M /2⌉−1⩽x⩽m−1\ LCeil M /2 \ Rceil-1 \leqslant x \ Leqslant m-1 disk M /2 oceanstor −1 disk x ⌉ M −1

The insertion steps are as follows:

  1. Look for the right leaf nodes
  2. Find the right insertion position on the leaf node
  3. After insertion, judge whether the number of keywords exceeds M-1. If so, the node needs to be split, split from the middle, and insert the middle element into the parent node of the current node, judge whether the number of keywords of the parent node exceeds M-1. If so, continue splitting, repeat step 3

Build a 5-order b-tree using the sequence 1,2,3,4,5:

Delete the B tree

The deletion operation of B-tree is complicated and needs to be discussed in different cases:

The node to be deleted is a non-leaf node:

Find the largest keyword on the leaf from the subtree to the left of the current keyword, swap the keyword to be deleted with this keyword, and convert the deletion of non-leaf nodes into the deletion of leaf nodes

The node to be deleted is the leaf node:

X >⌈m/2⌉−1x > \lceil m/2 \ rceil-1x >⌈m/2⌉−1. In the following figure, I want to delete 18, so I need to perform the following operations:

X =⌈m/2⌉−1x = \lceil m/2 \rceil – 1x=⌈m/2⌉−1. In this case, the leaf node does not meet the minimum number of keywords after deletion, so the first step is to borrow a node from the father, and the father borrows a node from his brother. If x<⌈m/2⌉−1x < \lceil m/2 \rceil – 1x<⌈m/2⌉−1, the path cannot be borrowed. Then merge the current leaf node with its brother and the keyword between the two brothers to form a node

  • You can borrow and delete 11. The specific deletion is as follows:

  • Can not borrow, need to merge, delete 8, the specific deletion is as follows:

B tree Demo based on C language

#include <stdio.h>
#include <stdlib.h>

typedef struct Node {
    int level; // The order of the tree
    int keyNum; // Number of keywords
    int childNum;
    int* keys; // Key word count group
    struct Node* parent; // Parent pointer
    struct Node** children; // Child pointer array
} Node;

// Initialize the node
Node* initNode(int level) {
    Node* node = (Node*)malloc(sizeof(Node));
    node -> level = level;
    node -> keyNum = 0;
    node -> childNum = 0;
    node -> parent = NULL;
    node -> keys = (int*)malloc(sizeof(int) * (level + 1));
    node -> children = (Node**)malloc(sizeof(Node*) * level);
    for (int i = 0; i < level; i++) {
        node -> keys[i] = 0;
        node -> children[i] = NULL;
    }
    return node;
}

// Find the appropriate insertion position in the node
int findSuiteIndex(Node* node, int data) {
    int index;
    for (index = 1; index <= node -> keyNum; index++) {
         if (data < node -> keys[index])
             break;
    }
    return index;
}

// Insert data into the node
void addData(Node* node, int data, Node** T) {
    int index = findSuiteIndex(node, data);
    for (int i = node -> keyNum; i >= index; i--) {
        node -> keys[i + 1] = node -> keys[i]; 
    }
    node -> keys[index] = data;
    node -> keyNum = node -> keyNum + 1;
    if (node -> keyNum == node -> level) {
        // split
        int mid = node -> level / 2 + node -> level % 2;
        Node* lchild = initNode(node -> level);
        Node* rchild = initNode(node -> level);
        for (int i = 1; i < mid; i++) {
            addData(lchild, node -> keys[i], T);
        }
        for (int i = mid + 1; i <= node -> keyNum; i++) {
            addData(rchild, node -> keys[i], T);
        }
        for (int i = 0; i < mid; i++) {
            lchild -> children[i] = node -> children[i];
            if(node -> children[i] ! =NULL) { node -> children[i] -> parent = lchild; lchild -> childNum ++; }}int index = 0;
        for (int i = mid; i < node -> childNum; i++) {
            rchild -> children[index++] = node -> children[i];
            if(node -> children[i] ! =NULL) { node -> children[i] -> parent = rchild; rchild -> childNum ++; }}if (node -> parent == NULL) {
            Node* newParent = initNode(node -> level);
            addData(newParent, node -> keys[mid], T);
            newParent -> children[0] = lchild;
            newParent -> children[1] = rchild;
            newParent -> childNum = 2;
            lchild -> parent = newParent;
            rchild -> parent = newParent;
            *T = newParent;
        }
        else {
            int index = findSuiteIndex(node -> parent, node -> keys[mid]);
            lchild -> parent = node -> parent;
            rchild -> parent = node -> parent;
            node -> parent -> children[index - 1] = lchild;
            if(node -> parent -> children[index] ! =NULL) {
                for (int i = node -> parent -> childNum - 1; i >= index; i--) {
                    node -> parent -> children[i + 1] = node -> parent -> children[i]; } } node -> parent -> children[index] = rchild; node -> parent -> childNum ++; addData(node -> parent, node -> keys[mid], T); }}}Node* findSuiteLeafNode(Node* T, int data) {
    if (T -> childNum == 0)
        return T;
    else {
        int index = findSuiteIndex(T, data);
        return findSuiteLeafNode(T -> children[index - 1], data); }}Node* find(Node* node, int data) {
    if (node == NULL) {
        return NULL;
    }
    for (int i = 1; i <= node -> keyNum; i++) {
        if (data == node -> keys[i]) {
            return node;
        }
        else if (data < node -> keys[i]) {
            return find(node -> children[i - 1], data);
        }
        else {
            if(i ! = node -> keyNum && data < node -> keys[i +1]) 
                returnfind(node -> children[i], data); }}return find(node -> children[node -> keyNum], data);
}

// Insert node
void insert(Node** T, int data) {
    Node* node = findSuiteLeafNode(*T, data);
    addData(node, data, T);
}

void printTree(Node* T) {
    if(T ! =NULL) {
        for (int i  = 1; i <= T -> keyNum; i++) {
            printf("%d ", T -> keys[i]);
        }
        printf("\n");
        for (int i = 0; i < T -> childNum; i++) { printTree(T -> children[i]); }}}int main(a) {
    Node* T = initNode(5);
    insert(&T, 1);
    insert(&T, 2);
    insert(&T, 6);
    insert(&T, 7);
    insert(&T, 11);
    insert(&T, 4);
    insert(&T, 8);
    insert(&T, 13);
    insert(&T, 10);
    insert(&T, 5);
    insert(&T, 17);
    insert(&T, 9);
    insert(&T, 16);
    insert(&T, 20);
    insert(&T, 3);
    insert(&T, 12);
    insert(&T, 14);
    insert(&T, 18);
    insert(&T, 19);
    insert(&T, 15);
    printTree(T);
    Node* node = find(T, 7);
    if (node) {
        for (int i = 1; i <= node -> keyNum; i++) {
            printf("%d ", node -> keys[i]);
        }
        printf("\n");
    }
    return 0;
}
Copy the code