1. 多叉树介绍
多叉树是一种数据结构,它的每个结点可以有多个子结点,而不是像二叉树那样只能有两个子结点
多叉树的一个应用是表示文件系统的目录结构
多叉树相比二叉树,优点是可以减少树的高度,降低磁盘IO次数,适合存储文件和数据库;缺点是可能降低查找效率,增加内存空间消耗,需要复杂的平衡算法
2. B树介绍
B树是一种平衡的多路查找树,它的每个结点可以包含多个关键字和多个子结点
B树的特点是:
- 根结点至少有一个关键字,其他结点至少有 t-1 个关键字,其中 t 是B树的最小度数
- 每个结点至多有 $2t-1$ 个关键字和 $2t$ 个子结点
- 所有的叶子结点都在同一层,并且不存储数据
- 结点内的关键字按升序排列;有 n 个关键字的内部结点拥有 n+1 棵子树,第 i 棵子树中的所有关键字都介于它左右相邻的两个关键字之间(最左子树的关键字都小于第一个关键字,最右子树的关键字都大于最后一个关键字)
B树的优点是可以减少树的高度,降低磁盘IO次数,适合存储大量数据;B树的缺点是插入和删除操作比较复杂,需要分裂和合并结点
一棵M阶B树T,需要满足以下条件:
- 每个结点至多拥有M棵子树
- 根结点如果不是叶子结点,则至少拥有两棵子树
- 除了根结点以外,其余每个分支结点至少拥有 ⌈M/2⌉ 棵子树
- 所有的叶结点都在同一层上
- 有k棵子树的分支结点则存在k-1个关键字,关键字按照递增顺序进行排序
- 除根结点外,每个结点的关键字数量 $n$ 满足 $\lceil M/2 \rceil - 1 \le n \le M - 1$
3. B+树介绍
B+树是B树的一种变体,通常用于数据库和操作系统的文件系统中
其特点是:
- 每个结点最多有m个子结点,其中m称为树的阶数
- 非叶子结点只存储关键字,不存储数据,只起到索引作用,数据只存储在叶子结点中
- 所有叶子结点都位于同一层,并且通过指针相连,形成一个有序链表
- 每个非叶子结点包含 $n$ 个关键字和 $n$ 个指向子结点的指针,其中 $\lceil m/2 \rceil - 1 \le n \le m - 1$,$\lceil\cdot\rceil$ 表示向上取整
- 每个叶子结点包含 $n$ 个关键字和 $n$ 个数据指针,以及一个指向下一个叶子结点的指针,其中 $\lceil m/2 \rceil - 1 \le n \le m - 1$
B+树相比于B树,有以下优点:
- 非叶子结点占用更少的空间,可以存储更多的关键字,降低了树的高度,减少了磁盘I/O次数
- 所有数据都在叶子结点上,查询效率更稳定,不会因为关键字在不同层而导致性能差异
- 通过叶子结点的指针链表,可以方便地进行范围查询和排序查询
4. B树结点定义
#define M 3
typedef int KEY_TYPE;

/*
 * One B-tree node.
 *
 * children and keys are heap arrays: btree_create_node() assigns freshly
 * allocated memory to them and btree_destroy_node() frees them, so they
 * have to be pointers. Fixed-size array members can be neither assigned
 * nor freed.
 */
typedef struct _btree_node
{
    struct _btree_node **children; /* child pointers, 2 * t slots */
    KEY_TYPE *keys;                /* stored keys, 2 * t - 1 slots */
    int num;                       /* number of keys currently stored */
    int is_leaf;                   /* 1 = leaf node, 0 = non-leaf node */
} btree_node;
// Handle for a whole B-tree.
typedef struct _btree
{
btree_node *root; // root node of the tree
int t; // degree parameter handed to btree_create_node(): each node gets 2 * t child slots and 2 * t - 1 key slots
}btree;
B树中的每个内部结点(非叶子结点)包含一定数量的关键字(key),关键字是用来分隔其子树(subtree)的值,也就是说,每个关键字都对应一个子树
例如,如果一个内部结点有3个子树,那么它必须有2个关键字:a和b,并且满足以下条件:
- 第一个子树中的所有关键字都小于a
- 第二个子树中的所有关键字都大于等于a且小于b
- 第三个子树中的所有关键字都大于等于b
B树还有一些特性:
- B树的阶(order)是指每个结点最多可以有多少个子树
- B树的高度(height)是指从根结点到任意叶子结点的最长路径上经过的结点数
- 除根结点外,B树的每个结点至少包含⌈阶/2⌉-1个关键字;包括根结点在内,每个结点至多包含阶-1个关键字
- B树的每个叶子结点都在同一层,并且包含实际存储的数据或指向数据的指针
5. B树结点创建和销毁
/*
 * Allocate and initialise an empty B-tree node.
 *
 * node_num: degree parameter t; the node gets room for 2 * node_num child
 *           pointers and 2 * node_num - 1 keys.
 * is_leaf:  1 for a leaf node, 0 for a non-leaf node.
 *
 * Returns the new zero-filled node with num == 0, or NULL when node_num is
 * not positive or any allocation fails. Release it with
 * btree_destroy_node().
 */
btree_node *btree_create_node(int node_num, int is_leaf)
{
    if (node_num <= 0)
        return NULL;

    btree_node *node = calloc(1, sizeof *node);
    if (node == NULL)
        return NULL;

    size_t child_slots = 2 * (size_t)node_num;
    node->children = calloc(child_slots, sizeof *node->children);
    node->keys = calloc(child_slots - 1, sizeof *node->keys);
    if (node->children == NULL || node->keys == NULL)
    {
        /* do not hand out a half-built node; free(NULL) is a no-op */
        free(node->keys);
        free(node->children);
        free(node);
        return NULL;
    }

    node->num = 0;
    node->is_leaf = is_leaf;
    return node;
}
/*
 * Free one node together with its key array and child-pointer array.
 * A NULL node is ignored. The child nodes themselves are not freed.
 */
void btree_destroy_node(btree_node *node)
{
    if (node == NULL)
        return;

    /* free(NULL) does nothing, so the arrays need no individual guards */
    free(node->keys);
    free(node->children);
    free(node);
}
补充malloc和calloc的区别:
- malloc只需要一个参数,表示要分配的内存大小(以字节为单位),而calloc需要两个参数,表示要分配的内存块的数量和每个块的大小
- malloc分配的内存不会被初始化,可能包含垃圾值,而calloc分配的内存会被初始化为0
- malloc通常比calloc更快,因为它不需要初始化内存,而calloc需要额外的时间来清零内存
- malloc的全称是Memory Allocation,表示分配一个单一的动态内存块,而calloc的全称是Contiguous Allocation,表示分配多个连续的动态内存块
6. B树的创建
/*
 * Initialise the tree handle T with degree parameter num and a single empty
 * leaf as root.
 *
 * T:   handle to fill in.
 * num: degree parameter, stored in T->t and used to size the root node.
 *
 * T->root receives whatever btree_create_node() returns.
 */
void btree_create(btree *T, int num)
{
    /* the handle's field is named t; it has no member called num */
    T->t = num;
    T->root = btree_create_node(num, 1);
}
7. B树的插入
B树的插入操作只发生在叶子结点,如果叶子结点已满,就需要进行分裂和上升操作,保持树的平衡性
插入操作可以分为以下几个步骤:
- 如果树为空,创建一个根结点,并插入键值
- 如果树不为空,从根结点开始,沿着键值的搜索路径向下找到合适的叶子结点
- 如果叶子结点未满,直接在该结点中按照升序插入键值
- 如果叶子结点已满,需要先将该结点分裂为两个兄弟结点,并将中间的键值上升到父结点中;如果父结点也已满,就重复这个过程,直到找到一个未满的父结点或者创建一个新的根结点
/*
 * Split the full child parent->children[i] (2t - 1 keys) in two and move its
 * median key up into parent.
 *
 * T:      tree handle; T->t is the degree parameter t.
 * parent: node owning the full child. No check is made that parent has a
 *         free key slot, so callers are expected to pass a non-full node.
 * i:      index of the full child in parent->children.
 *
 * Afterwards the original child keeps its lower t - 1 keys, a new sibling at
 * parent->children[i + 1] holds the upper t - 1 keys (and the upper t
 * children for a non-leaf), and parent->keys[i] is the former median.
 *
 * NOTE: a failed allocation of the new sibling is not handled here.
 */
void btree_split_child(btree *T, btree_node *parent, int i)
{
    int t = T->t;
    btree_node *child = parent->children[i];
    btree_node *new_node = btree_create_node(t, child->is_leaf);

    /* move the upper t - 1 keys and record how many the sibling holds */
    for (int j = 0; j < t - 1; j++)
        new_node->keys[j] = child->keys[t + j];
    new_node->num = t - 1;

    /* a non-leaf hands over its upper t children (one more than keys) */
    if (child->is_leaf == 0)
    {
        for (int j = 0; j < t; j++)
        {
            new_node->children[j] = child->children[t + j];
            child->children[t + j] = NULL; /* no stale duplicate pointer */
        }
    }
    child->num = t - 1;

    /* open child slot i + 1 and link the sibling right of the old child */
    for (int j = parent->num; j >= i + 1; j--)
        parent->children[j + 1] = parent->children[j];
    parent->children[i + 1] = new_node;

    /* open key slot i and lift the median into it */
    for (int j = parent->num - 1; j >= i; j--)
        parent->keys[j + 1] = parent->keys[j];
    parent->keys[i] = child->keys[t - 1];
    parent->num++;
}
/*
 * Insert key below x, where x itself is not full.
 *
 * Walks down from x, splitting any full child before stepping into it, and
 * finally places key in sorted position inside the leaf that is reached.
 *
 * T:   tree handle (T->t is the degree parameter).
 * x:   starting node; no fullness check is made on it.
 * key: key to insert.
 */
void btree_insert_not_full(btree *T, btree_node *x, KEY_TYPE key)
{
    btree_node *cur = x;

    while (cur->is_leaf != 1)
    {
        /* slot = index of the child whose range contains key */
        int slot = cur->num;
        while (slot > 0 && cur->keys[slot - 1] > key)
            slot--;

        if (cur->children[slot]->num == 2 * T->t - 1)
        {
            btree_split_child(T, cur, slot);
            /* the lifted median decides which half receives the key */
            if (cur->keys[slot] < key)
                slot++;
        }
        cur = cur->children[slot];
    }

    /* shift larger keys one step right and drop key into the gap */
    int pos = cur->num;
    while (pos > 0 && cur->keys[pos - 1] > key)
    {
        cur->keys[pos] = cur->keys[pos - 1];
        pos--;
    }
    cur->keys[pos] = key;
    cur->num++;
}
/*
 * Insert key into the tree T.
 *
 * If the tree has no root (for example after its last key was deleted) a
 * new leaf root is created. If the root is full, a new root is placed above
 * it and the old root is split, which grows the tree by one level. In every
 * case the key is then inserted through btree_insert_not_full().
 *
 * If a required node cannot be allocated the key is not inserted.
 */
void btree_insert(btree *T, KEY_TYPE key)
{
    btree_node *root = T->root;

    if (root == NULL)
    {
        root = btree_create_node(T->t, 1);
        if (root == NULL)
            return;
        T->root = root;
    }

    if (root->num == 2 * T->t - 1)
    {
        btree_node *new_root = btree_create_node(T->t, 0);
        if (new_root == NULL)
            return;
        new_root->children[0] = root;
        T->root = new_root;
        btree_split_child(T, new_root, 0);
        /* the split only makes room; the key still has to be inserted */
        root = new_root;
    }

    btree_insert_not_full(T, root, key);
}
8. B树的删除
- 如果该结点是非叶子结点,用其前驱或后继替换它,然后从叶子结点开始递归地删除前驱或后继
- 如果该结点是叶子结点,直接删除该关键字
- 如果删除后该结点的关键字数量小于最小值,需要从兄弟结点借一个关键字或者和兄弟结点合并,并递归地调整父结点的索引值
- 如果相邻两棵子树都是 M/2-1,则合并
- 如果左边的子树大于 M/2-1,向左子树借一个结点
- 如果右边的子树大于 M/2-1,向右子树借一个结点
/*
 * Merge node->children[idx], the separator node->keys[idx] and
 * node->children[idx + 1] into the left child, then remove the separator
 * and the right child from node.
 *
 * T:    tree handle; T->t is the degree parameter t.
 * node: parent of the two children being merged.
 * idx:  index of the separator key and of the left child.
 *
 * The key positions written assume both children hold exactly t - 1 keys;
 * this is not checked. The right child is destroyed. If node is left
 * without keys it is destroyed as well and the merged child becomes T->root
 * (NOTE(review): this treats an emptied node as the root; confirm callers
 * never empty a non-root node).
 */
void btree_merge(btree *T, btree_node *node, int idx)
{
    int t = T->t;
    btree_node *left = node->children[idx];
    btree_node *right = node->children[idx + 1];

    /* the separator comes down between the two key runs */
    left->keys[t - 1] = node->keys[idx];
    for (int i = 0; i < t - 1; i++)
        left->keys[t + i] = right->keys[i];
    if (left->is_leaf == 0)
        for (int i = 0; i < t; i++)
            left->children[t + i] = right->children[i];
    /* separator plus the t - 1 keys taken from right */
    left->num += t;
    btree_destroy_node(right);

    /* close the gap left by the separator and the right child */
    int i = idx + 1;
    for (; i < node->num; i++)
    {
        node->keys[i - 1] = node->keys[i];
        node->children[i] = node->children[i + 1];
    }
    /* i is the old last child slot, which is now a stale duplicate;
       slot i + 1 could lie one past the end of the array */
    node->children[i] = NULL;
    node->num--;

    if (node->num == 0)
    {
        T->root = left;
        btree_destroy_node(node);
    }
}
/*
 * Delete key from the subtree rooted at node.
 *
 * T:    tree handle; T->t is the degree parameter t.
 * node: subtree root to search; NULL is ignored.
 * key:  key to remove.
 *
 * Key found in a leaf: it is removed in place.
 * Key found in a non-leaf: it is replaced by its in-order predecessor or
 * successor (taken from the leaf level of a neighbouring subtree that has
 * at least t keys) and that key is deleted recursively; if both
 * neighbouring children have fewer than t keys they are merged first.
 * Key not in this node: before descending, a child with t - 1 keys is
 * topped up by borrowing from a sibling or merging with one.
 *
 * Prints "Delete key[...] failed" when the search runs off a leaf.
 */
void btree_delete_key(btree *T, btree_node *node, KEY_TYPE key)
{
    if (node == NULL)
        return;

    int t = T->t;
    int idx = 0;
    while (idx < node->num && key > node->keys[idx])
        idx++;

    if (idx < node->num && key == node->keys[idx])
    {
        if (node->is_leaf == 1)
        {
            /* the key sits in a leaf: close the gap */
            for (int i = idx; i < node->num - 1; i++)
                node->keys[i] = node->keys[i + 1];
            node->keys[node->num - 1] = 0;
            node->num--;
            if (node->num == 0)
            {
                /* NOTE(review): an emptied leaf is treated as the root here.
                   Destroy it fully so its key/child arrays are not leaked. */
                btree_destroy_node(node);
                T->root = NULL;
            }
            return;
        }
        else if (node->children[idx]->num >= t)
        {
            /* left subtree can spare a key: use the real predecessor, i.e.
               the last key of the right-most leaf, not the child's last key */
            btree_node *left = node->children[idx];
            btree_node *cur = left;
            while (cur->is_leaf == 0)
                cur = cur->children[cur->num];
            KEY_TYPE pred = cur->keys[cur->num - 1];
            node->keys[idx] = pred;
            btree_delete_key(T, left, pred);
        }
        else if (node->children[idx + 1]->num >= t)
        {
            /* right subtree can spare a key: use the real successor, i.e.
               the first key of the left-most leaf */
            btree_node *right = node->children[idx + 1];
            btree_node *cur = right;
            while (cur->is_leaf == 0)
                cur = cur->children[0];
            KEY_TYPE succ = cur->keys[0];
            node->keys[idx] = succ;
            btree_delete_key(T, right, succ);
        }
        else
        {
            /* both neighbouring children are minimal: merge, then delete
               from the merged node. The pointer is taken first because
               btree_merge() may destroy node. */
            btree_node *merged = node->children[idx];
            btree_merge(T, node, idx);
            btree_delete_key(T, merged, key);
        }
    }
    else
    {
        /* the key is not in this node: descend */
        btree_node *child = node->children[idx];
        if (child == NULL)
        {
            printf("Delete key[%d] failed\n", key);
            return;
        }
        if (child->num == t - 1)
        {
            btree_node *left = NULL;
            btree_node *right = NULL;
            if (idx - 1 >= 0)
                left = node->children[idx - 1];
            if (idx + 1 <= node->num)
                right = node->children[idx + 1];
            if ((left && left->num >= t) || (right && right->num >= t))
            {
                /* prefer the fuller sibling when both exist */
                int use_right = 0;
                if (right)
                    use_right = 1;
                if (left && right)
                    use_right = (right->num > left->num) ? 1 : 0;
                if (right && right->num >= t && use_right)
                {
                    /* borrow one key from the right sibling via the parent */
                    child->keys[child->num] = node->keys[idx];
                    child->children[child->num + 1] = right->children[0];
                    child->num++;
                    node->keys[idx] = right->keys[0];
                    for (int i = 0; i < right->num - 1; i++)
                    {
                        right->keys[i] = right->keys[i + 1];
                        right->children[i] = right->children[i + 1];
                    }
                    right->keys[right->num - 1] = 0;
                    right->children[right->num - 1] = right->children[right->num];
                    right->children[right->num] = NULL;
                    right->num--;
                }
                else
                {
                    /* borrow one key from the left sibling via the parent */
                    for (int i = child->num; i > 0; i--)
                    {
                        child->keys[i] = child->keys[i - 1];
                        child->children[i + 1] = child->children[i];
                    }
                    child->children[1] = child->children[0];
                    child->children[0] = left->children[left->num];
                    child->keys[0] = node->keys[idx - 1];
                    child->num++;
                    node->keys[idx - 1] = left->keys[left->num - 1];
                    left->keys[left->num - 1] = 0;
                    left->children[left->num] = NULL;
                    left->num--;
                }
            }
            else if ((!left || (left->num == t - 1)) && (!right || (right->num == t - 1)))
            {
                /* no sibling can lend a key: merge with one of them */
                if (left && left->num == t - 1)
                {
                    btree_merge(T, node, idx - 1);
                    child = left; /* child was folded into left */
                }
                else if (right && right->num == t - 1)
                    btree_merge(T, node, idx);
            }
        }
        btree_delete_key(T, child, key);
    }
}
/*
 * Delete key from the tree T.
 *
 * Returns -1 when the tree has no root, otherwise 0. The return value does
 * not say whether the key was actually present.
 */
int btree_delete(btree *T, KEY_TYPE key)
{
    if (T->root == NULL)
        return -1;
    /* deletion is done by btree_delete_key(); btree_destroy_node() takes a
       single node and would not accept these arguments */
    btree_delete_key(T, T->root, key);
    return 0;
}
9. B树的查询
/*
 * Binary search inside one node.
 *
 * node: node whose keys are searched (assumed sorted ascending).
 * low:  first index of the search range.
 * high: last index of the search range (inclusive).
 * key:  key to look for.
 *
 * Returns the smallest index in [low, high] whose key is not less than key,
 * high + 1 when every key in the range is smaller, or -1 when the range is
 * empty or an index is negative.
 */
int btree_bin_search(btree_node *node, int low, int high, KEY_TYPE key)
{
    if (low < 0 || high < 0 || high < low)
        return -1;

    int lo = low;
    int hi = high;
    while (lo <= hi)
    {
        int mid = (lo + hi) / 2;
        if (node->keys[mid] < key)
            lo = mid + 1;
        else
            hi = mid - 1;
    }
    return lo;
}
10. 完整的代码
使用26个英文字母建立B树进行测试
#include <stdio.h>
#include <stdlib.h>
// NOTE(review): M is not referenced by any code visible in this listing.
#define M 3
// Type of the keys stored in the tree.
typedef int KEY_TYPE;
// One B-tree node; both arrays are allocated by btree_create_node().
typedef struct _btree_node
{
struct _btree_node **children; // child pointers (2 * t slots)
KEY_TYPE *keys; // stored keys (2 * t - 1 slots)
int num; // number of keys currently stored
int is_leaf; // 1 = leaf node, 0 = non-leaf node
} btree_node;
// Handle for a whole B-tree.
typedef struct _btree
{
btree_node *root; // root node of the tree
int t; // 2 * t is the maximum number of child pointers allowed per node
} btree;
/*
 * Allocate and initialise an empty B-tree node.
 *
 * node_num: degree parameter t; the node gets room for 2 * node_num child
 *           pointers and 2 * node_num - 1 keys.
 * is_leaf:  1 for a leaf node, 0 for a non-leaf node.
 *
 * Returns the new zero-filled node with num == 0, or NULL when node_num is
 * not positive or any allocation fails. Release it with
 * btree_destroy_node().
 */
btree_node *btree_create_node(int node_num, int is_leaf)
{
    /* a non-positive degree would turn the key-array size negative */
    if (node_num <= 0)
        return NULL;

    btree_node *node = calloc(1, sizeof *node);
    if (node == NULL)
        return NULL;

    size_t child_slots = 2 * (size_t)node_num;
    node->children = calloc(child_slots, sizeof *node->children);
    node->keys = calloc(child_slots - 1, sizeof *node->keys);
    if (node->children == NULL || node->keys == NULL)
    {
        /* do not hand out a half-built node; free(NULL) is a no-op */
        free(node->keys);
        free(node->children);
        free(node);
        return NULL;
    }

    node->num = 0;
    node->is_leaf = is_leaf;
    return node;
}
/*
 * Free one node together with its key array and child-pointer array.
 * A NULL node is ignored. The child nodes themselves are not freed.
 */
void btree_destroy_node(btree_node *node)
{
    if (node == NULL)
        return;

    /* free(NULL) does nothing, so the arrays need no individual guards */
    free(node->keys);
    free(node->children);
    free(node);
}
/*
 * Initialise the tree handle T: store the degree parameter num in T->t and
 * make a fresh empty leaf the root.
 *
 * T->root receives whatever btree_create_node() returns.
 */
void btree_create(btree *T, int num)
{
    T->root = btree_create_node(num, 1);
    T->t = num;
}
/*
 * Split the full child parent->children[i] (2t - 1 keys) in two and move its
 * median key up into parent.
 *
 * T:      tree handle; T->t is the degree parameter t.
 * parent: node owning the full child. No check is made that parent has a
 *         free key slot, so callers are expected to pass a non-full node.
 * i:      index of the full child in parent->children.
 *
 * Afterwards the original child keeps its lower t - 1 keys, a new sibling at
 * parent->children[i + 1] holds the upper t - 1 keys (and the upper t
 * children for a non-leaf), and parent->keys[i] is the former median.
 *
 * NOTE: a failed allocation of the new sibling is not handled here.
 */
void btree_split_child(btree *T, btree_node *parent, int i)
{
    int t = T->t;
    btree_node *child = parent->children[i];
    btree_node *new_node = btree_create_node(t, child->is_leaf);

    /* move the upper t - 1 keys and record how many the sibling holds */
    for (int j = 0; j < t - 1; j++)
        new_node->keys[j] = child->keys[t + j];
    new_node->num = t - 1;

    /* a non-leaf hands over its upper t children (one more than keys) */
    if (child->is_leaf == 0)
    {
        for (int j = 0; j < t; j++)
        {
            new_node->children[j] = child->children[t + j];
            child->children[t + j] = NULL; /* no stale duplicate pointer */
        }
    }
    child->num = t - 1;

    /* open child slot i + 1 and link the sibling right of the old child */
    for (int j = parent->num; j >= i + 1; j--)
        parent->children[j + 1] = parent->children[j];
    parent->children[i + 1] = new_node;

    /* open key slot i and lift the median into it */
    for (int j = parent->num - 1; j >= i; j--)
        parent->keys[j + 1] = parent->keys[j];
    parent->keys[i] = child->keys[t - 1];
    parent->num++;
}
/*
 * Insert key below x, where x itself is not full.
 *
 * Walks down from x, splitting any full child before stepping into it, and
 * finally places key in sorted position inside the leaf that is reached.
 *
 * T:   tree handle (T->t is the degree parameter).
 * x:   starting node; no fullness check is made on it.
 * key: key to insert.
 */
void btree_insert_not_full(btree *T, btree_node *x, KEY_TYPE key)
{
    btree_node *cur = x;

    while (cur->is_leaf != 1)
    {
        /* slot = index of the child whose range contains key */
        int slot = cur->num;
        while (slot > 0 && cur->keys[slot - 1] > key)
            slot--;

        if (cur->children[slot]->num == 2 * T->t - 1)
        {
            btree_split_child(T, cur, slot);
            /* the lifted median decides which half receives the key */
            if (cur->keys[slot] < key)
                slot++;
        }
        cur = cur->children[slot];
    }

    /* shift larger keys one step right and drop key into the gap */
    int pos = cur->num;
    while (pos > 0 && cur->keys[pos - 1] > key)
    {
        cur->keys[pos] = cur->keys[pos - 1];
        pos--;
    }
    cur->keys[pos] = key;
    cur->num++;
}
/*
 * Insert key into the tree T.
 *
 * If the tree has no root (btree_delete_key() sets T->root to NULL once the
 * last key is removed) a new leaf root is created. If the root is full, a
 * new root is placed above it and the old root is split, which grows the
 * tree by one level. In every case the key is then inserted through
 * btree_insert_not_full().
 *
 * If a required node cannot be allocated the key is not inserted.
 */
void btree_insert(btree *T, KEY_TYPE key)
{
    btree_node *root = T->root;

    if (root == NULL)
    {
        root = btree_create_node(T->t, 1);
        if (root == NULL)
            return;
        T->root = root;
    }

    if (root->num == 2 * T->t - 1)
    {
        btree_node *new_root = btree_create_node(T->t, 0);
        if (new_root == NULL)
            return;
        new_root->children[0] = root;
        T->root = new_root;
        btree_split_child(T, new_root, 0);
        /* the split only makes room; the key still has to be inserted */
        root = new_root;
    }

    btree_insert_not_full(T, root, key);
}
/*
 * Merge node->children[idx], the separator node->keys[idx] and
 * node->children[idx + 1] into the left child, then remove the separator
 * and the right child from node.
 *
 * T:    tree handle; T->t is the degree parameter t.
 * node: parent of the two children being merged.
 * idx:  index of the separator key and of the left child.
 *
 * The key positions written assume both children hold exactly t - 1 keys;
 * this is not checked. The right child is destroyed. If node is left
 * without keys it is destroyed as well and the merged child becomes T->root
 * (NOTE(review): this treats an emptied node as the root; confirm callers
 * never empty a non-root node).
 */
void btree_merge(btree *T, btree_node *node, int idx)
{
    int t = T->t;
    btree_node *left = node->children[idx];
    btree_node *right = node->children[idx + 1];

    /* the separator comes down between the two key runs */
    left->keys[t - 1] = node->keys[idx];
    for (int i = 0; i < t - 1; i++)
        left->keys[t + i] = right->keys[i];
    if (left->is_leaf == 0)
        for (int i = 0; i < t; i++)
            left->children[t + i] = right->children[i];
    /* separator plus the t - 1 keys taken from right */
    left->num += t;
    btree_destroy_node(right);

    /* close the gap left by the separator and the right child */
    int i = idx + 1;
    for (; i < node->num; i++)
    {
        node->keys[i - 1] = node->keys[i];
        node->children[i] = node->children[i + 1];
    }
    /* i is the old last child slot, which is now a stale duplicate;
       slot i + 1 could lie one past the end of the array */
    node->children[i] = NULL;
    node->num--;

    if (node->num == 0)
    {
        T->root = left;
        btree_destroy_node(node);
    }
}
/*
 * Delete key from the subtree rooted at node.
 *
 * T:    tree handle; T->t is the degree parameter t.
 * node: subtree root to search; NULL is ignored.
 * key:  key to remove.
 *
 * Key found in a leaf: it is removed in place.
 * Key found in a non-leaf: it is replaced by its in-order predecessor or
 * successor (taken from the leaf level of a neighbouring subtree that has
 * at least t keys) and that key is deleted recursively; if both
 * neighbouring children have fewer than t keys they are merged first.
 * Key not in this node: before descending, a child with t - 1 keys is
 * topped up by borrowing from a sibling or merging with one.
 *
 * Prints "Delete key[...] failed" when the search runs off a leaf.
 */
void btree_delete_key(btree *T, btree_node *node, KEY_TYPE key)
{
    if (node == NULL)
        return;

    int t = T->t;
    int idx = 0;
    while (idx < node->num && key > node->keys[idx])
        idx++;

    if (idx < node->num && key == node->keys[idx])
    {
        if (node->is_leaf == 1)
        {
            /* the key sits in a leaf: close the gap */
            for (int i = idx; i < node->num - 1; i++)
                node->keys[i] = node->keys[i + 1];
            node->keys[node->num - 1] = 0;
            node->num--;
            if (node->num == 0)
            {
                /* NOTE(review): an emptied leaf is treated as the root here.
                   Destroy it fully so its key/child arrays are not leaked. */
                btree_destroy_node(node);
                T->root = NULL;
            }
            return;
        }
        else if (node->children[idx]->num >= t)
        {
            /* left subtree can spare a key: use the real predecessor, i.e.
               the last key of the right-most leaf, not the child's last key */
            btree_node *left = node->children[idx];
            btree_node *cur = left;
            while (cur->is_leaf == 0)
                cur = cur->children[cur->num];
            KEY_TYPE pred = cur->keys[cur->num - 1];
            node->keys[idx] = pred;
            btree_delete_key(T, left, pred);
        }
        else if (node->children[idx + 1]->num >= t)
        {
            /* right subtree can spare a key: use the real successor, i.e.
               the first key of the left-most leaf */
            btree_node *right = node->children[idx + 1];
            btree_node *cur = right;
            while (cur->is_leaf == 0)
                cur = cur->children[0];
            KEY_TYPE succ = cur->keys[0];
            node->keys[idx] = succ;
            btree_delete_key(T, right, succ);
        }
        else
        {
            /* both neighbouring children are minimal: merge, then delete
               from the merged node. The pointer is taken first because
               btree_merge() may destroy node. */
            btree_node *merged = node->children[idx];
            btree_merge(T, node, idx);
            btree_delete_key(T, merged, key);
        }
    }
    else
    {
        /* the key is not in this node: descend */
        btree_node *child = node->children[idx];
        if (child == NULL)
        {
            printf("Delete key[%d] failed\n", key);
            return;
        }
        if (child->num == t - 1)
        {
            btree_node *left = NULL;
            btree_node *right = NULL;
            if (idx - 1 >= 0)
                left = node->children[idx - 1];
            if (idx + 1 <= node->num)
                right = node->children[idx + 1];
            if ((left && left->num >= t) || (right && right->num >= t))
            {
                /* prefer the fuller sibling when both exist */
                int use_right = 0;
                if (right)
                    use_right = 1;
                if (left && right)
                    use_right = (right->num > left->num) ? 1 : 0;
                if (right && right->num >= t && use_right)
                {
                    /* borrow one key from the right sibling via the parent */
                    child->keys[child->num] = node->keys[idx];
                    child->children[child->num + 1] = right->children[0];
                    child->num++;
                    node->keys[idx] = right->keys[0];
                    for (int i = 0; i < right->num - 1; i++)
                    {
                        right->keys[i] = right->keys[i + 1];
                        right->children[i] = right->children[i + 1];
                    }
                    right->keys[right->num - 1] = 0;
                    right->children[right->num - 1] = right->children[right->num];
                    right->children[right->num] = NULL;
                    right->num--;
                }
                else
                {
                    /* borrow one key from the left sibling via the parent */
                    for (int i = child->num; i > 0; i--)
                    {
                        child->keys[i] = child->keys[i - 1];
                        child->children[i + 1] = child->children[i];
                    }
                    child->children[1] = child->children[0];
                    child->children[0] = left->children[left->num];
                    child->keys[0] = node->keys[idx - 1];
                    child->num++;
                    node->keys[idx - 1] = left->keys[left->num - 1];
                    left->keys[left->num - 1] = 0;
                    left->children[left->num] = NULL;
                    left->num--;
                }
            }
            else if ((!left || (left->num == t - 1)) && (!right || (right->num == t - 1)))
            {
                /* no sibling can lend a key: merge with one of them */
                if (left && left->num == t - 1)
                {
                    btree_merge(T, node, idx - 1);
                    child = left; /* child was folded into left */
                }
                else if (right && right->num == t - 1)
                    btree_merge(T, node, idx);
            }
        }
        btree_delete_key(T, child, key);
    }
}
/*
 * Delete key from the tree T via btree_delete_key().
 *
 * Returns -1 when the tree has no root, otherwise 0. The return value does
 * not say whether the key was actually present.
 */
int btree_delete(btree *T, KEY_TYPE key)
{
    if (T->root != NULL)
    {
        btree_delete_key(T, T->root, key);
        return 0;
    }
    return -1;
}
/*
 * Binary search inside one node.
 *
 * node: node whose keys are searched (assumed sorted ascending).
 * low:  first index of the search range.
 * high: last index of the search range (inclusive).
 * key:  key to look for.
 *
 * Returns the smallest index in [low, high] whose key is not less than key,
 * high + 1 when every key in the range is smaller, or -1 when the range is
 * empty or an index is negative.
 */
int btree_bin_search(btree_node *node, int low, int high, KEY_TYPE key)
{
    if (low < 0 || high < 0 || high < low)
        return -1;

    int lo = low;
    int hi = high;
    while (lo <= hi)
    {
        int mid = (lo + hi) / 2;
        if (node->keys[mid] < key)
            lo = mid + 1;
        else
            hi = mid - 1;
    }
    return lo;
}
/*
 * Print the subtree rooted at node, one node per block, depth first.
 *
 * T:     tree handle; T->t gives the number of child slots (2 * t) to dump.
 * node:  subtree root; NULL prints the empty-tree message.
 * layer: depth label printed for node; children are printed with layer + 1.
 *
 * For each node the function prints its layer, key count and leaf flag, the
 * keys as characters, the node's address and every child slot's address.
 */
void btree_print(btree *T, btree_node *node, int layer)
{
    if (node == NULL)
    {
        printf("this is a empty tree\n");
        return;
    }

    printf("\nlayer = %d key_num = %d is_leaf = %d\n", layer, node->num, node->is_leaf);
    for (int i = 0; i < node->num; i++)
        printf("%c ", node->keys[i]);
    puts("");

    /* %p requires a void * argument */
    printf("%p\n", (void *)node);
    /* the child array has exactly 2 * t slots, so index 2 * t is out of range */
    for (int i = 0; i < 2 * T->t; i++)
        printf("%p ", (void *)node->children[i]);
    puts("");

    for (int i = 0; i <= node->num; i++)
        if (node->children[i])
            btree_print(T, node->children[i], layer + 1);
}
/*
 * Demo driver: builds a B-tree with degree parameter 3 from the 26
 * uppercase letters, prints it, then deletes the letters from 'Z' down to
 * 'A', printing the tree after every deletion.
 */
int main()
{
    btree T = {
        0};
    btree_create(&T, 3);

    /* exactly 26 characters, so the array holds no terminating '\0' */
    char key[26] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    /* insert the letters themselves; they must not be overwritten with
       random values, which would not fit in a char and could repeat */
    for (int i = 0; i < 26; i++)
    {
        printf("%c ", key[i]);
        btree_insert(&T, key[i]);
    }
    btree_print(&T, T.root, 0);

    for (int i = 0; i < 26; i++)
    {
        printf("\n---------------------------------\n");
        btree_delete(&T, key[25 - i]);
        btree_print(&T, T.root, 0);
    }
    return 0;
}