简单数据库实现——Part13 - 更新父节点

简单数据库实现——Part13 - 更新父节点

接下来一步,我们将处理在分裂叶节点后如何修复父节点。以下面的例子为参考:

Example of updating internal node

在我们的例子中,我们将key3添加到树中,这会导致左叶节点分裂。分裂之后我们将通过以下操作来修复树:

  1. 将父节点中的第一个键更新为左孩子中的最大键(3)
  2. 在更新后的键之后添加一个新的子指针/键对
    • 新的指针指向新的子节点
    • 新键是新子节点中的最大键(5)

因此,首先,用两个新的函数调用替换原来的代码:步骤一的update_internal_node_key()和步骤二的internal_node_insert()

@@ -670,9 +725,11 @@ void leaf_node_split_and_insert(Cursor* cursor, uint32_t key, Row* value) {
   */
 
   void* old_node = get_page(cursor->table->pager, cursor->page_num);
+  uint32_t old_max = get_node_max_key(old_node);
   uint32_t new_page_num = get_unused_page_num(cursor->table->pager);
   void* new_node = get_page(cursor->table->pager, new_page_num);
   initialize_leaf_node(new_node);
+  *node_parent(new_node) = *node_parent(old_node);
   *leaf_node_next_leaf(new_node) = *leaf_node_next_leaf(old_node);
   *leaf_node_next_leaf(old_node) = new_page_num;
 
@@ -709,8 +766,12 @@ void leaf_node_split_and_insert(Cursor* cursor, uint32_t key, Row* value) {
   if (is_node_root(old_node)) {
     return create_new_root(cursor->table, new_page_num);
   } else {
-    printf("Need to implement updating parent after split\n");
-    exit(EXIT_FAILURE);
+    uint32_t parent_page_num = *node_parent(old_node);
+    uint32_t new_max = get_node_max_key(old_node);
+    void* parent = get_page(cursor->table->pager, parent_page_num);
+
+    update_internal_node_key(parent, old_max, new_max);
+    internal_node_insert(cursor->table, parent_page_num, new_page_num);
+    return;
   }
 }

为了获得对父节点的引用,我们要在每个节点中记录指向父节点的指针。

+uint32_t* node_parent(void* node) { return node + PARENT_POINTER_OFFSET; }
@@ -660,6 +675,48 @@ void create_new_root(Table* table, uint32_t right_child_page_num) {
   uint32_t left_child_max_key = get_node_max_key(left_child);
   *internal_node_key(root, 0) = left_child_max_key;
   *internal_node_right_child(root) = right_child_page_num;
+  *node_parent(left_child) = table->root_page_num;
+  *node_parent(right_child) = table->root_page_num;
 }

现在我们需要在父节点中找到受影响的单元。子节点不知道自己的页号,所以我们无法按页号查找。但是它知道自己的最大键值,因此我们可以在父节点中搜索这个键。

+void update_internal_node_key(void* node, uint32_t old_key, uint32_t new_key) {
+  uint32_t old_child_index = internal_node_find_child(node, old_key);
+  *internal_node_key(node, old_child_index) = new_key;
 }

在internal_node_find_child()内部,我们将重用一些已有的代码来查找内部节点中的键。同时重构internal_node_find(),让它调用这个新的辅助函数。

-Cursor* internal_node_find(Table* table, uint32_t page_num, uint32_t key) {
-  void* node = get_page(table->pager, page_num);
+uint32_t internal_node_find_child(void* node, uint32_t key) {
+  /*
+  Return the index of the child which should contain
+  the given key.
+  */
+
   uint32_t num_keys = *internal_node_num_keys(node);
 
-  /* Binary search to find index of child to search */
+  /* Binary search */
   uint32_t min_index = 0;
   uint32_t max_index = num_keys; /* there is one more child than key */
 
@@ -386,7 +394,14 @@ Cursor* internal_node_find(Table* table, uint32_t page_num, uint32_t key) {
     }
   }
 
-  uint32_t child_num = *internal_node_child(node, min_index);
+  return min_index;
+}
+
+Cursor* internal_node_find(Table* table, uint32_t page_num, uint32_t key) {
+  void* node = get_page(table->pager, page_num);
+
+  uint32_t child_index = internal_node_find_child(node, key);
+  uint32_t child_num = *internal_node_child(node, child_index);
   void* child = get_page(table->pager, child_num);
   switch (get_node_type(child)) {
     case NODE_LEAF:

现在进入核心部分,实现internal_node_insert()。我会逐一解释。

+void internal_node_insert(Table* table, uint32_t parent_page_num,
+                          uint32_t child_page_num) {
+  /*
+  Add a new child/key pair to parent that corresponds to child
+  */
+
+  void* parent = get_page(table->pager, parent_page_num);
+  void* child = get_page(table->pager, child_page_num);
+  uint32_t child_max_key = get_node_max_key(child);
+  uint32_t index = internal_node_find_child(parent, child_max_key);
+
+  uint32_t original_num_keys = *internal_node_num_keys(parent);
+  *internal_node_num_keys(parent) = original_num_keys + 1;
+
+  if (original_num_keys >= INTERNAL_NODE_MAX_CELLS) {
+    printf("Need to implement splitting internal node\n");
+    exit(EXIT_FAILURE);
+  }

新的键值对的插入位置应该由新的子节点的最大键决定。在例子中,child_max_key是5所以位置应该是1。

如果内部节点中没有多余空间给新的键值对,抛出错误。我们过一会实现。

函数的剩余部分如下:

+
+  uint32_t right_child_page_num = *internal_node_right_child(parent);
+  void* right_child = get_page(table->pager, right_child_page_num);
+
+  if (child_max_key > get_node_max_key(right_child)) {
+    /* Replace right child */
+    *internal_node_child(parent, original_num_keys) = right_child_page_num;
+    *internal_node_key(parent, original_num_keys) =
+        get_node_max_key(right_child);
+    *internal_node_right_child(parent) = child_page_num;
+  } else {
+    /* Make room for the new cell */
+    for (uint32_t i = original_num_keys; i > index; i--) {
+      void* destination = internal_node_cell(parent, i);
+      void* source = internal_node_cell(parent, i - 1);
+      memcpy(destination, source, INTERNAL_NODE_CELL_SIZE);
+    }
+    *internal_node_child(parent, index) = child_page_num;
+    *internal_node_key(parent, index) = child_max_key;
+  }
+}

因为我们将最右边的孩子指针与其余的孩子/键对分开存储,所以如果新的孩子要成为最右边的孩子,我们必须用不同的方式处理。

在我们的例子中,我们需要进入else块。首先,我们通过将其他单元格向右移动一个空间来为新单元腾出空间。(尽管我们的例子中没有单元格需要移动)

接下来我们将新的孩子指针和键写入确定的单元格。

为了减少测试用例的大小,我们暂时将INTERNAL_NODE_MAX_CELLS硬编码为一个较小的值

@@ -126,6 +126,8 @@ const uint32_t INTERNAL_NODE_KEY_SIZE = sizeof(uint32_t);
 const uint32_t INTERNAL_NODE_CHILD_SIZE = sizeof(uint32_t);
 const uint32_t INTERNAL_NODE_CELL_SIZE =
     INTERNAL_NODE_CHILD_SIZE + INTERNAL_NODE_KEY_SIZE;
+/* Keep this small for testing */
+const uint32_t INTERNAL_NODE_MAX_CELLS = 3;

我们的测试也要变为:

@@ -65,7 +65,7 @@ describe 'database' do
     result = run_script(script)
     expect(result.last(2)).to match_array([
       "db > Executed.",
-      "db > Need to implement updating parent after split",
+      "db > Need to implement splitting internal node",
     ])

我添加了另一个测试,测试非顺序的ID。

+  it 'allows printing out the structure of a 4-leaf-node btree' do
+    script = [
+      "insert 18 user18 person18@example.com",
+      "insert 7 user7 person7@example.com",
+      "insert 10 user10 person10@example.com",
+      "insert 29 user29 person29@example.com",
+      "insert 23 user23 person23@example.com",
+      "insert 4 user4 person4@example.com",
+      "insert 14 user14 person14@example.com",
+      "insert 30 user30 person30@example.com",
+      "insert 15 user15 person15@example.com",
+      "insert 26 user26 person26@example.com",
+      "insert 22 user22 person22@example.com",
+      "insert 19 user19 person19@example.com",
+      "insert 2 user2 person2@example.com",
+      "insert 1 user1 person1@example.com",
+      "insert 21 user21 person21@example.com",
+      "insert 11 user11 person11@example.com",
+      "insert 6 user6 person6@example.com",
+      "insert 20 user20 person20@example.com",
+      "insert 5 user5 person5@example.com",
+      "insert 8 user8 person8@example.com",
+      "insert 9 user9 person9@example.com",
+      "insert 3 user3 person3@example.com",
+      "insert 12 user12 person12@example.com",
+      "insert 27 user27 person27@example.com",
+      "insert 17 user17 person17@example.com",
+      "insert 16 user16 person16@example.com",
+      "insert 13 user13 person13@example.com",
+      "insert 24 user24 person24@example.com",
+      "insert 25 user25 person25@example.com",
+      "insert 28 user28 person28@example.com",
+      ".btree",
+      ".exit",
+    ]
+    result = run_script(script)

它的输出将是:

- internal (size 3)
  - leaf (size 7)
    - 1
    - 2
    - 3
    - 4
    - 5
    - 6
    - 7
  - key 1
  - leaf (size 8)
    - 8
    - 9
    - 10
    - 11
    - 12
    - 13
    - 14
    - 15
  - key 15
  - leaf (size 7)
    - 16
    - 17
    - 18
    - 19
    - 20
    - 21
    - 22
  - key 22
  - leaf (size 8)
    - 23
    - 24
    - 25
    - 26
    - 27
    - 28
    - 29
    - 30
db >

仔细观察你会发现一个问题:

    - 5
    - 6
    - 7
  - key 1

key应该是7而不是1。

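这是由指针运算的错误导致的:INTERNAL_NODE_CHILD_SIZE是4,本意是在internal_node_cell()的返回值上加4个字节;但由于internal_node_cell()返回的是uint32_t*,指针加法实际上偏移了4 * sizeof(uint32_t)个字节。解决方法是在做加法之前先把它转换为void*: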

 uint32_t* internal_node_key(void* node, uint32_t key_num) {
-  return internal_node_cell(node, key_num) + INTERNAL_NODE_CHILD_SIZE;
+  return (void*)internal_node_cell(node, key_num) + INTERNAL_NODE_CHILD_SIZE;
 }

下一步就是拆分内部节点了。

发布了136 篇原创文章 · 获赞 33 · 访问量 3万+

猜你喜欢

转载自blog.csdn.net/Radium_1209/article/details/104234598