文章目录
前言
上一篇文章笔者写了关于HDFS使用SkipList跳表的结构来加速Snapshot的diff比较过程,然后加速HDFS大Snapshot删除的过程(此部分文章可阅读上篇博文:聊聊HDFS删除Snapshot行为导致的NameNode crash)。本文笔者想继续聊聊这个跳表结构,简单说就是构造多链表层级结构,利用(数据存储空间)空间来换取(数据查找效率)时间的一个数据结构用例。
SkipList样例结构
为了方便SkipList的样例讲解,本文设计实现的简单SkipList的内部结构如下所示,以插入时间排序的链表。
跳表的简单实现,通过维护多层级链表的形式,加速节点的查询,通过内存中维护更多
的节点信息来换取查询的速度。
跳表结构如下所示,基于time-based排序的链表结构.
level 4: head----------------------------------s9->NULL
level 3: head----------------->s5--------------s9->NULL
level 2: head------s2--------->s5--------------s9->NULL
level 1: head->s1->s2->s3->s4->s5->s6->s7->s8->s9->NULL
在以上每个数据项内,可以包含实际对象的信息。
SkipList样例代码简单实现
在笔者对SkipList的代码实现中,总共包含3个类:
- SkipListNode:跳表内部节点实例
- DiffSkipList:跳表类,供外部调用使用的类
- TestSkipListNode:对于跳表类的功能测试类
此样例的实现思路参考了HDFS Snapshot删除行为中实现的跳表结构类DiffListBySkipList。
下面依次是实现代码:
SkipListNode.java
/**
* 跳表内部节点实现.
*/
public class SkipListNode {
/** 跳表节点id */
private int id;
/** 节点所代表的值 */
private int value;
/**
* 跳表节点对应不同level层级的下一节点信息
*/
private SkipDiff[] skipDiff;
public SkipListNode(int id, int value, int level) {
super();
this.id = id;
this.value = value;
this.skipDiff = new SkipDiff[level];
}
public void setSkipDiff(int level, SkipDiff diff) {
this.skipDiff[level - 1] = diff;
}
public void setNextNode(int level, SkipListNode nextNode) {
// 计算当前节点和下一节点的状态差值
int diff = (nextNode == null) ? -1 : Math.abs(nextNode.value - this.value);
SkipDiff skipDiff = new SkipDiff(diff, nextNode);
// 设置level级别对应的下一节点值
setSkipDiff(level, skipDiff);
}
public int getId() {
return this.id;
}
public int getValue() {
return this.value;
}
public SkipListNode getNextNode(int level) {
return this.skipDiff[level - 1] == null ? null
: this.skipDiff[level - 1].nextNode;
}
public int getDiff(int level) {
return this.skipDiff[level - 1] == null ? -1
: this.skipDiff[level - 1].diff;
}
public class SkipDiff {
/** 当前节点与下一节点的diff值 */
private int diff;
/** 一个level层级的下一跳表节点 */
private SkipListNode nextNode;
public SkipDiff(int diff, SkipListNode nextNode) {
this.diff = diff;
this.nextNode = nextNode;
}
}
}
DiffSkipList.java
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
/**
 * A minimal skip list: several stacked linked lists speed up node lookup by
 * keeping extra links in memory, trading space for query time.
 *
 * <p>Nodes are kept in insertion (time-based) order; every new node is appended
 * to the tail of each level it is placed on:
 * <pre>
 * level 4: head----------------------------------s9->NULL
 * level 3: head----------------->s5--------------s9->NULL
 * level 2: head------s2--------->s5--------------s9->NULL
 * level 1: head->s1->s2->s3->s4->s5->s6->s7->s8->s9->NULL
 * </pre>
 */
public class DiffSkipList {
  /** Number of levels maintained by this skip list. */
  private int level;
  /** Sentinel head node (id -1, value 0); real nodes hang off its forward links. */
  private SkipListNode head;

  /**
   * Creates an empty skip list.
   *
   * @param level number of levels to maintain
   */
  public DiffSkipList(int level) {
    this.level = level;
    this.head = new SkipListNode(-1, 0, level);
  }

  /**
   * Appends a node to the skip list.
   *
   * <p>A random top level in {@code [1, level]} is drawn and the node is appended
   * to the tail of that level and of every level below it, so level 1 always
   * contains every node. Passing {@code null} leaves the list unchanged.
   *
   * <p>NOTE(review): the node is expected to have been created with at least as
   * many levels as this list; that is not verified here.
   *
   * @param addedNode node to append
   * @throws IllegalArgumentException if {@code addedNode} is already in this list,
   *         or if this list was created with zero levels
   */
  public void addNode(SkipListNode addedNode) {
    // 1) Draw the highest level the node will appear on.
    final Random r = ThreadLocalRandom.current();
    int topLevel = r.nextInt(this.level) + 1;
    if (addedNode == null) {
      return;
    }
    // Appending a node that is already linked would create a cycle.
    if (contains(addedNode)) {
      throw new IllegalArgumentException(
          "Node s" + addedNode.getId() + " is already in the skip list");
    }
    // 2) Append the node to that level and to every level below it.
    for (int i = topLevel; i >= 1; i--) {
      // The node becomes the tail, so drop any link left over from earlier use;
      // a stale link would splice old nodes back in behind it.
      addedNode.setNextNode(i, null);
      // 3) Find the current tail of this level.
      SkipListNode tail = head;
      while (tail.getNextNode(i) != null) {
        tail = tail.getNextNode(i);
      }
      // 4) Hook the new node behind the tail.
      tail.setNextNode(i, addedNode);
    }
  }

  /**
   * Removes a node from every level it is linked on. Levels that do not contain
   * the node are left untouched.
   *
   * <p>The removed node's own forward links are cleared as well, so it can be
   * added again later without dragging its old successors along.
   *
   * @param removedNode node to remove
   */
  public void removeNode(SkipListNode removedNode) {
    // 1) Visit the levels from the highest to the lowest.
    for (int i = level; i >= 1; i--) {
      // 2) Walk from the head until the node in front of the target is reached,
      //    or the end of the level is passed (prev becomes null).
      SkipListNode prev = head;
      while (prev != null && !removedNode.equals(prev.getNextNode(i))) {
        prev = prev.getNextNode(i);
      }
      // 3) If the target is on this level, bypass it and detach it.
      if (prev != null) {
        prev.setNextNode(i, removedNode.getNextNode(i));
        removedNode.setNextNode(i, null);
      }
    }
  }

  /**
   * Looks up a node by id, scanning the levels from the highest to the lowest,
   * and logs the level on which it was found to standard output.
   *
   * @param nodeId id of the node to look for
   * @return the value of the first node found with that id, or {@code -1} when no
   *         such node exists (a node whose value is {@code -1} is indistinguishable
   *         from "not found")
   */
  public int queryNodeValue(int nodeId) {
    System.out.println("Start to find target node s" + nodeId);
    for (int i = level; i >= 1; i--) {
      SkipListNode curNode = head.getNextNode(i);
      while (curNode != null && curNode.getId() != nodeId) {
        curNode = curNode.getNextNode(i);
      }
      if (curNode != null) {
        System.out.println("Find target node " + nodeId + " on Level" + i);
        return curNode.getValue();
      }
    }
    return -1;
  }

  /** @return the sentinel head node of this skip list */
  public SkipListNode getHeadNode() {
    return this.head;
  }

  /** Tells whether the node is linked on level 1, the level that holds every node. */
  private boolean contains(SkipListNode node) {
    for (SkipListNode cur = head.getNextNode(1); cur != null; cur = cur.getNextNode(1)) {
      if (node.equals(cur)) {
        return true;
      }
    }
    return false;
  }
}
TestSkipListNode.java
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import org.junit.Before;
import org.junit.Test;
/**
 * Functional tests for the skip list classes.
 */
public class TestSkipListNode {
  /** Number of levels used for the list and for every node the tests create. */
  private int level;
  /** Nodes in insertion order; index {@code i} holds the node created with id {@code i}. */
  private List<SkipListNode> arrayListNode;
  /** Skip list under test. */
  private DiffSkipList skipListNode;

  @Before
  public void setup() {
    this.level = 5;
    this.arrayListNode = new ArrayList<>();
    this.skipListNode = new DiffSkipList(level);
  }

  /** Builds a list of ten nodes, prints it and looks node 7 up by id. */
  @Test
  public void testConstructAndQuerySkipListNode() {
    int numNode = 10;
    populate(numNode);

    printSkipListNode(this.skipListNode, false);
    printSkipListNode(this.skipListNode, true);

    // Level 1 receives every added node.
    check(countNodes(1) == numNode, "level 1 should hold all " + numNode + " nodes");

    int queried = this.skipListNode.queryNodeValue(7);
    check(queried == this.arrayListNode.get(7).getValue(),
        "query for s7 returned " + queried);
  }

  /** Builds a list of ten nodes, removes the fifth one and prints the result. */
  @Test
  public void testRemoveSkipListNode() {
    int numNode = 10;
    populate(numNode);

    printSkipListNode(this.skipListNode, false);
    System.out.println("Start to remove node");
    SkipListNode removedNode = arrayListNode.get(4);
    this.skipListNode.removeNode(removedNode);
    System.out.println("Removed node s" + removedNode.getId());

    printSkipListNode(this.skipListNode, false);
    printSkipListNode(this.skipListNode, true);

    // The removed node must be gone from every level, and only it.
    check(countNodes(1) == numNode - 1, "level 1 should hold " + (numNode - 1) + " nodes");
    SkipListNode head = this.skipListNode.getHeadNode();
    for (int i = level; i >= 1; i--) {
      for (SkipListNode cur = head.getNextNode(i); cur != null; cur = cur.getNextNode(i)) {
        check(cur != removedNode, "removed node is still linked on level " + i);
      }
    }
  }

  /** Adds {@code numNode} nodes with ids 0..numNode-1 and random values in [0, 100). */
  private void populate(int numNode) {
    Random r = ThreadLocalRandom.current();
    for (int i = 0; i < numNode; i++) {
      SkipListNode node = new SkipListNode(i, r.nextInt(100), level);
      this.skipListNode.addNode(node);
      this.arrayListNode.add(node);
    }
  }

  /** Counts the nodes linked on the given 1-based level, head excluded. */
  private int countNodes(int targetLevel) {
    int count = 0;
    SkipListNode cur = this.skipListNode.getHeadNode().getNextNode(targetLevel);
    while (cur != null) {
      count++;
      cur = cur.getNextNode(targetLevel);
    }
    return count;
  }

  /** Fails the running test with an AssertionError when the condition does not hold. */
  private static void check(boolean condition, String message) {
    if (!condition) {
      throw new AssertionError(message);
    }
  }

  /**
   * Prints every level of the list, highest first, as {@code head->sN->...->NULL}.
   *
   * @param skipListNode list to print
   * @param printDiff    whether to append each node's value and diff on that level
   */
  private void printSkipListNode(DiffSkipList skipListNode, boolean printDiff) {
    System.out.println("Print SkipListNode with value,diff printed: " + printDiff);
    StringBuilder sbuilder = new StringBuilder();
    SkipListNode head = skipListNode.getHeadNode();
    for (int i = level; i >= 1; i--) {
      sbuilder.append("Level").append(i).append(": head");
      SkipListNode curNode = head.getNextNode(i);
      while (curNode != null) {
        sbuilder.append("->s").append(curNode.getId());
        if (printDiff) {
          sbuilder.append("(v:").append(curNode.getValue()).append(",d:")
              .append(curNode.getDiff(i)).append(")");
        }
        curNode = curNode.getNextNode(i);
      }
      sbuilder.append("->NULL\n");
    }
    System.out.println(sbuilder.toString());
    System.out.println();
  }
}
在以上实现中,笔者在SkipListNode内部定义了diff值来比较前后节点信息之间的状态差异,diff值等于前后两个node的value之差的绝对值;当节点没有下一节点时,diff值为-1。
测试结果输出如下:
testConstructAndQuerySkipListNode
Print SkipListNode with value,diff printed: false
Level5: head->s2->NULL
Level4: head->s2->s5->s8->s9->NULL
Level3: head->s0->s2->s4->s5->s7->s8->s9->NULL
Level2: head->s0->s1->s2->s4->s5->s6->s7->s8->s9->NULL
Level1: head->s0->s1->s2->s3->s4->s5->s6->s7->s8->s9->NULL
Print SkipListNode with value,diff printed: true
Level5: head->s2(v:41,d:-1)->NULL
Level4: head->s2(v:41,d:36)->s5(v:5,d:58)->s8(v:63,d:22)->s9(v:85,d:-1)->NULL
Level3: head->s0(v:55,d:14)->s2(v:41,d:41)->s4(v:0,d:5)->s5(v:5,d:66)->s7(v:71,d:8)->s8(v:63,d:22)->s9(v:85,d:-1)->NULL
Level2: head->s0(v:55,d:17)->s1(v:72,d:31)->s2(v:41,d:41)->s4(v:0,d:5)->s5(v:5,d:34)->s6(v:39,d:32)->s7(v:71,d:8)->s8(v:63,d:22)->s9(v:85,d:-1)->NULL
Level1: head->s0(v:55,d:17)->s1(v:72,d:31)->s2(v:41,d:26)->s3(v:15,d:15)->s4(v:0,d:5)->s5(v:5,d:34)->s6(v:39,d:32)->s7(v:71,d:8)->s8(v:63,d:22)->s9(v:85,d:-1)->NULL
Start to find target node s7
Find target node 7 on Level3
在对SkipList数值查找时,是从高层级往低层级查找,这样能加速查找的效率,因为不同level级别的链表,节点之间的“间距”从高层级到低层级是逐级降低的。
testRemoveSkipListNode
Print SkipListNode with value,diff printed: false
Level5: head->s0->s4->s6->s9->NULL
Level4: head->s0->s2->s3->s4->s6->s9->NULL
Level3: head->s0->s2->s3->s4->s6->s7->s9->NULL
Level2: head->s0->s1->s2->s3->s4->s5->s6->s7->s8->s9->NULL
Level1: head->s0->s1->s2->s3->s4->s5->s6->s7->s8->s9->NULL
Start to remove node
Removed node s4
Print SkipListNode with value,diff printed: false
Level5: head->s0->s6->s9->NULL
Level4: head->s0->s2->s3->s6->s9->NULL
Level3: head->s0->s2->s3->s6->s7->s9->NULL
Level2: head->s0->s1->s2->s3->s5->s6->s7->s8->s9->NULL
Level1: head->s0->s1->s2->s3->s5->s6->s7->s8->s9->NULL
Print SkipListNode with value,diff printed: true
Level5: head->s0(v:27,d:69)->s6(v:96,d:76)->s9(v:20,d:-1)->NULL
Level4: head->s0(v:27,d:37)->s2(v:64,d:45)->s3(v:19,d:77)->s6(v:96,d:76)->s9(v:20,d:-1)->NULL
Level3: head->s0(v:27,d:37)->s2(v:64,d:45)->s3(v:19,d:77)->s6(v:96,d:46)->s7(v:50,d:30)->s9(v:20,d:-1)->NULL
Level2: head->s0(v:27,d:22)->s1(v:5,d:59)->s2(v:64,d:45)->s3(v:19,d:76)->s5(v:95,d:1)->s6(v:96,d:46)->s7(v:50,d:5)->s8(v:55,d:35)->s9(v:20,d:-1)->NULL
Level1: head->s0(v:27,d:22)->s1(v:5,d:59)->s2(v:64,d:45)->s3(v:19,d:76)->s5(v:95,d:1)->s6(v:96,d:46)->s7(v:50,d:5)->s8(v:55,d:35)->s9(v:20,d:-1)->NULL
引用
[1].https://github.com/apache/hadoop/blob/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffListBySkipList.java