The fourth summary of various problems encountered in the openstack project (migration experiment)

5. Experiment

 

5.1. Migration experiment

 

Cold Migration Experiment

 

1), suspend the cloud host

 

2), ssh login YUN-12 host

# cd /var/lib/nova/instances

# scp -rp dbaab72b-75c3-4dc5-99f2-95a579a315c5 root@test-compute:/var/lib/nova/instances

 

3), ssh login YUN-11 host to modify the database

# mysql

use nova;

update instances set host='test-compute' where hostname='test1';

 

4), result verification

At this moment, you can see that the host to which the cloud host belongs has changed to test-compute

In the dashboard, choose "Resume Instance" for test1; the resulting "Status", "Task" and "Power State" are "Paused", "None" and "No State" respectively

 

Another cold migration experiment

1), on YUN-19

 

# mysql

 

mysql> use nova;

 

mysql> update instances set host='YUN-20' where hostname='test1';

 

 

# scp -r a8814340-98d5-4ed3-b99b-32ee38cfb78f/ root@YUN-20:/var/lib/nova/instances/

2)、YUN-20

 

2.1)、[root@YUN-20 instances]# ll

total 20

drwxr-xr-x. 2 nova nova 4096 Apr 28 20:55 1c11a4b1-5df8-48f8-be5d-6e1c5efb7f99

drwxr-xr-x. 2 root root 4096 Apr 28 21:41 a8814340-98d5-4ed3-b99b-32ee38cfb78f

drwxr-xr-x. 2 nova nova 4096 Apr 28 20:55 _base

-rw-r--r--. 1 nova nova   29 Apr 28 21:28 compute_nodes

drwxr-xr-x. 2 nova nova 4096 Apr 23 23:49 locks

 

[root@YUN-20 instances]# chown -R nova:nova a8814340-98d5-4ed3-b99b-32ee38cfb78f/

 

[root@YUN-20 instances]# ll

total 20

drwxr-xr-x. 2 nova nova 4096 Apr 28 20:55 1c11a4b1-5df8-48f8-be5d-6e1c5efb7f99

drwxr-xr-x. 2 nova nova 4096 Apr 28 21:41 a8814340-98d5-4ed3-b99b-32ee38cfb78f

drwxr-xr-x. 2 nova nova 4096 Apr 28 20:55 _base

-rw-r--r--. 1 nova nova   29 Apr 28 21:28 compute_nodes

drwxr-xr-x. 2 nova nova 4096 Apr 23 23:49 locks

 

2.2)、网桥

# brctl addbr br0

# brctl addif br0 eth2

 

注:

eth2 192.168.0.20

 

结果 网络无法连通

 

做下面的操作

# brctl delbr br0

 

重启网络后主机连通

 

注:

brctl命令

 

brctl show  查看网桥

 

[root@YUN-20 a8814340-98d5-4ed3-b99b-32ee38cfb78f]# virsh define libvirt.xml

Domain instance-00000001 defined from libvirt.xml

 

[root@YUN-20 a8814340-98d5-4ed3-b99b-32ee38cfb78f]# virsh start instance-00000001

error: Failed to start domain instance-00000001

error: Cannot get interface MTU on 'qbr95221104-b9': No such device

 

[root@YUN-20 a8814340-98d5-4ed3-b99b-32ee38cfb78f]# brctl addbr qbr95221104-b9

[root@YUN-20 a8814340-98d5-4ed3-b99b-32ee38cfb78f]# brctl show

bridge name bridge id  STP enabled interfaces

qbr482b0524-26  8000.ea9b0ced7d50 no  qvb482b0524-26

tap482b0524-26

qbr95221104-b9  8000.000000000000 no  

show  8000.000000000000 no  

virbr0  8000.525400d2ae89 yes  virbr0-nic

 

[root@YUN-20 a8814340-98d5-4ed3-b99b-32ee38cfb78f]# virsh define libvirt.xml

Domain instance-00000001 defined from libvirt.xml

 

[root@YUN-20 a8814340-98d5-4ed3-b99b-32ee38cfb78f]# virsh start instance-00000001

Domain instance-00000001 started

 

2.3)、启动

发现云主机启动成功,但是其他机器无法PING通这台机器

 

进入控制台,发现无法进入系统,出现下面所示的错误

BIOS EDD facility 0 devices found

EDD information not available

Freeing unused kernel memory:900k freed

 

2.4)、发现YUN-19上还存在在YUN-20上创建的网桥

 

[root@YUN-19 ~(keystone_admin)]# brctl show

bridge name bridge id  STP enabled interfaces

qbr7a2e6ef4-55  8000.1e1edf473784 no  qvb7a2e6ef4-55

tap7a2e6ef4-55

qbr95221104-b9  8000.325dbda87640 no  qvb95221104-b9

qbra1cf60e8-36  8000.16f46f1ed7f8 no  qvba1cf60e8-36

tapa1cf60e8-36

show  8000.000000000000 no

 

删除不了,因为正在使用

[root@YUN-19 ~(keystone_admin)]# brctl delbr qbr95221104-b9

bridge qbr95221104-b9 is still up; can't delete it

 

关闭网桥

# ifconfig qbr95221104-b9 down

 

再次删除

[root@YUN-19 ~(keystone_admin)]# brctl delbr qbr95221104-b9

 

2.5)、关闭实例,重启系统

 

启动之后,启动实例,发现还是无法进入系统,错误相同

 

2.6)、解决问题

[root@YUN-20 ~]# ifconfig qbr95221104-b9 down

[root@YUN-20 ~]# brctl delbr qbr95221104-b9

[root@YUN-20 ~]# brctl show

bridge name bridge id  STP enabled interfaces

qbr482b0524-26  8000.1ec880fdff13 no  qvb482b0524-26

tap482b0524-26

virbr0  8000.525400d2ae89 yes  virbr0-nic

[root@YUN-20 ~]# brctl addbr qbr95221104-b9

[root@YUN-20 ~]# brctl show

bridge name bridge id  STP enabled interfaces

qbr482b0524-26  8000.1ec880fdff13 no  qvb482b0524-26

tap482b0524-26

qbr95221104-b9  8000.000000000000 no  

virbr0  8000.525400d2ae89 yes  virbr0-nic

 

2.7)、云主机ssh连不上

在控制节点进入控制台,发现屏幕背景是黑色的,在右下方出现一个弹窗

提示“The configuration defaults for GNOME Power Manager have not been installed correctly. Please contact your computer administrator.”。

 

再次重启进入桌面,进入命令行界面,查看系统空间信息,发现根下面被完全占用。

 

解决办法:

网上的解决办法:

1、On login Screen,press Control+Alt+F2

2、Remove some files or folders

3、Check the permissions on your /tmp folder or just set them to: sudo chmod 0777 /tmp

4、Execute the command: reboot

但是执行完上边的操作后,ssh还是连不上系统。

 

执行下面的操作

# yum remove gnome-power-manager

# yum install gnome-power-manager

 

reboot

 

之后发现系统可以通过SSH连接

主机正常

 

2.8)、确定镜像文件在拷贝到远程的主机之前是否需要转换一下格式

YUN-11所在的集群实例迁移到YUN-19所在集群

 

YUN-17上的实例做实验

 

cirros的镜像创建的实例为例

直接把实例目录下的disk文件拷贝到远程主机上

然后添加镜像,镜像格式化为qcow2

之后在dashboard中从该镜像启动实例,结果失败,状态为“Error”

 

在拷贝disk文件之前把镜像格式转换为qcow格式

拷贝后添加镜像,格式化为qcow2

之后在dashboard中从该镜像启动实例,结果启动成功,但是在随后绑定浮动IP后,外部的机器无法PING通该实例的浮动IP

 

YUN-11所在的集群内做迁移

 

直接把实例目录下的disk文件拷贝到远程主机上

然后添加镜像,镜像格式化为qcow2

之后在dashboard中从该镜像启动实例,结果失败,状态为“Error”

 

把上面转换为qcow格式的镜像拷贝到YUN-11上,再做之后的操作,最后发现外部主机可以PING通浮动IP

 

从这可以看出是YUN-19所在集群网络的问题,因为这两个集群网络相同

YUN-19集群dash中创建实例,在YUN-11所在的集群上的实例无法pingYUN-19所在集群上实例的内网IP,绑定浮动IP后也无法PING

 

事实证明的确需要转换镜像格式

 

 

 

六、问题

6.1、创建网桥和扩展计算节点的先后顺序颠倒之后会不会产生意外的后果?

我在日常的部署中两个顺序在颠倒的情况下暂时没有发现意外的后果,有待测试

 


Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325515487&siteId=291194637