1 Oracle 11g RAC Administration and Maintenance Notes (to be updated)


1 Oracle 11g RAC Administration and Maintenance

2 Handling locked tables

select T1.INST_ID,

t2.sid,

       t2.SERIAL#,

       t2.CLIENT_INFO,

       t3.OBJECT_NAME,

       t2.STATUS,

       t2.PROGRAM,

       t4.SQL_FULLTEXT,

       'ALTER SYSTEM KILL SESSION '||''''||T2.SID||','||T2.SERIAL#||',@'||T1.INST_ID||''''||' IMMEDIATE;' SQL_EXEC

  from gv$locked_object t1, gv$session t2, dba_objects t3,v$sql t4

 where t1.SESSION_ID = t2.SID

   and t1.INST_ID=t2.INST_ID

   and t1.OBJECT_ID = t3.OBJECT_ID

   and t2.sql_id=t4.SQL_ID(+)

   AND T2.STATUS='INACTIVE';

3 Disabling a resource

1. Stop and disable ora.crf resource.

2. On each node, as root user:

# <GI_HOME>/bin/crsctl stop res ora.crf -init

# <GI_HOME>/bin/crsctl modify res ora.crf -attr ENABLED=0 -init

4 Manually starting HAS

nohup /etc/init.d/ohasd run &

-- Restarting the HAS process

Use this when HAS starts but fails to start its agents automatically, so the cluster does not come up.

kill -HUP <ohasd process ID>
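For example, the PID can be looked up first (a hedged sketch; the process name pattern assumes the standard ohasd.bin executable):

ps -ef | grep '[o]hasd'                # lists ohasd.bin and its agent processes

kill -HUP $(pgrep -f 'ohasd.bin')      # send SIGHUP to the ohasd.bin process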

5 RAC status checks

## Cluster name ##

cemutlo -n

crs_stat -v -t

Check the cluster stack status:

crsctl stop cluster -all   -- stops all CRS/CSS/EVM-related resources on every node

crsctl start cluster -all

crsctl check cluster -all

crsctl stop has            -- stops the entire HA stack on the local node

srvctl status nodeapps

srvctl config scan

srvctl status scan

srvctl status scan_listener

srvctl config vip -n rac01

srvctl status asm -a

srvctl status diskgroup -g data

crsctl status res  -t

crsctl status res  -t  -init

crsctl check ctss

crsctl check crs

ocrcheck

crsctl query css votedisk

6 Maintenance commands

./crsctl start res ora.crsd -init

crs_stat -t -v ora.registry.acfs 

crsctl start resource ora.cssd -init

Relocate node 1's SCAN listener to node 2; node 2 then runs all three SCAN listeners:

crsctl relocate res ora.LISTENER_SCAN1.lsnr -f

[grid@myrac01 ~]$ crsctl start res ora.prod.db   -- equivalent to: srvctl start database -d prod

CRS-2672: Attempting to start 'ora.prod.db' on 'myrac01'

CRS-2672: Attempting to start 'ora.prod.db' on 'myrac02'

CRS-2676: Start of 'ora.prod.db' on 'myrac01' succeeded

CRS-2676: Start of 'ora.prod.db' on 'myrac02' succeeded

[grid@myrac01 ~]$ crsctl status res ora.prod.db

appvipcfg create -network=1 \

> -ip=192.168.1.150 \

> -vipname=oggvip \

> -user=root

crsctl setperm resource oggvip -u user:oracle:r-x 

#./crsctl add resource oggapp -type cluster_resource -attr "ACTION_SCRIPT=/ogg/action/ogg_action.scr,CHECK_INTERVAL=30, START_DEPENDENCIES='hard(oggvip,ora.asm) pullup(oggvip)', STOP_DEPENDENCIES='hard(oggvip)'"

#./crsctl status resource oggapp

#./crsctl setperm resource oggapp -o oracle

#./crsctl start resource oggapp

#./crsctl relocate resource oggapp -f  -- relocate the resource to another node

#./crsctl delete res oggapp -f

#./crsctl delete res oggvip -f 

crsctl start res ora.cluster_interconnect.haip -init

crsctl status res ora.cluster_interconnect.haip -f -init

./crsctl modify res ora.cluster_interconnect.haip -attr "ENABLED=1" -init   -- haip is an init-level resource; it is only visible when -init is specified

crsctl status res ora.cluster_interconnect.haip -init -f

7 Killing sessions

select INST_ID,sid,serial# from gv$session where status='INACTIVE' AND TYPE='USER';

ALTER SYSTEM KILL SESSION '57,243,@1' immediate;

8 Log locations

$GRID_HOME/log is the directory on each cluster node that holds trace and diagnostic logs for Oracle Clusterware and ASM.

It contains only the Clusterware and ASM trace/diagnostic files of the local node.
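For example, the Clusterware alert log of the local node can be inspected like this (a sketch assuming the standard 11.2 layout under $GRID_HOME/log/<hostname>):

ls $GRID_HOME/log/$(hostname -s)/

tail -f $GRID_HOME/log/$(hostname -s)/alert$(hostname -s).log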

9 RAC DNS configuration

Oracle 11g RAC introduces the SCAN (Single Client Access Name), a single client access name for the cluster. The SCAN gives clients one host name for reaching the Oracle databases running in the cluster: if nodes are added to or removed from the cluster, clients that use the SCAN do not need to change their TNS configuration. Whatever nodes the cluster contains, the SCAN resource and its associated IP addresses provide a stable name for client connections. During the Oracle 11g Grid installation you must configure either DNS or GNS resolution for this feature. This section describes the DNS configuration used when installing Oracle 11g Grid.
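For example, a client only ever needs the single SCAN name, regardless of which nodes are currently in the cluster (a sketch using EZCONNECT syntax and the SCAN name configured in section 9.3):

sqlplus system@//racn-cluster-scan.pera.com:1521/pera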

9.1 Basic information

This section configures a DNS server and tests network connections to Oracle 11gR2 using the SCAN and DNS.

Machines involved:

DNS server:     OS RHEL 6.3, IP 192.168.114.138
Cluster node 1: OS RHEL 6.3, IP 192.168.114.141, VIP 192.168.114.143, Priv 172.168.114.141
Cluster node 2: OS RHEL 6.3, IP 192.168.114.142, VIP 192.168.114.144, Priv 172.168.114.142
Client:         OS Win7,     IP 192.168.114.127
SCAN IPs:       192.168.114.145, 192.168.114.146, 192.168.114.147

# cat /etc/issue

Red Hat Enterprise Linux Server release 6.3(Santiago)

Kernel \r on an \m

SQL> select * from V$version;

BANNER

--------------------------------------------------------------------------------

Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

PL/SQL Release 11.2.0.4.0 - Production

CORE   11.2.0.4.0      Production

TNS for Linux: Version 11.2.0.4.0 - Production

NLSRTL Version 11.2.0.4.0 - Production

9.2 DNS server configuration

Install the required packages

[root@dns ~]#  yum install bind*

[root@dns ~]# cat /etc/resolv.conf

search localdomain --- the domain suffix clients will use

nameserver 192.168.205.130

Prevent resolv.conf from being modified automatically:

[root@dns ~]#chattr +i /etc/resolv.conf

 R6

Edit named.conf

[root@dns ~]# vi /etc/named.conf

Change:

 listen-on port 53 {any; };

    allow-query     { any; }; -- this can also be a specific subnet, e.g. 192.168.110.0/24;

To keep the configuration simple, comment out some of the default entries with //:

options {

       listen-on port 53 {any; };

       listen-on-v6 port 53 { ::1; };

       directory      "/var/named";

       dump-file      "/var/named/data/cache_dump.db";

       statistics-file "/var/named/data/named_stats.txt";

       memstatistics-file "/var/named/data/named_mem_stats.txt";

       allow-query     { any; };

       recursion yes;

//     dnssec-enable yes;

//     dnssec-validation yes;

//      dnssec-lookaside auto;

       /* Path to ISC DLV key */

//     bindkeys-file "/etc/named.iscdlv.key";

//     managed-keys-directory "/var/named/dynamic";

};

logging {

       channel default_debug {

                file"data/named.run";

                severity dynamic;

       };

};

zone "." IN {

       type hint;

       file "named.ca";

};

include"/etc/named.rfc1912.zones";

//include "/etc/named.root.key";

named.conf contains only the "." zone; the other zones are configured in /etc/named.rfc1912.zones.

Edit /etc/named.rfc1912.zones

Create the forward and reverse zones in /etc/named.rfc1912.zones:

[root@dns ~]# vi /etc/named.rfc1912.zones

Add the following two sections:

zone "clgrac.com" IN {

       type master;

       file "clgrac.com.zone";

       allow-update { none; };

};

zone "214.168.192.in-addr.arpa" IN {

       type master;

       file "192.168.214.arpa";

       allow-update { none; };

};

Create the forward and reverse zone files

[root@dns ~]# cd /var/named/

[root@dns named]# cp -p named.localhost clgrac.com.zone

[root@dns named]# cp -p named.localhost 192.168.214.arpa

############################## R6 ############################

Edit the forward zone file:

[root@dns named]# vi  /var/named/clgrac.com.zone

$TTL 5M

@      IN SOA  @ rname.invalid. (

                                        0       ; serial

                                        1D      ; refresh

                                        1H      ; retry

                                        1W      ; expire

                                        3H)    ; minimum

       NS      @

       A       192.168.205.130

       AAAA    ::1

dr01    A       192.168.205.130

@ MX   5       mail.localdomain

$GENERATE 10-250        stu$ A 192.205.130.$

Edit the reverse zone file:

[root@dns ~]# vi /var/named/192.168.214.arpa

$TTL 1D

@      IN SOA  @  rname.invalid. (

                                        0       ; serial

                                        1D      ; refresh

                                        1H     ; retry

                                        1W      ; expire

                                        3H)    ; minimum

       NS      dgtarget.com.

       A       127.0.0.1

       AAAA    ::1

130   PTR     dr01.dgtarget.com

Start named

[root@dns named]# /etc/rc.d/init.d/named restart

Stopping named:                                           [  OK  ]

Starting named:                                            [  OK  ]

######################################R7##################################

[root@rac01 named]# vi clgrac.com.zone 

$TTL 1D

@       IN SOA  clgrac.com. rname.invalid. (

                                        0       ; serial

                                        1D      ; refresh

                                        1H      ; retry

                                        1W      ; expire

                                        3H )    ; minimum

        NS      clgrac.com.

        A       192.168.214.129

        AAAA    ::1

rac01   IN A  192.168.214.129

myscan  IN A  192.168.214.133

myscan  IN A  192.168.214.134

myscan  IN A  192.168.214.135

[root@rac01 named]# more 192.168.214.arpa 

$TTL 1D

@       IN SOA  clgrac.com. rname.invalid. (

                                        0       ; serial

                                        1D      ; refresh

                                        1H      ; retry

                                        1W      ; expire

                                        3H )    ; minimum

           NS      clgrac.com.

129    IN  PTR     rac01.clgrac.com.

133    IN  PTR     myscan.clgrac.com.

134    IN  PTR     myscan.clgrac.com.

135    IN  PTR     myscan.clgrac.com.

systemctl enable named

Check

[root@dns ~]#  netstat -anp|grep :53

tcp       0      0 192.168.114.138:53          0.0.0.0:*                   LISTEN      2104/named         

tcp       0      0 127.0.0.1:53                0.0.0.0:*                   LISTEN      2104/named         

tcp       0      0 ::1:53                      :::*                        LISTEN      2104/named         

udp       0      0 0.0.0.0:5353                0.0.0.0:*                               1930/avahi-daemon  

udp       0      0 172.168.146.138:53          0.0.0.0:*                               2104/named         

udp       0      0 192.168.114.138:53          0.0.0.0:*                               2104/named         

udp       0      0 127.0.0.1:53                0.0.0.0:*                               2104/named         

udp       0      0 ::1:53                      :::*                                    2104/named      

9.3 Accessing the cluster through SCAN and DNS

Edit resolv.conf on both cluster nodes

Other Linux clients only need the same change:

[root@racnode2 grid]# vi /etc/resolv.conf

search  racn1.pera.com

nameserver 192.168.114.138

Prevent resolv.conf from being modified automatically:

chattr +i /etc/resolv.conf

Cluster node hosts file

[root@racn2 ~]# more /etc/hosts

127.0.0.1  localhost localhost.pera.com localhost4 localhost4.pera.com4

::1        localhost localhost.pera.com localhost6 localhost6.pera.com6

192.168.114.141         racn1.pera.com  racn1

192.168.114.142         racn2.pera.com  racn2

192.168.114.143         racn1-vip.pera.com      racn1-vip

192.168.114.144         racn2-vip.pera.com      racn2-vip

172.168.1.141   racn1-priv.pera.com     racn1-priv

172.168.1.142   racn2-priv.pera.com     racn2-priv

192.168.114.145  racn-cluster-scan.pera.com  racn-cluster-scan

192.168.114.146  racn-cluster-scan.pera.com  racn-cluster-scan

192.168.114.147  racn-cluster-scan.pera.com  racn-cluster-scan

With the above in place, install the cluster (or modify the existing cluster) so that the SCAN IPs are reachable (not covered here).

See also: http://blog.csdn.NET/bamuta/article/details/24410801

--chkconfig --list named

--chkconfig named on

Connect to the cluster from a client

Connecting from Windows:

First, configure DNS on the client.

  

Edit tnsnames.ora:

vmpera =

 (DESCRIPTION =

   (ADDRESS_LIST =

     (ADDRESS = (PROTOCOL = TCP)(HOST = racn-cluster-scan.pera.com)(PORT =1521))

    )

   (CONNECT_DATA =

      (SERVICE_NAME = pera)

    )

  )

In addition, add the following entry to the client hosts file:

192.168.114.127  localhost

-- In testing, any IP on the local subnet works here, whether or not it is pingable.

Connect:

SQL> conn system/oracle@vmpera

Connected.

A quick test shows that load balancing works: different connections end up on different instances.
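A simple way to confirm this is to check, in each new connection, which instance it landed on:

SQL> select instance_name, host_name from v$instance;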

10 Adjusting RAC resources

10.1 Modifying the SCAN

Before modifying the SCAN, make sure the SCAN host name and its IP addresses are registered in DNS, and test the name resolution from the operating system with nslookup.
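A quick resolution test might look like this (the SCAN name, DNS server, and addresses follow the example configuration used later in this section; the exact output format varies):

[grid@rac01 ~]$ nslookup myrac-scan

Server:         192.168.205.130
Address:        192.168.205.130#53

Name:   myrac-scan.dgtarget.com
Address: 192.168.205.133
Name:   myrac-scan.dgtarget.com
Address: 192.168.205.134
Name:   myrac-scan.dgtarget.com
Address: 192.168.205.135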

$srvctl config scan

Adjust the SCAN configuration

srvctl config scan

srvctl status scan

srvctl stop scan_listener   -- stop this first  

--- stopping a single SCAN listener

srvctl stop scan_listener -i 1

srvctl stop scan

srvctl stop listener        -- without the -l option, all listeners are stopped by default

srvctl status scan_listener

crs_stat -t | grep  scan

[root@rac01 bin]# ./srvctl modify scan -n myrac-scan

[root@rac01 bin]# ./srvctl config scan

SCAN name: myrac-scan, Network: 1/192.168.205.0/255.255.255.0/eth0

SCAN VIP name: scan1, IP: /myrac-scan/192.168.205.135

SCAN VIP name: scan2, IP: /myrac-scan/192.168.205.133

SCAN VIP name: scan3, IP: /myrac-scan/192.168.205.134

The cluster status at this point:

Only listener_scan1 exists, and it is on node 2; add the remaining SCAN listeners:

[root@db1 bin]# ./srvctl modify scan_listener -u   

------ adds listeners for the new SCAN VIPs; without this command only listener_scan1 exists, after it the SCAN listeners match the number of SCAN VIPs

srvctl start scan

srvctl start scan_listener

Verify:

[grid@rac01 ~]$ ps -ef|grep tns|grep -v grep

root         10      2  0 19:52 ?        00:00:00 [netns]

grid       4047      1  0 19:55 ?        00:00:00 /u01/app/11.2.0/grid/bin/tnslsnr LISTENER -inherit

grid      10428      1  0 20:43 ?        00:00:00 /u01/app/11.2.0/grid/bin/tnslsnr LISTENER_SCAN2 -inherit

grid      10457      1  0 20:43 ?        00:00:00 /u01/app/11.2.0/grid/bin/tnslsnr LISTENER_SCAN3 -inherit

lsnrctl status listener_scan3

Note that even with only 2 nodes, configuring 3 SCAN IPs still gives 3 SCAN listeners.

http://www.askmaclean.com/archives/11gr2-rac-add-listener-static-register.html

10.2 Moving the SCAN to DNS

DNS server configuration

########################R6####################

1 Add the records to DNS

vi  /var/named/dgtarget.com.zone

$TTL 5M

@      IN SOA  dgtarget.com. rname.invalid. (

                                        0       ; serial

                                        1D      ; refresh

                                        1H      ; retry

                                        1W      ; expire

                                        3H)    ; minimum

       NS      dgtarget.com.

       A       192.168.205.130

       AAAA    ::1

dr01    A       192.168.205.130

myrac-scan A    192.168.205.133

myrac-scan A    192.168.205.134

myrac-scan A    192.168.205.135

@ MX   5       mail.localdomain

$GENERATE 10-250        stu$ A 192.205.130.$

vi /var/named/192.168.205.arpa

$TTL 1D

@      IN SOA  dgtarget.com.  rname.invalid. (

                                        0       ; serial

                                        1D      ; refresh

                                        1H     ; retry

                                        1W      ; expire

                                        3H)    ; minimum

       NS      dgtarget.com.

       A       127.0.0.1

       AAAA    ::1

130   PTR     dr01.dgtarget.com

133   PTR     myrac-scan.dgtarget.com.

134   PTR     myrac-scan.dgtarget.com.

135   PTR     myrac-scan.dgtarget.com.

Restart the DNS service:

/etc/rc.d/init.d/named restart

##################################R7##################################

[root@rac01 named]# vi clgrac.com.zone 

$TTL 1D

@       IN SOA  clgrac.com. rname.invalid. (

                                        0       ; serial

                                        1D      ; refresh

                                        1H      ; retry

                                        1W      ; expire

                                        3H )    ; minimum

        NS      clgrac.com.

        A       192.168.214.129

        AAAA    ::1

rac01   IN A  192.168.214.129

myscan  IN A  192.168.214.133

myscan  IN A  192.168.214.134

myscan  IN A  192.168.214.135

[root@rac01 named]# more 192.168.214.arpa 

$TTL 1D

@       IN SOA  clgrac.com. rname.invalid. (

                                        0       ; serial

                                        1D      ; refresh

                                        1H      ; retry

                                        1W      ; expire

                                        3H )    ; minimum

           NS      clgrac.com.

129    IN  PTR     rac01.clgrac.com.

133    IN  PTR     myscan.clgrac.com.

134    IN  PTR     myscan.clgrac.com.

135    IN  PTR     myscan.clgrac.com.

Configure DNS resolution on the cluster nodes

chkconfig named on

rpm -ivh bind-9.8.2-0.10.rc1.el6.x86_64.rpm

rpm -ivh bind-9.8.2-0.10.rc1.el6.x86_64.rpm

rpm -ivh bind-chroot-9.8.2-0.10.rc1.el6.x86_64.rpm

chkconfig named on

vi  /etc/resolv.conf

search dgtarget.com

nameserver 192.168.205.130

Edit /etc/hosts and comment out the old SCAN entries:

chattr +i /etc/resolv.conf

#192.168.205.133 myrac-scan.dgtarget.com myrac-scan

#192.168.205.134 myrac-scan.dgtarget.com myrac-scan

#192.168.205.135 myrac-scan.dgtarget.com myrac-scan

10.3 Modifying a VIP

[grid@rac01 ~]$ srvctl config vip -n rac01

To change a VIP address, the database services must be stopped first. A service is an OCR-managed resource whose purpose is to provide high availability for the database.

srvctl stop service -d orcl -s orcl -n rac01

srvctl stop vip -n rac01

# srvctl modify nodeapps -n rac01 -A 192.168.205.189/255.255.255.0/en0

srvctl start vip -n rac01

srvctl start service -d orcl -s orcl -n rac01

--- Test case

[grid@rac01 ~]$ srvctl stop vip -n rac01

PRCR-1014 : Failed to stop resource ora.rac01.vip

PRCR-1065 : Failed to stop resource ora.rac01.vip

CRS-2529: Unable to act on 'ora.rac01.vip' because that would require stopping or relocating 'ora.LISTENER.lsnr', but the force option was not specified

--- Stop the listeners

srvctl stop listener -l listener

srvctl stop listener -l listener_prod

srvctl stop vip -n rac01

-- Update the hosts file

192.168.205.136 rac01-vip  -- change the address from .131 to .136

192.168.205.132 rac02-vip

-- Modify

[root@rac01 bin]# ./srvctl modify nodeapps -n rac01 -A 192.168.205.136/255.255.255.0/eth0

-- Confirm

[root@rac01 bin]# ./srvctl config vip -n rac01

VIP exists: /192.168.205.136/192.168.205.136/192.168.205.0/255.255.255.0/eth0, hosting node rac01

./srvctl start vip -n rac01

./srvctl start listener -l listener

./srvctl start listener -l listener_prod

10.4 Modifying the public and private networks

The IP addresses, subnet masks, and interface names used for the RAC public and private networks can be changed. The oifcfg command views and adjusts the RAC network configuration.

(Run as root.)

1 View the current configuration

 oifcfg getif
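Typical output looks like the following (the interface names and subnets here are placeholders and must match your environment):

eth0  192.168.214.0  global  public

eth1  172.168.114.0  global  cluster_interconnect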

In this output, public marks the public network, and global means the same interface name and subnet mask are used on every node.

2 Modify the public network

$./oifcfg setif -global eth0/192.168.214.0:public 

3 Modify the private network

$oifcfg setif -global en0/192.168.214.0:cluster_interconnect

If the IP addresses used by RAC are changed, RAC will not work until the new addresses take effect; restart CRS:

crsctl stop crs

crsctl start crs

To move the public and private networks to different interfaces:

$oifcfg setif -global en3/192.168.214.0:cluster_interconnect

$oifcfg setif -global en4/192.168.214.0:public

$oifcfg delif -global en0/192.168.214.0

crsctl stop crs

crsctl start crs   -- restart CRS so the new configuration takes effect

10.5 Removing a RAC node

---- Updating the inventory

./crsctl delete node -n rac02

cd  $ORACLE_HOME/oui/bin

./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={rac01}" CRS=TRUE -silent

./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={rac01}" CRS=TRUE -silent -local

////// The two commands above update the node inventory //////

--- Test: removing node myrac02 (node 1 was not affected at all)

Step 1: Back up the OCR

su - root

# $GRID_HOME/bin/ocrconfig -manualbackup

# $GRID_HOME/bin/ocrdump /tmp/ocrdump_ocr.bak

Step 2: Remove the database instance

Shut down the instance on the node being removed:

sqlplus / as sysdba

shutdown immediate

@ on a node that is being kept

su - oracle

dbca -silent -deleteInstance -nodeList myrac02 -gdbName prod -instanceName prod2 -sysDBAUserName sys -sysDBAPassword 123

[oracle@myrac01 ~]$ dbca -silent -deleteInstance -nodeList myrac02 -gdbName prod -instanceName prod2 -sysDBAUserName sys -sysDBAPassword 123

Deleting instance

20% complete

21% complete

22% complete

26% complete

33% complete

40% complete

46% complete

53% complete

60% complete

66% complete

Completing instance management.

100% complete

Look at the log file "/u01/app/oracle/cfgtoollogs/dbca/prod.log" for further details.

[oracle@myrac01 ~]$

Step 3: Remove the node at the RAC level (remove the Oracle home)

1. Stop and disable the listener

@ on the node being removed

# su - oracle

-- listener status

[oracle@myrac02 ~]$ srvctl stop listener -l listener -n myrac02

-- stop and disable the listener

srvctl disable listener -n <node being removed>

srvctl stop listener -n <node being removed>

-- confirm

$ srvctl status listener      

2. Remove the Oracle home

@ on the node being removed

<1> Update the node list

cd  $ORACLE_HOME/oui/bin

./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac02}" -local

Log output:

[oracle@myrac02 bin]$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac02}" -local

Starting Oracle Universal Installer...

Checking swap space: must be greater than 500 MB.   Actual 3754 MB    Passed

The inventory pointer is located at /etc/oraInst.loc

The inventory is located at /u01/app/oraInventory

'UpdateNodeList' was successful.

<2> Deinstall the database software and remove the Oracle home

$ORACLE_HOME/deinstall/deinstall -local

<3> Update the node list on any remaining node

@ on a remaining node

su - oracle

cd $ORACLE_HOME/oui/bin

./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}"

[oracle@myrac01 bin]$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}"

Starting Oracle Universal Installer...

Checking swap space: must be greater than 500 MB.   Actual 3811 MB    Passed

The inventory pointer is located at /etc/oraInst.loc

The inventory is located at /u01/app/oraInventory

'UpdateNodeList' was successful.

Step 4: Remove the node at the Grid Infrastructure level (remove the Grid home)

1. Check whether the node status is Unpinned

su - grid

[grid@rac1 ~]$ olsnodes -s -t

If it is pinned, unpin it:

crsctl unpin css -n <node being removed>

2. On the node being removed, disable the Clusterware applications and daemons

@ on the node being removed

su - root

# cd $GRID_HOME/crs/install

# ./rootcrs.pl -deconfig -force

3. Delete the node from the cluster configuration

@ on a remaining node

su - root

# crsctl delete node -n myrac02

root@myrac01 bin]# ./crsctl delete node -n myrac02

CRS-4661: Node myrac02 successfully deleted.

[root@myrac01 bin]# ./olsnodes -s -t

myrac01 Active  Unpinned

4. Update the node list

@ on the node being removed

su - grid

cd $Grid_home/oui/bin

$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac02}" CRS=TRUE -silent -local

grid@myrac02 bin]$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac02}" CRS=TRUE -silent -local

Starting Oracle Universal Installer...

Checking swap space: must be greater than 500 MB.   Actual 3940 MB    Passed

The inventory pointer is located at /etc/oraInst.loc

The inventory is located at /u01/app/oraInventory

'UpdateNodeList' was successful.

5. Update the node list on the remaining node(s)

@ on a remaining node

su - grid

cd $Grid_home/oui/bin

$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}" CRS=TRUE -silent

[grid@myrac01 bin]$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}" CRS=TRUE -silent

Starting Oracle Universal Installer...

Checking swap space: must be greater than 500 MB.   Actual 3632 MB    Passed

The inventory pointer is located at /etc/oraInst.loc

The inventory is located at /u01/app/oraInventory

'UpdateNodeList' was successful.

6. Deinstall GI and remove the Grid home

@ on the node being removed

su - grid

cd $Grid_home/deinstall

./deinstall -local

1 Enter the listener name(s)

2 Enter the VIP(s)

3 Enter the diagnostic_dest directory

4 Enter the OCR/Voting Disk diskgroup name []: OCR

5 De-configuring ASM will drop the diskgroups at cleanup time. Do you want deconfig tool to drop the diskgroups y|n [y]: n

Then run the root script when prompted, and press Enter to continue.

7. Update the node list on a remaining node

@ on a remaining node

su - grid

cd $Grid_home/oui/bin

$ ./runInstaller -updateNodeList ORACLE_HOME=$ORACLE_HOME "CLUSTER_NODES={myrac01}" CRS=TRUE -silent

8. Run the CVU command to confirm the node has been removed from the cluster

su - grid

-- Note: nodedel is the stage name; do not change it

$ cluvfy stage -post nodedel -n <node being removed>

$ cluvfy stage -post nodedel -n myrac02

grid@myrac01 bin]$ cluvfy stage -post nodedel -n myrac02

Performing post-checks for node removal 

Checking CRS integrity...

Clusterware version consistency passed

CRS integrity check passed

Node removal check passed

Post-check for node removal was successful.

$ crsctl status res -t

su - oracle

set linesize 200

select INSTANCE_NUMBER,INSTANCE_NAME,HOST_NAME,VERSION,STARTUP_TIME,STATUS from gv$instance;

--- Try restarting HAS

The SCAN and listeners all come back up normally.

This completes the node removal.

10.6 Adding a RAC node

Overview:

Adding a node to Oracle RAC 11.2

1. Stop the applications and back up the current system. Strictly speaking this is optional; try to get a maintenance window to add the node, but CRS and the RAC database do not need to be stopped, and all existing nodes must be up.

For example, with a two-node cluster you cannot shut one node down and then add a new node.

2. Connect the shared storage. Note whether ASM uses ASMLib, udev, or raw devices, and keep the new node consistent with the existing nodes.

3. Kernel parameters, limits.conf, user equivalence, NTP synchronization, environment variables, UIDs and GIDs, and installed packages must all match the existing nodes.

4. New node: node 2; existing node: node 1.

On the existing node, check the configuration as the grid user:

cluvfy stage -post hwos -n myrac02 -verbose

cluvfy stage -pre crsinst -n myrac02 -verbose

cluvfy comp peer -refnode myrac01 -n myrac02 -verbose  -- compare against the existing node

Pay attention to anything reported as mismatched.

cd $GRID_HOME/bin

cluvfy stage -pre nodeadd -n myrac02 -verbose   -- node-addition pre-check

5. On the existing node, run addNode.sh from $GRID_HOME/oui/bin (in Oracle 12c it is under $GRID_HOME/addnode) as the grid user to extend the clusterware to the new node:

addNode.sh "CLUSTER_NEW_NODES={myrac02}" "CLUSTER_NEW_VIRTUAL_HOSTNAMES={myrac02-vip}"

--- export IGNORE_PREADDNODE_CHECKS=Y   set this only when you are sure the cluvfy errors being ignored will not cause a real failure.

Instantiating scripts for add node (Tuesday, July 11, 2017 7:54:32 AM PDT)

.                                                                 1% Done.

Instantiation of add node scripts complete

Copying to remote nodes (Tuesday, July 11, 2017 7:54:37 AM PDT)

...............................................................................................                                 96% Done.

Home copied to new nodes

Saving inventory on nodes (Tuesday, July 11, 2017 8:00:46 AM PDT)

.                                                               100% Done.

Save inventory complete

WARNING:

The following configuration scripts need to be executed as the "root" user in each new cluster node. Each script in the list below is followed by a list of nodes.

/u01/app/11.2.0/grid/root.sh #On nodes myrac02

To execute the configuration scripts:

    1. Open a terminal window

    2. Log in as "root"

    3. Run the scripts in each cluster node

Then wait; the time needed depends on server performance and network conditions.

6. After it completes, run the post-nodeadd check again:

[grid@myrac02 bin]$ ./cluvfy stage -post nodeadd -n myrac02 -verbose

[grid@r1 ~]$ olsnodes

r1

r2

[grid@r1 ~]$ crsctl check cluster -all

**************************************************************

myrac01:

CRS-4537: Cluster Ready Services is online

CRS-4529: Cluster Synchronization Services is online

CRS-4533: Event Manager is online

**************************************************************

myrac02:

CRS-4537: Cluster Ready Services is online

CRS-4529: Cluster Synchronization Services is online

CRS-4533: Event Manager is online

**************************************************************

7. Extend the Oracle RAC database to the new node r2. On the existing node, run addNode.sh from $ORACLE_HOME/oui/bin (in Oracle 12c it is under $ORACLE_HOME/addnode):

[oracle@r1 bin]$ ./addNode.sh "CLUSTER_NEW_NODES={myrac02}"   -- new X window; takes longer than adding the cluster node

Partial log:

WARNING:

The following configuration scripts need to be executed as the "root" user in each new cluster node. Each script in the list below is followed by a list of nodes.

/u01/app/oracle/product/11.2.0/db_1/root.sh #On nodes myrac02

To execute the configuration scripts:

    1. Open a terminal window

    2. Log in as "root"

    3. Run the scripts in each cluster node

    

The Cluster Node Addition of /u01/app/oracle/product/11.2.0/db_1 was successful.

Please check '/tmp/silentInstall.log' for more details.

8. After it completes, add a new instance on the new node to the cluster database:

srvctl add instance -d prod -i prod2 -n myrac02

srvctl add instance -d tt -i tt2 -n r2   - add instance to new node

[oracle@r2 ~]$ srvctl config database -d tt | grep instance  -- confirm the instance was added successfully

Database instances: tt1,tt2

System altered.

10. Start the instance on the new node

1 Modify the pfile/spfile.

2 Add redo log groups for the new thread.

3 Enable the new thread.

SQL> alter database add logfile thread 2 group 3  size 30M;

SQL> alter database add logfile thread 2 group 4 size 30M;

SQL> alter database enable thread 2;

$ dbca -silent -addInstance -nodeList myrac02 -gdbName prod -instanceName prod2 -sysDBAUserName sys -sysDBAPassword "1234"

 [oracle@r2 ~]$ srvctl start instance -d tt -i tt2

11. Confirm the instance was added successfully:

[oracle@r1 bin]$ srvctl status database -d tt

Instance tt1 is running on node r1

Instance tt2 is running on node r2

Basic preparation steps:

(1) Configure the network

(2) Add the groups, directories, and users

(3) Adjust the relevant OS parameters

(4) Set the user environment variables

(5) Configure the storage

(6) Configure SSH user equivalence between the nodes

(7) Install the required RPM packages

Note: steps 1-7 are omitted here (mirror the configuration of the existing nodes).

(8) Pre-installation check for GI

cd $GRID_HOME/bin

[grid@myrac01 bin]$ cluvfy stage -pre nodeadd -n myrac02 -fixup -verbose 

(9) Add the node

On node 1, go to $GRID_HOME/oui/bin:

$ ./addNode.sh "CLUSTER_NEW_NODES={myrac02}" "CLUSTER_NEW_VIRTUAL_HOSTNAMES={myrac02-vip}" -ignorePrereq

(10) Verify

cluvfy stage -post nodeadd -n myrac02 -verbose

10.7 Registering an application resource with the cluster

Step 1: Add an application VIP 

The first step is to create an application VIP. The VIP will be used to access Oracle GoldenGate (e.g. by a remote pump or by the Management Pack for Oracle GoldenGate). Oracle Clusterware will assign the VIP to a physical server, and migrate the VIP if that server were to go down or if you instruct Clusterware to do so. 

To create the application VIP, login as root and run: 

GRID_HOME/bin/appvipcfg create -network=1 \ 

-ip=192.168.214.190 \ 

-vipname=mvggatevip \ 

-user=root

As root, allow the Oracle Grid infrastructure software owner (e.g. oracle) to run the script to start the VIP. 

GRID_HOME/bin/crsctl setperm resource mvggatevip -u user:oracle:r-x 

Then, as oracle, start the VIP: 

GRID_HOME/bin/crsctl start resource mvggatevip 

To validate whether the VIP is running and on which node it is running, execute: 

GRID_HOME/bin/crsctl status resource mvggatevip 

For example: 

[oracle@coe-01 ~]$ crsctl status resource mvggatevip 

NAME=mvggatevip 

TYPE=app.appvip.type 

TARGET=ONLINE 

STATE=ONLINE on coe-02

See http://www.cnblogs.com/lhrbest/p/4576361.html for a configuration walkthrough.

Step 2: Develop an agent script
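The agent action script referenced in step 3 (11gr2_gg_action.scr) comes from Oracle's GoldenGate HA white paper. Below is only a minimal sketch of the start/stop/check/clean contract Clusterware expects; OGG_HOME and the manager check are placeholders to adapt to your installation, not the white paper's actual script.

#!/bin/bash
# Minimal sketch of a Clusterware action script for GoldenGate (hypothetical paths).
OGG_HOME=/mnt/acfs/oracle/ogg          # assumption: adjust to your installation
case "$1" in
  start)       cd $OGG_HOME && echo "start mgr" | ./ggsci ;;   # start the manager
  stop|clean)  cd $OGG_HOME && echo "stop mgr!" | ./ggsci ;;   # stop the manager without prompting
  check)       pgrep -f "$OGG_HOME/dirprm/mgr.prm" >/dev/null || exit 1 ;;  # non-zero means offline
esac
exit 0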

Step 3: Register a resource in Oracle Clusterware

Connect as the oracle user and execute:

1. Add a dependency on the local resource ora.asm, which is available whenever the ASM instance is running. This introduces a slight change to the crsctl add resource command: GRID_HOME/bin/crsctl add resource ggateapp -type cluster_resource -attr "ACTION_SCRIPT=/mnt/acfs/oracle/grid/11gr2_gg_action.scr,CHECK_INTERVAL=30,START_DEPENDENCIES='hard(mvggatevip,ora.asm) pullup(mvggatevip)',STOP_DEPENDENCIES='hard(mvggatevip)'"

2. Set the owner of the resource to the Oracle GoldenGate software owner. Run this command as root.

GRID_HOME/bin/crsctl setperm resource ggateapp -o mvandewiel

Step 4: Start the application

From now on you should always use Oracle Clusterware to start Oracle GoldenGate. Login as oracle and execute:

GRID_HOME/bin/crsctl start resource ggateapp

To check the status of the application:

GRID_HOME/bin/crsctl status resource ggateapp

For example:

[oracle@coe-02 grid]$ crsctl status resource ggateapp

NAME=ggateapp

TYPE=cluster_resource

TARGET=ONLINE

STATE=ONLINE on coe-02

[oracle@coe-02 grid]$

Manage the application

When Oracle GoldenGate is running, and you want to move Oracle GoldenGate to run on a different server, you can use the GRID_HOME/bin/crsctl relocate resource command with the force option to move the VIP as well (as oracle, on any node):

[oracle@coe-02 grid]$ crsctl relocate resource ggateapp -f

CRS-2673: Attempting to stop 'ggateapp' on 'coe-01'

CRS-2677: Stop of 'ggateapp' on 'coe-01' succeeded

CRS-2673: Attempting to stop 'mvggatevip' on 'coe-01'

CRS-2677: Stop of 'mvggatevip' on 'coe-01' succeeded

CRS-2672: Attempting to start 'mvggatevip' on 'coe-02'

CRS-2676: Start of 'mvggatevip' on 'coe-02' succeeded

CRS-2672: Attempting to start 'ggateapp' on 'coe-02'

CRS-2676: Start of 'ggateapp' on 'coe-02' succeeded

[oracle@coe-02 grid]$

10.8 Removing the application resource from the cluster

Cleanup 

If you want to stop Oracle Clusterware from managing Oracle GoldenGate, and you want to cleanup the changes you made, then: 

Stop Oracle GoldenGate (login as oracle): 

GRID_HOME/bin/crsctl stop resource ggateapp 

Stop the VIP (as oracle): 

GRID_HOME/bin/crsctl stop resource mvggatevip 

Delete the application ggateapp as the application owner (mvandewiel) or root: 

GRID_HOME/bin/crsctl delete resource ggateapp 

Delete the VIP (login as root): 

GRID_HOME/bin/appvipcfg delete -vipname=mvggatevip 

Delete the agent action script 11gr2_gg_action.scr at the OS level.

11 Static listener registration in RAC

srvctl add listener -l NEW_MACLEAN_LISTENER -o $CRS_HOME -p 1601 -k 1

Add the following to the listener.ora file:

SID_LIST_NEW_MACLEAN_LISTENER =

  (SID_LIST =

    (SID_DESC =

      (GLOBAL_DBNAME = VPROD)

      (ORACLE_HOME = /g01/11.2.0/grid)

      (SID_NAME = VPROD1)

    )

  )

12 RAC load test script

for ((i=1;i<400;i++));do

sqlplus system/<password>@<scan-address>/prod  <<eof

insert into connecttest select 'PROD'||userenv('INSTANCE'),sysdate from dual;

commit;

exit;

eof

done

13 Server-side TAF

Environment

1. The environment used here is OEL 6.5 with Oracle RAC 11.2.0.3 PSU 10.

2. To reduce the GC waits caused by RAC load balancing, the workload is separated by node. Oracle TAF, configured through services, manages the connections of different applications so that each application is pinned to one or more designated nodes. When a node fails, the service fails over automatically, and the failover is transparent to the front-end application.

3. Services are also used to load-balance across the nodes.

Detailed configuration

1. Preferred/available (active-standby) mode

TAF can be configured graphically with DBCA; the command line is used here.

(1) Add the service

[oracle@rac1 ~]$ srvctl add service -d BDNP -s BDSM -r 'BDNP1' -a 'BDNP2' -P PRECONNECT -e SELECT -x TRUE

[oracle@rac1 ~]$ srvctl config service -d BDNP -s BDSM -a 

Warning:-a option has been deprecated and will be ignored.

Service name: BDSM

Service is enabled

Server pool: BDNP_BDSM

Cardinality: 1

Disconnect: false

Service role: PRIMARY

Management policy: AUTOMATIC

DTP transaction: true

AQ HA notifications: false

Failover type: SELECT

Failover method: NONE

TAF failover retries: 0

TAF failover delay: 0

Connection Load Balancing Goal: LONG

Runtime Load Balancing Goal: NONE

TAF policy specification: PRECONNECT

Edition:

Preferred instances: BDNP1

Available instances: BDNP2

(2) Modify the service configuration

 srvctl modify service -d BDNP -s BDSM -m BASIC -e SELECT -q TRUE -j LONG

 srvctl modify service -s prod -d handtask -a handtask2 -i handtask1 -n

srvctl modify service -s prod -d handtask -P BASIC -m BASIC -e select -j long

srvctl  config service -d BDNP -s BDSM

Service name: BDSM

Service is enabled

Server pool: BDNP_BDSM

Cardinality: 1

Disconnect: false

Service role: PRIMARY

Management policy: AUTOMATIC

DTP transaction: true

AQ HA notifications: true

Failover type: SELECT

Failover method: BASIC

TAF failover retries: 0

TAF failover delay: 0

Connection Load Balancing Goal: LONG

Runtime Load Balancing Goal: NONE

TAF policy specification: PRECONNECT

Edition:

Preferred instances: BDNP1

Available instances: BDNP2

(3) Start the service

[oracle@rac1 ~]$ srvctl  start service -d BDNP -s BDSM

(4) Stop and remove the service

Stop the service before removing it; otherwise the -f option is required.

[oracle@rac1 ~]$ srvctl  stop service -d BDNP -s BDSM

[oracle@rac1 ~]$ srvctl  remove service -d BDNP -s BDSM [-f]

(5) Relocate the service

When the preferred node goes down, the service fails over to the available node. After the preferred node recovers, the service does not fail back automatically; this must be done manually. -i is the instance the service currently runs on, -t is the target instance. For example, to move the service from BDNP1 to BDNP2:

[oracle@rac1 ~]$srvctl relocate service -d BDNP -s BDVMP -i BDNP1 -t BDNP2

Note: do not relocate during peak hours; the relocation may time out and fail, and it also affects connected sessions.

2. Load-balancing mode

Load-balancing mode is simpler: the service registers with all instances.

(1) Add the service

[oracle@rac1 ~]$srvctl add service -d BDNP -s BDVMP -r BDNP1,BDNP2 -P BASIC

(2) Check the service configuration

[oracle@rac1 ~]$ srvctl config service -d BDNP -s BDPK

Service name: BDPK

Service is enabled

Server pool: BDNP_BDPK

Cardinality: 2

Disconnect: false

Service role: PRIMARY

Management policy: AUTOMATIC

DTP transaction: false

AQ HA notifications: true

Failover type: SELECT

Failover method: BASIC

TAF failover retries: 0

TAF failover delay: 0

Connection Load Balancing Goal: LONG

Runtime Load Balancing Goal: NONE

TAF policy specification: BASIC

Edition:

Preferred instances: BDNP1,BDNP2

Available instances:

(3) Stop and remove the service

Same as in preferred/available mode.

Testing (preferred/available mode)

1. Shutting down the database

If the database is shut down with shutdown immediate, the service fails over to the other node on its own.

If the instance is stopped with srvctl stop instance, the service does not fail over; it hangs and stops providing service. The -f option is required to make the service fail over:

srvctl stop instance -d BDNP -i BDNP1 -f

2. Shutting down the cluster

When the cluster is shut down, the service fails over automatically.

Maintenance notes (preferred/available mode)

1. Add the -f option when stopping an instance.

2. After the instance recovers, relocate the service back promptly; relocating during busy periods may fail and will affect the workload.

3. Service names are not propagated to the Data Guard standby.

Client tnsnames configuration

  GOBO4_TAF =  

   (DESCRIPTION =  

     (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.7.61)(PORT = 1521))  

     (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.7.62)(PORT = 1521))  

     (LOAD_BALANCE = yes)  

     (CONNECT_DATA =  

       (SERVER = DEDICATED)  

       (SERVICE_NAME = GOBO4)  

      (FAILOVER_MODE =       # FAILOVER_MODE parameters

       (TYPE = session)  

       (METHOD = basic)  

       (RETRIES = 180)

       (DELAY = 5)  

      )  

     )  

   )  


FAILOVER_MODE analysis

  The FAILOVER_MODE clause is the core of the TAF configuration; its entries are described below.

  METHOD: defines when the connection to the other instance is created. Two values are possible, BASIC and PRECONNECT.

    BASIC: the client connects through the address list as usual and only creates a connection to another instance when it detects a node failure.

    PRECONNECT: pre-connect mode; connections to all instances are established up front, so when a failure occurs the session can switch to the other link immediately.

    Each approach has trade-offs: BASIC is cheaper to set up but adds delay at failover time, while PRECONNECT is the opposite.

  TYPE: defines how in-flight SQL statements are handled on failure. There are two types: session and select.

    select: Oracle Net tracks every SELECT issued during the session, including how many rows each open cursor has already returned to the client. If a SELECT has returned 500 rows and the node the client is connected to fails, Oracle Net automatically connects to a surviving instance and continues returning the remaining rows; if the total is 1,500 rows, the remaining 1,000 rows come from the surviving node.

    session: all results associated with in-flight SELECT statements are lost once the new connection is established, and the SELECT must be reissued.

    The two types suit different workloads: select is typically used for OLAP databases and session for OLTP databases, because with select Oracle must preserve more state per session (cursors, user context, and so on), which costs more resources.

    In both cases, all uncommitted DML is rolled back and must be restarted. ALTER SESSION settings do not fail over, and temporary objects do not fail over and cannot be restarted.

  RETRIES: the number of connection retries.

  DELAY: the interval between retries.
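Whether a session actually has TAF enabled, and whether it has already failed over, can be verified from the database side (run as a DBA user):

select inst_id, username, service_name, failover_type, failover_method, failed_over
  from gv$session
 where username is not null;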

14 Managing the voting disks

The recommended number of voting disks is 1, 3, or 5.

crsctl query css votedisk

crsctl replace votedisk +DGSYS

15 Managing the OCR

Oracle records the location of the OCR on shared storage in /etc/oracle/ocr.loc (Linux) or /var/opt/oracle/ocr.loc (Solaris).

ocrcheck

[root@rac01 bin]# ./ocrconfig -manualbackup  -- manual backup

Recovery after corruption:

crsctl stop crs

crsctl start crs -excl

crsctl stop resource ora.crsd -init

ocrconfig -restore file_name

crsctl stop crs -f

crsctl start crs

--- Add, delete, or replace OCR locations

./ocrconfig -add +fra

./ocrconfig -delete +ocr

./ocrconfig -replace  +fra -replacement  +ocr

16 Reconfiguring the cluster

16.1 Reconfiguring the entire cluster 

The installation and configuration of Oracle 11gR2 Grid Infrastructure is more flexible than in earlier releases. Errors often occur when root.sh runs near the end of a Grid Infrastructure installation and must be fixed before the installation can continue. In this release, Grid Infrastructure can be reconfigured directly with the rootcrs.pl script; there is no need to deinstall Grid Infrastructure, fix the problem, and install again. The usage of rootcrs.pl is described below.

# Reconfiguring Grid Infrastructure does not remove the binaries that have already been copied; it only returns the node to the state it was in before CRS was configured. The steps follow.  

1 The OLR file is recreated.

Deconfigure

a. Log in as root and run the following command on every node except the last one:  

  #$GRID_HOME/crs/install/rootcrs.pl -verbose -deconfig -force  

    

b. Still as root, run the following command on the last node. It clears the OCR configuration and the voting disks:    

 # $GRID_HOME/crs/install/rootcrs.pl -verbose -deconfig -force -lastnode 

CRS-4611: Successful deletion of voting disk +DATA. 

CRS-4611: Successful deletion of voting disk +OCR.  -- the OCR voting disk is deleted

--- the last node also drops the OCR disk group;

--- when the command runs on the other nodes, the OCR disk group is not dropped; only ASM, the VIP, and the local OLR are removed

--- if the node ...

  

c. If ASM disks are used, continue with the following so that the disks become ASM candidates again:

  # dd if=/dev/zero of=/dev/sdb1 bs=1024 count=100  -- wiping the disks of the OCR disk group is enough (check whether the data disk groups would be affected)

  # /etc/init.d/oracleasm deletedisk DATA /dev/sdb1  

  # /etc/init.d/oracleasm createdisk DATA /dev/sdb1 

Run the scripts

3. Run the following script on node 1: 

$GRID_HOME/root.sh 

Log:

tail -f /u01/app/11.2.0/grid/cfgtoollogs/crsconfig/rootcrs_myrac01.log

1 The OLR is created: OLR initialization - successful

2 Disk Group OCR created successfully. (when root.sh runs on the first node)

[root@rac1 grid]# ./root.sh 

Performing root user operation for Oracle 11g 

The following environment variables are set as:

    ORACLE_OWNER= grid

    ORACLE_HOME=  /u01/app/11.2.0/grid

Enter the full pathname of the local bin directory: [/usr/local/bin]: 

The contents of "dbhome" have not changed. No need to overwrite.

The contents of "oraenv" have not changed. No need to overwrite.

The contents of "coraenv" have not changed. No need to overwrite.

Entries will be added to the /etc/oratab file as needed by

Database Configuration Assistant when a database is created

Finished running generic part of root script.

Now product-specific root actions will be performed.

Using configuration parameter file: /u01/app/11.2.0/grid/crs/install/crsconfig_params

User ignored Prerequisites during installation

Installing Trace File Analyzer

OLR initialization - successful

Adding Clusterware entries to upstart

CRS-2672: Attempting to start 'ora.mdnsd' on 'rac1'

CRS-2676: Start of 'ora.mdnsd' on 'rac1' succeeded

CRS-2672: Attempting to start 'ora.gpnpd' on 'rac1'

CRS-2676: Start of 'ora.gpnpd' on 'rac1' succeeded

CRS-2672: Attempting to start 'ora.cssdmonitor' on 'rac1'

CRS-2672: Attempting to start 'ora.gipcd' on 'rac1'

CRS-2676: Start of 'ora.cssdmonitor' on 'rac1' succeeded

CRS-2676: Start of 'ora.gipcd' on 'rac1' succeeded

CRS-2672: Attempting to start 'ora.cssd' on 'rac1'

CRS-2672: Attempting to start 'ora.diskmon' on 'rac1'

CRS-2676: Start of 'ora.diskmon' on 'rac1' succeeded

CRS-2676: Start of 'ora.cssd' on 'rac1' succeeded

ASM created and started successfully.

Disk group DATA created successfully.

4. Run the following script on node 2: 

$GRID_HOME/root.sh 

Monitor the log: $GRID_HOME/log/<hostname>/alert*.log

5. Mount the other disk groups: 

Sqlplus / as sysasm

alter diskgroup data mount; 

alter diskgroup fra mount; 

ora.DATA.dg    ora....up.type 0/5    0/     ONLINE    ONLINE    myrac02     

ora.FRA.dg     ora....up.type 0/5    0/     ONLINE    ONLINE    myrac02   -- the resources are back to normal

Register the resources 

srvctl add database -d pmstest -o /u01/app/oracle/11.2.0/db_1 

srvctl add instance -d pmstest -i pmstest1 -n pmstest1 

srvctl add instance -d pmstest -i pmstest2 -n pmstest2 

srvctl start database -d pmstest

16.2 Reconfiguring a single node

If the Grid installation on a node has problems, the node can be reconfigured: 

  # perl $GRID_HOME/crs/install/rootcrs.pl -verbose -deconfig -force  

# $GRID_HOME/root.sh 

Example:

The first node had a problem, so rootcrs.pl -verbose -deconfig -force was run on it:

[grid@myrac02 ~]$ olsnodes -s -t

myrac01 Inactive Unpinned

myrac02 Active  Unpinned

Node 2's resources then look as follows.

After running root.sh on node 1:

Partial log:

Entries will be added to the /etc/oratab file as needed by

Database Configuration Assistant when a database is created

Finished running generic part of root script.

Now product-specific root actions will be performed.

Using configuration parameter file: /u01/app/11.2.0/grid/crs/install/crsconfig_params

User ignored Prerequisites during installation

Installing Trace File Analyzer

OLR initialization - successful

Adding Clusterware entries to upstart

CRS-4402: The CSS daemon was started in exclusive mode but found an active CSS daemon on node myrac02, number 2, and is terminating

An active cluster was found during exclusive startup, restarting to join the cluster

srvctl add database -d prod -o /u01/app/oracle/11.2.0/db_1 

srvctl add instance -d prod -i prod1 -n myrac01

17 Redo log maintenance

Add to the current instance only:

ALTER DATABASE ADD LOGFILE GROUP 5 '+DATA02'; 

ALTER DATABASE ADD STANDBY LOGFILE GROUP 5 '+DATA02'; 

SQL> desc v$standby_log

Add a log group for a specific instance:

ALTER DATABASE ADD LOGFILE  instance 'orcll2' GROUP 6 '+DATA02' ; 

alter database add logfile thread 2 group 3  size 30M;

ALTER DATABASE ADD STANDBY  LOGFILE  instance 'orcll2' GROUP 6 '+DATA02' ; 

 alter database add STANDBY logfile thread 1 group 5 ;

ALTER SYSTEM ARCHIVE LOG instance 'orcll2' CURRENT;

ALTER SYSTEM CHECKPOINT LOCAL;

Alter  database enable thread 2;

18 Changing the archive log mode in RAC

1. Main steps:  

 Back up the spfile, in case a failed parameter change leaves the database unable to start  

 Set the cluster parameter cluster_database to false  

 Start a single instance to the mount state  

 Put the database into or out of archive log mode (alter database archivelog/noarchivelog)  

 Set cluster_database back to true  

 Shut down the single instance  

 Start the cluster database 

select instance_name,host_name,status from gv$instance; 

create pfile='/u01/oracle/db/dbs/ora10g_robin.ora' from spfile;

 alter system set cluster_database=false scope=spfile sid='*';

srvctl stop database -d ora10g                        --> shut down the database  

srvctl start instance -d ora10g -i ora10g1 -o mount   --> start a single instance to mount  

alter database archivelog;  

alter system set cluster_database=true scope=spfile sid='*';

ho srvctl stop instance -d ora10g -i ora10g1 

ho srvctl start database -d ora10g 

-- Changing the flashback mode:

select flashback_on from v$database;

alter system set cluster_database=false scope=spfile;

SQL> alter system set db_recovery_file_dest_size=1g scope=spfile;

SQL> alter system set db_recovery_file_dest='/ogg' scope=spfile;

[grid@myrac02 ~]$ srvctl stop database -d prod

[grid@myrac02 ~]$ srvctl start  instance -d prod -i prod1 -o mount

alter database flashback on;

alter system set cluster_database=true scope=spfile;

[grid@myrac01 ~]$ crsctl start res ora.prod.db   -- equivalent to: srvctl start database -d prod

CRS-2672: Attempting to start 'ora.prod.db' on 'myrac01'

CRS-2672: Attempting to start 'ora.prod.db' on 'myrac02'

CRS-2676: Start of 'ora.prod.db' on 'myrac01' succeeded

CRS-2676: Start of 'ora.prod.db' on 'myrac02' succeeded

[grid@myrac01 ~]$ crsctl status res ora.prod.db

NAME=ora.prod.db

TYPE=ora.database.type

TARGET=ONLINE           , ONLINE

STATE=ONLINE on myrac01, ONLINE on myrac02

18.1 Backing up archived logs

Shared storage:

RMAN> backup archivelog all format '/u01/app/oracle/bakup/archbak_20151123.arc' delete all input;

delete archivelog until sequence 8 thread 1;

delete archivelog until sequence 8 thread 2;

DELETE ARCHIVELOG FROM SEQUENCE 20  UNTIL SEQUENCE 28 THREAD 1;

DELETE ARCHIVELOG FROM SEQUENCE 20  UNTIL SEQUENCE 28 THREAD 2;

BACKUP ARCHIVELOG FROM SEQUENCE 20  UNTIL SEQUENCE 28 THREAD 1;

BACKUP ARCHIVELOG FROM SEQUENCE 20  UNTIL SEQUENCE 28 THREAD 2;

Local storage:

run

{

allocate channel c1 device type disk connect sys/oracle@orcl1; 

allocate channel c2 device type disk connect sys/oracle@orcl2;

backup archivelog all format '/home/oracle/123_%u.arc';

}

18.2 Duplicate script

RMAN> run

{

allocate channel c1 device type disk format '/soft/backup/%U' connect sys/6212327@rac1;

allocate channel c2 device type disk format '/soft/backup/%U' connect sys/6212327@rac2;

allocate auxiliary channel ac1 device type disk format '/soft/backup/%U'; 

allocate auxiliary channel ac2 device type disk format '/soft/backup/%U';

duplicate target database for standby;

}

19 Configuring the Oracle ACFS cluster file system

Oracle's official definition:

Oracle Automatic Storage Management Cluster File System (Oracle ACFS) is a multi-platform, scalable file system, and storage management technology that extends Oracle Automatic Storage Management (Oracle ASM) functionality to support customer files maintained outside of Oracle Database. Oracle ACFS supports many database and application files, including executables,database trace files, database alert logs, application reports, BFILEs, and configuration files. Other supported files are video, audio, text, images, engineering drawings, and other general-purpose application file data.

In short, ACFS is a multi-platform, scalable cluster file system built on ASM. It can store database and application files, including database trace files, alert logs, and configuration files, as well as video, audio, text, images, and other general-purpose data.

Before ACFS, the cluster file systems I had used were Red Hat's GFS and the open-source OCFS2. GFS felt cumbersome and complex to configure because it is based on the RHCS suite, so many of its features are redundant for a plain cluster file system; OCFS2 has effectively been abandoned and is no longer developed, though it is simple to configure. ACFS was introduced in 11gR2, builds on Grid Infrastructure, is fairly easy to configure, and, being based on ASM, should be reliable. The next step is to use ACFS to test GoldenGate replication in a RAC environment. The sections below cover using ACFS in 11g RAC.

19.1.1 GUI configuration

[grid@rac1 ~]$ crs_stat -t -v ora.registry.acfs  

Name           Type           R/RA   F/FT   Target    State     Host          

----------------------------------------------------------------------  

ora....ry.acfs ora....fs.type 0/5    0/     ONLINE    ONLINE    rac1     

[grid@rac1 ~]$ crs_stat -t -v ora.ACFS.dg  

Name           Type           R/RA   F/FT   Target    State     Host          

----------------------------------------------------------------------  

ora.ACFS.dg    ora....up.type 0/5    0/     ONLINE    ONLINE    rac1  

Step 2: Use the asmca GUI to create a volume in an ASM disk group and format it as an ACFS file system

[root@rac1 ~]# su - grid  

[grid@rac1 ~]$ !exp  

export DISPLAY=192.168.1.105:0  

[grid@rac1 ~]$ asmca  

  

   

cd /u01/app/grid/cfgtoollogs/asmca/scripts

Step 3: Check that both nodes have mounted the ACFS file system, and test reads and writes

[grid@rac1 ~]$ df -h  

Filesystem            Size  Used Avail Use% Mounted on  

/dev/vda3              26G   14G   11G  58% /  

/dev/vda1              99M   12M   83M  13% /boot  

tmpfs                 1.2G  787M  441M  65% /dev/shm  

/dev/asm/vol1-330     5.0G   75M  5.0G   2% /u01/app/grid/acfsmounts/acfs_vol1  

[grid@rac1 ~]$ ssh rac2 "df -h"  

Filesystem            Size  Used Avail Use% Mounted on  

/dev/vda3              26G   14G   10G  59% /  

/dev/vda1              99M   12M   83M  13% /boot  

tmpfs                 1.2G  787M  441M  65% /dev/shm  

/dev/asm/vol1-330     5.0G   75M  5.0G   2% /u01/app/grid/acfsmounts/acfs_vol1  

[grid@rac1 ~]$ cd /u01/app/grid/acfsmounts/acfs_vol1  

[grid@rac1 acfs_vol1]$ ls  

lost+found  

drwx------ 2 root root 65536 Jul  9 09:24 lost+found  

[grid@rac1 acfs_vol1]$ cp /etc/passwd ./  

[grid@rac2 ~]$ cd /u01/app/grid/acfsmounts/acfs_vol1  

[grid@rac2 acfs_vol1]$ ls  

lost+found  passwd  

[grid@rac2 acfs_vol1]$ head -1 passwd   

root:x:0:0:root:/root:/bin/bash 

19.1.2 Command-line configuration with asmcmd

ASMCMD> volcreate -G DATAC1 -s 200G vol1

ASMCMD> volinfo -G DATAC1 vol1

mount -t acfs /dev/asm/vol2-330 /vol2/  

mkfs.acfs /dev/asm/vol2-330

[root@rac1 ~]# acfsutil registry -a /dev/asm/vol2-330 /ogg   (after registration, node 2 mounts it automatically)

Create the /aradmin directory on both nodes and give ownership to oracle:oinstall.

[grid@rac1 ~]$ echo $ORACLE_SID  

+ASM1  

[grid@rac1 ~]$ asmcmd  

ASMCMD> ls  

ACFS/  

CRS/  

DATA/  

FRA/  

ASMCMD> volcreate  // show the usage help  

usage: volcreate -G diskgroup -s size [ --column number ] [ --width stripe_width ] [--redundancy {high|mirror|unprotected} ]   

[--primary {hot|cold}] [--secondary {hot|cold}] volume  

help:  help volcreate  

ASMCMD> volcreate -G ACFS -s 5G vol2  // this fails with the error below if the disk group does not have enough space  

ASMCMD> volcreate -G DATAC1 -s 200G vol1

ASMCMD>mkfs.acfs /dev/asm/vol2-330  

ORA-15032: not all alterations performed  

ORA-15041: diskgroup "ACFS" space exhausted (DBD ERROR: OCIStmtExecute)  

ASMCMD> volcreate -G ACFS -s 4G vol2   

ASMCMD> volinfo -G ACFS vol2  

Diskgroup Name: ACFS  

         Volume Name: VOL2  

         Volume Device: /dev/asm/vol2-330  

         State: ENABLED  

         Size (MB): 4096  

         Resize Unit (MB): 32  

         Redundancy: UNPROT  

         Stripe Columns: 4  

         Stripe Width (K): 128  

         Usage:   

         Mountpath:   

ASMCMD> volinfo -G ACFS vol1  

Diskgroup Name: ACFS  

         Volume Name: VOL1  

         Volume Device: /dev/asm/vol1-330  

         State: ENABLED  

         Size (MB): 5120  

         Resize Unit (MB): 32  

         Redundancy: UNPROT  

         Stripe Columns: 4  

         Stripe Width (K): 128  

         Usage: ACFS  

         Mountpath: /u01/app/grid/acfsmounts/acfs_vol1   

[root@rac1 ~]# mkdir /vol2   (create the same mount point on node 2)  

[root@rac1 ~]#  

mkfs.acfs: version                   = 11.2.0.3.0  

mkfs.acfs: on-disk version           = 39.0  

mkfs.acfs: volume                    = /dev/asm/vol2-330  

mkfs.acfs: volume size               = 4294967296 

mkfs.acfs: Format complete.  

[root@rac1 ~]# mount -t acfs /dev/asm/vol2-330 /vol2/  

[root@rac1 ~]# df -h /vol2  

Filesystem            Size  Used Avail Use% Mounted on  

/dev/asm/vol2-330     4.0G   45M  4.0G   2% /vol2  

[root@rac1 ~]# acfsutil registry -a /dev/asm/vol2-330 /ogg   (after registration, node 2 mounts it automatically)  

acfsutil registry: mount point /vol2 successfully added to Oracle Registry

ACFS maintenance

Starting and stopping the ACFS registry resource:

crsctl stop res "ora.registry.acfs"

crsctl start res "ora.registry.acfs"

Mounting and unmounting:

mount.acfs -o all

umount -t acfs -a

[root@rac1 ~]# mount -t acfs /dev/asm/vol2-330 /vol2/  

Maintaining the file system with acfsutil

Resize the file system to 30 GB:

$acfsutil size 30G /ogg

$acfsutil size +20m /ogg

$acfsutil size -20m /ogg

$ acfsutil registry -a /dev/asm/oggvol-48 /ogg

20 GPnP in detail

The GPnP profile is an XML file stored locally on every node. It records the basic information a node needs in order to join the cluster, and it must be kept in sync across the nodes; Grid Infrastructure uses the GPnPD daemon for this synchronization.

20.1 GPnP profile location

$GRID_HOME/gpnp/$HOSTNAME/profile/peer/profile.xml

$GRID_HOME/gpnp/profile/peer/profile.xml (cluster-wide copy)

20.2 GPnP profile contents

The GPnP profile plays a role similar to the SPFILE: it stores the information needed to start the cluster stack, as listed below.

GPnP profile parameters:

Cluster name

Network classifications (public/private)

Storage to be used for CSS

Storage to be used for ASM (SPFILE location, ASM discovery string, etc.)

Digital signature information: the GPnP profile is security-sensitive, identifies the root partition information, and carries the signing configuration and permissions.

20.3 Purpose of the GPnP profile

The GPnP profile is a small XML file named profile.xml stored under $GRID_HOME/gpnp/<hostname>/profiles/peer.

It describes the global characteristics of each RAC node. Every node keeps a local copy of the GPnP profile, maintained by the GPnP daemon (GPnPD).

20.4 When the GPnP profile is updated

The GPnP profile is updated in the following situations.

(1) The GPnP daemon (GPnPD) replicates profile changes during software installation, system startup, and profile updates, keeping the profile synchronized on all nodes.

(2) Whenever the cluster configuration is changed with commands such as oifcfg, crsctl, or asmcmd, the GPnP profile is updated accordingly. 

20.5 How the GPnP profile is used

When the cluster stack starts, it must access the voting disk. If the voting disk is on ASM, its location is read from the GPnP profile, where it is described as follows.

<orcl:CSS-Profile id="css" DiscoveryString="+asm" LeaseDuration="400"/>

Even if the ASM instance is not up, the voting disk can still be read using the kfed utility.

The cluster software then checks that all RAC nodes have an up-to-date GPnP profile, and each node joins the cluster according to the GPnP configuration. When a node joins or starts in the cluster, the cluster software starts a GPnP agent on it. If the node is already part of the cluster, the GPnP agent reads the local GPnP profile. If the node is newly added, its GPnP agent locates a GPnP agent on an existing node via multicast DNS (mDNS) and pulls the GPnP profile from that source agent.

Next, the CRSD process must read the OCR in order to start the node's resources, and it updates the OCR as resource states change. Because the OCR is stored in ASM, CRSD needs the path to the ASM SPFILE before it can read the OCR. The ASM SPFILE location is searched for in the following order: (1) the GPnP profile; (2) $ORACLE_HOME/dbs/spfile<sid>.ora; (3) $ORACLE_HOME/dbs/init<sid>.ora.

20.6 GPnP profile maintenance tools

The gpnptool utility maintains the GPnP profile. Common commands:

(1) $ gpnptool get    -- read the profile contents

(2) $ gpnptool lfind  -- find the GPnP daemon running on the local node

(3) $ gpnptool find   -- find all GPnP daemons discoverable via mDNS

(4) $ gpnptool getpval -<parameter_name>  -- print the value of the given parameter from the GPnP profile
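For example, the ASM-related entries can be pulled from the local profile (a sketch: run as the grid user; the -asm_spf switch is the commonly cited way to print the ASM spfile path recorded in the profile, but verify the exact switch names for your version with gpnptool help):

gpnptool get 2>/dev/null | grep -i asm     # dump the profile and filter the ASM entries

gpnptool getpval -asm_spf                  # print the ASM spfile path from the profile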

21 ASMCMD commands

md_backup /tmp/dgbackup20090716

md_restore --full -G oradg /tmp/oradgbackup20110323

22 DRM

Basic concepts of Oracle RAC DRM


      In Oracle RAC each instance has its own buffer cache, and every block change is coordinated between instances to maximize performance while preserving data integrity.

      In a RAC cluster, each cached copy of data (a cache resource) has a master instance.

      In 10.1.0.2, once a cache resource was mastered by an instance, remastering (an automatic change of the master) only happened on a normal startup or shutdown of a RAC instance, or when cluster resource management ran into problems.

      In other words, if node B was the master of a cache resource, the resource stayed mastered on node B until a RAC reconfiguration occurred.

      Oracle 10g introduced a new remastering mechanism called DRM (Dynamic Resource Management [ID 390483.1]). With DRM, instance reconfiguration is no longer the only trigger for remastering: if a cache resource is accessed frequently by node A, it can be remastered from node B to node A.

      Some additional points:

      In 10gR1 DRM is driven by affinity of files and in 10gR2 it is based on objects.

      DRM attributes are intentionally undocumented since they may change depending on the version. These attributes should not be changed without discussing with Support.

      Two instance will not start a DRM operation at the same time however lmd,lms,lmon processes from all instances collectively take part in the DRM operation.

      Normal activity on the database is not affected due to DRM. This means users continue insert/update/delete operations without any interruptions. Also DRM operations complete very quickly.

      DRM may cause "latch: cache buffers chains" and "latch: object queue header operation" wait events; you can disable DRM this way:

      _gc_affinity_time=0  

      _gc_undo_affinity=FALSE

      also, you can used another two implicit parameters dynamic change

      _gc_affinity_limit=10000000  

      _gc_affinity_minimum=10000000

      The values above can be adjusted to suit your requirements.

In 10g, DRM can be disabled as follows (you can also disable just one of the modules, object affinity or undo affinity):

--disable object affinity

alter system set "_gc_affinity_time"=0 scope=spfile ;

--disable undo affinity

alter system set "_gc_undo_affinity"=FALSE  scope=spfile;

Then restart all instances together for the change to take effect.

If the instances cannot be restarted for now, DRM can be disabled "in effect" with the following commands (both parameters can be changed dynamically):

alter system set "_gc_affinity_limit"=10000000 sid='*';

alter system set "_gc_affinity_minimum"=10000000 sid='*';

In 11g, DRM can likewise be disabled as follows; disabling it is strongly recommended:

alter system set "_gc_policy_time"=0 scope=spfile;

Then restart all instances together for the change to take effect. If you do not want to disable DRM completely but need to disable the read-mostly locking or reader-bypass mechanisms, use the following commands:

--disable read-mostly locking

alter system set "_gc_read_mostly_locking"=false scope=spfile sid='*';

--disable reader-bypass

alter system set "_gc_bypass_readers"=false scope=spfile sid='*';

23 Using two mirrored storage arrays for the voting disks in 11g RAC

In 11g, the following syntax creates a normal-redundancy disk group for the OCR and voting disks:

CREATE DISKGROUP ocr NORMAL REDUNDANCY

FAILGROUP a1 DISK

'/dev/asm_a1_ocr2' name a1ocr2

FAILGROUP a2 DISK

'/dev/asm_a2_ocr2' name a2ocr2

ATTRIBUTE 'compatible.asm' = '11.2';

This allows a seamless switch to the second array when one array fails physically.

But when this disk group is used as the target for migrating the voting disks, the following error is raised:

[grid@pay1 ~]$ crsctl replace votedisk +OCR

Failed to create voting files on disk group OCR.

Change to configuration failed, but was successfully rolled back.

CRS-4000: Command Replace failed, or completed with errors.

The cause is that a NORMAL REDUNDANCY disk group needs 3 failure groups before it can hold the voting disks.

A third failure group is therefore needed, but placing it on either array is unsuitable: whichever array ends up holding 2 failure groups, its failure would leave fewer than half the voting disks available and the whole cluster would stop.

How to build the third failure group therefore becomes the key question. Several approaches are possible:

1. Mount an additional disk over NFS and add it as a quorum failure group, giving 3 failure groups. MOS documents this (Oracle Support Document 1421588.1, "How to Manually Add NFS voting disk to an Extended Cluster using ASM in 11.2": https://support.oracle.com/epmos/faces/DocumentDisplay?id=1421588.1)

2. Carve a small 500 MB LUN from each array, mirror them with LVM at the OS level, and use the mirrored LV as the quorum failure group, again giving 3 failure groups. The principle is the same: one extra disk acts as the quorum failure group.

Only the first approach is described in detail below.

1. Choose an NFS server and configure appropriate permissions. To keep the example simple, no access restrictions are applied.

vi /etc/exports

/mypool/oraclevotedisk *(rw)

Restart the NFS services:

service portmap restart

service nfs restart

2. Create an empty directory on every node in the RAC cluster:

mkdir /oracle/votedisk

3. Mount the NFS share:

mount -o rw,bg,hard,intr,rsize=32768,wsize=32768,tcp,noac,vers=3,timeo=600 192.168.100.2:/mypool/oraclevotedisk /oracle/votedisk

3. On any one node, create a file on this share to use as a voting disk and grant appropriate permissions:

dd if=/dev/zero of=/oracle/votedisk/vote_pay bs=1M count=500

--dd if=/dev/zero of=/oracle/votedisk/vote_pay bs=1M count=1000

Set the ownership:

chown grid:asmadmin /oracle/votedisk/vote_pay

4. In one ASM instance, update the disk discovery string and add the file as a quorum disk:

alter system set asm_diskstring='/dev/asm*','/oracle/votedisk/vote_pay';

-- check whether the file has been discovered

col path format A40

select group_number,name,path,mount_status,header_status,state,REDUNDANCY,FAILGROUP,voting_file from v$asm_disk;

-- add the quorum failure group

alter diskgroup ocr add quorum failgroup nfs disk '/oracle/votedisk/vote_pay';

5. As the grid user, replace the voting disks:

[grid@pay1 ~]$ crsctl replace votedisk +OCR

Successful addition of voting disk 58c1ac72dff94f25bffc8e649a36c883.

Successful addition of voting disk 076f0b3e9b0a4f5cbf26841c540211a7.

Successful addition of voting disk 84cf735c784e4f74bf5d55fc99e98422.

Successful deletion of voting disk 73fb4a797e624fa9bf382f841340dfa8.

Successfully replaced voting disk group with +OCR.

As shown, the voting disks can now be replaced successfully.

7. Check the votedisk status:

[grid@pay1 ~]$ crsctl query css votedisk

##  STATE    File Universal Id                File Name Disk group

--  -----    -----------------                --------- ---------

 1. ONLINE   58c1ac72dff94f25bffc8e649a36c883 (/dev/asm_a1_ocr3) [OCR]

 2. ONLINE   076f0b3e9b0a4f5cbf26841c540211a7 (/dev/asm_a2_ocr3) [OCR]

 3. ONLINE   84cf735c784e4f74bf5d55fc99e98422 (/oracle/votedisk/vote_pay) [OCR]

Located 3 voting disk(s).

24 Quorum FailGroup

How to Manually Add NFS voting disk to an Extended Cluster using ASM in 11.2 (Doc ID 1421588.1)

Mount Options for Oracle files when used with NFS on NAS devices (Doc ID 359515.1)

RAC: Frequently Asked Questions [ID 220970.1]

***********************************************************

What is a Quorum FailGroup

***********************************************************

A Quorum FailGroup stores only the voting disk; it is used as an arbitration disk for RAC on Extended Distance Clusters.

The voting disk should be placed on a separate storage node, or a zero-padded file shared via NFS can be used instead.

A Quorum FG is also useful in environments protected by two storage arrays.

Suppose HIGH redundancy is used, giving five voting disks in total. Those five voting disks cannot be split evenly across two arrays.

Say one array holds three and the other holds two; if the array with three voting disks goes down, only two voting disks remain available.

In that case all nodes of the cluster will go down.

Instead, one voting disk must be placed at a third location, with two voting disks on each of the two arrays. Then, whichever array goes down, three voting disks are still guaranteed to be available.
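To see how the voting files are currently distributed across the failgroups (and therefore across the arrays), a query along these lines can be used; this is a sketch assuming the voting disks live in a disk group named CRSDG, as in the test below.

# Count voting files per failgroup of the CRSDG disk group (run against the ASM instance).
sqlplus -s / as sysasm <<'EOF'
set lines 150 pages 100
select d.failgroup, count(*) voting_files
  from v$asm_disk d, v$asm_diskgroup g
 where d.group_number = g.group_number
   and g.name = 'CRSDG'
   and d.voting_file = 'Y'
 group by d.failgroup;
EOF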

***********************************************************

How to add a Quorum FailGroup

***********************************************************

Environments with three separate arrays are uncommon, so the test below configures the third voting location over NFS.

1. Current state:

HIGH redundancy, five voting disks

[oracle@database2 ~]$ crsctl query css votedisk 

##  STATE    File Universal Id                File Name Disk group

--  -----    -----------------                --------- ---------

 1. ONLINE   7fc99b8d4dc54f27bf967f23524a19e3 (/dev/asm-crs1) [CRSDG]

 2. ONLINE   2ffd39d609bc4f71bf2b19de2e71c7a8 (/dev/asm-crs2) [CRSDG]

 3. ONLINE   002eb188e9b14ffbbf4d5a607ade51c2 (/dev/asm-crs3) [CRSDG]

 4. ONLINE   2319348cf3cc4f6abf116f973d8fd922 (/dev/asm-crs4) [CRSDG]

 5. ONLINE   6c3cb875ba7e4fe2bffe97189e2bae25 (/dev/asm-crs5) [CRSDG]

Located 5 voting disk(s).

SQL>  select GROUP_NUMBER,DISK_NUMBER,OS_MB,NAME,PATH,FAILGROUP,state from v$asm_disk order by 1,2;

GROUP_NUMBER DISK_NUMBER      OS_MB NAME                           PATH            FAILGROUP                      STATE

------------ ----------- ---------- ------------------------------ --------------- ------------------------------ --------

           1           0       2048 CRSDG_0000                     /dev/asm-crs01  CRSDG_0000                     NORMAL

           1           1       2048 CRSDG_0001                     /dev/asm-crs02  CRSDG_0001                     NORMAL

           1           2       2048 CRSDG_0002                     /dev/asm-crs03  CRSDG_0002                     NORMAL

           1           3       2048 CRSDG_0003                     /dev/asm-crs04  CRSDG_0003                     NORMAL

           1           4       2048 CRSDG_0004                     /dev/asm-crs05  CRSDG_0004                     NORMAL

           2           0      20480 DATADG_0000                    /dev/asm-data1  DATADG_0000                    NORMAL

           2           1      20480 DATADG_0001                    /dev/asm-data2  DATADG_0001                    NORMAL

2. Configure NFS

2.1 On the NFS server

[root@dm01db01 /]# cat /etc/exports 

/oracle/votedisk 192.168.123.31(rw,sync,no_root_squash)

/oracle/votedisk 192.168.123.32(rw,sync,no_root_squash)

[root@dm01db01 /]#  /etc/rc.d/init.d/portmap start 

Starting portmap:                                          [  OK  ]

[root@dm01db01 /]#  /etc/rc.d/init.d/nfs start 

Starting NFS services:                                     [  OK  ]

Starting NFS quotas:                                       [  OK  ]

Starting NFS daemon:                                       [  OK  ]

Starting NFS mountd:                                       [  OK  ]

Stopping RPC idmapd:                                       [  OK  ]

Starting RPC idmapd:                                       [  OK  ]

2.2 Mount the file system on the database servers

# mount -t nfs -o rw,bg,hard,nointr,rsize=32768,wsize=32768,tcp,noac,vers=3,timeo=600,actimeo=0 192.168.123.139:/oracle/votedisk /u01/app/oracle/votedisk

# dd if=/dev/zero of=/u01/app/oracle/votedisk/votedisk01 bs=1M count=2048

# chown -R grid.oinstall /u01/app/oracle/votedisk
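So that the NFS mount survives a reboot of the database servers, an /etc/fstab entry with the same mount options can be added on each node; a sketch using the paths from this example.

# /etc/fstab entry on each database server (same options as the manual mount above).
192.168.123.139:/oracle/votedisk  /u01/app/oracle/votedisk  nfs  rw,bg,hard,nointr,rsize=32768,wsize=32768,tcp,noac,vers=3,timeo=600,actimeo=0  0 0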

2.3 Add the quorum FG disk

SQL> alter system set asm_diskstring='/dev/asm*','/u01/app/oracle/votedisk/vote*' sid='*' scope=both;

SQL> alter diskgroup CRSDG add quorum failgroup FGQ DISK '/u01/app/oracle/votedisk/votedisk01';

SQL> select GROUP_NUMBER,DISK_NUMBER,OS_MB,NAME,PATH,FAILGROUP,state from v$asm_disk order by 1,2;

GROUP_NUMBER DISK_NUMBER      OS_MB NAME                           PATH            FAILGROUP                      STATE

------------ ----------- ---------- ------------------------------ --------------- ------------------------------ --------

           1           0       2048 CRSDG_0000                     /dev/asm-crs01  CRSDG_0000                     NORMAL

           1           1       2048 CRSDG_0001                     /dev/asm-crs02  CRSDG_0001                     NORMAL

           1           2       2048 CRSDG_0002                     /dev/asm-crs03  CRSDG_0002                     NORMAL

           1           3       2048 CRSDG_0003                     /dev/asm-crs04  CRSDG_0003                     NORMAL

           1           4       2048 CRSDG_0004                     /dev/asm-crs05  CRSDG_0004                     NORMAL

           1           5       2048 CRSDG_0005                     /u01/app/oracle FGQ                            NORMAL      <==

                                                                   /votedisk/voted

                                                                   isk01

           2           0      20480 DATADG_0000                    /dev/asm-data1  DATADG_0000                    NORMAL

           2           1      20480 DATADG_0001                    /dev/asm-data2  DATADG_0001                    NORMAL

2.4 Drop the extra disk

After the quorum FG has been added, you can see that no voting disk has been moved onto it yet.

Now drop one voting disk from the array that currently holds three of them:

[grid@database1 votedisk]$ crsctl query css votedisk

##  STATE    File Universal Id                File Name Disk group

--  -----    -----------------                --------- ---------

 1. ONLINE   46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG]

 2. ONLINE   10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG]

 3. ONLINE   0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG]

 4. ONLINE   83fa917a0e844f23bf27238aff51b57a (/dev/asm-crs05) [CRSDG]

 5. ONLINE   18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG]

SQL> alter diskgroup crsdg drop disk CRSDG_0004;                                                 <==

[grid@database1 votedisk]$ crsctl query css votedisk

##  STATE    File Universal Id                File Name Disk group

--  -----    -----------------                --------- ---------

 1. ONLINE   46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG]

 2. ONLINE   10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG]

 3. ONLINE   0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG]

 4. ONLINE   18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG]

 5. ONLINE   6fc616a6923a4fb2bffca18e44a58533 (/u01/app/oracle/votedisk/votedisk01) [CRSDG]      <==

***********************************************************

Related configuration

***********************************************************

When creating the database disk groups, assign disks from the two arrays to explicit FAILGROUPs, so that the two data copies (NORMAL redundancy) end up on different arrays.

Set the asm_preferred_read_failure_groups parameter so that each node reads data from its own (local) failure group.
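A sketch of such a preferred-read setup, assuming the data disk group DATADG has been organized into two array-based failgroups with the hypothetical names FG_ARRAY1 and FG_ARRAY2 (unlike the per-disk default failgroups shown in the listings above); each ASM instance is told to prefer reads from its local array.

# Run against the ASM instances; instance 1 is assumed local to array 1, instance 2 to array 2.
sqlplus -s / as sysasm <<'EOF'
alter system set asm_preferred_read_failure_groups = 'DATADG.FG_ARRAY1' scope=both sid='+ASM1';
alter system set asm_preferred_read_failure_groups = 'DATADG.FG_ARRAY2' scope=both sid='+ASM2';
EOF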

***********************************************************

Tests

***********************************************************

1. The NFS server goes down

2014-02-13 16:56:38.476: 

[cssd(3577)]CRS-1615:No I/O has completed after 50% of the maximum interval. Voting file /u01/app/oracle/votedisk/votedisk01 will be considered not functional in 99910 milliseconds

2014-02-13 16:57:28.541: 

[cssd(3577)]CRS-1614:No I/O has completed after 75% of the maximum interval. Voting file /u01/app/oracle/votedisk/votedisk01 will be considered not functional in 49850 milliseconds

2014-02-13 16:57:58.565: 

[cssd(3577)]CRS-1613:No I/O has completed after 90% of the maximum interval. Voting file /u01/app/oracle/votedisk/votedisk01 will be considered not functional in 19830 milliseconds

2014-02-13 16:58:18.573: 

[cssd(3577)]CRS-1604:CSSD voting file is offline: /u01/app/oracle/votedisk/votedisk01; details at (:CSSNM00058:) in /u01/app/11.2.0/grid/log/database1/cssd/ocssd.log.

[root@database1 ~]# crsctl query css votedisk

##  STATE    File Universal Id                File Name Disk group

--  -----    -----------------                --------- ---------

 1. ONLINE   46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG]

 2. ONLINE   10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG]

 3. ONLINE   0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG]

 4. ONLINE   18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG]

Located 4 voting disk(s).

The clusterware keeps running normally at this point.
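To double-check this while the NFS voting file is offline, the usual cluster checks can be run from any node:

crsctl check cluster -all
crsctl query css votedisk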

Recovery

[root@database1 ~]# umount -f /u01/app/oracle/votedisk

[root@database1 ~]# mount -t nfs -o rw,bg,hard,nointr,rsize=32768,wsize=32768,tcp,noac,vers=3,timeo=600,actimeo=0 192.168.123.139:/oracle/votedisk /u01/app/oracle/votedisk

GROUP_NUMBER DISK_NUMBER      OS_MB NAME                           PATH                           FAILGROUP                      STATE

------------ ----------- ---------- ------------------------------ ------------------------------ ------------------------------ --------

           0           5       2048                                /u01/app/oracle/votedisk/voted                                NORMAL  <== the file no longer belongs to CRSDG at this point

                                                                   isk01

           0           9       2048                                /dev/asm-crs05                                                NORMAL

           1           0       2048 CRSDG_0000                     /dev/asm-crs01                 CRSDG_0000                     NORMAL

           1           1       2048 CRSDG_0001                     /dev/asm-crs02                 CRSDG_0001                     NORMAL

           1           2       2048 CRSDG_0002                     /dev/asm-crs03                 CRSDG_0002                     NORMAL

           1           3       2048 CRSDG_0003                     /dev/asm-crs04                 CRSDG_0003                     NORMAL

           1           4          0 CRSDG_0004                                                    FGQ                            NORMAL  <== 

           2           0      20480 DATADG_0000                    /dev/asm-data1                 DATADG_0000                    NORMAL

           2           1      20480 DATADG_0001                    /dev/asm-data2                 DATADG_0001                    NORMAL

[root@database1 votedisk]# dd if=/dev/zero of=/u01/app/oracle/votedisk/votedisk01 bs=1M count=2048      <== re-create (zero out) the voting disk file

SQL> alter diskgroup CRSDG add quorum failgroup FGQ DISK '/u01/app/oracle/votedisk/votedisk01';         <== add it back

GROUP_NUMBER DISK_NUMBER      OS_MB NAME                           PATH            FAILGROUP                      STATE

------------ ----------- ---------- ------------------------------ --------------- ------------------------------ --------

           0           4       2048                                /dev/asm-crs05                                 NORMAL

           1           0       2048 CRSDG_0000                     /dev/asm-crs01  CRSDG_0000                     NORMAL

           1           1       2048 CRSDG_0001                     /dev/asm-crs02  CRSDG_0001                     NORMAL

           1           2       2048 CRSDG_0002                     /dev/asm-crs03  CRSDG_0002                     NORMAL

           1           3       2048 CRSDG_0003                     /dev/asm-crs04  CRSDG_0003                     NORMAL

           1           4       2048 CRSDG_0004                     /u01/app/oracle FGQ                            NORMAL        <==

                                                                   /votedisk/voted

                                                                   isk01

           1           5          0 CRSDG_0005                                     FGQ                            NORMAL        <==

           2           0      20480 DATADG_0000                    /dev/asm-data1  DATADG_0000                    NORMAL

           2           1      20480 DATADG_0001                    /dev/asm-data2  DATADG_0001                    NORMAL

SQL> alter diskgroup crsdg drop quorum disk CRSDG_0005 force;                                           <== drop the old one

GROUP_NUMBER DISK_NUMBER      OS_MB NAME                           PATH            FAILGROUP                      STATE

------------ ----------- ---------- ------------------------------ --------------- ------------------------------ --------

           0           4       2048                                /dev/asm-crs05                                 NORMAL        <==

           1           0       2048 CRSDG_0000                     /dev/asm-crs01  CRSDG_0000                     NORMAL

           1           1       2048 CRSDG_0001                     /dev/asm-crs02  CRSDG_0001                     NORMAL

           1           2       2048 CRSDG_0002                     /dev/asm-crs03  CRSDG_0002                     NORMAL

           1           3       2048 CRSDG_0003                     /dev/asm-crs04  CRSDG_0003                     NORMAL

           1           4       2048 CRSDG_0004                     /u01/app/oracle FGQ                            NORMAL        <==

                                                                   /votedisk/voted

                                                                   isk01

           2           0      20480 DATADG_0000                    /dev/asm-data1  DATADG_0000                    NORMAL

           2           1      20480 DATADG_0001                    /dev/asm-data2  DATADG_0001                    NORMAL
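After the force drop, ASM rebalances the disk group in the background; before relying on the new layout it is worth confirming that no rebalance is still running. A minimal sketch against the ASM instance (no rows returned means the rebalance has finished):

# Check for an ongoing rebalance operation.
sqlplus -s / as sysasm <<'EOF'
select group_number, operation, state, power, est_minutes
  from v$asm_operation;
EOF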

2. Start GRID before the NFS server is up

[root@database1 ~]# crsctl query css votedisk

##  STATE    File Universal Id                File Name Disk group

--  -----    -----------------                --------- ---------

 1. ONLINE   46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG]

 2. ONLINE   10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG]

 3. ONLINE   0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG]

 4. ONLINE   18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG]

 5. OFFLINE  ad0268dcec654f14bfe60497d9490780 () []

Located 5 voting disk(s).

[root@database1 ~]# crsctl stat res -t                                             <== CRS cannot start

CRS-4535: Cannot communicate with Cluster Ready Services

CRS-4000: Command Status failed, or completed with errors.

[root@database2 ~]# crsctl stat res -t -init

--------------------------------------------------------------------------------

NAME           TARGET  STATE        SERVER                   STATE_DETAILS       

--------------------------------------------------------------------------------

Cluster Resources

--------------------------------------------------------------------------------

ora.asm

      1        ONLINE  INTERMEDIATE database2                OCR not started       <== ASM is up, but CRSDG cannot be mounted

ora.cluster_interconnect.haip

      1        ONLINE  ONLINE       database2                                    

ora.crf

      1        ONLINE  ONLINE       database2                                    

ora.crsd

      1        ONLINE  OFFLINE                                                   

ora.cssd

      1        ONLINE  ONLINE       database2                                    

ora.cssdmonitor

      1        ONLINE  ONLINE       database2                                    

ora.ctssd

      1        ONLINE  ONLINE       database2                ACTIVE:0            

ora.diskmon

      1        OFFLINE OFFLINE                                                   

ora.drivers.acfs

      1        ONLINE  ONLINE       database2                                    

ora.evmd

      1        ONLINE  INTERMEDIATE database2                                    

ora.gipcd

      1        ONLINE  ONLINE       database2                                    

ora.gpnpd

      1        ONLINE  ONLINE       database2                                    

ora.mdnsd

      1        ONLINE  ONLINE       database2    

      

SQL> alter diskgroup crsdg mount;

alter diskgroup crsdg mount

*

ERROR at line 1:

ORA-15032: not all alterations performed

ORA-15040: diskgroup is incomplete

ORA-15042: ASM disk "4" is missing from group number "1"

SQL> alter diskgroup crsdg mount force;                     <== force the mount

Diskgroup altered.

[root@database1 ~]# crsctl stat res -t

--------------------------------------------------------------------------------

NAME           TARGET  STATE        SERVER                   STATE_DETAILS       

--------------------------------------------------------------------------------

Local Resources

--------------------------------------------------------------------------------

ora.CRSDG.dg

               ONLINE  ONLINE       database1                                    

               ONLINE  ONLINE       database2                                    

ora.DATADG.dg

               ONLINE  ONLINE       database1                                    

               ONLINE  ONLINE       database2                                    

ora.LISTENER.lsnr

               ONLINE  ONLINE       database1                                    

               ONLINE  ONLINE       database2                                    

ora.LISTENER_1522.lsnr

               ONLINE  ONLINE       database1                                    

               ONLINE  ONLINE       database2                                    

ora.LISTENER_1523.lsnr

               ONLINE  ONLINE       database1                                    

               ONLINE  ONLINE       database2                                    

ora.asm

               ONLINE  ONLINE       database1                Started             

               ONLINE  ONLINE       database2                Started             

ora.gsd

               OFFLINE OFFLINE      database1                                    

               OFFLINE OFFLINE      database2                                    

ora.net1.network

               ONLINE  ONLINE       database1                                    

               ONLINE  ONLINE       database2                                    

ora.ons

               ONLINE  ONLINE       database1                                    

               ONLINE  ONLINE       database2                                    

ora.registry.acfs

               ONLINE  ONLINE       database1                                    

               ONLINE  ONLINE       database2                                    

--------------------------------------------------------------------------------

Cluster Resources

--------------------------------------------------------------------------------

ora.LISTENER_SCAN1.lsnr

      1        ONLINE  ONLINE       database1                                    

ora.cvu

      1        OFFLINE OFFLINE                                                   

ora.database1.vip

      1        ONLINE  ONLINE       database1                                    

ora.database2.vip

      1        ONLINE  ONLINE       database2                                    

ora.oc4j

      1        ONLINE  ONLINE       database1                                    

ora.orcl.billa3.svc

      1        ONLINE  ONLINE       database1                                    

ora.orcl.db

      1        ONLINE  ONLINE       database1                Open                

      2        ONLINE  ONLINE       database2                Open                

ora.scan1.vip

      1        ONLINE  ONLINE       database1   

      

[root@database1 ~]# crsctl query css votedisk

##  STATE    File Universal Id                File Name Disk group

--  -----    -----------------                --------- ---------

 1. ONLINE   46bf9f1d43574f5bbfefe1377db152c4 (/dev/asm-crs01) [CRSDG]

 2. ONLINE   10892f57e2e84ffabfa4a5e6fa86aee5 (/dev/asm-crs02) [CRSDG]

 3. ONLINE   0a66ffb250394f13bfcfdb1946056058 (/dev/asm-crs03) [CRSDG]

 4. ONLINE   18e561a6a5ff4fc0bf996f740aff70da (/dev/asm-crs04) [CRSDG]

Located 4 voting disk(s).      

Recovery is the same as in test 1.
