处理这个死锁问题,花了好几天,相信遇到的同学,一样头疼,但有个好辅助类的话,就没这么头疼了
注意
本篇文章的解决方案只适合使用lock(obj),或是:Monitor.Enter(obj); …. Monitor.Exit(obj)的方式
类似酱紫的死锁
如果你使用的是:AutoResetEvent.Set/Reset, Monitor.Wait/Pulse, Mutex的方式,请另寻他法。
辅助类
//------------------------------------------------------------------------
// Created by Jave.Lin 4/21/2018 5:31:57 PM
//------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Threading;
namespace Common.ComUtil
{
/// <summary>
/// Locker Information: the object that is actually locked by <c>CDL.CheckDL</c>,
/// carrying the bookkeeping fields that are printed when the held locks are dumped.
/// author : Jave.Lin
/// date : 4/21/2018 5:31:57 PM
/// </summary>
public class Locker
{
    // Free-form value supplied by the caller; rendered through ToString() in the report.
    public object tag;
    // Managed thread id recorded by CDL.CheckDL when the lock is taken.
    public int threadID;
    // Display name given at construction time.
    public string name;
    // Incremented by CDL.CheckDL (via Interlocked) each time the lock is taken.
    public int lockedTimes;
    // Environment.TickCount value recorded when the lock is taken.
    public int lockingTs;
    // Set to true by CDL.CheckDL once the lock has been taken.
    public bool enter;
    // Set to true by CDL.CheckDL just before the lock is released.
    public bool exit;
    // Call chain captured at the moment the lock is taken.
    public string lockingStackTrace;
    // Environment.TickCount value recorded when an attempt to take this lock times out.
    public int lockingDLTs;

    /// <summary>Creates a locker that only has its display name set.</summary>
    /// <param name="name">Name shown on the "Name:" line of the report.</param>
    public Locker(string name)
    {
        this.name = name;
    }

    /// <summary>
    /// Builds the multi-line report for this locker: one "Label:value" entry per
    /// field, joined with CRLF. The labels (including the "LocingEt" spelling and
    /// the "(ms" suffix) are part of the established dump format.
    /// </summary>
    /// <returns>The report text.</returns>
    public override string ToString()
    {
        var lines = new List<string>(9);

        lines.Add("ThreadId:" + threadID);
        lines.Add("Name:" + name);
        lines.Add("LockedTimes:" + lockedTimes);
        lines.Add("LockingTs:" + lockingTs);

        // Difference between the last recorded time-out tick and the tick at which the lock was taken.
        int elapsedTicks = lockingDLTs - lockingTs;
        lines.Add("LocingEt:" + elapsedTicks + "(ms");

        lines.Add("Enter:" + enter);
        lines.Add("Exit:" + exit);

        string tagText;
        if (tag == null)
        {
            tagText = "null";
        }
        else
        {
            tagText = tag.ToString();
        }
        lines.Add("Tag:" + tagText);

        lines.Add("LockingStackTrace:\n" + lockingStackTrace);

        return string.Join("\r\n", lines);
    }
}
/// <summary>
/// Check Dead Lock (CDL): replaces a plain <c>lock</c> block with a timed
/// <see cref="Monitor.TryEnter(object, int)"/> and records, per <see cref="Locker"/>,
/// who took the lock and from where, so that held locks can be listed with
/// <see cref="Dumps"/> when an acquisition times out.
/// author : Jave.Lin
/// date : 4/21/2018 5:31:57 PM
/// </summary>
public static class CDL
{
    // Lockers currently held through CheckDL (the bool value is unused).
    // CheckDL and Dumps only touch this map while holding _s_pMapGate.
    public static readonly Dictionary<Locker, bool> _s_pLockingMap = new Dictionary<Locker, bool>();

    // Guards _s_pLockingMap: threads holding *different* lockers add/remove entries
    // concurrently, and Dictionary is not safe for concurrent writers or for
    // enumeration during a write.
    private static readonly object _s_pMapGate = new object();

    // When true, a lock-acquisition time-out is reported by throwing.
    public const bool FORCE_CHECK = true;

    // Time-out (ms) for taking a lock. Tune as needed: on a heavily loaded production
    // server some work items legitimately run long, so a time-out is not always a deadlock.
    public const int CheckDeadLockTime = 3000;

    /// <summary>
    /// Returns the reports of all lockers currently registered as held,
    /// separated by a "=line============" line.
    /// </summary>
    /// <returns>The concatenated reports; an empty string when nothing is held.</returns>
    public static string Dumps()
    {
        // Copy the keys under the gate and format afterwards, so that
        // Locker.ToString() (which may call tag.ToString()) runs outside the gate.
        Locker[] held;
        lock (_s_pMapGate)
        {
            held = new Locker[_s_pLockingMap.Count];
            _s_pLockingMap.Keys.CopyTo(held, 0);
        }

        var list = new List<string>(held.Length);
        foreach (var locker in held)
        {
            list.Add(locker.ToString());
        }
        return string.Join("\r\n=line============\r\n", list);
    }

    /// <summary>
    /// Runs <paramref name="actoin"/> while holding <paramref name="locker"/>, waiting at most
    /// <see cref="CheckDeadLockTime"/> ms for the lock.
    /// </summary>
    /// <remarks>
    /// Exceptions raised inside the try block (including those thrown by
    /// <paramref name="actoin"/>) are written to the console and not rethrown.
    /// A time-out still surfaces to the caller when <see cref="FORCE_CHECK"/> is true,
    /// because the release step throws when the current thread does not hold the lock.
    /// When <see cref="FORCE_CHECK"/> is false the action is run without the lock.
    /// </remarks>
    /// <param name="locker">The lock object; also receives the bookkeeping data.</param>
    /// <param name="actoin">The code to run under the lock.</param>
    /// <exception cref="Exception">The lock is not held on exit and <see cref="FORCE_CHECK"/> is true.</exception>
    public static void CheckDL(Locker locker, Action actoin)
    {
        try
        {
            if (Monitor.TryEnter(locker, CheckDeadLockTime))
            {
                // The trace is captured here rather than inside the helper so that
                // the recorded call chain still ends at CDL::CheckDL().
                _MarkEntered(locker, GetCurStackTrace("->\n"));
                actoin.Invoke();
            }
            else
            {
                _ReportTimeout(locker);
                // Only reached when FORCE_CHECK is false: run without the lock.
                actoin.Invoke();
            }
        }
        catch (Exception e)
        {
            _ErrorWriteLine(e.ToString());
        }
        finally
        {
            _ReleaseOrReport(locker);
        }
    }

    /// <summary>
    /// Runs <paramref name="actoin"/> while holding <paramref name="locker"/>, waiting at most
    /// <see cref="CheckDeadLockTime"/> ms for the lock, and returns its result.
    /// </summary>
    /// <remarks>
    /// Unlike the <see cref="Action"/> overload, exceptions thrown by
    /// <paramref name="actoin"/> are not caught here. When <see cref="FORCE_CHECK"/> is
    /// false and the lock times out, the function is run without the lock and its
    /// result is returned.
    /// </remarks>
    /// <typeparam name="T">Result type of <paramref name="actoin"/>.</typeparam>
    /// <param name="locker">The lock object; also receives the bookkeeping data.</param>
    /// <param name="actoin">The code to run under the lock.</param>
    /// <returns>The value produced by <paramref name="actoin"/>.</returns>
    /// <exception cref="Exception">The lock is not held on exit and <see cref="FORCE_CHECK"/> is true.</exception>
    public static T CheckDL<T>(Locker locker, Func<T> actoin)
    {
        T ret = default(T);
        try
        {
            if (Monitor.TryEnter(locker, CheckDeadLockTime))
            {
                // Captured here so that the recorded call chain ends at CDL::CheckDL().
                _MarkEntered(locker, GetCurStackTrace("->\n"));
                ret = actoin.Invoke();
            }
            else
            {
                _ReportTimeout(locker);
                // Only reached when FORCE_CHECK is false: run without the lock,
                // and keep the result instead of discarding it.
                ret = actoin.Invoke();
            }
        }
        finally
        {
            // The holder's lockingTs is deliberately left untouched here: this thread
            // may not own the lock, and overwriting it would falsify the dump.
            _ReleaseOrReport(locker);
        }
        return ret;
    }

    /// <summary>
    /// Renders the current call stack as "Type::Method()" entries, outermost caller
    /// first, each entry indented by its depth and joined with
    /// <paramref name="separactor"/>. The frame of this method itself is skipped, and
    /// the walk stops at the first frame whose IL offset is unknown.
    /// </summary>
    /// <param name="separactor">Text placed between two consecutive entries.</param>
    /// <returns>The formatted call chain; an empty string when no frames are available.</returns>
    public static string GetCurStackTrace(string separactor = "->")
    {
        System.Diagnostics.StackFrame[] frames = new System.Diagnostics.StackTrace().GetFrames();
        List<string> methodNameList = new List<string>();

        // GetFrames() may return null when the trace has no frames.
        if (frames != null)
        {
            // Start at 1 to skip GetCurStackTrace itself.
            for (int i = 1; i < frames.Length; ++i)
            {
                System.Diagnostics.StackFrame frame = frames[i];
                if (System.Diagnostics.StackFrame.OFFSET_UNKNOWN == frame.GetILOffset()) break;

                // GetMethod() and DeclaringType can both be null (e.g. dynamic or
                // module-level methods); fall back to a placeholder instead of crashing.
                var m = frame.GetMethod();
                var dn = (m != null && m.DeclaringType != null) ? m.DeclaringType.Name : "<unknown>";
                var mn = m != null ? m.Name : "<unknown>";
                methodNameList.Add(new string(' ', frames.Length - i) + dn + "::" + mn + "()");
            }
        }

        methodNameList.Reverse();
        return string.Join(separactor, methodNameList);
    }

    // Records on the locker that the current thread has just taken it, and registers
    // it in the held-locks map.
    private static void _MarkEntered(Locker locker, string stackTrace)
    {
        locker.enter = true;
        locker.exit = false;
        locker.lockingTs = Environment.TickCount;
        locker.threadID = Thread.CurrentThread.ManagedThreadId;
        locker.lockingStackTrace = stackTrace;
        Interlocked.Increment(ref locker.lockedTimes);
        lock (_s_pMapGate)
        {
            _s_pLockingMap[locker] = true;
        }
    }

    // Records the time-out tick on the locker, logs a warning, and (when FORCE_CHECK
    // is true) raises the deadlock error.
    private static void _ReportTimeout(Locker locker)
    {
        locker.lockingDLTs = Environment.TickCount;
        _WarningWriteLine("TryEnter time out");
        if (FORCE_CHECK)
        {
            _ShowGetLockTimeout();
        }
    }

    // Releases the locker when the current thread holds it; otherwise throws
    // (FORCE_CHECK) or logs a warning.
    // NOTE(review): Monitor is reentrant, so if one thread nests CheckDL on the same
    // Locker the inner call unregisters it and sets exit while the outer call still
    // holds the lock - confirm callers never nest on one Locker.
    private static void _ReleaseOrReport(Locker locker)
    {
        if (Monitor.IsEntered(locker))
        {
            locker.exit = true;
            lock (_s_pMapGate)
            {
                _s_pLockingMap.Remove(locker);
            }
            Monitor.Exit(locker);
        }
        else
        {
            var msg = "!Monitor.IsEntered(locker)";
            if (FORCE_CHECK)
            {
                throw new Exception(msg);
            }
            else
            {
                _WarningWriteLine(msg);
            }
        }
    }

    // Logs the deadlock banner in red and throws an exception carrying the same text.
    private static void _ShowGetLockTimeout()
    {
        var msg = "!!!!!!!!!!!!!!!!!!DeadLock!!!!!!!!!!!!!!!!!!!!!!!!!!";
        _ErrorWriteLine(msg);
        throw new Exception(msg);
    }

    // Writes a line to the console in red.
    private static void _ErrorWriteLine(string msg, params object[] args)
    {
        _ColorWriteLine(ConsoleColor.Red, msg, args);
    }

    // Writes a line to the console in yellow.
    private static void _WarningWriteLine(string msg, params object[] args)
    {
        _ColorWriteLine(ConsoleColor.Yellow, msg, args);
    }

    // Writes one line in the given colour and always restores the previous colour.
    // Without format arguments the message is written verbatim, so text containing
    // '{' or '}' (such as an exception dump) cannot trigger a FormatException.
    private static void _ColorWriteLine(ConsoleColor color, string msg, object[] args)
    {
        var srcColor = Console.ForegroundColor;
        Console.ForegroundColor = color;
        try
        {
            if (args == null || args.Length == 0)
            {
                Console.WriteLine(msg);
            }
            else
            {
                Console.WriteLine(msg, args);
            }
        }
        finally
        {
            Console.ForegroundColor = srcColor;
        }
    }
}
}
使用方法
// 我们平常用的:
lock(obj)
{
// code here
}
改写成:
CDL.CheckDL(obj, ()=>
{
// code here
});
一旦有死锁出现,那么将会命中CDL::_ShowGetLockTimeout方法
然后将CDL.Dumps()的内容打印出来,就可以知道,当前哪些CDL.CheckDL的地方有死锁。
Dumps很详细,具体还可以根据自己的需要来对 Locker的信息做调整。
注意CDL.CheckDeadLockTime
CDL.CheckDeadLockTime = 3000; // 这个阈值按需调整,实际的线上产品服务器程序如果负载过大时,可能也会有部分任务处理过久,导致‘取锁’等待过久,所以出现Monitor.TryEnter timeout时,不一定是死锁。
你可以按你的需求来调整该值,如:调整为60000(60秒),意思是你确定了:取锁等待超过60秒的,都算是由死锁任务导致的
CDL.Dumps内容实例
ThreadId:10
Name:CBCServerAliveLocker
LockedTimes:1
LockingTs:7496815
LocingEt:-7496815(ms
Enter:True
Exit:False
Tag:null
LockingStackTrace:
Program::Main()->
XXXServer::WaitForExit()->
CDL::CheckDL()
=line============
ThreadId:15
Name:CTcpNetworker
LockedTimes:486
LockingTs:7519248
LocingEt:2995(ms
Enter:True
Exit:False
Tag:null
LockingStackTrace:
_IOCompletionCallback::PerformIOCompletionCallback()->
BaseOverlappedAsyncResult::CompletionPortCallback()->
LazyAsyncResult::ProtectedInvokeCallback()->
ContextAwareResult::Complete()->
ExecutionContext::Run()->
ExecutionContext::Run()->
ExecutionContext::RunInternal()->
ContextAwareResult::CompleteCallback()->
LazyAsyncResult::Complete()->
XXXNetworker::_OnBeginReceiveCallback()->
CDL::CheckDL()
=line============
ThreadId:15
Name:CEventMgr
LockedTimes:247
LockingTs:7519248
LocingEt:-7519248(ms
Enter:True
Exit:False
Tag:null
LockingStackTrace:
_IOCompletionCallback::PerformIOCompletionCallback()->
BaseOverlappedAsyncResult::CompletionPortCallback()->
LazyAsyncResult::ProtectedInvokeCallback()->
ContextAwareResult::Complete()->
ExecutionContext::Run()->
ExecutionContext::Run()->
ExecutionContext::RunInternal()->
ContextAwareResult::CompleteCallback()->
LazyAsyncResult::Complete()->
XXXNetworker::_OnBeginReceiveCallback()->
CDL::CheckDL()->
<>c__DisplayClass78_0::<_OnBeginReceiveCallback>b__0()->
XXXConnection::XXXNetworker_OnPackageEvent()->
CEventMgr::Invoke()->
CDL::CheckDL()
=line============
ThreadId:6
Name:CBattleRoomMgr
LockedTimes:689
LockingTs:7519248
LocingEt:-7519248(ms
Enter:True
Exit:False
Tag:null
LockingStackTrace:
_ThreadPoolWaitCallback::PerformWaitCallback()->
ThreadPoolWorkQueue::Dispatch()->
Task::System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()->
Task::ExecuteEntry()->
Task::ExecuteWithThreadLocal()->
ExecutionContext::Run()->
ExecutionContext::RunInternal()->
Task::ExecutionContextCallback()->
Task::Execute()->
Task::InnerInvoke()->
XXXServer::<StartUp>b__36_0()->
CDL::CheckDL()->
XXXServer::<StartUp>b__36_1()->
XXXServer::_DeadLockMethod()->
XXXServerInst::_DeadLockMethod1()->
CDL::CheckDL()
=line============
ThreadId:6
Name:CBattleRoom
LockedTimes:802
LockingTs:7519248
LocingEt:2995(ms
Enter:True
Exit:False
Tag:BroadcastAll3 starting
LockingStackTrace:
_ThreadPoolWaitCallback::PerformWaitCallback()->
ThreadPoolWorkQueue::Dispatch()->
Task::System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()->
Task::ExecuteEntry()->
Task::ExecuteWithThreadLocal()->
ExecutionContext::Run()->
ExecutionContext::RunInternal()->
Task::ExecutionContextCallback()->
Task::Execute()->
Task::InnerInvoke()->
XXXServer::<StartUp>b__36_0()->
CDL::CheckDL()->
XXXServer::<StartUp>b__36_1()->
XXXServer::_DeadLockMethod1()->
XXXServerInst::_DeadLockMethod2()->
CDL::CheckDL()->
XXXServerInst::<_DeadLockMethod2>b__19_0()->
XXXServerInst1::_DeadLockMethod1()->
CDL::CheckDL()->
XXXServerInst1::<_DeadLockMethod1>b__43_0()->
XXXServerInst1::_DeadLockMethod2()->
CDL::CheckDL()
=line============
ThreadId:6
Name:CBCServerNormalLocker
LockedTimes:668
LockingTs:7519248
LocingEt:-7519248(ms
Enter:True
Exit:False
Tag:null
LockingStackTrace:
_ThreadPoolWaitCallback::PerformWaitCallback()->
ThreadPoolWorkQueue::Dispatch()->
Task::System.Threading.IThreadPoolWorkItem.ExecuteWorkItem()->
Task::ExecuteEntry()->
Task::ExecuteWithThreadLocal()->
ExecutionContext::Run()->
ExecutionContext::RunInternal()->
Task::ExecutionContextCallback()->
Task::Execute()->
Task::InnerInvoke()->
XXXServer::<StartUp>b__36_0()->
CDL::CheckDL()