本人的心跳包处理是这么设计的,在启动程序后,立刻开启一个心跳线程,专门用于处理客户的连接。这个线程用于处理所有的客户端的连接,当线程没有接到其中一个客户发来的请求达到20秒,即认为掉线。客户连接时,发送了一次数据之后,立刻退出。 
为了保证定时处理,启动了可等待定时器与事件机制。 
1.心跳线程是这么启动的: 
// Launch the heartbeat worker thread; its entry point is CHeartBeat::WaitProc and it receives no argument.
HANDLE hHeatBeat=CreateThread(NULL,0,CHeartBeat::WaitProc,NULL,0,NULL); 
// The thread handle is released right away. The thread keeps running, but from this point on
// hHeatBeat no longer refers to an open handle.
CloseHandle(hHeatBeat); 

2.心跳包的主线程 

C/C++ code 

DWORD CALLBACK CHeartBeat::WaitProc(LPVOID lpVoid)
{
HANDLE          hTimer;
BOOL            bSuccess;
__int64         qwDueTime;
LARGE_INTEGER   liDueTime;
char           szError[255];
DWORD          dwResult;
HANDLE*  phTimers=NULL,*phEvents=NULL;
while(true)
{
if(hbList.size()>0)
{
//manage events
            phEvents=(HANDLE*)LocalAlloc(LPTR|LMEM_ZEROINIT,sizeof(HANDLE)*hbList.size());
for(vector<CHeartBeat*>::size_type i=0;i<hbList.size();++i)
{
if(hbList[i]->m_hEvent!=NULL)
phEvents[i]=hbList[i]->m_hEvent;
}
//wait for one of event become signal, if there are no events become sigal,after 500 millisecond, it return
            dwResult=WaitForMultipleObjects((DWORD)hbList.size(),phEvents,FALSE,500);
if(dwResult>=WAIT_OBJECT_0 && dwResult<=WAIT_OBJECT_0+hbList.size()-1)
{
vector<CHeartBeat*>::iterator iter=hbList.begin();
for (;iter!=hbList.end();++iter)
{
if((*iter)->m_hEvent==phEvents[dwResult-WAIT_OBJECT_0])
{
if((*iter)->m_hTimer!=NULL)
{
//when follow MCT health messages was recevied
                            CancelWaitableTimer((*iter)->m_hTimer);
bSuccess = SetWaitableTimer(
(*iter)->m_hTimer,           // Handle to the timer object
                                &liDueTime,       // When timer will become signaled
                                1200000,             // Periodic timer interval of 2 seconds
                                NULL,
NULL,
FALSE );          // Do not restore a suspended system

if (! bSuccess )
{
sprintf( szError, "SetWaitableTimer failed with Error \
                                                  %d.\n", GetLastError() );
                                cout<<szError<<endl;
CloseHandle( hTimer );
return false;
}
}
else
{
if ( hTimer = CreateWaitableTimer(
NULL,                   // Default security attributes
                                FALSE,                  // Create auto-reset timer
                                NULL ) )           // Name of waitable timer
                            {
// Create an integer that will be used to signal the timer
// 120 seconds from now.
                                qwDueTime = -20 * _SECOND;
// Copy the relative time into a LARGE_INTEGER.
                                liDueTime.LowPart  = (DWORD) ( qwDueTime & 0xFFFFFFFF );
liDueTime.HighPart = (LONG)  ( qwDueTime >> 32 );
bSuccess = SetWaitableTimer(
hTimer,           // Handle to the timer object
                                    &liDueTime,       // When timer will become signaled
                                    1200000,             // Periodic timer interval of 120 seconds
                                    NULL,
NULL,
FALSE );          // Do not restore a suspended system

if (! bSuccess )
{
sprintf( szError, "SetWaitableTimer failed with Error \
                                                      %d.\n", GetLastError() );
                                    cout<<szError<<endl;
CloseHandle( hTimer );
return false;
}
(*iter)->m_hTimer=hTimer;
}
else
{
sprintf( szError, "CreateWaitableTimer failed with Error \
                                                  %d.\n", GetLastError() );
                                cout<<szError<<endl;
return false;
}
}
ResetEvent((*iter)->m_hEvent);
}
}
}
else if(dwResult==WAIT_FAILED)
{
OutputDebugString(_T("wait event singal failed\n"));
}
LocalFree((HLOCAL)phEvents);
//manage waitable timers 
            phTimers=(HANDLE*)LocalAlloc(LPTR|LMEM_ZEROINIT,sizeof(HANDLE)*(hbList.size()));
for(vector<CHeartBeat*>::size_type i=0;i<hbList.size();++i)
{
if(hbList[i]->m_hTimer!=NULL)
phTimers[i]=hbList[i]->m_hTimer;
}
//wait for one of event become signal, if there are no events become sigal,after 500 millisecond, it return
            DWORD dwResult=WaitForMultipleObjects((DWORD)(hbList.size()),phTimers,FALSE,500);
if(dwResult>=WAIT_OBJECT_0 && dwResult<=WAIT_OBJECT_0+hbList.size()-1)
{
//a client is power off
                vector<CHeartBeat*>::iterator iter=hbList.begin();
int i=0;
for (;iter!=hbList.end();++iter)
{
i++;
if((*iter)->m_hTimer==phTimers[dwResult-WAIT_OBJECT_0])
{
CHeartBeat* phb=(*iter);
char mbProjNum[1024];
strcpy(mbProjNum,phb->m_strProj);
printf("\na power off event occuered on ProjNum:%s\n",mbProjNum);
//before remove element, clear event and timer
                        SetEvent((*iter)->m_hEvent);
WaitForSingleObject((*iter)->m_hEvent,INFINITE);
CloseHandle((*iter)->m_hEvent);
CloseHandle((*iter)->m_hTimer);
//remove element
                        hbList.erase(iter);
break;
}
}
}
LocalFree((HLOCAL)phTimers);
}
//#region 1
//heart beat thread will exist
        if(WaitForSingleObject(m_hExitHandle,500)==WAIT_OBJECT_0)
{
if(hbList.size()>0)
{
vector<CHeartBeat*>::iterator iter=hbList.begin();
for(;iter!=hbList.end();++iter)
{
CHeartBeat* pHB=(CHeartBeat*)(*iter);
if(pHB->m_hEvent!=NULL)
{
CloseHandle(pHB->m_hEvent);
SetEvent(pHB->m_hEvent);
WaitForSingleObject(pHB->m_hEvent,INFINITE);
}
if(pHB->m_hTimer!=NULL)
{
CancelWaitableTimer(pHB->m_hTimer);
CloseHandle(pHB->m_hTimer);
}
SAFE_DELETE(pHB);
}
hbList.clear();
break;
}
}
//#endregion

}
return true;
}




3.当有客户连接时,向心跳线程发送事件信号:客户第一次连接时,服务端会创建一个心跳包对象并将其加入列表;客户后续再连接时,服务端仅设置对应的事件信号 

C/C++ code 

bool bExist=false;
CHeartBeat* pHeartBeat=NULL;
for(vector<CHeartBeat*>::iterator iter=hbList.begin();
iter!=hbList.end();
++iter)
{
if(strcmp((*iter)->GetProjNum(),pMessageInfo->msg_base_info.szPlanNumber)==0)
{
//IM has received health message previous for special plannumber
            bExist=true;
pHeartBeat=*iter;
break;
}
}
if(bExist)
{
//notify CHeartBeat to reset waitable timer
        if(pHeartBeat!=NULL) SetEvent(pHeartBeat->GetEventHandle());
return true;
}
else
{
CHeartBeat* pHeart=new CHeartBeat();
//notify CHeartBeat to create waitable timer
        if(!pHeart->Start(pMessageInfo->msg_base_info.szPlanNumber))
{
SAFE_DELETE(pHeart);
return false;
}
hbList.push_back(pHeart);
return true;
}
return true;




3.以下是CHeartBeat类的Start方法: 

C/C++ code 

bool  CHeartBeat::Start(const char* projNum)
{
HANDLE handle;
handle=CreateEvent(NULL,TRUE,FALSE,NULL);
if(handle==NULL)
{
return false;
}
m_hEvent=handle;
SetEvent(m_hEvent);
strcpy(m_strProj,projNum);
return true;
}




4.当有服务端结束时,等待心跳线程退出 
// Create an unnamed manual-reset event whose initial state is already signaled,
// and register it with CHeartBeat as the exit event.
HANDLE hExitHeartBeat=CreateEvent(NULL,TRUE,TRUE,NULL); 
CHeartBeat::SetExitEvent(hExitHeartBeat); 
// Wait, with no timeout, on the heartbeat thread handle.
WaitForSingleObject(hHeatBeat,INFINITE);,  (1) 
对于心跳线程的退出处理用代码段2中的region 1 

请问各位大侠,为什么当服务端运行到WaitForSingleObject(hHeatBeat,INFINITE)时,这个函数无法返回,而心跳线程明明已经退出了? 
当把语句(1)修改成WaitForSingleObject(hHeatBeat,1000);时 
程序有时候会内存泄露?  

 

终于解决了,原来在结束线程的时候,要先检查一下线程是否退出,然后使用WaitForSingleObject等待线程变成有信号的.而且当要等待一个线程退出时,不能在创建线程之后就立刻关闭句柄,而是要在等待线程变成信号态之后再关闭句柄。 
因此 
代码段1与代码段3合并成: 


C/C++ code 

bool bExist=false;
CHeartBeat* pHeartBeat=NULL;
for(vector<CHeartBeat*>::iterator iter=hbList.begin();
iter!=hbList.end();
++iter)
{
if(strcmp((*iter)->GetProjNum(),pMessageInfo->msg_base_info.szPlanNumber)==0)
{
//IM has received health message previous for special plannumber
            bExist=true;
pHeartBeat=*iter;
break;
}
}
if(bExist)
{
//notify CHeartBeat to reset waitable timer
        if(pHeartBeat!=NULL) SetEvent(pHeartBeat->GetEventHandle());
return true;
}
else
{
if(hbList.size()==0)
{
g_hHeartBeat=CreateThread(NULL,0,CHeartBeat::WaitProc,NULL,0,NULL);
}
CHeartBeat* pHeart=new CHeartBeat();
//notify CHeartBeat to create waitable timer
        if(!pHeart->Start(pMessageInfo->msg_base_info.szPlanNumber))
{
SAFE_DELETE(pHeart);
return false;
}
hbList.push_back(pHeart);
return true;
}



代码段4写成: 


C/C++ code 

// Stop the heartbeat thread if it is still running, then release its handle.
// Initialised so that a failed GetExitCodeThread cannot leave the value indeterminate.
DWORD dwExitCode=0;
// NOTE(review): assumes g_hHeartBeat is NULL when no heartbeat thread was ever created - confirm.
if(g_hHeartBeat!=NULL)
{
    if(GetExitCodeThread(g_hHeartBeat,&dwExitCode) && dwExitCode==STILL_ACTIVE)
    {
        // Manual-reset event created in the signaled state: handing it to CHeartBeat
        // asks the heartbeat thread to leave its loop.
        HANDLE hExitHeartBeat=CreateEvent(NULL,TRUE,TRUE,NULL);
        if(hExitHeartBeat!=NULL)
        {
            CHeartBeat::SetExitEvent(hExitHeartBeat);
            // Only wait when the exit request could actually be delivered;
            // otherwise this wait would never return.
            WaitForSingleObject(g_hHeartBeat,INFINITE);
        }
    }
    // Close the handle whether or not the thread had already exited,
    // and clear the global so it is not closed or waited on again.
    CloseHandle(g_hHeartBeat);
    g_hHeartBeat=NULL;
}



只有当接收到消息时,才启动心跳线程,当心跳包列表为空时,立刻退出心跳线程。这样可以防止线程一直运行下去,因为在线程不调用WaitFor*处理而运行无限循环语句时,程序会吃CPU达99%。 



心跳包的处理需要启动一个新线程,这个线程需要管理三类核心对象和一个客户列表: 
1.每个客户端上线时,发出的事件m_hEvent 
2.接收到客户连接请求时,开启的定时器m_hTimer 
3.主线程退出时,用于通知心跳线程退出的事件m_hExitHandle 
4.客户列表(包含每个客户的事件、等待定时器和ProjNum),ProjNum对应每个客户的地址 

当有第一个客户上线时,创建一个事件m_hEvent,然后启动心跳线程,在心跳线程中使用WaitForMultipleObjects等待事件变成有信号状态, 
当事件有信号时,使用CreateWaitableTimer创建定时器,然后再使用SetWaitableTimer来设置定时器。 
当客户第二次发送消息时,会再次将m_hEvent设置为有信号状态,这时心跳线程的WaitForMultipleObjects返回后,使用SetWaitableTimer刷新定时器的等待时间,等待时间重新变成20秒 
当某个客户过了20秒还没有发消息过来,定时器就会变成有信号状态,这时心跳线程使用WaitForMultipleObjects等待定时器会返回该定时器的索引,之后心跳线程会从客户列表中移除对应的成员 
当有客户下线时,会发送一个特定消息,心跳线程处理这个消息时,直接将某个客户从列表中移除。 
当所有客户都下线时,主线程使用m_hExitHandle通知心跳线程来关闭自身 

注意:我这个示例中没有心跳线程向客户端回发消息的处理,添加时需要在接收到m_hEvent之后回发消息。 
WaitForMultipleObjects一次最多只能等待64个句柄(MAXIMUM_WAIT_OBJECTS),也就是最多处理64个客户;如果有更多客户上线,需要将客户列表分段来循环等待。