1.CreateThread与_beginthreadex



#pragma once

#include<cstdio>
#include<Windows.h>
#include<crtdbg.h>
#include<process.h>

/**
 * Thread entry point for the CreateThread demo.
 *
 * Prints the ID of the thread it is running on, followed by a greeting.
 *
 * @param pM  Unused (fun1 passes NULL).
 * @return    Always 0, which becomes the thread's exit code.
 */
DWORD WINAPI ThreadFun1(LPVOID pM)
{
    (void)pM;
    // GetCurrentThreadId() returns a DWORD (unsigned long), so it is printed
    // with %lu; %d expects an int and does not match the argument type.
    printf("子线程的线程ID号为:%lu\nHello world!\n", GetCurrentThreadId());
    return 0;
}

/**
 * Demo 1: start one thread with CreateThread, wait for it to finish and
 * release its handle.
 */
void fun1()
{
    printf("简单多线程实例!\n\n");

    /*
       CreateThread parameters
       1: security attributes of the thread kernel object
       2: stack size of the thread
       3: address of the function the thread executes
       4: argument handed to that function
       5: creation flags (e.g. CREATE_SUSPENDED)
       6: receives the thread ID
    */
    HANDLE handle = CreateThread(NULL, 0, ThreadFun1, NULL, 0, NULL);
    if (handle == NULL)
    {
        // CreateThread signals failure with NULL; there is nothing to wait
        // for or to close in that case.
        fprintf(stderr, "CreateThread failed\n");
        return;
    }
    WaitForSingleObject(handle, INFINITE);
    CloseHandle(handle);
}

// Global counter shared by every ThreadFun2 instance.
int COUNT = 0;

/**
 * Thread entry point for the _beginthreadex demo.
 *
 * Increments the global COUNT and prints the current thread ID together with
 * the counter value.
 *
 * @param pM  Unused (fun2 passes NULL).
 * @return    Always 0, which becomes the thread's exit code.
 */
unsigned int _stdcall ThreadFun2(PVOID pM)
{
    (void)pM;
    // COUNT is updated without any lock: the accompanying text uses this
    // program's output to show what unsynchronized access looks like.
    ++COUNT;
    // The thread ID is a DWORD (unsigned long), hence %lu instead of %d.
    printf("子线程的线程ID号为:%lu,报数为%d\nHello world!\n", GetCurrentThreadId(), COUNT);
    return 0;
}

void fun2()
{
    printf("简单多线程实例!\n\n");

    const int THREAD_NUM = 5;
    HANDLE handle[THREAD_NUM];

    for (size_t i = 0; i < THREAD_NUM; i++)
    {
        handle[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadFun2, NULL, 0, NULL);
    }
    WaitForMultipleObjects(THREAD_NUM, handle, TRUE, INFINITE);
}

/**
 * Entry point of the first sample program.
 *
 * Runs the _beginthreadex demo (fun2). To try the CreateThread variant
 * instead, call fun1() in its place.
 *
 * Author's note on why _beginthreadex is recommended: when threads use the
 * standard C run-time library, race conditions arise easily, and
 * _beginthreadex keeps a thread's data from being overwritten by other
 * threads. See "Win32 Multithreaded Programming" for a fuller explanation.
 */
int main(void)
{
    fun2();

    // Dump any CRT heap blocks that are still allocated.
    _CrtDumpMemoryLeaks();

    return 0;
}



其中执行 fun2 的结果如下(蛮有趣的:不加锁时,多个线程同时对 COUNT 自增并打印,报数重复、顺序错乱的现象竟然这么直观):

[图1:第 1 节 fun2 的运行结果截图]

 

2.原子操作



#pragma once

#include<cstdio>
#include<Windows.h>
#include<crtdbg.h>
#include<process.h>

// Counter shared by all worker threads; the workers in this program change it
// only through InterlockedIncrement.
volatile long COUNT = 0;
// Number of worker threads each demo starts.
const int THREAD_NUM = 500;

/**
 * Worker used by fun1: pause, atomically add one to COUNT, pause again.
 *
 * @param pM  Unused.
 * @return    Always 0.
 */
unsigned int _stdcall ThreadFun1(LPVOID pM)
{
    const DWORD pauseMs = 50;

    Sleep(pauseMs);
    // Atomic replacement for a plain "++COUNT".
    InterlockedIncrement(&COUNT);
    Sleep(pauseMs);

    return 0;
}

/**
 * Worker used by fun2: pause, atomically add one to COUNT, then print the
 * number read through pM together with the counter value this thread produced.
 *
 * @param pM  Address of the creator's loop counter (fun2 passes &i, a size_t).
 * @return    Always 0.
 */
unsigned int _stdcall ThreadFun2(LPVOID pM)
{
    Sleep(50);

    // Print the value returned by the atomic increment. Reading COUNT again
    // afterwards could already include increments made by other threads.
    const long current = InterlockedIncrement(&COUNT);

    // The pointed-to object is a size_t, so read it as one (reading it through
    // an int* only works by accident of size and byte order). The creating
    // thread keeps modifying that counter, so the number seen here is whatever
    // it holds at this instant -- the effect the accompanying text describes.
    const int id = (int)*(const volatile size_t *)pM;

    // current is a long, hence %ld.
    printf("线程编号为%d,全局资源值为%ld\n", id, current);
    return 0;
}


void fun1()
{
    HANDLE handle[THREAD_NUM];

    for (size_t i = 0; i < THREAD_NUM; i++)
    {
        handle[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadFun1, NULL, 0, NULL);
    }
    WaitForMultipleObjects(THREAD_NUM, handle, TRUE, INFINITE);
    printf("有%d个线程启动,记录结果为%d", THREAD_NUM, COUNT);
}

void fun2()
{
    HANDLE handle[THREAD_NUM];

    for (size_t i = 0; i < THREAD_NUM; i++)
    {
        handle[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadFun2, &i, 0, NULL);
    }
    WaitForMultipleObjects(THREAD_NUM, handle, TRUE, INFINITE);
}

/**
 * Entry point of the atomic-operation sample.
 *
 * Runs test 2 (fun2), which prints each thread's number next to the value of
 * the shared counter. Test 1 (fun1) can be called here instead; the author
 * notes that with it a mismatch between the number of started threads and the
 * recorded count is easy to observe.
 */
int main()
{
    fun2();

    // Dump any CRT heap blocks that are still allocated.
    _CrtDumpMemoryLeaks();

    return 0;
}



fun1:这里的 ++ 操作在汇编层面分为三步:(1)把值从内存读入寄存器;(2)在寄存器中加一;(3)把结果从寄存器写回内存。如果多个线程的这三步交错执行,其中一些自增结果就会被覆盖而丢失,所以这个过程容易出现问题。

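使用原子操作后,在只启动 50 个线程时结果准确;但是把上限调至 500 时,线程启动数和计数又不一致。我原先的理解是 inc 虽然在汇编下是一条指令,实际执行时仍会被拆成多个层次;其实并非如此,InterlockedIncrement 保证整个"读-改-写"过程不可分割。真正的原因是 WaitForMultipleObjects 一次最多只能等待 MAXIMUM_WAIT_OBJECTS(64)个句柄,传入 500 个句柄时调用会立即失败返回,主线程没等子线程全部执行完就打印了 COUNT。把句柄分批(每批不超过 64 个)等待,计数就与线程数一致了。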

[图2:第 2 节 fun1 的运行结果截图]

fun2:运行结果更为混乱,其中原子操作部分不表。

线程编号不准确的原因是:传给子线程函数的是循环变量 i 的地址而不是它的值,等子线程真正去读取时,i 在主线程中往往已经被修改了。

[图3:第 2 节 fun2 的运行结果截图]

 

3.关键区域同步(CRITICAL_SECTION)



#pragma once

#include<cstdio>
#include<Windows.h>
#include<crtdbg.h>
#include<process.h>

// Shared counter; ThreadFun increments it while holding ThreadPar2.
unsigned int count = 0;
// Number of worker threads main starts.
const int threadnum = 50;
// ThreadPar1: entered by main before each thread is created, left inside ThreadFun.
// ThreadPar2: guards count and the printf that reports it.
CRITICAL_SECTION ThreadPar1, ThreadPar2;

/**
 * Worker thread: copies the number pM points at, leaves ThreadPar1, sleeps,
 * then increments and prints the shared counter under ThreadPar2.
 *
 * @param pM  Address of a UINT (main passes the address of its loop counter).
 * @return    Always 0.
 */
unsigned int _stdcall ThreadFun(LPVOID pM)
{
    // Take a private copy of the number before the creator changes it again.
    UINT num = *(UINT*)pM;
    // Leave the critical section meant to protect the thread number.
    // It was entered by main, not by this thread.
    // NOTE(review): the Win32 docs describe leaving a critical section the
    // caller does not own as an error -- confirm before reusing this pattern.
    LeaveCriticalSection(&ThreadPar1);

    Sleep(50); // stand-in for real work

    EnterCriticalSection(&ThreadPar2);
    ++count;
    printf("线程编号为%3d,全局资源值为%3d\n", num, count);
    LeaveCriticalSection(&ThreadPar2);

    return 0;
}


int main()
{
    InitializeCriticalSection(&ThreadPar1);
    InitializeCriticalSection(&ThreadPar2);

    HANDLE handle[threadnum];

    for (UINT i = 0; i < threadnum; i++)
    {
        //进入子线程ID号关键区域
        EnterCriticalSection(&ThreadPar1);
        handle[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadFun, &i, 0, NULL);
    }

    WaitForMultipleObjects(threadnum, handle, TRUE, INFINITE);
    DeleteCriticalSection(&ThreadPar1);
    DeleteCriticalSection(&ThreadPar2);

    //检测内存泄漏
    _CrtDumpMemoryLeaks();
    return 0;
}



输出结果:(嘿嘿,ID号不正确,计数正确)

[图4:第 3 节程序的运行结果截图]

出现此问题的原因在于 ThreadPar1 的线程所有权属于主线程,而不是子线程。关键段允许拥有它的线程重复进入,所以主线程在循环里每次调用 EnterCriticalSection 都会立刻成功,并不会等子线程把 i 读走,这也就导致其可以多次进入关键区域,继而导致线程编号不正确。

而ThreadPar2的线程所有权是子线程,所以不会出现这个问题,输出正常。

这也从侧面证明CRITICAL_SECTION无法解决同步问题,而只能解决互斥问题。

 

4.事件(Event)



#pragma once

#define _CRTDBG_MAP_ALLOC
#include<cstdio>
#include<Windows.h>
#include<crtdbg.h>
#include<process.h>

// Shared counter; ThreadFun1 increments it while holding ThreadPar.
unsigned int count = 0;
// Number of worker threads fun1 starts.
const int threadnum = 50;
// Event used by the demos: set by ThreadFun1, waited on by fun1 and by
// FastThreadFun/SlowThreadFun.
HANDLE ThreadEvent;
// Guards count and the printf that reports it.
CRITICAL_SECTION ThreadPar;

/**
 * Worker used by fun1: copies the number pM points at, signals ThreadEvent,
 * sleeps, then increments and prints the shared counter under ThreadPar.
 *
 * @param pM  Address of the creator's loop counter (fun1 passes &i, a size_t).
 * @return    Always 0.
 */
unsigned int _stdcall ThreadFun1(LPVOID pM)
{
    // The pointed-to object is a size_t, so read it as one (reading it through
    // an int* only works by accident of size and byte order). The copy is
    // taken before the event is set, i.e. before the creator moves on.
    const int num = (int)*(const size_t *)pM;
    SetEvent(ThreadEvent); // signal the event

    Sleep(50); // stand-in for real work

    EnterCriticalSection(&ThreadPar);
    ++count;
    // count is unsigned, hence %u.
    printf("线程编号为%d,全局资源值为%u\n", num, count);
    LeaveCriticalSection(&ThreadPar);
    return 0;
}

void fun1()
{
    //初始化事件(自动置位,初始无触发的匿名事件)和关键段
    /*CreateEvent参数说明
      1:安全控制
      2:手动置位(true)/自动置位(false)
         自动置位,对事件调用WaitForSingleObject后,
         会自动调用ResetEvent使事件变为未触发状态
      3:事件初始状态(TRUE表示已触发)
      4:事件名称(NULL表示匿名)
    */
    ThreadEvent = CreateEvent(NULL, false, false, NULL);
    InitializeCriticalSection(&ThreadPar);

    size_t i;
    HANDLE handle[threadnum];
    for (i = 0; i < threadnum; i++)
    {
        handle[i] = (HANDLE)_beginthreadex(NULL, 0, ThreadFun1, &i, 0, NULL);
        WaitForSingleObject(ThreadEvent, INFINITE);
    }
    WaitForMultipleObjects(threadnum, handle, TRUE, INFINITE);

    //销毁时间和关键段
    CloseHandle(ThreadEvent);
    DeleteCriticalSection(&ThreadPar);
    for ( i = 0; i < threadnum; i++)
    {
        CloseHandle(handle[i]);
    }
}

/**
 * "Fast" worker: after a 10 ms delay, announces itself, waits on ThreadEvent
 * and reports once the wait has returned.
 *
 * @param pM  NUL-terminated name of the thread, used in the messages.
 * @return    Always 0.
 */
unsigned int _stdcall FastThreadFun(LPVOID pM)
{
    PSTR threadName = (PSTR)pM;

    // Short delay, intended to make the order in which the threads reach the
    // wait call vary.
    Sleep(10);

    printf("%s 启动\n", threadName);
    WaitForSingleObject(ThreadEvent, INFINITE);
    printf("%s 等到事件被触发,顺利结束\n", threadName);

    return 0;
}

/**
 * "Slow" worker: after a 100 ms delay, announces itself, waits on ThreadEvent
 * and reports once the wait has returned.
 *
 * @param pM  NUL-terminated name of the thread, used in the messages.
 * @return    Always 0.
 */
unsigned int _stdcall SlowThreadFun(LPVOID pM)
{
    PSTR threadName = (PSTR)pM;

    // Longer delay, intended to make the order in which the threads reach the
    // wait call vary.
    Sleep(100);

    printf("%s 启动\n", threadName);
    WaitForSingleObject(ThreadEvent, INFINITE);
    printf("%s 等到事件被触发,顺利结束\n", threadName);

    return 0;
}

void fun2()
{
    bool bManualReset = true;
    ThreadEvent = CreateEvent(NULL, bManualReset, false, NULL);
    if (bManualReset)
    {
        printf("当前使用手动置位事件\n");
    }
    else
    {
        printf("当前使用自动置位事件\n");
    }

    char szFast[5][30]{ "快线程001","快线程002","快线程003","快线程004","快线程005" };
    char szSlow[5][30]{ "慢线程001","慢线程002","慢线程003","慢线程004","慢线程005" };

    size_t i = 0;
    for ( i = 0; i < 5; i++)
    {
        _beginthreadex(NULL, 0, FastThreadFun, szFast[i], 0, NULL);
    }
    for (i = 0; i < 5; i++)
    {
        _beginthreadex(NULL, 0, SlowThreadFun, szSlow[i], 0, NULL);
    }

    Sleep(50); //确保快线程已经全部启动
    printf("现在主线程触发事件脉冲-PlusEvent\n");
    PulseEvent(ThreadEvent); //调用PulseEvent()就相当于同时调用下面二句 
    //SetEvent(ThreadEvent);
    //ResetEvent(ThreadEvent);

    Sleep(3000);
    printf("时间到,主线程结束运行\n");
    CloseHandle(ThreadEvent);
}

/**
 * Entry point of the event sample.
 *
 * Runs test 2 (fun2, the PulseEvent demo). Test 1 (fun1, the event-based
 * hand-off of the thread number) can be called here instead.
 */
int main(void)
{
    fun2();

    // Dump any CRT heap blocks that are still allocated.
    _CrtDumpMemoryLeaks();

    return 0;
}



fun1:成功解决临界资源的问题

[图5:第 4 节 fun1 的运行结果截图]

fun2:PulseEvent:

函数说明:这是一个不常用的事件函数,此函数相当于SetEvent()后立即调用ResetEvent()(微软文档指出该函数并不可靠,仅为向后兼容而保留,新代码中不建议使用);此时情况可以分为两种:

1.对于手动置位事件,所有正处于等待状态下线程都变成可调度状态。

2.对于自动置位事件,所有正处于等待状态下线程只有一个变成可调度状态(随机)。

[图6:第 4 节 fun2 的运行结果截图(一)]

[图7:第 4 节 fun2 的运行结果截图(二)]