        Consider the following scenario: clients keep sending messages, and the server needs to process them asynchronously.

        Handling them one at a time wastes resources; a better approach is to process them in batches.

        When the message volume is very large, a message queue such as Kafka is the natural first choice, but more often we want a lighter-weight solution.

        Let's break down the technical requirements. The solution needs to provide the following (a rough sketch of the resulting shape follows the list):

  • Aggregate messages before processing (at most BatchSize items per batch)
  • Delayed processing (flush a partial batch after at most LingerTime)
  • Custom error handling
  • Concurrent processing
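
        Before writing any code, it helps to pin these requirements down as types. Below is a rough sketch of the surface such a component needs; the names here are illustrative, not the final API:

package batcher

import "time"

// BatchHandler processes one aggregated batch and reports any error.
type BatchHandler func(items []interface{}) error

// Config captures the tunables named in the requirements above.
type Config struct {
	BatchSize  int           // flush once this many items have accumulated
	LingerTime time.Duration // flush a partial batch after this much delay
	Workers    int           // number of concurrent worker goroutines
	OnError    func(err error, items []interface{})
}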

        With these requirements in mind, I quickly implemented the first step: aggregate messages, then process them.

package main

import (
	"fmt"
	"time"
)

func main() {
	var (
		eventQueue     = make(chan interface{}, 4)
		batchSize      = 8
		workers        = 2
		batchProcessor = func(messages []interface{}) {
			fmt.Printf("%+v \n", messages)
		}
	)

	// Each worker owns its own batch slice, so no locking is needed.
	for i := 0; i < workers; i++ {
		go func() {
			var batch []interface{}
			for {
				msg := <-eventQueue
				batch = append(batch, msg)
				if len(batch) == batchSize {
					batchProcessor(batch)
					batch = make([]interface{}, 0, batchSize)
				}
			}
		}()
	}

	for i := 0; i < 100; i++ {
		eventQueue <- i
	}

	// Give the workers a moment to print the full batches before main
	// exits; the final partial batch is still lost at this stage, which
	// is exactly what the linger timer added below will fix.
	time.Sleep(100 * time.Millisecond)
}

        The code is simple, but the core is already there.

  • A buffered channel works as a FIFO queue (see the sketch after this list)
  • Several resident goroutines provide concurrency
  • The goroutines run concurrently with one another, but code inside each goroutine runs sequentially, so each worker can operate on its own batch without any locking.
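
        To make the first point concrete, here is a minimal, self-contained sketch showing that a buffered channel behaves as a fixed-capacity FIFO queue:

package main

import "fmt"

func main() {
	// Sends enqueue at the tail; receives dequeue from the head.
	queue := make(chan int, 4)
	for i := 1; i <= 4; i++ {
		queue <- i
	}
	for i := 0; i < 4; i++ {
		fmt.Println(<-queue) // prints 1, 2, 3, 4 in order
	}
}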

        The next step is to add delayed processing and error handling.

package main

import (
	"fmt"
	"time"
)

func main() {
	var (
		eventQueue     = make(chan interface{}, 4)
		batchSize      = 8
		workers        = 2
		lingerTime     = 14 * time.Millisecond
		batchProcessor = func(batch []interface{}) error {
			fmt.Printf("%+v \n", batch)
			return nil
		}
		errHandler = func(err error, batch []interface{}) {
			fmt.Println("some error happens:", err)
		}
	)

	for i := 0; i < workers; i++ {
		go func() {
			var batch []interface{}
			// Create the timer stopped and drained, so it only runs
			// while a partial batch is pending.
			lingerTimer := time.NewTimer(0)
			if !lingerTimer.Stop() {
				<-lingerTimer.C
			}
			defer lingerTimer.Stop()

			for {
				select {
				case msg := <-eventQueue:
					batch = append(batch, msg)
					if len(batch) != batchSize {
						// Arm the timer when the first item of a new
						// batch arrives; break leaves only the select.
						if len(batch) == 1 {
							lingerTimer.Reset(lingerTime)
						}
						break
					}

					if err := batchProcessor(batch); err != nil {
						errHandler(err, batch)
					}

					// Stop the timer, draining its channel if it has
					// already fired, so the next Reset starts clean.
					if !lingerTimer.Stop() {
						<-lingerTimer.C
					}

					batch = make([]interface{}, 0)
				case <-lingerTimer.C:
					// The linger time elapsed: flush the partial batch.
					if err := batchProcessor(batch); err != nil {
						errHandler(err, batch)
					}

					batch = make([]interface{}, 0)
				}
			}
		}()
	}

	for i := 0; i < 100; i++ {
		eventQueue <- i
		time.Sleep(1 * time.Millisecond)
	}

	// Wait past the linger time so the last partial batch gets flushed.
	time.Sleep(100 * time.Millisecond)
}

        Although only two features were added, the code is noticeably more complex. This, I suppose, is how many libraries grow.

        While you are focused on the core problem, the code stays clear; as features accumulate, the line count grows quickly.

        At that point, if you lose track of the core, it is easy to get lost in the code. Anyone who has joined a new project, or read the source of a mature one, probably knows the feeling. (That is also why I list the code from each stage; I hope you, dear reader, find it useful.)

        Back to the code: the reason it uses time.Timer instead of time.After is that time.After leaks memory when used inside a for-select loop. Every call to time.After allocates a fresh timer, and that timer is not released until it fires, so a loop that keeps selecting the other case keeps piling up pending timers.
For a detailed analysis, see 《golang time.After内存泄露问题分析》 and 《GOLANG中time.After释放的问题》.
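
        To make the difference concrete, here is a minimal sketch of the reusable-timer pattern the code above follows (the channel and message values here are made up for illustration):

package main

import (
	"fmt"
	"time"
)

func main() {
	events := make(chan int)
	go func() {
		for i := 0; i < 3; i++ {
			events <- i
		}
	}()

	// Anti-pattern: writing `case <-time.After(time.Minute):` inside the
	// loop would allocate a brand-new timer on every iteration, and each
	// one lives until it fires even when another case wins the select.

	// Preferred: one timer, reused across iterations.
	timer := time.NewTimer(time.Minute)
	defer timer.Stop()
	for received := 0; received < 3; {
		select {
		case e := <-events:
			fmt.Println("event:", e)
			received++
			// Drain before Reset, as the time.Timer docs require.
			if !timer.Stop() {
				<-timer.C
			}
			timer.Reset(time.Minute)
		case <-timer.C:
			fmt.Println("timeout")
			return
		}
	}
}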

        So yes, the more code you write, the more bugs you breed; but with features still missing, the code has to be written all the same.

        What we have now is more than enough for a prototype, but a general-purpose library still needs more, for example: custom configuration.

        The final version comes in at right around 200 lines:

package channelx

import (
	"runtime"
	"sync"
	"time"
)

// Aggregator batches items from a channel and processes them with a pool of workers
type Aggregator struct {
	option         AggregatorOption
	wg             *sync.WaitGroup
	quit           chan struct{}
	eventQueue     chan interface{}
	batchProcessor BatchProcessFunc
}

// AggregatorOption holds the aggregator configuration
type AggregatorOption struct {
	BatchSize         int
	Workers           int
	ChannelBufferSize int
	LingerTime        time.Duration
	ErrorHandler      ErrorHandlerFunc
	Logger            Logger // logging interface, defined elsewhere in the channelx package
}

// BatchProcessFunc processes one batch of aggregated items
type BatchProcessFunc func([]interface{}) error

// SetAggregatorOptionFunc customizes the aggregator option
type SetAggregatorOptionFunc func(option AggregatorOption) AggregatorOption

// ErrorHandlerFunc handles an error returned by batch processing
type ErrorHandlerFunc func(err error, items []interface{}, batchProcessFunc BatchProcessFunc, aggregator *Aggregator)

// NewAggregator creates an aggregator with the given batch processor and option funcs
func NewAggregator(batchProcessor BatchProcessFunc, optionFuncs ...SetAggregatorOptionFunc) *Aggregator {
	option := AggregatorOption{
		BatchSize:  8,
		Workers:    runtime.NumCPU(),
		LingerTime: 1 * time.Minute,
	}

	for _, optionFunc := range optionFuncs {
		option = optionFunc(option)
	}

	// Ensure the channel buffer can hold at least one item per worker.
	if option.ChannelBufferSize <= option.Workers {
		option.ChannelBufferSize = option.Workers
	}

	return &Aggregator{
		eventQueue:     make(chan interface{}, option.ChannelBufferSize),
		option:         option,
		quit:           make(chan struct{}),
		wg:             new(sync.WaitGroup),
		batchProcessor: batchProcessor,
	}
}

// TryEnqueue attempts to enqueue an item without blocking; it reports whether the item was accepted
func (agt *Aggregator) TryEnqueue(item interface{}) bool {
	select {
	case agt.eventQueue <- item:
		return true
	default:
		if agt.option.Logger != nil {
			agt.option.Logger.Warnf("Aggregator: Event queue is full and try reschedule")
		}

		// Yield the processor so a worker gets a chance to drain the
		// queue, then retry once before giving up.
		runtime.Gosched()

		select {
		case agt.eventQueue <- item:
			return true
		default:
			if agt.option.Logger != nil {
				agt.option.Logger.Warnf("Aggregator: Event queue is still full and %+v is skipped.", item)
			}
			return false
		}
	}
}

// Enqueue adds an item to the queue, blocking while the queue is full
func (agt *Aggregator) Enqueue(item interface{}) {
	agt.eventQueue <- item
}

// Start launches the aggregator workers
func (agt *Aggregator) Start() {
	for i := 0; i < agt.option.Workers; i++ {
		index := i
		// Register the worker before spawning it, so a concurrent Stop
		// cannot call Wait while the counter is still zero.
		agt.wg.Add(1)
		go func() {
			defer agt.wg.Done()
			agt.work(index)
		}()
	}
}

// Stop the aggregator
func (agt *Aggregator) Stop() {
	close(agt.quit)
	agt.wg.Wait()
}

// SafeStop stops the aggregator like Stop, but first waits for the event queue to drain so that no queued item is missed
func (agt *Aggregator) SafeStop() {
	if len(agt.eventQueue) == 0 {
		close(agt.quit)
	} else {
		ticker := time.NewTicker(50 * time.Millisecond)
		for range ticker.C {
			if len(agt.eventQueue) == 0 {
				close(agt.quit)
				break
			}
		}
		ticker.Stop()
	}
	agt.wg.Wait()
}

// work is the worker loop: it accumulates items into a batch and flushes
// when the batch is full, when the linger timer fires, or on quit.
func (agt *Aggregator) work(index int) {
	defer func() {
		if r := recover(); r != nil {
			if agt.option.Logger != nil {
				agt.option.Logger.Errorf("Aggregator: recover worker as bad thing happens %+v", r)
			}

			// Restart the worker; the WaitGroup slot acquired in Start is still held.
			agt.work(index)
		}
	}()

	batch := make([]interface{}, 0, agt.option.BatchSize)
	// Create the linger timer stopped and drained; it is armed only while
	// a partial batch is pending.
	lingerTimer := time.NewTimer(0)
	if !lingerTimer.Stop() {
		<-lingerTimer.C
	}
	defer lingerTimer.Stop()

loop:
	for {
		select {
		case req := <-agt.eventQueue:
			batch = append(batch, req)

			batchSize := len(batch)
			if batchSize < agt.option.BatchSize {
				if batchSize == 1 {
					lingerTimer.Reset(agt.option.LingerTime)
				}
				break
			}

			agt.batchProcess(batch)

			if !lingerTimer.Stop() {
				<-lingerTimer.C
			}
			batch = make([]interface{}, 0, agt.option.BatchSize)
		case <-lingerTimer.C:
			if len(batch) == 0 {
				break
			}

			agt.batchProcess(batch)
			batch = make([]interface{}, 0, agt.option.BatchSize)
		case <-agt.quit:
			if len(batch) != 0 {
				agt.batchProcess(batch)
			}

			break loop
		}
	}
}

// batchProcess hands one batch to the user-supplied processor and routes
// any error to the configured handler.
func (agt *Aggregator) batchProcess(items []interface{}) {
	agt.wg.Add(1)
	defer agt.wg.Done()
	if err := agt.batchProcessor(items); err != nil {
		if agt.option.Logger != nil {
			agt.option.Logger.Errorf("Aggregator: error happens")
		}

		if agt.option.ErrorHandler != nil {
			go agt.option.ErrorHandler(err, items, agt.batchProcessor, agt)
		} else if agt.option.Logger != nil {
			agt.option.Logger.Errorf("Aggregator: error happens in batchProcess and is skipped")
		}
	} else if agt.option.Logger != nil {
		agt.option.Logger.Infof("Aggregator: %d items have been sent.", len(items))
	}
}
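
        For completeness, here is a minimal usage sketch of the final Aggregator. The import path is hypothetical; point it at wherever the channelx package lives in your project:

package main

import (
	"fmt"
	"time"

	"example.com/yourproject/channelx" // hypothetical import path
)

func main() {
	batchProcessor := func(items []interface{}) error {
		fmt.Printf("processing %d items: %+v\n", len(items), items)
		return nil
	}

	agt := channelx.NewAggregator(batchProcessor,
		func(option channelx.AggregatorOption) channelx.AggregatorOption {
			option.BatchSize = 16
			option.Workers = 4
			option.LingerTime = 50 * time.Millisecond
			return option
		})

	agt.Start()
	for i := 0; i < 100; i++ {
		agt.TryEnqueue(i)
	}
	agt.SafeStop() // flush everything still queued before shutting down
}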

        That concludes this walkthrough of batching messages with Go channels.