在长期运行的后台程序中,如果没有外部干预,goroutine 很容易进入一种不正常的状态,并且无法自行恢复。因此,在这类程序中建立一个机制来监控 goroutine 是否处于健康状态是很有用的:当它们变得异常时,就可以将其重启。我们把重启 goroutine 的过程称为"治愈"(Healing)。为了治愈 goroutine,需要使用心跳模式来检查被监控的 goroutine 是否活跃;心跳的类型取决于你想要监控的内容。我们把监控 goroutine 健康状况的这段逻辑称为"管理员"(steward),把它所监视的 goroutine 称为"管理区"(ward)。如果管理区的 goroutine 变得不健康,管理员将负责重新启动它。
// startGoroutineFn is the signature of a function that launches a monitorable
// goroutine. The goroutine is told to stop through done, is asked to pulse
// every pulseInterval, and reports liveness on the returned heartbeat channel.
type startGoroutineFn func(
	done <-chan interface{},
	pulseInterval time.Duration,
) (heartbeat <-chan interface{})
// newSteward builds a steward: a startGoroutineFn that launches the ward via
// startGoroutine, watches the ward's heartbeat, and restarts the ward whenever
// no heartbeat arrives within timeout.
//
// The returned function takes the steward's own done channel and pulse
// interval and returns the steward's heartbeat channel, so a steward can
// itself be monitored. That channel is closed when the steward exits.
var newSteward = func(timeout time.Duration, startGoroutine startGoroutineFn) startGoroutineFn {
	return func(done <-chan interface{}, pulseInterval time.Duration) <-chan interface{} {
		heartbeat := make(chan interface{})
		go func() {
			defer close(heartbeat)

			var wardDone chan interface{}
			var wardHeartbeat <-chan interface{}

			// startWard (re)launches the monitored goroutine in a uniform way.
			startWard := func() {
				// wardDone is the stop signal dedicated to this ward instance.
				wardDone = make(chan interface{})
				// The ward is asked to pulse at half the timeout; it stops when
				// either wardDone or the steward's done is closed.
				wardHeartbeat = startGoroutine(or(wardDone, done), timeout/2)
			}
			startWard()

			// Use an explicit ticker so it can be stopped when the steward exits.
			// A non-positive interval yields a nil channel (no pulses), matching
			// what time.Tick did, instead of panicking in time.NewTicker.
			var pulse <-chan time.Time
			if pulseInterval > 0 {
				ticker := time.NewTicker(pulseInterval)
				defer ticker.Stop()
				pulse = ticker.C
			}

		monitorLoop:
			for {
				// One timer per monitoring round; it is stopped explicitly on
				// every path that leaves the round before it fires.
				timeoutTimer := time.NewTimer(timeout)
				for {
					select {
					case <-pulse:
						// Non-blocking send: nobody may be listening to the steward.
						select {
						case heartbeat <- struct{}{}:
						default:
						}
					case _, ok := <-wardHeartbeat:
						if !ok {
							// A closed heartbeat channel is not a sign of life.
							// Disable this case so the timeout can fire and the
							// ward gets restarted, instead of spinning here.
							wardHeartbeat = nil
							continue
						}
						timeoutTimer.Stop()
						continue monitorLoop
					case <-timeoutTimer.C:
						// No heartbeat within the timeout: stop the current ward,
						// start a fresh one, and keep monitoring.
						log.Println("steward: ward unhealthy; restarting")
						close(wardDone)
						startWard()
						continue monitorLoop
					case <-done:
						timeoutTimer.Stop()
						return
					}
				}
			}
		}()
		return heartbeat
	}
}
// main demonstrates a steward supervising a ward that never sends a
// heartbeat: the steward is built with a 4-second timeout, and everything is
// shut down after 9 seconds.
func main() {
	log.SetFlags(log.Ltime | log.LUTC)
	log.SetOutput(os.Stdout)

	done := make(chan interface{})

	// irresponsibleWard only waits for its stop signal; it returns a nil
	// heartbeat channel, so the steward never hears from it.
	irresponsibleWard := func(stop <-chan interface{}, _ time.Duration) <-chan interface{} {
		log.Println("ward: Hello, I'm irresponsible!")
		go func() {
			<-stop
			log.Println("ward: I am halting.")
		}()
		return nil
	}
	supervised := newSteward(4*time.Second, irresponsibleWard)

	// Stop both the steward and the ward after nine seconds.
	time.AfterFunc(9*time.Second, func() {
		log.Println("main: halting steward and ward.")
		close(done)
	})

	// Drain the steward's heartbeat until it is closed.
	stewardPulse := supervised(done, 4*time.Second)
	for {
		if _, open := <-stewardPulse; !open {
			break
		}
	}
	// Alternative for comparison: run the ward without a steward.
	//doWork(done,4*time.Second )
	//<- time.After(12*time.Second)
	log.Println("Done")
}
//08:48:44 ward: Hello, I'm irresponsible!
//08:48:48 steward: ward unhealthy; restarting
//08:48:48 ward: Hello, I'm irresponsible!
//08:48:48 ward: I am halting.
//08:48:52 steward: ward unhealthy; restarting
//08:48:52 ward: Hello, I'm irresponsible!
//08:48:52 ward: I am halting.
//08:48:53 main: halting steward and ward.
//08:48:53 Done
下面使用闭包来生成 doWork:doWorkFn 返回一个可以交给管理员启动的 doWork 函数,以及一个用于读取结果的 intStream。
// doWorkFn builds a ward for the given integers. It returns the function a
// steward uses to start the ward, and a single stream on which the values sent
// by every ward instance are delivered (through bridge).
//
// Each ward instance sends the values of intList in order, looping back to the
// start after the last one. On a negative value it logs an error and exits,
// which a steward detects through the missing heartbeat.
var doWorkFn = func(done <-chan interface{}, intList ...int) (startGoroutineFn, <-chan interface{}) {
	intChanStream := make(chan (<-chan interface{}))
	intStream := bridge(done, intChanStream)

	// doWork is the closure that the steward starts and monitors.
	doWork := func(done <-chan interface{}, pulseInterval time.Duration) <-chan interface{} {
		// Channels used to communicate with this ward instance.
		intStream := make(chan interface{})
		heartbeat := make(chan interface{})
		go func() {
			defer close(intStream)

			// Register this instance's output stream with the bridge.
			select {
			case intChanStream <- intStream:
			case <-done:
				return
			}

			// Use an explicit ticker so every restarted ward releases its
			// ticker on exit. A non-positive interval means "no pulses",
			// matching time.Tick, instead of panicking in time.NewTicker.
			var pulse <-chan time.Time
			if pulseInterval > 0 {
				ticker := time.NewTicker(pulseInterval)
				defer ticker.Stop()
				pulse = ticker.C
			}
			// sendPulse emits a heartbeat without blocking if nobody listens.
			sendPulse := func() {
				select {
				case heartbeat <- struct{}{}:
				default:
				}
			}

			// With nothing to send, the loop below would spin without ever
			// checking done. Stay alive and responsive instead.
			if len(intList) == 0 {
				for {
					select {
					case <-pulse:
						sendPulse()
					case <-done:
						return
					}
				}
			}

			for {
			valueLoop:
				for _, intVal := range intList {
					if intVal < 0 {
						// Log the error and exit the goroutine.
						log.Printf("negative value: %v\n", intVal)
						return
					}
					for {
						select {
						case <-pulse:
							sendPulse()
						case intStream <- intVal:
							continue valueLoop
						case <-done:
							return
						}
					}
				}
			}
		}()
		return heartbeat
	}
	return doWork, intStream
}
// main runs the integer-producing ward under a steward with a one-second
// timeout and prints the first six values taken from the combined stream.
func main() {
	log.SetOutput(os.Stdout)
	log.SetFlags(log.Ltime | log.LUTC)

	done := make(chan interface{})
	defer close(done)

	ward, values := doWorkFn(done, 1, 2, -1, 3, 4, 5)

	// Alternative steward timeout: 1*time.Millisecond.
	//doWorkWithSteward := newSteward(1* time.Millisecond, doWork)
	steward := newSteward(1000*time.Millisecond, ward)

	// The steward's own heartbeat is discarded; only the values matter here.
	steward(done, 1*time.Hour)

	firstSix := take(done, values, 6)
	for received := range firstSix {
		fmt.Printf("Received:%v\n", received)
	}
}
//Received:1
//Received:2
//11:33:47 negative value: -1
//11:33:47 steward: ward unhealthy; restarting
//Received:1
//11:33:47 negative value: -1
//Received:2
//11:33:47 steward: ward unhealthy; restarting
//Received:1
//Received:2