


# define model for multi-gpu
# Split every dequeued batch (input images, heatmaps, vector maps) into
# args.gpus parts so that each GPU tower receives its own shard.
q_inp_split = tf.split(q_inp, args.gpus)
q_heat_split = tf.split(q_heat, args.gpus)
q_vect_split = tf.split(q_vect, args.gpus)

# Per-tower results collected while the towers are built below.
output_vectmap = []    # last-stage vector-map tensor of each tower
output_heatmap = []    # last-stage heatmap tensor of each tower
losses = []            # one averaged loss per (tower, stage)
last_losses_l1 = []    # final-stage vector-map loss of each tower
last_losses_l2 = []    # final-stage heatmap loss of each tower
outputs = []           # full network output of each tower

# Build one tower per GPU. Variables are created by the first tower and
# reused by all the others (reuse=True for gpu_id > 0).
for gpu_id in range(args.gpus):
    with tf.device(tf.DeviceSpec(device_type="GPU", device_index=gpu_id)):
        with tf.variable_scope(tf.get_variable_scope(), reuse=(gpu_id > 0)):
            # Network object, path of its pretrained weights and the name
            # pattern of its last layer for the requested model.
            net, pretrain_path, last_layer = get_network(args.model, q_inp_split[gpu_id])

            # An explicitly given checkpoint overrides the default
            # pretrained-weights path.
            if args.checkpoint:
                pretrain_path = args.checkpoint

            # Outputs of the last stage: vector map (L) and heatmap (S).
            vect, heat = net.loss_last()
            output_vectmap.append(vect)
            output_heatmap.append(heat)
            # NOTE(review): assumes the network exposes get_output(), which
            # returns its final concatenated layer - confirm against the
            # network base class.
            outputs.append(net.get_output())

            # Per-stage outputs of both branches (L and S).
            l1s, l2s = net.loss_l1_l2()
            # L2 loss of every stage against the ground truth of this shard.
            for idx, (l1, l2) in enumerate(zip(l1s, l2s)):
                loss_l1 = tf.nn.l2_loss(tf.concat(l1, axis=0) - q_vect_split[gpu_id], name='loss_l1_stage%d_tower%d' % (idx, gpu_id))
                loss_l2 = tf.nn.l2_loss(tf.concat(l2, axis=0) - q_heat_split[gpu_id], name='loss_l2_stage%d_tower%d' % (idx, gpu_id))
                losses.append(tf.reduce_mean([loss_l1, loss_l2]))

            # After the loop loss_l1 / loss_l2 hold the losses of the final
            # stage; keep them so they can be reported separately.
            last_losses_l1.append(loss_l1)
            last_losses_l2.append(loss_l2)

# Stitch the per-tower outputs back together along the batch axis.
outputs = tf.concat(outputs, axis=0)


def get_network(type, placeholder_input, sess_for_load=None, trainable=True):
    """Build the pose network selected by *type*.

    Args:
        type: Model name, e.g. 'mobilenet_thin', 'mobilenet_v2_large', 'cmu'.
        placeholder_input: Input tensor, passed to the network as its
            'image' input.
        sess_for_load: Optional session. When given, trained weights are
            loaded into it: from the pretrained file for 'cmu', 'vgg' and
            'openpose', otherwise from the checkpoint registered for *type*.
        trainable: Forwarded to the network constructor.

    Returns:
        Tuple ``(net, pretrain_path_full, last_layer)``: the network object,
        the pretrained-weights path joined onto the base path, and the name
        pattern of the last layer.

    Raises:
        Exception: If *type* is not a known model name, if the weight file
            for a cmu/vgg/openpose model does not exist, or if restoring a
            checkpoint fails.
        KeyError: If *sess_for_load* is given but no trained checkpoint is
            registered for *type*.
    """
    if type == 'mobilenet':
        net = MobilenetNetwork({'image': placeholder_input}, conv_width=0.75, conv_width2=1.00, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v1_0.75_224_2017_06_14/mobilenet_v1_0.75_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_fast':
        net = MobilenetNetwork({'image': placeholder_input}, conv_width=0.5, conv_width2=0.5, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v1_0.75_224_2017_06_14/mobilenet_v1_0.75_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_accurate':
        net = MobilenetNetwork({'image': placeholder_input}, conv_width=1.00, conv_width2=1.00, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v1_1.0_224_2017_06_14/mobilenet_v1_1.0_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'

    elif type == 'mobilenet_thin':
        net = MobilenetNetworkThin({'image': placeholder_input}, conv_width=0.75, conv_width2=0.50, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v1_0.75_224_2017_06_14/mobilenet_v1_0.75_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'

    elif type in ['mobilenet_v2_w1.4_r1.0', 'mobilenet_v2_large', 'mobilenet_v2_large_quantize']:       # m_v2_large
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=1.4, conv_width2=1.0, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_w1.4_r0.5':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=1.4, conv_width2=0.5, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_w1.0_r1.0':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=1.0, conv_width2=1.0, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_w1.0_r0.75':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=1.0, conv_width2=0.75, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_w1.0_r0.5':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=1.0, conv_width2=0.5, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_w0.75_r0.75':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=0.75, conv_width2=0.75, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_0.75_224/mobilenet_v2_0.75_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_w0.5_r0.5' or type == 'mobilenet_v2_small':                                # m_v2_fast
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=0.5, conv_width2=0.5, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_0.5_224/mobilenet_v2_0.5_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'

    elif type == 'mobilenet_v2_1.4':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=1.4, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_1.0':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=1.0, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_1.0_224/mobilenet_v2_1.0_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_0.75':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=0.75, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_0.75_224/mobilenet_v2_0.75_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'
    elif type == 'mobilenet_v2_0.5':
        net = Mobilenetv2Network({'image': placeholder_input}, conv_width=0.5, trainable=trainable)
        pretrain_path = 'pretrained/mobilenet_v2_0.5_224/mobilenet_v2_0.5_224.ckpt'
        last_layer = 'MConv_Stage6_L{aux}_5'

    elif type in ['cmu', 'openpose']:
        net = CmuNetwork({'image': placeholder_input}, trainable=trainable)
        pretrain_path = 'numpy/openpose_coco.npy'
        last_layer = 'Mconv7_stage6_L{aux}'
    elif type in ['cmu_quantize', 'openpose_quantize']:
        net = CmuNetwork({'image': placeholder_input}, trainable=trainable)
        pretrain_path = 'train/cmu/bs8_lr0.0001_q_e80/model_latest-18000'
        last_layer = 'Mconv7_stage6_L{aux}'
    elif type == 'vgg':
        net = CmuNetwork({'image': placeholder_input}, trainable=trainable)
        pretrain_path = 'numpy/openpose_vgg16.npy'
        last_layer = 'Mconv7_stage6_L{aux}'
    else:
        # Only reached when no branch above matched the requested name.
        raise Exception('Invalid Model Name.')

    pretrain_path_full = os.path.join(_get_base_path(), pretrain_path)
    if sess_for_load is not None:
        if type in ['cmu', 'vgg', 'openpose']:
            # These models are loaded directly from their pretrained file.
            if not os.path.isfile(pretrain_path_full):
                raise Exception('Model file doesn\'t exist, path=%s' % pretrain_path_full)
            net.load(pretrain_path_full, sess_for_load)
        else:
            # Some checkpoint directories are named after the input
            # resolution ("<width>x<height>"); fall back to an empty suffix
            # when the input shape cannot be formatted as integers.
            try:
                s = '%dx%d' % (placeholder_input.shape[2], placeholder_input.shape[1])
            except Exception:
                s = ''
            ckpts = {
                'mobilenet': 'trained/mobilenet_%s/model-246038' % s,
                'mobilenet_thin': 'trained/mobilenet_thin_%s/model-449003' % s,
                'mobilenet_fast': 'trained/mobilenet_fast_%s/model-189000' % s,
                'mobilenet_accurate': 'trained/mobilenet_accurate/model-170000',
                'mobilenet_v2_w1.4_r0.5': 'trained/mobilenet_v2_w1.4_r0.5/model_latest-380401',
                'mobilenet_v2_large': 'trained/mobilenet_v2_w1.4_r1.0/model-570000',
                'mobilenet_v2_small': 'trained/mobilenet_v2_w0.5_r0.5/model_latest-380401',
            }
            ckpt_path = os.path.join(_get_base_path(), ckpts[type])
            loader = tf.train.Saver()
            try:
                loader.restore(sess_for_load, ckpt_path)
            except Exception as e:
                raise Exception('Fail to load model files. \npath=%s\nerr=%s' % (ckpt_path, str(e)))

    return net, pretrain_path_full, last_layer



# [Figure caption from the source article: "tensorflow yolov3 face detection / tensorflow pedestrian detection"]


# Network class derived from network_base.BaseNetwork.
# NOTE(review): no class body follows this header at this point in the file;
# the top-level `setup(self)` function further down presumably belongs to
# this class - confirm against the original source.
class CmuNetwork(network_base.BaseNetwork):


# Base class for the network definitions: __init__ stores the input tensors
# and sets up the bookkeeping attributes listed below.
class BaseNetwork(object):
    def __init__(self, inputs, trainable=True):
        # The input nodes for this network
        self.inputs = inputs
        # The current list of terminal nodes
        self.terminals = []
        # Mapping from layer names to layers (seeded with a copy of `inputs`)
        self.layers = dict(inputs)
        # If true, the resulting variables are set as trainable
        self.trainable = trainable
        # Switch variable for dropout (defaults to the constant 1.0)
        # NOTE(review): the call below is cut off - its remaining arguments and
        # closing parenthesis are missing here; restore from the original source.
        self.use_dropout = tf.placeholder_with_default(tf.constant(1.0),


def setup(self):
    """Declare every layer of the six-stage, two-branch network.

    Each stage has two branches: L1 produces the 38-channel vector map (L)
    and L2 produces the 19-channel heatmap / confidence map (S). From stage
    2 onwards a stage takes the previous stage's L and S outputs together
    with the shared feature map F ('conv4_4_CPM') as its input.

    NOTE(review): the `self.feed(...)` line that opens each chain was absent
    and has been reconstructed from the layer names; confirm the feed names
    and their order against the original network definition.
    """
    # Backbone: the first 10 layers of VGG19, followed by two extra
    # convolutions that produce the feature map F.
    # NOTE(review): a preprocessing/normalisation layer may originally have
    # preceded conv1_1 - confirm.
    (self.feed('image')
         .conv(3, 3, 64, 1, 1, name='conv1_1')
         .conv(3, 3, 64, 1, 1, name='conv1_2')
         .max_pool(2, 2, 2, 2, name='pool1_stage1', padding='VALID')
         .conv(3, 3, 128, 1, 1, name='conv2_1')
         .conv(3, 3, 128, 1, 1, name='conv2_2')
         .max_pool(2, 2, 2, 2, name='pool2_stage1', padding='VALID')
         .conv(3, 3, 256, 1, 1, name='conv3_1')
         .conv(3, 3, 256, 1, 1, name='conv3_2')
         .conv(3, 3, 256, 1, 1, name='conv3_3')
         .conv(3, 3, 256, 1, 1, name='conv3_4')
         .max_pool(2, 2, 2, 2, name='pool3_stage1', padding='VALID')
         .conv(3, 3, 512, 1, 1, name='conv4_1')
         .conv(3, 3, 512, 1, 1, name='conv4_2')    # end of the VGG19 layers
         .conv(3, 3, 256, 1, 1, name='conv4_3_CPM')
         .conv(3, 3, 128, 1, 1, name='conv4_4_CPM'))    # feature map F

    # Stage 1: both branches read F and produce L1 (vector map) and
    # S1 (heatmap) respectively.
    (self.feed('conv4_4_CPM')
         .conv(3, 3, 128, 1, 1, name='conv5_1_CPM_L1')
         .conv(3, 3, 128, 1, 1, name='conv5_2_CPM_L1')
         .conv(3, 3, 128, 1, 1, name='conv5_3_CPM_L1')
         .conv(1, 1, 512, 1, 1, name='conv5_4_CPM_L1')
         .conv(1, 1, 38, 1, 1, relu=False, name='conv5_5_CPM_L1'))

    (self.feed('conv4_4_CPM')
         .conv(3, 3, 128, 1, 1, name='conv5_1_CPM_L2')
         .conv(3, 3, 128, 1, 1, name='conv5_2_CPM_L2')
         .conv(3, 3, 128, 1, 1, name='conv5_3_CPM_L2')
         .conv(1, 1, 512, 1, 1, name='conv5_4_CPM_L2')
         .conv(1, 1, 19, 1, 1, relu=False, name='conv5_5_CPM_L2'))

    # Stage 2: input is the previous stage's L and S plus the feature map F.
    (self.feed('conv5_5_CPM_L1',
               'conv5_5_CPM_L2',
               'conv4_4_CPM')
         .concat(3, name='concat_stage2')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage2_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage2_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage2_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage2_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage2_L1')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage2_L1')
         .conv(1, 1, 38, 1, 1, relu=False, name='Mconv7_stage2_L1'))

    (self.feed('concat_stage2')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage2_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage2_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage2_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage2_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage2_L2')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage2_L2')
         .conv(1, 1, 19, 1, 1, relu=False, name='Mconv7_stage2_L2'))

    # Stage 3: input is the previous stage's L and S plus the feature map F.
    (self.feed('Mconv7_stage2_L1',
               'Mconv7_stage2_L2',
               'conv4_4_CPM')
         .concat(3, name='concat_stage3')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage3_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage3_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage3_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage3_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage3_L1')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage3_L1')
         .conv(1, 1, 38, 1, 1, relu=False, name='Mconv7_stage3_L1'))

    (self.feed('concat_stage3')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage3_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage3_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage3_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage3_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage3_L2')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage3_L2')
         .conv(1, 1, 19, 1, 1, relu=False, name='Mconv7_stage3_L2'))

    # Stage 4: input is the previous stage's L and S plus the feature map F.
    (self.feed('Mconv7_stage3_L1',
               'Mconv7_stage3_L2',
               'conv4_4_CPM')
         .concat(3, name='concat_stage4')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage4_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage4_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage4_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage4_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage4_L1')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage4_L1')
         .conv(1, 1, 38, 1, 1, relu=False, name='Mconv7_stage4_L1'))

    (self.feed('concat_stage4')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage4_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage4_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage4_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage4_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage4_L2')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage4_L2')
         .conv(1, 1, 19, 1, 1, relu=False, name='Mconv7_stage4_L2'))

    # Stage 5: input is the previous stage's L and S plus the feature map F.
    (self.feed('Mconv7_stage4_L1',
               'Mconv7_stage4_L2',
               'conv4_4_CPM')
         .concat(3, name='concat_stage5')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage5_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage5_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage5_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage5_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage5_L1')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage5_L1')
         .conv(1, 1, 38, 1, 1, relu=False, name='Mconv7_stage5_L1'))

    (self.feed('concat_stage5')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage5_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage5_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage5_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage5_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage5_L2')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage5_L2')
         .conv(1, 1, 19, 1, 1, relu=False, name='Mconv7_stage5_L2'))

    # Stage 6: input is the previous stage's L and S plus the feature map F.
    (self.feed('Mconv7_stage5_L1',
               'Mconv7_stage5_L2',
               'conv4_4_CPM')
         .concat(3, name='concat_stage6')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage6_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage6_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage6_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage6_L1')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage6_L1')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage6_L1')
         .conv(1, 1, 38, 1, 1, relu=False, name='Mconv7_stage6_L1'))

    (self.feed('concat_stage6')
         .conv(7, 7, 128, 1, 1, name='Mconv1_stage6_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv2_stage6_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv3_stage6_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv4_stage6_L2')
         .conv(7, 7, 128, 1, 1, name='Mconv5_stage6_L2')
         .conv(1, 1, 128, 1, 1, name='Mconv6_stage6_L2')
         .conv(1, 1, 19, 1, 1, relu=False, name='Mconv7_stage6_L2'))

    # Final layer: concatenate S6 and L6. The heatmap comes first so that
    # channels [:19] of the output are the heatmap and [19:] the vector map.
    with tf.variable_scope('Openpose'):
        (self.feed('Mconv7_stage6_L2',
                   'Mconv7_stage6_L1')
             .concat(3, name='concat_stage7'))




with tf.device(tf.DeviceSpec(device_type="GPU")):
    # define loss
    # Sum of every per-stage loss from all towers, divided by the batch size.
    total_loss = tf.reduce_sum(losses) / args.batchsize
    # Same normalisation for the final-stage vector-map (PAF) losses only.
    total_loss_ll_paf = tf.reduce_sum(last_losses_l1) / args.batchsize
    # Same normalisation for the final-stage heatmap losses only.
    total_loss_ll_heat = tf.reduce_sum(last_losses_l2) / args.batchsize
    # Combined final-stage loss (PAF + heatmap).
    total_loss_ll = tf.reduce_sum([total_loss_ll_paf, total_loss_ll_heat])

    # define optimizer
    # Number of optimisation steps that make up one epoch.
    # NOTE(review): 121745 is presumably the number of training samples -
    # confirm against the dataset in use.
    step_per_epoch = 121745 // args.batchsize
    global_step = tf.Variable(0, trainable=False)
    if ',' not in args.lr:
        # A single value: cosine decay from that rate down to 0 over the
        # whole training run.
        starter_learning_rate = float(args.lr)
        learning_rate = tf.train.cosine_decay(starter_learning_rate, global_step, args.max_epoch * step_per_epoch, alpha=0.0)
    else:
        # A comma-separated list: piecewise-constant schedule that moves to
        # the next rate every 5 epochs. piecewise_constant needs exactly one
        # boundary fewer than there are rates, hence range(1, len(lrs)).
        lrs = [float(x) for x in args.lr.split(',')]
        boundaries = [step_per_epoch * 5 * i for i in range(1, len(lrs))]
        learning_rate = tf.train.piecewise_constant(global_step, boundaries, lrs)




# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8)
# optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.8, use_locking=True, use_nesterov=True)
# tf.GraphKeys.UPDATE_OPS is a built-in graph collection holding operations
# that have to run before each training step; it is used together with
# tf.control_dependencies.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# Operations created inside tf.control_dependencies only run after the
# operations passed to it have finished.
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(
        total_loss,
        global_step=global_step,
        colocate_gradients_with_ops=True
    )
logger.info('define model-')

# define summary
# Scalar summaries recorded during training, as (tag, tensor) pairs.
for summary_tag, summary_tensor in (
        ("loss", total_loss),
        ("loss_lastlayer", total_loss_ll),
        ("loss_lastlayer_paf", total_loss_ll_paf),
        ("loss_lastlayer_heat", total_loss_ll_heat),
        ("queue_size", enqueuer.size()),
        ("lr", learning_rate)):
    tf.summary.scalar(summary_tag, summary_tensor)
merged_summary_op = tf.summary.merge_all()





# Placeholders for the validation losses and for the rendered sample images.
# The four loss placeholders are scalars that the training loop feeds with
# values it averages in Python.
valid_loss, valid_loss_ll, valid_loss_ll_paf, valid_loss_ll_heat = (
    tf.placeholder(tf.float32, shape=[]) for _ in range(4)
)
# Image batches: 4 training samples and 12 validation samples, 640x640 RGB.
sample_train = tf.placeholder(tf.float32, shape=(4, 640, 640, 3))
sample_valid = tf.placeholder(tf.float32, shape=(12, 640, 640, 3))

# Image summaries showing every image of each batch.
train_img = tf.summary.image('training sample', sample_train, max_outputs=4)
valid_img = tf.summary.image('validation sample', sample_valid, max_outputs=12)
# Scalar summaries for the overall and the last-layer validation loss.
valid_loss_t = tf.summary.scalar("loss_valid", valid_loss)
valid_loss_ll_t = tf.summary.scalar("loss_valid_lastlayer", valid_loss_ll)

# Validation summaries are merged separately from the training ones.
merged_validate_op = tf.summary.merge(
    [train_img, valid_img, valid_loss_t, valid_loss_ll_t]
)




# Saver used for the periodic and the final checkpoints.
saver = tf.train.Saver(max_to_keep=1000)
# Session configuration: fall back to another device when an op cannot be
# placed as requested, and grow GPU memory on demand.
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    logger.info('model weights initialization')
    # Every variable (weights, optimizer slots, global_step) needs a value
    # before anything is restored over it or the first step is run.
    sess.run(tf.global_variables_initializer())

    # Load weights: a checkpoint directory takes precedence over the
    # pretrained-weights file.
    if args.checkpoint and os.path.isdir(args.checkpoint):
        logger.info('Restore from checkpoint...')
        saver.restore(sess, tf.train.latest_checkpoint(args.checkpoint))
        logger.info('Restore from checkpoint...Done')
    elif pretrain_path:
        logger.info('Restore pretrained weights... %s' % pretrain_path)
        if '.npy' in pretrain_path:
            # Weights stored in .npy format are loaded by the network itself.
            net.load(pretrain_path, sess, False)
        else:
            # Try to restore every restorable variable first; if the
            # checkpoint does not match, restore the backbone layers only.
            try:
                loader = tf.train.Saver(net.restorable_variables(only_backbone=False))
                loader.restore(sess, pretrain_path)
            except Exception:
                logger.info('Restore only weights in backbone layers.')
                loader = tf.train.Saver(net.restorable_variables())
                loader.restore(sess, pretrain_path)
        logger.info('Restore pretrained weights...Done')

    logger.info('prepare file writer')
    file_writer = tf.summary.FileWriter(os.path.join(logpath, args.tag), sess.graph)

    logger.info('prepare coordinator')
    coord = tf.train.Coordinator()
    # NOTE(review): nothing in this block attaches `coord` to the input
    # queue or starts the enqueuer; if that is not done elsewhere, the first
    # training step will wait on an empty queue - confirm.

    logger.info('Training Started.')
    time_started = time.time()
    last_gs_num = last_gs_num2 = 0
    initial_gs_num = sess.run(global_step)

    last_log_epoch1 = last_log_epoch2 = -1
    while True:
        # One optimisation step.
        _, gs_num = sess.run([train_op, global_step])
        # Current (fractional) epoch.
        curr_epoch = float(gs_num) / step_per_epoch

        # Stop once the requested number of epochs has been trained.
        if gs_num > step_per_epoch * args.max_epoch:
            break

        if gs_num - last_gs_num >= 500:
            # Report the training losses every 500 steps.
            train_loss, train_loss_ll, train_loss_ll_paf, train_loss_ll_heat, lr_val, summary = sess.run([total_loss, total_loss_ll, total_loss_ll_paf, total_loss_ll_heat, learning_rate, merged_summary_op])

            # log of training loss / accuracy
            batch_per_sec = (gs_num - initial_gs_num) / (time.time() - time_started)
            logger.info('epoch=%.2f step=%d, %0.4f examples/sec lr=%f, loss=%g, loss_ll=%g, loss_ll_paf=%g, loss_ll_heat=%g' % (curr_epoch, gs_num, batch_per_sec * args.batchsize, lr_val, train_loss, train_loss_ll, train_loss_ll_paf, train_loss_ll_heat))
            last_gs_num = gs_num

            if last_log_epoch1 < curr_epoch:
                file_writer.add_summary(summary, curr_epoch)
                last_log_epoch1 = curr_epoch

        if gs_num - last_gs_num2 >= 2000:
            # Every 2000 steps: save a checkpoint, then run validation.
            # save weights
            saver.save(sess, os.path.join(modelpath, args.tag, 'model_latest'), global_step=global_step)

            average_loss = average_loss_ll = average_loss_ll_paf = average_loss_ll_heat = 0
            total_cnt = 0

            # The validation set is read once and cached in memory; the
            # data flow is released afterwards.
            if not validation_cache:
                for images_test, heatmaps, vectmaps in tqdm(df_valid.get_data()):
                    validation_cache.append((images_test, heatmaps, vectmaps))
                del df_valid
                df_valid = None

            # log of test accuracy
            # Accumulate the losses weighted by the size of each batch.
            for images_test, heatmaps, vectmaps in validation_cache:
                lss, lss_ll, lss_ll_paf, lss_ll_heat, vectmap_sample, heatmap_sample = sess.run(
                    [total_loss, total_loss_ll, total_loss_ll_paf, total_loss_ll_heat, output_vectmap, output_heatmap],
                    feed_dict={q_inp: images_test, q_vect: vectmaps, q_heat: heatmaps}
                )
                average_loss += lss * len(images_test)
                average_loss_ll += lss_ll * len(images_test)
                average_loss_ll_paf += lss_ll_paf * len(images_test)
                average_loss_ll_heat += lss_ll_heat * len(images_test)
                total_cnt += len(images_test)

            logger.info('validation(%d) %s loss=%f, loss_ll=%f, loss_ll_paf=%f, loss_ll_heat=%f' % (total_cnt, args.tag, average_loss / total_cnt, average_loss_ll / total_cnt, average_loss_ll_paf / total_cnt, average_loss_ll_heat / total_cnt))
            last_gs_num2 = gs_num

            # Run the network on 4 recent training images plus the fixed
            # validation images to render example predictions.
            sample_image = [enqueuer.last_dp[0][i] for i in range(4)]
            outputMat = sess.run(
                outputs,
                feed_dict={q_inp: np.array((sample_image + val_image) * max(1, (args.batchsize // 16)))}
            )
            # Channels [:19] hold the heatmap, the remaining ones the PAF.
            pafMat, heatMat = outputMat[:, :, :, 19:], outputMat[:, :, :, :19]

            sample_results = []
            for i, image in enumerate(sample_image):
                test_result = CocoPose.display_image(image, heatMat[i], pafMat[i], as_numpy=True)
                test_result = cv2.resize(test_result, (640, 640))
                test_result = test_result.reshape([640, 640, 3]).astype(float)
                sample_results.append(test_result)

            # Validation images follow the training samples in the batch.
            offset = len(sample_image)
            test_results = []
            for i, image in enumerate(val_image):
                test_result = CocoPose.display_image(image, heatMat[offset + i], pafMat[offset + i], as_numpy=True)
                test_result = cv2.resize(test_result, (640, 640))
                test_result = test_result.reshape([640, 640, 3]).astype(float)
                test_results.append(test_result)

            # save summary
            summary = sess.run(merged_validate_op, feed_dict={
                valid_loss: average_loss / total_cnt,
                valid_loss_ll: average_loss_ll / total_cnt,
                valid_loss_ll_paf: average_loss_ll_paf / total_cnt,
                valid_loss_ll_heat: average_loss_ll_heat / total_cnt,
                sample_valid: test_results,
                sample_train: sample_results
            })
            if last_log_epoch2 < curr_epoch:
                file_writer.add_summary(summary, curr_epoch)
                last_log_epoch2 = curr_epoch

    # Final checkpoint once training has finished.
    saver.save(sess, os.path.join(modelpath, args.tag, 'model'), global_step=global_step)
logger.info('optimization finished. %f' % (time.time() - time_started))
