一、背景

公司后端很多都是 dubbo 接口,python 调用就会存在难题。网上的一些连接方法很多都是基于 python2 的;有的是基于 telnetlib 直连,这个方法存在一些监控及安全问题,所以需要通过 socket 方法建立安全连接,但是该方法又存在序列化(hessian)的问题。于是就结合 pydubbo 包以及网上大家的一些现有探索,做总结并整合。

二、python直连Dubbo

import json
import math
import random
import socket
import struct
import telnetlib
import threading
import time
from sys import platform, maxsize
from urllib.parse import unquote

from kazoo.client import KazooClient

class ServiceNotAvailableError(ValueError):
    """Raised when a Dubbo service, or its provider list, is missing in ZooKeeper."""
    pass


class Zookeeper:
    """Small wrapper around a kazoo client for locating Dubbo providers.

    On construction it connects to ZooKeeper and indexes every child of the
    ``dubbo`` node by its short (package-less) service name.
    """

    client = None
    # Class-level fallback only.  Every instance gets its own mapping in
    # ``__init__`` so that two instances never share (or pollute) one dict.
    service_dict = {}

    def __init__(self, host_port, timeout=120):
        """Connect to ZooKeeper and index the registered Dubbo services.

        :param host_port: ZooKeeper address in ``"host:port"`` form.
        :param timeout: value forwarded to ``KazooClient(timeout=...)``.
        """
        self.host, self.port = host_port.split(":")
        self.host_port = host_port
        self.service_dict = {}

        # Connect to ZooKeeper.
        self.client = KazooClient(host_port, timeout=timeout)
        self.client.start()

        # Index every registered Dubbo service by its short name.
        for service in self.client.get_children('dubbo'):
            # Drop the package prefix; a later service with the same short
            # name overwrites the earlier entry.
            name = str(service).split('.')[-1]
            self.service_dict[name] = service

    def get_service_address(self, service):
        """Return the registered provider addresses of *service*.

        :param service: fully qualified service name, or a short name that
            is looked up in ``service_dict``.
        :return: list of ``(host, port, "host:port")`` tuples; host and port
            are strings.
        :raises KeyError: if a short name is not present in ``service_dict``.
        :raises ServiceNotAvailableError: if the service node or its
            ``providers`` child does not exist.
        """
        if '.' not in service:
            # A name without a package is resolved through the short-name index.
            service = self.service_dict[service]

        uri = 'dubbo/%s' % service
        providers_uri = '%s/providers' % uri
        if not self.client.exists(uri):
            raise ServiceNotAvailableError('服务"%s"不存在' % service)
        if not self.client.exists(providers_uri):
            raise ServiceNotAvailableError('服务"%s"没有提供者' % service)

        addrs = []
        for provider in self.client.get_children(providers_uri):
            # Each child is a URL-quoted provider URL; once unquoted, the
            # third '/'-separated field is "host:port".
            addr = str(unquote(provider)).split('/')[2]
            parts = addr.split(':')
            addrs.append((parts[0], parts[1], addr))
        return addrs

    def close(self):
        """Stop the ZooKeeper session and release the client's resources."""
        self.client.stop()
        # kazoo asks for close() on a stopped client before it is discarded.
        self.client.close()



class Dubbo:
    """Invoke Dubbo services through the provider's telnet console."""

    def __init__(self, interface, host_port, zkClient=None):
        """Prepare a telnet client for *interface*.

        :param interface: fully qualified service interface name.
        :param host_port: provider address as ``"host:port"``; replaced on
            every request when *zkClient* is given.
        :param zkClient: optional object exposing ``get_service_address`` and
            ``close`` (such as ``Zookeeper``) used to discover providers.
        """
        self.host, self.port = host_port.split(":")
        self.conn = telnetlib.Telnet()
        # The trailing dot lets generate_cmd() append the method name directly.
        self.interface = interface + '.'
        self.zk = zkClient

    def __call__(self, method, method_args_list):
        """Shortcut for :meth:`request`."""
        return self.request(method, method_args_list)

    def request(self, method, method_args_list):
        """Run ``invoke <interface>.<method>(<args>)`` and return the output.

        :param method: name of the method to invoke.
        :param method_args_list: argument values, each rendered with ``str``.
        :return: the text read up to the ``dubbo>`` prompt, split on CRLF.
        """
        if self.zk is not None:
            self._get_zk_host_port()

        self.conn.open(self.host, self.port, timeout=120)
        try:
            cmd_ = self.generate_cmd(method, method_args_list)
            self.conn.write('invoke {}\n'.format(cmd_).encode())
            res = self.conn.read_until('dubbo>'.encode()).decode().split('\r\n')
        finally:
            # Every request opens a new connection, so always release it;
            # otherwise each call would leak one socket.
            self.conn.close()
        return res

    def generate_cmd(self, method, method_args_list):
        """Build the ``interface.method(arg1,arg2,...)`` invoke expression.

        Arguments are rendered with ``str`` and separated by commas, so the
        caller must pass string arguments already quoted.
        """
        param_str = ','.join(str(p) for p in method_args_list)
        return '{}{}({})'.format(self.interface, method, param_str)

    def _get_zk_host_port(self):
        """Pick a provider address from ZooKeeper and store it on ``self``."""
        address_list = self.zk.get_service_address(self.interface[:-1])
        if len(address_list) > 1:
            # Several providers are registered: list them all, then one is
            # picked at random below.
            print('——' * 43)
            print('|%s服务有多个地址,使用index参数指定请求地址,随机圈选:|' % str(self.interface).center(30, ' '))
            print('-' * 86)
            for i, address in enumerate(address_list):
                print('| %d ==> %s:%s |' % (i, address[0], str(address[1]).ljust(80 - len(address[2]), ' ')))
            print('——' * 43)

        self.host, self.port = random.choice(address_list)[:2]
        print('当前连接地址: %s:%s' % (self.host, self.port))

    def close(self):
        """Close the telnet connection and, if present, the ZooKeeper client."""
        self.conn.close()
        if self.zk is not None:
            self.zk.close()

简单调用

if __name__ == '__main__':
    # NOTE: `host_dict` (looked up by environment key, yielding "host:port")
    # and `method_args` are placeholders that must be defined before running.
    serv_ = 'xx.xxxx.xx.xx.api.service.Xxxxx'
    zk = Zookeeper(host_dict['xx'])
    db_ = Dubbo(
        interface=serv_,
        host_port=host_dict['xx'],
        zkClient=zk
    )
    try:
        res_ = db_.request(method='xxXX', method_args_list=method_args)
        print("res_: \n", res_)
    finally:
        # Release the ZooKeeper session even when the request fails.
        db_.close()

三、python 通过 socket 及 hessian 序列化连接 Dubbo

socket连接相关知识

af 为地址族(Address Family),也就是 IP 地址类型,常用的有 AF_INET 和 AF_INET6。
AF 是“Address Family”的简写,
INET是“Internet”的简写。
AF_INET 表示 IPv4 地址,例如 127.0.0.1;
AF_INET6 表示 IPv6 地址,例如 1030::C9B4:FF12:48AA:1A2B。

type 为数据传输方式/套接字类型,常用的有 SOCK_STREAM(流格式套接字/面向连接的套接字) 和 SOCK_DGRAM(数据报套接字/无连接的套接字),
我们已经在《套接字有哪些类型》一节中进行了介绍。

protocol 表示传输协议,常用的有 IPPROTO_TCP 和 IPPROTO_UDP,分别表示 TCP 传输协议和 UDP 传输协议。

有了地址类型和数据传输方式,还不足以决定采用哪种协议吗?为什么还需要第三个参数呢?
正如大家所想,一般情况下有了 af 和 type 两个参数就可以创建套接字了,操作系统会自动推演出协议类型,
除非遇到这样的情况:有两种不同的协议支持同一种地址类型和数据传输类型。如果我们不指明使用哪种协议,操作系统是没办法自动推演的。
本教程使用 IPv4 地址,参数 af 的值为 AF_INET。如果使用 SOCK_STREAM 传输数据,那么满足这两个条件的协议只有 TCP

socket连接Dubbo

脚本 github 地址: https://github.com/scchy/CSDN/blob/master/socketDubbo.py

主要流程:

  • zk获取代理ip
  • 如果是proxy-hashed 需要再用get方法
  • 可以参考笔者的文章: 《python连接redis/codis_直连与通过kz代理连接》 中zkCodis._get_codis_config
  • 建立socket连接
  • hessian 序列化编码
  • 发送序列化之后的dubbo服务命令
  • 获取返回信息self.conn.recv(1024)
  • 将返回信息重新编码, 获取最终请求返回
class socketDubbo:
    """Invoke Dubbo services over a raw TCP socket using Hessian-encoded frames."""

    def __init__(self, interface, host_port, zkClient=None):
        """Store the target service and provider address.

        :param interface: fully qualified service interface name.
        :param host_port: provider address as ``"host:port"``; replaced on
            every request when *zkClient* is given.
        :param zkClient: optional object exposing ``get_service_address`` and
            ``close`` (such as ``Zookeeper``) used to discover providers.
        """
        self.host, self.port = host_port.split(":")
        self.interface = interface
        self.zk = zkClient
        # The socket is created per request: a TCP socket can be connected
        # only once, so reusing one object would break the second request.
        self.conn = None

    def __initial_connect(self):
        """Create a fresh, not yet connected TCP socket with a 120 s timeout."""
        self.conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
        # Do NOT call setblocking(True) afterwards: it is equivalent to
        # settimeout(None) and would silently remove the timeout.
        self.conn.settimeout(120)

    def __call__(self, method, method_args):
        """Shortcut for :meth:`request`."""
        return self.request(method, method_args)

    def request(self, method, method_args):
        """Send one request and return the decoded response value.

        :param method: accepted for interface symmetry with ``Dubbo.request``.
            NOTE(review): it is not used when building the frame (the frame
            always carries ``'$invoke'``), so the real method name presumably
            has to travel inside *method_args* -- confirm with the server side.
        :param method_args: JSON-serializable payload.
        :return: the value produced by ``Response.read_next()``.
        :raises ConnectionError: if the peer closes the connection before a
            complete response frame has been received.
        """
        # Resolve the provider address through ZooKeeper when available.
        if self.zk is not None:
            self._get_zk_host_port()

        self.__initial_connect()
        try:
            self.conn.connect((self.host, int(self.port)))
            # sendall() keeps writing until the whole frame is out;
            # send() may transmit only a prefix.
            self.conn.sendall(self.generate_cmd(method_args))
            body = self._read_response_body()
        finally:
            self.conn.close()

        rp = Response(body)
        # The first value of the body is consumed before the payload
        # (presumably the response-type flag -- TODO confirm).
        rp.read_int()
        return rp.read_next()

    def _read_response_body(self):
        """Read one Dubbo frame from the socket and return its body bytes.

        A Dubbo frame starts with a 16-byte header whose last four bytes hold
        the body length as a big-endian integer.
        """
        header = self._recv_exact(16)
        body_length = int.from_bytes(header[12:16], 'big')
        return self._recv_exact(body_length)

    def _recv_exact(self, size):
        """Receive exactly *size* bytes, looping over partial ``recv`` results."""
        buf = bytearray()
        while len(buf) < size:
            chunk = self.conn.recv(size - len(buf))
            if not chunk:
                raise ConnectionError(
                    'connection closed after %d of %d bytes' % (len(buf), size))
            buf.extend(chunk)
        return buf

    def generate_cmd(self, method_args):
        """Build the serialized request frame for *method_args*.

        The payload is JSON-encoded and sent as the single argument of a
        ``$invoke`` call on ``self.interface``.
        """
        request_param = {
            'dubbo_version': '2.0.2',
            'version': 'dev',
            'path': self.interface,
            'method': '$invoke',
            'arguments': [json.dumps(method_args)]
        }
        return Request(request_param).encode()

    def _get_zk_host_port(self):
        """Pick a provider address from ZooKeeper and store it on ``self``."""
        address_list = self.zk.get_service_address(self.interface)
        if len(address_list) > 1:
            # Several providers are registered: list them all, then one is
            # picked at random below.
            print('——' * 43)
            print('|%s服务有多个地址,使用index参数指定请求地址,随机圈选:|' % str(self.interface).center(30, ' '))
            print('-' * 86)
            for i, address in enumerate(address_list):
                print('| %d ==> %s:%s |' % (i, address[0], str(address[1]).ljust(80 - len(address[2]), ' ')))
            print('——' * 43)

        self.host, self.port = random.choice(address_list)[:2]
        print('当前连接地址: %s:%s' % (self.host, self.port))

    def close(self):
        """Close the socket (if any) and, if present, the ZooKeeper client."""
        if self.conn is not None:
            self.conn.close()
        if self.zk is not None:
            self.zk.close()



if __name__ == '__main__':
    # NOTE: `host_dict` (looked up by environment key, yielding "host:port")
    # and `method_args` are placeholders that must be defined before running.
    serv_ = 'xx.xxxx.xx.xx.api.service.Xxxxx'
    zk = Zookeeper(host_dict['dev'])
    # This section demonstrates the socket client, whose request() accepts
    # the `method_args` keyword used below.
    db_ = socketDubbo(
        interface=serv_,
        host_port=host_dict['dev'],
        zkClient=zk
    )
    try:
        res_ = db_.request(method='xxx', method_args=method_args)
        print("res_: \n", res_)
    finally:
        # Release the ZooKeeper session even when the request fails.
        db_.close()

hessian 序列化:核心是 _encode_request_body,需要和 java 同学联调,确认入参正确。

主要对 https://github.com/BSTester/dubbo-python 中的 dubbo.codec.encoder & dubbo.codec.decoder 做一些简单修改

class Request(object):
    """Serialize a Dubbo request into the byte sequence sent over the wire.

    The request is described by a dict with the keys ``dubbo_version``,
    ``path``, ``version``, ``method`` and ``arguments``.  Values of these
    types can be encoded:

    * boolean
    * int
    * long
    * double
    * string
    * object
    * list
    * ``None``
    """

    def __init__(self, request):
        """
        :param request: dict describing the call (see the class docstring).
        """
        self.__body = request
        # Class paths already written; the list index is the class id.
        self.__classes = []
        # List type names already written; the list index is the type ref.
        self.types = []
        self.invoke_id = 0

    def encode(self):
        """
        Serialize the whole request (header plus body).

        :return: ``bytearray`` ready to be written to the socket.
        """
        request_body = self._encode_request_body()
        invoke_id = list(bytearray(struct.pack('!q', self.invoke_id)))
        request_head = DEFAULT_REQUEST_META + invoke_id + get_request_body_length(request_body)
        return bytearray(request_head + request_body)

    def _get_parameter_types(self, arguments):
        """
        Build the concatenated JVM type descriptor string of all arguments.

        :param arguments: iterable of argument values.
        :return: descriptor string, e.g. ``'ILjava/lang/String;'``.
        """
        return ''.join(self._get_class_name(argument) for argument in arguments)

    def _get_class_name(self, _class):
        """
        Map a value to the JVM type descriptor of its Java counterpart.

        Conversion rules: https://stackoverflow.com/a/3442100/4614538

        :param _class: the value whose type is inspected.
        :return: JVM descriptor string.
        :raises HessianTypeError: for an empty list or an unsupported type.
        """
        if isinstance(_class, bool):  # bool must be tested before int (bool is an int subclass)
            return 'Z'
        elif isinstance(_class, int):
            if MIN_INT_32 <= _class <= MAX_INT_32:
                return 'I'
            else:
                return 'J'
        elif isinstance(_class, float):
            return 'D'
        elif isinstance(_class, str):
            return 'L' + 'java/lang/String' + ';'
        elif isinstance(_class, Object):
            path = _class.get_path()
            path = 'L' + path.replace('.', '/') + ';'
            return path
        elif isinstance(_class, list):
            if len(_class) == 0:
                raise HessianTypeError('Method parameter {} is a list but length is zero'.format(_class))
            return '[' + self._get_class_name(_class[0])
        else:
            raise HessianTypeError('Unknown argument type: {0}'.format(_class))

    def _encode_request_body(self):
        """
        Encode every part of the request body.

        :return: list of ints, each already reduced to the range 0..255.
        """
        dubbo_version = self.__body['dubbo_version']
        path = self.__body['path']
        version = self.__body['version']
        method = self.__body['method']
        arguments = self.__body['arguments']

        body = []
        for header_field in (dubbo_version, path, version, method):
            body.extend(self._encode_single_value(header_field))
        # NOTE(review): the argument list itself is encoded here, while
        # _get_parameter_types() -- which builds a type descriptor string --
        # is never called.  Confirm with the server side which of the two is
        # expected at this position.
        body.extend(self._encode_single_value(arguments))
        for argument in arguments:
            body.extend(self._encode_single_value(argument))

        attachments = {
            'path': path,
            'interface': path,
            'version': version
        }
        # The attachments map starts with 'H' and ends with 'Z'.
        body.append(ord('H'))
        for key, value in attachments.items():
            body.extend(self._encode_single_value(key))
            body.extend(self._encode_single_value(value))
        body.append(ord('Z'))

        # The encoders above may emit values outside 0..255 (negative numbers,
        # un-truncated shifts); reduce everything to one byte in a single pass.
        return [item & 0xff for item in body]

    @staticmethod
    def _encode_bool(value):
        """
        Encode a boolean.

        :param value: truthy or falsy value.
        :return: ``[ord('T')]`` or ``[ord('F')]``.
        """
        return [ord('T')] if value else [ord('F')]

    @staticmethod
    def _encode_int(value):
        """
        Encode an integer.

        Values outside the 32-bit range are written as 64-bit longs.  A value
        inside the 32-bit range cannot be told apart from a Java ``long``, so
        it is always written as an ``int``.  The returned items are not yet
        masked to a byte; ``_encode_request_body`` does that.

        :param value: the integer to encode.
        :return: list of ints.
        """
        result = []
        if value > MAX_INT_32 or value < MIN_INT_32:
            result.append(ord('L'))
            result.extend(list(bytearray(struct.pack('!q', value))))
            return result

        if -0x10 <= value <= 0x2f:
            # one-byte form
            result.append(value + 0x90)
        elif -0x800 <= value <= 0x7ff:
            # two-byte form
            result.append(0xc8 + (value >> 8))
            result.append(value)
        elif -0x40000 <= value <= 0x3ffff:
            # three-byte form
            result.append(0xd4 + (value >> 16))
            result.append(value >> 8)
            result.append(value)
        else:
            # full 32-bit form
            result.append(ord('I'))
            result.append(value >> 24)
            result.append(value >> 16)
            result.append(value >> 8)
            result.append(value)
        return result

    @staticmethod
    def _encode_float(value):
        """
        Encode a float, using a compact form whenever the value allows it.

        NaN and infinities (and finite values whose ``* 1000`` overflows)
        skip the compact forms, which require an integer conversion, and are
        written as a full 8-byte double.  The returned items are not yet
        masked to a byte; ``_encode_request_body`` does that.

        :param value: the float to encode.
        :return: list of ints.
        """
        result = []
        if math.isfinite(value):
            int_value = int(value)
            if int_value == value:
                if int_value == 0:
                    result.append(0x5b)
                    return result
                elif int_value == 1:
                    result.append(0x5c)
                    return result
                elif -0x80 <= int_value < 0x80:
                    result.append(0x5d)
                    result.append(int_value)
                    return result
                elif -0x8000 <= int_value < 0x8000:
                    result.append(0x5e)
                    result.append(int_value >> 8)
                    result.append(int_value)
                    return result

            scaled = value * 1000
            if math.isfinite(scaled):
                mills = int(scaled)
                if 0.001 * mills == value and MIN_INT_32 <= mills <= MAX_INT_32:
                    result.append(0x5f)
                    result.append(mills >> 24)
                    result.append(mills >> 16)
                    result.append(mills >> 8)
                    result.append(mills)
                    return result

        bits = double_to_long_bits(value)
        result.append(ord('D'))
        for shift in (56, 48, 40, 32, 24, 16, 8, 0):
            result.append(bits >> shift)
        return result

    @staticmethod
    def _utf16_units(value):
        """
        Split a string into UTF-16 code units.

        The Java writer this class mirrors works on Java ``char`` values, so
        a character outside the BMP has to become a surrogate pair.

        :param value: the string to split.
        :return: list of ints in the range 0..0xFFFF.
        """
        raw = value.encode('utf-16-be', 'surrogatepass')
        return [(raw[i] << 8) | raw[i + 1] for i in range(0, len(raw), 2)]

    @staticmethod
    def _encode_units(units):
        """
        Encode UTF-16 code units, each as a 1- to 3-byte UTF-8 style sequence.

        :param units: iterable of ints in the range 0..0xFFFF.
        :return: list of byte values.
        """
        result = []
        for ch in units:
            if ch < 0x80:
                result.append(ch & 0xff)
            elif ch < 0x800:
                result.append((0xc0 + ((ch >> 6) & 0x1f)) & 0xff)
                result.append((0x80 + (ch & 0x3f)) & 0xff)
            else:
                result.append((0xe0 + ((ch >> 12) & 0xf)) & 0xff)
                result.append((0x80 + ((ch >> 6) & 0x3f)) & 0xff)
                result.append((0x80 + (ch & 0x3f)) & 0xff)
        return result

    @staticmethod
    def _encode_utf(value):
        """
        Encode the characters of a string (without any length prefix).

        See com.alibaba.com.caucho.hessian.io.Hessian2Output#printString

        :param value: the string to encode.
        :return: list of byte values.
        """
        return Request._encode_units(Request._utf16_units(value))

    def _encode_str(self, value):
        """
        Encode a string together with its length prefix.

        Lengths are counted in UTF-16 code units.  A string longer than
        0xFFFF units does not fit the 16-bit length field, so its head is
        written as non-final ``'R'`` chunks of at most 0x8000 units.

        :param value: the string to encode.
        :return: list of byte values.
        """
        units = self._utf16_units(value)
        result = []
        offset = 0
        remaining = len(units)

        while remaining > 0xffff:
            sublen = 0x8000
            # Never end a chunk on a high surrogate: keep the pair together.
            if 0xd800 <= units[offset + sublen - 1] <= 0xdbff:
                sublen -= 1
            result.append(ord('R'))
            result.append((sublen >> 8) & 0xff)
            result.append(sublen & 0xff)
            result.extend(self._encode_units(units[offset:offset + sublen]))
            offset += sublen
            remaining -= sublen

        if remaining <= 0x1f:
            result.append(0x00 + remaining)
        elif remaining <= 0x3ff:
            result.append(0x30 + (remaining >> 8))
            result.append(remaining & 0xff)
        else:
            result.append(ord('S'))
            result.append((remaining >> 8) & 0xff)
            result.append(remaining & 0xff)

        result.extend(self._encode_units(units[offset:]))
        return result

    def _encode_object(self, value):
        """
        Encode an ``Object``.

        The class definition (path and field names) is written the first time
        a path is seen; later objects of the same path only reference its id.

        :param value: object exposing ``get_path()``, ``keys()`` and item access.
        :return: list of ints.
        """
        result = []
        path = value.get_path()
        field_names = value.keys()

        if path not in self.__classes:
            result.append(ord('C'))
            result.extend(self._encode_single_value(path))
            result.extend(self._encode_single_value(len(field_names)))
            for field_name in field_names:
                result.extend(self._encode_single_value(field_name))
            self.__classes.append(path)

        class_id = self.__classes.index(path)
        if class_id <= 0xf:
            # Small class ids use the compact single-byte form.
            result.append((class_id + 0x60) & 0xff)
        else:
            result.append(ord('O'))
            result.extend(self._encode_single_value(class_id))

        for field_name in field_names:
            result.extend(self._encode_single_value(value[field_name]))
        return result

    def _encode_list(self, value):
        """
        Encode a homogeneous list.

        :param value: the list to encode.
        :return: list of ints; an empty list is encoded as null because its
            element type cannot be determined.
        :raises HessianTypeError: if the element type is unsupported or the
            elements are not all of the same type.
        """
        result = []
        length = len(value)
        if length == 0:
            # No element means no type information: send null.
            return self._encode_single_value(None)

        first = value[0]
        if isinstance(first, bool):
            _type = '[boolean'
        elif isinstance(first, int):
            _type = '[int'
        elif isinstance(first, float):
            _type = '[double'
        elif isinstance(first, str):
            _type = '[string'
        elif isinstance(first, Object):
            _type = '[object'
        else:
            raise HessianTypeError('Unknown list type: {}'.format(first))

        # Short lists carry their length in the tag byte; longer ones use
        # the 0x56 tag followed by an explicit length.
        compact = length < 0x7
        result.append(0x70 + length if compact else 0x56)
        if _type not in self.types:
            # First use of this type: write its name and remember it.
            self.types.append(_type)
            result.extend(self._encode_single_value(_type))
        else:
            # Type already written: reference it by index.
            result.extend(self._encode_single_value(self.types.index(_type)))
        if not compact:
            result.extend(self._encode_single_value(length))

        for v in value:
            if type(first) != type(v):
                raise HessianTypeError('All elements in list must be the same type, first type'
                                       ' is {0} but current type is {1}'.format(type(first), type(v)))
            result.extend(self._encode_single_value(v))
        return result

    def _encode_single_value(self, value):
        """
        Encode one value according to its Python type.

        :param value: the value to encode.
        :return: list of ints.
        :raises HessianTypeError: if the type is not supported.
        """
        # bool must be tested before int (bool is an int subclass)
        if isinstance(value, bool):
            return self._encode_bool(value)
        # integers (including longs)
        elif isinstance(value, int):
            return self._encode_int(value)
        # floating point
        elif isinstance(value, float):
            return self._encode_float(value)
        # strings
        elif isinstance(value, str):
            return self._encode_str(value)
        # objects
        elif isinstance(value, Object):
            return self._encode_object(value)
        # lists; a tuple is not accepted as a substitute
        elif isinstance(value, list):
            return self._encode_list(value)
        # null
        elif value is None:
            return [ord('N')]
        else:
            raise HessianTypeError('Unknown argument type: {}'.format(value))

参考

  • python 调用 dubbo 接口
  • Python3&Python2,通过zk,Telnet 调用dubbo
  • https://github.com/BSTester/dubbo-python