一、背景
公司后端很多都是dubbo接口,python调用就会存在难题。网上的一些连接方法很多都是基于python2的;有的是基于telnetlib
直连,这个方法存在一些监控及安全问题,所以需要通过socket方法建立安全连接,但是该方法就存在序列化(hessian)的问题。于是就结合pydubbo包,以及网上大家一些现有探索,做总结并整合
二、python直连Dubbo
import json
import random
import socket
import struct
import telnetlib
import threading
import time
from sys import maxsize, platform
from urllib.parse import unquote

from kazoo.client import KazooClient
class ServiceNotAvailableError(ValueError):
    """Error raised when a requested service cannot be resolved to a provider."""
class Zookeeper:
    """Small wrapper around a Kazoo client for looking up Dubbo providers.

    On construction it connects to ZooKeeper and indexes every child of the
    ``dubbo`` node by its short name (the text after the last dot), so that a
    service can later be resolved without its package prefix.
    """

    client = None
    # Short service name -> fully qualified service name. Created per instance
    # in __init__ so that two registries never share (and pollute) one mapping.
    service_dict: dict

    def __init__(self, host_port, timeout=120):
        """Connect to ZooKeeper and index the registered Dubbo services.

        :param host_port: ZooKeeper address in ``host:port`` form.
        :param timeout: forwarded to ``KazooClient(timeout=...)``.
        """
        self.host, self.port = host_port.split(":")
        self.host_port = host_port
        self.service_dict = {}
        # Connect to ZooKeeper.
        self.client = KazooClient(host_port, timeout=timeout)
        self.client.start()
        # Index every registered Dubbo service by its short name.
        for service in self.client.get_children('dubbo'):
            short_name = str(service).split('.')[-1]
            # NOTE: services that share a short name overwrite each other.
            self.service_dict[short_name] = service

    def get_service_address(self, service):
        """Return the registered provider addresses of a service.

        :param service: fully qualified service name, or a short name (no
            dot) that is looked up in ``service_dict``.
        :return: list of ``(host, port, "host:port")`` string tuples, one per
            provider node.
        :raises KeyError: if a short name is not present in ``service_dict``.
        :raises ServiceNotAvailableError: if the service node or its
            ``providers`` node does not exist.
        """
        if '.' not in service:
            # Short name: resolve it to the fully qualified service name.
            service = self.service_dict[service]
        uri = 'dubbo/%s' % service
        providers_uri = '%s/providers' % uri
        if not self.client.exists(uri):
            raise ServiceNotAvailableError('服务"%s"不存在' % service)
        if not self.client.exists(providers_uri):
            raise ServiceNotAvailableError('服务"%s"没有提供者' % service)

        addrs = []
        for provider in self.client.get_children(providers_uri):
            # Provider nodes are URL-quoted URLs; once unquoted, the third
            # '/'-separated field is the "host:port" part.
            addr = unquote(provider).split('/')[2]
            host_and_port = addr.split(':')
            addrs.append((host_and_port[0], host_and_port[1], addr))
        return addrs

    def close(self):
        """Stop the Kazoo client and release the resources it holds."""
        self.client.stop()
        # stop() only ends the session; close() frees the client's handles.
        self.client.close()
class Dubbo:
    """Invoke Dubbo methods through the provider's telnet ``invoke`` command.

    NOTE: ``telnetlib`` is deprecated since Python 3.11 and removed in 3.13.
    """

    def __init__(self, interface, host_port, zkClient=None):
        """
        :param interface: fully qualified service interface name.
        :param host_port: provider address in ``host:port`` form; replaced by
            an address looked up in ZooKeeper when ``zkClient`` is given.
        :param zkClient: optional object exposing ``get_service_address`` and
            ``close`` (see the ``Zookeeper`` class).
        """
        self.host, self.port = host_port.split(":")
        self.conn = telnetlib.Telnet()
        # Stored with a trailing dot so the method name can be appended.
        self.interface = interface + '.'
        self.zk = zkClient

    def __call__(self, method, method_args_list):
        """Shorthand for :meth:`request`."""
        return self.request(method, method_args_list)

    def request(self, method, method_args_list):
        """Run ``invoke interface.method(args)`` and return the reply lines.

        :param method: method name on the interface.
        :param method_args_list: arguments; each one is rendered with
            ``str()``, so strings must already carry their quotes.
        :return: everything read up to the ``dubbo>`` prompt, split on CRLF.
        """
        if self.zk is not None:
            self._get_zk_host_port()
        # Build the command first so a bad argument never opens a connection.
        cmd_ = self.generate_cmd(method, method_args_list)
        self.conn.open(self.host, self.port, timeout=120)
        try:
            self.conn.write('invoke {}\n'.format(cmd_).encode())
            res = self.conn.read_until('dubbo>'.encode()).decode().split('\r\n')
        finally:
            # Every request opens its own connection, so always release it.
            self.conn.close()
        return res

    def generate_cmd(self, method, method_args_list):
        """Build the ``interface.method(arg1,arg2,...)`` text for ``invoke``.

        Arguments are separated by commas; ``None`` or an empty list yields
        an empty argument list.
        """
        params = ','.join(str(p) for p in (method_args_list or []))
        return '{}{}({})'.format(self.interface, method, params)

    def _get_zk_host_port(self):
        """Pick a provider address from ZooKeeper and store it as host/port."""
        address_list = self.zk.get_service_address(self.interface[:-1])
        if len(address_list) > 1:
            # Several providers: print them all; one is picked at random below.
            print('——' * 43)
            print('|%s服务有多个地址,使用index参数指定请求地址,随机圈选:|' % str(self.interface).center(30, ' '))
            print('-' * 86)
            for i, address in enumerate(address_list):
                print('| %d ==> %s:%s |' % (i, address[0], str(address[1]).ljust(80 - len(address[2]), ' ')))
            print('——' * 43)
        self.host, self.port = random.choice(address_list)[:2]
        print('当前连接地址: %s:%s' % (self.host, self.port))

    def close(self):
        """Close the telnet connection and, when present, the ZooKeeper client."""
        self.conn.close()
        if self.zk is not None:
            self.zk.close()
简单调用
if __name__ == '__main__':
    # Minimal telnet-based example.
    # NOTE(review): `host_dict` (environment name -> "host:port") and
    # `method_args` are not defined in this listing; define them before
    # running the example.
    serv_ = 'xx.xxxx.xx.xx.api.service.Xxxxx'
    zk = Zookeeper(host_dict['xx'])
    db_ = Dubbo(
        interface=serv_,
        host_port=host_dict['xx'],
        zkClient=zk
    )
    try:
        res_ = db_.request(method='xxXX', method_args_list=method_args)
        print("res_: \n", res_)
    finally:
        # Release the connections even when the call fails.
        db_.close()
三、python 通过socket及hessian序列化连接Dubbo
socket连接相关知识
af 为地址族(Address Family),也就是 IP 地址类型,常用的有 AF_INET 和 AF_INET6。
AF 是“Address Family”的简写,
INET是“Internet”的简写。
AF_INET 表示 IPv4 地址,例如 127.0.0.1;
AF_INET6 表示 IPv6 地址,例如 1030::C9B4:FF12:48AA:1A2B。
type 为数据传输方式/套接字类型,常用的有 SOCK_STREAM(流格式套接字/面向连接的套接字) 和 SOCK_DGRAM(数据报套接字/无连接的套接字),
我们已经在《套接字有哪些类型》一节中进行了介绍。
protocol 表示传输协议,常用的有 IPPROTO_TCP 和 IPPROTO_UDP,分别表示 TCP 传输协议和 UDP 传输协议。
有了地址类型和数据传输方式,还不足以决定采用哪种协议吗?为什么还需要第三个参数呢?
正如大家所想,一般情况下有了 af 和 type 两个参数就可以创建套接字了,操作系统会自动推演出协议类型,
除非遇到这样的情况:有两种不同的协议支持同一种地址类型和数据传输类型。如果我们不指明使用哪种协议,操作系统是没办法自动推演的。
本教程使用 IPv4 地址,参数 af 的值为 AF_INET。如果使用 SOCK_STREAM 传输数据,那么满足这两个条件的协议只有 TCP
socket连接Dubbo
脚本 github 地址: https://github.com/scchy/CSDN/blob/master/socketDubbo.py
主要流程:
- zk获取代理ip
- 如果是 `proxy-hashed`,需要再用get方法 - 可以参考笔者的文章: 《python连接redis/codis_直连与通过kz代理连接》 中 `zkCodis._get_codis_config`
- 建立socket连接
- hessian 序列化编码
- 发送序列化之后的dubbo服务命令
- 获取返回信息 `self.conn.recv(1024)`
- 将返回信息重新编码, 获取最终请求返回
class socketDubbo:
    """Call a Dubbo provider over a plain TCP socket.

    The request is serialized by ``Request`` and the reply is parsed by
    ``Response``; both are defined outside this class.
    """

    def __init__(self, interface, host_port, zkClient=None):
        """
        :param interface: fully qualified service interface name.
        :param host_port: provider address in ``host:port`` form; replaced by
            an address looked up in ZooKeeper when ``zkClient`` is given.
        :param zkClient: optional object exposing ``get_service_address`` and
            ``close`` (see the ``Zookeeper`` class).
        """
        self.host, self.port = host_port.split(":")
        self.interface = interface
        self.zk = zkClient
        self.__initial_connect()

    def __initial_connect(self):
        """Create a fresh, not yet connected TCP socket with a 120 s timeout."""
        self.conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM, socket.IPPROTO_TCP)
        # Do not call setblocking(True) after this: it is the same as
        # settimeout(None) and would silently discard the timeout.
        self.conn.settimeout(120)

    def __call__(self, method, method_args):
        """Shorthand for :meth:`request`."""
        return self.request(method, method_args)

    def request(self, method, method_args):
        """Send one request and return the decoded response value.

        :param method: accepted for interface symmetry with ``Dubbo``; the
            request built by :meth:`generate_cmd` does not use it.
        :param method_args: JSON-serializable arguments.
        :return: the value produced by ``Response.read_next()``.
        :raises ConnectionError: if the peer closes the connection before a
            complete response frame has arrived.
        """
        # Resolve the provider address.
        if self.zk is not None:
            self._get_zk_host_port()
        # Hessian-encode the request before touching the network.
        cmd_ = self.generate_cmd(method_args)
        try:
            self.conn.connect((self.host, int(self.port)))
            # sendall() keeps writing until the whole request is on the wire.
            self.conn.sendall(cmd_)
            data = self._recv_frame()
        finally:
            # A connected socket cannot be connected again, so close it and
            # prepare a new one for the next request.
            self.conn.close()
            self.__initial_connect()
        rp = Response(data[16:])
        # The first value is read and discarded before the payload.
        # NOTE(review): presumably the Dubbo response-type flag (value /
        # null / exception) - it is not inspected here; confirm and handle.
        rp.read_int()
        res_f = rp.read_next()
        return res_f

    def _recv_exact(self, size):
        """Read exactly ``size`` bytes from the socket."""
        buf = bytearray()
        while len(buf) < size:
            chunk = self.conn.recv(size - len(buf))
            if not chunk:
                raise ConnectionError(
                    'connection closed after {} of {} expected bytes'.format(len(buf), size))
            buf.extend(chunk)
        return buf

    def _recv_frame(self):
        """Read one complete Dubbo frame (16-byte header plus body).

        Bytes 12..15 of the header hold the body length as a big-endian
        integer, so the body is read in full however large it is.
        """
        header = self._recv_exact(16)
        body_length = int.from_bytes(header[12:16], 'big')
        return header + self._recv_exact(body_length)

    def generate_cmd(self, method_args):
        """Build the serialized ``$invoke`` request for ``method_args``.

        :param method_args: JSON-serializable arguments; sent as one JSON
            string argument.
        :return: the encoded request as returned by ``Request.encode()``.
        """
        request_param = {
            'dubbo_version': '2.0.2',
            'version': 'dev',
            'path': self.interface,
            'method': '$invoke',
            'arguments': [json.dumps(method_args)]
        }
        return Request(request_param).encode()

    def _get_zk_host_port(self):
        """Pick a provider address from ZooKeeper and store it as host/port."""
        address_list = self.zk.get_service_address(self.interface)
        if len(address_list) > 1:
            # Several providers: print them all; one is picked at random below.
            print('——' * 43)
            print('|%s服务有多个地址,使用index参数指定请求地址,随机圈选:|' % str(self.interface).center(30, ' '))
            print('-' * 86)
            for i, address in enumerate(address_list):
                print('| %d ==> %s:%s |' % (i, address[0], str(address[1]).ljust(80 - len(address[2]), ' ')))
            print('——' * 43)
        self.host, self.port = random.choice(address_list)[:2]
        print('当前连接地址: %s:%s' % (self.host, self.port))

    def close(self):
        """Close the socket and, when present, the ZooKeeper client."""
        self.conn.close()
        if self.zk is not None:
            self.zk.close()
if __name__ == '__main__':
    # Minimal socket-based example.
    # NOTE(review): `host_dict` (environment name -> "host:port") and
    # `method_args` are not defined in this listing; define them before
    # running the example.
    serv_ = 'xx.xxxx.xx.xx.api.service.Xxxxx'
    zk = Zookeeper(host_dict['dev'])
    # This section demonstrates the socket client; `Dubbo.request` has no
    # `method_args` keyword, so the socket-based class is the one to use.
    db_ = socketDubbo(
        interface=serv_,
        host_port=host_dict['dev'],
        zkClient=zk
    )
    try:
        res_ = db_.request(method='xxx', method_args=method_args)
        print("res_: \n", res_)
    finally:
        # Release the connections even when the call fails.
        db_.close()
hessian序列化:核心是 _encode_request_body
和java同学联调, 需要确认入参正确。
主要对 https://github.com/BSTester/dubbo-python 中的 dubbo.codec.encoder & dubbo.codec.decoder 做一些简单修改
class Request(object):
    """
    Serialize a Dubbo request (header plus Hessian-encoded body) to bytes.

    Values of the following Python types can be encoded:
        * bool
        * int (32-bit values as Hessian int, wider values as Hessian long)
        * float
        * str
        * Object
        * list (non-empty lists must be homogeneous)
        * None
    """

    # Range of a signed 32-bit integer; ints outside it are encoded as long.
    _MIN_INT_32 = -0x80000000
    _MAX_INT_32 = 0x7fffffff
    # A string chunk carries a 16-bit length, so long strings are split into
    # chunks of at most this many UTF-16 code units.
    _STRING_CHUNK_SIZE = 0x8000

    def __init__(self, request):
        """
        :param request: dict with the keys ``dubbo_version``, ``path``,
            ``version``, ``method`` and ``arguments``.
        """
        self.__body = request
        self.__classes = []  # class paths already defined in this request
        self.types = []  # list type names already written in this request
        self.invoke_id = 0

    def encode(self):
        """
        Serialize the whole request into a byte array.

        ``DEFAULT_REQUEST_META`` and ``get_request_body_length`` are defined
        outside this class.

        :return: ``bytearray`` holding the header followed by the body.
        """
        request_body = self._encode_request_body()
        invoke_id = list(bytearray(struct.pack('!q', self.invoke_id)))
        request_head = DEFAULT_REQUEST_META + invoke_id + get_request_body_length(request_body)
        return bytearray(request_head + request_body)

    def _get_parameter_types(self, arguments):
        """
        Build the parameter type descriptor string for all arguments.

        :param arguments: iterable of argument values.
        :return: concatenation of the per-argument type descriptors.
        """
        return ''.join(self._get_class_name(argument) for argument in arguments)

    def _get_class_name(self, _class):
        """
        Return the Java type descriptor matching a Python value.

        Conversion rules: https://stackoverflow.com/a/3442100/4614538

        :param _class: the value whose type is described.
        :return: descriptor string such as ``I`` or ``Ljava/lang/String;``.
        :raises HessianTypeError: for an empty list or an unsupported type.
        """
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(_class, bool):
            return 'Z'
        if isinstance(_class, int):
            if self._MIN_INT_32 <= _class <= self._MAX_INT_32:
                return 'I'
            return 'J'
        if isinstance(_class, float):
            return 'D'
        if isinstance(_class, str):
            return 'L' + 'java/lang/String' + ';'
        if isinstance(_class, Object):
            return 'L' + _class.get_path().replace('.', '/') + ';'
        if isinstance(_class, list):
            if len(_class) == 0:
                raise HessianTypeError('Method parameter {} is a list but length is zero'.format(_class))
            return '[' + self._get_class_name(_class[0])
        raise HessianTypeError('Unknown argument type: {0}'.format(_class))

    def _encode_request_body(self):
        """
        Encode all request fields and the attachments.

        :return: list of ints, each in ``0..255``.
        """
        dubbo_version = self.__body['dubbo_version']
        path = self.__body['path']
        version = self.__body['version']
        method = self.__body['method']
        arguments = self.__body['arguments']

        body = []
        body.extend(self._encode_single_value(dubbo_version))
        body.extend(self._encode_single_value(path))
        body.extend(self._encode_single_value(version))
        body.extend(self._encode_single_value(method))
        # NOTE(review): the value written after the method name is the
        # encoding of the `arguments` list itself; `_get_parameter_types` is
        # not called anywhere in this class. Confirm with the provider side
        # that this layout is what it expects.
        body.extend(self._encode_single_value(arguments))
        for argument in arguments:
            body.extend(self._encode_single_value(argument))

        attachments = {
            'path': path,
            'interface': path,
            'version': version
        }
        # The attachments map starts with 'H' and ends with 'Z'.
        body.append(ord('H'))
        for key, value in attachments.items():
            body.extend(self._encode_single_value(key))
            body.extend(self._encode_single_value(value))
        body.append(ord('Z'))

        # Final guard so that every element is a valid byte value.
        return [byte & 0xff for byte in body]

    @staticmethod
    def _encode_bool(value):
        """
        Encode a boolean.

        :param value: truthy or falsy value.
        :return: ``[ord('T')]`` or ``[ord('F')]``.
        """
        return [ord('T')] if value else [ord('F')]

    @staticmethod
    def _encode_int(value):
        """
        Encode an integer using the shortest form that can hold it.

        Values outside the signed 32-bit range are written as a long. For a
        value inside that range there is no way to tell whether the caller
        meant an int or a long, so it is always written as an int.

        :param value: the integer to encode.
        :return: list of byte values.
        """
        if value > Request._MAX_INT_32 or value < Request._MIN_INT_32:
            return [ord('L')] + list(bytearray(struct.pack('!q', value)))
        if -0x10 <= value <= 0x2f:
            encoded = [value + 0x90]
        elif -0x800 <= value <= 0x7ff:
            encoded = [0xc8 + (value >> 8), value]
        elif -0x40000 <= value <= 0x3ffff:
            encoded = [0xd4 + (value >> 16), value >> 8, value]
        else:
            encoded = [ord('I'), value >> 24, value >> 16, value >> 8, value]
        return [byte & 0xff for byte in encoded]

    @staticmethod
    def _encode_float(value):
        """
        Encode a float using the shortest form that represents it exactly.

        :param value: the float to encode.
        :return: list of byte values.
        """
        infinity = float('inf')
        # NaN and +/-inf cannot be passed to int(); they always take the full
        # 8-byte form below.
        if value == value and abs(value) != infinity:
            int_value = int(value)
            if int_value == value:
                if int_value == 0:
                    return [0x5b]
                if int_value == 1:
                    return [0x5c]
                if -0x80 <= int_value < 0x80:
                    return [0x5d, int_value & 0xff]
                if -0x8000 <= int_value < 0x8000:
                    return [0x5e, (int_value >> 8) & 0xff, int_value & 0xff]

            scaled = value * 1000
            # The multiplication can overflow to infinity for huge values.
            if abs(scaled) != infinity:
                mills = int(scaled)
                if 0.001 * mills == value and Request._MIN_INT_32 <= mills <= Request._MAX_INT_32:
                    return [
                        0x5f,
                        (mills >> 24) & 0xff,
                        (mills >> 16) & 0xff,
                        (mills >> 8) & 0xff,
                        mills & 0xff,
                    ]

        # Full form: 'D' followed by the big-endian IEEE-754 double.
        return [ord('D')] + list(bytearray(struct.pack('!d', value)))

    @staticmethod
    def _utf16_units(value):
        """Return the UTF-16 code units of ``value`` as a list of ints.

        Characters above U+FFFF are split into a surrogate pair, because the
        string length prefix counts 16-bit units.
        """
        units = []
        for char in value:
            code = ord(char)
            if code > 0xffff:
                code -= 0x10000
                units.append(0xd800 + (code >> 10))
                units.append(0xdc00 + (code & 0x3ff))
            else:
                units.append(code)
        return units

    @staticmethod
    def _encode_units(units):
        """Encode 16-bit code units as 1-, 2- or 3-byte sequences."""
        result = []
        for ch in units:
            if ch < 0x80:
                result.append(ch)
            elif ch < 0x800:
                result.append(0xc0 + ((ch >> 6) & 0x1f))
                result.append(0x80 + (ch & 0x3f))
            else:
                result.append(0xe0 + ((ch >> 12) & 0xf))
                result.append(0x80 + ((ch >> 6) & 0x3f))
                result.append(0x80 + (ch & 0x3f))
        return result

    @staticmethod
    def _encode_utf(value):
        """
        Encode the characters of a string (without any length prefix).

        See com.alibaba.com.caucho.hessian.io.Hessian2Output#printString

        :param value: the string to encode.
        :return: list of byte values.
        """
        return Request._encode_units(Request._utf16_units(value))

    def _encode_str(self, value):
        """
        Encode a string together with its length prefix.

        Strings longer than ``_STRING_CHUNK_SIZE`` units are written as a
        series of non-final chunks (tag ``0x52``) followed by a final chunk.

        :param value: the string to encode.
        :return: list of byte values.
        """
        units = self._utf16_units(value)
        result = []
        start = 0
        remaining = len(units)

        while remaining > self._STRING_CHUNK_SIZE:
            sublen = self._STRING_CHUNK_SIZE
            # Never end a chunk between the two halves of a surrogate pair.
            if 0xd800 <= units[start + sublen - 1] <= 0xdbff:
                sublen -= 1
            result.append(0x52)
            result.append((sublen >> 8) & 0xff)
            result.append(sublen & 0xff)
            result.extend(self._encode_units(units[start:start + sublen]))
            start += sublen
            remaining -= sublen

        if remaining <= 0x1f:
            result.append(remaining)
        elif remaining <= 0x3ff:
            result.append(0x30 + (remaining >> 8))
            result.append(remaining & 0xff)
        else:
            result.append(ord('S'))
            result.append((remaining >> 8) & 0xff)
            result.append(remaining & 0xff)
        result.extend(self._encode_units(units[start:]))
        return result

    def _encode_object(self, value):
        """
        Encode an ``Object``: its class definition (first use only), then
        its field values.

        :param value: an ``Object`` exposing ``get_path()``, ``keys()`` and
            item access by field name.
        :return: list of byte values.
        """
        result = []
        path = value.get_path()
        field_names = value.keys()

        if path not in self.__classes:
            # First occurrence: emit the class definition.
            result.append(ord('C'))
            result.extend(self._encode_single_value(path))
            result.extend(self._encode_single_value(len(field_names)))
            for field_name in field_names:
                result.extend(self._encode_single_value(field_name))
            self.__classes.append(path)

        class_id = self.__classes.index(path)
        if class_id <= 0xf:
            # Small class ids are folded into the tag byte.
            result.append((class_id + 0x60) & 0xff)
        else:
            result.append(ord('O'))
            result.extend(self._encode_single_value(class_id))
        for field_name in field_names:
            result.extend(self._encode_single_value(value[field_name]))
        return result

    def _encode_list(self, value):
        """
        Encode a list whose elements all share one type.

        :param value: the list to encode; an empty list is written as null
            because its element type cannot be determined.
        :return: list of byte values.
        :raises HessianTypeError: for an unsupported element type or a list
            mixing element types.
        """
        length = len(value)
        if length == 0:
            return self._encode_single_value(None)

        first = value[0]
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(first, bool):
            _type = '[boolean'
        elif isinstance(first, int):
            _type = '[int'
        elif isinstance(first, float):
            _type = '[double'
        elif isinstance(first, str):
            _type = '[string'
        elif isinstance(first, Object):
            _type = '[object'
        else:
            raise HessianTypeError('Unknown list type: {}'.format(first))

        result = []
        compact = length < 0x7
        # Short lists carry their length in the tag byte.
        result.append(0x70 + length if compact else 0x56)
        if _type not in self.types:
            # First use of this type name: write it out in full.
            self.types.append(_type)
            result.extend(self._encode_single_value(_type))
        else:
            # Later uses refer to the type by its index.
            result.extend(self._encode_single_value(self.types.index(_type)))
        if not compact:
            result.extend(self._encode_single_value(length))

        first_type = type(first)
        for v in value:
            if type(v) is not first_type:
                raise HessianTypeError('All elements in list must be the same type, first type'
                                       ' is {0} but current type is {1}'.format(first_type, type(v)))
            result.extend(self._encode_single_value(v))
        return result

    def _encode_single_value(self, value):
        """
        Encode one value according to its Python type.

        :param value: the value to encode.
        :return: list of byte values.
        :raises HessianTypeError: if the type is not supported.
        """
        # null
        if value is None:
            return [ord('N')]
        # bool (must come before int: bool is a subclass of int)
        if isinstance(value, bool):
            return self._encode_bool(value)
        # int, including values that need the long form
        if isinstance(value, int):
            return self._encode_int(value)
        if isinstance(value, float):
            return self._encode_float(value)
        if isinstance(value, str):
            return self._encode_str(value)
        if isinstance(value, Object):
            return self._encode_object(value)
        # list only; a tuple is not accepted in its place
        if isinstance(value, list):
            return self._encode_list(value)
        raise HessianTypeError('Unknown argument type: {}'.format(value))
参考
- python 调用 dubbo 接口
- Python3&Python2,通过zk,Telnet 调用dubbo
- https://github.com/BSTester/dubbo-python