Android LocalSocket 详细解析

  • 一、Socket编程模型
  • 二、服务端的流程
  • 三、客户端的流程
  • 四、总结


一、Socket编程模型

偷用网上的一张图概述Socket编程模型:

(此处原为Socket编程模型示意图:服务端依次调用 socket()、bind()、listen()、accept();客户端依次调用 socket()、connect();连接建立后双方即可收发数据。)


LocalSocket也是按照这个模型来组织的。与图中略有不同的是,Android LocalSocket是本地Socket,不需要TCP三次握手。

对于socket编程不熟悉的朋友,需要提取的内容有:

对于服务端需要做好以下准备,才能被客户端连接:
1.调用socket() 函数创建一个socket,这个socket是用于监听和接受客户端的连接请求的
2.调用bind()函数绑定通信地址,对于网络通信需要绑定的地址为IP:PORT的形式,而对于Android本地通信而言,需要绑定的地址为一个本地文件
3.调用listen()函数监听是否有客户端连接请求,能否连接成功需要后续accept()的判断
4.调用accept()函数接受和处理客户端的连接请求,如果连接成功返回一个新的socket,这个新的socket是用来和客户端进行通信的

对于客户端需要做以下准备去连接服务端:
1.调用socket()函数创建一个socket
2.调用connect()函数进行连接

了解了以上基本脉络后,接下来以Android的installd进程为例详细解析Android的LocalSocket(基于Android 7.0源码)。

二、服务端的流程

installd进程是在init进程中启动的,相关启动内容:

/* 文件:frameworks/native/cmds/installd/installd.rc */
# Declares the service "installd", whose executable is /system/bin/installd.
service installd /system/bin/installd
    class main
    # Socket declaration: name "installd", type stream, permissions 600,
    # owner system, group system.
    socket installd stream 600 system system

字面上可以看到在installd.rc中使用socket installd stream 600 system system创建了与installd进程相关的socket,其具体的创建流程是在init中完成的,接下来过一下这个流程。

/* 文件:system/core/init/service.cpp */
// Returns the lookup table that associates every service option keyword
// with a {number, number, handler} entry. The two numbers look like the
// minimum and maximum argument counts for the option -- TODO confirm
// against the OptionHandlerMap declaration.
Service::OptionHandlerMap::Map& Service::OptionHandlerMap::map() const {
    // Largest std::size_t value, used where the second number is unbounded.
    constexpr std::size_t kNoLimit = std::numeric_limits<std::size_t>::max();

    static const Map kHandlersByKeyword = {
        {"class", {1, 1, &Service::HandleClass}},
        {"console", {0, 0, &Service::HandleConsole}},
        {"critical", {0, 0, &Service::HandleCritical}},
        {"disabled", {0, 0, &Service::HandleDisabled}},
        {"group", {1, NR_SVC_SUPP_GIDS + 1, &Service::HandleGroup}},
        {"ioprio", {2, 2, &Service::HandleIoprio}},
        {"keycodes", {1, kNoLimit, &Service::HandleKeycodes}},
        {"oneshot", {0, 0, &Service::HandleOneshot}},
        {"onrestart", {1, kNoLimit, &Service::HandleOnrestart}},
        {"seclabel", {1, 1, &Service::HandleSeclabel}},
        {"setenv", {2, 2, &Service::HandleSetenv}},
        // The "socket" keyword is processed by Service::HandleSocket.
        {"socket", {3, 6, &Service::HandleSocket}},
        {"user", {1, 1, &Service::HandleUser}},
        {"writepid", {1, kNoLimit, &Service::HandleWritepid}},
    };

    return kHandlersByKeyword;
}

/* 文件:system/core/init/service.cpp */
/*
 * Handles a "socket" option line of a service definition.
 * Syntax: name type perm [ uid gid context ]
 *
 * args[1] is the socket name, args[2] the type, args[3] the octal
 * permission, and args[4..6] the optional owner, group and SELinux context.
 *
 * Returns false and fills *err when the type is not one of dgram, stream
 * or seqpacket; otherwise records the declaration in sockets_ and returns
 * true.
 */
bool Service::HandleSocket(const std::vector<std::string>& args, std::string* err) {
    const std::string& type = args[2];
    const bool type_supported =
        type == "dgram" || type == "stream" || type == "seqpacket";
    if (!type_supported) {
        *err = "socket type must be 'dgram', 'stream' or 'seqpacket'";
        return false;
    }

    // The permission field is parsed as an octal number (e.g. 600).
    int perm = std::stoul(args[3], 0, 8);

    // Owner and group are optional; each falls back to 0 when omitted.
    uid_t uid = 0;
    if (args.size() > 4) {
        uid = decode_uid(args[4].c_str());
    }
    gid_t gid = 0;
    if (args.size() > 5) {
        gid = decode_uid(args[5].c_str());
    }

    // The SELinux context is optional as well; empty when not given.
    std::string socketcon;
    if (args.size() > 6) {
        socketcon = args[6];
    }

    // sockets_ is a std::vector<SocketInfo>; build the entry in place.
    sockets_.emplace_back(args[1], type, uid, gid, perm, socketcon);
    return true;
}

HandleSocket()函数中完成了对.rc文件中socket声明的信息的收集,接下来就是对信息的处理。

/* 文件:system/core/init/service.cpp */
// Excerpt of Service::Start(): only the part that creates and publishes the
// sockets declared for this service is shown; surrounding code is elided.
bool Service::Start() {
	//......

        for (const auto& si : sockets_) {  // iterate over every collected socket declaration
            // Map the textual type to a socket type; anything other than
            // "stream" or "dgram" becomes SOCK_SEQPACKET.
            int socket_type = ((si.type == "stream" ? SOCK_STREAM :
                                (si.type == "dgram" ? SOCK_DGRAM :
                                 SOCK_SEQPACKET)));
            // Use the context from the declaration if present, otherwise
            // scon (defined in the elided code above).
            const char* socketcon =
                !si.socketcon.empty() ? si.socketcon.c_str() : scon.c_str();

			// Step 1: create the socket.
            int s = create_socket(si.name.c_str(), socket_type, si.perm,
                                  si.uid, si.gid, socketcon);
            if (s >= 0) {
            	// Step 2: publish its file descriptor.
                PublishSocket(si.name, s);
            }
        }

	//......
}

init中对每条socket声明做了以下两步处理:
1.create_socket
2.PublishSocket
这两步又具体做了什么呢?

/* 文件:system/core/init/util.cpp */
/*
 * create_socket - creates a Unix domain socket in ANDROID_SOCKET_DIR
 * ("/dev/socket") as dictated in init.rc. This socket is inherited by the
 * daemon. We communicate the file descriptor's value via the environment
 * variable ANDROID_SOCKET_ENV_PREFIX<name> ("ANDROID_SOCKET_foo").
 *
 * Excerpt: error handling and other steps are elided (marked //......).
 * Returns the socket file descriptor, or -1 when socket() fails.
 */
int create_socket(const char *name, int type, mode_t perm, uid_t uid,
                  gid_t gid, const char *socketcon)
{
    struct sockaddr_un addr;
    int fd, ret, savederrno;
    char *filecon;

	//......

	// Create a socket for local IPC, i.e. the Unix domain socket mentioned
	// in the header comment.
    fd = socket(PF_UNIX, type, 0);
    if (fd < 0) {
        ERROR("Failed to open socket '%s': %s\n", name, strerror(errno));
        return -1;
    }

	//......

    memset(&addr, 0 , sizeof(addr));
    addr.sun_family = AF_UNIX;  // AF_UNIX: the address is a path in the local filesystem
    snprintf(addr.sun_path, sizeof(addr.sun_path), ANDROID_SOCKET_DIR"/%s",
             name);  // for installd the path becomes /dev/socket/installd

	//......
	// Bind the socket to that path (/dev/socket/installd for installd).
    ret = bind(fd, (struct sockaddr *) &addr, sizeof (addr));

	//......

	// Apply the owner and group from the declaration.
    ret = lchown(addr.sun_path, uid, gid);
	// Apply the permission bits from the declaration.
    ret = fchmodat(AT_FDCWD, addr.sun_path, perm, AT_SYMLINK_NOFOLLOW);

	//......

    return fd;

	//......
}

/* 文件:system/core/init/service.cpp */
// Publishes an init-created socket to the service through the environment:
// the variable ANDROID_SOCKET_ENV_PREFIX<name> (ANDROID_SOCKET_installd for
// name == "installd") is set to the decimal value of the file descriptor.
void Service::PublishSocket(const std::string& name, int fd) const {
    const std::string env_name =
        StringPrintf(ANDROID_SOCKET_ENV_PREFIX "%s", name.c_str());
    const std::string env_value = StringPrintf("%d", fd);
    add_environment(env_name.c_str(), env_value.c_str());

    // Reset the descriptor flags to 0 so close-on-exec is not set.
    fcntl(fd, F_SETFD, 0);
}

init中对socket声明的处理分析完了,总结一下做了哪些事情
1.(调用socket函数)创建一个Unix域的socket,也就是用于本地IPC的socket,并根据socket声明中的name(调用bind函数)为其绑定/dev/socket/name文件作为通信地址
2.根据socket声明设置/dev/socket/name文件的owner、group以及权限等
3.添加一个名为ANDROID_SOCKET_name的环境变量,其值为前面创建的socket的文件描述符

然而init中只完成了socket和bind流程,接下来的listen和accept流程则由installd进程完成。installd进程的入口函数为main函数。

/* 文件:frameworks/native/cmds/installd/installd.cpp */
// Process entry point of installd: hands argc/argv to
// android::installd::installd_main and returns its result.
int main(const int argc, char *argv[]) {
    const int status = android::installd::installd_main(argc, argv);
    return status;
}

/*
 * Server side of the installd socket (excerpt; some setup code is elided).
 *
 * Fetches the listening socket created by init, calls listen() on it and
 * then loops forever: accept one client, read length-prefixed commands
 * from it and pass each to execute() until a read fails, the size is
 * invalid or execute() returns non-zero, then close the connection.
 */
static int installd_main(const int argc ATTRIBUTE_UNUSED, char *argv[]) {
    char buf[BUFFER_MAX];
    struct sockaddr addr;
    socklen_t alen;
    int lsocket, s;
	//......
	/*
	  android_get_control_socket() is declared in system/core/include/cutils/sockets.h.
	  It reads the environment variable ANDROID_SOCKET_<name>, here
	  ANDROID_SOCKET_installd, i.e. it retrieves the file descriptor of the
	  socket that init created for this service.
	*/
    lsocket = android_get_control_socket(SOCKET_PATH);  // SOCKET_PATH="installd"
    if (lsocket < 0) {
        ALOGE("Failed to get socket from environment: %s\n", strerror(errno));
        exit(1);
    }
    if (listen(lsocket, 5)) {  // start listening for client connection requests (backlog 5)
        ALOGE("Listen on socket failed: %s\n", strerror(errno));
        exit(1);
    }
    fcntl(lsocket, F_SETFD, FD_CLOEXEC);

    for (;;) {
        alen = sizeof(addr);
        s = accept(lsocket, &addr, &alen); // accept a client; the returned socket is used to talk to that client
        if (s < 0) {
            ALOGE("Accept failed: %s\n", strerror(errno));
            continue;
        }
        fcntl(s, F_SETFD, FD_CLOEXEC);

        ALOGI("new connection\n");
        for (;;) {  // connected: read messages from the client and act on them
            // Each message starts with its length, read as an unsigned short.
            unsigned short count;
            if (readx(s, &count, sizeof(count))) {
                ALOGE("failed to read size\n");
                break;
            }
            // Reject empty messages and ones that would leave no room in buf
            // for the terminating 0 written below.
            if ((count < 1) || (count >= BUFFER_MAX)) {
                ALOGE("invalid size %d\n", count);
                break;
            }
            if (readx(s, buf, count)) {
                ALOGE("failed to read command\n");
                break;
            }
            buf[count] = 0;
            if (selinux_enabled && selinux_status_updated() > 0) {
                selinux_android_seapp_context_reload();
            }
            // A non-zero result from execute() ends this connection.
            if (execute(s, buf)) break;
        }
        ALOGI("closing connection\n");
        close(s);
    }

    return 0;
}

至此服务端的准备工作都完成了,就等客户端的连接了。

三、客户端的流程

客户端流程以installd进程对应的framework层的客户端Installer服务的初始化为切入点。

/* 文件:frameworks/base/services/java/com/android/server/SystemServer.java */
/** Excerpt: only the step that starts the Installer service is shown. */
private void startBootstrapServices() {
    // Wait for installd to finish starting up so that it has a chance to
    // create critical directories such as /data/user with the appropriate
    // permissions.  We need this to complete before we initialize other services.
    Installer installer = mSystemServiceManager.startService(Installer.class);
	//......
}

//startService方法会先调用Installer的构造方法,然后再调用它的onStart()方法

/* 文件:frameworks/base/services/core/java/com/android/server/pm/Installer.java */
/**
 * Constructor: passes the context to the superclass and creates the
 * InstallerConnection stored in mInstaller.
 */
public Installer(Context context) {
    super(context);
    this.mInstaller = new InstallerConnection();
}

/**
 * Logs that the service is waiting for installd, then calls
 * waitForConnection() on the installer connection.
 */
public void onStart() {
    final String waitingMessage = "Waiting for installd to be ready.";
    Slog.i(TAG, waitingMessage);
    mInstaller.waitForConnection();
}

跟进waitForConnection()方法:

/* 文件:frameworks/base/core/java/com/android/internal/os/InstallerConnection.java */
/**
 * Pings the server until it answers. After every failed attempt a warning
 * is logged and the thread sleeps for one second before retrying; the
 * method returns as soon as a ping succeeds.
 */
public void waitForConnection() {
    while (true) {
        boolean pingSucceeded = false;
        try {
            execute("ping");
            pingSucceeded = true;
        } catch (InstallerException ignored) {
            // Not ready yet; fall through to the retry path.
        }
        if (pingSucceeded) {
            return;
        }
        Slog.w(TAG, "installd not ready");
        SystemClock.sleep(1000);
    }
}

/**
 * Excerpt: the text held in builder (its construction is elided) is sent
 * through transact() and the reply is split on single spaces.
 */
public String[] execute(String cmd, Object... args) throws InstallerException {
	//......
    final String[] resRaw = transact(builder.toString()).split(" "); // for waitForConnection() this is transact("ping")
	//......
}

/**
 * Sends one command string to the server and returns the reply as a string
 * (excerpt; some leading code is elided).
 *
 * Returns "-1" when connecting fails, when the command cannot be written
 * even after one reconnect attempt, or when the reply length is not
 * positive.
 */
public synchronized String transact(String cmd) {
	//......
    if (!connect()) {   // connect first
        Slog.e(TAG, "connection failed");
        return "-1";
    }

    if (!writeCommand(cmd)) {  // then send the command (e.g. "ping") to the server
        /*
         * If installd died and restarted in the background (unlikely but
         * possible) we'll fail on the next write (this one). Try to
         * reconnect and write the command one more time before giving up.
         */
        Slog.e(TAG, "write command failed? reconnect!");
        if (!connect() || !writeCommand(cmd)) {
            return "-1";
        }
    }
    if (LOCAL_DEBUG) {
        Slog.i(TAG, "send: '" + cmd + "'");
    }

    final int replyLength = readReply();  // read the server's reply
    if (replyLength > 0) {
        // The reply bytes are taken from buf, starting at offset 0.
        String s = new String(buf, 0, replyLength);
        if (LOCAL_DEBUG) {
            Slog.i(TAG, "recv: '" + s + "'");
        }
        return s;
    } else {
        if (LOCAL_DEBUG) {
            Slog.i(TAG, "fail");
        }
        return "-1";
    }
}

关键在于connect()函数

/* 文件:frameworks/base/core/java/com/android/internal/os/InstallerConnection.java */
/**
 * Ensures there is a socket to installd. When mSocket is already set this
 * returns true immediately; otherwise it creates a LocalSocket, connects it
 * and stores its input and output streams.
 *
 * @return true when a socket is available, false when connecting threw an
 *         IOException (disconnect() is called in that case)
 */
private boolean connect() {
    if (mSocket == null) {
        Slog.i(TAG, "connecting...");
        try {
            // Analysis point 1 (examined later in the article).
            mSocket = new LocalSocket();

            // Address "installd" in the RESERVED namespace, i.e. the
            // /dev/socket directory, giving /dev/socket/installd.
            final LocalSocketAddress installdAddress = new LocalSocketAddress(
                    "installd", LocalSocketAddress.Namespace.RESERVED);

            // Analysis point 2 (examined later in the article).
            mSocket.connect(installdAddress);

            mIn = mSocket.getInputStream();
            mOut = mSocket.getOutputStream();
        } catch (IOException ex) {
            disconnect();
            return false;
        }
    }
    return true;
}

//分析点1:mSocket = new LocalSocket()
/* 文件:frameworks/base/core/java/android/net/LocalSocket.java */
//这一句的最终调用如下:
/**
 * Stores the implementation object and the socket type; a newly
 * constructed socket is marked as neither connected nor bound.
 */
LocalSocket(LocalSocketImpl impl, int sockType) {
    this.isBound = false;
    this.isConnected = false;
    this.sockType = sockType;  // SOCKET_STREAM on the call path analysed here
    this.impl = impl;          // a new LocalSocketImpl() on the call path analysed here
}

//分析点2:mSocket.connect(address)
/* 文件:frameworks/base/core/java/android/net/LocalSocket.java */
/**
 * Connects this socket to the given endpoint while holding this object's
 * monitor.
 *
 * @throws IOException with message "already connected" when isConnected is
 *         already set, or whatever the create/connect steps throw
 */
public void connect(LocalSocketAddress endpoint) throws IOException {
    synchronized (this) {
        final boolean alreadyConnected = isConnected;
        if (alreadyConnected) {
            throw new IOException("already connected");
        }

        // Analysis point 3: create the underlying socket.
        implCreateIfNeeded();
        // Analysis point 4: connect it to the endpoint (timeout argument 0).
        impl.connect(endpoint, 0);

        // Both flags are set only after the steps above completed.
        isBound = true;
        isConnected = true;
    }
}

看到客户端创建socket和进行连接的影子了,接下来分析具体是如何创建和连接的。
创建:

//分析点3:创建socket   implCreateIfNeeded()
/* 文件:frameworks/base/core/java/android/net/LocalSocket.java */
/**
 * Calls impl.create(sockType) unless implCreated is already set. The flag
 * is tested once without the lock and once more while holding this
 * object's monitor, and it is set in a finally block, so it becomes true
 * even when create() throws.
 */
private void implCreateIfNeeded() throws IOException {
    if (implCreated) {
        return;
    }
    synchronized (this) {
        if (implCreated) {
            return;
        }
        try {
            impl.create(sockType);  // SOCKET_STREAM on the call path analysed here
        } finally {
            implCreated = true;
        }
    }
}

/* 文件:frameworks/base/core/java/android/net/LocalSocketImpl.java */
/**
 * Creates the underlying AF_UNIX socket for the given LocalSocket type.
 * Does nothing when fd is already set (not an error; needed for
 * LocalServerSocket.accept()).
 *
 * @throws IllegalStateException for an unknown sockType
 * @throws IOException when Os.socket() fails with an ErrnoException
 */
public void create(int sockType) throws IOException {
    if (fd != null) {
        return;
    }

    // Translate the LocalSocket constant into the matching OS socket type.
    final int osType;
    if (sockType == LocalSocket.SOCKET_DGRAM) {
        osType = OsConstants.SOCK_DGRAM;
    } else if (sockType == LocalSocket.SOCKET_STREAM) {
        osType = OsConstants.SOCK_STREAM;
    } else if (sockType == LocalSocket.SOCKET_SEQPACKET) {
        osType = OsConstants.SOCK_SEQPACKET;
    } else {
        throw new IllegalStateException("unknown sockType");
    }

    try {
        // AF_UNIX: the address is a local file, matching the server side.
        fd = Os.socket(OsConstants.AF_UNIX, osType, 0);
        mFdCreatedInternally = true;
    } catch (ErrnoException e) {
        e.rethrowAsIOException();
    }
}

//fd = Os.socket(OsConstants.AF_UNIX, osType, 0)最终会调用到native层的Posix_socket函数
/* 文件:libcore/luni/src/main/native/libcore_io_Posix.cpp */
// Native implementation reached from Os.socket(): calls socket() and wraps
// the resulting descriptor in a Java file descriptor object. Returns NULL
// when the descriptor is -1.
static jobject Posix_socket(JNIEnv* env, jobject, jint domain, jint type, jint protocol) {
    if (domain == AF_PACKET) {
        // Packet sockets specify the protocol in host byte order; it is
        // converted with htons() before the call.
        protocol = htons(protocol);
    }

    // This is where socket() is finally called; connect() comes next.
    const int rc = TEMP_FAILURE_RETRY(socket(domain, type, protocol));
    const int fd = throwIfMinusOne(env, "socket", rc);
    if (fd == -1) {
        return NULL;
    }
    return jniCreateFileDescriptor(env, fd);
}

连接:

//分析点4:impl.connect(endpoint, 0);
/* 文件:frameworks/base/core/java/android/net/LocalSocketImpl.java */
/**
 * Connects the already-created socket to the given address. The timeout
 * parameter is not used in this body.
 *
 * @throws IOException with message "socket not created" when fd is null
 */
protected void connect(LocalSocketAddress address, int timeout)
                    throws IOException
{
    if (fd == null) {
        throw new IOException("socket not created");
    }

    final String name = address.getName();
    final LocalSocketAddress.Namespace namespace = address.getNamespace();

    // connectLocal is a native method, implemented by socket_connect_local
    // in android_net_LocalSocketImpl.cpp.
    connectLocal(fd, name, namespace.getId());
}

/* frameworks/base/core/jni/android_net_LocalSocketImpl.cpp */
/*
 * Native side of LocalSocketImpl.connectLocal() (excerpt). The visible part
 * forwards fd, the UTF-8 name and the namespace id to
 * socket_local_client_connect(), always passing SOCK_STREAM as the type.
 * fd, nameUtf8 and ret are set up in the elided code.
 */
static void
socket_connect_local(JNIEnv *env, jobject object,
                        jobject fileDescriptor, jstring name, jint namespaceId)
{
	//......
    ret = socket_local_client_connect(
                fd,
                nameUtf8.c_str(),
                namespaceId,
                SOCK_STREAM);

    //......
}

/* 文件:system/core/libcutils/socket_local_client_unix.c */
/*
 * Connects an existing socket descriptor to the local socket identified by
 * name and namespaceId. The type parameter is unused.
 *
 * Returns fd on success, or -1 when the address cannot be built or
 * connect() fails.
 */
int socket_local_client_connect(int fd, const char *name, int namespaceId, 
        int type UNUSED)
{
    struct sockaddr_un remote;
    socklen_t remote_len;

    /* Build the sockaddr_un for the target; socket_make_sockaddr_un() does
     * the name/namespace translation. */
    if (socket_make_sockaddr_un(name, namespaceId, &remote, &remote_len) < 0) {
        return -1;
    }

    /* This is the actual connect() call. */
    if (connect(fd, (struct sockaddr *) &remote, remote_len) < 0) {
        return -1;
    }

    return fd;
}

//分析下socket_make_sockaddr_un函数
/* 文件:system/core/libcutils/socket_local_client_unix.c */
/*
 * Fills *p_addr and *alen for the socket identified by name within the
 * given namespace (excerpt: the ABSTRACT and FILESYSTEM cases are elided).
 *
 * Returns 0 on success, or -1 for an invalid namespace id or when the
 * reserved-namespace path would not fit into sun_path.
 */
int socket_make_sockaddr_un(const char *name, int namespaceId, 
        struct sockaddr_un *p_addr, socklen_t *alen)
{
    memset (p_addr, 0, sizeof (*p_addr));
    size_t namelen;

    switch (namespaceId) {
        case ANDROID_SOCKET_NAMESPACE_ABSTRACT:
			//......
        break;

        case ANDROID_SOCKET_NAMESPACE_RESERVED:
        	// ANDROID_RESERVED_SOCKET_PREFIX is defined as "/dev/socket/"
            namelen = strlen(name) + strlen(ANDROID_RESERVED_SOCKET_PREFIX);
            /* unix_path_max appears to be missing on linux */
            // Fail when prefix + name plus a terminating byte exceed the
            // space available after the sun_path offset.
            if (namelen > sizeof(*p_addr) 
                    - offsetof(struct sockaddr_un, sun_path) - 1) {
                goto error;
            }

            strcpy(p_addr->sun_path, ANDROID_RESERVED_SOCKET_PREFIX);
            strcat(p_addr->sun_path, name);
            // for name "installd", sun_path is now "/dev/socket/installd"
        break;

        case ANDROID_SOCKET_NAMESPACE_FILESYSTEM:
            //......
        break;
        default:
            // invalid namespace id
            return -1;
    }

    p_addr->sun_family = AF_LOCAL;
    // At this point the sockaddr_un carries the same path that init used
    // when it bound the server socket (/dev/socket/installd).
    *alen = namelen + offsetof(struct sockaddr_un, sun_path) + 1;
    return 0;
error:
    return -1;
}

至此客户端的创建和请求连接的过程也分析完了,请求连接成功后,客户端就可以和服务端通信了,例如前面提到的发送"ping"消息和获取"ping"消息的返回。

四、总结

想要了解Android LocalSocket的工作机制,主要需要分析清楚以下两点:

  1. 客户端和服务端是如何基于Socket编程模型做好通信前的准备的
  2. framework客户端的LocalSocketAddress是如何和native服务端的struct sockaddr_un指向同一个通信地址的