I am trying to implement a socket server by using epoll. I have 2 threads doing 2 tasks:

  1. listening to incoming connection
  2. writing on screen the data the client is sending.

For my test I have the client and the server on the same machine with 3 or 4 clients running.The server works fine until I don't kill one of the client by issuing a CTRL-C: as soon I do that the server starts looping and printing at a very fast rate data from other client. The strange thing is that

  1. the client sends data each 2 seconds but the rate of the server is higher
  2. epoll_wait is also supposed to print something when one of the client disconnects as it is checking also for EPOLLHUP or EPOLLERR
  3. epoll_wait should wait a bit before printing since I gave him a timeout of 3000 milliseconds.

Can you help? Could it be that I am passing in a wrong way the epoll descriptor to the other thread? I cannot understand since the code looks similar to many examples around.

Thanks a lot

Mn


server.cpp


// server.cpp
#include <iostream>
#include <cstdio>
#include <cstring>
extern "C" {
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <netdb.h>
#include <pthread.h>
#include <unistd.h>
}
#define MAX_BACKLOG 10
void *readerthread(void *args)
{
int epfd = *((int *)args);
epoll_event outwait[10];
while (true) {
int retpw = epoll_wait(epfd, outwait, 20, 3000);
if (retpw == -1) {
printf("epoll error %m\n");
} else if (retpw == 0) {
printf("nothing is ready yet\n");
continue;
} else {
for (int i = 0; i < retpw; i++) {
if (outwait[i].events & EPOLLIN) {
int fd = outwait[i].data.fd;
char buf[64];
if (-1 == read(fd, buf, 64)) {
printf("error reading %m\n");
}
printf("%s\n", buf);
} else {
std::cout << "other event" << std::endl;
}
}
}
}
}

int main()
{

int epfd = epoll_create(10);
if (-1 == epfd) {
std::cerr << "error creating EPOLL server" << std::endl;
return -1;
}

pthread_t reader;
int rt = pthread_create(&reader, NULL, readerthread, (void *)&epfd);
if (-1 == rt) {
printf("thread creation %m\n");
return -1;
}

struct addrinfo addr;
memset(&addr, 0, sizeof(addrinfo));
addr.ai_family = AF_INET;
addr.ai_socktype = SOCK_STREAM;
addr.ai_protocol = 0;
addr.ai_flags = AI_PASSIVE;

struct addrinfo *rp, *result;
getaddrinfo("localhost", "59000", &addr, &result);
for (rp = result; rp != NULL; rp = rp->ai_next) {

// we want to take the first ( it could be IP_V4
// or IP_V6 )
break;
}

int sd = socket(AF_INET, SOCK_STREAM, 0);
if (-1 == sd) {
std::cerr << "error creating the socket" << std::endl;
return -1;
}
// to avoid error 'Address already in Use'
int optval = 1;
setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

if (-1 == bind(sd, result->ai_addr, result->ai_addrlen)) {
printf("%m\n");
std::cerr << "error binding" << std::endl;
return -1;
}

while (true) {

std::cout << "listen" << std::endl;
if (-1 == listen(sd, MAX_BACKLOG)) {
std::cerr << "listen didn't work" << std::endl;
return -1;
}

std::cout << "accept" << std::endl;
sockaddr peer;
socklen_t addr_size;
int pfd = accept(sd, &peer, &addr_size);
if (pfd == -1) {
std::cerr << "error calling accept()" << std::endl;
return -1;
}
epoll_event ev;
ev.data.fd = pfd;
ev.events = EPOLLIN;
std::cout << "adding to epoll list" << std::endl;
if (-1 == epoll_ctl(epfd, EPOLL_CTL_ADD, pfd, &ev)) {
printf("epoll_ctl error %m\n");
return -1;
}

}

}


client.cpp
//client.cpp
#include <iostream>
#include <cstring>
#include <cstdio>
extern "C" {
#include <sys/socket.h>
#include <sys/types.h>
#include <netdb.h>
#include <unistd.h>
}

int main()
{

const char *servername = "localhost";
const char *serverport = "59000";

struct addrinfo server_address;
memset(&server_address, 0, sizeof(struct addrinfo));
server_address.ai_family = AF_INET;
server_address.ai_socktype = SOCK_STREAM;
server_address.ai_protocol = 0; // any protocol
server_address.ai_flags = 0;

struct addrinfo *result, *rp;

int res = getaddrinfo(servername, serverport, &server_address, &result);
if (-1 == res) {
std::cout << "I cannot getaddress " << servername << std::endl;
return -1;
}

int fd = socket(server_address.ai_family, server_address.ai_socktype, server_address.ai_protocol);
if (-1 == fd) {
printf("I cannot open a socket %m\n");
return -1;
}

for (rp = result; rp != NULL; rp = rp->ai_next) {
std::cout << "************" << std::endl;
if (-1 == connect(fd, rp->ai_addr, rp->ai_addrlen)) {
close(fd);
} else {
std::cout << "connected" << std::endl;
break;
}
}
if (rp == NULL) {
std::cerr << "I couldn't connect server " << servername << std::endl;
}
while (true) {
sleep(2);
pid_t me = getpid();
char buf[64];
bzero(buf, sizeof(buf));
sprintf(buf, "%ld", me);
write(fd, buf, sizeof(buf));
printf("%s\n", buf);
}
}


g++ -lpthread server.cpp -o server

g++ -lpthread client.cpp -o client


++++++++++++++++++++++++++++++++++++

Answers

A client disconnection is signalled by a EOF condition on the file descriptor. The system considers EOF to be a state in which the file descriptor is 'readable'. But, of course, the EOF condition cannot be read. This is the source of your looping. epoll is acting like the file descriptor for the disconnected client is always readable. You can detect that you have an EOF condition by checking when read returns 0 bytes read.

The only way to deal with an EOF condition is to close the file descriptor in some way. Depending on exactly how the flow of things go, this could be with shutdown(sockfd, SHUT_RD), shutdown(sockfd, SHUT_RDWR) or close(sockfd);.

Unless you know that you need the ​​ shutdown(2)​​ call for whatever reason, I would recommend you use close. Of course, you should remember to tell epoll that the file descriptor is no longer of interest before you close. I'm not sure what will happen if you don't, but one possibility is that epoll will error. Another is that epoll will mysteriously begin reporting events for a new file descriptor that has the same numeric value before you add it to the list epoll should care about.