/* disinfect.c */

/*
 * Build:
 *   gcc -O2 disinfect.c -o disinfect
 * Usage:
 *   ./disinfect <executable>
 */

#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <elf.h>
#include <errno.h>

/*
 * Everything the tool tracks about the executable being examined.
 * The fields are filled in by load_executable().
 */
typedef struct elfdesc {
	/* ELF header, program header table and section header table,
	 * all pointing into the mapping at `mem`. */
	Elf64_Ehdr *ehdr;
	Elf64_Phdr *phdr;
	Elf64_Shdr *shdr;
	/* Virtual address of the segment whose file offset is 0 (text). */
	Elf64_Addr textVaddr;
	/* Virtual address of the segment with a non-zero file offset (data). */
	Elf64_Addr dataVaddr;
	/* File offset (p_offset) of that data segment. */
	Elf64_Addr dataOff;
	/* p_filesz of the text and data segments respectively. */
	size_t textSize;
	size_t dataSize;
	/* Base of the private, writable mapping of the whole file. */
	uint8_t *mem;
	/* fstat() result for the file (size, mode and owner are used later). */
	struct stat st;
	/* Path the file was opened from; the caller's string, not a copy. */
	char *path;
} elfdesc_t;
/* Scratch file (in the current directory) that the cleaned image is written
 * to before it is renamed over the original executable. */
#define TMP ".disinfect_file.xyz"

/*
 * Scan the mapped file for a "push imm32; ret" trampoline and return the
 * pushed immediate when it looks like a plausible original entry point.
 *
 * Byte pattern searched for:
 *     68 xx xx xx xx    push $imm32
 *     C3                ret
 *
 * elf: descriptor whose `mem` and `st.st_size` describe the mapped file.
 *
 * Returns the first immediate found that lies in [0x400000, 0x4fffff),
 * or 0 when the file contains no such trampoline.
 */
uint32_t locate_orig_entry(elfdesc_t *elf)
{
	const uint8_t *mem = elf->mem;
	size_t size = elf->st.st_size > 0 ? (size_t)elf->st.st_size : 0;
	size_t i;
	uint32_t entry;

	/* The pattern is 6 bytes long: stop while all of it is still in range. */
	for (i = 0; i + 6 <= size; i++) {
		if (mem[i] != 0x68 || mem[i + 5] != 0xc3)
			continue;
		/* The immediate sits at an arbitrary alignment, so copy it out
		 * instead of dereferencing a casted pointer. */
		memcpy(&entry, &mem[i + 1], sizeof(entry));
		if (entry >= 0x400000 && entry < 0x4fffff)
			return entry;
	}
	/* Not found. */
	return 0;
}

/*
 * Find the file offset of the start-up code by searching for its first
 * instructions.  The signature is the x86-64 encoding of:
 *
 *     31 ed       xor  %ebp,%ebp
 *     49 89 d1    mov  %rdx,%r9
 *     5e          pop  %rsi
 *     48 89 e2    mov  %rsp,%rdx
 *
 * which is how the usual `_start` stub begins.
 *
 * elf: descriptor whose `mem` and `st.st_size` describe the mapped file.
 *
 * Returns the offset of the first match, or 0 when the signature is not
 * present (0 therefore doubles as "not found").
 */
uint32_t locate_glibc_init_offset(elfdesc_t *elf)
{
	static const uint8_t start_sig[] = {
		0x31, 0xed,
		0x49, 0x89, 0xd1,
		0x5e,
		0x48, 0x89, 0xe2
	};
	const uint8_t *mem = elf->mem;
	size_t size = elf->st.st_size > 0 ? (size_t)elf->st.st_size : 0;
	size_t i;

	/* Only compare where the whole signature still fits inside the file,
	 * so the scan never reads past the end of the mapping. */
	for (i = 0; i + sizeof(start_sig) <= size; i++) {
		if (memcmp(&mem[i], start_sig, sizeof(start_sig)) == 0)
			return (uint32_t)i;
	}

	return 0;
}
/*
 * Remove a PLT/GOT hook on puts().
 *
 * The section headers are scanned for .dynsym (plus its string table),
 * .rela.plt and .plt.  Every .rela.plt entry is then checked; for the one
 * that refers to "puts", the GOT slot it relocates is inspected.  When the
 * slot does not point into .plt it is rewritten to its lazy-binding value,
 * i.e. the `pushq` instruction 6 bytes into the matching PLT stub.
 *
 * In the x86-64 lazy-binding layout PLT[0] is the resolver stub and
 * PLT[n + 1] serves .rela.plt entry n, so the stub is selected by the
 * relocation index, not by the symbol index.
 *
 * elf: descriptor filled in by load_executable(); the patch is applied to
 *      the in-memory image at elf->mem only.
 *
 * Returns 0 when the slot was repaired or when no relocation names puts,
 * and -1 when the required sections are missing or the slot could not be
 * repaired.
 * NOTE(review): a slot that already points into .plt (i.e. is not hooked)
 * is also reported as a failure, as before; the visible caller ignores the
 * result.
 * NOTE(review): sh_offset/sh_name/st_name values are still trusted; only
 * the table indices and the GOT offset are range-checked here.
 */
int disinfect_pltgot(elfdesc_t *elf)
{
	enum { PLT_ENTRY_SIZE = 16, PLT_PUSH_OFFSET = 6 };

	Elf64_Ehdr *ehdr = elf->ehdr;
	Elf64_Shdr *shdr = elf->shdr;
	uint8_t *mem = elf->mem;
	size_t file_size = elf->st.st_size > 0 ? (size_t)elf->st.st_size : 0;
	Elf64_Sym *symtab = NULL;
	Elf64_Rela *rela = NULL;
	Elf64_Addr plt_addr = 0;
	Elf64_Off plt_off = 0;
	size_t plt_size = 0, symtab_count = 0, rela_count = 0;
	const char *shstrtab;
	const char *strtab = NULL;
	size_t i;

	/* Without a valid section-name string table nothing can be located. */
	if (ehdr->e_shstrndx >= ehdr->e_shnum) {
		printf("没有找到relocation/symbol/plt info!!!\n");
		return -1;
	}
	shstrtab = (const char *)&mem[shdr[ehdr->e_shstrndx].sh_offset];

	/* Walk every section header and pick out the three we need. */
	for (i = 0; i < ehdr->e_shnum; i++) {
		switch (shdr[i].sh_type) {
		case SHT_DYNSYM:
			/* sh_link of a symbol table is the section index of the
			 * string table holding its names. */
			if (shdr[i].sh_link >= ehdr->e_shnum)
				break;
			symtab = (Elf64_Sym *)&mem[shdr[i].sh_offset];
			symtab_count = shdr[i].sh_size / sizeof(Elf64_Sym);
			strtab = (const char *)&mem[shdr[shdr[i].sh_link].sh_offset];
			break;
		case SHT_RELA:
			if (strcmp(&shstrtab[shdr[i].sh_name], ".rela.plt") == 0) {
				rela = (Elf64_Rela *)&mem[shdr[i].sh_offset];
				rela_count = shdr[i].sh_size / sizeof(Elf64_Rela);
			}
			break;
		case SHT_PROGBITS:
			if (strcmp(&shstrtab[shdr[i].sh_name], ".plt") == 0) {
				plt_off = shdr[i].sh_offset;
				plt_addr = shdr[i].sh_addr;
				plt_size = shdr[i].sh_size;
			}
			break;
		default:
			break;
		}
	}
	if (plt_off == 0 || symtab == NULL || rela == NULL) {
		printf("没有找到relocation/symbol/plt info!!!\n");
		return -1;
	}

	/* Look at each PLT relocation in turn (rela[i], not just the first). */
	for (i = 0; i < rela_count; i++) {
		size_t symindex = ELF64_R_SYM(rela[i].r_info);
		size_t slot_off = (i + 1) * PLT_ENTRY_SIZE;
		Elf64_Off gotoff;
		uint64_t addr, fixed;

		if (symindex >= symtab_count)
			continue;
		if (strcmp(&strtab[symtab[symindex].st_name], "puts") != 0)
			continue;

		printf("尝试消毒PLT/GOT!!!\n");
		/* Translate the slot's virtual address into a file offset
		 * through the data segment's offset/address pair. */
		gotoff = elf->dataOff + (rela[i].r_offset - elf->dataVaddr);
		if (gotoff <= file_size && file_size - gotoff >= sizeof(addr)) {
			/* GOT entries are 64 bits wide; read and write all of it. */
			memcpy(&addr, &mem[gotoff], sizeof(addr));
			if (!(addr >= plt_addr && addr < plt_addr + plt_size) &&
			    slot_off + PLT_ENTRY_SIZE <= plt_size) {
				fixed = plt_addr + slot_off + PLT_PUSH_OFFSET;
				memcpy(&mem[gotoff], &fixed, sizeof(fixed));
				printf("成功消毒PLT/GOT表!!!\n");
				return 0;
			}
		}
		printf(" PLT/GOT表解毒失败!!!\n");
		return -1;
	}

	return 0;
}

/*
 * Undo the infection in the mapped image and replace the file on disk.
 *
 * The infection is recognised by a text segment that starts below
 * 0x400000; the gap (paddingSize) is the number of bytes that were
 * inserted right after the ELF header.  The routine
 *   1. makes a best-effort attempt to unhook the PLT/GOT,
 *   2. clears the marker in e_ident[EI_PAD],
 *   3. shifts program/section header offsets back by paddingSize and
 *      restores the text segment's address and size,
 *   4. points e_entry at the start-up code found by
 *      locate_glibc_init_offset(),
 *   5. writes the ELF header followed by everything after the inserted
 *      bytes to TMP, then renames TMP over elf->path.
 *
 * elf: descriptor filled in by load_executable().
 *
 * Returns 0 on success, -1 when the file does not show the expected
 * layout or when any step of writing the new file fails.  On failure the
 * temporary file is removed and the original file is left untouched.
 *
 * NOTE(review): headers whose offset is 0 (e.g. the null section) still
 * wrap around when paddingSize is subtracted, and when the start-up
 * signature is not found e_entry is set to 0x400000 - paddingSize; both
 * behaviours are unchanged from the previous version.
 */
int disinfect(elfdesc_t *elf)
{
	size_t paddingSize, file_size, skip, tail;
	Elf64_Phdr *phdr = elf->phdr;
	Elf64_Shdr *shdr = elf->shdr;
	uint32_t text_offset = 0;
	char *strtab = NULL;
	uint8_t *mem = elf->mem;
	int i, textfound, fd;

	/* A text segment at or above 0x400000 is not this infection. */
	if (elf->textVaddr >= 0x400000) {
		printf("不是所要消除的特征!!!\n");
		return -1;
	}
	/* Distance the text segment was extended backwards by the virus. */
	paddingSize = 0x400000 - elf->textVaddr;

	/* The rebuilt file drops paddingSize bytes after the ELF header, so
	 * the file must be at least that large; otherwise the tail length
	 * computed below would go negative. */
	if (elf->st.st_size < (off_t)sizeof(Elf64_Ehdr))
		return -1;
	file_size = (size_t)elf->st.st_size;
	if (paddingSize > file_size - sizeof(Elf64_Ehdr))
		return -1;
	/* phdr[0] and phdr[1] are addressed directly below. */
	if (elf->ehdr->e_phnum < 2)
		return -1;

	/* Best effort: a missing or unhooked PLT/GOT does not stop the rest. */
	(void)disinfect_pltgot(elf);

	/* Clear the 4-byte infection marker (the field is not 4-byte aligned). */
	memset(&elf->ehdr->e_ident[EI_PAD], 0, sizeof(uint32_t));

	/* The first two entries (expected to be PT_PHDR and PT_INTERP) precede
	 * the text segment in the table, so move them back explicitly. */
	phdr[0].p_offset -= paddingSize;
	phdr[1].p_offset -= paddingSize;

	/* Restore the text segment and move every later segment back. */
	for (textfound = 0, i = 0; i < elf->ehdr->e_phnum; i++) {
		if (textfound) {
			phdr[i].p_offset -= paddingSize;
			continue;
		}
		if (phdr[i].p_type == PT_LOAD && phdr[i].p_offset == 0 &&
		    (phdr[i].p_flags & PF_X)) {
			if (phdr[i].p_paddr == phdr[i].p_vaddr) {
				phdr[i].p_vaddr += paddingSize;
				phdr[i].p_paddr += paddingSize;
			} else {
				phdr[i].p_vaddr += paddingSize;
			}
			/* Shrink the text segment back to its original size. */
			phdr[i].p_filesz -= paddingSize;
			phdr[i].p_memsz -= paddingSize;
			phdr[i].p_align = 0x200000;
			/* The following entry only exists if text is not last. */
			if (i + 1 < elf->ehdr->e_phnum)
				phdr[i + 1].p_align = 0x200000;
			textfound = 1;
		}
	}

	/* File offset of the start-up code in the infected image (0 = none). */
	text_offset = locate_glibc_init_offset(elf);

	/* Fix up the section headers. */
	strtab = (char *)&mem[shdr[elf->ehdr->e_shstrndx].sh_offset];
	for (i = 0; i < elf->ehdr->e_shnum; i++) {
		if (!strcmp(&strtab[shdr[i].sh_name], ".text")) {
			/* Leave .text alone when the start-up code was not found. */
			if (text_offset == 0)
				continue;
			shdr[i].sh_offset = text_offset - paddingSize;
			shdr[i].sh_addr = (text_offset - paddingSize) + 0x400000;
			continue;
		}
		shdr[i].sh_offset -= paddingSize;
	}

	/* Both header tables move back along with the rest of the file. */
	elf->ehdr->e_shoff -= paddingSize;
	elf->ehdr->e_phoff -= paddingSize;

	/* Original entry point: start-up code offset in the cleaned file,
	 * rebased to 0x400000. */
	elf->ehdr->e_entry = 0x400000 + text_offset;
	elf->ehdr->e_entry -= paddingSize;

	/* Rebuild the file: ELF header first, then everything that follows
	 * the inserted bytes. */
	fd = open(TMP, O_CREAT | O_TRUNC | O_WRONLY, elf->st.st_mode);
	if (fd < 0)
		return -1;

	if (write(fd, mem, sizeof(Elf64_Ehdr)) != (ssize_t)sizeof(Elf64_Ehdr))
		goto fail;

	skip = paddingSize + sizeof(Elf64_Ehdr);
	tail = file_size - skip;
	if (write(fd, mem + skip, tail) != (ssize_t)tail)
		goto fail;

	if (fchown(fd, elf->st.st_uid, elf->st.st_gid) < 0)
		goto fail;

	/* Close before renaming so the descriptor is not leaked and a failed
	 * close is noticed. */
	if (close(fd) < 0) {
		fd = -1;
		goto fail;
	}
	fd = -1;

	/* Replace the original only once the new image is complete. */
	if (rename(TMP, elf->path) < 0)
		goto fail;

	return 0;

fail:
	if (fd >= 0)
		close(fd);
	unlink(TMP);
	return -1;
}
/* Return non-zero when a table of `count` entries of `entsize` bytes that
 * starts at file offset `off` lies entirely inside a file of `size` bytes. */
static int table_fits(uint64_t off, uint64_t count, uint64_t entsize, uint64_t size)
{
	if (off > size)
		return 0;
	return (size - off) / entsize >= count;
}

/*
 * Map an ELF64 file and record what the rest of the tool needs.
 *
 * path: file to open; the pointer itself is stored in elf->path.
 * elf:  descriptor to fill in.  It is zeroed first, so fields for segments
 *       that are not present stay 0.
 *
 * The file is mapped privately and writable, so later patches change only
 * the in-memory copy.  Among the PT_LOAD segments, the one at file offset 0
 * is recorded as text and one at a non-zero offset as data (the last such
 * entry wins).
 *
 * Returns 0 on success.  Returns -1 when the file cannot be opened, stat'ed
 * or mapped, is not an ELF64 image, or has a program/section header table
 * that extends past the end of the file.  Nothing stays open or mapped on
 * failure; on success the mapping is owned by the caller.
 */
int load_executable(const char *path, elfdesc_t *elf)
{
	uint8_t *mem;
	Elf64_Ehdr *ehdr;
	Elf64_Phdr *phdr;
	Elf64_Shdr *shdr;
	int fd;
	struct stat st;
	size_t size;
	int i;

	memset(elf, 0, sizeof(*elf));

	if ((fd = open(path, O_RDONLY)) < 0) {
		perror("open");
		return -1;
	}
	if (fstat(fd, &st) < 0) {
		perror("fstat");
		close(fd);
		return -1;
	}
	/* Too small to even hold an ELF header. */
	if (st.st_size < (off_t)sizeof(Elf64_Ehdr)) {
		close(fd);
		return -1;
	}
	size = (size_t)st.st_size;

	mem = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (mem == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return -1;
	}
	/* The mapping stays valid after the descriptor is closed. */
	close(fd);

	ehdr = (Elf64_Ehdr *)mem;
	/* Everything below interprets the file as ELF64; refuse anything else
	 * and any header table that is not fully inside the file. */
	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
	    ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
	    !table_fits(ehdr->e_phoff, ehdr->e_phnum, sizeof(Elf64_Phdr), size) ||
	    !table_fits(ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf64_Shdr), size)) {
		munmap(mem, size);
		return -1;
	}
	phdr = (Elf64_Phdr *)&mem[ehdr->e_phoff];
	shdr = (Elf64_Shdr *)&mem[ehdr->e_shoff];

	elf->st = st;
	/* Only loadable segments describe text/data; other entries (for
	 * example one with offset 0 and address 0) must not overwrite them. */
	for (i = 0; i < ehdr->e_phnum; i++) {
		if (phdr[i].p_type != PT_LOAD)
			continue;
		if (phdr[i].p_offset == 0) {
			elf->textVaddr = phdr[i].p_vaddr;
			elf->textSize = phdr[i].p_filesz;
		} else {
			elf->dataOff = phdr[i].p_offset;
			elf->dataVaddr = phdr[i].p_vaddr;
			elf->dataSize = phdr[i].p_filesz;
		}
	}
	elf->mem = mem;
	elf->ehdr = ehdr;
	elf->phdr = phdr;
	elf->shdr = shdr;
	elf->path = (char *)path;
	return 0;
}
/*
 * Check whether the file carries the infection marker.
 *
 * The marker is the 32-bit value 0x15D25 stored in the padding bytes of
 * the ELF identification array, starting at e_ident[EI_PAD].
 *
 * elf: descriptor whose `ehdr` points at the mapped ELF header.
 *
 * Returns 1 when the marker is present, 0 otherwise.
 */
int test_for_skeksi(elfdesc_t *elf)
{
	uint32_t magic;

	/* e_ident[EI_PAD] is not 4-byte aligned, so copy the bytes out rather
	 * than loading through a casted pointer. */
	memcpy(&magic, &elf->ehdr->e_ident[EI_PAD], sizeof(magic));
	return (magic == 0x15D25);
}

/*
 * Entry point: disinfect the executable named on the command line.
 *
 * Exit status is 0 after a successful disinfection and also when only the
 * usage line is printed; it is -1 when the file cannot be loaded, is not
 * infected, or cannot be disinfected.
 */
int main(int argc, char **argv)
{
	elfdesc_t target;
	const char *victim;

	if (argc < 2) {
		printf("Usage: %s <executable>\n", argv[0]);
		return 0;
	}
	victim = argv[1];

	/* Map the file and collect the header information. */
	if (load_executable(victim, &target) < 0) {
		printf("加载失败: %s\n", victim);
		return -1;
	}

	/* Nothing to do unless the infection marker is present. */
	if (!test_for_skeksi(&target)) {
		printf("File: %s, 没有感染virus\n", victim);
		return -1;
	}
	printf("File: %s, 已经感染virus! 尝试消毒!\n", victim);

	if (disinfect(&target) < 0) {
		printf("消毒失败 file: %s\n", victim);
		return -1;
	}

	printf("消毒成功: %s\n", victim);
	return 0;
}