该程序使用 pthread 统计一段文本中每个单词的出现次数,每个线程(thread)负责处理一行字符串。

使用一个map<string, size_t> word_count作为全局变量。

在线程函数(kernel function)wordCount 中,使用 pthread_mutex_lock / pthread_mutex_unlock 保护对全局变量 word_count 的修改,并使用 stringstream 将字符串拆分为单词。


输入:

first sentence.

second sentence,

third sentence.

fourth sentence.

five sentence

six sentence

seven sentence


输出:

first occurs 1 time

five occurs 1 time

fourth occurs 1 time

second occurs 1 time

sentence occurs 7 times

seven occurs 1 time

six occurs 1 time

third occurs 1 time


Makefile

a.out : map.o

        g++ -std=c++0x -o a.out map.o -lpthread


map.o : map.cpp

        g++ -std=c++0x -c map.cpp


运行:

cat paragraph.txt | ./a.out


代码:

#include <pthread.h>
#include <map>
#include <string>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <vector>
using namespace std;

// Intended number of input lines per worker thread; main derives the
// thread count from this value.
#define LINE_PER_THREAD	1

// Mutex that wordCount locks around its updates to word_count.
pthread_mutex_t count_mutex = PTHREAD_MUTEX_INITIALIZER;

// Global table mapping each word to its number of occurrences. It is filled
// by the worker threads and printed by main after all threads are joined.
map<string, size_t> word_count;

// Argument bundle handed to each worker thread through the void* parameter
// of pthread_create.
struct para {
	int tidx;	// index of the thread, assigned by main (not read by wordCount)
	string str;	// text that the thread splits into words and counts
};

//kernel function
void * wordCount (void *pt){
	struct para *local = (struct para *) pt;
	string local_str = local->str;
	pthread_mutex_lock(&count_mutex);
	stringstream ss(local_str);
	string token;
	while(ss >> token)
		++word_count[token];
	pthread_mutex_unlock(&count_mutex);
}

int main(){
	
	string word;
	vector<string> vstr;
	int num_lines = 0;
	while(cin && getline(cin, word) && !cin.eof()){
		num_lines++;
		word.erase(remove(word.begin(), word.end(),','), word.end());
		word.erase(remove(word.begin(), word.end(),'.'), word.end());
		vstr.push_back(word);
	}
	int NUM_THREADS = (num_lines + LINE_PER_THREAD - 1) / LINE_PER_THREAD;

	pthread_t threads[NUM_THREADS];
	for(int i = 0; i < NUM_THREADS; i++){
		struct para *str_para = new para();
		str_para->tidx = i;
		str_para->str = vstr[i];
		pthread_create(&threads[i], NULL, wordCount, (void *) str_para);
	}
	for(int i = 0; i < NUM_THREADS; i++)
		pthread_join(threads[i], NULL);
 
	map<string, size_t>::iterator it;
	for (it = word_count.begin(); it != word_count.end(); ++it){
		cout << it->first << " occurs " << it->second 
		<< ((it->second > 1) ? " times" : " time") << endl;
	}
}