可以通过组合查询表达式(与、或、非)对文本进行检索。开发环境:VS2015。
main 函数:读取存有数据的文件并进行检索。提供两种入口:按单词查询,以及按表达式查询。
1 #include <iostream>
2 #include <fstream>
3
4 #include "TextSearch.h"
5 #include "TextQueryI.h"
6
7
8 using namespace std;
9
10
11 int main() {
12
13 ifstream is;
14 is.open("c:/tmp/data.txt", ios::in);
15
16 // Search word
17 // TextSearch::run(is);
18
19 // Search word with Query
20 TextQueryI q = TextQueryI("good") | TextQueryI("nice") & TextQueryI("day");
21 TextSearch::run(is, q);
22
23 return 0;
24 }
封装检索功能的入口
TextSearch.h
1 #pragma once
2
3 #include <fstream>
4
5 #include "TextQueryI.h"
6
7 using std::ifstream;
8
9 // 搜索功能入口
10 class TextSearch
11 {
12 public:
13 TextSearch();
14 ~TextSearch();
15 public:
16 static int run(ifstream &infile);
17 static int run(ifstream &infile, TextQueryI &query);
18 };
TextSearch.cpp
1 #include "TextSearch.h"
2
3 #include <iostream>
4 #include <string>
5
6 #include "TextQuery.h"
7 #include "TextQueryI.h"
8
9 using std::cin; using std::cout; using std::endl;
10 using std::string;
11
12 int TextSearch::run(ifstream & infile)
13 {
14 // 读取、分析数据
15 TextQuery tq(infile);
16
17 // serach query
18 while (true) {
19 cout << "enter search word, or [q] to quit:" << endl;
20 string s;
21 if (!(cin >> s) || s == "q") break;
22 print(cout, tq.query(s)) << endl;
23 }
24 return 0;
25 }
26
27 int TextSearch::run(ifstream &infile, TextQueryI &query) {
28 cout << "start process query: ";
29 TextQuery tq(infile);
30 print(cout, query.eval(tq)) << endl;
31 return 0;
32 }
TextQuery.h 文本内容分析,保存原始数据的分词分析结果和行号信息。并提供单个词检索的功能。
1 #pragma once
2
3 #include <fstream>
4 #include <string>
5 #include <map>
6 #include <set>
7 #include <vector>
8 #include <memory>
9
10 #include <iostream>
11
12 using std::string;
13 using std::vector;
14 using std::endl;
15
class QueryResult;

/// Tokenises a text and answers single-word lookups.
///
/// Keeps the original lines plus, for every word, the set of line numbers
/// on which that word appears.
class TextQuery
{
public:
    /// Zero-based index of a line in the stored text.
    using LineNo = std::vector<std::string>::size_type;

    /// Reads the stream line by line and builds the word index.
    /// `explicit`: a stream should never convert to an index implicitly.
    explicit TextQuery(std::ifstream&);

public:
    /// Looks up one word. An unknown word yields a result with no lines.
    QueryResult query(const std::string&) const;

private:
    // The input text; each element is one line.
    std::shared_ptr<std::vector<std::string>> data;
    // The index: key is a word, value is the set of line numbers containing it.
    std::map<std::string, std::shared_ptr<std::set<LineNo>>> wm;
};

/// Outcome of a query: the query text, the matching line numbers and a
/// shared handle to the text those numbers refer to.
class QueryResult
{
    friend std::ostream& print(std::ostream &, const QueryResult &);
public:
    using LineNo = TextQuery::LineNo;

    /// @param word    textual form of the query that produced this result.
    /// @param pLineNo matching line numbers (ordered, zero-based).
    /// @param data    the text the line numbers index into.
    QueryResult(std::string word,
                std::shared_ptr<std::set<LineNo>> pLineNo,
                std::shared_ptr<std::vector<std::string>> data) :
        query_word(word), lines(pLineNo), files(data) {}

    // The accessors only read state, so they are const and therefore usable
    // on a const QueryResult (for example through a const reference).

    /// Shared handle to the original text.
    auto getFiles() const { return files; }
    /// First matching line number.
    auto begin() const { return lines->begin(); }
    /// One past the last matching line number.
    auto end() const { return lines->end(); }

private:
    // The query text.
    std::string query_word;
    // Line numbers matching the query, ordered.
    std::shared_ptr<std::set<LineNo>> lines;
    // Reference to the original data.
    std::shared_ptr<std::vector<std::string>> files;
};
TextQuery.cpp
1 #include "TextQuery.h"
2
3 #include <sstream>
4
5 using std::getline;
6 using std::istringstream;
7
8 TextQuery::TextQuery(std::ifstream &is): data(new vector<std::string>)
9 {
10 string text;
11 while (getline(is, text)) {
12 // 读取一行并记录数据,方便给出查询结果
13 data->push_back(text);
14 LineNo line_no = data->size() - 1;
15 // 单词分解
16 istringstream line(text);
17 string word;
18 // 单词查询结果记录
19 while (line >> word) {
20 auto &lines = wm[word]; // 获取智能指针,如果map没有会自动创建
21 if (!lines) {
22 lines.reset(new std::set<LineNo>); // 配置智能指针的对象
23 }
24 lines->insert(line_no); // 添加行号,如果重复什么都不做
25 }
26 }
27 }
28
29 QueryResult TextQuery::query(const string & word) const
30 {
31 static std::shared_ptr<std::set<LineNo> > p_no_result(new std::set<LineNo>);
32 auto loc = wm.find(word);
33 if (loc == wm.end()) {
34 return QueryResult(word, p_no_result, data);
35 }
36 else {
37 return QueryResult(word, loc->second, data);
38 }
39 }
40
41 // 格式化打印结果,类似于提供tostring
42 std::ostream& print(std::ostream &os, const QueryResult &qr) {
43 os << qr.query_word << " occours " << qr.lines->size() <<
44 (qr.lines->size() > 1 ? "times" : "time") << endl;
45 for (auto num : *qr.lines) {
46 os << "at line:" << num + 1 << " > ";
47 os << *(qr.files->begin() + num) << endl; // 尽量使用迭代器,不使用下标,获得较好的扩展性
48 }
49 return os;
50 }
TextQueryI.h 使用表达式进行查询的接口,用于屏蔽下层。
1 #pragma once
2 #include <vector>
3 #include <string>
4 #include <memory>
5
6 #include "TextQuery.h"
7 #include "TextQueryBase.h"
8 #include "TextQueryWordQuery.h"
9
10 // TextQuery接口类
11 class TextQueryI {
12 // 有一个私有的构造函数,需要运算符是友元
13 friend TextQueryI operator~(const TextQueryI &);
14 friend TextQueryI operator&(const TextQueryI &, const TextQueryI &);
15 friend TextQueryI operator|(const TextQueryI &, const TextQueryI &);
16
17 public:
18 using LineNo = std::vector<std::string>::size_type;
19 TextQueryI(const std::string &s): q(new TextQueryWordQuery(s)) {}
20 // 作为TextQueryBase的唯一接口,自己实现对应的方法来屏蔽TextQueryBase的行为
21 QueryResult eval(const TextQuery &tq) const
22 { return q->eval(tq); }
23 std::string rep() const
24 { return q->rep(); }
25 private:
26 TextQueryI(std::shared_ptr<TextQueryBase> query): q(query) {}
27 std::shared_ptr<TextQueryBase> q;
28 };
29
30 std::ostream &
31 operator<<(std::ostream &os, const TextQueryI &tq);
32
33 TextQueryI operator~(const TextQueryI &);
34 TextQueryI operator&(const TextQueryI &, const TextQueryI &);
35 TextQueryI operator|(const TextQueryI &, const TextQueryI &);
TextQueryI.cpp 实现输出运算符 operator<<;把定义放在 .cpp 文件中,避免头文件被多处包含时产生重复定义。
1 #include "TextQueryI.h"
2
3 std::ostream &
4 operator<<(std::ostream &os, const TextQueryI &tq) {
5 return os << tq.rep();
6 }
TextQueryBase.h 利用虚函数实现表达式功能实现的抽象类。
1 #pragma once
2
3 #include <string>
4
5 #include "TextQuery.h"
6
7 class TextQueryBase
8 {
9 // 用户不会使用TextQueryBase类,所有使用都通过TextQueryI完成
10 friend class TextQueryI;
11 protected:
12 using LineNo = TextQuery::LineNo;
13 virtual ~TextQueryBase() = default;
14 private:
15 // 执行查询
16 virtual QueryResult eval(const TextQuery &) const = 0;
17 // 获得查询对应的string形式表示,类似toString
18 virtual std::string rep() const = 0;
19 };
TextQueryNot.h 实现非逻辑的对象。完成对非逻辑表达式的string表示、完成对分词结果的Not分析。
1 #pragma once
2
3 #include <memory>
4
5 #include "TextQueryI.h"
6 #include "TextQueryBase.h"
7
8 class TextQueryNot : public TextQueryBase {
9 friend TextQueryI operator~(const TextQueryI &);
10 private:
11 TextQueryNot(const TextQueryI &q) : query(q) {}
12 virtual QueryResult eval(const TextQuery &) const override;
13 // 获得查询的string表示?
14 virtual std::string rep() const override {
15 return "~(" + query.rep() + ")";
16 }
17 private:
18 TextQueryI query;
19 };
20
21 inline TextQueryI operator~(const TextQueryI &operand) {
22 return std::shared_ptr<TextQueryBase>(new TextQueryNot(operand));
23 }
TextQueryNot.cpp 实现非逻辑的代码。完成对分词结果进行非逻辑的加工。
1 #include "TextQueryNot.h"
2
3 QueryResult
4 TextQueryNot::eval(const TextQuery &tq) const {
5 auto result = query.eval(tq);
6 auto ret = std::make_shared<std::set<LineNo> >();
7 auto beg = result.begin(), end = result.end();
8 auto sz = result.getFiles()->size();
9 for (size_t n = 0; n != sz; n++) {
10 // 考察结果中的每一行
11 if (beg == end || *beg != n) {
12 ret->insert(n);
13 }
14 else if (beg != end) {
15 ++beg;
16 }
17 }
18 return QueryResult(rep(), ret, result.getFiles());
19 }
TextQueryBinary.h 二元运算的共同基类,同时定义了And和Or运算
1 #pragma once
2
3 #include "TextQueryI.h"
4 #include "TextQueryBase.h"
5
6 class TextQueryBinary : public TextQueryBase {
7 protected:
8 TextQueryBinary(const TextQueryI &left, const TextQueryI &right, std::string s):
9 lhs(left), rhs(right), opSymbol(s) {}
10 // 只提供打印方法,实际操作还是虚函数
11 std::string rep() const override {
12 return "(" + lhs.rep() + " " + opSymbol + " " + rhs.rep() + ")";
13 }
14
15 protected:
16 TextQueryI lhs, rhs; // 操作对象
17 std::string opSymbol; // 操作符
18 };
19
20 class TextQueryAnd : public TextQueryBinary {
21 friend TextQueryI operator&(const TextQueryI &, const TextQueryI &);
22 private:
23 TextQueryAnd(const TextQueryI &lhs, const TextQueryI &rhs): TextQueryBinary(lhs, rhs, "&") {}
24 QueryResult eval(const TextQuery &) const override;
25 };
26
27 class TextQueryOr : public TextQueryBinary {
28 friend TextQueryI operator|(const TextQueryI &, const TextQueryI &);
29 private:
30 TextQueryOr(const TextQueryI &lhs, const TextQueryI &rhs) : TextQueryBinary(lhs, rhs, "|") {}
31 QueryResult eval(const TextQuery &) const override;
32 };
TextQueryBinary.cpp
1 #include "TextQueryBinary.h"
2
3 #include <set>
4 #include <algorithm>
5 #include <iterator>
6 #include <memory>
7
8 QueryResult
9 TextQueryOr::eval(const TextQuery &tq) const {
10 auto right = rhs.eval(tq), left = lhs.eval(tq);
11 auto ret = std::make_shared<std::set<LineNo> >(left.begin(), left.end());
12 ret->insert(right.begin(), right.end());
13 return QueryResult(rep(), ret, left.getFiles());
14 }
15
16 QueryResult
17 TextQueryAnd::eval(const TextQuery &tq) const {
18 auto right = rhs.eval(tq), left = lhs.eval(tq);
19 auto ret = std::make_shared<std::set<LineNo> >();
20 std::set_intersection(left.begin(), left.end(), right.begin(), right.end(), std::inserter(*ret, ret->begin()));
21 ret->insert(right.begin(), right.end());
22 return QueryResult(rep(), ret, left.getFiles());
23 }
24
25 TextQueryI operator&(const TextQueryI &lhs, const TextQueryI &rhs) {
26 return std::shared_ptr<TextQueryBase>(new TextQueryAnd(lhs, rhs));
27 }
28
29 TextQueryI operator|(const TextQueryI &lhs, const TextQueryI &rhs) {
30 return std::shared_ptr<TextQueryBase>(new TextQueryOr(lhs, rhs));
31 }
TextQueryWordQuery.h 表达式查询的叶子节点,表示对某个词进行查询,相当于表达式体系中对单个词查询的基础功能调用。
1 #pragma once
2
3 #include <string>
4
5 #include "TextQuery.h"
6 #include "TextQueryBase.h"
7
8 // 对象树的叶子节点
9 class TextQueryWordQuery : public TextQueryBase
10 {
11 friend class TextQueryI;
12 private:
13 TextQueryWordQuery(const std::string &s) : query_word(s) {}
14 virtual QueryResult eval(const TextQuery &tq) const override {
15 return tq.query(query_word);
16 }
17 virtual std::string rep() const override {
18 return query_word;
19 }
20 private:
21 std::string query_word;
22 };