TextQuery.cpp
程序代码:
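// Open question (possible BUG) 1: the static member `static vector<string>* _text_file;` of Query has not been initialised yet.
// Open question 2: _line_cnt is now defined, but it is allocated dynamically and it is unclear whether it is ever deleted. Revisit once the extra-parenthesis problem is solved.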
// Query entry point.
//
// Hands the shared lookup tables to the query classes, then repeatedly reads
// one query from standard input, builds and evaluates it, and prints the
// matching lines. A query is a sequence of whitespace-separated tokens
// terminated by a single "." token; an empty query (a lone "." or end of
// input) ends the loop.
void TextQuery::query_text()
{
    string text;                                  // current token of the query
    string caps( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );  // characters folded to lower case
    vector<string> _query_text;                   // tokens of the current query
    UserQuery user_query;                         // builds the Query hierarchy

    // Initialise the static data shared by the query classes.
    NotQuery::all_locs( text_locations->second );
    AndQuery::max_col( _line_cnt );
    UserQuery::word_map( word_map );

    do
    {
        // Discard the previous query, if any.
        _query_text.clear();
        cout << "请输入一条查询,每个单词或符号用空格间隔,并用点'.'做为查询语句的结束\n如果要结束程序,请输入一个单独的点\n\n "
             << "输入查询语句==> ";

        // Read tokens up to the terminating "." and lower-case each one.
        while ( cin >> text )
        {
            if ( text == "." )
                break;
            string::size_type pos = 0;
            while (( pos = text.find_first_of( caps, pos )) != string::npos )
                text[pos] = tolower( text[pos] );
            _query_text.push_back( text );
        }

        if ( ! _query_text.empty() )
        {
            // Hand the tokens to the UserQuery object and let it build the
            // Query hierarchy; `query` is the Query* member of TextQuery.
            user_query.query( &_query_text );
            query = user_query.eval_query();

            // Never dereference a missing result: a malformed query may not
            // produce any hierarchy at all.
            if ( ! query )
            {
                cerr << "oops! unable to build a query from the input -- please try again\n";
                continue;   // jumps to the loop condition; the query was non-empty, so we prompt again
            }

            // Evaluate the hierarchy and show the matching lines.
            query->eval();
            display_solution();
            // One extra blank line on the user's terminal.
            cout << endl;
        }
    }
    while ( ! _query_text.empty() );
    cout << "Ok, bye!\n";
}
// Set of characters that filter_text() erases from every word.
// NOTE(review): '<' appears twice in this set; possibly another character
// (e.g. '?' or '>') was intended -- confirm.
string TextQuery::filt_elems( "\",.;:!<<)(\\/" );
// Table handed to AndQuery::max_col() by query_text(). It starts out as an
// empty heap-allocated vector and is replaced by line_cnt().
const vector<int>* TextQuery::_line_cnt = new vector<int>();
// Rebuilds the table that query_text() passes to AndQuery::max_col():
// entry [i] is the largest column (word position) recorded for line i.
//
// words_and_locs : pair of (words, locations); only the locations are read.
//                  A null pair or a null location vector yields an empty table.
//
// The table is refreshed in place rather than re-allocated, so no vector is
// leaked on repeated calls and a pointer to the table that was handed out
// earlier stays valid.
void TextQuery::line_cnt(text_loc* words_and_locs)
{
    vector<int> max_cols;

    loc* p_locs = words_and_locs ? words_and_locs->second : 0;
    if ( p_locs )
    {
        for ( loc::iterator iter = p_locs->begin(); iter != p_locs->end(); ++iter )
        {
            if ( iter->first < 0 )      // a negative line number cannot index the table
                continue;
            const vector<int>::size_type line = iter->first;
            if ( line >= max_cols.size() )
                max_cols.resize( line + 1, 0 );
            if ( max_cols[ line ] < iter->second )
                max_cols[ line ] = iter->second;
        }
    }

    if ( _line_cnt )
    {
        // The pointee is always created by this class as a non-const vector,
        // so dropping the const qualifier to refresh its contents is safe.
        const_cast< vector<int>& >( *_line_cnt ).swap( max_cols );
    }
    else
        _line_cnt = new vector<int>( max_cols );
}
// Loads the file "text.txt" into lines_of_text, one element per line.
// If the file cannot be opened, an error is written to cerr and the program
// exits with status -1.
void TextQuery::retrieve_text()
{
    // The file name is fixed; the program opens it without asking the user.
    const string file_name( "text.txt" );

    ifstream infile( file_name.c_str(), ios::in );
    if ( !infile )
    {
        cerr << "oops! unable to open file "
             << file_name << " -- bailing out!\n";
        exit( -1 );
    }
    cout << "\n";

    lines_of_text = new vector<string>;
    for ( string textline; getline( infile, textline ); )
        lines_of_text->push_back( textline );
}
void TextQuery:: separate_words()
{
vector<string> *words = new vector<string>;
vector<location> *locations =
new vector<location>;
for ( short line_pos = 0; line_pos < lines_of_text->size();
line_pos++ )
{
short word_pos = 0;
string textline = (*lines_of_text)[ line_pos ];
string::size_type eol = textline.length();
string::size_type pos = 0, prev_pos = 0;
while (( pos = textline.find_first_of( ' ', pos ))
!= string::npos )
{
words->push_back(
textline.substr( prev_pos, pos - prev_pos ));
locations->push_back(
make_pair( line_pos, word_pos ));
word_pos++; pos++; prev_pos = pos;
}
words->push_back(
textline.substr( prev_pos, pos - prev_pos ));
locations ->push_back(make_pair(line_pos,word_pos));
}
text_locations = new text_loc( words, locations );
}
// Erases from every stored word each character that occurs in filt_elems.
// Does nothing when filt_elems is empty.
void TextQuery::filter_text()
{
    if ( filt_elems.empty() )
        return;

    vector<string> &words = *text_locations->first;
    for ( vector<string>::iterator w = words.begin(); w != words.end(); ++w )
    {
        string::size_type hit = w->find_first_of( filt_elems );
        while ( hit != string::npos )
        {
            w->erase( hit, 1 );
            // Resume at the same index: the next character has moved into it.
            hit = w->find_first_of( filt_elems, hit );
        }
    }
}
// Applies suffix handling to the stored words: every word longer than three
// characters that ends in 's' is passed to suffix_s().
void TextQuery::suffix_text()
{
    vector<string> &words = *text_locations->first;
    for ( vector<string>::iterator it = words.begin(); it != words.end(); it++ )
    {
        string &w = *it;
        if ( w.size() > 3 && w[ w.size() - 1 ] == 's' )
            suffix_s( w );
        // Handling of other suffixes would go here.
    }
}
// Removes a plural-style trailing 's' from `word`, in place.
//
//   - words ending in "ous", "ius", "ss" or "is" are left unchanged
//   - "...ies" becomes "...y"
//   - "...ses" loses its final "es"
//   - otherwise the final 's' is dropped, together with a preceding
//     apostrophe ("'s")
//
// Words shorter than three characters, or not ending in 's', are left alone.
void TextQuery::suffix_s( string &word )
{
    // Guard the size_type arithmetic below against wrap-around.
    if ( word.size() < 3 || word[ word.size() - 1 ] != 's' )
        return;

    const string::size_type pos3 = word.size() - 3;   // start of the last 3 characters

    // "ous", "ss", "is", "ius" packed into one string and addressed by offset.
    const string suffixes( "oussisius" );
    if ( word.compare( pos3,     3, suffixes, 0, 3 ) == 0 ||    // "ous"
         word.compare( pos3,     3, suffixes, 6, 3 ) == 0 ||    // "ius"
         word.compare( pos3 + 1, 2, suffixes, 2, 2 ) == 0 ||    // "ss"
         word.compare( pos3 + 1, 2, suffixes, 4, 2 ) == 0 )     // "is"
        return;

    const string ies( "ies" );
    if ( word.compare( pos3, 3, ies ) == 0 )
    {
        word.replace( pos3, 3, 1, 'y' );
        return;
    }

    const string ses( "ses" );
    if ( word.compare( pos3, 3, ses ) == 0 )
    {
        word.erase( pos3 + 1, 2 );
        return;
    }

    // Drop the trailing 's'.
    word.erase( pos3 + 2 );

    // Watch out for "'s": drop the apostrophe as well.
    if ( word[ pos3 + 1 ] == '\'' )
        word.erase( pos3 + 1 );
}
// Converts every upper-case letter A-Z in the stored words to lower case.
void TextQuery::strip_caps()
{
    const string upper( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
    vector<string> &words = *text_locations->first;

    for ( vector<string>::iterator it = words.begin(); it != words.end(); ++it )
    {
        string &w = *it;
        for ( string::size_type at = w.find_first_of( upper );
              at != string::npos;
              at = w.find_first_of( upper, at ))
        {
            w[ at ] = tolower( w[ at ] );
        }
    }
}
void TextQuery:: build_word_map()
{
word_map = new map< string, loc*>;
typedef map<string,loc*>::value_type value_type;
typedef set<string>::difference_type diff_type;
set<string> exclusion_set;
ifstream infile( "exclusion_set" );
if ( !infile )
{
static string default_excluded_words[25] = {
"the","and","but","that","then","are","been",
"can","can't","cannot","could","did","for",
"had","have","him","his","her","its","into",
"were","which","when","with","would" };
cerr << "warning! unable to open word exclusion file! -- "
<< "using default set\n";
copy( default_excluded_words, default_excluded_words+25,
inserter( exclusion_set, exclusion_set.begin() ));
}
else
{
istream_iterator< string>
input_set( infile ), eos;
copy( input_set, eos,
inserter( exclusion_set, exclusion_set.begin() ));
}
// 遍历单词, 输入键/值对
vector<string> *text_words = text_locations ->first;
vector<location> *text_locs = text_locations ->second;
register int elem_cnt = text_words ->size();
for ( int ix = 0; ix < elem_cnt; ++ix )
{
string textword = ( *text_words )[ ix ];
if ( textword.size() < 3 || exclusion_set.count( textword ))
continue;
if ( ! word_map->count((*text_words)[ix] ))
{ // 没有, 添加:
loc *ploc = new vector<location>;
ploc->push_back( (*text_locs)[ix] );
word_map->insert( value_type( (*text_words)[ix], ploc ));
}
else (*word_map)[(*text_words)[ix]]->
push_back( (*text_locs)[ix] );
}
}
void TextQuery:: display_map_text()
{
typedef map<string,loc*> map_text;
map_text::iterator iter = word_map->begin(),
iter_end = word_map->end();
while ( iter != iter_end )
{
cout << "word: " << (*iter).first << " (";
int loc_cnt = 0;
loc *text_locs = (*iter).second;
loc::iterator liter = text_locs->begin(),
liter_end = text_locs->end();
while ( liter != liter_end )
{
if ( loc_cnt )
cout << ",";
else ++loc_cnt;
cout << "(" << (*liter).first << "," << (*liter).second << ")";
++liter;
}
cout << ")\n";
++iter;
}
cout << endl;
}
void TextQuery:: display_text_locations()
{
vector<string> *text_words = text_locations ->first;
vector<location> *text_locs = text_locations ->second;
register int elem_cnt = text_words ->size();
if ( elem_cnt != text_locs->size() )
{
cerr << "oops! internal error: word and position vectors "
<< "are of unequal size \n"
<< "words: " << elem_cnt << " "
<< "locs: " << text_locs->size()
<< " -- bailing out!\n";
exit( - 2 );
}
for ( int ix = 0; ix < elem_cnt; ix++ )
{
cout << "word: " << (*text_words)[ ix ] << "\t"
<< "location: ("
<< (*text_locs)[ix].first << ","
<< (*text_locs)[ix].second << ")"
<< "\n";
}
cout << endl;
}
// Prints the current query followed by every line of text in its solution
// set, or an apology when the solution set is empty.
void TextQuery::display_solution()
{
    // Open question from the author: why does the printed query show an
    // extra right parenthesis?
    cout << "\n"
         << "Requested query: "
         << *query << "\n\n";

    const set<short> *matches = query->solution();
    if ( matches->size() == 0 )
    {
        cout << "\n\tSorry, "
             << " no matching lines were found in text.\n"
             << endl;
        return;
    }

    for ( set<short>::const_iterator it = matches->begin();
          it != matches->end(); ++it )
    {
        const int line = *it;
        // Lines are stored from 0; show them from 1 so the user is not confused.
        cout << "( " << line + 1 << " ) "
             << (*lines_of_text)[ line ] << '\n';
    }
    cout << endl;
}
UserQuery.h
程序代码:
//#ifndef USER_QUERY_H
//#define USER_QUERY_H
class UserQuery
{
public:
UserQuery( vector< string > *pquery = 0 ):_query( pquery ),_eval( 0 ),_paren( 0 ){} //构造函数
void query( vector< string > *pq ); //接收传递用户查询字串向量 地址
void displayQuery(); //显示查询
Query *eval_query(); // 建立层次结构
static void word_map( map<string,loc*> *pwm ) {/* if ( !_word_map )*/ _word_map = pwm; } //设置map
private:
enum QueryType { WORD = 1, AND, OR, NOT, RPAREN, LPAREN }; //定义 枚举类型名 QueryType
QueryType evalQueryString( const string &query ); //声明一个函数,用于 计算 查询字串中每一个字串的枚举类型
void evalWord( const string &query );
void evalNot();
void evalOr();
void evalAnd();
void evalRParen();
bool integrity_check();
vector<string> *_query; //实际的用户查询 字串向量
int _paren; //帮助我们改变操作符计算的缺省优先级
Query *_eval; //指向在 eval_query()中建立起来的查询层次表示
stack<Query*,vector<Query*> > _query_stack; //存放复合查询中的完整操作数(我们放置 NameQuery对象的地方)
stack<Query*,vector<Query*> > _current_op; //存放缺少右操作数的不完整操作符,即:当前要完成的操作
static short _lparenOn, _rparenOn; //记录了与当前查询节点相关联的括号的种类和数目
static map<string,loc*> *_word_map;
};
// #endif
UserQuery.cpp
程序代码:
// BUG 1
// The function below has a problem: the printed query contains an extra parenthesis.
// Tracing showed that the NameQuery built by the two-argument constructor carried one extra right parenthesis.
// Definitions of the static counters of pending left and right parentheses.
short UserQuery::_lparenOn = 0;
short UserQuery::_rparenOn = 0;
// Definition of the static word map; it starts out as an empty heap-allocated
// map and is replaced by UserQuery::word_map().
map<string,loc*> * UserQuery::_word_map=new map<string,loc*>();
//下面的函数有问题。查询语句多了个括号。
inline void UserQuery:: evalWord( const string &query )
{
NameQuery *pq; //定义一个NameQuery指针
loc *ploc; //定义一个 vector< pair< short, short > > 指针 typedef vector<location> loc;
/************ /检测_word_map是否为空 ,
map<string,loc*>::iterator it = _word_map->begin(),it_end=_word_map->end();
while ( it != it_end )
{
cout << it->first << ":" << it->second << endl;
it++;
}
//*/
if ( ! _word_map->count( query )) //如果map中不存在当前单词
pq = new NameQuery( query ); //用单参数构造函数 建立一个当前单词的 NameQuery 并由pq指向它
else //如果map中 存在当前单词
{
//构造函数可能有问题。
ploc = ( *_word_map )[ query ]; //则让 ploc指向 map中的当前单词的 位置vector (vector< pair<short,short> > )
//这里构造时是不是多了个括号?
pq = new NameQuery( query, ploc ); //用双参数构造函数 用当前单词的位置vector 新建一个NameQuery
//下面开始检测pq指向的NameQuery
/*cout << "<< pq->name()" << pq->name() << "\n"
<< "pq->lparentheses()" << pq->lparentheses() << "\n"
<< "pq->rparentheses()" << pq->rparentheses() << "\n"
<< "pq->solution()" << pq->solution() << "\n"
<< endl;
检测出结果:专由NameQuery调用的基类构造函数未初始化左右括号数。已经修正。
//*/
}
if ( _current_op.size() <= _paren )
{
_query_stack.push( pq );
}
else
{
Query *pop = _current_op.top();
_current_op.pop();
pop->add_op( pq );
_query_stack.push( pop );
}
}
inline void UserQuery:: evalAnd()
{
Query *pop = _query_stack.top(); _query_stack.pop();
AndQuery *pq = new AndQuery( pop );
if ( _lparenOn )
{ pq->lparentheses( _lparenOn ); _lparenOn = 0; }
if ( _rparenOn )
{ pq->rparentheses( _rparenOn ); _rparenOn = 0; }
_current_op.push( pq );
}
inline void UserQuery:: evalOr()
{
Query *pop = _query_stack.top(); _query_stack.pop();
OrQuery *pq = new OrQuery( pop );
if ( _lparenOn )
{ pq->lparentheses( _lparenOn ); _lparenOn = 0; }
if ( _rparenOn )
{ pq->rparentheses( _rparenOn ); _rparenOn = 0; }
_current_op.push( pq );
}
inline void UserQuery:: evalNot()
{
NotQuery *pq = new NotQuery;
if ( _lparenOn )
{ pq->lparentheses( _lparenOn ); _lparenOn = 0; }
if ( _rparenOn )
{ pq->rparentheses( _rparenOn ); _rparenOn = 0; }
_current_op.push( pq );
}
// Handles a ")" token after the nesting depth has been decremented: if an
// operator is still waiting at the level just closed, the newest operand
// becomes its right operand and the completed operator becomes the newest
// operand.
//
// Nothing happens when there is no operand to attach.
inline void UserQuery::evalRParen()
{
    if ( _paren < _current_op.size() && ! _query_stack.empty() )
    {
        Query *poperand = _query_stack.top();
        _query_stack.pop();

        Query *pop = _current_op.top();
        _current_op.pop();

        pop->add_op( poperand );
        _query_stack.push( pop );
    }
}
// Classifies one query token: "(", ")", "||", "&&" and "!" map to their
// QueryType; every other token is a WORD.
inline UserQuery::QueryType UserQuery::evalQueryString(const string& q_str)
{
    if ( q_str == "!" )  return NOT;
    if ( q_str == "&&" ) return AND;
    if ( q_str == "||" ) return OR;
    if ( q_str == ")" )  return RPAREN;
    if ( q_str == "(" )  return LPAREN;
    return WORD;
}
// Builds the Query hierarchy for the tokens currently stored in _query and
// returns its root.
//
// Returns 0 when no query has been set or when the tokens do not yield any
// operand (for example a query consisting only of operators or parentheses).
//
// Every call starts from a clean slate: the nesting depth, the pending
// parenthesis counters and the operand stack are reset first, so leftovers
// of an earlier query cannot leak into this one.
// NOTE(review): the stack of operators still waiting for an operand is not
// reset here.
Query* UserQuery::eval_query()
{
    _paren = 0;
    _lparenOn = 0;
    _rparenOn = 0;
    while ( ! _query_stack.empty() )
        _query_stack.pop();

    if ( ! _query )
        return 0;

    vector<string >::iterator it = _query->begin(), end_it = _query->end();
    for ( ; it != end_it; ++it )    // visit every token of the query
    {
        switch( evalQueryString( *it ) )
        {
        case WORD:
            evalWord( *it );
            break;
        case AND:
            evalAnd();
            break;
        case OR:
            evalOr();
            break;
        case NOT:
            evalNot();
            break;
        case LPAREN:
            ++_paren;
            ++_lparenOn;
            break;
        case RPAREN:
            --_paren;
            ++_rparenOn;
            evalRParen();
            break;
        }
    }

    // top() on an empty stack is undefined, so report "no result" instead.
    if ( _query_stack.empty() )
        return 0;
    return _query_stack.top();
}
// Replaces the stored query with a private copy of the tokens in *pq.
// A null pq stores an empty query.
void UserQuery::query( vector< string > *pq )
{
    // Build the copy before releasing the old one, so the call stays correct
    // even if pq refers to the currently stored vector.
    vector< string > *fresh = pq ? new vector< string >( *pq )
                                 : new vector< string >();
    delete _query;
    _query = fresh;
}
还有一个记事本文件,用来被查询的。
text.txt
Alice Emma has long flowing red hair. Her Daddy says
when the wind blows through her hair, it looks almost alive,
like a fiery bird in flight. A beautiful fiery bird, he tells her,
magical but untamed. "Daddy, shush, there is no such thing,"
she tells him, at the same time wanting him to tell her more.
Shyly, she asks, "I mean, Daddy, is there?"