#include <pdf_searcher.h>
Public Member Functions | |
PDFSearcher (PDFController *doc) | |
~PDFSearcher () | |
bool | begin_search_next (const PDFSearchCriteria &criteria, const string &from_anchor) |
construct a search context for the "search next" task | |
bool | begin_search_all (const PDFSearchCriteria &criteria) |
construct a search context for the "search all" task | |
SearchResult | search_next (PDFSearchDocument &results, PDFSearchTask *task) |
search the next word | |
SearchResult | seach_all (PDFSearchDocument &results, PDFSearchTask *task) |
search in the whole document | |
bool | dump_search_process (string &anchor) |
dump the current search progress as an anchor string, so that the search task can be restarted later | |
void | notify (SearchResult ret_code, PDFSearchDocument &results, unsigned int search_id) |
notify the listeners that the searching is done | |
PDFController * | get_doc_ctrl () const |
get the pointer to the PDFController instance | |
Static Public Member Functions | |
static void | export_search_doc_to_coll (PDFSearchDocument &doc, PDFRangeCollection &collection) |
Definition at line 97 of file pdf_searcher.h.
pdf::PDFSearcher::PDFSearcher(PDFController *doc) [inline, explicit]
Definition at line 100 of file pdf_searcher.h.
pdf::PDFSearcher::~PDFSearcher() [inline]
Definition at line 105 of file pdf_searcher.h.
bool pdf::PDFSearcher::begin_search_all(const PDFSearchCriteria &criteria)
construct a search context for the "search all" task
Definition at line 93 of file pdf_searcher.cpp.
References pdf::PDFSearchCriteria::case_sensitive, pdf::SearchContext::case_sensitive, pdf::SearchContext::char_cursor, pdf::SearchContext::dst_words, pdf::SearchContext::forward, pdf::PDFSearchCriteria::match_whole_word, pdf::SearchContext::match_whole_word, pdf::SearchContext::page_num, pdf::SearchContext::search_all, pdf::PDFSearchCriteria::text, and pdf::SearchContext::word_cursor.
Referenced by pdf::PDFSearchTask::execute().
00094 {
00095     clear_search_ctx();
00096
00097     // construct the search context
00098     search_ctx.case_sensitive = criteria.case_sensitive;
00099     search_ctx.match_whole_word = criteria.match_whole_word;
00100
00101     // start from the first page
00102     search_ctx.page_num = 1;
00103     //search_ctx.forward = criteria.forward;
00104     search_ctx.forward = true;
00105     search_ctx.search_all = true;
00106
00107     // start from the first word
00108     search_ctx.word_cursor = 0;
00109     search_ctx.char_cursor = 0;
00110     parse_dst_string(criteria.text, search_ctx.dst_words);
00111
00112     return true;
00113 }
bool pdf::PDFSearcher::begin_search_next(const PDFSearchCriteria &criteria, const string &from_anchor)
construct a search context for the "search next" task
Definition at line 53 of file pdf_searcher.cpp.
References pdf::PDFSearchCriteria::case_sensitive, pdf::SearchContext::case_sensitive, pdf::SearchContext::char_cursor, pdf::PDFAnchor::char_idx, pdf::SearchContext::dst_words, pdf::PDFSearchCriteria::forward, pdf::SearchContext::forward, pdf::PDFSearchCriteria::match_whole_word, pdf::SearchContext::match_whole_word, pdf::PDFController::page_count(), pdf::SearchContext::page_num, pdf::PDFAnchor::page_num, pdf::SearchContext::search_all, pdf::PDFSearchCriteria::text, pdf::SearchContext::word_cursor, and pdf::PDFAnchor::word_num.
Referenced by pdf::PDFSearchTask::execute().
00055 {
00056     PDFAnchor from_param(from_anchor);
00057     if (from_param.page_num <= 0
00058         || from_param.page_num >
00059         static_cast<int>(doc_controller->page_count()))
00060     {
00061         from_param.page_num = 1;
00062     }
00063
00064     clear_search_ctx();
00065     // construct the search context
00066
00067     search_ctx.case_sensitive = criteria.case_sensitive;
00068     search_ctx.match_whole_word = criteria.match_whole_word;
00069     search_ctx.page_num = from_param.page_num;
00070     search_ctx.forward = criteria.forward;
00071     search_ctx.search_all = false;
00072     search_ctx.word_cursor = from_param.word_num;
00073     search_ctx.char_cursor = from_param.char_idx;
00074
00075     if (search_ctx.forward)
00076     {
00077         // move forward the start char index
00078         search_ctx.char_cursor++;
00079     }
00080     else
00081     {
00082         // move backward the start char index
00083         // NOTE: We should care about the boundary problem
00084         // the start anchor might be the last word and the last char
00085         search_ctx.char_cursor--;
00086     }
00087
00088     parse_dst_string(criteria.text, search_ctx.dst_words);
00089
00090     return true;
00091 }
bool pdf::PDFSearcher::dump_search_process(string &anchor)
dump the current search progress as an anchor string, so that the search task can be restarted later
Definition at line 225 of file pdf_searcher.cpp.
References pdf::SearchContext::char_cursor, pdf::PDFAnchor::char_idx, pdf::PDFAnchor::get_string(), pdf::SearchContext::page_num, pdf::PDFAnchor::page_num, pdf::SearchContext::word_cursor, and pdf::PDFAnchor::word_num.
Referenced by pdf::PDFSearchTask::execute().
00226 {
00227     PDFAnchor process;
00228     process.page_num = search_ctx.page_num;
00229     process.word_num = search_ctx.word_cursor;
00230     process.char_idx = search_ctx.char_cursor;
00231     //process.file_name = get_doc_ctrl()->name();
00232
00233     anchor = process.get_string();
00234     return true;
00235 }
void pdf::PDFSearcher::export_search_doc_to_coll(PDFSearchDocument &doc, PDFRangeCollection &collection) [static]
Export the PDFSearchDocument to a PDFRangeCollection for UDS. The content of the doc is cleared by this function. TODO: this function and PDFSearchDocument should be removed because of their low efficiency.
Definition at line 332 of file pdf_searcher.cpp.
References pdf::PDFCollection< T >::add(), pdf::PDFCollection< T >::clear(), pdf::PluginRangeImpl::end_anchor, pdf::PDFCollection< T >::get(), _UDSString::get_buffer, pdf::PDFCollection< T >::size(), and pdf::PluginRangeImpl::start_anchor.
Referenced by notify().
00334 {
00335     for(int i = 0; i < doc.size(); ++i)
00336     {
00337         PDFSearchPage *page = doc.get(i);
00338         for(int k = 0; k < page->size(); ++k)
00339         {
00340             PluginRangeImpl *range = page->get(k);
00341             if (range == 0)
00342             {
00343                 break;
00344             }
00345
00346             PluginRangeImpl *save_range = new PluginRangeImpl;
00347             save_range->start_anchor = new StringImpl(
00348                 range->start_anchor->get_buffer(range->start_anchor));
00349             save_range->end_anchor = new StringImpl(
00350                 range->end_anchor->get_buffer(range->end_anchor));
00351
00352             collection.add(save_range);
00353         }
00354         page->clear();
00355     }
00356     doc.clear();
00357 }
PDFController* pdf::PDFSearcher::get_doc_ctrl() const [inline]
get the pointer to the PDFController instance
Definition at line 129 of file pdf_searcher.h.
Referenced by pdf::PDFSearchTask::get_user_data().
void pdf::PDFSearcher::notify(SearchResult ret_code, PDFSearchDocument &results, unsigned int search_id)
notify the listeners that the searching is done
Definition at line 278 of file pdf_searcher.cpp.
References pdf::Signal< R, A1, A2, A3, A4, A5 >::broadcast(), export_search_doc_to_coll(), and pdf::PDFController::sig_search_results_ready.
Referenced by pdf::PDFSearchTask::execute().
00280 {
00281     // export the results from PDFSearchDocument to PDFRangeCollection
00282     PDFRangeCollection *coll = new PDFRangeCollection;
00283     export_search_doc_to_coll(results, *coll);
00284
00285     doc_controller->sig_search_results_ready.broadcast(ret_code, coll
00286         , search_id);
00287 }
SearchResult pdf::PDFSearcher::seach_all(PDFSearchDocument &results, PDFSearchTask *task)
search in the whole document
Definition at line 168 of file pdf_searcher.cpp.
References pdf::PDFCollection< T >::add(), pdf::Task::is_aborted(), pdf::Task::is_paused(), LOGPRINTF, pdf::PDFController::page_count(), pdf::SearchContext::page_num, pdf::RES_ABORTED, pdf::RES_NOT_FOUND, pdf::RES_OK, pdf::RES_PAUSED, pdf::PDFElemCollection< T, E >::set_element(), pdf::PDFCollection< T >::size(), and pdf::SearchContext::word_cursor.
Referenced by pdf::PDFSearchTask::execute().
00170 {
00171     // return code of this function
00172     SearchResult res = RES_NOT_FOUND;
00173
00174     // return code of searching every page
00175     SearchResult res_once = res;
00176
00177     PDFSearchPage *search_page = new PDFSearchPage;
00178
00179     while ( search_ctx.page_num > 0
00180         && search_ctx.page_num <=
00181         static_cast<int>(doc_controller->page_count()))
00182     {
00183
00184         // search the whole page if it is not the current page
00185         res_once = search_current_page(search_ctx, *search_page);
00186
00187         if (res_once == RES_OK)
00188         {
00189             search_page->set_element(search_ctx.page_num);
00190             results.add(search_page);
00191             search_page = new PDFSearchPage;
00192         }
00193
00194         // forward : increase page number; otherwise decrease page number
00195         search_ctx.page_num++;
00196
00197         // reset the index of start word to be 0
00198         search_ctx.word_cursor = 0;
00199
00200         // abort current task
00201         if (task->is_aborted())
00202         {
00203             LOGPRINTF("Task Search All canceled!\n");
00204             res = RES_ABORTED;
00205             break;
00206         }
00207         else if (task->is_paused())
00208         {
00209             LOGPRINTF("Task Search All paused!\n");
00210             res = RES_PAUSED;
00211             break;
00212         }
00213     }
00214
00215     delete search_page;
00216
00217     if (results.size() > 0)
00218     {
00219         res = RES_OK;
00220     }
00221
00222     return res;
00223 }
SearchResult pdf::PDFSearcher::search_next(PDFSearchDocument &results, PDFSearchTask *task)
search the next word
Definition at line 115 of file pdf_searcher.cpp.
References pdf::PDFCollection< T >::add(), pdf::SearchContext::forward, pdf::Task::is_aborted(), pdf::Task::is_paused(), LOGPRINTF, pdf::PDFController::page_count(), pdf::SearchContext::page_num, pdf::RES_ABORTED, pdf::RES_NOT_FOUND, pdf::RES_OK, pdf::RES_PAUSED, pdf::PDFElemCollection< T, E >::set_element(), and pdf::SearchContext::word_cursor.
Referenced by pdf::PDFSearchTask::execute().
00117 {
00118
00119     PDFSearchPage *search_page = new PDFSearchPage;
00120     SearchResult res = RES_NOT_FOUND;
00121     while(res != RES_OK
00122         && search_ctx.page_num > 0
00123         && search_ctx.page_num <=
00124         static_cast<int>(doc_controller->page_count()))
00125     {
00126         res = search_current_page(search_ctx, *search_page);
00127
00128         if (res != RES_OK)
00129         {
00130             // forward : increase page number; otherwise decrease page number
00131             search_ctx.forward ? search_ctx.page_num++
00132                 : search_ctx.page_num--;
00133
00134             // if it is not the first page, start from the first word if forward
00135             // else start from the last word
00136             search_ctx.word_cursor = -1;
00137         }
00138
00139         // abort current task
00140         if (task->is_aborted())
00141         {
00142             LOGPRINTF("Task Search Next canceled!\n");
00143             res = RES_ABORTED;
00144             break;
00145         }
00146         else if (task->is_paused())
00147         {
00148             LOGPRINTF("Task Search Next paused!\n");
00149             res = RES_PAUSED;
00150             break;
00151         }
00152     }
00153
00154     if (res != RES_OK)
00155     {
00156         delete search_page;
00157     }
00158     else
00159     {
00160         search_page->set_element(search_ctx.page_num);
00161         results.add(search_page);
00162     }
00163
00164     return res;
00165
00166 }