pdf_searcher.h

Go to the documentation of this file.
00001 /*
00002  * File Name: pdf_searcher.h
00003  */
00004 
00005 /*
00006  * This file is part of uds-plugin-pdf.
00007  *
00008  * uds-plugin-pdf is free software: you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation, either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * uds-plugin-pdf is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program. If not, see <http://www.gnu.org/licenses/>.
00020  */
00021 
00022 /**
00023  * Copyright (C) 2008 iRex Technologies B.V.
00024  * All rights reserved.
00025  */
00026 
00027 #ifndef PDF_SEARCHER_H_
00028 #define PDF_SEARCHER_H_
00029 
00030 #include "pdf_define.h"
00031 #include "pdf_collection.h"
00032 #include "pdf_search_criteria.h"
00033 
00034 namespace pdf
00035 {
00036 
00037 /// @brief the search context wrappers not only the search criteria,
00038 /// but also the page number and start index. It would be only used in our
00039 /// searching algorithm.
00040 struct SearchContext
00041 {
00042     bool match_whole_word;
00043     bool case_sensitive;
00044     bool search_all;
00045     bool forward;
00046     int  word_cursor;
00047     int  char_cursor;
00048     int  page_num;
00049     stringlist dst_words;
00050 
00051     SearchContext(): match_whole_word(true)
00052         , case_sensitive(true)
00053         , search_all(false)
00054         , forward(true)
00055         , word_cursor(0)
00056         , char_cursor(0)
00057         , page_num(1)
00058         , dst_words() {}
00059     ~SearchContext() {}
00060 };
00061 
/// @brief Information of a search task
// TO BE REMOVED
struct PDFSearchTaskInfo
{
    unsigned int ref_id;   // id of the task (presumably the search id -- confirm)
    bool search_all;       // "search all" flag of the task
    bool forward;          // search direction flag of the task

    /// Copy all fields from @p right.
    ///
    /// The source is taken by const reference so that const objects and
    /// temporaries can be assigned from, and a reference to *this is
    /// returned so that assignments can be chained (a = b = c).
    ///
    /// @param right the task information to copy from
    /// @return reference to this object
    PDFSearchTaskInfo& operator = (const PDFSearchTaskInfo &right)
    {
        ref_id     = right.ref_id;
        search_all = right.search_all;
        forward    = right.forward;
        return *this;
    }
};
00077 //
00078 
00079 typedef PDFElemCollection<PluginRangeImpl*, int> PDFSearchPage;
00080 
00081 typedef PDFCollection<PDFSearchPage*> PDFSearchDocument;
00082 
/// Result codes returned by the search functions and passed to
/// PDFSearcher::notify(). The enumerators are numbered consecutively,
/// starting from RES_OK = 0.
enum SearchResult
{
    RES_OK = 0,
    RES_NOT_FOUND,
    RES_BEGIN,
    RES_END,
    RES_ERROR,
    RES_ABORTED,
    RES_PAUSED
};
00093 
00094 /// @brief PDFSearcher provides the searching function
00095 class PDFController;
00096 class PDFSearchTask;
00097 class PDFSearcher
00098 {
00099 public:
00100     explicit PDFSearcher(PDFController *doc)
00101         : doc_controller(doc)
00102         , search_ctx()
00103     {}
00104 
00105     ~PDFSearcher()
00106     {}
00107 
00108     /// construct a search context for the "search next" task
00109     bool begin_search_next(const PDFSearchCriteria &criteria
00110                            , const string &from_anchor);
00111 
00112     /// construct a search context for the "search all" task
00113     bool begin_search_all(const PDFSearchCriteria &criteria);
00114 
00115     /// search the next word
00116     SearchResult search_next(PDFSearchDocument &results, PDFSearchTask *task);
00117 
00118     /// search in the whole document
00119     SearchResult seach_all(PDFSearchDocument &results, PDFSearchTask *task);
00120 
00121     /// dump the current search process, for restarting search task
00122     bool dump_search_process(string &anchor);
00123 
00124     /// notify the listeners that the searching is done
00125     void notify(SearchResult ret_code, PDFSearchDocument &results
00126         , unsigned int search_id);
00127 
00128     /// get the pointer of PDFController instance
00129     PDFController* get_doc_ctrl() const { return doc_controller; }
00130 
00131     /// export the PDFSearchDocument to PDFSearchCollection for UDS.
00132     /// content of the doc would be cleared after this function.
00133     /// TODO. This function and PDFSearchDocument should be removed due to
00134     /// the low efficiency.
00135     static void export_search_doc_to_coll(PDFSearchDocument &doc
00136         , PDFRangeCollection &collection);
00137 
00138 private:
00139     /// Search in the current PDFPage
00140     SearchResult search_current_page(SearchContext &ctx
00141                                      , PDFSearchPage &results);
00142 
00143     /// Parse the destination string
00144     void parse_dst_string(const string &dst_str, stringlist &str_list);
00145 
00146     /// Clear the search context
00147     void clear_search_ctx();
00148 
00149 private:
00150     // Reference to PDF renderer
00151     PDFController *doc_controller;
00152 
00153     // Search context contains the necessary status when executing a search
00154     // task
00155     SearchContext search_ctx;
00156 
00157 };
00158 
00159 };
00160 
00161 #endif //PDF_SEARCHER_H_
00162 
Generated by  doxygen 1.6.2-20100208